/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * Basic NUMA support in terms of locality groups
 *
 * Solaris needs to know which CPUs, memory, etc. are near each other to
 * provide good performance on NUMA machines by optimizing for locality.
 * In order to do this, a new abstraction called a "locality group (lgroup)"
 * has been introduced to keep track of which CPU-like and memory-like hardware
 * resources are close to each other.  Currently, latency is the only measure
 * used to determine how to group hardware resources into lgroups, but this
 * does not limit the groupings to be based solely on latency.  Other factors
 * may be used to determine the groupings in the future.
 *
 * Lgroups are organized into a hierarchy or topology that represents the
 * latency topology of the machine.  There is always at least a root lgroup in
 * the system.  It represents all the hardware resources in the machine at a
 * latency big enough that any hardware resource can at least access any other
 * hardware resource within that latency.  A Uniform Memory Access (UMA)
 * machine is represented with one lgroup (the root).
 * In contrast, a NUMA machine is represented at least by the root lgroup and
 * some number of leaf lgroups where the leaf lgroups contain the hardware
 * resources within the least latency of each other and the root lgroup still
 * contains all the resources in the machine.  Some number of intermediate
 * lgroups may exist which represent more levels of locality than just the
 * local latency of the leaf lgroups and the system latency of the root
 * lgroup.  Non-leaf lgroups (e.g. root and intermediate lgroups) contain the
 * next nearest resources to their child lgroups.  Thus, the lgroup hierarchy
 * from a given leaf lgroup to the root lgroup shows the hardware resources
 * from closest to farthest from the leaf lgroup such that each successive
 * ancestor lgroup contains the next nearest resources at the next level of
 * locality from the previous.
 *
 * The kernel uses the lgroup abstraction to know how to allocate resources
 * near a given process/thread.  At fork() and lwp/thread_create() time, a
 * "home" lgroup is chosen for a thread.  This is done by picking the lgroup
 * with the lowest load average.  Binding to a processor or processor set will
 * change the home lgroup for a thread.  The scheduler has been modified to try
 * to dispatch a thread on a CPU in its home lgroup.  Physical memory
 * allocation is lgroup aware too, so memory will be allocated from the current
 * thread's home lgroup if possible.  If the desired resources are not
 * available, the kernel traverses the lgroup hierarchy going to the parent
 * lgroup to find resources at the next level of locality until it reaches the
 * root lgroup.
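 *
 * As an illustrative sketch (an editor's addition, not a quote of any kernel
 * path), a consumer walking up the hierarchy from a thread's home leaf could
 * use only the lgrp_t fields referenced in this file:
 *
 *	lgrp_t *lgrp = lgrp_table[curthread->t_lpl->lpl_lgrpid];
 *	while (lgrp != NULL) {
 *		(try to allocate from the resources in lgrp->lgrp_set[])
 *		lgrp = lgrp->lgrp_parent;
 *	}
 *
 * The walk terminates at the root lgroup, whose lgrp_parent is NULL.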
 */

#include <sys/lgrp.h>
#include <sys/lgrp_user.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/var.h>
#include <sys/thread.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/kmem.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_spt.h>
#include <vm/seg_vn.h>
#include <vm/as.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/kstat.h>
#include <sys/sysmacros.h>
#include <sys/chip.h>
#include <sys/promif.h>
#include <sys/sdt.h>

lgrp_gen_t	lgrp_gen = 0;		/* generation of lgroup hierarchy */
lgrp_t *lgrp_table[NLGRPS_MAX];	/* table of all initialized lgrp_t structs */
				/* indexed by lgrp_id */
int	nlgrps;			/* number of lgroups in machine */
int	lgrp_alloc_hint = -1;	/* hint for where to try to allocate next */
int	lgrp_alloc_max = 0;	/* max lgroup ID allocated so far */

/*
 * Kstat data for lgroups.
 *
 * Actual kstat data is collected in lgrp_stats array.
 * The lgrp_kstat_data array of named kstats is used to extract data from
 * lgrp_stats and present it to kstat framework. It is protected from parallel
 * modifications by lgrp_kstat_mutex. This may cause some contention when
 * several kstat commands run in parallel but this is not the
 * performance-critical path.
 */
extern struct lgrp_stats lgrp_stats[];	/* table of per-lgrp stats */

/*
 * Declare kstat names statically for enums as defined in the header file.
 */
LGRP_KSTAT_NAMES;

static void	lgrp_kstat_init(void);
static int	lgrp_kstat_extract(kstat_t *, int);
static void	lgrp_kstat_reset(lgrp_id_t);

static struct kstat_named lgrp_kstat_data[LGRP_NUM_STATS];
static kmutex_t lgrp_kstat_mutex;


/*
 * max number of lgroups supported by the platform
 */
int	nlgrpsmax = 0;

/*
 * The root lgroup. Represents the set of resources at the system wide
 * level of locality.
 */
lgrp_t		*lgrp_root = NULL;

/*
 * During system bootstrap cp_default does not contain the list of lgrp load
 * averages (cp_lgrploads). The list is allocated after the first CPU is brought
 * on-line when cp_default is initialized by cpupart_initialize_default().
 * Configuring CPU0 may create a two-level topology with root and one leaf node
 * containing CPU0. This topology is initially constructed in a special
 * statically allocated 2-element lpl list lpl_bootstrap_list and later cloned
 * to cp_default when cp_default is initialized. The lpl_bootstrap_list is used
 * for all lpl operations until cp_default is fully constructed.
 *
 * The lpl_bootstrap_list is maintained by the code in lgrp.c. Every other
 * consumer who needs default lpl should use lpl_bootstrap which is a pointer to
 * the first element of lpl_bootstrap_list.
 */
#define	LPL_BOOTSTRAP_SIZE 2
static lpl_t	lpl_bootstrap_list[LPL_BOOTSTRAP_SIZE];
lpl_t		*lpl_bootstrap;

static lgrp_t	lroot;


/*
 * Size, in bytes, beyond which random memory allocation policy is applied
 * to non-shared memory.  Default is the maximum size, so random memory
 * allocation won't be used for non-shared memory by default.
 */
size_t	lgrp_privm_random_thresh = (size_t)(-1);

/*
 * Size, in bytes, beyond which random memory allocation policy is applied to
 * shared memory.  Default is 8MB (2 ISM pages).
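 *
 * (Editor's illustrative note: this is a plain kernel global, so it could,
 * for example, be lowered to 4MB from /etc/system with:
 *
 *	set lgrp_shm_random_thresh = 0x400000
 *
 * The same applies to lgrp_privm_random_thresh above.)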
 */
size_t	lgrp_shm_random_thresh = 8*1024*1024;

/*
 * Whether to do processor set aware memory allocation by default
 */
int	lgrp_mem_pset_aware = 0;

/*
 * Set the default memory allocation policy for root lgroup
 */
lgrp_mem_policy_t	lgrp_mem_policy_root = LGRP_MEM_POLICY_RANDOM;

/*
 * Set the default memory allocation policy.  For most platforms,
 * next touch is sufficient, but some platforms may wish to override
 * this.
 */
lgrp_mem_policy_t	lgrp_mem_default_policy = LGRP_MEM_POLICY_NEXT;


/*
 * lgroup CPU event handlers
 */
static void	lgrp_cpu_init(struct cpu *);
static void	lgrp_cpu_fini(struct cpu *, lgrp_id_t);
static lgrp_t	*lgrp_cpu_to_lgrp(struct cpu *);

static void	lgrp_latency_change(u_longlong_t, u_longlong_t);

/*
 * lgroup memory event handlers
 */
static void	lgrp_mem_init(int, lgrp_handle_t, boolean_t);
static void	lgrp_mem_fini(int, lgrp_handle_t, boolean_t);
static void	lgrp_mem_rename(int, lgrp_handle_t, lgrp_handle_t);

/*
 * lgroup CPU partition event handlers
 */
static void	lgrp_part_add_cpu(struct cpu *, lgrp_id_t);
static void	lgrp_part_del_cpu(struct cpu *);

static void	lgrp_root_init(void);

/*
 * lpl topology
 */
static void	lpl_init(lpl_t *, lpl_t *, lgrp_t *);
static void	lpl_clear(lpl_t *);
static void	lpl_leaf_insert(lpl_t *, struct cpupart *);
static void	lpl_leaf_remove(lpl_t *, struct cpupart *);
static void	lpl_rset_add(lpl_t *, lpl_t *);
static void	lpl_rset_del(lpl_t *, lpl_t *);
static int	lpl_rset_contains(lpl_t *, lpl_t *);
static void	lpl_cpu_adjcnt(lpl_act_t, struct cpu *);
static void	lpl_child_update(lpl_t *, struct cpupart *);
static int	lpl_pick(lpl_t *, lpl_t *);
static void	lpl_verify_wrapper(struct cpupart *);

/*
 * defines for lpl topology verifier return codes
 */

#define	LPL_TOPO_CORRECT		0
#define	LPL_TOPO_PART_HAS_NO_LPL	-1
#define	LPL_TOPO_CPUS_NOT_EMPTY		-2
#define	LPL_TOPO_LGRP_MISMATCH		-3
#define	LPL_TOPO_MISSING_PARENT		-4
#define	LPL_TOPO_PARENT_MISMATCH	-5
#define	LPL_TOPO_BAD_CPUCNT		-6
#define	LPL_TOPO_RSET_MISMATCH		-7
#define	LPL_TOPO_LPL_ORPHANED		-8
#define	LPL_TOPO_LPL_BAD_NCPU		-9
#define	LPL_TOPO_RSET_MSSNG_LF		-10
#define	LPL_TOPO_CPU_HAS_BAD_LPL	-11
#define	LPL_TOPO_BOGUS_HINT		-12
#define	LPL_TOPO_NONLEAF_HAS_CPUS	-13
#define	LPL_TOPO_LGRP_NOT_LEAF		-14
#define	LPL_TOPO_BAD_RSETCNT		-15

/*
 * Return whether lgroup optimizations should be enabled on this system
 */
int
lgrp_optimizations(void)
{
	/*
	 * System must have more than 2 lgroups to enable lgroup optimizations
	 *
	 * XXX This assumes that a 2 lgroup system has an empty root lgroup
	 * with one child lgroup containing all the resources. A 2 lgroup
	 * system with a root lgroup directly containing CPUs or memory might
	 * need lgroup optimizations with its child lgroup, but there
	 * isn't such a machine for now....
	 */
	if (nlgrps > 2)
		return (1);

	return (0);
}

/*
 * Build full lgroup topology
 */
static void
lgrp_root_init(void)
{
	lgrp_handle_t	hand;
	int		i;
	lgrp_id_t	id;

	/*
	 * Create the "root" lgroup
	 */
	ASSERT(nlgrps == 0);
	id = nlgrps++;

	lgrp_root = &lroot;

	lgrp_root->lgrp_cpu = NULL;
	lgrp_root->lgrp_mnodes = 0;
	lgrp_root->lgrp_nmnodes = 0;
	hand = lgrp_plat_root_hand();
	lgrp_root->lgrp_plathand = hand;

	lgrp_root->lgrp_id = id;
	lgrp_root->lgrp_cpucnt = 0;
	lgrp_root->lgrp_childcnt = 0;
	klgrpset_clear(lgrp_root->lgrp_children);
	klgrpset_clear(lgrp_root->lgrp_leaves);
	lgrp_root->lgrp_parent = NULL;
	lgrp_root->lgrp_chips = NULL;
	lgrp_root->lgrp_chipcnt = 0;
	lgrp_root->lgrp_latency = lgrp_plat_latency(hand, hand);

	for (i = 0; i < LGRP_RSRC_COUNT; i++)
		klgrpset_clear(lgrp_root->lgrp_set[i]);

	lgrp_root->lgrp_kstat = NULL;

	lgrp_table[id] = lgrp_root;

	/*
	 * Setup initial lpl list for CPU0 and initial t0 home.
	 * The only lpl space we have so far is lpl_bootstrap. It is used for
	 * all topology operations until cp_default is initialized, at which
	 * point t0.t_lpl will be updated.
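	 *
	 * (Illustratively: lpl_bootstrap_list[0] stands in for the root
	 * lgroup's lpl, and lpl_bootstrap_list[1], assigned lgroup ID 1
	 * just below, stands in for CPU0's prospective leaf lgroup.)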
	 */
	lpl_bootstrap = lpl_bootstrap_list;
	t0.t_lpl = lpl_bootstrap;
	cp_default.cp_nlgrploads = LPL_BOOTSTRAP_SIZE;
	lpl_bootstrap_list[1].lpl_lgrpid = 1;
	cp_default.cp_lgrploads = lpl_bootstrap;
}

/*
 * Initialize the lgroup framework and allow the platform to do the same
 */
void
lgrp_init(void)
{
	/*
	 * Initialize the platform
	 */
	lgrp_plat_init();

	/*
	 * Set max number of lgroups supported on this platform, which must
	 * not exceed the max number of lgroups supported by the common lgroup
	 * framework (e.g. NLGRPS_MAX is max elements in lgrp_table[], etc.)
	 */
	nlgrpsmax = lgrp_plat_max_lgrps();
	ASSERT(nlgrpsmax <= NLGRPS_MAX);
}

/*
 * Create the root and cpu0's lgroup, and set t0's home.
 */
void
lgrp_setup(void)
{
	/*
	 * Setup the root lgroup
	 */
	lgrp_root_init();

	/*
	 * Add cpu0 to an lgroup
	 */
	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)CPU, 0);
	lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)CPU, 0);
}

/*
 * Lgroup initialization is split in two parts. The first part
 * (lgrp_main_init()) is called right before start_other_cpus() in main. The
 * second part (lgrp_main_mp_init()) is called right after start_other_cpus()
 * when all CPUs are brought online and all distance information is available.
 *
 * When lgrp_main_init() is complete it sets lgrp_initialized;
 * lgrp_main_mp_init() sets lgrp_topo_initialized.
 */

/*
 * true when lgrp initialization has been completed.
 */
int	lgrp_initialized = 0;

/*
 * True when lgrp topology is constructed.
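 *
 * Illustrative boot-time ordering, assembled from the comments above rather
 * than quoted from main():
 *
 *	lgrp_init();		platform init, sets nlgrpsmax
 *	lgrp_setup();		root lgroup created, cpu0 added
 *	lgrp_main_init();	sets lgrp_initialized
 *	start_other_cpus();
 *	lgrp_main_mp_init();	sets lgrp_topo_initialized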
 */
int	lgrp_topo_initialized = 0;

/*
 * Init routine called after startup(), /etc/system has been processed,
 * and cpu0 has been added to an lgroup.
 */
void
lgrp_main_init(void)
{
	cpu_t		*cp = CPU;
	lgrp_id_t	lgrpid;
	int		i;
	/*
	 * Enforce a valid lgrp_mem_default_policy
	 */
	if ((lgrp_mem_default_policy <= LGRP_MEM_POLICY_DEFAULT) ||
	    (lgrp_mem_default_policy >= LGRP_NUM_MEM_POLICIES))
		lgrp_mem_default_policy = LGRP_MEM_POLICY_NEXT;

	/*
	 * See if mpo should be disabled.
	 * This may happen in the case of null proc LPA on Starcat.
	 * The platform won't be able to detect null proc LPA until after
	 * cpu0 and memory have already been added to lgroups.
	 * When and if it is detected, the Starcat platform will return
	 * a different platform handle for cpu0 which is what we check for
	 * here. If mpo should be disabled move cpu0 to its rightful place
	 * (the root), and destroy the remaining lgroups. This effectively
	 * provides a UMA lgroup topology.
	 */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;
	if (lgrp_table[lgrpid]->lgrp_plathand !=
	    lgrp_plat_cpu_to_hand(cp->cpu_id)) {
		lgrp_part_del_cpu(cp);
		lgrp_cpu_fini(cp, lgrpid);

		lgrp_cpu_init(cp);
		lgrp_part_add_cpu(cp, cp->cpu_lpl->lpl_lgrpid);

		ASSERT(cp->cpu_lpl->lpl_lgrpid == LGRP_ROOTID);

		for (i = 0; i <= lgrp_alloc_max; i++) {
			if (LGRP_EXISTS(lgrp_table[i]) &&
			    lgrp_table[i] != lgrp_root)
				lgrp_destroy(lgrp_table[i]);
		}
		klgrpset_clear(lgrp_root->lgrp_set[LGRP_RSRC_MEM]);
		klgrpset_add(lgrp_root->lgrp_set[LGRP_RSRC_MEM], LGRP_ROOTID);
	}

	/*
	 * Initialize kstats framework.
	 */
	lgrp_kstat_init();
	/*
	 * cpu0 is finally where it should be, so create its lgroup's kstats
	 */
	mutex_enter(&cpu_lock);
	lgrp_kstat_create(cp);
	mutex_exit(&cpu_lock);

	lgrp_plat_main_init();
	lgrp_initialized = 1;
}

/*
 * Finish lgrp initialization after all CPUs are brought on-line.
 * This routine is called after start_other_cpus().
 */
void
lgrp_main_mp_init(void)
{
	klgrpset_t changed;

	/*
	 * Update lgroup topology (if necessary)
	 */
	klgrpset_clear(changed);
	(void) lgrp_topo_update(lgrp_table, lgrp_alloc_max + 1, &changed);
	lgrp_topo_initialized = 1;
}

/*
 * Handle lgroup (re)configuration events (e.g. addition of CPU, etc.)
 */
void
lgrp_config(lgrp_config_flag_t event, uintptr_t resource, uintptr_t where)
{
	klgrpset_t	changed;
	cpu_t		*cp;
	lgrp_id_t	id;
	int		rc;

	switch (event) {
	/*
	 * The following (re)configuration events are common code
	 * initiated. lgrp_plat_config() is called here to inform the
	 * platform of the reconfiguration event.
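	 *
	 * A typical common code invocation, as in lgrp_setup() above, is:
	 *
	 *	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)CPU, 0);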
	 */
	case LGRP_CONFIG_CPU_ADD:
		lgrp_plat_config(event, resource);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_CPU_DEL:
		lgrp_plat_config(event, resource);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_CPU_ONLINE:
		cp = (cpu_t *)resource;
		lgrp_cpu_init(cp);
		lgrp_part_add_cpu(cp, cp->cpu_lpl->lpl_lgrpid);
		rc = lpl_topo_verify(cp->cpu_part);
		if (rc != LPL_TOPO_CORRECT) {
			panic("lpl_topo_verify failed: %d", rc);
		}
		lgrp_plat_config(event, resource);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_CPU_OFFLINE:
		cp = (cpu_t *)resource;
		id = cp->cpu_lpl->lpl_lgrpid;
		lgrp_part_del_cpu(cp);
		lgrp_cpu_fini(cp, id);
		rc = lpl_topo_verify(cp->cpu_part);
		if (rc != LPL_TOPO_CORRECT) {
			panic("lpl_topo_verify failed: %d", rc);
		}
		lgrp_plat_config(event, resource);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_CPUPART_ADD:
		cp = (cpu_t *)resource;
		lgrp_part_add_cpu((cpu_t *)resource, (lgrp_id_t)where);
		rc = lpl_topo_verify(cp->cpu_part);
		if (rc != LPL_TOPO_CORRECT) {
			panic("lpl_topo_verify failed: %d", rc);
		}
		lgrp_plat_config(event, resource);

		break;
	case LGRP_CONFIG_CPUPART_DEL:
		cp = (cpu_t *)resource;
		lgrp_part_del_cpu((cpu_t *)resource);
		rc = lpl_topo_verify(cp->cpu_part);
		if (rc != LPL_TOPO_CORRECT) {
			panic("lpl_topo_verify failed: %d", rc);
		}
		lgrp_plat_config(event, resource);

		break;
	/*
	 * The following events are initiated by the memnode
	 * subsystem.
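	 *
	 * For these events "resource" is a memory node (mnode) number and
	 * "where" is the platform handle of the lgroup gaining or losing
	 * the mnode, so a caller would look something like (illustrative,
	 * hypothetical call site):
	 *
	 *	lgrp_config(LGRP_CONFIG_MEM_ADD, mnode, hand);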
	 */
	case LGRP_CONFIG_MEM_ADD:
		lgrp_mem_init((int)resource, where, B_FALSE);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_MEM_DEL:
		lgrp_mem_fini((int)resource, where, B_FALSE);
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_MEM_RENAME: {
		lgrp_config_mem_rename_t *ren_arg =
		    (lgrp_config_mem_rename_t *)where;

		lgrp_mem_rename((int)resource,
		    ren_arg->lmem_rename_from,
		    ren_arg->lmem_rename_to);
		atomic_add_32(&lgrp_gen, 1);

		break;
	}
	case LGRP_CONFIG_GEN_UPDATE:
		atomic_add_32(&lgrp_gen, 1);

		break;
	case LGRP_CONFIG_FLATTEN:
		if (where == 0)
			lgrp_topo_levels = (int)resource;
		else
			(void) lgrp_topo_flatten(resource,
			    lgrp_table, lgrp_alloc_max, &changed);

		break;
	/*
	 * Initiated by platform latency probing code
	 */
	case LGRP_CONFIG_LATENCY_CHANGE:
		lgrp_latency_change((u_longlong_t)resource,
		    (u_longlong_t)where);

		break;
	case LGRP_CONFIG_NOP:

		break;
	default:
		break;
	}

}

/*
 * Called to add lgrp info into cpu structure from cpu_add_unit;
 * do not assume cpu is in cpu[] yet!
 *
 * CPUs are brought online with all other CPUs paused so we can't
 * allocate memory or we could deadlock the system, so we rely on
 * the platform to statically allocate as much space as we need
 * for the lgrp structs and stats.
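 *
 * (Editor's note: this constraint is why lgrp_create() below obtains any
 * newly needed lgrp_t from lgrp_plat_alloc() rather than kmem_alloc(); the
 * platform hands back space it set aside up front.)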
 */
static void
lgrp_cpu_init(struct cpu *cp)
{
	klgrpset_t	changed;
	int		count;
	lgrp_handle_t	hand;
	int		first_cpu;
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	struct cpu	*cptr;
	struct chip	*chp;

	/*
	 * This is the first time through if the resource set
	 * for the root lgroup is empty. After cpu0 has been
	 * initially added to an lgroup, the root's CPU resource
	 * set can never be empty, since the system's last CPU
	 * cannot be offlined.
	 */
	if (klgrpset_isempty(lgrp_root->lgrp_set[LGRP_RSRC_CPU])) {
		/*
		 * First time through.
		 */
		first_cpu = 1;
	} else {
		/*
		 * If cpu0 needs to move lgroups, we may come
		 * through here again, at which time cpu_lock won't
		 * be held, and lgrp_initialized will be false.
		 */
		ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);
		ASSERT(cp->cpu_part != NULL);
		first_cpu = 0;
	}

	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
	my_lgrp = lgrp_hand_to_lgrp(hand);

	if (my_lgrp == NULL) {
		/*
		 * Create new lgrp and add it to lgroup topology
		 */
		my_lgrp = lgrp_create();
		my_lgrp->lgrp_plathand = hand;
		my_lgrp->lgrp_latency = lgrp_plat_latency(hand, hand);
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_leaves, lgrpid);
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);

		count = 0;
		klgrpset_clear(changed);
		count += lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);
	} else if (my_lgrp->lgrp_latency == 0 && lgrp_plat_latency(hand, hand)
	    > 0) {
		/*
		 * Leaf lgroup was created, but latency wasn't available
		 * then.  So, set latency for it and fill in rest of lgroup
		 * topology now that we know how far it is from other leaf
		 * lgroups.
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_clear(changed);
		if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_CPU],
		    lgrpid))
			klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		count = lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);

		/*
		 * May have added new intermediate lgroups, so need to add
		 * resources other than CPUs which are added below
		 */
		(void) lgrp_mnode_update(changed, NULL);
	} else if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_CPU],
	    my_lgrp->lgrp_id)) {
		int	i;

		/*
		 * Update existing lgroup and lgroups containing it with CPU
		 * resource
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t		*lgrp;

			lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(lgrp) ||
			    !lgrp_rsets_member(lgrp->lgrp_set, lgrpid))
				continue;

			klgrpset_add(lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		}
	}

	lgrpid = my_lgrp->lgrp_id;
	cp->cpu_lpl = &cp->cpu_part->cp_lgrploads[lgrpid];

	/*
	 * For multi-lgroup systems, need to setup lpl for CPU0 or CPU0 will
	 * end up in lpl for lgroup 0 whether it is supposed to be in there or
	 * not since none of the lgroup IDs in the lpls have been set yet.
	 */
	if (first_cpu && nlgrpsmax > 1 && lgrpid != cp->cpu_lpl->lpl_lgrpid)
		cp->cpu_lpl->lpl_lgrpid = lgrpid;

	/*
	 * link the CPU into the lgrp's CPU list
	 */
	if (my_lgrp->lgrp_cpucnt == 0) {
		my_lgrp->lgrp_cpu = cp;
		cp->cpu_next_lgrp = cp->cpu_prev_lgrp = cp;
	} else {
		cptr = my_lgrp->lgrp_cpu;
		cp->cpu_next_lgrp = cptr;
		cp->cpu_prev_lgrp = cptr->cpu_prev_lgrp;
		cptr->cpu_prev_lgrp->cpu_next_lgrp = cp;
		cptr->cpu_prev_lgrp = cp;
	}
	my_lgrp->lgrp_cpucnt++;

	/*
	 * Add this cpu's chip to the per lgroup list
	 * if necessary
	 */
	if (cp->cpu_chip->chip_lgrp == NULL) {
		struct chip	*lcpr;

		chp = cp->cpu_chip;

		if (my_lgrp->lgrp_chipcnt == 0) {
			my_lgrp->lgrp_chips = chp;
			chp->chip_next_lgrp =
			    chp->chip_prev_lgrp = chp;
		} else {
			lcpr = my_lgrp->lgrp_chips;
			chp->chip_next_lgrp = lcpr;
			chp->chip_prev_lgrp =
			    lcpr->chip_prev_lgrp;
			lcpr->chip_prev_lgrp->chip_next_lgrp =
			    chp;
			lcpr->chip_prev_lgrp = chp;
		}
		chp->chip_lgrp = my_lgrp;
		chp->chip_balance = chp->chip_next_lgrp;
		my_lgrp->lgrp_chipcnt++;
	}
}

lgrp_t *
lgrp_create(void)
{
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	int		i;

	ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock));

	/*
	 * Find an open slot in the lgroup table and recycle unused lgroup
	 * left there if any
	 */
	my_lgrp = NULL;
	if (lgrp_alloc_hint == -1)
		/*
		 * Allocate from end when hint not set yet because no lgroups
		 * have been deleted yet
		 */
		lgrpid = nlgrps++;
	else {
		/*
		 * Start looking for next open slot from hint and
		 * leave hint at slot allocated
		 */
		for (i = lgrp_alloc_hint; i < nlgrpsmax; i++) {
			my_lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(my_lgrp)) {
				lgrpid = i;
				nlgrps++;
				break;
			}
		}
		lgrp_alloc_hint = lgrpid;
	}

	/*
	 * Keep track of max lgroup ID allocated so far to cut down on searches
	 */
	if (lgrpid > lgrp_alloc_max)
		lgrp_alloc_max = lgrpid;

	/*
	 * Need to allocate new lgroup if next open slot didn't have one
	 * for recycling
	 */
	if (my_lgrp == NULL)
		my_lgrp = lgrp_plat_alloc(lgrpid);

	if (nlgrps > nlgrpsmax || my_lgrp == NULL)
		panic("Too many lgrps for platform (%d)", nlgrps);

	my_lgrp->lgrp_id = lgrpid;
	my_lgrp->lgrp_latency = 0;
	my_lgrp->lgrp_plathand = LGRP_NULL_HANDLE;
	my_lgrp->lgrp_parent = NULL;
	my_lgrp->lgrp_childcnt = 0;
	my_lgrp->lgrp_mnodes = (mnodeset_t)0;
	my_lgrp->lgrp_nmnodes = 0;
	klgrpset_clear(my_lgrp->lgrp_children);
	klgrpset_clear(my_lgrp->lgrp_leaves);
	for (i = 0; i < LGRP_RSRC_COUNT; i++)
		klgrpset_clear(my_lgrp->lgrp_set[i]);

	my_lgrp->lgrp_cpu = NULL;
	my_lgrp->lgrp_cpucnt = 0;
	my_lgrp->lgrp_chips = NULL;
	my_lgrp->lgrp_chipcnt = 0;

	if (my_lgrp->lgrp_kstat != NULL)
		lgrp_kstat_reset(lgrpid);

	lgrp_table[my_lgrp->lgrp_id] = my_lgrp;

	return (my_lgrp);
}

void
lgrp_destroy(lgrp_t *lgrp)
{
	int	i;

	/*
	 * Unless this lgroup is being destroyed on behalf of
	 * the boot CPU, cpu_lock must be held
	 */
	ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock));

	if (nlgrps == 1)
		cmn_err(CE_PANIC, "Can't destroy only lgroup!");

	if (!LGRP_EXISTS(lgrp))
		return;

	/*
	 * Set hint to lgroup being deleted and try to keep lower numbered
	 * hints to facilitate finding empty slots
	 */
	if (lgrp_alloc_hint == -1 || lgrp->lgrp_id < lgrp_alloc_hint)
		lgrp_alloc_hint = lgrp->lgrp_id;

	/*
	 * Mark this lgroup to be recycled by setting its lgroup ID to
	 * LGRP_NONE and clear relevant fields
	 */
	lgrp->lgrp_id = LGRP_NONE;
	lgrp->lgrp_latency = 0;
	lgrp->lgrp_plathand = LGRP_NULL_HANDLE;
	lgrp->lgrp_parent = NULL;
	lgrp->lgrp_childcnt = 0;

	klgrpset_clear(lgrp->lgrp_children);
	klgrpset_clear(lgrp->lgrp_leaves);
	for (i = 0; i < LGRP_RSRC_COUNT; i++)
		klgrpset_clear(lgrp->lgrp_set[i]);

	lgrp->lgrp_mnodes = (mnodeset_t)0;
	lgrp->lgrp_nmnodes = 0;

	lgrp->lgrp_cpu = NULL;
	lgrp->lgrp_cpucnt = 0;
	lgrp->lgrp_chipcnt = 0;
	lgrp->lgrp_chips = NULL;

	nlgrps--;
}

/*
 * Initialize kstat data. Called from lgrp initialization code.
 */
static void
lgrp_kstat_init(void)
{
	lgrp_stat_t	stat;

	mutex_init(&lgrp_kstat_mutex, NULL, MUTEX_DEFAULT, NULL);

	for (stat = 0; stat < LGRP_NUM_STATS; stat++)
		kstat_named_init(&lgrp_kstat_data[stat],
		    lgrp_kstat_names[stat], KSTAT_DATA_INT64);
}

/*
 * initialize an lgrp's kstats if needed
 * called with cpu_lock held but not with cpus paused.
 * we don't tear these down now because we don't know about
 * memory leaving the lgrp yet...
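 *
 * the kstats installed below go in under module "lgrp", instance
 * <lgroup ID>, class "misc", so (illustratively) they can be read from
 * userland with, e.g.:
 *
 *	kstat -m lgrp -i 0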
 */

void
lgrp_kstat_create(cpu_t *cp)
{
	kstat_t		*lgrp_kstat;
	lgrp_id_t	lgrpid;
	lgrp_t		*my_lgrp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	lgrpid = cp->cpu_lpl->lpl_lgrpid;
	my_lgrp = lgrp_table[lgrpid];

	if (my_lgrp->lgrp_kstat != NULL)
		return;		/* already initialized */

	lgrp_kstat = kstat_create("lgrp", lgrpid, NULL, "misc",
	    KSTAT_TYPE_NAMED, LGRP_NUM_STATS,
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);

	if (lgrp_kstat != NULL) {
		lgrp_kstat->ks_lock = &lgrp_kstat_mutex;
		lgrp_kstat->ks_private = my_lgrp;
		lgrp_kstat->ks_data = &lgrp_kstat_data;
		lgrp_kstat->ks_update = lgrp_kstat_extract;
		my_lgrp->lgrp_kstat = lgrp_kstat;
		kstat_install(lgrp_kstat);
	}
}

/*
 * this will do something when we manage to remove now unused lgrps
 */

/* ARGSUSED */
void
lgrp_kstat_destroy(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
}

/*
 * Called when a CPU is off-lined.
 */
static void
lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid)
{
	lgrp_t *my_lgrp;
	struct cpu *prev;
	struct cpu *next;
	chip_t	*chp;

	ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

	prev = cp->cpu_prev_lgrp;
	next = cp->cpu_next_lgrp;

	prev->cpu_next_lgrp = next;
	next->cpu_prev_lgrp = prev;

	/*
	 * just because I'm paranoid doesn't mean...
	 */

	cp->cpu_next_lgrp = cp->cpu_prev_lgrp = NULL;

	my_lgrp = lgrp_table[lgrpid];
	my_lgrp->lgrp_cpucnt--;

	/*
	 * If the last CPU on its chip is being offlined
	 * then remove this chip from the per lgroup list.
	 *
	 * This is also done for the boot CPU when it needs
	 * to move between lgroups as a consequence of
	 * null proc lpa.
	 */
	chp = cp->cpu_chip;
	if (chp->chip_ncpu == 0 || !lgrp_initialized) {

		chip_t	*chpp;

		if (--my_lgrp->lgrp_chipcnt == 0)
			my_lgrp->lgrp_chips = NULL;
		else if (my_lgrp->lgrp_chips == chp)
			my_lgrp->lgrp_chips = chp->chip_next_lgrp;

		/*
		 * Walk this lgroup's chip list looking for chips that
		 * may try to balance against the one that's leaving
		 */
		for (chpp = chp->chip_next_lgrp; chpp != chp;
		    chpp = chpp->chip_next_lgrp) {
			if (chpp->chip_balance == chp)
				chpp->chip_balance = chp->chip_next_lgrp;
		}

		chp->chip_prev_lgrp->chip_next_lgrp = chp->chip_next_lgrp;
		chp->chip_next_lgrp->chip_prev_lgrp = chp->chip_prev_lgrp;

		chp->chip_next_lgrp = chp->chip_prev_lgrp = NULL;
		chp->chip_lgrp = NULL;
		chp->chip_balance = NULL;
	}

	/*
	 * Removing last CPU in lgroup, so update lgroup topology
	 */
	if (my_lgrp->lgrp_cpucnt == 0) {
		klgrpset_t	changed;
		int		count;
		int		i;

		my_lgrp->lgrp_cpu = NULL;

		/*
		 * Remove this lgroup from its lgroup CPU resources and remove
		 * lgroup from lgroup topology if it doesn't have any more
		 * resources in it now
		 */
		klgrpset_del(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		if (lgrp_rsets_empty(my_lgrp->lgrp_set)) {
			count = 0;
			klgrpset_clear(changed);
			count += lgrp_leaf_delete(my_lgrp, lgrp_table,
			    lgrp_alloc_max + 1,

/*
 * Update memory nodes in target lgroups and return ones that get changed.
 */
int
lgrp_mnode_update(klgrpset_t target, klgrpset_t *changed)
{
        int count;
        int i;
        int j;
        lgrp_t *lgrp;
        lgrp_t *lgrp_rsrc;

        count = 0;
        if (changed)
                klgrpset_clear(*changed);

        if (klgrpset_isempty(target))
                return (0);

        /*
         * Find each lgroup in target lgroups.
         */
        for (i = 0; i <= lgrp_alloc_max; i++) {
                /*
                 * Skip any lgroups that don't exist or aren't in the target
                 * group.
                 */
                lgrp = lgrp_table[i];
                if (!klgrpset_ismember(target, i) || !LGRP_EXISTS(lgrp)) {
                        continue;
                }

                /*
                 * Initialize memnodes for intermediate lgroups to 0
                 * and update them from scratch since they may have
                 * completely changed.
                 */
                if (lgrp->lgrp_childcnt && lgrp != lgrp_root) {
                        lgrp->lgrp_mnodes = (mnodeset_t)0;
                        lgrp->lgrp_nmnodes = 0;
                }

                /*
                 * Update memory nodes of the target lgroup with memory
                 * nodes from each lgroup in its lgroup memory resource set.
                 */
                for (j = 0; j <= lgrp_alloc_max; j++) {
                        int k;

                        /*
                         * Skip any lgroups that don't exist or aren't in
                         * memory resources of target lgroup.
                         */
                        lgrp_rsrc = lgrp_table[j];
                        if (!LGRP_EXISTS(lgrp_rsrc) ||
                            !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM],
                            j))
                                continue;

                        /*
                         * Update target lgroup's memnodes to include
                         * memnodes of this lgroup.
                         */
                        for (k = 0; k < sizeof (mnodeset_t) * NBBY; k++) {
                                mnodeset_t mnode_mask;

                                mnode_mask = (mnodeset_t)1 << k;
                                if ((lgrp_rsrc->lgrp_mnodes & mnode_mask) &&
                                    !(lgrp->lgrp_mnodes & mnode_mask)) {
                                        lgrp->lgrp_mnodes |= mnode_mask;
                                        lgrp->lgrp_nmnodes++;
                                }
                        }
                        count++;
                        if (changed)
                                klgrpset_add(*changed, lgrp->lgrp_id);
                }
        }

        return (count);
}
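
/*
 * Illustrative sketch (not taken from the lgroup implementation; the two
 * lgroup IDs are hypothetical): a caller that has just changed the memory
 * resources of a couple of lgroups could refresh their memnode information
 * with lgrp_mnode_update() and then inspect which lgroups were touched.
 */
static void
lgrp_mnode_update_example(lgrp_id_t lgrpid_a, lgrp_id_t lgrpid_b)
{
        klgrpset_t target;
        klgrpset_t changed;
        int count;

        klgrpset_clear(target);
        klgrpset_add(target, lgrpid_a);
        klgrpset_add(target, lgrpid_b);

        count = lgrp_mnode_update(target, &changed);

        /*
         * On return, "changed" holds the lgroups whose lgrp_mnodes and
         * lgrp_nmnodes were updated; "count" is the number of updates
         * performed.
         */
        ASSERT(count >= 0);
}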

/*
 * Memory copy-rename. Called when the "mnode" containing the kernel cage
 * memory is moved from one board to another. The "from" and "to" arguments
 * specify the source and the destination of the move.
 *
 * See plat_lgrp_config() for a detailed description of the copy-rename
 * semantics.
 *
 * lgrp_mem_rename() is called by the platform copy-rename code to update
 * the lgroup topology which is changing as memory moves from one lgroup to
 * another. It removes the mnode from the source lgroup and re-inserts it in
 * the target lgroup.
 *
 * lgrp_mem_rename() passes a flag to lgrp_mem_init() and lgrp_mem_fini()
 * telling that the insertion and deletion are part of a DR copy-rename
 * operation.
 *
 * There is one case which requires special handling. If the system contains
 * only two boards (mnodes), lgrp_mem_fini() removes the only mnode from the
 * lgroup hierarchy. This mnode is soon re-inserted back in the hierarchy by
 * lgrp_mem_init(), but there is a window when the system has no memory in
 * the lgroup hierarchy. If another thread tries to allocate memory during
 * this window, the allocation will fail, although the system has physical
 * memory. This may cause a system panic or a deadlock (some sleeping memory
 * allocations happen with cpu_lock held, which prevents lgrp_mem_init()
 * from re-inserting the mnode back).
 *
 * The lgrp_memnode_choose() function walks the lgroup hierarchy looking for
 * the lgrp with non-empty lgrp_mnodes. To deal with the special case above,
 * lgrp_mem_fini() does not remove the last mnode from
 * lgrp_root->lgrp_mnodes, but it updates the rest of the lgroup topology as
 * if the mnode was actually removed. The lgrp_mem_init() function
 * recognizes that the mnode being inserted represents such a special case
 * and updates the topology appropriately.
 */
void
lgrp_mem_rename(int mnode, lgrp_handle_t from, lgrp_handle_t to)
{
        /*
         * Remove the memory from the source node and add it to the
         * destination node.
         */
        lgrp_mem_fini(mnode, from, B_TRUE);
        lgrp_mem_init(mnode, to, B_TRUE);
}
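
/*
 * Worked example of the special case above (illustrative; the handles and
 * mnode number are hypothetical): suppose mnode 0 holds the kernel cage and
 * is the only mnode in the hierarchy, so lgrp_root->lgrp_mnodes == 0x1.
 * Then lgrp_mem_rename(0, from, to) behaves as follows:
 *
 *	lgrp_mem_fini(0, from, B_TRUE)
 *		updates the rest of the topology as if mnode 0 were gone,
 *		but leaves lgrp_root->lgrp_mnodes == 0x1 (last-mnode case)
 *	lgrp_mem_init(0, to, B_TRUE)
 *		sees that mnode 0 (and only mnode 0) is already in the root
 *		set, rebuilds the topology under "to", and skips the
 *		redundant root update
 *
 * At no point is lgrp_root->lgrp_mnodes empty, so an allocation racing with
 * the rename always finds at least one mnode in the hierarchy.
 */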

/*
 * Called to indicate that the lgrp with platform handle "hand" now
 * contains the memory identified by "mnode".
 *
 * LOCKING for this routine is a bit tricky. Usually it is called without
 * cpu_lock and it must grab cpu_lock here to prevent racing with other
 * callers. During DR of the board containing the caged memory it may be
 * called with cpu_lock already held and CPUs paused.
 *
 * If the insertion is part of the DR copy-rename and the inserted mnode
 * (and only this mnode) is already present in the lgrp_root->lgrp_mnodes
 * set, we are dealing with the special case of DR copy-rename described in
 * lgrp_mem_rename().
 */
void
lgrp_mem_init(int mnode, lgrp_handle_t hand, boolean_t is_copy_rename)
{
        klgrpset_t changed;
        int count;
        int i;
        lgrp_t *my_lgrp;
        lgrp_id_t lgrpid;
        mnodeset_t mnodes_mask = ((mnodeset_t)1 << mnode);
        boolean_t drop_lock = B_FALSE;
        boolean_t need_synch = B_FALSE;

        /*
         * Grab CPU lock (if we haven't already)
         */
        if (!MUTEX_HELD(&cpu_lock)) {
                mutex_enter(&cpu_lock);
                drop_lock = B_TRUE;
        }

        /*
         * This routine may be called from a context where we already
         * hold cpu_lock, and have already paused cpus.
         */
        if (!cpus_paused())
                need_synch = B_TRUE;

        /*
         * Check if this mnode is already configured and return immediately
         * if it is.
         *
         * NOTE: in the special case of copy-rename of the only remaining
         * mnode, lgrp_mem_fini() refuses to remove the last mnode from the
         * root, so we recognize this case and continue as usual, but skip
         * the update to the lgrp_mnodes and the lgrp_nmnodes. This resolves
         * the inconsistency in the topology that was temporarily introduced
         * by lgrp_mem_fini().
         */
        if (!(is_copy_rename && (lgrp_root->lgrp_mnodes == mnodes_mask)) &&
            lgrp_root->lgrp_mnodes & mnodes_mask) {
                if (drop_lock)
                        mutex_exit(&cpu_lock);
                return;
        }

        /*
         * Update lgroup topology with new memory resources, keeping track
         * of which lgroups change.
         */
        count = 0;
        klgrpset_clear(changed);
        my_lgrp = lgrp_hand_to_lgrp(hand);
        if (my_lgrp == NULL) {
                /* new lgrp */
                my_lgrp = lgrp_create();
                lgrpid = my_lgrp->lgrp_id;
                my_lgrp->lgrp_plathand = hand;
                my_lgrp->lgrp_latency = lgrp_plat_latency(hand, hand);
                klgrpset_add(my_lgrp->lgrp_leaves, lgrpid);
                klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);

                if (need_synch)
                        pause_cpus(NULL);
                count = lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
                    &changed);
                if (need_synch)
                        start_cpus();
        } else if (my_lgrp->lgrp_latency == 0 && lgrp_plat_latency(hand, hand)
            > 0) {
                /*
                 * Leaf lgroup was created, but latency wasn't available
                 * then. So, set latency for it and fill in rest of lgroup
                 * topology now that we know how far it is from other leaf
                 * lgroups.
                 */
                klgrpset_clear(changed);
                lgrpid = my_lgrp->lgrp_id;
                if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_MEM],
                    lgrpid))
                        klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
                if (need_synch)
                        pause_cpus(NULL);
                count = lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
                    &changed);
                if (need_synch)
                        start_cpus();
        } else if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_MEM],
            my_lgrp->lgrp_id)) {
                lgrpid = my_lgrp->lgrp_id;
                klgrpset_add(changed, lgrpid);
                count = 1;

                klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
                klgrpset_add(changed, lgrpid);
                count++;
                for (i = 0; i <= lgrp_alloc_max; i++) {
                        lgrp_t *lgrp;

                        lgrp = lgrp_table[i];
                        if (!LGRP_EXISTS(lgrp) ||
                            !lgrp_rsets_member(lgrp->lgrp_set, lgrpid))
                                continue;

                        klgrpset_add(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
                        klgrpset_add(changed, lgrp->lgrp_id);
                        count++;
                }
        }

        /*
         * Add memory node to lgroup and remove lgroup from ones that need
         * to be updated.
         */
        if (!(my_lgrp->lgrp_mnodes & mnodes_mask)) {
                my_lgrp->lgrp_mnodes |= mnodes_mask;
                my_lgrp->lgrp_nmnodes++;
        }
        klgrpset_del(changed, lgrpid);

        /*
         * Update memory node information for all lgroups that changed and
         * contain the new memory node as a resource.
         */
        if (count)
                (void) lgrp_mnode_update(changed, NULL);

        if (drop_lock)
                mutex_exit(&cpu_lock);
}
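
/*
 * The locking done by lgrp_mem_init() above (and lgrp_mem_fini() below)
 * reduces to the following idiom. This is an illustrative sketch only; the
 * real routines interleave their topology updates between these steps.
 */
static void
lgrp_mem_config_lock_example(void)
{
        boolean_t drop_lock = B_FALSE;
        boolean_t need_synch = B_FALSE;

        /* grab cpu_lock only if the caller doesn't already hold it */
        if (!MUTEX_HELD(&cpu_lock)) {
                mutex_enter(&cpu_lock);
                drop_lock = B_TRUE;
        }

        /* pause CPUs only if the caller hasn't already paused them */
        if (!cpus_paused())
                need_synch = B_TRUE;

        if (need_synch)
                pause_cpus(NULL);

        /* ... modify the lgroup topology here ... */

        if (need_synch)
                start_cpus();

        /* release cpu_lock only if this routine acquired it */
        if (drop_lock)
                mutex_exit(&cpu_lock);
}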

/*
 * Called to indicate that the lgroup associated with the platform
 * handle "hand" no longer contains given memory node.
 *
 * LOCKING for this routine is a bit tricky. Usually it is called without
 * cpu_lock and it must grab cpu_lock here to prevent racing with other
 * callers. During DR of the board containing the caged memory it may be
 * called with cpu_lock already held and CPUs paused.
 *
 * If the deletion is part of the DR copy-rename and the deleted mnode is
 * the only one present in the lgrp_root->lgrp_mnodes, all the topology is
 * updated, but lgrp_root->lgrp_mnodes is left intact. Later,
 * lgrp_mem_init() will insert the same mnode back into the topology. See
 * lgrp_mem_rename() and lgrp_mem_init() for additional details.
 */
void
lgrp_mem_fini(int mnode, lgrp_handle_t hand, boolean_t is_copy_rename)
{
        klgrpset_t changed;
        int count;
        int i;
        lgrp_t *my_lgrp;
        lgrp_id_t lgrpid;
        mnodeset_t mnodes_mask;
        boolean_t drop_lock = B_FALSE;
        boolean_t need_synch = B_FALSE;

        /*
         * Grab CPU lock (if we haven't already)
         */
        if (!MUTEX_HELD(&cpu_lock)) {
                mutex_enter(&cpu_lock);
                drop_lock = B_TRUE;
        }

        /*
         * This routine may be called from a context where we already
         * hold cpu_lock and have already paused cpus.
         */
        if (!cpus_paused())
                need_synch = B_TRUE;

        my_lgrp = lgrp_hand_to_lgrp(hand);

        /*
         * The lgrp *must* be pre-existing.
         */
        ASSERT(my_lgrp != NULL);

        /*
         * Delete memory node from lgroups which contain it.
         */
        mnodes_mask = ((mnodeset_t)1 << mnode);
        for (i = 0; i <= lgrp_alloc_max; i++) {
                lgrp_t *lgrp = lgrp_table[i];
                /*
                 * Skip any non-existent lgroups and any lgroups that don't
                 * contain leaf lgroup of memory as a memory resource.
                 */
                if (!LGRP_EXISTS(lgrp) ||
                    !(lgrp->lgrp_mnodes & mnodes_mask))
                        continue;

                /*
                 * Avoid removing the last mnode from the root in the DR
                 * copy-rename case. See lgrp_mem_rename() for details.
                 */
                if (is_copy_rename &&
                    (lgrp == lgrp_root) && (lgrp->lgrp_mnodes == mnodes_mask))
                        continue;

                /*
                 * Remove memory node from lgroup.
                 */
                lgrp->lgrp_mnodes &= ~mnodes_mask;
                lgrp->lgrp_nmnodes--;
                ASSERT(lgrp->lgrp_nmnodes >= 0);
        }
        ASSERT(lgrp_root->lgrp_nmnodes > 0);

        /*
         * Don't need to update lgroup topology if this lgroup still has
         * memory.
         *
         * In the special case of DR copy-rename with the only mnode being
         * removed, the lgrp_mnodes for the root is always non-zero, but we
         * still need to update the lgroup topology.
         */
        if ((my_lgrp->lgrp_nmnodes > 0) &&
            !(is_copy_rename &&
            (my_lgrp == lgrp_root) &&
            (my_lgrp->lgrp_mnodes == mnodes_mask))) {
                if (drop_lock)
                        mutex_exit(&cpu_lock);
                return;
        }

        /*
         * This lgroup does not contain any memory now.
         */
        klgrpset_clear(my_lgrp->lgrp_set[LGRP_RSRC_MEM]);

        /*
         * Remove this lgroup from lgroup topology if it does not contain
         * any resources now.
         */
        lgrpid = my_lgrp->lgrp_id;
        count = 0;
        klgrpset_clear(changed);
        if (lgrp_rsets_empty(my_lgrp->lgrp_set)) {
                /*
                 * Delete lgroup when no more resources.
                 */
                if (need_synch)
                        pause_cpus(NULL);
                count = lgrp_leaf_delete(my_lgrp, lgrp_table,
                    lgrp_alloc_max + 1, &changed);
                ASSERT(count > 0);
                if (need_synch)
                        start_cpus();
        } else {
                /*
                 * Remove lgroup from memory resources of any lgroups that
                 * contain it as such.
                 */
                for (i = 0; i <= lgrp_alloc_max; i++) {
                        lgrp_t *lgrp;

                        lgrp = lgrp_table[i];
                        if (!LGRP_EXISTS(lgrp) ||
                            !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM],
                            lgrpid))
                                continue;

                        klgrpset_del(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
                }
        }
        if (drop_lock)
                mutex_exit(&cpu_lock);
}

/*
 * Return lgroup with given platform handle.
 */
lgrp_t *
lgrp_hand_to_lgrp(lgrp_handle_t hand)
{
        int i;
        lgrp_t *lgrp;

        if (hand == LGRP_NULL_HANDLE)
                return (NULL);

        for (i = 0; i <= lgrp_alloc_max; i++) {
                lgrp = lgrp_table[i];
                if (LGRP_EXISTS(lgrp) && lgrp->lgrp_plathand == hand)
                        return (lgrp);
        }
        return (NULL);
}

/*
 * Return the home lgroup of the current thread.
 * We must do this with kernel preemption disabled, since we don't want our
 * thread to be re-homed while we're poking around with its lpl, and the
 * lpl should never be NULL.
 *
 * NOTE: Can't guarantee that the lgroup will be valid once kernel
 * preemption is enabled because of DR. Callers can disable kernel
 * preemption around this call to guarantee that the lgroup will be valid
 * beyond this routine, since kernel preemption can be recursive.
 */
lgrp_t *
lgrp_home_lgrp(void)
{
        lgrp_t *lgrp;
        lpl_t *lpl;

        kpreempt_disable();

        lpl = curthread->t_lpl;
        ASSERT(lpl != NULL);
        ASSERT(lpl->lpl_lgrpid >= 0 && lpl->lpl_lgrpid <= lgrp_alloc_max);
        ASSERT(LGRP_EXISTS(lgrp_table[lpl->lpl_lgrpid]));
        lgrp = lgrp_table[lpl->lpl_lgrpid];

        kpreempt_enable();

        return (lgrp);
}
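
/*
 * Usage sketch (illustrative, not from the original source): per the NOTE
 * above, a caller that must keep the returned lgroup valid across DR can
 * wrap the call in its own preemption-disabled region, since disabling
 * kernel preemption is recursive.
 */
static void
lgrp_home_lgrp_example(void)
{
        lgrp_t *my_home;

        kpreempt_disable();
        my_home = lgrp_home_lgrp();     /* disables/re-enables recursively */
        ASSERT(LGRP_EXISTS(my_home));
        /* ... use my_home; DR cannot invalidate it here ... */
        kpreempt_enable();
}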

/*
 * Return ID of home lgroup for given thread.
 * (See comments for lgrp_home_lgrp() for special care and handling
 * instructions.)
 */
lgrp_id_t
lgrp_home_id(kthread_t *t)
{
        lgrp_id_t lgrp;
        lpl_t *lpl;

        ASSERT(t != NULL);
        /*
         * We'd like to ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)), but we
         * cannot since the HAT layer can call into this routine to
         * determine the locality for its data structures in the context
         * of a page fault.
         */

        kpreempt_disable();

        lpl = t->t_lpl;
        ASSERT(lpl != NULL);
        ASSERT(lpl->lpl_lgrpid >= 0 && lpl->lpl_lgrpid <= lgrp_alloc_max);
        lgrp = lpl->lpl_lgrpid;

        kpreempt_enable();

        return (lgrp);
}

/*
 * Return lgroup containing the physical memory for the given page frame
 * number.
 */
lgrp_t *
lgrp_pfn_to_lgrp(pfn_t pfn)
{
        lgrp_handle_t hand;
        int i;
        lgrp_t *lgrp;

        hand = lgrp_plat_pfn_to_hand(pfn);
        if (hand != LGRP_NULL_HANDLE)
                for (i = 0; i <= lgrp_alloc_max; i++) {
                        lgrp = lgrp_table[i];
                        if (LGRP_EXISTS(lgrp) && lgrp->lgrp_plathand == hand)
                                return (lgrp);
                }
        return (NULL);
}

/*
 * Return lgroup containing the physical memory for the given physical
 * address.
 */
lgrp_t *
lgrp_phys_to_lgrp(u_longlong_t physaddr)
{
        lgrp_handle_t hand;
        int i;
        lgrp_t *lgrp;
        pfn_t pfn;

        pfn = btop(physaddr);
        hand = lgrp_plat_pfn_to_hand(pfn);
        if (hand != LGRP_NULL_HANDLE)
                for (i = 0; i <= lgrp_alloc_max; i++) {
                        lgrp = lgrp_table[i];
                        if (LGRP_EXISTS(lgrp) && lgrp->lgrp_plathand == hand)
                                return (lgrp);
                }
        return (NULL);
}
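
/*
 * Usage sketch (illustrative; va_to_pa() stands in for whatever the
 * platform provides to translate a kernel virtual address to a physical
 * address): check whether a buffer's backing memory lives in the calling
 * thread's home lgroup.
 */
static int
lgrp_buf_is_local_example(caddr_t buf)
{
        lgrp_t *buf_lgrp;

        buf_lgrp = lgrp_phys_to_lgrp((u_longlong_t)va_to_pa(buf));
        return (buf_lgrp != NULL && buf_lgrp == lgrp_home_lgrp());
}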

/*
 * Return the leaf lgroup containing the given CPU.
 */
static lgrp_t *
lgrp_cpu_to_lgrp(cpu_t *cpu)
{
        return (cpu->cpu_chip->chip_lgrp);
}

/*
 * Return the sum of the partition loads in an lgrp divided by
 * the number of CPUs in the lgrp. This is our best approximation
 * of an 'lgroup load average' for a useful per-lgroup kstat.
 */
static uint64_t
lgrp_sum_loadavgs(lgrp_t *lgrp)
{
        cpu_t *cpu;
        int ncpu;
        uint64_t loads = 0;

        mutex_enter(&cpu_lock);

        cpu = lgrp->lgrp_cpu;
        ncpu = lgrp->lgrp_cpucnt;

        if (cpu == NULL || ncpu == 0) {
                mutex_exit(&cpu_lock);
                return (0ull);
        }

        do {
                loads += cpu->cpu_lpl->lpl_loadavg;
                cpu = cpu->cpu_next_lgrp;
        } while (cpu != lgrp->lgrp_cpu);

        mutex_exit(&cpu_lock);

        return (loads / ncpu);
}

void
lgrp_stat_add(lgrp_id_t lgrpid, lgrp_stat_t stat, int64_t val)
{
        struct lgrp_stats *pstats;

        /*
         * Verify that the caller isn't trying to add to
         * a statistic for an lgroup that has gone away.
         */
        if (lgrpid < 0 || lgrpid > lgrp_alloc_max)
                return;

        pstats = &lgrp_stats[lgrpid];
        atomic_add_64((uint64_t *)LGRP_STAT_WRITE_PTR(pstats, stat), val);
}

int64_t
lgrp_stat_read(lgrp_id_t lgrpid, lgrp_stat_t stat)
{
        uint64_t val;
        struct lgrp_stats *pstats;

        if (lgrpid < 0 || lgrpid > lgrp_alloc_max)
                return ((int64_t)0);

        pstats = &lgrp_stats[lgrpid];
        LGRP_STAT_READ(pstats, stat, val);
        return (val);
}
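
/*
 * Usage sketch (illustrative; "stat" is whichever lgrp_stat_t counter the
 * caller accounts against): charge one event to a thread's home lgroup and
 * read back the running total.
 */
static int64_t
lgrp_stat_example(kthread_t *t, lgrp_stat_t stat)
{
        lgrp_id_t home = lgrp_home_id(t);

        lgrp_stat_add(home, stat, 1);           /* atomic, per-lgroup */
        return (lgrp_stat_read(home, stat));    /* running total */
}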

/*
 * Reset all kstats for lgrp specified by its lgrpid.
 */
static void
lgrp_kstat_reset(lgrp_id_t lgrpid)
{
        lgrp_stat_t stat;

        if (lgrpid < 0 || lgrpid > lgrp_alloc_max)
                return;

        for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) {
                LGRP_STAT_RESET(&lgrp_stats[lgrpid], stat);
        }
}

/*
 * Collect all per-lgrp statistics for the lgrp associated with this
 * kstat, and store them in the ks_data array.
 *
 * The superuser can reset all the running counter statistics for an
 * lgrp by writing to any of the lgrp's stats.
 */
static int
lgrp_kstat_extract(kstat_t *ksp, int rw)
{
        lgrp_stat_t stat;
        struct kstat_named *ksd;
        lgrp_t *lgrp;
        lgrp_id_t lgrpid;

        lgrp = (lgrp_t *)ksp->ks_private;

        ksd = (struct kstat_named *)ksp->ks_data;
        ASSERT(ksd == (struct kstat_named *)&lgrp_kstat_data);

        lgrpid = lgrp->lgrp_id;

        if (lgrpid == LGRP_NONE) {
                /*
                 * Return all zeroes as stats for freed lgrp.
                 */
                for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) {
                        ksd[stat].value.i64 = 0;
                }
                ksd[stat + LGRP_NUM_CPUS].value.i64 = 0;
                ksd[stat + LGRP_NUM_PG_INSTALL].value.i64 = 0;
                ksd[stat + LGRP_NUM_PG_AVAIL].value.i64 = 0;
                ksd[stat + LGRP_NUM_PG_FREE].value.i64 = 0;
                ksd[stat + LGRP_LOADAVG].value.i64 = 0;
        } else if (rw != KSTAT_WRITE) {
                /*
                 * Handle counter stats.
                 */
                for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) {
                        ksd[stat].value.i64 = lgrp_stat_read(lgrpid, stat);
                }

                /*
                 * Handle kernel data snapshot stats.
                 */
                ksd[stat + LGRP_NUM_CPUS].value.i64 = lgrp->lgrp_cpucnt;
                ksd[stat + LGRP_NUM_PG_INSTALL].value.i64 =
                    lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_INSTALL);
                ksd[stat + LGRP_NUM_PG_AVAIL].value.i64 =
                    lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_AVAIL);
                ksd[stat + LGRP_NUM_PG_FREE].value.i64 =
                    lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_FREE);
                ksd[stat + LGRP_LOADAVG].value.i64 = lgrp_sum_loadavgs(lgrp);
        } else {
                lgrp_kstat_reset(lgrpid);
        }

        return (0);
}

int
lgrp_query_cpu(processorid_t id, lgrp_id_t *lp)
{
        cpu_t *cp;

        mutex_enter(&cpu_lock);

        if ((cp = cpu_get(id)) == NULL) {
                mutex_exit(&cpu_lock);
                return (EINVAL);
        }

        if (cpu_is_offline(cp) || cpu_is_poweredoff(cp)) {
                mutex_exit(&cpu_lock);
                return (EINVAL);
        }

        ASSERT(cp->cpu_lpl != NULL);

        *lp = cp->cpu_lpl->lpl_lgrpid;

        mutex_exit(&cpu_lock);

        return (0);
}

int
lgrp_query_load(processorid_t id, lgrp_load_t *lp)
{
        cpu_t *cp;

        mutex_enter(&cpu_lock);

        if ((cp = cpu_get(id)) == NULL) {
                mutex_exit(&cpu_lock);
                return (EINVAL);
        }

        ASSERT(cp->cpu_lpl != NULL);

        *lp = cp->cpu_lpl->lpl_loadavg;

        mutex_exit(&cpu_lock);

        return (0);
}
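
/*
 * Usage sketch (illustrative): an administrative path that wants both the
 * lgroup and the load of an online CPU can chain the two queries; each
 * returns EINVAL for a CPU that doesn't exist (and lgrp_query_cpu() also
 * rejects offline or powered-off CPUs).
 */
static int
lgrp_query_example(processorid_t id, lgrp_id_t *lgrpid, lgrp_load_t *load)
{
        int err;

        if ((err = lgrp_query_cpu(id, lgrpid)) != 0)
                return (err);
        return (lgrp_query_load(id, load));
}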

void
lgrp_latency_change(u_longlong_t oldtime, u_longlong_t newtime)
{
        lgrp_t *lgrp;
        int i;

        for (i = 0; i <= lgrp_alloc_max; i++) {
                lgrp = lgrp_table[i];

                if (LGRP_EXISTS(lgrp) && (lgrp->lgrp_latency == oldtime))
                        lgrp->lgrp_latency = (int)newtime;
        }
}

/*
 * Add a resource named by lpl_leaf to rset of lpl_target.
 *
 * This routine also adjusts ncpu and nrset if the call succeeds in adding
 * a resource. It is adjusted here, as this is presently the only place
 * that we can be certain a resource addition has succeeded.
 *
 * We keep the list of rsets sorted so that the dispatcher can quickly walk
 * the list in order until it reaches a NULL. (This list is required to be
 * NULL terminated, too). This is done so that hints can mark a starting
 * position one past a given leaf, letting each lpl be traversed
 * sequentially, but from a different starting offset. We hope this will
 * improve performance a bit. (Hopefully, less read-to-own traffic...)
 */
void
lpl_rset_add(lpl_t *lpl_target, lpl_t *lpl_leaf)
{
        int i;
        int entry_slot = 0;

        /* return if leaf is already present */
        for (i = 0; i < lpl_target->lpl_nrset; i++) {
                if (lpl_target->lpl_rset[i] == lpl_leaf) {
                        return;
                }

                if (lpl_target->lpl_rset[i]->lpl_lgrpid >
                    lpl_leaf->lpl_lgrpid) {
                        break;
                }
        }

        /* insert leaf, update counts */
        entry_slot = i;
        i = lpl_target->lpl_nrset++;
        if (lpl_target->lpl_nrset >= LPL_RSET_MAX) {
                panic("More leaf lgrps in system than are supported!\n");
        }

        /*
         * Start at the end of the rset array and work backwards towards
         * the slot into which the new lpl will be inserted. This
         * effectively preserves the current ordering by scooting everybody
         * over one entry, and placing the new entry into the space created.
         */
        while (i-- > entry_slot) {
                lpl_target->lpl_rset[i + 1] = lpl_target->lpl_rset[i];
        }

        lpl_target->lpl_rset[entry_slot] = lpl_leaf;
        lpl_target->lpl_ncpu += lpl_leaf->lpl_ncpu;
}
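
/*
 * Illustrative sketch of the dispatcher-style walk that the sorted,
 * NULL-terminated rset enables (simplified: the real dispatcher takes its
 * starting offset from a hint maintained by lpl_child_update() below).
 */
static void
lpl_rset_walk_example(lpl_t *lpl, int start)
{
        lpl_t *leaf;
        int i;

        /* sweep from "start" to the NULL terminator ... */
        for (i = start; (leaf = lpl->lpl_rset[i]) != NULL; i++) {
                /* ... consider the CPUs in this leaf ... */
        }
        /* ... then wrap around and cover the leaves before "start" */
        for (i = 0; i < start; i++) {
                leaf = lpl->lpl_rset[i];
                ASSERT(leaf != NULL);
        }
}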

/*
 * Update each of lpl_parent's children with a proper hint and
 * a reference to their parent.
 * The lgrp topology is used as the reference since it is fully
 * consistent and correct at this point.
 *
 * Each child's hint will reference an element in lpl_parent's
 * rset that designates where the child should start searching
 * for CPU resources. The hint selected is the highest order leaf present
 * in the child's lineage.
 *
 * This should be called after any potential change in lpl_parent's
 * rset.
 */
static void
lpl_child_update(lpl_t *lpl_parent, struct cpupart *cp)
{
        klgrpset_t children, leaves;
        lpl_t *lpl;
        int hint;
        int i, j;

        children = lgrp_table[lpl_parent->lpl_lgrpid]->lgrp_children;
        if (klgrpset_isempty(children))
                return;         /* nothing to do */

        for (i = 0; i <= lgrp_alloc_max; i++) {
                if (klgrpset_ismember(children, i)) {

                        /*
                         * Given the set of leaves in this child's lineage,
                         * find the highest order leaf present in the
                         * parent's rset. Select this as the hint for the
                         * child.
                         */
                        leaves = lgrp_table[i]->lgrp_leaves;
                        hint = 0;
                        for (j = 0; j < lpl_parent->lpl_nrset; j++) {
                                lpl = lpl_parent->lpl_rset[j];
                                if (klgrpset_ismember(leaves, lpl->lpl_lgrpid))
                                        hint = j;
                        }
                        cp->cp_lgrploads[i].lpl_hint = hint;

                        /*
                         * (Re)set the parent. It may be incorrect if
                         * lpl_parent is new in the topology.
                         */
                        cp->cp_lgrploads[i].lpl_parent = lpl_parent;
                }
        }
}
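
/*
 * Worked example (illustrative, with hypothetical lgroup IDs): suppose
 * lpl_parent's sorted rset holds leaves with lgroup IDs [2, 5, 9], and a
 * child's lineage contains leaf 5. The scan above leaves hint == 1 (the
 * rset index of leaf 5), so that child starts its searches for CPU
 * resources at its own leaf's position in the parent's rset.
 */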

/*
 * Delete resource lpl_leaf from rset of lpl_target, assuming it's there.
 *
 * This routine also adjusts ncpu and nrset if the call succeeds in
 * deleting a resource. The values are adjusted here, as this is the only
 * place that we can be certain a resource was successfully deleted.
 */
void
lpl_rset_del(lpl_t *lpl_target, lpl_t *lpl_leaf)
{
        int i;

        /* find leaf in intermediate node */
        for (i = 0; i < lpl_target->lpl_nrset; i++) {
                if (lpl_target->lpl_rset[i] == lpl_leaf)
                        break;
        }

        /* return if leaf not found */
        if (lpl_target->lpl_rset[i] != lpl_leaf)
                return;

        /* prune leaf, compress array */
        ASSERT(lpl_target->lpl_nrset < LPL_RSET_MAX);
        lpl_target->lpl_rset[lpl_target->lpl_nrset--] = NULL;
        lpl_target->lpl_ncpu--;
        do {
                lpl_target->lpl_rset[i] = lpl_target->lpl_rset[i + 1];
        } while (i++ < lpl_target->lpl_nrset);
}

/*
 * Check to see if the resource set of the target lpl contains the
 * supplied leaf lpl. This returns 1 if the lpl is found, 0 if it is not.
 */
int
lpl_rset_contains(lpl_t *lpl_target, lpl_t *lpl_leaf)
{
        int i;

        for (i = 0; i < lpl_target->lpl_nrset; i++) {
                if (lpl_target->lpl_rset[i] == lpl_leaf)
                        return (1);
        }

        return (0);
}

/*
 * Called when we change cpu lpl membership. This increments or decrements
 * the per-cpu counter in every lpl in which our leaf appears.
 */
void
lpl_cpu_adjcnt(lpl_act_t act, cpu_t *cp)
{
        cpupart_t *cpupart;
        lgrp_t *lgrp_leaf;
        lgrp_t *lgrp_cur;
        lpl_t *lpl_leaf;
        lpl_t *lpl_cur;
        int i;

        ASSERT(act == LPL_DECREMENT || act == LPL_INCREMENT);

        cpupart = cp->cpu_part;
        lpl_leaf = cp->cpu_lpl;
        lgrp_leaf = lgrp_table[lpl_leaf->lpl_lgrpid];

        for (i = 0; i <= lgrp_alloc_max; i++) {
                lgrp_cur = lgrp_table[i];

                /*
                 * Don't adjust if the lgrp isn't there, if we're the leaf
                 * lpl for the cpu in question, or if the current lgrp and
                 * leaf don't share the same resources.
                 */
                if (!LGRP_EXISTS(lgrp_cur) || (lgrp_cur == lgrp_leaf) ||
                    !klgrpset_intersects(lgrp_leaf->lgrp_set[LGRP_RSRC_CPU],
                    lgrp_cur->lgrp_set[LGRP_RSRC_CPU]))
                        continue;

                lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id];

                if (lpl_cur->lpl_nrset > 0) {
                        if (act == LPL_INCREMENT) {
                                lpl_cur->lpl_ncpu++;
                        } else if (act == LPL_DECREMENT) {
                                lpl_cur->lpl_ncpu--;
                        }
                }
        }
}

/*
 * Initialize lpl with given resources and specified lgrp.
 */
void
lpl_init(lpl_t *lpl, lpl_t *lpl_leaf, lgrp_t *lgrp)
{
        lpl->lpl_lgrpid = lgrp->lgrp_id;
        lpl->lpl_loadavg = 0;
        if (lpl == lpl_leaf)
                lpl->lpl_ncpu = 1;
        else
                lpl->lpl_ncpu = lpl_leaf->lpl_ncpu;
        lpl->lpl_nrset = 1;
        lpl->lpl_rset[0] = lpl_leaf;
        lpl->lpl_lgrp = lgrp;
        lpl->lpl_parent = NULL;         /* set by lpl_leaf_insert() */
        lpl->lpl_cpus = NULL;           /* set by lgrp_part_add_cpu() */
}

/*
 * Clear an unused lpl.
 */
void
lpl_clear(lpl_t *lpl)
{
        lgrpid_t lid;

        /* save lid for debugging purposes */
        lid = lpl->lpl_lgrpid;
        bzero(lpl, sizeof (lpl_t));
        lpl->lpl_lgrpid = lid;
}

/*
 * Given a CPU-partition, verify that the lpl topology in the CPU-partition
 * is in sync with the lgroup topology in the system. The lpl topology may
 * not make full use of all of the lgroup topology, but this checks to make
 * sure that for the parts that it does use, it has correctly understood
 * the relationships that exist. This function returns 0 if the topology is
 * correct, and a non-zero error code, for non-debug kernels, if incorrect.
 * Asserts are spread throughout the code to aid in debugging on a DEBUG
 * kernel.
 */
int
lpl_topo_verify(cpupart_t *cpupart)
{
        lgrp_t *lgrp;
        lpl_t *lpl;
        klgrpset_t rset;
        klgrpset_t cset;
        cpu_t *cpu;
        cpu_t *cp_start;
        int i;
        int j;
        int sum;

        /* topology can't be incorrect if it doesn't exist */
        if (!lgrp_topo_initialized || !lgrp_initialized)
                return (LPL_TOPO_CORRECT);

        ASSERT(cpupart != NULL);

        for (i = 0; i <= lgrp_alloc_max; i++) {
                lgrp = lgrp_table[i];
                lpl = NULL;
                /* make sure lpls are allocated */
                ASSERT(cpupart->cp_lgrploads);
                if (!cpupart->cp_lgrploads)
                        return (LPL_TOPO_PART_HAS_NO_LPL);

                lpl = &cpupart->cp_lgrploads[i];
                /* make sure our index is good */
                ASSERT(i < cpupart->cp_nlgrploads);

                /* if lgroup doesn't exist, make sure lpl is empty */
                if (!LGRP_EXISTS(lgrp)) {
                        ASSERT(lpl->lpl_ncpu == 0);
                        if (lpl->lpl_ncpu > 0) {
                                return (LPL_TOPO_CPUS_NOT_EMPTY);
                        } else {
                                continue;
                        }
                }

                /* verify that lgroup and lpl are identically numbered */
                ASSERT(lgrp->lgrp_id == lpl->lpl_lgrpid);

                /* verify that lgroup and lpl are identically numbered */
                ASSERT(lgrp->lgrp_id == lpl->lpl_lgrpid);

                /* if lgroup isn't in our partition, make sure lpl is empty */
                if (!klgrpset_intersects(lgrp->lgrp_leaves,
                    cpupart->cp_lgrpset)) {
                        ASSERT(lpl->lpl_ncpu == 0);
                        if (lpl->lpl_ncpu > 0) {
                                return (LPL_TOPO_CPUS_NOT_EMPTY);
                        }
                        /*
                         * lpl is empty, and lgroup isn't in partition.  verify
                         * that lpl doesn't show up in anyone else's rsets (in
                         * this partition, anyway)
                         */
                        for (j = 0; j < cpupart->cp_nlgrploads; j++) {
                                lpl_t *i_lpl;   /* lpl we're iterating over */

                                i_lpl = &cpupart->cp_lgrploads[j];

                                ASSERT(!lpl_rset_contains(i_lpl, lpl));
                                if (lpl_rset_contains(i_lpl, lpl)) {
                                        return (LPL_TOPO_LPL_ORPHANED);
                                }
                        }
                        /* lgroup is empty, and everything is ok. continue */
                        continue;
                }

                /* lgroup is in this partition, now check it against lpl */

                /* do both have matching lgrps? */
                ASSERT(lgrp == lpl->lpl_lgrp);
                if (lgrp != lpl->lpl_lgrp) {
                        return (LPL_TOPO_LGRP_MISMATCH);
                }

                /* do the parent lgroups exist and do they match? */
                if (lgrp->lgrp_parent) {
                        ASSERT(lpl->lpl_parent);
                        ASSERT(lgrp->lgrp_parent->lgrp_id ==
                            lpl->lpl_parent->lpl_lgrpid);

                        if (!lpl->lpl_parent) {
                                return (LPL_TOPO_MISSING_PARENT);
                        } else if (lgrp->lgrp_parent->lgrp_id !=
                            lpl->lpl_parent->lpl_lgrpid) {
                                return (LPL_TOPO_PARENT_MISMATCH);
                        }
                }

                /* only leaf lgroups keep a cpucnt, only check leaves */
                if ((lpl->lpl_nrset == 1) && (lpl == lpl->lpl_rset[0])) {

                        /* verify that lgrp is also a leaf */
                        ASSERT((lgrp->lgrp_childcnt == 0) &&
                            (klgrpset_ismember(lgrp->lgrp_leaves,
                            lpl->lpl_lgrpid)));

                        if ((lgrp->lgrp_childcnt > 0) ||
                            (!klgrpset_ismember(lgrp->lgrp_leaves,
                            lpl->lpl_lgrpid))) {
                                return (LPL_TOPO_LGRP_NOT_LEAF);
                        }

                        ASSERT((lgrp->lgrp_cpucnt >= lpl->lpl_ncpu) &&
                            (lpl->lpl_ncpu > 0));
                        if ((lgrp->lgrp_cpucnt < lpl->lpl_ncpu) ||
                            (lpl->lpl_ncpu <= 0)) {
                                return (LPL_TOPO_BAD_CPUCNT);
                        }

                        /*
                         * Check that lpl_ncpu also matches the number of
                         * cpus in the lpl's linked list.  The list only
                         * exists in leaves, and the two counts should always
                         * match.
                         */
                        j = 0;
                        cpu = cp_start = lpl->lpl_cpus;
                        while (cpu != NULL) {
                                j++;

                                /* check to make sure cpu's lpl is leaf lpl */
                                ASSERT(cpu->cpu_lpl == lpl);
                                if (cpu->cpu_lpl != lpl) {
                                        return (LPL_TOPO_CPU_HAS_BAD_LPL);
                                }

                                /* check next cpu */
                                if ((cpu = cpu->cpu_next_lpl) != cp_start) {
                                        continue;
                                } else {
                                        cpu = NULL;
                                }
                        }

                        ASSERT(j == lpl->lpl_ncpu);
                        if (j != lpl->lpl_ncpu) {
                                return (LPL_TOPO_LPL_BAD_NCPU);
                        }

                        /*
                         * Also, check that leaf lpl is contained in all
                         * intermediate lpls that name the leaf as a descendant
                         */
                        for (j = 0; j <= lgrp_alloc_max; j++) {
                                klgrpset_t      intersect;
                                lgrp_t          *lgrp_cand;
                                lpl_t           *lpl_cand;

                                lgrp_cand = lgrp_table[j];
                                intersect = klgrpset_intersects(
                                    lgrp_cand->lgrp_set[LGRP_RSRC_CPU],
                                    cpupart->cp_lgrpset);

                                if (!LGRP_EXISTS(lgrp_cand) ||
                                    !klgrpset_intersects(lgrp_cand->lgrp_leaves,
                                    cpupart->cp_lgrpset) ||
                                    (intersect == 0))
                                        continue;

                                lpl_cand =
                                    &cpupart->cp_lgrploads[lgrp_cand->lgrp_id];

                                if (klgrpset_ismember(intersect,
                                    lgrp->lgrp_id)) {
                                        ASSERT(lpl_rset_contains(lpl_cand,
                                            lpl));

                                        if (!lpl_rset_contains(lpl_cand, lpl)) {
                                                return (LPL_TOPO_RSET_MSSNG_LF);
                                        }
                                }
                        }

                } else { /* non-leaf specific checks */

                        /*
                         * Non-leaf lpls should have lpl_cpus == NULL;
                         * verify that this is so.
                         */
                        ASSERT(lpl->lpl_cpus == NULL);
                        if (lpl->lpl_cpus != NULL) {
                                return (LPL_TOPO_NONLEAF_HAS_CPUS);
                        }

                        /*
                         * verify that the sum of the cpus in the leaf
                         * resources is equal to the total ncpu in the
                         * intermediate
                         */
                        for (j = sum = 0; j < lpl->lpl_nrset; j++) {
                                sum += lpl->lpl_rset[j]->lpl_ncpu;
                        }

                        ASSERT(sum == lpl->lpl_ncpu);
                        if (sum != lpl->lpl_ncpu) {
                                return (LPL_TOPO_LPL_BAD_NCPU);
                        }
                }

                /*
                 * check on lpl_hint. Don't check root, since it has no parent.
                 */
                if (lpl->lpl_parent != NULL) {
                        int     hint;
                        lpl_t   *hint_lpl;

                        /* make sure hint is within limits of nrset */
                        hint = lpl->lpl_hint;
                        ASSERT(lpl->lpl_parent->lpl_nrset >= hint);
                        if (lpl->lpl_parent->lpl_nrset < hint) {
                                return (LPL_TOPO_BOGUS_HINT);
                        }

                        /* make sure hint points to valid lpl */
                        hint_lpl = lpl->lpl_parent->lpl_rset[hint];
                        ASSERT(hint_lpl->lpl_ncpu > 0);
                        if (hint_lpl->lpl_ncpu <= 0) {
                                return (LPL_TOPO_BOGUS_HINT);
                        }
                }

                /*
                 * Check the rset of the lpl in question.  Make sure that each
                 * rset contains a subset of the resources in
                 * lgrp_set[LGRP_RSRC_CPU] and in cp_lgrpset.  This also makes
                 * sure that each rset doesn't include resources that are
                 * outside of that set, which would be resources somehow not
                 * accounted for.
                 */
                klgrpset_clear(rset);
                for (j = 0; j < lpl->lpl_nrset; j++) {
                        klgrpset_add(rset, lpl->lpl_rset[j]->lpl_lgrpid);
                }
                klgrpset_copy(cset, rset);
                /* make sure lpl rset matches lgrp rset */
                klgrpset_diff(rset, lgrp->lgrp_set[LGRP_RSRC_CPU]);
                /* make sure rset is contained within the partition, too */
                klgrpset_diff(cset, cpupart->cp_lgrpset);

                ASSERT(klgrpset_isempty(rset) && klgrpset_isempty(cset));
                if (!klgrpset_isempty(rset) || !klgrpset_isempty(cset)) {
                        return (LPL_TOPO_RSET_MISMATCH);
                }

                /*
                 * check to make sure lpl_nrset matches the number of rsets
                 * contained in the lpl
                 */
                for (j = 0; (j < LPL_RSET_MAX) && (lpl->lpl_rset[j] != NULL);
                    j++)
                        ;

                ASSERT(j == lpl->lpl_nrset);
                if (j != lpl->lpl_nrset) {
                        return (LPL_TOPO_BAD_RSETCNT);
                }
        }
        return (LPL_TOPO_CORRECT);
}
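
/*
 * Illustrative usage (not original code): a DEBUG consistency check after a
 * topology-changing operation might look like
 *
 *      ASSERT(lpl_topo_verify(cpupart) == LPL_TOPO_CORRECT);
 *
 * so that a broken lpl hierarchy is caught as close as possible to the
 * operation that corrupted it.
 */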

/*
 * Flatten lpl topology to given number of levels.  This is presently only
 * implemented for a flatten to 2 levels, which will prune out the
 * intermediates and home the leaf lpls to the root lpl.
 */
int
lpl_topo_flatten(int levels)
{
        int             i;
        uint_t          sum;
        lgrp_t          *lgrp_cur;
        lpl_t           *lpl_cur;
        lpl_t           *lpl_root;
        cpupart_t       *cp;

        if (levels != 2)
                return (0);

        /* called w/ cpus paused - grab no locks! */
        ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
            !lgrp_initialized);

        cp = cp_list_head;
        do {
                lpl_root = &cp->cp_lgrploads[lgrp_root->lgrp_id];
                ASSERT(LGRP_EXISTS(lgrp_root) && (lpl_root->lpl_ncpu > 0));

                for (i = 0; i <= lgrp_alloc_max; i++) {
                        lgrp_cur = lgrp_table[i];
                        lpl_cur = &cp->cp_lgrploads[i];

                        if ((lgrp_cur == lgrp_root) ||
                            (!LGRP_EXISTS(lgrp_cur) &&
                            (lpl_cur->lpl_ncpu == 0)))
                                continue;

                        if (!LGRP_EXISTS(lgrp_cur) && (lpl_cur->lpl_ncpu > 0)) {
                                /*
                                 * this should be a deleted intermediate, so
                                 * clear it
                                 */
                                lpl_clear(lpl_cur);
                        } else if ((lpl_cur->lpl_nrset == 1) &&
                            (lpl_cur->lpl_rset[0] == lpl_cur) &&
                            ((lpl_cur->lpl_parent->lpl_ncpu == 0) ||
                            (!LGRP_EXISTS(lpl_cur->lpl_parent->lpl_lgrp)))) {
                                /*
                                 * this is a leaf whose parent was deleted, or
                                 * whose parent had its lgrp deleted.  (And
                                 * whose parent will soon be deleted.)  Point
                                 * this guy back to the root lpl.
                                 */
                                lpl_cur->lpl_parent = lpl_root;
                                lpl_rset_add(lpl_root, lpl_cur);
                        }
                }

                /*
                 * Now that we're done, make sure the count on the root lpl is
                 * correct, and update the hints of the children for the sake
                 * of thoroughness
                 */
                for (i = sum = 0; i < lpl_root->lpl_nrset; i++) {
                        sum += lpl_root->lpl_rset[i]->lpl_ncpu;
                }
                lpl_root->lpl_ncpu = sum;
                lpl_child_update(lpl_root, cp);

                cp = cp->cp_next;
        } while (cp != cp_list_head);

        return (levels);
}
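
/*
 * Sketch of the effect (illustrative, not original code): a three-level
 * hierarchy such as
 *
 *      root -> intermediate -> {leaf0, leaf1}
 *
 * is flattened by lpl_topo_flatten(2) into
 *
 *      root -> {leaf0, leaf1}
 *
 * with the intermediate lpls cleared and each surviving leaf reparented to
 * the root lpl.
 */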

/*
 * Insert a lpl into the resource hierarchy and create any additional lpls that
 * are necessary to represent the varying states of locality for the cpu
 * resources newly added to the partition.
 *
 * This routine is clever enough that it can correctly add resources from the
 * new leaf into both direct and indirect resource sets in the hierarchy.  (Ie,
 * those for which the lpl is a leaf as opposed to simply a named equally local
 * resource.)  The one special case that needs additional processing is when a
 * new intermediate lpl is introduced.  Since the main loop only traverses
 * looking to add the leaf resource where it does not yet exist, additional work
 * is necessary to add other leaf resources that may need to exist in the newly
 * created intermediate.  This is performed by the second inner loop, and is
 * only done when the check for more than one overlapping resource succeeds.
 */
void
lpl_leaf_insert(lpl_t *lpl_leaf, cpupart_t *cpupart)
{
        int             i;
        int             j;
        int             hint;
        int             rset_num_intersect;
        lgrp_t          *lgrp_cur;
        lpl_t           *lpl_cur;
        lpl_t           *lpl_parent;
        lgrpid_t        parent_id;
        klgrpset_t      rset_intersect; /* resources in cpupart and lgrp */

        for (i = 0; i <= lgrp_alloc_max; i++) {
                lgrp_cur = lgrp_table[i];

                /*
                 * Don't insert if the lgrp isn't there, if the leaf isn't
                 * contained within the current lgrp, or if the current lgrp
                 * has no leaves in this partition
                 */
                if (!LGRP_EXISTS(lgrp_cur) ||
                    !klgrpset_ismember(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
                    lpl_leaf->lpl_lgrpid) ||
                    !klgrpset_intersects(lgrp_cur->lgrp_leaves,
                    cpupart->cp_lgrpset))
                        continue;

                lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id];
                if (lgrp_cur->lgrp_parent != NULL) {
                        /* if lgrp has a parent, assign it properly */
                        parent_id = lgrp_cur->lgrp_parent->lgrp_id;
                        lpl_parent = &cpupart->cp_lgrploads[parent_id];
                } else {
                        /* if not, make sure parent ptr gets set to null */
                        lpl_parent = NULL;
                }

                if (lpl_cur == lpl_leaf) {
                        /*
                         * Almost all leaf state was initialized elsewhere.
                         * The only thing left to do is to set the parent.
                         */
                        lpl_cur->lpl_parent = lpl_parent;
                        continue;
                }

                /*
                 * Initialize intermediate lpl.
                 * Save this lpl's hint though.  Since we're changing this
                 * lpl's resources, we need to update the hint in this lpl's
                 * children, but the hint in this lpl is unaffected and
                 * should be preserved.
                 */
                hint = lpl_cur->lpl_hint;

                lpl_clear(lpl_cur);
                lpl_init(lpl_cur, lpl_leaf, lgrp_cur);

                lpl_cur->lpl_hint = hint;
                lpl_cur->lpl_parent = lpl_parent;

                /* does new lpl need to be populated with other resources? */
                rset_intersect =
                    klgrpset_intersects(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
                    cpupart->cp_lgrpset);
                klgrpset_nlgrps(rset_intersect, rset_num_intersect);

                if (rset_num_intersect > 1) {
                        /*
                         * If so, figure out what lpls have resources that
                         * intersect this one, and add them.
                         */
                        for (j = 0; j <= lgrp_alloc_max; j++) {
                                lgrp_t  *lgrp_cand;     /* candidate lgrp */
                                lpl_t   *lpl_cand;      /* candidate lpl */

                                lgrp_cand = lgrp_table[j];
                                if (!LGRP_EXISTS(lgrp_cand) ||
                                    !klgrpset_ismember(rset_intersect,
                                    lgrp_cand->lgrp_id))
                                        continue;
                                lpl_cand =
                                    &cpupart->cp_lgrploads[lgrp_cand->lgrp_id];
                                lpl_rset_add(lpl_cur, lpl_cand);
                        }
                }
                /*
                 * This lpl's rset has changed.  Update the hint in its
                 * children.
                 */
                lpl_child_update(lpl_cur, cpupart);
        }
}
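
/*
 * Illustrative walk-through (not original code): suppose a partition grows
 * from one leaf (A) to two (A and B) under a shared intermediate I.  The
 * main loop of lpl_leaf_insert(B, cpupart) re-creates I with B as its first
 * resource; the second inner loop then notices that I's lgrp covers more
 * than one leaf in the partition and adds A back into I's rset as well, so
 * that I ends up naming both equally local leaves.
 */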

/*
 * remove a lpl from the hierarchy of resources, clearing its state when
 * finished.  If the lpls at the intermediate levels of the hierarchy have no
 * remaining resources, or no longer name a leaf resource in the cpu-partition,
 * delete them as well.
 */
void
lpl_leaf_remove(lpl_t *lpl_leaf, cpupart_t *cpupart)
{
        int             i;
        lgrp_t          *lgrp_cur;
        lpl_t           *lpl_cur;
        klgrpset_t      leaf_intersect; /* intersection of leaves */

        for (i = 0; i <= lgrp_alloc_max; i++) {
                lgrp_cur = lgrp_table[i];

                /*
                 * Don't attempt to remove from lgrps that aren't there, that
                 * don't contain our leaf, or from the leaf itself.  (We do
                 * that later.)
                 */
                if (!LGRP_EXISTS(lgrp_cur))
                        continue;

                lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id];

                if (!klgrpset_ismember(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
                    lpl_leaf->lpl_lgrpid) ||
                    (lpl_cur == lpl_leaf)) {
                        continue;
                }

                /*
                 * This is a slightly sleazy simplification in that we have
                 * already marked the cp_lgrpset as no longer containing the
                 * leaf we've deleted.  Any lpls that pass the above checks
                 * based upon lgrp membership but not necessarily cpu-part
                 * membership also get cleared by the checks below.  Currently
                 * this is harmless, as the lpls should be empty anyway.
                 *
                 * In particular, we want to preserve lpls that have additional
                 * leaf resources, even though we don't yet have a processor
                 * architecture that represents resources this way.
                 */
                leaf_intersect = klgrpset_intersects(lgrp_cur->lgrp_leaves,
                    cpupart->cp_lgrpset);

                lpl_rset_del(lpl_cur, lpl_leaf);
                if ((lpl_cur->lpl_nrset == 0) || (!leaf_intersect)) {
                        lpl_clear(lpl_cur);
                } else {
                        /*
                         * Update this lpl's children
                         */
                        lpl_child_update(lpl_cur, cpupart);
                }
        }
        lpl_clear(lpl_leaf);
}
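
/*
 * Illustrative walk-through (not original code): continuing the two-leaf
 * example above, removing leaf B via lpl_leaf_remove(B, cpupart) deletes B
 * from intermediate I's rset.  I survives, since it still names leaf A; had
 * B been I's only remaining resource, or had the partition no longer
 * contained any of I's leaves, I would have been cleared outright.  The
 * departing leaf itself is always cleared at the end.
 */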

/*
 * add a cpu to a partition in terms of lgrp load avg bookkeeping
 *
 * The lpl (cpu partition load average information) is now arranged in a
 * hierarchical fashion whereby resources that are closest, ie. most local, to
 * the cpu in question are considered to be leaves in a tree of resources.
 * There are two general cases for cpu addition:
 *
 * 1. A lpl structure that contains resources already in the hierarchy tree.
 * In this case, all of the associated lpl relationships have been defined, and
 * all that is necessary is that we link the new cpu into the per-lpl list of
 * cpus, and increment the ncpu count of all places where this cpu resource will
 * be accounted for.  lpl_cpu_adjcnt updates the cpu count, and the cpu pointer
 * pushing is accomplished by this routine.
 *
 * 2. The lpl to contain the resources in this cpu-partition for this lgrp does
 * not exist yet.  In this case, it is necessary to build the leaf lpl, and
 * construct the hierarchy of state necessary to name its more distant
 * resources, if they should exist.  The leaf structure is initialized by this
 * routine, as is the cpu-partition state for the lgrp membership.  This routine
 * also calls lpl_leaf_insert() which inserts the named lpl into the hierarchy
 * and builds all of the "ancestral" state necessary to identify resources at
 * differing levels of locality.
 */
void
lgrp_part_add_cpu(cpu_t *cp, lgrp_id_t lgrpid)
{
        cpupart_t       *cpupart;
        lgrp_t          *lgrp_leaf;
        lpl_t           *lpl_leaf;

        /* called sometimes w/ cpus paused - grab no locks */
        ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

        cpupart = cp->cpu_part;
        lgrp_leaf = lgrp_table[lgrpid];

        /* don't add non-existent lgrp */
        ASSERT(LGRP_EXISTS(lgrp_leaf));
        lpl_leaf = &cpupart->cp_lgrploads[lgrpid];
        cp->cpu_lpl = lpl_leaf;

        /* only leaf lpls contain cpus */
        if (lpl_leaf->lpl_ncpu++ == 0) {
                lpl_init(lpl_leaf, lpl_leaf, lgrp_leaf);
                klgrpset_add(cpupart->cp_lgrpset, lgrpid);
                lpl_leaf_insert(lpl_leaf, cpupart);
        } else {
                /*
                 * the lpl should already exist in the parent, so just update
                 * the count of available CPUs
                 */
                lpl_cpu_adjcnt(LPL_INCREMENT, cp);
        }

        /* link cpu into list of cpus in lpl */
        if (lpl_leaf->lpl_cpus) {
                cp->cpu_next_lpl = lpl_leaf->lpl_cpus;
                cp->cpu_prev_lpl = lpl_leaf->lpl_cpus->cpu_prev_lpl;
                lpl_leaf->lpl_cpus->cpu_prev_lpl->cpu_next_lpl = cp;
                lpl_leaf->lpl_cpus->cpu_prev_lpl = cp;
        } else {
                /*
                 * We increment ncpu immediately after we create a new leaf
                 * lpl, so assert that ncpu == 1 for the case where we don't
                 * have any cpu pointers yet.
                 */
                ASSERT(lpl_leaf->lpl_ncpu == 1);
                lpl_leaf->lpl_cpus = cp->cpu_next_lpl = cp->cpu_prev_lpl = cp;
        }
}
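
/*
 * Invariant sketch (illustrative, not original code): lpl_cpus is a circular
 * doubly-linked list threaded through the cpu_t next/prev pointers.  With a
 * single cpu the list points back at itself:
 *
 *      ASSERT(cp->cpu_next_lpl == cp && cp->cpu_prev_lpl == cp);
 *
 * and each insertion above splices the new cpu in just before the list head,
 * preserving circularity in both directions.
 */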

/*
 * remove a cpu from a partition in terms of lgrp load avg bookkeeping
 *
 * The lpl (cpu partition load average information) is now arranged in a
 * hierarchical fashion whereby resources that are closest, ie. most local, to
 * the cpu in question are considered to be leaves in a tree of resources.
 * There are two removal cases in question:
 *
 * 1. Removal of the resource in the leaf leaves other resources remaining in
 * that leaf.  (Another cpu still exists at this level of locality.)  In this
 * case, the count of available cpus is decremented in all associated lpls by
 * calling lpl_cpu_adjcnt(), and the pointer to the removed cpu is pruned
 * from the per-cpu lpl list.
 *
 * 2. Removal of the resource results in the lpl containing no resources.  (It
 * is empty.)  In this case, all of what has occurred for the first step must
 * take place; however, additionally we must remove the lpl structure itself,
 * prune out any stranded lpls that do not directly name a leaf resource, and
 * mark the cpu partition in question as no longer containing resources from
 * the lgrp of the lpl that has been deleted.  Cpu-partition changes are
 * handled by this routine, but the lpl_leaf_remove function deals with the
 * details of pruning out the empty lpl and any of its orphaned direct
 * ancestors.
 */
void
lgrp_part_del_cpu(cpu_t *cp)
{
        lpl_t           *lpl;
        lpl_t           *leaf_lpl;
        lgrp_t          *lgrp_leaf;

        /* called sometimes w/ cpus paused - grab no locks */
        ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

        lpl = leaf_lpl = cp->cpu_lpl;
        lgrp_leaf = leaf_lpl->lpl_lgrp;

        /* don't delete a leaf that isn't there */
        ASSERT(LGRP_EXISTS(lgrp_leaf));

        /* no double-deletes */
        ASSERT(lpl->lpl_ncpu);
        if (--lpl->lpl_ncpu == 0) {
                /*
                 * This was the last cpu in this lgroup for this partition,
                 * clear its bit in the partition's lgroup bitmask
                 */
                klgrpset_del(cp->cpu_part->cp_lgrpset, lpl->lpl_lgrpid);

                /* eliminate remaining lpl link pointers in cpu, lpl */
                lpl->lpl_cpus = cp->cpu_next_lpl = cp->cpu_prev_lpl = NULL;

                lpl_leaf_remove(leaf_lpl, cp->cpu_part);
        } else {

                /* unlink cpu from lists of cpus in lpl */
                cp->cpu_prev_lpl->cpu_next_lpl = cp->cpu_next_lpl;
                cp->cpu_next_lpl->cpu_prev_lpl = cp->cpu_prev_lpl;
                if (lpl->lpl_cpus == cp) {
                        lpl->lpl_cpus = cp->cpu_next_lpl;
                }

                /*
                 * Update the cpu count in the lpls associated with parent
                 * lgroups.
                 */
                lpl_cpu_adjcnt(LPL_DECREMENT, cp);

        }
        /* clear cpu's lpl ptr when we're all done */
        cp->cpu_lpl = NULL;
}

/*
 * Recompute load average for the specified partition/lgrp fragment.
 *
 * We rely on the fact that this routine is called from the clock thread
 * at a point before the clock thread can block (i.e. before its first
 * lock request).  Since the clock thread can not be preempted (since it
 * runs at highest priority), we know that cpu partitions can not change
 * (since doing so would require either the repartition requester or the
 * cpu_pause thread to run on this cpu), so we can update the cpu's load
 * without grabbing cpu_lock.
 */
void
lgrp_loadavg(lpl_t *lpl, uint_t nrcpus, int ageflag)
{
        uint_t  ncpu;
        int64_t old, new, f;

        /*
         * expval[ncpu] is (1 - exp(-1/(20 * ncpu))) scaled by 2^16;
         * e.g. 3196 for 1 cpu.
         */
        static short expval[] = {
            0, 3196, 1618, 1083,
            814, 652, 543, 466,
            408, 363, 326, 297,
            272, 251, 233, 218,
            204, 192, 181, 172,
            163, 155, 148, 142,
            136, 130, 125, 121,
            116, 112, 109, 105
        };

        /* ASSERT (called from clock level) */

        if ((lpl == NULL) ||    /* we're booting - this is easiest for now */
            ((ncpu = lpl->lpl_ncpu) == 0)) {
                return;
        }

        for (;;) {

                if (ncpu >= sizeof (expval) / sizeof (expval[0]))
                        f = expval[1]/ncpu; /* good approx. for large ncpu */
                else
                        f = expval[ncpu];

                /*
                 * Modify the load average atomically to avoid losing
                 * anticipatory load updates (see lgrp_move_thread()).
                 */
                if (ageflag) {
                        /*
                         * We're supposed to both update and age the load.
                         * This happens 10 times/sec. per cpu.  We do a
                         * little hoop-jumping to avoid integer overflow.
                         */
                        int64_t q, r;

                        do {
                                old = new = lpl->lpl_loadavg;
                                q = (old >> 16) << 7;
                                r = (old & 0xffff) << 7;
                                new += ((long long)(nrcpus - q) * f -
                                    ((r * f) >> 16)) >> 7;

                                /*
                                 * Check for overflow
                                 */
                                if (new > LGRP_LOADAVG_MAX)
                                        new = LGRP_LOADAVG_MAX;
                                else if (new < 0)
                                        new = 0;
                        } while (cas32((lgrp_load_t *)&lpl->lpl_loadavg, old,
                            new) != old);
                } else {
                        /*
                         * We're supposed to update the load, but not age it.
                         * This option is used to update the load (which either
                         * has already been aged in this 1/10 sec. interval or
                         * soon will be) to account for a remotely executing
                         * thread.
                         */
                        do {
                                old = new = lpl->lpl_loadavg;
                                new += f;
                                /*
                                 * Check for overflow
                                 * Underflow not possible here
                                 */
                                if (new < old)
                                        new = LGRP_LOADAVG_MAX;
                        } while (cas32((lgrp_load_t *)&lpl->lpl_loadavg, old,
                            new) != old);
                }

                /*
                 * Do the same for this lpl's parent
                 */
                if ((lpl = lpl->lpl_parent) == NULL)
                        break;
                ncpu = lpl->lpl_ncpu;
        }
}
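
/*
 * Worked example (illustrative, not original code): with f = expval[ncpu]
 * and a = f / 2^16, the aging branch above is fixed-point shorthand for the
 * exponential decay
 *
 *      load' = load * (1 - a) + nrcpus * (f >> 7)
 *
 * i.e. the load decays toward a value proportional to the number of runnable
 * cpus, with a per-tick decay constant of exp(-1/(20 * ncpu)).  The q/r
 * split carries the high and low 16 bits of the load separately so that the
 * intermediate multiplications by f cannot overflow.
 */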

/*
 * Initialize lpl topology in the target based on topology currently present in
 * lpl_bootstrap.
 *
 * lpl_topo_bootstrap() is only called once from cpupart_initialize_default()
 * to initialize cp_default's list of lpls.  Up to this point all topology
 * operations were performed using lpl_bootstrap.  Now cp_default has its own
 * list of lpls and all subsequent lpl operations should use it instead of
 * lpl_bootstrap.  The `target' argument points to the list of lpls in
 * cp_default and `size' is the size of this list.
 *
 * This function walks the lpl topology in lpl_bootstrap and does four things:
 *
 * 1) Copies all fields from lpl_bootstrap to the target.
 *
 * 2) Sets CPU0's lpl pointer to the correct element of the target list.
 *
 * 3) Updates lpl_parent pointers to point to the lpls in the target list
 *    instead of lpl_bootstrap.
 *
 * 4) Updates pointers in the resource list of the target to point to the lpls
 *    in the target list instead of lpl_bootstrap.
 *
 * After lpl_topo_bootstrap() completes, target contains the same information
 * that would be present there if it were used during boot instead of
 * lpl_bootstrap.  The information in lpl_bootstrap is not needed after this
 * point, so it is bzeroed.
 */
void
lpl_topo_bootstrap(lpl_t *target, int size)
{
        lpl_t   *lpl = lpl_bootstrap;
        lpl_t   *target_lpl = target;
        int     howmany;
        int     id;
        int     i;

        /*
         * The only target that should be passed here is cp_default lpl list.
         */
        ASSERT(target == cp_default.cp_lgrploads);
        ASSERT(size == cp_default.cp_nlgrploads);
        ASSERT(!lgrp_topo_initialized);
        ASSERT(ncpus == 1);

        howmany = MIN(LPL_BOOTSTRAP_SIZE, size);
        for (i = 0; i < howmany; i++, lpl++, target_lpl++) {
                /*
                 * Copy all fields from lpl.
                 */
                *target_lpl = *lpl;

                /*
                 * Substitute CPU0 lpl pointer with one relative to target.
                 */
                if (lpl->lpl_cpus == CPU) {
                        ASSERT(CPU->cpu_lpl == lpl);
                        CPU->cpu_lpl = target_lpl;
                }

                /*
                 * Substitute parent information with parent relative to
                 * target.
                 */
                if (lpl->lpl_parent != NULL)
                        target_lpl->lpl_parent = (lpl_t *)
                            (((uintptr_t)lpl->lpl_parent -
                            (uintptr_t)lpl_bootstrap) +
                            (uintptr_t)target);

                /*
                 * Walk over the resource set, substituting pointers relative
                 * to lpl_bootstrap with pointers relative to target.
                 */
                ASSERT(lpl->lpl_nrset <= 1);

                for (id = 0; id < lpl->lpl_nrset; id++) {
                        if (lpl->lpl_rset[id] != NULL) {
                                target_lpl->lpl_rset[id] =
                                    (lpl_t *)
                                    (((uintptr_t)lpl->lpl_rset[id] -
                                    (uintptr_t)lpl_bootstrap) +
                                    (uintptr_t)target);
                        }
                }
        }

        /*
         * Topology information in lpl_bootstrap is no longer needed.
         */
        bzero(lpl_bootstrap_list, sizeof (lpl_bootstrap_list));
}
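
/*
 * Relocation sketch (illustrative, not original code): each pointer into the
 * bootstrap array is rebased by preserving its offset, e.g.
 *
 *      new = (lpl_t *)((uintptr_t)old -
 *          (uintptr_t)lpl_bootstrap + (uintptr_t)target);
 *
 * which works because target is a copy of lpl_bootstrap with identical
 * layout, so an lpl's index (and therefore its byte offset) is the same in
 * both arrays.
 */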

/* the maximum effect that a single thread can have on its lgroup's load */
#define LGRP_LOADAVG_MAX_EFFECT(ncpu) \
        ((lgrp_loadavg_max_effect) / (ncpu))
uint32_t        lgrp_loadavg_max_effect = LGRP_LOADAVG_THREAD_MAX;

/*
 * If the lowest load among the lgroups a process' threads are currently
 * spread across is greater than lgrp_expand_proc_thresh, we'll consider
 * expanding the process to a new lgroup.
 */
#define LGRP_EXPAND_PROC_THRESH_DEFAULT 62250
lgrp_load_t     lgrp_expand_proc_thresh = LGRP_EXPAND_PROC_THRESH_DEFAULT;

#define LGRP_EXPAND_PROC_THRESH(ncpu) \
        ((lgrp_expand_proc_thresh) / (ncpu))

/*
 * A process will be expanded to a new lgroup only if the difference between
 * the lowest load on the lgroups the process' threads are currently spread
 * across and the lowest load on the other lgroups in the process' partition
 * is greater than lgrp_expand_proc_diff.
 */
#define LGRP_EXPAND_PROC_DIFF_DEFAULT 60000
lgrp_load_t     lgrp_expand_proc_diff = LGRP_EXPAND_PROC_DIFF_DEFAULT;

#define LGRP_EXPAND_PROC_DIFF(ncpu) \
        ((lgrp_expand_proc_diff) / (ncpu))

/*
 * The loadavg tolerance accounts for "noise" inherent in the load, which may
 * be present due to impreciseness of the load average decay algorithm.
 *
 * The default tolerance is lgrp_loadavg_max_effect.  Note that the tunable
 * tolerance is scaled by the number of cpus in the lgroup just like
 * lgrp_loadavg_max_effect.  For example, if lgrp_loadavg_tolerance = 0x10000
 * and ncpu = 4, then lgrp_choose will consider differences in lgroup loads
 * of 0x10000 / 4 => 0x4000 or greater to be significant.
 */
uint32_t        lgrp_loadavg_tolerance = LGRP_LOADAVG_THREAD_MAX;
#define LGRP_LOADAVG_TOLERANCE(ncpu)    \
        ((lgrp_loadavg_tolerance) / ncpu)

/*
 * lgrp_choose() will choose the root lgroup as home when the lowest lgroup
 * load average is above this threshold
 */
uint32_t        lgrp_load_thresh = UINT32_MAX;

/*
 * lgrp_choose() will try to skip any lgroups with less free memory
 * than this threshold when choosing a home lgroup
 */
pgcnt_t lgrp_mem_free_thresh = 0;

/*
 * When choosing between similarly loaded lgroups, lgrp_choose() will pick
 * one based on one of the following policies:
 * - Random selection
 * - Pseudo round robin placement
 * - Longest time since a thread was last placed
 */
#define LGRP_CHOOSE_RANDOM      1
#define LGRP_CHOOSE_RR          2
#define LGRP_CHOOSE_TIME        3

int     lgrp_choose_policy = LGRP_CHOOSE_TIME;
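
/*
 * Illustrative reading of the expansion tunables (not original code): with
 * the defaults above and a 4-cpu lgroup, a process spills onto a new lgroup
 * only if the least-loaded lgroup it already occupies has
 *
 *      load > 62250 / 4
 *
 * and some other lgroup in the partition is lighter by more than
 *
 *      60000 / ncpu
 *
 * (see the expansion check near the end of lgrp_choose() below).
 */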

/*
 * Choose a suitable leaf lgroup for a kthread.  The kthread is assumed not to
 * be bound to a CPU or processor set.
 *
 * Arguments:
 *      t               The thread
 *      cpupart         The partition the thread belongs to.
 *
 * NOTE: Should at least be called with the cpu_lock held, kernel preemption
 *       disabled, or thread_lock held (at splhigh) to protect against the CPU
 *       partitions changing out from under us, and assumes that the given
 *       thread is protected.  Also, called sometimes w/ cpus paused or kernel
 *       preemption disabled, so don't grab any locks because we should never
 *       block under those conditions.
 */
lpl_t *
lgrp_choose(kthread_t *t, cpupart_t *cpupart)
{
        lgrp_load_t     bestload, bestrload;
        int             lgrpid_offset, lgrp_count;
        lgrp_id_t       lgrpid, lgrpid_start;
        lpl_t           *lpl, *bestlpl, *bestrlpl;
        klgrpset_t      lgrpset;
        proc_t          *p;

        ASSERT(t != NULL);
        ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
            THREAD_LOCK_HELD(t));
        ASSERT(cpupart != NULL);

        p = t->t_procp;

        /* A process should always be in an active partition */
        ASSERT(!klgrpset_isempty(cpupart->cp_lgrpset));

        bestlpl = bestrlpl = NULL;
        bestload = bestrload = LGRP_LOADAVG_MAX;
        lgrpset = cpupart->cp_lgrpset;

        switch (lgrp_choose_policy) {
        case LGRP_CHOOSE_RR:
                lgrpid = cpupart->cp_lgrp_hint;
                do {
                        if (++lgrpid > lgrp_alloc_max)
                                lgrpid = 0;
                } while (!klgrpset_ismember(lgrpset, lgrpid));

                break;
        default:
        case LGRP_CHOOSE_TIME:
        case LGRP_CHOOSE_RANDOM:
                klgrpset_nlgrps(lgrpset, lgrp_count);
                lgrpid_offset =
                    (((ushort_t)(gethrtime() >> 4)) % lgrp_count) + 1;
                for (lgrpid = 0; ; lgrpid++) {
                        if (klgrpset_ismember(lgrpset, lgrpid)) {
                                if (--lgrpid_offset == 0)
                                        break;
                        }
                }
                break;
        }

        lgrpid_start = lgrpid;

        DTRACE_PROBE2(lgrp_choose_start, lgrp_id_t, lgrpid_start,
            lgrp_id_t, cpupart->cp_lgrp_hint);

        /*
         * Use lgroup affinities (if any) to choose best lgroup
         *
         * NOTE: Assumes that thread is protected from going away and its
         *       lgroup affinities won't change (ie. p_lock, or
         *       thread_lock() being held and/or CPUs paused)
         */
        if (t->t_lgrp_affinity) {
                lpl = lgrp_affinity_best(t, cpupart, lgrpid_start);
                if (lpl != NULL)
                        return (lpl);
        }

        ASSERT(klgrpset_ismember(lgrpset, lgrpid_start));
        bestlpl = &cpupart->cp_lgrploads[lgrpid_start];

        do {
                pgcnt_t npgs;

                /*
                 * Skip any lgroups outside of thread's pset
                 */
                if (!klgrpset_ismember(lgrpset, lgrpid)) {
                        if (++lgrpid > lgrp_alloc_max)
                                lgrpid = 0;     /* wrap the search */
                        continue;
                }

                /*
                 * Skip any non-leaf lgroups
                 */
                if (lgrp_table[lgrpid]->lgrp_childcnt != 0)
                        continue;

                /*
                 * Skip any lgroups without enough free memory
                 * (when threshold set to nonzero positive value)
                 */
                if (lgrp_mem_free_thresh > 0) {
                        npgs = lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_FREE);
                        if (npgs < lgrp_mem_free_thresh) {
                                if (++lgrpid > lgrp_alloc_max)
                                        lgrpid = 0;     /* wrap the search */
                                continue;
                        }
                }

                lpl = &cpupart->cp_lgrploads[lgrpid];
                if (klgrpset_isempty(p->p_lgrpset) ||
                    klgrpset_ismember(p->p_lgrpset, lgrpid)) {
                        /*
                         * Either this is a new process or the process already
                         * has threads on this lgrp, so this is a preferred
                         * lgroup for the thread.
                         */
                        if (lpl_pick(lpl, bestlpl)) {
                                bestload = lpl->lpl_loadavg;
                                bestlpl = lpl;
                        }
                } else {
                        /*
                         * The process doesn't have any threads on this lgrp,
                         * but we're willing to consider this lgrp if the load
                         * difference is big enough to justify splitting up
                         * the process' threads.
                         */
			if (lpl_pick(lpl, bestrlpl)) {
				bestrload = lpl->lpl_loadavg;
				bestrlpl = lpl;
			}
		}
		if (++lgrpid > lgrp_alloc_max)
			lgrpid = 0;	/* wrap the search */
	} while (lgrpid != lgrpid_start);

	/*
	 * Return the root lgroup if the threshold isn't set to the maximum
	 * value and the lowest lgroup load average is more than that
	 * threshold
	 */
	if (lgrp_load_thresh != UINT32_MAX &&
	    bestload >= lgrp_load_thresh && bestrload >= lgrp_load_thresh)
		return (&cpupart->cp_lgrploads[lgrp_root->lgrp_id]);

	/*
	 * If all the lgroups over which the thread's process is spread are
	 * heavily loaded, we'll consider placing the thread on one of the
	 * other leaf lgroups in the thread's partition.
	 */
	if ((bestload > LGRP_EXPAND_PROC_THRESH(bestlpl->lpl_ncpu)) &&
	    (bestrload < bestload) &&	/* paranoid about wraparound */
	    (bestrload + LGRP_EXPAND_PROC_DIFF(bestrlpl->lpl_ncpu) <
	    bestload)) {
		bestlpl = bestrlpl;
	}

	cpupart->cp_lgrp_hint = bestlpl->lpl_lgrpid;
	bestlpl->lpl_homed_time = gethrtime_unscaled();

	ASSERT(bestlpl->lpl_ncpu > 0);
	return (bestlpl);
}
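
/*
 * Usage sketch (hypothetical caller, not from this file): a thread-creation
 * path could home a new thread roughly like this, holding cpu_lock so the
 * partition's lgroup set can't change underneath the search:
 *
 *	mutex_enter(&cpu_lock);
 *	t->t_lpl = lgrp_choose(t, t->t_cpupart);
 *	mutex_exit(&cpu_lock);
 *
 * In practice, callers such as the fork()/lwp_create() paths hold one of
 * the locks listed in the NOTE above before calling lgrp_choose().
 */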

/*
 * Return 1 if lpl1 is a better candidate than lpl2 for lgrp homing.
 */
static int
lpl_pick(lpl_t *lpl1, lpl_t *lpl2)
{
	lgrp_load_t	l1, l2;
	lgrp_load_t	tolerance = LGRP_LOADAVG_TOLERANCE(lpl1->lpl_ncpu);

	if (lpl2 == NULL)
		return (1);

	l1 = lpl1->lpl_loadavg;
	l2 = lpl2->lpl_loadavg;

	if ((l1 + tolerance < l2) && (l1 < l2)) {
		/* lpl1 is significantly less loaded than lpl2 */
		return (1);
	}

	if (lgrp_choose_policy == LGRP_CHOOSE_TIME &&
	    l1 + tolerance >= l2 && l1 < l2 &&
	    lpl1->lpl_homed_time < lpl2->lpl_homed_time) {
		/*
		 * lpl1's load is within the tolerance of lpl2. We're
		 * willing to consider it to be better, however, if
		 * it has been longer since we last homed a thread there.
		 */
		return (1);
	}

	return (0);
}
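
/*
 * Worked example for lpl_pick() (illustrative numbers, not from the source):
 * suppose lpl1->lpl_loadavg == 1000, lpl2->lpl_loadavg == 1500, and
 * LGRP_LOADAVG_TOLERANCE(lpl1->lpl_ncpu) evaluates to 300. Then
 * 1000 + 300 < 1500, so lpl1 wins outright. If the tolerance were 600
 * instead, the two loads would be considered comparable, and under
 * LGRP_CHOOSE_TIME the tie would break in favor of whichever lpl was least
 * recently chosen as a home (the smaller lpl_homed_time).
 */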

/*
 * An LWP is expected to be assigned to an lgroup for at least this long
 * for its anticipatory load to be justified. NOTE that this value should
 * not be set extremely huge (say, larger than 100 years), to avoid problems
 * with overflow in the calculation that uses it.
 */
#define	LGRP_MIN_NSEC	(NANOSEC / 10)		/* 1/10 of a second */
hrtime_t lgrp_min_nsec = LGRP_MIN_NSEC;

/*
 * Routine to change a thread's lgroup affiliation. This routine updates
 * the thread's kthread_t struct and its process' proc_t struct to note the
 * thread's new lgroup affiliation, and its lgroup affinities.
 *
 * Note that this is the only routine that modifies a thread's t_lpl field,
 * and that adds in or removes anticipatory load.
 *
 * If the thread is exiting, newlpl is NULL.
 *
 * Locking:
 * The following lock must be held on entry:
 *	cpu_lock, kpreempt_disable(), or thread_lock -- to assure t's new
 *	lgrp doesn't get removed from t's partition
 *
 * This routine is not allowed to grab any locks, since it may be called
 * with cpus paused (such as from cpu_offline).
 */
void
lgrp_move_thread(kthread_t *t, lpl_t *newlpl, int do_lgrpset_delete)
{
	proc_t		*p;
	lpl_t		*lpl, *oldlpl;
	lgrp_id_t	oldid;
	kthread_t	*tp;
	uint_t		ncpu;
	lgrp_load_t	old, new;

	ASSERT(t);
	ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
	    THREAD_LOCK_HELD(t));

	/*
	 * If not changing lpls, just return
	 */
	if ((oldlpl = t->t_lpl) == newlpl)
		return;

	/*
	 * Make sure the thread's lwp hasn't exited (if so, this thread is now
	 * associated with process 0 rather than with its original process).
	 */
	if (t->t_proc_flag & TP_LWPEXIT) {
		if (newlpl != NULL) {
			t->t_lpl = newlpl;
		}
		return;
	}

	p = ttoproc(t);

	/*
	 * If the thread had a previous lgroup, update its process' p_lgrpset
	 * to account for it being moved from its old lgroup.
	 */
	if ((oldlpl != NULL) &&	/* thread had a previous lgroup */
	    (p->p_tlist != NULL)) {
		oldid = oldlpl->lpl_lgrpid;

		if (newlpl != NULL)
			lgrp_stat_add(oldid, LGRP_NUM_MIGR, 1);

		if ((do_lgrpset_delete) &&
		    (klgrpset_ismember(p->p_lgrpset, oldid))) {
			for (tp = p->p_tlist->t_forw; ; tp = tp->t_forw) {
				/*
				 * Check if a thread other than the thread
				 * that's moving is assigned to the same
				 * lgroup as the thread that's moving. Note
				 * that we have to compare lgroup IDs, rather
				 * than simply comparing t_lpl's, since the
				 * threads may belong to different partitions
				 * but be assigned to the same lgroup.
				 */
				ASSERT(tp->t_lpl != NULL);

				if ((tp != t) &&
				    (tp->t_lpl->lpl_lgrpid == oldid)) {
					/*
					 * Another thread is assigned to the
					 * same lgroup as the thread that's
					 * moving, so p_lgrpset doesn't change.
					 */
					break;
				} else if (tp == p->p_tlist) {
					/*
					 * No other thread is assigned to the
					 * same lgroup as the exiting thread,
					 * so clear the lgroup's bit in
					 * p_lgrpset.
					 */
					klgrpset_del(p->p_lgrpset, oldid);
					break;
				}
			}
		}

		/*
		 * If this thread was assigned to its old lgroup for such a
		 * short amount of time that the anticipatory load that was
		 * added on its behalf has aged very little, remove that
		 * anticipatory load.
		 */
		if ((t->t_anttime + lgrp_min_nsec > gethrtime()) &&
		    ((ncpu = oldlpl->lpl_ncpu) > 0)) {
			lpl = oldlpl;
			for (;;) {
				do {
					old = new = lpl->lpl_loadavg;
					new -= LGRP_LOADAVG_MAX_EFFECT(ncpu);
					if (new > old) {
						/*
						 * This can happen if the load
						 * average was aged since we
						 * added in the anticipatory
						 * load.
						 */
						new = 0;
					}
				} while (cas32(
				    (lgrp_load_t *)&lpl->lpl_loadavg, old,
				    new) != old);

				lpl = lpl->lpl_parent;
				if (lpl == NULL)
					break;

				ncpu = lpl->lpl_ncpu;
				ASSERT(ncpu > 0);
			}
		}
	}
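
	/*
	 * The cas32() loops above and below are the standard lock-free
	 * read-modify-write idiom, which this routine must use since it may
	 * run with cpus paused and so cannot block on a lock. The shape of
	 * the idiom (illustrative sketch):
	 *
	 *	do {
	 *		old = new = *addr;
	 *		new = transform(old);	(subtract or add load here)
	 *	} while (cas32(addr, old, new) != old);
	 *
	 * If another CPU changes *addr between the read and the compare-and-
	 * swap, cas32() returns the unexpected value and the loop retries
	 * with a fresh copy.
	 */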

	/*
	 * If the thread has a new lgroup (i.e. it's not exiting), update its
	 * t_lpl and its process' p_lgrpset, and apply an anticipatory load
	 * to its new lgroup to account for its move to its new lgroup.
	 */
	if (newlpl != NULL) {
		/*
		 * This thread is moving to a new lgroup
		 */
		t->t_lpl = newlpl;

		/*
		 * Reflect move in load average of new lgroup
		 * unless it is root lgroup
		 */
		if (lgrp_table[newlpl->lpl_lgrpid] == lgrp_root)
			return;

		if (!klgrpset_ismember(p->p_lgrpset, newlpl->lpl_lgrpid)) {
			klgrpset_add(p->p_lgrpset, newlpl->lpl_lgrpid);
		}

		/*
		 * It'll take some time for the load on the new lgroup
		 * to reflect this thread's placement on it. We'd rather
		 * not, however, have all threads between now and then
		 * also piling on to this lgroup. To avoid this pileup,
		 * we anticipate the load this thread will generate on
		 * its new lgroup. The goal is to make the lgroup's load
		 * appear as though the thread had been there all along.
		 * We're very conservative in calculating this
		 * anticipatory load; we assume the worst case (a 100%
		 * CPU-bound thread). This may be modified in the future
		 * to be more accurate.
		 */
		lpl = newlpl;
		for (;;) {
			ncpu = lpl->lpl_ncpu;
			ASSERT(ncpu > 0);
			do {
				old = new = lpl->lpl_loadavg;
				new += LGRP_LOADAVG_MAX_EFFECT(ncpu);
				/*
				 * Check for overflow
				 * Underflow not possible here
				 */
				if (new < old)
					new = UINT32_MAX;
			} while (cas32((lgrp_load_t *)&lpl->lpl_loadavg, old,
			    new) != old);

			lpl = lpl->lpl_parent;
			if (lpl == NULL)
				break;
		}
		t->t_anttime = gethrtime();
	}
}

/*
 * Return lgroup memory allocation policy given advice from madvise(3C)
 */
lgrp_mem_policy_t
lgrp_madv_to_policy(uchar_t advice, size_t size, int type)
{
	switch (advice) {
	case MADV_ACCESS_LWP:
		return (LGRP_MEM_POLICY_NEXT);
	case MADV_ACCESS_MANY:
		return (LGRP_MEM_POLICY_RANDOM);
	default:
		return (lgrp_mem_policy_default(size, type));
	}
}
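
/*
 * Example mapping for lgrp_madv_to_policy() (the user-level call is
 * illustrative): an application that calls
 *
 *	madvise(addr, len, MADV_ACCESS_LWP);
 *
 * is declaring that the next LWP to touch the range will access it most
 * heavily, so the range gets LGRP_MEM_POLICY_NEXT (allocate near the
 * touching thread), while MADV_ACCESS_MANY maps to LGRP_MEM_POLICY_RANDOM
 * to spread allocations across lgroups for many accessing threads.
 */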

/*
 * Figure out the default memory allocation policy
 */
lgrp_mem_policy_t
lgrp_mem_policy_default(size_t size, int type)
{
	cpupart_t		*cp;
	lgrp_mem_policy_t	policy;
	size_t			pset_mem_size;

	/*
	 * Randomly allocate memory across lgroups for shared memory
	 * beyond a certain threshold
	 */
	if ((type != MAP_SHARED && size > lgrp_privm_random_thresh) ||
	    (type == MAP_SHARED && size > lgrp_shm_random_thresh)) {
		/*
		 * Get total memory size of current thread's pset
		 */
		kpreempt_disable();
		cp = curthread->t_cpupart;
		klgrpset_totalsize(cp->cp_lgrpset, pset_mem_size);
		kpreempt_enable();

		/*
		 * Choose the policy to randomly allocate memory across
		 * lgroups in the pset if it will fit and this is not the
		 * default partition. Otherwise, allocate memory randomly
		 * across the machine.
		 */
		if (lgrp_mem_pset_aware && size < pset_mem_size)
			policy = LGRP_MEM_POLICY_RANDOM_PSET;
		else
			policy = LGRP_MEM_POLICY_RANDOM;
	} else
		/*
		 * Apply default policy for private memory and
		 * shared memory under the respective random
		 * threshold.
		 */
		policy = lgrp_mem_default_policy;

	return (policy);
}

/*
 * Get memory allocation policy for this segment
 */
lgrp_mem_policy_info_t *
lgrp_mem_policy_get(struct seg *seg, caddr_t vaddr)
{
	lgrp_mem_policy_info_t	*policy_info;
	extern struct seg_ops	segspt_ops;
	extern struct seg_ops	segspt_shmops;

	/*
	 * This is for binary compatibility to protect against third-party
	 * segment drivers which haven't recompiled to allow for
	 * SEGOP_GETPOLICY()
	 */
	if (seg->s_ops != &segvn_ops && seg->s_ops != &segspt_ops &&
	    seg->s_ops != &segspt_shmops)
		return (NULL);

	policy_info = NULL;
	if (seg->s_ops->getpolicy != NULL)
		policy_info = SEGOP_GETPOLICY(seg, vaddr);

	return (policy_info);
}

/*
 * Set the policy for allocating private memory, given the desired policy,
 * the policy info, and the size in bytes of the memory to which the policy
 * applies.
 * Return 0 if the policy wasn't set already and 1 if it was.
 */
int
lgrp_privm_policy_set(lgrp_mem_policy_t policy,
    lgrp_mem_policy_info_t *policy_info, size_t size)
{

	ASSERT(policy_info != NULL);

	if (policy == LGRP_MEM_POLICY_DEFAULT)
		policy = lgrp_mem_policy_default(size, MAP_PRIVATE);

	/*
	 * Policy set already?
	 */
	if (policy == policy_info->mem_policy)
		return (1);

	/*
	 * Set policy
	 */
	policy_info->mem_policy = policy;
	policy_info->mem_reserved = 0;

	return (0);
}

/*
 * Get shared memory allocation policy with given tree and offset
 */
lgrp_mem_policy_info_t *
lgrp_shm_policy_get(struct anon_map *amp, ulong_t anon_index, vnode_t *vp,
    u_offset_t vn_off)
{
	u_offset_t		off;
	lgrp_mem_policy_info_t	*policy_info;
	lgrp_shm_policy_seg_t	*policy_seg;
	lgrp_shm_locality_t	*shm_locality;
	avl_tree_t		*tree;
	avl_index_t		where;

	/*
	 * Get policy segment tree from anon_map or vnode and use specified
	 * anon index or vnode offset as offset
	 *
	 * Assume that no lock needs to be held on anon_map or vnode, since
	 * they should be protected by their reference count which must be
	 * nonzero for an existing segment
	 */
	if (amp) {
		ASSERT(amp->refcnt != 0);
		shm_locality = amp->locality;
		if (shm_locality == NULL)
			return (NULL);
		tree = shm_locality->loc_tree;
		off = ptob(anon_index);
	} else if (vp) {
		shm_locality = vp->v_locality;
		if (shm_locality == NULL)
			return (NULL);
		ASSERT(shm_locality->loc_count != 0);
		tree = shm_locality->loc_tree;
		off = vn_off;
	} else
		return (NULL);

	if (tree == NULL)
		return (NULL);

	/*
	 * Lookup policy segment for offset into shared object and return
	 * policy info
	 */
	rw_enter(&shm_locality->loc_lock, RW_READER);
	policy_info = NULL;
	policy_seg = avl_find(tree, &off, &where);
	if (policy_seg)
		policy_info = &policy_seg->shm_policy;
	rw_exit(&shm_locality->loc_lock);

	return (policy_info);
}

/*
 * Return the lgroup to use for allocating memory
 * given the segment and address
 *
 * There is no mutual exclusion between calls to this routine and DR, so
 * this routine and whoever calls it should be mindful of the possibility
 * that the lgrp returned may be deleted. If this happens, dereferences of
 * the lgrp pointer will still be safe, but the resources in the lgrp will
 * be gone, and LGRP_EXISTS() will no longer be true.
 */
lgrp_t *
lgrp_mem_choose(struct seg *seg, caddr_t vaddr, size_t pgsz)
{
	int			i;
	lgrp_t			*lgrp;
	klgrpset_t		lgrpset;
	int			lgrps_spanned;
	unsigned long		off;
	lgrp_mem_policy_t	policy;
	lgrp_mem_policy_info_t	*policy_info;
	ushort_t		random;
	int			stat = 0;

	/*
	 * Just return the root lgroup if the lgrp framework hasn't finished
	 * initializing or if this is a UMA machine.
	 */
	if (nlgrps == 1 || !lgrp_initialized)
		return (lgrp_root);

	/*
	 * Get memory allocation policy for this segment
	 */
	policy = lgrp_mem_default_policy;
	if (seg != NULL) {
		if (seg->s_as == &kas) {
			if (policy == LGRP_MEM_POLICY_RANDOM_PROC ||
			    policy == LGRP_MEM_POLICY_RANDOM_PSET)
				policy = LGRP_MEM_POLICY_RANDOM;
		} else {
			policy_info = lgrp_mem_policy_get(seg, vaddr);
			if (policy_info != NULL)
				policy = policy_info->mem_policy;
		}
	}
	lgrpset = 0;

	/*
	 * Initialize lgroup to home by default
	 */
	lgrp = lgrp_home_lgrp();

	/*
	 * When homing threads on the root lgrp, override default memory
	 * allocation policies with the root lgroup's memory allocation policy
	 */
	if (lgrp == lgrp_root)
		policy = lgrp_mem_policy_root;

	/*
	 * Implement policy
	 */
	switch (policy) {
	case LGRP_MEM_POLICY_NEXT_CPU:

		/*
		 * Return lgroup of current CPU which faulted on memory
		 */
		lgrp = lgrp_cpu_to_lgrp(CPU);
		break;

	case LGRP_MEM_POLICY_NEXT:
	case LGRP_MEM_POLICY_DEFAULT:
	default:

		/*
		 * Just return current thread's home lgroup
		 * for default policy (next touch).
		 * If the thread is homed to the root,
		 * then the default policy is random across lgroups.
		 * Fall through to the random case.
		 */
		if (lgrp != lgrp_root) {
			if (policy == LGRP_MEM_POLICY_NEXT)
				lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_NEXT, 1);
			else
				lgrp_stat_add(lgrp->lgrp_id,
				    LGRP_NUM_DEFAULT, 1);
			break;
		}
		/* LINTED fallthrough on case statement */
	case LGRP_MEM_POLICY_RANDOM:

		/*
		 * Return a random leaf lgroup with memory
		 */
		lgrpset = lgrp_root->lgrp_set[LGRP_RSRC_MEM];
		/*
		 * Count how many lgroups are spanned
		 */
		klgrpset_nlgrps(lgrpset, lgrps_spanned);

		/*
		 * There may be no memnodes in the root lgroup during DR copy
		 * rename on a system with only two boards (memnodes)
		 * configured. In this case just return the root lgrp.
		 */
		if (lgrps_spanned == 0) {
			lgrp = lgrp_root;
			break;
		}

		/*
		 * Pick a random offset within lgroups spanned
		 * and return lgroup at that offset
		 */
		random = (ushort_t)gethrtime() >> 4;
		off = random % lgrps_spanned;
		ASSERT(off <= lgrp_alloc_max);

		for (i = 0; i <= lgrp_alloc_max; i++) {
			if (!klgrpset_ismember(lgrpset, i))
				continue;
			if (off)
				off--;
			else {
				lgrp = lgrp_table[i];
				lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_RANDOM,
				    1);
				break;
			}
		}
		break;

	case LGRP_MEM_POLICY_RANDOM_PROC:

		/*
		 * Grab a copy of the bitmask of lgroups spanned by
		 * this process
		 */
		klgrpset_copy(lgrpset, curproc->p_lgrpset);
		stat = LGRP_NUM_RANDOM_PROC;

		/* LINTED fallthrough on case statement */
	case LGRP_MEM_POLICY_RANDOM_PSET:

		if (!stat)
			stat = LGRP_NUM_RANDOM_PSET;

		if (klgrpset_isempty(lgrpset)) {
			/*
			 * Grab a copy of the bitmask of lgroups spanned by
			 * this processor set
			 */
			kpreempt_disable();
			klgrpset_copy(lgrpset,
			    curthread->t_cpupart->cp_lgrpset);
			kpreempt_enable();
		}

		/*
		 * Count how many lgroups are spanned
		 */
		klgrpset_nlgrps(lgrpset, lgrps_spanned);
		ASSERT(lgrps_spanned <= nlgrps);

		/*
		 * lgrps_spanned should probably always be nonzero, but to be
		 * on the safe side we return lgrp_root if it is empty.
		 */
		if (lgrps_spanned == 0) {
			lgrp = lgrp_root;
			break;
		}

		/*
		 * Pick a random offset within lgroups spanned
		 * and return lgroup at that offset
		 */
		random = (ushort_t)gethrtime() >> 4;
		off = random % lgrps_spanned;
		ASSERT(off <= lgrp_alloc_max);

		for (i = 0; i <= lgrp_alloc_max; i++) {
			if (!klgrpset_ismember(lgrpset, i))
				continue;
			if (off)
				off--;
			else {
				lgrp = lgrp_table[i];
				lgrp_stat_add(lgrp->lgrp_id, stat, 1);
				break;
			}
		}
		break;

	case LGRP_MEM_POLICY_ROUNDROBIN:

		/*
		 * Use the offset within the segment to determine the
		 * offset from the home lgroup to choose for the
		 * next lgroup to allocate memory from
		 */
		off = ((unsigned long)(vaddr - seg->s_base) / pgsz) %
		    (lgrp_alloc_max + 1);

		kpreempt_disable();
		lgrpset = lgrp_root->lgrp_set[LGRP_RSRC_MEM];
		i = lgrp->lgrp_id;
		kpreempt_enable();

		while (off > 0) {
			i = (i + 1) % (lgrp_alloc_max + 1);
			lgrp = lgrp_table[i];
			if (klgrpset_ismember(lgrpset, i))
				off--;
		}
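
		/*
		 * Worked example for the round-robin arithmetic above
		 * (illustrative numbers): with pgsz == 8K,
		 * lgrp_alloc_max == 3, and a fault address 72K past
		 * seg->s_base, off = (73728 / 8192) % 4 = 1, so the walk
		 * advances one lgroup (with memory) past the home lgroup;
		 * the page at offset 64K would give off = 0 and stay on
		 * the home lgroup itself.
		 */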

		lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ROUNDROBIN, 1);

		break;
	}

	ASSERT(lgrp != NULL);
	return (lgrp);
}

/*
 * Return the number of pages in an lgroup
 *
 * NOTE: The NUMA test (numat) driver uses this, so changing arguments or
 * semantics could cause tests that rely on the numat driver to fail.
 */
pgcnt_t
lgrp_mem_size(lgrp_id_t lgrpid, lgrp_mem_query_t query)
{
	lgrp_t *lgrp;

	lgrp = lgrp_table[lgrpid];
	if (!LGRP_EXISTS(lgrp) ||
	    klgrpset_isempty(lgrp->lgrp_set[LGRP_RSRC_MEM]) ||
	    !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid))
		return (0);

	return (lgrp_plat_mem_size(lgrp->lgrp_plathand, query));
}
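
/*
 * Example use of lgrp_mem_size() from within this file: lgrp_choose()
 * calls lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_FREE) to skip lgroups whose
 * free page count falls below lgrp_mem_free_thresh when that threshold is
 * set, so a memory-poor lgroup won't be picked as a thread's home.
 */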

/*
 * Initialize lgroup shared memory allocation policy support
 */
void
lgrp_shm_policy_init(struct anon_map *amp, vnode_t *vp)
{
	lgrp_shm_locality_t	*shm_locality;

	/*
	 * Initialize the locality field in the anon_map, checking under
	 * a_rwlock whether another thread beat us to it.
	 */
	if (amp) {
		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
		if (amp->locality == NULL) {
			/*
			 * Allocate and initialize shared memory locality info
			 * and set anon_map locality pointer to it
			 * Drop lock across kmem_alloc(KM_SLEEP)
			 */
			ANON_LOCK_EXIT(&amp->a_rwlock);
			shm_locality = kmem_alloc(sizeof (*shm_locality),
			    KM_SLEEP);
			rw_init(&shm_locality->loc_lock, NULL, RW_DEFAULT,
			    NULL);
			shm_locality->loc_count = 1;	/* not used for amp */
			shm_locality->loc_tree = NULL;

			/*
			 * Reacquire lock and check to see whether anyone beat
			 * us to initializing the locality info
			 */
			ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
			if (amp->locality != NULL) {
				rw_destroy(&shm_locality->loc_lock);
				kmem_free(shm_locality,
				    sizeof (*shm_locality));
			} else
				amp->locality = shm_locality;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);
		return;
	}

	/*
	 * Allocate shared vnode policy info if vnode is not locality aware yet
	 */
	mutex_enter(&vp->v_lock);
	if ((vp->v_flag & V_LOCALITY) == 0) {
		/*
		 * Allocate and initialize shared memory locality info
		 */
		mutex_exit(&vp->v_lock);
		shm_locality = kmem_alloc(sizeof (*shm_locality), KM_SLEEP);
		rw_init(&shm_locality->loc_lock, NULL, RW_DEFAULT, NULL);
		shm_locality->loc_count = 1;
		shm_locality->loc_tree = NULL;

		/*
		 * Point vnode locality field at shared vnode policy info
		 * and set locality aware flag in vnode
		 */
		mutex_enter(&vp->v_lock);
		if ((vp->v_flag & V_LOCALITY) == 0) {
			vp->v_locality = shm_locality;
			vp->v_flag |= V_LOCALITY;
		} else {
			/*
			 * Lost the race, so free our locality info and
			 * increment the count on the winner's.
			 */
			rw_destroy(&shm_locality->loc_lock);
			kmem_free(shm_locality, sizeof (*shm_locality));
			shm_locality = vp->v_locality;
			shm_locality->loc_count++;
		}
		mutex_exit(&vp->v_lock);

		return;
	}

	/*
	 * Increment reference count of number of segments mapping this vnode
	 * shared
	 */
	shm_locality = vp->v_locality;
	shm_locality->loc_count++;
	mutex_exit(&vp->v_lock);
}

/*
 * Destroy the given shared memory policy segment tree
 */
void
lgrp_shm_policy_tree_destroy(avl_tree_t *tree)
{
	lgrp_shm_policy_seg_t	*cur;
	lgrp_shm_policy_seg_t	*next;

	if (tree == NULL)
		return;

	cur = (lgrp_shm_policy_seg_t *)avl_first(tree);
	while (cur != NULL) {
		next = AVL_NEXT(tree, cur);
		avl_remove(tree, cur);
		kmem_free(cur, sizeof (*cur));
		cur = next;
	}
	kmem_free(tree, sizeof (avl_tree_t));
}

/*
 * Uninitialize lgroup shared memory allocation policy support
 */
void
lgrp_shm_policy_fini(struct anon_map *amp, vnode_t *vp)
{
	lgrp_shm_locality_t	*shm_locality;

	/*
	 * For an anon_map, deallocate the shared memory policy tree and
	 * zero the locality field.
	 * Don't need any locks because the anon_map is being freed.
	 */
	if (amp) {
		if (amp->locality == NULL)
			return;
		shm_locality = amp->locality;
		shm_locality->loc_count = 0;	/* not really used for amp */
		rw_destroy(&shm_locality->loc_lock);
		lgrp_shm_policy_tree_destroy(shm_locality->loc_tree);
		kmem_free(shm_locality, sizeof (*shm_locality));
		amp->locality = 0;
		return;
	}

	/*
	 * For a vnode, decrement the reference count of segments mapping
	 * this vnode shared, and delete the locality info if the reference
	 * count drops to 0.
	 */
	mutex_enter(&vp->v_lock);
	shm_locality = vp->v_locality;
	shm_locality->loc_count--;

	if (shm_locality->loc_count == 0) {
		rw_destroy(&shm_locality->loc_lock);
		lgrp_shm_policy_tree_destroy(shm_locality->loc_tree);
		kmem_free(shm_locality, sizeof (*shm_locality));
		vp->v_locality = 0;
		vp->v_flag &= ~V_LOCALITY;
	}
	mutex_exit(&vp->v_lock);
}

/*
 * Compare two shared memory policy segments
 * Used by AVL tree code for searching
 */
int
lgrp_shm_policy_compar(const void *x, const void *y)
{
	lgrp_shm_policy_seg_t *a = (lgrp_shm_policy_seg_t *)x;
	lgrp_shm_policy_seg_t *b = (lgrp_shm_policy_seg_t *)y;

	if (a->shm_off < b->shm_off)
		return (-1);
	if (a->shm_off >= b->shm_off + b->shm_size)
		return (1);
	return (0);
}

/*
 * Concatenate seg1 with seg2 and remove seg2
 */
static int
lgrp_shm_policy_concat(avl_tree_t *tree, lgrp_shm_policy_seg_t *seg1,
    lgrp_shm_policy_seg_t *seg2)
{
	if (!seg1 || !seg2 ||
	    seg1->shm_off + seg1->shm_size != seg2->shm_off ||
	    seg1->shm_policy.mem_policy != seg2->shm_policy.mem_policy)
		return (-1);

	seg1->shm_size += seg2->shm_size;
	avl_remove(tree, seg2);
	kmem_free(seg2, sizeof (*seg2));
	return (0);
}
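
/*
 * Note on the comparator above: it compares a search offset against the
 * half-open range [shm_off, shm_off + shm_size), returning 0 for any
 * offset that falls inside a segment. That lets callers do a stabbing
 * query with avl_find(tree, &off, &where), passing a bare u_offset_t as
 * the search key; this relies on shm_off being the first member of
 * lgrp_shm_policy_seg_t (per its definition in the corresponding header),
 * so the cast in the comparator lines up.
 */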

/*
 * Split a segment at the given offset and return the rightmost (uppermost)
 * segment. Assumes that there are no overlapping segments.
 */
static lgrp_shm_policy_seg_t *
lgrp_shm_policy_split(avl_tree_t *tree, lgrp_shm_policy_seg_t *seg,
    u_offset_t off)
{
	lgrp_shm_policy_seg_t	*newseg;
	avl_index_t		where;

	ASSERT(seg != NULL);
	ASSERT(off >= seg->shm_off && off <= seg->shm_off + seg->shm_size);

	if (!seg || off < seg->shm_off || off > seg->shm_off +
	    seg->shm_size)
		return (NULL);

	if (off == seg->shm_off || off == seg->shm_off + seg->shm_size)
		return (seg);

	/*
	 * Adjust size of left segment and allocate new (right) segment
	 */
	newseg = kmem_alloc(sizeof (lgrp_shm_policy_seg_t), KM_SLEEP);
	newseg->shm_policy = seg->shm_policy;
	newseg->shm_off = off;
	newseg->shm_size = seg->shm_size - (off - seg->shm_off);
	seg->shm_size = off - seg->shm_off;

	/*
	 * Find where to insert new segment in AVL tree and insert it
	 */
	(void) avl_find(tree, &off, &where);
	avl_insert(tree, newseg, where);

	return (newseg);
}
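
/*
 * Worked example for lgrp_shm_policy_split() (illustrative offsets): given
 * a segment covering [0, 16K) and off == 8K, the existing segment is
 * trimmed to [0, 8K), a new segment covering [8K, 16K) inheriting the same
 * policy is inserted after it, and the new right-hand segment is returned.
 * A split at off == 0 or off == 16K is a no-op that returns the original
 * segment.
 */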

/*
 * Set shared memory allocation policy on specified shared object at given
 * offset and length
 *
 * Return 0 if the policy wasn't set already, 1 if it was set already, and
 * -1 if the policy can't be set.
 */
int
lgrp_shm_policy_set(lgrp_mem_policy_t policy, struct anon_map *amp,
    ulong_t anon_index, vnode_t *vp, u_offset_t vn_off, size_t len)
{
	u_offset_t		eoff;
	lgrp_shm_policy_seg_t	*next;
	lgrp_shm_policy_seg_t	*newseg;
	u_offset_t		off;
	u_offset_t		oldeoff;
	lgrp_shm_policy_seg_t	*prev;
	int			retval;
	lgrp_shm_policy_seg_t	*seg;
	lgrp_shm_locality_t	*shm_locality;
	avl_tree_t		*tree;
	avl_index_t		where;

	ASSERT(amp || vp);
	ASSERT((len & PAGEOFFSET) == 0);

	if (len == 0)
		return (-1);

	retval = 0;

	/*
	 * Get locality info and starting offset into shared object
	 * Try anon map first and then vnode
	 * Assume that no locks need to be held on anon_map or vnode, since
	 * it should be protected by its reference count, which must be
	 * nonzero for an existing segment.
	 */
	if (amp) {
		/*
		 * Get policy info from anon_map
		 */
		ASSERT(amp->refcnt != 0);
		if (amp->locality == NULL)
			lgrp_shm_policy_init(amp, NULL);
		shm_locality = amp->locality;
		off = ptob(anon_index);
	} else if (vp) {
		/*
		 * Get policy info from vnode
		 */
		if ((vp->v_flag & V_LOCALITY) == 0 || vp->v_locality == NULL)
			lgrp_shm_policy_init(NULL, vp);
		shm_locality = vp->v_locality;
		ASSERT(shm_locality->loc_count != 0);
		off = vn_off;
	} else
		return (-1);

	ASSERT((off & PAGEOFFSET) == 0);

	/*
	 * Figure out default policy
	 */
	if (policy == LGRP_MEM_POLICY_DEFAULT)
		policy = lgrp_mem_policy_default(len, MAP_SHARED);

	/*
	 * Create AVL tree if there isn't one yet
	 * and set locality field to point at it
	 */
	rw_enter(&shm_locality->loc_lock, RW_WRITER);
	tree = shm_locality->loc_tree;
	if (!tree) {
		rw_exit(&shm_locality->loc_lock);

		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);

		rw_enter(&shm_locality->loc_lock, RW_WRITER);
		if (shm_locality->loc_tree == NULL) {
			avl_create(tree, lgrp_shm_policy_compar,
			    sizeof (lgrp_shm_policy_seg_t),
			    offsetof(lgrp_shm_policy_seg_t, shm_tree));
			shm_locality->loc_tree = tree;
		} else {
			/*
			 * Another thread managed to set up the tree
			 * before we could. Free the tree we allocated
			 * and use the one that's already there.
			 */
			kmem_free(tree, sizeof (*tree));
			tree = shm_locality->loc_tree;
		}
	}

	/*
	 * Set policy
	 *
	 * Need to maintain hold on writer's lock to keep the tree from
	 * changing out from under us
	 */
	while (len != 0) {
		/*
		 * Find policy segment for specified offset into shared object
		 */
		seg = avl_find(tree, &off, &where);

		/*
		 * Didn't find any existing segment that contains the
		 * specified offset, so allocate a new segment, insert it,
		 * and concatenate with adjacent segments if possible
		 */
		if (seg == NULL) {
			newseg = kmem_alloc(sizeof (lgrp_shm_policy_seg_t),
			    KM_SLEEP);
			newseg->shm_policy.mem_policy = policy;
			newseg->shm_policy.mem_reserved = 0;
			newseg->shm_off = off;
			avl_insert(tree, newseg, where);

			/*
			 * Check to see whether new segment overlaps with next
			 * one, set length of new segment accordingly, and
			 * calculate remaining length and next offset
			 */
			seg = AVL_NEXT(tree, newseg);
			if (seg == NULL || off + len <= seg->shm_off) {
				newseg->shm_size = len;
				len = 0;
			} else {
				newseg->shm_size = seg->shm_off - off;
				off = seg->shm_off;
				len -= newseg->shm_size;
	/*
	 * Set policy
	 *
	 * Need to maintain hold on writer's lock to keep tree from
	 * changing out from under us
	 */
	while (len != 0) {
		/*
		 * Find policy segment for specified offset into shared object
		 */
		seg = avl_find(tree, &off, &where);

		/*
		 * Didn't find any existing segment that contains specified
		 * offset, so allocate new segment, insert it, and concatenate
		 * with adjacent segments if possible
		 */
		if (seg == NULL) {
			newseg = kmem_alloc(sizeof (lgrp_shm_policy_seg_t),
			    KM_SLEEP);
			newseg->shm_policy.mem_policy = policy;
			newseg->shm_policy.mem_reserved = 0;
			newseg->shm_off = off;
			avl_insert(tree, newseg, where);

			/*
			 * Check to see whether new segment overlaps with next
			 * one, set length of new segment accordingly, and
			 * calculate remaining length and next offset
			 */
			seg = AVL_NEXT(tree, newseg);
			if (seg == NULL || off + len <= seg->shm_off) {
				newseg->shm_size = len;
				len = 0;
			} else {
				newseg->shm_size = seg->shm_off - off;
				off = seg->shm_off;
				len -= newseg->shm_size;
			}

			/*
			 * Try to concatenate new segment with next and
			 * previous ones, since they might have the same policy
			 * now. Grab previous and next segments first because
			 * they will change on concatenation.
			 */
			prev = AVL_PREV(tree, newseg);
			next = AVL_NEXT(tree, newseg);
			(void) lgrp_shm_policy_concat(tree, newseg, next);
			(void) lgrp_shm_policy_concat(tree, prev, newseg);

			continue;
		}
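		/*
		 * An existing segment contains the starting offset.
		 * Compute the end of the requested range (eoff) and the
		 * end of the existing segment (oldeoff) to determine how
		 * the two overlap.
		 */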
		eoff = off + len;
		oldeoff = seg->shm_off + seg->shm_size;

		/*
		 * Policy set already?
		 */
		if (policy == seg->shm_policy.mem_policy) {
			/*
			 * Nothing left to do if offset and length
			 * fall within this segment
			 */
			if (eoff <= oldeoff) {
				retval = 1;
				break;
			} else {
				len = eoff - oldeoff;
				off = oldeoff;
				continue;
			}
		}

		/*
		 * Specified offset and length match existing segment exactly
		 */
		if (off == seg->shm_off && len == seg->shm_size) {
			/*
			 * Set policy and update current length
			 */
			seg->shm_policy.mem_policy = policy;
			seg->shm_policy.mem_reserved = 0;
			len = 0;

			/*
			 * Try concatenating new segment with previous and next
			 * segments, since they might have the same policy now.
			 * Grab previous and next segments first because they
			 * will change on concatenation.
			 */
			prev = AVL_PREV(tree, seg);
			next = AVL_NEXT(tree, seg);
			(void) lgrp_shm_policy_concat(tree, seg, next);
			(void) lgrp_shm_policy_concat(tree, prev, seg);
		} else {
			/*
			 * Specified offset and length only apply to part of
			 * existing segment
			 */

			/*
			 * New segment starts in middle of old one, so split
			 * new one off near beginning of old one
			 */
			newseg = NULL;
			if (off > seg->shm_off) {
				newseg = lgrp_shm_policy_split(tree, seg, off);

				/*
				 * New segment ends where old one did, so try
				 * to concatenate with next segment
				 */
				if (eoff == oldeoff) {
					newseg->shm_policy.mem_policy = policy;
					newseg->shm_policy.mem_reserved = 0;
					(void) lgrp_shm_policy_concat(tree,
					    newseg, AVL_NEXT(tree, newseg));
					break;
				}
			}

			/*
			 * New segment ends before old one, so split off end of
			 * old one
			 */
			if (eoff < oldeoff) {
				if (newseg) {
					(void) lgrp_shm_policy_split(tree,
					    newseg, eoff);
					newseg->shm_policy.mem_policy = policy;
					newseg->shm_policy.mem_reserved = 0;
				} else {
					(void) lgrp_shm_policy_split(tree, seg,
					    eoff);
					seg->shm_policy.mem_policy = policy;
					seg->shm_policy.mem_reserved = 0;
				}

				if (off == seg->shm_off)
					(void) lgrp_shm_policy_concat(tree,
					    AVL_PREV(tree, seg), seg);
				break;
			}

			/*
			 * Calculate remaining length and next offset
			 */
			len = eoff - oldeoff;
			off = oldeoff;
		}
	}

	rw_exit(&shm_locality->loc_lock);
	return (retval);
}
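/*
 * Worked example of the segment maintenance above (illustrative note,
 * not part of the original source): suppose the tree holds one segment
 * [0, 100) with policy A and a caller sets policy B on [40, 60). The
 * code splits [0, 100) at offset 40, then splits the remainder at
 * offset 60, leaving [0, 40) A, [40, 60) B, and [60, 100) A. If policy
 * B is later applied to [0, 40), that segment and [40, 60) are
 * concatenated into a single [0, 60) B segment, keeping the tree minimal.
 */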
/*
 * Return the best memnode from which to allocate memory given
 * an lgroup.
 *
 * "c" is for cookie, which is good enough for me.
 * It references a cookie struct that should be zero'ed to initialize.
 * The cookie should live on the caller's stack.
 *
 * The routine returns -1 when:
 *	- the search scope is LGRP_SRCH_LOCAL and all the memnodes in
 *	  "lgrp" have been returned, or
 *	- the search scope allows traversing the hierarchy and all the
 *	  memnodes in the system have been returned.
 */
int
lgrp_memnode_choose(lgrp_mnode_cookie_t *c)
{
	lgrp_t		*lp = c->lmc_lgrp;
	mnodeset_t	nodes = c->lmc_nodes;
	int		cnt = c->lmc_cnt;
	int		offset, mnode;

	extern int	max_mem_nodes;

	/*
	 * If the set is empty, and the caller is willing, traverse
	 * up the hierarchy until we find a non-empty set.
	 */
	while (nodes == (mnodeset_t)0 || cnt <= 0) {
		if (c->lmc_scope == LGRP_SRCH_LOCAL ||
		    ((lp = lp->lgrp_parent) == NULL))
			return (-1);

		nodes = lp->lgrp_mnodes & ~(c->lmc_tried);
		cnt = lp->lgrp_nmnodes - c->lmc_ntried;
	}

	/*
	 * Select a memnode by picking one at a "random" offset.
	 * Because of DR, memnodes can come and go at any time.
	 * This code must be able to cope with the possibility
	 * that the nodes count "cnt" is inconsistent with respect
	 * to the number of elements actually in "nodes", and
	 * therefore that the offset chosen could be greater than
	 * the number of elements in the set (some memnodes may
	 * have disappeared just before cnt was read).
	 * If this happens, the search simply wraps back to the
	 * beginning of the set.
	 */
	ASSERT(nodes != (mnodeset_t)0 && cnt > 0);
	offset = c->lmc_rand % cnt;
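	/*
	 * Scan for the "offset"-th set bit in "nodes". If the DR race
	 * described above left "offset" larger than the number of bits
	 * actually set, the inner scan completes without a match and the
	 * do-while wraps around to rescan; since "offset" keeps
	 * decrementing, it must eventually reach zero on a set bit.
	 */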
	do {
		for (mnode = 0; mnode < max_mem_nodes; mnode++)
			if (nodes & ((mnodeset_t)1 << mnode))
				if (!offset--)
					break;
	} while (mnode >= max_mem_nodes);

	/* Found a node. Store state before returning. */
	c->lmc_lgrp = lp;
	c->lmc_nodes = (nodes & ~((mnodeset_t)1 << mnode));
	c->lmc_cnt = cnt - 1;
	c->lmc_tried = (c->lmc_tried | ((mnodeset_t)1 << mnode));
	c->lmc_ntried++;

	return (mnode);
}
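/*
 * Illustrative usage sketch for lgrp_memnode_choose() (not part of the
 * original source). The cookie lives on the caller's stack, is zeroed,
 * and is seeded from the starting lgroup before the first call. Real
 * callers typically use an initialization macro from <sys/lgrp.h>; the
 * manual field setup below, the LGRP_SRCH_HIER scope constant, and the
 * use of gethrtime() to seed lmc_rand are assumptions made for
 * illustration only.
 */
static void
lgrp_memnode_walk_example(lgrp_t *lgrp)
{
	lgrp_mnode_cookie_t	c;
	int			mnode;

	bzero(&c, sizeof (c));
	c.lmc_lgrp = lgrp;			/* lgroup to start from */
	c.lmc_nodes = lgrp->lgrp_mnodes;	/* its memnode set */
	c.lmc_cnt = lgrp->lgrp_nmnodes;		/* size of that set */
	c.lmc_scope = LGRP_SRCH_HIER;		/* may traverse up hierarchy */
	c.lmc_rand = (int)gethrtime() & 0x7fffffff; /* non-negative seed */

	/*
	 * Each call hands back one memnode, nearest sets first, and
	 * returns -1 once the search scope is exhausted.
	 */
	while ((mnode = lgrp_memnode_choose(&c)) != -1) {
		/* ... attempt allocation from "mnode" here ... */
	}
}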