1ce8eb11aSdp78419 /* 2ce8eb11aSdp78419 * CDDL HEADER START 3ce8eb11aSdp78419 * 4ce8eb11aSdp78419 * The contents of this file are subject to the terms of the 5ce8eb11aSdp78419 * Common Development and Distribution License (the "License"). 6ce8eb11aSdp78419 * You may not use this file except in compliance with the License. 7ce8eb11aSdp78419 * 8ce8eb11aSdp78419 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9ce8eb11aSdp78419 * or http://www.opensolaris.org/os/licensing. 10ce8eb11aSdp78419 * See the License for the specific language governing permissions 11ce8eb11aSdp78419 * and limitations under the License. 12ce8eb11aSdp78419 * 13ce8eb11aSdp78419 * When distributing Covered Code, include this CDDL HEADER in each 14ce8eb11aSdp78419 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15ce8eb11aSdp78419 * If applicable, add the following below this CDDL HEADER, with the 16ce8eb11aSdp78419 * fields enclosed by brackets "[]" replaced with your own identifying 17ce8eb11aSdp78419 * information: Portions Copyright [yyyy] [name of copyright owner] 18ce8eb11aSdp78419 * 19ce8eb11aSdp78419 * CDDL HEADER END 20ce8eb11aSdp78419 */ 21ce8eb11aSdp78419 22ce8eb11aSdp78419 /* 23183ef8a1SHaik Aftandilian * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24ce8eb11aSdp78419 * Use is subject to license terms. 
25ce8eb11aSdp78419 */ 26ce8eb11aSdp78419 27ce8eb11aSdp78419 #include <sys/types.h> 28ce8eb11aSdp78419 #include <sys/sysmacros.h> 29ce8eb11aSdp78419 #include <sys/machsystm.h> 30ce8eb11aSdp78419 #include <sys/machparam.h> 31ce8eb11aSdp78419 #include <sys/cmn_err.h> 32ce8eb11aSdp78419 #include <sys/stat.h> 33ce8eb11aSdp78419 #include <sys/mach_descrip.h> 34ce8eb11aSdp78419 #include <sys/memnode.h> 35ce8eb11aSdp78419 #include <sys/mdesc.h> 36ce8eb11aSdp78419 #include <sys/mpo.h> 379853d9e8SJason Beloro #include <vm/page.h> 38ce8eb11aSdp78419 #include <vm/vm_dep.h> 39e853d8c3Sjc25722 #include <vm/hat_sfmmu.h> 40bb57d1f5Sjc25722 #include <sys/promif.h> 41ce8eb11aSdp78419 42ce8eb11aSdp78419 /* 43ce8eb11aSdp78419 * MPO and the sun4v memory representation 44ce8eb11aSdp78419 * --------------------------------------- 45ce8eb11aSdp78419 * 46ce8eb11aSdp78419 * Latency groups are defined in the sun4v achitecture by memory-latency-group 47ce8eb11aSdp78419 * nodes in the Machine Description, as specified in FWARC/2007/260. These 48ce8eb11aSdp78419 * tie together cpu nodes and mblock nodes, and contain mask and match 49ce8eb11aSdp78419 * properties that identify the portion of an mblock that belongs to the 50ce8eb11aSdp78419 * lgroup. Mask and match are defined in the Physical Address (PA) space, 51ce8eb11aSdp78419 * but an mblock defines Real Addresses (RA). To translate, the mblock 52ce8eb11aSdp78419 * includes the property address-congruence-offset, hereafter referred to as 53ce8eb11aSdp78419 * ra_to_pa. A real address ra is a member of an lgroup if 54ce8eb11aSdp78419 * 55ce8eb11aSdp78419 * (ra + mblock.ra_to_pa) & lgroup.mask == lgroup.match 56ce8eb11aSdp78419 * 57ce8eb11aSdp78419 * The MD is traversed, and information on all mblocks is kept in the array 58ce8eb11aSdp78419 * mpo_mblock[]. Information on all CPUs, including which lgroup they map 59ce8eb11aSdp78419 * to, is kept in the array mpo_cpu[]. 
60ce8eb11aSdp78419 * 61ce8eb11aSdp78419 * This implementation makes (and verifies) the simplifying assumption that 62ce8eb11aSdp78419 * the mask bits are the same for all defined lgroups, and that all 1 bits in 63ce8eb11aSdp78419 * the mask are contiguous. Thus the number of lgroups is bounded by the 64ce8eb11aSdp78419 * number of possible mask values, and the lgrp_handle_t is defined as the 65ce8eb11aSdp78419 * mask value, shifted right to eliminate the 0 bit positions in mask. The 66ce8eb11aSdp78419 * masks and values are also referred to as "home bits" in the code. 67ce8eb11aSdp78419 * 68ce8eb11aSdp78419 * A mem_node is defined to be 1:1 with an lgrp_handle_t, thus each lgroup 69ce8eb11aSdp78419 * has exactly 1 mem_node, and plat_pfn_to_mem_node() must find the mblock 70ce8eb11aSdp78419 * containing a pfn, apply the mblock's ra_to_pa adjustment, and extract the 71ce8eb11aSdp78419 * home bits. This yields the mem_node. 72ce8eb11aSdp78419 * 73ce8eb11aSdp78419 * Interfaces 74ce8eb11aSdp78419 * ---------- 75ce8eb11aSdp78419 * 76ce8eb11aSdp78419 * This file exports the following entry points: 77ce8eb11aSdp78419 * 78ce8eb11aSdp78419 * plat_lgrp_init() 79ce8eb11aSdp78419 * plat_build_mem_nodes() 80ce8eb11aSdp78419 * plat_lgrp_cpu_to_hand() 81ce8eb11aSdp78419 * plat_lgrp_latency() 82ce8eb11aSdp78419 * plat_pfn_to_mem_node() 83ce8eb11aSdp78419 * These implement the usual platform lgroup interfaces. 84ce8eb11aSdp78419 * 85ce8eb11aSdp78419 * plat_rapfn_to_papfn() 86ce8eb11aSdp78419 * Recover the PA page coloring bits from an RA. 87ce8eb11aSdp78419 * 88ce8eb11aSdp78419 * plat_mem_node_iterator_init() 89ce8eb11aSdp78419 * Initialize an iterator to efficiently step through pages in a mem_node. 90ce8eb11aSdp78419 * 91ce8eb11aSdp78419 * plat_mem_node_intersect_range() 92ce8eb11aSdp78419 * Find the intersection with a mem_node. 
939853d9e8SJason Beloro * 949853d9e8SJason Beloro * plat_slice_add() 959853d9e8SJason Beloro * plat_slice_del() 969853d9e8SJason Beloro * Platform hooks to add/delete a pfn range. 979853d9e8SJason Beloro * 989853d9e8SJason Beloro * Internal Organization 999853d9e8SJason Beloro * --------------------- 1009853d9e8SJason Beloro * 1019853d9e8SJason Beloro * A number of routines are used both boot/DR code which (re)build 1029853d9e8SJason Beloro * appropriate MPO structures. 1039853d9e8SJason Beloro * 1049853d9e8SJason Beloro * mblock_alloc() 1059853d9e8SJason Beloro * Allocate memory for mblocks and stripes as 1069853d9e8SJason Beloro * appropriate for boot or memory DR. 1079853d9e8SJason Beloro * 1089853d9e8SJason Beloro * mblock_free() 1099853d9e8SJason Beloro * Free memory allocated by mblock_alloc. 1109853d9e8SJason Beloro * 1119853d9e8SJason Beloro * mblock_update() 1129853d9e8SJason Beloro * Build mblocks based on mblock nodes read from the MD. 1139853d9e8SJason Beloro * 1149853d9e8SJason Beloro * mblock_update_add() 1159853d9e8SJason Beloro * Rebuild mblocks after a memory DR add operation. 1169853d9e8SJason Beloro * 1179853d9e8SJason Beloro * mblock_update_del() 1189853d9e8SJason Beloro * Rebuild mblocks after a memory DR delete operation. 1199853d9e8SJason Beloro * 1209853d9e8SJason Beloro * mblock_install() 1219853d9e8SJason Beloro * Install mblocks as the new configuration. 1229853d9e8SJason Beloro * 1239853d9e8SJason Beloro * mstripe_update() 1249853d9e8SJason Beloro * Build stripes based on mblocks. 1259853d9e8SJason Beloro * 1269853d9e8SJason Beloro * mnode_update() 1279853d9e8SJason Beloro * Call memnode layer to add/del a pfn range, based on stripes. 1289853d9e8SJason Beloro * 1299853d9e8SJason Beloro * The platform interfaces allocate all memory required for the 1309853d9e8SJason Beloro * particualar update first, block access to the MPO structures 1319853d9e8SJason Beloro * while they are updated, and free old structures after the update. 
132ce8eb11aSdp78419 */ 133ce8eb11aSdp78419 134ce8eb11aSdp78419 int sun4v_mpo_enable = 1; 135ce8eb11aSdp78419 int sun4v_mpo_debug = 0; 136ce8eb11aSdp78419 char sun4v_mpo_status[256] = ""; 137ce8eb11aSdp78419 138ce8eb11aSdp78419 /* Save CPU info from the MD and associate CPUs with lgroups */ 139ce8eb11aSdp78419 static struct cpu_md mpo_cpu[NCPU]; 140ce8eb11aSdp78419 141ce8eb11aSdp78419 /* Save lgroup info from the MD */ 142ce8eb11aSdp78419 #define MAX_MD_LGROUPS 32 143ce8eb11aSdp78419 static struct lgrp_md mpo_lgroup[MAX_MD_LGROUPS]; 144ce8eb11aSdp78419 static int n_lgrpnodes = 0; 145ce8eb11aSdp78419 static int n_locality_groups = 0; 146ce8eb11aSdp78419 static int max_locality_groups = 0; 1479853d9e8SJason Beloro static int szc_mask0 = 0; 148ce8eb11aSdp78419 149ce8eb11aSdp78419 /* Save mblocks from the MD */ 150bb57d1f5Sjc25722 #define SMALL_MBLOCKS_COUNT 8 151bb57d1f5Sjc25722 static struct mblock_md *mpo_mblock; 152bb57d1f5Sjc25722 static struct mblock_md small_mpo_mblocks[SMALL_MBLOCKS_COUNT]; 153ce8eb11aSdp78419 static int n_mblocks = 0; 154ce8eb11aSdp78419 155ce8eb11aSdp78419 /* Save mem_node stripes calculate from mblocks and lgroups. 
*/ 156bb57d1f5Sjc25722 static mem_stripe_t *mem_stripes; 157bb57d1f5Sjc25722 static mem_stripe_t small_mem_stripes[SMALL_MBLOCKS_COUNT * MAX_MEM_NODES]; 158ce8eb11aSdp78419 static int n_mem_stripes = 0; 159ce8eb11aSdp78419 static pfn_t mnode_stride; /* distance between stripes, start to start */ 160ce8eb11aSdp78419 static int stripe_shift; /* stride/stripes expressed as a shift */ 161ce8eb11aSdp78419 static pfn_t mnode_pages; /* mem_node stripe width */ 162ce8eb11aSdp78419 163ce8eb11aSdp78419 /* Save home mask and shift used to calculate lgrp_handle_t values */ 164ce8eb11aSdp78419 static uint64_t home_mask = 0; 165ce8eb11aSdp78419 static pfn_t home_mask_pfn = 0; 166ce8eb11aSdp78419 static int home_mask_shift = 0; 167ce8eb11aSdp78419 static uint_t home_mask_pfn_shift = 0; 168ce8eb11aSdp78419 169ce8eb11aSdp78419 /* Save lowest and highest latencies found across all lgroups */ 170ce8eb11aSdp78419 static int lower_latency = 0; 171ce8eb11aSdp78419 static int higher_latency = 0; 172ce8eb11aSdp78419 173ce8eb11aSdp78419 static pfn_t base_ra_to_pa_pfn = 0; /* ra_to_pa for single mblock memory */ 1749853d9e8SJason Beloro static int mpo_genid; /* config gen; updated by mem DR */ 1759853d9e8SJason Beloro static mpo_config_t mpo_config; /* current mblocks and stripes */ 1769853d9e8SJason Beloro 1779853d9e8SJason Beloro typedef enum { U_ADD, U_ADD_ALL, U_DEL } update_t; 178ce8eb11aSdp78419 179ce8eb11aSdp78419 static int valid_pages(md_t *md, mde_cookie_t cpu0); 180ce8eb11aSdp78419 static int unique_home_mem_lg_count(uint64_t mem_lg_homeset); 181ce8eb11aSdp78419 static int fix_interleave(void); 182ce8eb11aSdp78419 1839853d9e8SJason Beloro static int mblock_alloc(mpo_config_t *, update_t, int nmblocks); 1849853d9e8SJason Beloro static void mblock_install(mpo_config_t *); 1859853d9e8SJason Beloro static void mblock_free(mpo_config_t *); 1869853d9e8SJason Beloro static void mblock_update(mpo_config_t *, md_t, mde_cookie_t *mblocknodes); 1879853d9e8SJason Beloro static void 
mblock_update_add(mpo_config_t *); 1889853d9e8SJason Beloro static void mblock_update_del(mpo_config_t *, mpo_config_t *, pfn_t, pfn_t); 1899853d9e8SJason Beloro static void mstripe_update(mpo_config_t *); 1909853d9e8SJason Beloro static void mnode_update(mpo_config_t *, pfn_t, pfn_t, update_t); 1919853d9e8SJason Beloro 192ce8eb11aSdp78419 /* Debug support */ 193ce8eb11aSdp78419 #if defined(DEBUG) && !defined(lint) 1949853d9e8SJason Beloro #define VALIDATE_SLICE(base, end) { \ 1959853d9e8SJason Beloro ASSERT(IS_P2ALIGNED(ptob(base), TTEBYTES(TTE256M))); \ 1969853d9e8SJason Beloro ASSERT(IS_P2ALIGNED(ptob(end - base + 1), TTEBYTES(TTE256M))); \ 1979853d9e8SJason Beloro } 198ce8eb11aSdp78419 #define MPO_DEBUG(args...) if (sun4v_mpo_debug) printf(args) 199ce8eb11aSdp78419 #else 2009853d9e8SJason Beloro #define VALIDATE_SLICE(base, end) 201ce8eb11aSdp78419 #define MPO_DEBUG(...) 202ce8eb11aSdp78419 #endif /* DEBUG */ 203ce8eb11aSdp78419 204ce8eb11aSdp78419 /* Record status message, viewable from mdb */ 205ce8eb11aSdp78419 #define MPO_STATUS(args...) { \ 206ce8eb11aSdp78419 (void) snprintf(sun4v_mpo_status, sizeof (sun4v_mpo_status), args); \ 207ce8eb11aSdp78419 MPO_DEBUG(sun4v_mpo_status); \ 208ce8eb11aSdp78419 } 209ce8eb11aSdp78419 210ce8eb11aSdp78419 /* 2119853d9e8SJason Beloro * The MPO locks are to protect the MPO metadata while that 2129853d9e8SJason Beloro * information is updated as a result of a memory DR operation. 2139853d9e8SJason Beloro * The read lock must be acquired to read the metadata and the 2149853d9e8SJason Beloro * write locks must be acquired to update it. 
2159853d9e8SJason Beloro */ 2169853d9e8SJason Beloro #define mpo_rd_lock kpreempt_disable 2179853d9e8SJason Beloro #define mpo_rd_unlock kpreempt_enable 2189853d9e8SJason Beloro 2199853d9e8SJason Beloro static void 2209853d9e8SJason Beloro mpo_wr_lock() 2219853d9e8SJason Beloro { 2229853d9e8SJason Beloro mutex_enter(&cpu_lock); 223*0ed5c46eSJosef 'Jeff' Sipek pause_cpus(NULL, NULL); 2249853d9e8SJason Beloro mutex_exit(&cpu_lock); 2259853d9e8SJason Beloro } 2269853d9e8SJason Beloro 2279853d9e8SJason Beloro static void 2289853d9e8SJason Beloro mpo_wr_unlock() 2299853d9e8SJason Beloro { 2309853d9e8SJason Beloro mutex_enter(&cpu_lock); 2319853d9e8SJason Beloro start_cpus(); 2329853d9e8SJason Beloro mutex_exit(&cpu_lock); 2339853d9e8SJason Beloro } 2349853d9e8SJason Beloro 2359853d9e8SJason Beloro /* 236ce8eb11aSdp78419 * Routine to read a uint64_t from a given md 237ce8eb11aSdp78419 */ 238ce8eb11aSdp78419 static int64_t 239ce8eb11aSdp78419 get_int(md_t md, mde_cookie_t node, char *propname, uint64_t *val) 240ce8eb11aSdp78419 { 241ce8eb11aSdp78419 int err = md_get_prop_val(md, node, propname, val); 242ce8eb11aSdp78419 return (err); 243ce8eb11aSdp78419 } 244ce8eb11aSdp78419 245ce8eb11aSdp78419 static int 246ce8eb11aSdp78419 mblock_cmp(const void *a, const void *b) 247ce8eb11aSdp78419 { 248ce8eb11aSdp78419 struct mblock_md *m1 = (struct mblock_md *)a; 249ce8eb11aSdp78419 struct mblock_md *m2 = (struct mblock_md *)b; 250ce8eb11aSdp78419 251ce8eb11aSdp78419 if (m1->base < m2->base) 252ce8eb11aSdp78419 return (-1); 253ce8eb11aSdp78419 else if (m1->base == m2->base) 254ce8eb11aSdp78419 return (0); 255ce8eb11aSdp78419 else 256ce8eb11aSdp78419 return (1); 257ce8eb11aSdp78419 } 258ce8eb11aSdp78419 259ce8eb11aSdp78419 static void 260ce8eb11aSdp78419 mblock_sort(struct mblock_md *mblocks, int n) 261ce8eb11aSdp78419 { 262ce8eb11aSdp78419 extern void qsort(void *, size_t, size_t, 263ce8eb11aSdp78419 int (*)(const void *, const void *)); 264ce8eb11aSdp78419 265ce8eb11aSdp78419 
qsort(mblocks, n, sizeof (mblocks[0]), mblock_cmp); 266ce8eb11aSdp78419 } 267ce8eb11aSdp78419 268924db11bSjc25722 static void 269924db11bSjc25722 mpo_update_tunables(void) 270924db11bSjc25722 { 271924db11bSjc25722 int i, ncpu_min; 272924db11bSjc25722 273924db11bSjc25722 /* 274924db11bSjc25722 * lgrp_expand_proc_thresh is the minimum load on the lgroups 275924db11bSjc25722 * this process is currently running on before considering 276924db11bSjc25722 * expanding threads to another lgroup. 277924db11bSjc25722 * 278924db11bSjc25722 * lgrp_expand_proc_diff determines how much less the remote lgroup 279924db11bSjc25722 * must be loaded before expanding to it. 280924db11bSjc25722 * 281924db11bSjc25722 * On sun4v CMT processors, threads share a core pipeline, and 282924db11bSjc25722 * at less than 100% utilization, best throughput is obtained by 283924db11bSjc25722 * spreading threads across more cores, even if some are in a 284924db11bSjc25722 * different lgroup. Spread threads to a new lgroup if the 285924db11bSjc25722 * current group is more than 50% loaded. Because of virtualization, 286924db11bSjc25722 * lgroups may have different numbers of CPUs, but the tunables 287924db11bSjc25722 * apply to all lgroups, so find the smallest lgroup and compute 288924db11bSjc25722 * 50% loading. 
289924db11bSjc25722 */ 290924db11bSjc25722 291924db11bSjc25722 ncpu_min = NCPU; 292924db11bSjc25722 for (i = 0; i < n_lgrpnodes; i++) { 293924db11bSjc25722 int ncpu = mpo_lgroup[i].ncpu; 294924db11bSjc25722 if (ncpu != 0 && ncpu < ncpu_min) 295924db11bSjc25722 ncpu_min = ncpu; 296924db11bSjc25722 } 297924db11bSjc25722 lgrp_expand_proc_thresh = ncpu_min * lgrp_loadavg_max_effect / 2; 298924db11bSjc25722 299924db11bSjc25722 /* new home may only be half as loaded as the existing home to use it */ 300924db11bSjc25722 lgrp_expand_proc_diff = lgrp_expand_proc_thresh / 2; 301924db11bSjc25722 302924db11bSjc25722 lgrp_loadavg_tolerance = lgrp_loadavg_max_effect; 303924db11bSjc25722 } 304924db11bSjc25722 305924db11bSjc25722 static mde_cookie_t 306924db11bSjc25722 cpuid_to_cpunode(md_t *md, int cpuid) 307924db11bSjc25722 { 308924db11bSjc25722 mde_cookie_t rootnode, foundnode, *cpunodes; 309924db11bSjc25722 uint64_t cpuid_prop; 310924db11bSjc25722 int n_cpunodes, i; 311924db11bSjc25722 312924db11bSjc25722 if (md == NULL) 313924db11bSjc25722 return (MDE_INVAL_ELEM_COOKIE); 314924db11bSjc25722 315924db11bSjc25722 rootnode = md_root_node(md); 316924db11bSjc25722 if (rootnode == MDE_INVAL_ELEM_COOKIE) 317924db11bSjc25722 return (MDE_INVAL_ELEM_COOKIE); 318924db11bSjc25722 319924db11bSjc25722 n_cpunodes = md_alloc_scan_dag(md, rootnode, PROP_LG_CPU, 320924db11bSjc25722 "fwd", &cpunodes); 321924db11bSjc25722 if (n_cpunodes <= 0 || n_cpunodes > NCPU) 322924db11bSjc25722 goto cpuid_fail; 323924db11bSjc25722 324924db11bSjc25722 for (i = 0; i < n_cpunodes; i++) { 325924db11bSjc25722 if (md_get_prop_val(md, cpunodes[i], PROP_LG_CPU_ID, 326924db11bSjc25722 &cpuid_prop)) 327924db11bSjc25722 break; 328924db11bSjc25722 if (cpuid_prop == (uint64_t)cpuid) { 329924db11bSjc25722 foundnode = cpunodes[i]; 330924db11bSjc25722 md_free_scan_dag(md, &cpunodes); 331924db11bSjc25722 return (foundnode); 332924db11bSjc25722 } 333924db11bSjc25722 } 334924db11bSjc25722 cpuid_fail: 335924db11bSjc25722 if 
(n_cpunodes > 0) 336924db11bSjc25722 md_free_scan_dag(md, &cpunodes); 337924db11bSjc25722 return (MDE_INVAL_ELEM_COOKIE); 338924db11bSjc25722 } 339924db11bSjc25722 340924db11bSjc25722 static int 341924db11bSjc25722 mpo_cpu_to_lgroup(md_t *md, mde_cookie_t cpunode) 342924db11bSjc25722 { 343924db11bSjc25722 mde_cookie_t *nodes; 344924db11bSjc25722 uint64_t latency, lowest_latency; 345924db11bSjc25722 uint64_t address_match, lowest_address_match; 346924db11bSjc25722 int n_lgroups, j, result = 0; 347924db11bSjc25722 348924db11bSjc25722 /* Find lgroup nodes reachable from this cpu */ 349924db11bSjc25722 n_lgroups = md_alloc_scan_dag(md, cpunode, PROP_LG_MEM_LG, 350924db11bSjc25722 "fwd", &nodes); 351924db11bSjc25722 352924db11bSjc25722 lowest_latency = ~(0UL); 353924db11bSjc25722 354924db11bSjc25722 /* Find the lgroup node with the smallest latency */ 355924db11bSjc25722 for (j = 0; j < n_lgroups; j++) { 356924db11bSjc25722 result = get_int(md, nodes[j], PROP_LG_LATENCY, 357924db11bSjc25722 &latency); 358924db11bSjc25722 result |= get_int(md, nodes[j], PROP_LG_MATCH, 359924db11bSjc25722 &address_match); 360924db11bSjc25722 if (result != 0) { 361924db11bSjc25722 j = -1; 362924db11bSjc25722 goto to_lgrp_done; 363924db11bSjc25722 } 364924db11bSjc25722 if (latency < lowest_latency) { 365924db11bSjc25722 lowest_latency = latency; 366924db11bSjc25722 lowest_address_match = address_match; 367924db11bSjc25722 } 368924db11bSjc25722 } 369924db11bSjc25722 for (j = 0; j < n_lgrpnodes; j++) { 370924db11bSjc25722 if ((mpo_lgroup[j].latency == lowest_latency) && 371924db11bSjc25722 (mpo_lgroup[j].addr_match == lowest_address_match)) 372924db11bSjc25722 break; 373924db11bSjc25722 } 374924db11bSjc25722 if (j == n_lgrpnodes) 375924db11bSjc25722 j = -1; 376924db11bSjc25722 377924db11bSjc25722 to_lgrp_done: 378924db11bSjc25722 if (n_lgroups > 0) 379924db11bSjc25722 md_free_scan_dag(md, &nodes); 380924db11bSjc25722 return (j); 381924db11bSjc25722 } 382924db11bSjc25722 383924db11bSjc25722 /* 
Called when DR'ing in a CPU */ 384924db11bSjc25722 void 385183ef8a1SHaik Aftandilian mpo_cpu_add(md_t *md, int cpuid) 386924db11bSjc25722 { 387924db11bSjc25722 mde_cookie_t cpunode; 388924db11bSjc25722 389924db11bSjc25722 int i; 390924db11bSjc25722 391924db11bSjc25722 if (n_lgrpnodes <= 0) 392924db11bSjc25722 return; 393924db11bSjc25722 394924db11bSjc25722 if (md == NULL) 395924db11bSjc25722 goto add_fail; 396924db11bSjc25722 397924db11bSjc25722 cpunode = cpuid_to_cpunode(md, cpuid); 398924db11bSjc25722 if (cpunode == MDE_INVAL_ELEM_COOKIE) 399924db11bSjc25722 goto add_fail; 400924db11bSjc25722 401924db11bSjc25722 i = mpo_cpu_to_lgroup(md, cpunode); 402924db11bSjc25722 if (i == -1) 403924db11bSjc25722 goto add_fail; 404924db11bSjc25722 405924db11bSjc25722 mpo_cpu[cpuid].lgrp_index = i; 406924db11bSjc25722 mpo_cpu[cpuid].home = mpo_lgroup[i].addr_match >> home_mask_shift; 407924db11bSjc25722 mpo_lgroup[i].ncpu++; 408924db11bSjc25722 mpo_update_tunables(); 409924db11bSjc25722 return; 410924db11bSjc25722 add_fail: 411924db11bSjc25722 panic("mpo_cpu_add: Cannot read MD"); 412924db11bSjc25722 } 413924db11bSjc25722 414924db11bSjc25722 /* Called when DR'ing out a CPU */ 415924db11bSjc25722 void 416924db11bSjc25722 mpo_cpu_remove(int cpuid) 417924db11bSjc25722 { 418924db11bSjc25722 int i; 419924db11bSjc25722 420924db11bSjc25722 if (n_lgrpnodes <= 0) 421924db11bSjc25722 return; 422924db11bSjc25722 423924db11bSjc25722 i = mpo_cpu[cpuid].lgrp_index; 424924db11bSjc25722 mpo_lgroup[i].ncpu--; 425924db11bSjc25722 mpo_cpu[cpuid].home = 0; 426924db11bSjc25722 mpo_cpu[cpuid].lgrp_index = -1; 427924db11bSjc25722 mpo_update_tunables(); 428924db11bSjc25722 } 429924db11bSjc25722 4309853d9e8SJason Beloro static mde_cookie_t 4319853d9e8SJason Beloro md_get_root(md_t *md) 432ce8eb11aSdp78419 { 4339853d9e8SJason Beloro mde_cookie_t root = MDE_INVAL_ELEM_COOKIE; 4349853d9e8SJason Beloro int n_nodes; 435ce8eb11aSdp78419 436ce8eb11aSdp78419 n_nodes = md_node_count(md); 437ce8eb11aSdp78419 
/*
 * Read every memory-latency-group node reachable from 'root' into
 * mpo_lgroup[] (mask, match and latency properties), then validate the
 * configuration: reject sub-page interleave, require identical addr_mask
 * across all lgroups, and require that every lgroup sees every mblock.
 * Sets the file-scope n_lgrpnodes.  Returns 0 on success, -1 on any
 * failure (with the reason recorded via MPO_STATUS).
 */
static int
lgrp_update(md_t *md, mde_cookie_t root)
{
	int i, j, result;
	int ret_val = 0;
	int sub_page_fix;
	mde_cookie_t *nodes, *lgrpnodes;

	n_lgrpnodes = md_alloc_scan_dag(md, root, PROP_LG_MEM_LG,
	    "fwd", &lgrpnodes);

	/* no lgroups, or more than the static mpo_lgroup[] can hold */
	if (n_lgrpnodes <= 0 || n_lgrpnodes >= MAX_MD_LGROUPS) {
		MPO_STATUS("lgrp_update: No Lgroups\n");
		ret_val = -1;
		goto fail;
	}

	MPO_DEBUG("lgrp_update: mem_lgs: %d\n", n_lgrpnodes);

	for (i = 0; i < n_lgrpnodes; i++) {
		mpo_lgroup[i].node = lgrpnodes[i];
		mpo_lgroup[i].id = i;
		mpo_lgroup[i].ncpu = 0;
		result = get_int(md, lgrpnodes[i], PROP_LG_MASK,
		    &mpo_lgroup[i].addr_mask);
		result |= get_int(md, lgrpnodes[i], PROP_LG_MATCH,
		    &mpo_lgroup[i].addr_match);

		/*
		 * If either the mask or match properties are missing, set to 0
		 */
		if (result < 0) {
			mpo_lgroup[i].addr_mask = 0;
			mpo_lgroup[i].addr_match = 0;
		}

		/* Set latency to 0 if property not present */

		result = get_int(md, lgrpnodes[i], PROP_LG_LATENCY,
		    &mpo_lgroup[i].latency);
		if (result < 0)
			mpo_lgroup[i].latency = 0;
	}

	/*
	 * Sub-page level interleave is not yet supported.  Check for it,
	 * and remove sub-page interleaved lgroups from mpo_lgroup and
	 * n_lgrpnodes.  If no lgroups are left, return.
	 */

	sub_page_fix = fix_interleave();
	if (n_lgrpnodes == 0) {
		ret_val = -1;
		goto fail;
	}

	/* Ensure that all of the addr_mask values are the same */

	for (i = 0; i < n_lgrpnodes; i++) {
		if (mpo_lgroup[0].addr_mask != mpo_lgroup[i].addr_mask) {
			MPO_STATUS("lgrp_update: "
			    "addr_mask values are not the same\n");
			ret_val = -1;
			goto fail;
		}
	}

	/*
	 * Ensure that all lgrp nodes see all the mblocks.  However, if
	 * sub-page interleave is being fixed, they do not, so skip
	 * the check.
	 */

	if (sub_page_fix == 0) {
		for (i = 0; i < n_lgrpnodes; i++) {
			/* count mblocks reachable from this lgroup node */
			j = md_alloc_scan_dag(md, mpo_lgroup[i].node,
			    PROP_LG_MBLOCK, "fwd", &nodes);
			md_free_scan_dag(md, &nodes);
			if (j != n_mblocks) {
				MPO_STATUS("lgrp_update: "
				    "sub-page interleave is being fixed\n");
				ret_val = -1;
				goto fail;
			}
		}
	}
fail:
	/*
	 * Cleanup runs on success too (fallthrough): the scan results are
	 * always freed and the cached node cookies invalidated, since
	 * mpo_lgroup[] keeps copies of all needed property values.
	 */
	if (n_lgrpnodes > 0) {
		md_free_scan_dag(md, &lgrpnodes);
		for (i = 0; i < n_lgrpnodes; i++)
			mpo_lgroup[i].node = MDE_INVAL_ELEM_COOKIE;
	}

	return (ret_val);
}

/*
 *
 * Traverse the MD to determine:
 *
 *  Number of CPU nodes, lgrp_nodes, and mblocks
 *  Then for each lgrp_node, obtain the appropriate data.
 *  For each CPU, determine its home locality and store it.
 *  For each mblock, retrieve its data and store it.
 */
5619853d9e8SJason Beloro */ 5629853d9e8SJason Beloro static int 5639853d9e8SJason Beloro lgrp_traverse(md_t *md) 5649853d9e8SJason Beloro { 5659853d9e8SJason Beloro mde_cookie_t root, *cpunodes, *mblocknodes; 5669853d9e8SJason Beloro int o; 5679853d9e8SJason Beloro uint64_t i, k, stripe, stride; 5689853d9e8SJason Beloro uint64_t mem_lg_homeset = 0; 5699853d9e8SJason Beloro int ret_val = 0; 5709853d9e8SJason Beloro int result = 0; 5719853d9e8SJason Beloro int n_cpunodes = 0; 5729853d9e8SJason Beloro mpo_config_t new_config; 5739853d9e8SJason Beloro 5749853d9e8SJason Beloro if ((root = md_get_root(md)) == MDE_INVAL_ELEM_COOKIE) { 5759853d9e8SJason Beloro ret_val = -1; 5769853d9e8SJason Beloro goto fail; 5779853d9e8SJason Beloro } 5789853d9e8SJason Beloro 5799853d9e8SJason Beloro n_mblocks = md_alloc_scan_dag(md, root, PROP_LG_MBLOCK, "fwd", 5809853d9e8SJason Beloro &mblocknodes); 5819853d9e8SJason Beloro if (n_mblocks <= 0) { 5829853d9e8SJason Beloro MPO_STATUS("lgrp_traverse: No mblock nodes detected in Machine " 5839853d9e8SJason Beloro "Descriptor\n"); 5849853d9e8SJason Beloro ret_val = -1; 5859853d9e8SJason Beloro goto fail; 5869853d9e8SJason Beloro } 5879853d9e8SJason Beloro 5889853d9e8SJason Beloro /* 5899853d9e8SJason Beloro * Build the Memory Nodes. Do this before any possibility of 5909853d9e8SJason Beloro * bailing from this routine so we obtain ra_to_pa (needed for page 5919853d9e8SJason Beloro * coloring) even when there are no lgroups defined. 
5929853d9e8SJason Beloro */ 5939853d9e8SJason Beloro if (mblock_alloc(&new_config, U_ADD_ALL, n_mblocks) < 0) { 5949853d9e8SJason Beloro ret_val = -1; 5959853d9e8SJason Beloro goto fail; 5969853d9e8SJason Beloro } 5979853d9e8SJason Beloro 5989853d9e8SJason Beloro mblock_update(&new_config, md, mblocknodes); 5999853d9e8SJason Beloro mblock_install(&new_config); 6009853d9e8SJason Beloro 6019853d9e8SJason Beloro /* Page coloring hook is required so we can iterate through mnodes */ 6029853d9e8SJason Beloro if (&page_next_pfn_for_color_cpu == NULL) { 6039853d9e8SJason Beloro MPO_STATUS("lgrp_traverse: No page coloring support\n"); 6049853d9e8SJason Beloro ret_val = -1; 6059853d9e8SJason Beloro goto fail; 6069853d9e8SJason Beloro } 6079853d9e8SJason Beloro 6089853d9e8SJason Beloro /* Global enable for mpo */ 6099853d9e8SJason Beloro if (sun4v_mpo_enable == 0) { 6109853d9e8SJason Beloro MPO_STATUS("lgrp_traverse: MPO feature is not enabled\n"); 6119853d9e8SJason Beloro ret_val = -1; 6129853d9e8SJason Beloro goto fail; 6139853d9e8SJason Beloro } 6149853d9e8SJason Beloro 6159853d9e8SJason Beloro n_cpunodes = md_alloc_scan_dag(md, root, PROP_LG_CPU, "fwd", &cpunodes); 6169853d9e8SJason Beloro 6179853d9e8SJason Beloro if (n_cpunodes <= 0 || n_cpunodes > NCPU) { 6189853d9e8SJason Beloro MPO_STATUS("lgrp_traverse: No CPU nodes detected " 6199853d9e8SJason Beloro "in MD\n"); 6209853d9e8SJason Beloro ret_val = -1; 6219853d9e8SJason Beloro goto fail; 6229853d9e8SJason Beloro } 6239853d9e8SJason Beloro 6249853d9e8SJason Beloro MPO_DEBUG("lgrp_traverse: cpus: %d\n", n_cpunodes); 6259853d9e8SJason Beloro 6269853d9e8SJason Beloro if ((ret_val = lgrp_update(md, root)) == -1) 6279853d9e8SJason Beloro goto fail; 628ce8eb11aSdp78419 629ce8eb11aSdp78419 /* 630ce8eb11aSdp78419 * Use the address mask from the first lgroup node 631ce8eb11aSdp78419 * to establish our home_mask. 
632ce8eb11aSdp78419 */ 633ce8eb11aSdp78419 home_mask = mpo_lgroup[0].addr_mask; 634ce8eb11aSdp78419 home_mask_pfn = btop(home_mask); 635ce8eb11aSdp78419 home_mask_shift = lowbit(home_mask) - 1; 636ce8eb11aSdp78419 home_mask_pfn_shift = home_mask_shift - PAGESHIFT; 637ce8eb11aSdp78419 mnode_pages = btop(1ULL << home_mask_shift); 638ce8eb11aSdp78419 639ce8eb11aSdp78419 /* 640ce8eb11aSdp78419 * How many values are possible in home mask? Assume the mask 641ce8eb11aSdp78419 * bits are contiguous. 642ce8eb11aSdp78419 */ 643ce8eb11aSdp78419 max_locality_groups = 644ce8eb11aSdp78419 1 << highbit(home_mask_pfn >> home_mask_pfn_shift); 645ce8eb11aSdp78419 6469853d9e8SJason Beloro stripe_shift = highbit(max_locality_groups) - 1; 6479853d9e8SJason Beloro stripe = ptob(mnode_pages); 6489853d9e8SJason Beloro stride = max_locality_groups * stripe; 6499853d9e8SJason Beloro mnode_stride = btop(stride); 6509853d9e8SJason Beloro 651ce8eb11aSdp78419 /* Now verify the home mask bits are contiguous */ 652ce8eb11aSdp78419 653ce8eb11aSdp78419 if (max_locality_groups - 1 != home_mask_pfn >> home_mask_pfn_shift) { 654ce8eb11aSdp78419 MPO_STATUS("lgrp_traverse: " 655ce8eb11aSdp78419 "home mask bits are not contiguous\n"); 656ce8eb11aSdp78419 ret_val = -1; 657ce8eb11aSdp78419 goto fail; 658ce8eb11aSdp78419 } 659ce8eb11aSdp78419 660ce8eb11aSdp78419 /* Record all of the home bits */ 661ce8eb11aSdp78419 662ce8eb11aSdp78419 for (i = 0; i < n_lgrpnodes; i++) { 663ce8eb11aSdp78419 HOMESET_ADD(mem_lg_homeset, 664ce8eb11aSdp78419 mpo_lgroup[i].addr_match >> home_mask_shift); 665ce8eb11aSdp78419 } 666ce8eb11aSdp78419 667ce8eb11aSdp78419 /* Count the number different "home" mem_lg's we've discovered */ 668ce8eb11aSdp78419 669ce8eb11aSdp78419 n_locality_groups = unique_home_mem_lg_count(mem_lg_homeset); 670ce8eb11aSdp78419 671ce8eb11aSdp78419 /* If we have only 1 locality group then we can exit */ 672ce8eb11aSdp78419 if (n_locality_groups == 1) { 673ce8eb11aSdp78419 MPO_STATUS("lgrp_traverse: 
n_locality_groups == 1\n"); 674ce8eb11aSdp78419 ret_val = -1; 675ce8eb11aSdp78419 goto fail; 676ce8eb11aSdp78419 } 677ce8eb11aSdp78419 678ce8eb11aSdp78419 /* 679ce8eb11aSdp78419 * Set the latencies. A CPU's lgroup is defined by the lowest 680ce8eb11aSdp78419 * latency found. All other memory is considered remote, and the 681ce8eb11aSdp78419 * remote latency is represented by the highest latency found. 682ce8eb11aSdp78419 * Thus hierarchical lgroups, if any, are approximated by a 683ce8eb11aSdp78419 * two level scheme. 684ce8eb11aSdp78419 * 685ce8eb11aSdp78419 * The Solaris MPO framework by convention wants to see latencies 686ce8eb11aSdp78419 * in units of nano-sec/10. In the MD, the units are defined to be 687ce8eb11aSdp78419 * pico-seconds. 688ce8eb11aSdp78419 */ 689ce8eb11aSdp78419 690ce8eb11aSdp78419 lower_latency = mpo_lgroup[0].latency; 691ce8eb11aSdp78419 higher_latency = mpo_lgroup[0].latency; 692ce8eb11aSdp78419 693ce8eb11aSdp78419 for (i = 1; i < n_lgrpnodes; i++) { 694ce8eb11aSdp78419 if (mpo_lgroup[i].latency < lower_latency) { 695ce8eb11aSdp78419 lower_latency = mpo_lgroup[i].latency; 696ce8eb11aSdp78419 } 697ce8eb11aSdp78419 if (mpo_lgroup[i].latency > higher_latency) { 698ce8eb11aSdp78419 higher_latency = mpo_lgroup[i].latency; 699ce8eb11aSdp78419 } 700ce8eb11aSdp78419 } 701ce8eb11aSdp78419 lower_latency /= 10000; 702ce8eb11aSdp78419 higher_latency /= 10000; 703ce8eb11aSdp78419 704ce8eb11aSdp78419 /* Clear our CPU data */ 705ce8eb11aSdp78419 706ce8eb11aSdp78419 for (i = 0; i < NCPU; i++) { 707ce8eb11aSdp78419 mpo_cpu[i].home = 0; 708924db11bSjc25722 mpo_cpu[i].lgrp_index = -1; 709ce8eb11aSdp78419 } 710ce8eb11aSdp78419 711ce8eb11aSdp78419 /* Build the CPU nodes */ 712ce8eb11aSdp78419 for (i = 0; i < n_cpunodes; i++) { 713ce8eb11aSdp78419 714ce8eb11aSdp78419 /* Read in the lgroup nodes */ 715ce8eb11aSdp78419 result = get_int(md, cpunodes[i], PROP_LG_CPU_ID, &k); 716ce8eb11aSdp78419 if (result < 0) { 717ce8eb11aSdp78419 MPO_STATUS("lgrp_traverse: 
PROP_LG_CPU_ID missing\n"); 718ce8eb11aSdp78419 ret_val = -1; 719ce8eb11aSdp78419 goto fail; 720ce8eb11aSdp78419 } 721ce8eb11aSdp78419 722924db11bSjc25722 o = mpo_cpu_to_lgroup(md, cpunodes[i]); 723924db11bSjc25722 if (o == -1) { 724ce8eb11aSdp78419 ret_val = -1; 725ce8eb11aSdp78419 goto fail; 726ce8eb11aSdp78419 } 727924db11bSjc25722 mpo_cpu[k].lgrp_index = o; 728924db11bSjc25722 mpo_cpu[k].home = mpo_lgroup[o].addr_match >> home_mask_shift; 729ce8eb11aSdp78419 mpo_lgroup[o].ncpu++; 730ce8eb11aSdp78419 } 731ce8eb11aSdp78419 /* Validate that no large pages cross mnode boundaries. */ 732ce8eb11aSdp78419 if (valid_pages(md, cpunodes[0]) == 0) { 733ce8eb11aSdp78419 ret_val = -1; 734ce8eb11aSdp78419 goto fail; 735ce8eb11aSdp78419 } 736ce8eb11aSdp78419 737ce8eb11aSdp78419 fail: 738ce8eb11aSdp78419 if (n_cpunodes > 0) 739ce8eb11aSdp78419 md_free_scan_dag(md, &cpunodes); 740ce8eb11aSdp78419 if (n_mblocks > 0) 741ce8eb11aSdp78419 md_free_scan_dag(md, &mblocknodes); 742ce8eb11aSdp78419 else 743ce8eb11aSdp78419 panic("lgrp_traverse: No memory blocks found"); 744ce8eb11aSdp78419 7459853d9e8SJason Beloro if (ret_val == 0) { 746ce8eb11aSdp78419 MPO_STATUS("MPO feature is enabled.\n"); 7479853d9e8SJason Beloro } else 7489853d9e8SJason Beloro sun4v_mpo_enable = 0; /* set this for DR */ 749ce8eb11aSdp78419 750ce8eb11aSdp78419 return (ret_val); 751ce8eb11aSdp78419 } 752ce8eb11aSdp78419 753ce8eb11aSdp78419 /* 754ce8eb11aSdp78419 * Determine the number of unique mem_lg's present in our system 755ce8eb11aSdp78419 */ 756ce8eb11aSdp78419 static int 757ce8eb11aSdp78419 unique_home_mem_lg_count(uint64_t mem_lg_homeset) 758ce8eb11aSdp78419 { 759ce8eb11aSdp78419 int homeid; 760ce8eb11aSdp78419 int count = 0; 761ce8eb11aSdp78419 762ce8eb11aSdp78419 /* 763ce8eb11aSdp78419 * Scan the "home" bits of the mem_lgs, count 764ce8eb11aSdp78419 * the number that are unique. 
	 */

	/* Count how many distinct home ids are members of the set. */
	for (homeid = 0; homeid < NLGRPS_MAX; homeid++) {
		if (MEM_LG_ISMEMBER(mem_lg_homeset, homeid)) {
			count++;
		}
	}

	MPO_DEBUG("unique_home_mem_lg_count: homeset %lx\n",
	    mem_lg_homeset);
	MPO_DEBUG("unique_home_mem_lg_count: count: %d\n", count);

	/* Default must be at least one */
	if (count == 0)
		count = 1;

	return (count);
}

/*
 * Platform specific lgroup initialization
 *
 * Reads the Machine Description and builds the MPO state via
 * lgrp_traverse().  On failure, falls back to a single-lgroup
 * configuration so the system can still boot.
 */
void
plat_lgrp_init(void)
{
	md_t *md;
	int rc;

	/* Get the Machine Descriptor handle */

	md = md_get_handle();

	/* If not, we cannot continue */

	if (md == NULL) {
		panic("cannot access machine descriptor\n");
	} else {
		rc = lgrp_traverse(md);
		(void) md_fini_handle(md);
	}

	/*
	 * If we can't process the MD for lgroups then at least let the
	 * system try to boot.  Assume we have one lgroup so that
	 * when plat_build_mem_nodes is called, it will attempt to init
	 * an mnode based on the supplied memory segment.
	 */

	if (rc == -1) {
		home_mask_pfn = 0;
		max_locality_groups = 1;
		n_locality_groups = 1;
		return;
	}

	mem_node_pfn_shift = 0;
	mem_node_physalign = 0;

	/* Use lgroup-aware TSB allocations */
	tsb_lgrp_affinity = 1;

	/* Require that a home lgroup have some memory to be chosen */
	lgrp_mem_free_thresh = 1;

	/* Standard home-on-next-touch policy */
	lgrp_mem_policy_root = LGRP_MEM_POLICY_NEXT;

	/* Disable option to choose root lgroup if all leaf lgroups are busy */
	lgrp_load_thresh = UINT32_MAX;

	/*
	 * NOTE(review): presumably re-derives lgroup tunables from the new
	 * MPO configuration -- confirm against mpo_update_tunables().
	 */
	mpo_update_tunables();
}

/*
 * Helper routine for debugging calls to mem_node_add_slice()
 */
static void
mpo_mem_node_add_slice(pfn_t basepfn, pfn_t endpfn)
{
#if defined(DEBUG) && !defined(lint)
	/* DEBUG-only running count of slices added, printed with each call. */
	static int slice_count = 0;

	slice_count++;
	MPO_DEBUG("mem_add_slice(%d): basepfn: %lx endpfn: %lx\n",
	    slice_count, basepfn, endpfn);
#endif
	mem_node_add_slice(basepfn, endpfn);
}

/*
 * Helper routine for debugging calls to mem_node_del_slice()
 */
static void
mpo_mem_node_del_slice(pfn_t basepfn, pfn_t endpfn)
{
#if defined(DEBUG) && !defined(lint)
	/* DEBUG-only running count of slices deleted. */
	static int slice_count = 0;

	slice_count++;
	MPO_DEBUG("mem_del_slice(%d): basepfn: %lx endpfn: %lx\n",
	    slice_count, basepfn, endpfn);
#endif
	mem_node_del_slice(basepfn, endpfn);
}

/*
 * Helper routine for debugging calls to plat_assign_lgrphand_to_mem_node()
 */
static void
mpo_plat_assign_lgrphand_to_mem_node(lgrp_handle_t plathand, int mnode)
{
	MPO_DEBUG("plat_assign_to_mem_nodes: lgroup home %ld, "
	    "mnode index: %d\n", plathand, mnode);
	plat_assign_lgrphand_to_mem_node(plathand, mnode);
}

/*
 * plat_build_mem_nodes()
 *
 * Define the mem_nodes based on the modified boot memory list,
 * or based on info read from the MD in plat_lgrp_init().
 *
 * When the home mask lies in the middle of the address bits (as it does on
 * Victoria Falls), then the memory in one mem_node is no longer contiguous;
 * it is striped across an mblock in a repeating pattern of contiguous memory
 * followed by a gap.  The stripe width is the size of the contiguous piece.
 * The stride is the distance from the start of one contiguous piece to the
 * start of the next.  The gap is thus stride - stripe_width.
 *
 * The stripe of an mnode that falls within an mblock is described by the type
 * mem_stripe_t, and there is one mem_stripe_t per mnode per mblock.  The
 * mem_stripe_t's are kept in a global array mem_stripes[].
 * The index into
 * this array is predetermined.  The mem_stripe_t that describes mnode m
 * within mpo_mblock[i] is stored at
 *	mem_stripes[ m + i * max_locality_groups ]
 *
 * max_locality_groups is the total number of possible locality groups,
 * as defined by the size of the home mask, even if the memory assigned
 * to the domain is small and does not cover all the lgroups.  Thus some
 * mem_stripe_t's may be empty.
 *
 * The members of mem_stripe_t are:
 *	physbase: First valid page in mem_node in the corresponding mblock
 *	physmax: Last valid page in mem_node in mblock
 *	offset: The full stripe width starts at physbase - offset.
 *	    Thus if offset is non-zero, this mem_node starts in the middle
 *	    of a stripe width, and the second full stripe starts at
 *	    physbase - offset + stride.  (even though physmax may fall in the
 *	    middle of a stripe width, we do not save the ending fragment size
 *	    in this data structure.)
 *	exists: Set to 1 if the mblock has memory in this mem_node stripe.
 *
 * The stripe width is kept in the global mnode_pages.
 * The stride is kept in the global mnode_stride.
 * All the above use pfn's as the unit.
 *
 * As an example, the memory layout for a domain with 2 mblocks and 4
 * mem_nodes 0,1,2,3 could look like this:
 *
 *	123012301230 ... 012301230123 ...
 *	  mblock 0	   mblock 1
 */

/*ARGSUSED*/
void
plat_build_mem_nodes(prom_memlist_t *list, size_t nelems)
{
	int elem;
	uint64_t base, len;

	/* Pre-reserve space for plat_assign_lgrphand_to_mem_node */
	max_mem_nodes = max_locality_groups;

	/* Recompute the mem_stripes[] layout for the current config. */
	mstripe_update(&mpo_config);

	/* Check for non-MPO sun4v platforms */
	if (n_locality_groups <= 1) {
		/*
		 * Single lgroup: everything maps to mnode 0, one slice
		 * per entry of the boot memory list.
		 */
		mpo_plat_assign_lgrphand_to_mem_node(LGRP_DEFAULT_HANDLE, 0);
		for (elem = 0; elem < nelems; list++, elem++) {
			base = list->addr;
			len = list->size;

			mpo_mem_node_add_slice(btop(base),
			    btop(base + len - 1));
		}
		mem_node_pfn_shift = 0;
		mem_node_physalign = 0;
	} else
		mnode_update(&mpo_config, 0, 0, U_ADD_ALL);

	/*
	 * Indicate to vm_pagelist that the hpm_counters array
	 * should be shared because the ranges overlap.
	 */
	if (max_mem_nodes > 1) {
		interleaved_mnodes = 1;
	}
}

/*
 * Return the locality group value for the supplied processor
 */
lgrp_handle_t
plat_lgrp_cpu_to_hand(processorid_t id)
{
	lgrp_handle_t lgrphand;

	/* Read lock guards mpo_cpu[] against concurrent reconfiguration. */
	mpo_rd_lock();
	if (n_locality_groups > 1) {
		lgrphand = (lgrp_handle_t)mpo_cpu[(int)id].home;
	} else {
		lgrphand = (lgrp_handle_t)LGRP_DEFAULT_HANDLE; /* Default */
	}
	mpo_rd_unlock();

	return (lgrphand);
}

/*
 * Return the latency between two lgroups, in units of nsec/10
 * (see lgrp_traverse(), which converts the MD's picosecond values).
 */
int
plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
{
	/*
	 * Return min remote latency when there are more than two lgroups
	 * (root and child) and getting latency between two different lgroups
	 * or root is involved.
	 */
	if (lgrp_optimizations() && (from != to ||
	    from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE)) {
		return ((int)higher_latency);
	} else {
		return ((int)lower_latency);
	}
}

int
plat_pfn_to_mem_node(pfn_t pfn)
{
	int i, mnode;
	pfn_t ra_to_pa_pfn;
	struct mblock_md *mb;

	if (n_locality_groups <= 1)
		return (0);

	/*
	 * The mnode is defined to be 1:1 with the lgroup handle, which
	 * is taken from from the home bits.  Find the mblock in which
	 * the pfn falls to get the ra_to_pa adjustment, and extract
	 * the home bits.
	 */
	mpo_rd_lock();
	mb = &mpo_mblock[0];
	for (i = 0; i < n_mblocks; i++) {
		if (pfn >= mb->base_pfn && pfn <= mb->end_pfn) {
			ra_to_pa_pfn = btop(mb->ra_to_pa);
			mnode = (((pfn + ra_to_pa_pfn) & home_mask_pfn) >>
			    home_mask_pfn_shift);
			ASSERT(mnode < max_mem_nodes);
			mpo_rd_unlock();
			return (mnode);
		}
		mb++;
	}

	panic("plat_pfn_to_mem_node() failed to find mblock: pfn=%lx\n", pfn);
	/* NOTREACHED -- return only to satisfy the compiler after panic. */
	return (pfn);
}

/*
 * plat_rapfn_to_papfn
 *
 * Convert a pfn in RA space to a pfn in PA space, in which the page coloring
 * and home mask bits are correct.  The upper bits do not necessarily
 * match the actual PA, however.
 */
pfn_t
plat_rapfn_to_papfn(pfn_t pfn)
{
	int i;
	pfn_t ra_to_pa_pfn;
	struct mblock_md *mb;

	ASSERT(n_mblocks > 0);
	/* Fast path: a single mblock has a single, cached RA->PA offset. */
	if (n_mblocks == 1)
		return (pfn + base_ra_to_pa_pfn);

	/*
	 * Find the mblock in which the pfn falls
	 * in order to get the ra_to_pa adjustment.
	 */
	mpo_rd_lock();
	for (mb = &mpo_mblock[0], i = 0; i < n_mblocks; i++, mb++) {
		if (pfn <= mb->end_pfn && pfn >= mb->base_pfn) {
			ra_to_pa_pfn = btop(mb->ra_to_pa);
			mpo_rd_unlock();
			return (pfn + ra_to_pa_pfn);
		}
	}

	panic("plat_rapfn_to_papfn() failed to find mblock: pfn=%lx\n", pfn);
	/* NOTREACHED -- return only to satisfy the compiler after panic. */
	return (pfn);
}

/*
 * plat_mem_node_iterator_init()
 *	Initialize cookie "it" to iterate over pfn's in an mnode.  There is
 *	no additional iterator function.  The caller uses the info from
 *	the iterator structure directly.
 *
 *	pfn: starting pfn.
 *	mnode: desired mnode.
 *	szc: desired page size.
 *	init:
 *	    if 1, start a new traversal, initialize "it", find first
 *	    mblock containing pfn, and return its starting pfn
 *	    within the mnode.
 *	    if 0, continue the previous traversal using passed-in data
 *	    from "it", advance to the next mblock, and return its
 *	    starting pfn within the mnode.
 *	it: returns readonly data to the caller; see below.
 *
 *	The input pfn must be aligned for the page size szc.
 *
 *	Returns: starting pfn for the iteration for the mnode/mblock,
 *	    which is aligned according to the page size,
 *	    or returns (pfn_t)(-1) if the input pfn lies past the last
 *	    valid pfn of the mnode.
 *	Returns misc values in the "it" struct that allows the caller
 *	to advance the pfn within an mblock using address arithmetic;
 *	see definition of mem_node_iterator_t in vm_dep.h.
 *	When the caller calculates a pfn that is greater than the
 *	returned value it->mi_mblock_end, the caller should again
 *	call plat_mem_node_iterator_init, passing init=0.
 *
 *	The last mblock in continuation case may be invalid because
 *	of memory DR.  To detect this situation mi_genid is checked
 *	against mpo_genid which is incremented after a memory DR
 *	operation.  See also plat_slice_add()/plat_slice_del().
 */
pfn_t
plat_mem_node_iterator_init(pfn_t pfn, int mnode, uchar_t szc,
    mem_node_iterator_t *it, int init)
{
	int i;
	pgcnt_t szcpgcnt = PNUM_SIZE(szc);	/* pages per szc page */
	struct mblock_md *mblock;
	pfn_t base, end;
	mem_stripe_t *ms;
	uint64_t szcpagesize;

	ASSERT(it != NULL);
	ASSERT(mnode >= 0 && mnode < max_mem_nodes);
	ASSERT(n_mblocks > 0);
	ASSERT(P2PHASE(pfn, szcpgcnt) == 0);

	mpo_rd_lock();

	/*
	 * Restart the traversal if the caller asked for it, or if a DR
	 * operation has invalidated the iterator's cached mblock index.
	 */
	if (init || (it->mi_genid != mpo_genid)) {
		it->mi_genid = mpo_genid;
		it->mi_last_mblock = 0;
		it->mi_init = 1;
	}

	/* Check if mpo is not enabled and we only have one mblock */
	if (n_locality_groups == 1 && n_mblocks == 1) {
		/* A misaligned RA->PA offset cannot host szc pages at all. */
		if (P2PHASE(base_ra_to_pa_pfn, szcpgcnt)) {
			pfn = (pfn_t)-1;
			goto done;
		}
		it->mi_mnode = mnode;
		it->mi_ra_to_pa = base_ra_to_pa_pfn;
		it->mi_mnode_pfn_mask = 0;
		it->mi_mnode_pfn_shift = 0;
		it->mi_mnode_mask = 0;
		it->mi_mblock_base = mem_node_config[mnode].physbase;
		it->mi_mblock_end = mem_node_config[mnode].physmax;
		if (pfn < it->mi_mblock_base)
			pfn = P2ROUNDUP(it->mi_mblock_base, szcpgcnt);
		if ((pfn + szcpgcnt - 1) > it->mi_mblock_end)
			pfn = (pfn_t)-1;
		goto done;
	}

	/* init=1 means begin iterator, init=0 means continue */
	if (init == 1) {
		i = 0;
	} else {
		ASSERT(it->mi_last_mblock < n_mblocks);
		i = it->mi_last_mblock;
		/* Caller must have exhausted the previous mblock's stripe. */
		ASSERT(pfn >
		    mem_stripes[i * max_locality_groups + mnode].physmax);
		if (++i == n_mblocks) {
			pfn = (pfn_t)-1;
			goto done;
		}
	}

	/*
	 * Find mblock that contains pfn for mnode's stripe, or first such an
	 * mblock after pfn, else pfn is out of bound and we'll return -1.
	 * mblocks and stripes are sorted in ascending address order.
	 */
	szcpagesize = szcpgcnt << PAGESHIFT;
	for (; i < n_mblocks; i++) {
		/* Skip mblocks whose RA->PA offset breaks szc alignment. */
		if (P2PHASE(mpo_mblock[i].ra_to_pa, szcpagesize))
			continue;
		ms = &mem_stripes[i * max_locality_groups + mnode];
		/* Stripe must fit at least one aligned szc page above pfn. */
		if (ms->exists && (pfn + szcpgcnt - 1) <= ms->physmax &&
		    (P2ROUNDUP(ms->physbase, szcpgcnt) + szcpgcnt - 1) <=
		    ms->physmax)
			break;
	}
	if (i == n_mblocks) {
		it->mi_last_mblock = i - 1;
		pfn = (pfn_t)-1;
		goto done;
	}

	it->mi_last_mblock = i;

	mblock = &mpo_mblock[i];
	base = ms->physbase;
	end = ms->physmax;

	it->mi_mnode = mnode;
	it->mi_ra_to_pa = btop(mblock->ra_to_pa);
	it->mi_mblock_base = base;
	it->mi_mblock_end = end;
	it->mi_mnode_pfn_mask = home_mask_pfn;	/* is 0 for non-MPO case */
	it->mi_mnode_pfn_shift = home_mask_pfn_shift;
	it->mi_mnode_mask = max_locality_groups - 1;
	if (pfn < base) {
		pfn = P2ROUNDUP(base, szcpgcnt);
		ASSERT(pfn + szcpgcnt - 1 <= end);
	}
	ASSERT((pfn + szcpgcnt - 1) <= mpo_mblock[i].end_pfn);
done:
	mpo_rd_unlock();
	return (pfn);
}

/*
 * plat_mem_node_intersect_range()
 *
 * Find the intersection between a memnode and a range of pfn's.
 */
void
plat_mem_node_intersect_range(pfn_t test_base, pgcnt_t test_len,
    int mnode, pgcnt_t *npages_out)
{
	pfn_t offset, len, hole, base, end, test_end, frag;
	pfn_t nearest;
	mem_stripe_t *ms;
	int i, npages;

	*npages_out = 0;

	if (!mem_node_config[mnode].exists || test_len == 0)
		return;

	base = mem_node_config[mnode].physbase;
	end = mem_node_config[mnode].physmax;

	/* No overlap between the test range and this mnode's span. */
	test_end = test_base + test_len - 1;
	if (end < test_base || base > test_end)
		return;

	/* Non-striped case: the intersection is a simple interval. */
	if (n_locality_groups == 1) {
		*npages_out = MIN(test_end, end) - MAX(test_base, base) + 1;
		return;
	}

	/* Size of the gap between consecutive stripes of this mnode. */
	hole = mnode_stride - mnode_pages;
	npages = 0;

	/*
	 * Iterate over all the stripes for this mnode (one per mblock),
	 * find the intersection with each, and accumulate the intersections.
	 *
	 * Determining the intersection with a stripe is tricky.  If base or
	 * end fall outside the mem_node bounds, round them to physbase/physmax
	 * of mem_node.  If base or end fall in a gap, round them to start of
	 * nearest stripe.  If they fall within a stripe, keep base or end,
	 * but calculate the fragment size that should be excluded from the
	 * stripe.  Calculate how many strides fall in the adjusted range,
	 * multiply by stripe width, and add the start and end fragments.
	 */

	mpo_rd_lock();
	for (i = mnode; i < n_mem_stripes; i += max_locality_groups) {
		ms = &mem_stripes[i];
		if (ms->exists &&
		    test_base <= (end = ms->physmax) &&
		    test_end >= (base = ms->physbase)) {

			offset = ms->offset;

			if (test_base > base) {
				/* Round test_base to next multiple of stride */
				len = P2ROUNDUP(test_base - (base - offset),
				    mnode_stride);
				nearest = base - offset + len;
				/*
				 * Compute distance from test_base to the
				 * stride boundary to see if test_base falls
				 * in the stripe or in the hole.
				 */
				if (nearest - test_base > hole) {
					/*
					 * test_base lies in stripe,
					 * and offset should be excluded.
					 */
					offset = test_base -
					    (nearest - mnode_stride);
					base = test_base;
				} else {
					/* round up to next stripe start */
					offset = 0;
					base = nearest;
					if (base > end)
						continue;
				}

			}

			if (test_end < end)
				end = test_end;
			end++;		/* adjust to an exclusive bound */

			/* Round end to next multiple of stride */
			len = P2ROUNDUP(end - (base - offset), mnode_stride);
			nearest = (base - offset) + len;
			if (nearest - end <= hole) {
				/* end falls in hole, use entire last stripe */
				frag = 0;
			} else {
				/* end falls in stripe, compute fragment */
				frag = nearest - hole - end;
			}

			/*
			 * len spans whole strides; shifting by stripe_shift
			 * converts strides to stripe pages, then the partial
			 * start (offset) and end (frag) pieces are removed.
			 */
			len = (len >> stripe_shift) - offset - frag;
			npages += len;
		}
	}

	*npages_out = npages;
	mpo_rd_unlock();
}

/*
 * valid_pages()
 *
 * Return 1 if pages are valid and do not cross mnode boundaries
 * (which would break page free list assumptions), and 0 otherwise.
 */

/* Extract the mnode (home) bits from a physical address. */
#define	MNODE(pa)	\
	((btop(pa) & home_mask_pfn) >> home_mask_pfn_shift)

static int
valid_pages(md_t *md, mde_cookie_t cpu0)
{
	int i, max_szc;
	uint64_t last_page_base, szc_mask;
	uint64_t max_page_len, max_coalesce_len;
	struct mblock_md *mb = mpo_mblock;

	/*
	 * Find the smaller of the largest page possible and supported.
	 * mmu_exported_pagesize_mask is not yet initialized, so read
	 * it from the MD.  Apply minimal fixups in case of broken MDs
	 * to get a sane mask.
	 */

	if (cpu0 == NULL)
		/* No CPU node supplied: reuse the cached page-size mask. */
		szc_mask = szc_mask0;
	else {
		if (md_get_prop_val(md, cpu0, "mmu-page-size-list", &szc_mask))
			szc_mask = 0;
		/* largest in sun4v default support */
		szc_mask |=  (1 << TTE4M);
		/* Cache for later calls that pass cpu0 == NULL. */
		szc_mask0 = szc_mask;
	}
	max_szc = highbit(szc_mask) - 1;
	if (max_szc > TTE256M)
		max_szc = TTE256M;
	max_page_len = TTEBYTES(max_szc);

	/*
	 * Page coalescing code coalesces all sizes up to 256M on sun4v, even
	 * if mmu-page-size-list does not contain it, so 256M pages must fall
	 * within one mnode to use MPO.
	 */
	max_coalesce_len = TTEBYTES(TTE256M);
	ASSERT(max_coalesce_len >= max_page_len);

	if (ptob(mnode_pages) < max_coalesce_len) {
		MPO_STATUS("Page too large; MPO disabled: page = %lx, "
		    "mnode slice = %lx\n", max_coalesce_len, ptob(mnode_pages));
		return (0);
	}

	for (i = 0; i < n_mblocks; i++) {
		uint64_t base = mb->base;
		uint64_t end = mb->base + mb->size - 1;
		uint64_t ra_to_pa = mb->ra_to_pa;

		/*
		 * If mblock is smaller than the max page size, then
		 * RA = PA mod MAXPAGE is not guaranteed, but it must
		 * not span mnodes.
		 */
		if (mb->size < max_page_len) {
			if (MNODE(base + ra_to_pa) != MNODE(end + ra_to_pa)) {
				MPO_STATUS("Small mblock spans mnodes; "
				    "MPO disabled: base = %lx, end = %lx, "
				    "ra2pa = %lx\n", base, end, ra_to_pa);
				return (0);
			}
		} else {
			/* Verify RA = PA mod MAXPAGE, using coalesce size */
			uint64_t pa_base = base + ra_to_pa;
			if ((base & (max_coalesce_len - 1)) !=
			    (pa_base & (max_coalesce_len - 1))) {
				MPO_STATUS("bad page alignment; MPO disabled: "
				    "ra = %lx, pa = %lx, pagelen = %lx\n",
				    base, pa_base, max_coalesce_len);
				return (0);
			}
		}

		/*
		 * Find start of last large page in mblock in RA space.
		 * If page extends into the next mblock, verify the
		 * mnode does not change.
		 */
		last_page_base = P2ALIGN(end, max_coalesce_len);
		if (i + 1 < n_mblocks &&
		    last_page_base + max_coalesce_len > mb[1].base &&
		    MNODE(last_page_base + ra_to_pa) !=
		    MNODE(mb[1].base + mb[1].ra_to_pa)) {
			MPO_STATUS("Large page spans mblocks; MPO disabled: "
			    "end = %lx, ra2pa = %lx, base = %lx, ra2pa = %lx, "
			    "pagelen = %lx\n", end, ra_to_pa, mb[1].base,
			    mb[1].ra_to_pa, max_coalesce_len);
			return (0);
		}

		mb++;
	}
	return (1);
}


/*
 * fix_interleave() - Find lgroups with sub-page sized memory interleave,
 * if any, and remove them.  This yields a config where the "coarse
 * grained" lgroups cover all of memory, even though part of that memory
 * is fine grain interleaved and does not deliver a purely local memory
 * latency.
 *
 * This function reads and modifies the globals:
 *	mpo_lgroup[], n_lgrpnodes
 *
 * Returns 1 if lgroup nodes were removed, 0 otherwise.
1427ce8eb11aSdp78419 */ 1428ce8eb11aSdp78419 1429ce8eb11aSdp78419 static int 1430ce8eb11aSdp78419 fix_interleave(void) 1431ce8eb11aSdp78419 { 1432ce8eb11aSdp78419 int i, j; 1433ce8eb11aSdp78419 uint64_t mask = 0; 1434ce8eb11aSdp78419 1435ce8eb11aSdp78419 j = 0; 1436ce8eb11aSdp78419 for (i = 0; i < n_lgrpnodes; i++) { 1437ce8eb11aSdp78419 if ((mpo_lgroup[i].addr_mask & PAGEOFFSET) != 0) { 1438ce8eb11aSdp78419 /* remove this lgroup */ 1439ce8eb11aSdp78419 mask = mpo_lgroup[i].addr_mask; 1440ce8eb11aSdp78419 } else { 1441ce8eb11aSdp78419 mpo_lgroup[j++] = mpo_lgroup[i]; 1442ce8eb11aSdp78419 } 1443ce8eb11aSdp78419 } 1444ce8eb11aSdp78419 n_lgrpnodes = j; 1445ce8eb11aSdp78419 1446ce8eb11aSdp78419 if (mask != 0) 1447ce8eb11aSdp78419 MPO_STATUS("sub-page interleave %lx found; " 1448ce8eb11aSdp78419 "removing lgroup.\n", mask); 1449ce8eb11aSdp78419 1450ce8eb11aSdp78419 return (mask != 0); 1451ce8eb11aSdp78419 } 14529853d9e8SJason Beloro 14539853d9e8SJason Beloro /* 14549853d9e8SJason Beloro * mblock_alloc 14559853d9e8SJason Beloro * 14569853d9e8SJason Beloro * Allocate memory for mblock an stripe arrays from either static or 14579853d9e8SJason Beloro * dynamic space depending on utype, and return the result in mc. 14589853d9e8SJason Beloro * Returns 0 on success and -1 on error. 
 */

static int
mblock_alloc(mpo_config_t *mc, update_t utype, int nmblocks)
{
	mblock_md_t *mb = NULL;
	mem_stripe_t *ms = NULL;
	/* Worst case: one stripe per (mem node, mblock) pair. */
	int nstripes = MAX_MEM_NODES * nmblocks;
	size_t mblocksz = nmblocks * sizeof (struct mblock_md);
	size_t mstripesz = nstripes * sizeof (mem_stripe_t);
	/* Round the combined mblock+stripe allocation up to whole pages. */
	size_t allocsz = mmu_ptob(mmu_btopr(mblocksz + mstripesz));

	/*
	 * Allocate space for mblocks and mstripes.
	 *
	 * For DR allocations, just use kmem_alloc(), and set
	 * mc_alloc_sz to indicate it was used (mblock_free() keys
	 * off a nonzero mc_alloc_sz to kmem_free the memory).
	 *
	 * For boot allocation:
	 * If we have a small number of mblocks we will use the space
	 * that we preallocated. Otherwise, we will dynamically
	 * allocate the space from the prom and map it to the
	 * reserved VA at MPOBUF_BASE.
	 */

	if (utype == U_ADD || utype == U_DEL) {
		/* DR add/delete: zeroed heap memory, freed by mblock_free() */
		mb = (struct mblock_md *)kmem_zalloc(allocsz, KM_SLEEP);
		ms = (mem_stripe_t *)(mb + nmblocks);
		mc->mc_alloc_sz = allocsz;
	} else if (nmblocks <= SMALL_MBLOCKS_COUNT) {
		/* Boot with few mblocks: use the static preallocated arrays */
		mb = &small_mpo_mblocks[0];
		ms = &small_mem_stripes[0];
		mc->mc_alloc_sz = 0;
	} else {
		/* Ensure that we don't request more space than reserved */
		if (allocsz > MPOBUF_SIZE) {
			MPO_STATUS("mblock_alloc: Insufficient space "
			    "for mblock structures \n");
			return (-1);
		}
		/*
		 * prom_alloc() must return exactly the requested fixed VA;
		 * any other return value is treated as failure.
		 */
		mb = (struct mblock_md *)
		    prom_alloc((caddr_t)MPOBUF_BASE, allocsz, PAGESIZE);
		if (mb != (struct mblock_md *)MPOBUF_BASE) {
			MPO_STATUS("mblock_alloc: Cannot allocate space "
			    "for mblocks \n");
			return (-1);
		}
		/*
		 * NOTE(review): presumably records the prom buffer so the
		 * 32-bit heap code can account for it -- confirm against
		 * the users of mpo_heap32_buf.
		 */
		mpo_heap32_buf = (caddr_t)MPOBUF_BASE;
		mpo_heap32_bufsz = MPOBUF_SIZE;
		ms = (mem_stripe_t *)(mb + nmblocks);
		mc->mc_alloc_sz = 0;
	}
	/* Stripe array immediately follows the mblock array in all cases. */
	mc->mc_mblocks = mb;
	mc->mc_stripes = ms;
	mc->mc_nmblocks = nmblocks;
	mc->mc_nstripes = nstripes;
	MPO_DEBUG("mblock_alloc: mblocks: %d\n", nmblocks);
	return (0);
}

/*
 *
mblock_free 15219853d9e8SJason Beloro * 15229853d9e8SJason Beloro * Free memory in mc that was allocated by mblock_alloc. 15239853d9e8SJason Beloro */ 15249853d9e8SJason Beloro 15259853d9e8SJason Beloro static void 15269853d9e8SJason Beloro mblock_free(mpo_config_t *mc) 15279853d9e8SJason Beloro { 15289853d9e8SJason Beloro if (mc->mc_alloc_sz > 0) { 15299853d9e8SJason Beloro ASSERT(mc->mc_mblocks != mpo_mblock); 15309853d9e8SJason Beloro kmem_free((caddr_t)mc->mc_mblocks, mc->mc_alloc_sz); 15319853d9e8SJason Beloro } 15329853d9e8SJason Beloro bzero(mc, sizeof (*mc)); 15339853d9e8SJason Beloro } 15349853d9e8SJason Beloro 15359853d9e8SJason Beloro /* 15369853d9e8SJason Beloro * mblock_install 15379853d9e8SJason Beloro * 15389853d9e8SJason Beloro * Install mblock config passed in mc as the global configuration. 15399853d9e8SJason Beloro * May only be called at boot or while holding mpo_wr_lock. 15409853d9e8SJason Beloro */ 15419853d9e8SJason Beloro 15429853d9e8SJason Beloro static void 15439853d9e8SJason Beloro mblock_install(mpo_config_t *mc) 15449853d9e8SJason Beloro { 15459853d9e8SJason Beloro mpo_mblock = mc->mc_mblocks; 15469853d9e8SJason Beloro n_mblocks = mc->mc_nmblocks; 15479853d9e8SJason Beloro mem_stripes = mc->mc_stripes; 15489853d9e8SJason Beloro n_mem_stripes = mc->mc_nstripes; 15499853d9e8SJason Beloro base_ra_to_pa_pfn = btop(mc->mc_mblocks[0].ra_to_pa); 15509853d9e8SJason Beloro mpo_config = *mc; 15519853d9e8SJason Beloro } 15529853d9e8SJason Beloro 15539853d9e8SJason Beloro /* 15549853d9e8SJason Beloro * mblock_update 15559853d9e8SJason Beloro * 15569853d9e8SJason Beloro * Traverse mblocknodes, read the mblock properties from the MD, and 15579853d9e8SJason Beloro * save the mblocks in mc. 
15589853d9e8SJason Beloro */ 15599853d9e8SJason Beloro 15609853d9e8SJason Beloro static void 15619853d9e8SJason Beloro mblock_update(mpo_config_t *mc, md_t md, mde_cookie_t *mblocknodes) 15629853d9e8SJason Beloro { 15639853d9e8SJason Beloro uint64_t i, j; 15649853d9e8SJason Beloro int result = 0; 15659853d9e8SJason Beloro mblock_md_t *mblock = mc->mc_mblocks; 15669853d9e8SJason Beloro 15679853d9e8SJason Beloro for (i = 0, j = 0; j < mc->mc_nmblocks; j++) { 15689853d9e8SJason Beloro 15699853d9e8SJason Beloro /* Without a base or size value we will fail */ 15709853d9e8SJason Beloro result = get_int(md, mblocknodes[j], PROP_LG_BASE, 15719853d9e8SJason Beloro &mblock[i].base); 15729853d9e8SJason Beloro if (result < 0) { 15739853d9e8SJason Beloro MPO_STATUS("mblock_update: " 15749853d9e8SJason Beloro "PROP_LG_BASE is missing\n"); 15759853d9e8SJason Beloro mc->mc_nmblocks = 0; 15769853d9e8SJason Beloro return; 15779853d9e8SJason Beloro } 15789853d9e8SJason Beloro 15799853d9e8SJason Beloro result = get_int(md, mblocknodes[j], PROP_LG_SIZE, 15809853d9e8SJason Beloro &mblock[i].size); 15819853d9e8SJason Beloro if (result < 0) { 15829853d9e8SJason Beloro MPO_STATUS("mblock_update: " 15839853d9e8SJason Beloro "PROP_LG_SIZE is missing\n"); 15849853d9e8SJason Beloro mc->mc_nmblocks = 0; 15859853d9e8SJason Beloro return; 15869853d9e8SJason Beloro } 15879853d9e8SJason Beloro 15889853d9e8SJason Beloro result = get_int(md, mblocknodes[j], 15899853d9e8SJason Beloro PROP_LG_RA_PA_OFFSET, &mblock[i].ra_to_pa); 15909853d9e8SJason Beloro 15919853d9e8SJason Beloro /* If we don't have an ra_pa_offset, just set it to 0 */ 15929853d9e8SJason Beloro if (result < 0) 15939853d9e8SJason Beloro mblock[i].ra_to_pa = 0; 15949853d9e8SJason Beloro 15959853d9e8SJason Beloro MPO_DEBUG("mblock[%ld]: base = %lx, size = %lx, " 15969853d9e8SJason Beloro "ra_to_pa = %lx\n", i, 15979853d9e8SJason Beloro mblock[i].base, 15989853d9e8SJason Beloro mblock[i].size, 15999853d9e8SJason Beloro mblock[i].ra_to_pa); 
16009853d9e8SJason Beloro 16019853d9e8SJason Beloro /* check for unsupportable values of base and size */ 16029853d9e8SJason Beloro if (mblock[i].base > mblock[i].base + mblock[i].size) { 16039853d9e8SJason Beloro MPO_STATUS("mblock_update: " 16049853d9e8SJason Beloro "PROP_LG_BASE+PROP_LG_SIZE is invalid: " 16059853d9e8SJason Beloro "base = %lx, size = %lx\n", 16069853d9e8SJason Beloro mblock[i].base, mblock[i].size); 16079853d9e8SJason Beloro mc->mc_nmblocks = 0; 16089853d9e8SJason Beloro return; 16099853d9e8SJason Beloro } 16109853d9e8SJason Beloro 16119853d9e8SJason Beloro /* eliminate size==0 blocks */ 16129853d9e8SJason Beloro if (mblock[i].size != 0) { 16139853d9e8SJason Beloro uint64_t base = mblock[i].base; 16149853d9e8SJason Beloro uint64_t end = base + mblock[i].size; 16159853d9e8SJason Beloro ASSERT(end > base); 16169853d9e8SJason Beloro mblock[i].base_pfn = btop(base); 16179853d9e8SJason Beloro mblock[i].end_pfn = btop(end - 1); 16189853d9e8SJason Beloro i++; 16199853d9e8SJason Beloro } 16209853d9e8SJason Beloro } 16219853d9e8SJason Beloro 16229853d9e8SJason Beloro if (i == 0) { 16239853d9e8SJason Beloro MPO_STATUS("mblock_update: " 16249853d9e8SJason Beloro "No non-empty mblock nodes were found " 16259853d9e8SJason Beloro "in the Machine Descriptor\n"); 16269853d9e8SJason Beloro mc->mc_nmblocks = 0; 16279853d9e8SJason Beloro return; 16289853d9e8SJason Beloro } 16299853d9e8SJason Beloro ASSERT(i <= mc->mc_nmblocks); 16309853d9e8SJason Beloro mc->mc_nmblocks = i; 16319853d9e8SJason Beloro 16329853d9e8SJason Beloro /* Must sort mblocks by address for mem_node_iterator_init() */ 16339853d9e8SJason Beloro mblock_sort(mblock, mc->mc_nmblocks); 16349853d9e8SJason Beloro } 16359853d9e8SJason Beloro 16369853d9e8SJason Beloro /* 16379853d9e8SJason Beloro * mblock_update_add 16389853d9e8SJason Beloro * 16399853d9e8SJason Beloro * Update mblock config after a memory DR add. 
The added range is not 16409853d9e8SJason Beloro * needed, as we read *all* mblock nodes from the MD. Save the mblocks 16419853d9e8SJason Beloro * in mc. 16429853d9e8SJason Beloro */ 16439853d9e8SJason Beloro 16449853d9e8SJason Beloro static void 16459853d9e8SJason Beloro mblock_update_add(mpo_config_t *mc) 16469853d9e8SJason Beloro { 16479853d9e8SJason Beloro md_t *md; 16489853d9e8SJason Beloro mde_cookie_t root, *mblocknodes; 16499853d9e8SJason Beloro int nmblocks = 0; 16509853d9e8SJason Beloro 16519853d9e8SJason Beloro if ((md = md_get_handle()) == NULL) { 16529853d9e8SJason Beloro MPO_STATUS("Cannot access Machine Descriptor\n"); 16539853d9e8SJason Beloro goto error; 16549853d9e8SJason Beloro } 16559853d9e8SJason Beloro 16569853d9e8SJason Beloro if ((root = md_get_root(md)) == MDE_INVAL_ELEM_COOKIE) 16579853d9e8SJason Beloro goto error; 16589853d9e8SJason Beloro 16599853d9e8SJason Beloro nmblocks = md_alloc_scan_dag(md, root, PROP_LG_MBLOCK, "fwd", 16609853d9e8SJason Beloro &mblocknodes); 16619853d9e8SJason Beloro if (nmblocks <= 0) { 16629853d9e8SJason Beloro MPO_STATUS("No mblock nodes detected in Machine Descriptor\n"); 16639853d9e8SJason Beloro goto error; 16649853d9e8SJason Beloro } 16659853d9e8SJason Beloro 16669853d9e8SJason Beloro if (mblock_alloc(mc, U_ADD, nmblocks) < 0) 16679853d9e8SJason Beloro goto error; 16689853d9e8SJason Beloro 16699853d9e8SJason Beloro mblock_update(mc, md, mblocknodes); 16709853d9e8SJason Beloro md_free_scan_dag(md, &mblocknodes); 16719853d9e8SJason Beloro (void) md_fini_handle(md); 16729853d9e8SJason Beloro return; 16739853d9e8SJason Beloro error: 16749853d9e8SJason Beloro panic("mblock_update_add: cannot process mblocks from MD.\n"); 16759853d9e8SJason Beloro } 16769853d9e8SJason Beloro 16779853d9e8SJason Beloro /* 16789853d9e8SJason Beloro * mblock_update_del 16799853d9e8SJason Beloro * 16809853d9e8SJason Beloro * Update mblocks after a memory DR deletion of the range (ubase, uend). 
 * Allocate a new mblock config, copy old config to the new, modify the new
 * mblocks to reflect the deletion.   The new mblocks are returned in
 * mc_new and are not yet installed as the active config.
 */

static void
mblock_update_del(mpo_config_t *mc_new, mpo_config_t *mc_old, pfn_t ubase,
    pfn_t uend)
{
	int i, j;
	pfn_t base, end;
	mblock_md_t *mblock;
	int nmblocks = mc_old->mc_nmblocks;

	MPO_DEBUG("mblock_update_del(0x%lx, 0x%lx)\n", ubase, uend);

	/*
	 * Allocate mblocks in mc_new and copy the old to the new.
	 * Allocate one extra in case the deletion splits an mblock.
	 */
	if (mblock_alloc(mc_new, U_DEL, nmblocks + 1) < 0)
		return;
	mblock = mc_new->mc_mblocks;
	bcopy(mc_old->mc_mblocks, mblock, nmblocks * sizeof (mblock_md_t));

	/*
	 * Find the mblock containing the deleted range and adjust it in
	 * the new config.
	 */
	for (i = 0; i < nmblocks; i++) {

		/* Work in pfn units; base/end are inclusive bounds. */
		base = btop(mblock[i].base);
		end = base + btop(mblock[i].size) - 1;

		/*
		 * Adjust the mblock based on the subset that was deleted.
		 *
		 * If the entire mblk was deleted, compact the table.
		 *
		 * If the middle of the mblk was deleted, extend
		 * the table.  Space for the new slot was already
		 * allocated.
		 *
		 * The memory to be deleted is a mblock or a subset of
		 * and does not span multiple mblocks.
		 */
		if (base == ubase && end == uend) {
			/* whole mblock deleted: shift the tail down */
			for (j = i; j < nmblocks - 1; j++)
				mblock[j] = mblock[j + 1];
			nmblocks--;
			bzero(&mblock[nmblocks], sizeof (*mblock));
			break;
		} else if (base < ubase && end > uend) {
			/* middle deleted: split into two mblocks */
			for (j = nmblocks - 1; j >= i; j--)
				mblock[j + 1] = mblock[j];
			mblock[i].size = ptob(ubase - base);
			mblock[i].end_pfn = ubase - 1;
			mblock[i + 1].base = ptob(uend + 1);
			mblock[i + 1].size = ptob(end - uend);
			mblock[i + 1].base_pfn = uend + 1;
			nmblocks++;
			break;
		} else if (base == ubase) {
			/* front deleted: raise the base */
			MPO_DEBUG("mblock_update_del: shrink>"
			    " i=%d base=0x%lx end=0x%lx", i, base, end);
			mblock[i].base = ptob(uend + 1);
			mblock[i].size -= ptob(uend - ubase + 1);
			base = uend + 1;
			mblock[i].base_pfn = base;
			mblock[i].end_pfn = end;
			MPO_DEBUG(" nbase=0x%lx nend=0x%lx\n", base, end);
			break;
		} else if (end == uend) {
			/* tail deleted: lower the end */
			MPO_DEBUG("mblock_update_del: shrink<"
			    " i=%d base=0x%lx end=0x%lx", i, base, end);
			mblock[i].size -= ptob(uend - ubase + 1);
			end = ubase - 1;
			mblock[i].base_pfn = base;
			mblock[i].end_pfn = end;
			MPO_DEBUG(" nbase=0x%lx nend=0x%lx\n", base, end);
			break;
		}
	}
	mc_new->mc_nmblocks = nmblocks;
	/*
	 * NOTE(review): base/end here are from the last mblock examined;
	 * if the deleted range exactly matched a one-page mblock, end ==
	 * base after the first branch, and this ASSERT would trip on a
	 * DEBUG kernel -- confirm that case cannot occur.
	 */
	ASSERT(end > base);
}

/*
 * mstripe_update
 *
 * Read mblocks from mc and update mstripes in mc
 */

static void
mstripe_update(mpo_config_t *mc)
{
	lgrp_handle_t lgrphand, lgrp_start;
	int i, mnode;
	uint64_t offset, stripe_end, base, end, ra_to_pa, stride;
	uint64_t stripe, frag, remove;
	mem_stripe_t *ms;
	mblock_md_t *mblock = mc->mc_mblocks;
	int nmblocks = mc->mc_nmblocks;
	int mstripesz = MAX_MEM_NODES * nmblocks * sizeof (mem_stripe_t);

	/* Check for non-MPO sun4v platforms or memory DR removal */
	if (n_locality_groups <= 1) {
		ASSERT(n_locality_groups == 1);
		ASSERT(max_locality_groups == 1 && max_mem_nodes == 1);

		if (nmblocks == 1) {
			/* single mblock, single mnode: no striping needed */
			mc->mc_nstripes = 0;
		} else {
			/* one degenerate stripe per mblock */
			mc->mc_nstripes = nmblocks;
			bzero(mc->mc_stripes, mstripesz);
			for (i = 0; i < nmblocks; i++) {
				mc->mc_stripes[i].exists = 1;
				mc->mc_stripes[i].physbase = mblock[i].base_pfn;
				mc->mc_stripes[i].physmax = mblock[i].end_pfn;
			}
		}
		return;
	}

	bzero(mc->mc_stripes, mstripesz);
	mc->mc_nstripes = max_locality_groups * nmblocks;
	/* stripe: bytes owned by one mnode; stride: one full rotation */
	stripe = ptob(mnode_pages);
	stride = max_locality_groups * stripe;

	for (i = 0; i < nmblocks; i++) {
		base = mblock[i].base;
		end = base + mblock[i].size;
		ra_to_pa = mblock[i].ra_to_pa;

		/* Find the offset from the prev stripe boundary in PA space. */
		offset = (base + ra_to_pa) & (stripe - 1);

		/* Set the next stripe boundary. */
		stripe_end = base - offset + stripe;

		lgrp_start = (((base + ra_to_pa) & home_mask) >>
		    home_mask_shift);
		lgrphand = lgrp_start;

		/*
		 * Loop over all lgroups covered by the mblock, creating a
		 * stripe for each.  Stop when lgrp_start is visited again.
		 */
		do {
			/* mblock may not span all lgroups */
			if (base >= end)
				break;

			mnode = lgrphand;
			ASSERT(mnode < max_mem_nodes);

			/*
			 * Calculate the size of the fragment that does not
			 * belong to the mnode in the last partial stride.
			 */
			frag = (end - (base - offset)) & (stride - 1);
			if (frag == 0) {
				/* remove the gap */
				remove = stride - stripe;
			} else if (frag < stripe) {
				/* fragment fits in stripe; keep it all */
				remove = 0;
			} else {
				/* fragment is large; trim after whole stripe */
				remove = frag - stripe;
			}

			ms = &mc->mc_stripes[i * max_locality_groups + mnode];
			ms->physbase = btop(base);
			ms->physmax = btop(end - 1 - remove);
			ms->offset = btop(offset);
			ms->exists = 1;

			/* advance to the next stripe of this mblock */
			base = stripe_end;
			stripe_end += stripe;
			offset = 0;
			lgrphand = (((base + ra_to_pa) & home_mask) >>
			    home_mask_shift);
		} while (lgrphand != lgrp_start);
	}
}

/*
 * INTERSECT: clip (c, d) to its overlap with (a, b); if the ranges are
 * disjoint, "continue" the enclosing loop.  Both ranges are inclusive.
 */
#define	INTERSECT(a, b, c, d) \
	if (((a) >= (c) && (a) <= (d)) || \
18709853d9e8SJason Beloro ((c) >= (a) && (c) <= (b))) { \ 18719853d9e8SJason Beloro (c) = MAX((a), (c)); \ 18729853d9e8SJason Beloro (d) = MIN((b), (d)); \ 18739853d9e8SJason Beloro } else { \ 18749853d9e8SJason Beloro ASSERT((a) >= (d) || (b) <= (c)); \ 18759853d9e8SJason Beloro continue; \ 18769853d9e8SJason Beloro } \ 18779853d9e8SJason Beloro 18789853d9e8SJason Beloro /* 18799853d9e8SJason Beloro * mnode_update 18809853d9e8SJason Beloro * 18819853d9e8SJason Beloro * Read stripes from mc and update mnode extents. The mnode extents are 18829853d9e8SJason Beloro * part of the live configuration, so this can only be done at boot time 18839853d9e8SJason Beloro * or while holding the mpo_wr_lock. 18849853d9e8SJason Beloro */ 18859853d9e8SJason Beloro 18869853d9e8SJason Beloro static void 18879853d9e8SJason Beloro mnode_update(mpo_config_t *mc, pfn_t ubase, pfn_t uend, update_t utype) 18889853d9e8SJason Beloro { 18899853d9e8SJason Beloro int i, j, mnode, found; 18909853d9e8SJason Beloro pfn_t base, end; 18919853d9e8SJason Beloro mem_stripe_t *ms; 18929853d9e8SJason Beloro 18939853d9e8SJason Beloro MPO_DEBUG("mnode_udpate: basepfn: %lx endpfn: %lx\n", ubase, uend); 18949853d9e8SJason Beloro 18959853d9e8SJason Beloro if (n_locality_groups <= 1 && mc->mc_nmblocks == 1) { 18969853d9e8SJason Beloro if (utype == U_ADD) 18979853d9e8SJason Beloro mpo_mem_node_add_slice(ubase, uend); 18989853d9e8SJason Beloro else if (utype == U_DEL) 18999853d9e8SJason Beloro mpo_mem_node_del_slice(ubase, uend); 19009853d9e8SJason Beloro else 19019853d9e8SJason Beloro panic("mnode update: %d: invalid\n", utype); 19029853d9e8SJason Beloro return; 19039853d9e8SJason Beloro } 19049853d9e8SJason Beloro 19059853d9e8SJason Beloro found = 0; 19069853d9e8SJason Beloro for (i = 0; i < mc->mc_nmblocks; i++) { 19079853d9e8SJason Beloro for (mnode = 0; mnode < max_locality_groups; mnode++) { 19089853d9e8SJason Beloro 19099853d9e8SJason Beloro j = i * max_locality_groups + mnode; 19109853d9e8SJason Beloro 
ms = &mc->mc_stripes[j]; 19119853d9e8SJason Beloro if (!ms->exists) 19129853d9e8SJason Beloro continue; 19139853d9e8SJason Beloro 19149853d9e8SJason Beloro base = ms->physbase; 19159853d9e8SJason Beloro end = ms->physmax; 19169853d9e8SJason Beloro 19179853d9e8SJason Beloro /* 19189853d9e8SJason Beloro * Look for the mstripes intersecting this slice. 19199853d9e8SJason Beloro * 19209853d9e8SJason Beloro * The mstripe and slice pairs may not be equal 19219853d9e8SJason Beloro * if a subset of a mblock is added/deleted. 19229853d9e8SJason Beloro */ 19239853d9e8SJason Beloro switch (utype) { 19249853d9e8SJason Beloro case U_ADD: 19259853d9e8SJason Beloro INTERSECT(ubase, uend, base, end); 19269853d9e8SJason Beloro /*FALLTHROUGH*/ 19279853d9e8SJason Beloro case U_ADD_ALL: 19289853d9e8SJason Beloro if (n_locality_groups > 1) 19299853d9e8SJason Beloro mpo_plat_assign_lgrphand_to_mem_node( 19309853d9e8SJason Beloro mnode, mnode); 19319853d9e8SJason Beloro mpo_mem_node_add_slice(base, end); 19329853d9e8SJason Beloro break; 19339853d9e8SJason Beloro case U_DEL: 19349853d9e8SJason Beloro INTERSECT(ubase, uend, base, end); 19359853d9e8SJason Beloro mpo_mem_node_del_slice(base, end); 19369853d9e8SJason Beloro break; 19379853d9e8SJason Beloro default: 19389853d9e8SJason Beloro panic("mnode_update: %d: invalid\n", utype); 19399853d9e8SJason Beloro break; 19409853d9e8SJason Beloro } 19419853d9e8SJason Beloro 19429853d9e8SJason Beloro found++; 19439853d9e8SJason Beloro } 19449853d9e8SJason Beloro } 19459853d9e8SJason Beloro 19469853d9e8SJason Beloro if (!found) 19479853d9e8SJason Beloro panic("mnode_update: mstripe not found"); 19489853d9e8SJason Beloro 19499853d9e8SJason Beloro #ifdef DEBUG 19509853d9e8SJason Beloro if (utype == U_ADD_ALL || utype == U_DEL) 19519853d9e8SJason Beloro return; 19529853d9e8SJason Beloro found = 0; 19539853d9e8SJason Beloro for (i = 0; i < max_mem_nodes; i++) { 19549853d9e8SJason Beloro if (!mem_node_config[i].exists) 19559853d9e8SJason Beloro 
continue; 19569853d9e8SJason Beloro if (ubase >= mem_node_config[i].physbase && 19579853d9e8SJason Beloro ubase <= mem_node_config[i].physmax) 19589853d9e8SJason Beloro found |= 1; 19599853d9e8SJason Beloro if (uend >= mem_node_config[i].physbase && 19609853d9e8SJason Beloro uend <= mem_node_config[i].physmax) 19619853d9e8SJason Beloro found |= 2; 19629853d9e8SJason Beloro } 19639853d9e8SJason Beloro ASSERT(found == 3); 19649853d9e8SJason Beloro { 19659853d9e8SJason Beloro pfn_t minpfn, maxpfn; 19669853d9e8SJason Beloro 19679853d9e8SJason Beloro mem_node_max_range(&minpfn, &maxpfn); 19689853d9e8SJason Beloro ASSERT(minpfn <= ubase); 19699853d9e8SJason Beloro ASSERT(maxpfn >= uend); 19709853d9e8SJason Beloro } 19719853d9e8SJason Beloro #endif 19729853d9e8SJason Beloro } 19739853d9e8SJason Beloro 19749853d9e8SJason Beloro /* 19759853d9e8SJason Beloro * Plat_slice_add()/plat_slice_del() are the platform hooks 19769853d9e8SJason Beloro * for adding/deleting a pfn range to/from the system. 19779853d9e8SJason Beloro * 19789853d9e8SJason Beloro * Platform_slice_add() is used for both boot/DR cases. 19799853d9e8SJason Beloro * 19809853d9e8SJason Beloro * - Zeus has already added the mblocks to the MD, so read the updated 19819853d9e8SJason Beloro * MD and allocate all data structures required to manage the new memory 19829853d9e8SJason Beloro * configuration. 19839853d9e8SJason Beloro * 19849853d9e8SJason Beloro * - Recompute the stripes which are derived from the mblocks. 19859853d9e8SJason Beloro * 19869853d9e8SJason Beloro * - Update (expand) the mnode extents and install the modified mblocks as 19879853d9e8SJason Beloro * the new mpo config. This must be done while holding the mpo_wr_lock 19889853d9e8SJason Beloro * to guarantee that no other threads access the mpo meta-data. 19899853d9e8SJason Beloro * 19909853d9e8SJason Beloro * - Unlock MPO data structures; the new config is live. Free the old config. 
 *
 * Plat_slice_del() is used for DR only.
 *
 * - Zeus has not yet modified the MD to reflect the deletion, so copy
 * the old mpo mblocks and delete the range from the copy.
 *
 * - Recompute the stripes which are derived from the mblocks.
 *
 * - Update (shrink) the mnode extents and install the modified mblocks as
 * the new mpo config.  This must be done while holding the mpo_wr_lock
 * to guarantee that no other threads access the mpo meta-data.
 *
 * - Unlock MPO data structures; the new config is live.  Free the old config.
 */

void
plat_slice_add(pfn_t base, pfn_t end)
{
	mpo_config_t old_config = mpo_config;
	mpo_config_t new_config;

	VALIDATE_SLICE(base, end);
	mblock_update_add(&new_config);
	mstripe_update(&new_config);
	mpo_wr_lock();
	/* Install first: the added range exists only in the new config. */
	mblock_install(&new_config);
	/* Use new config to add all ranges for mnode_update */
	mnode_update(&new_config, base, end, U_ADD);
	mpo_genid++;
	mpo_wr_unlock();
	mblock_free(&old_config);
}

void
plat_slice_del(pfn_t base, pfn_t end)
{
	mpo_config_t old_config = mpo_config;
	mpo_config_t new_config;

	VALIDATE_SLICE(base, end);
	mblock_update_del(&new_config, &old_config, base, end);
	mstripe_update(&new_config);
	mpo_wr_lock();
	/* Use old config to find deleted range for mnode_update */
	mnode_update(&old_config, base, end, U_DEL);
	/* Install only after the deleted range has been removed. */
	mblock_install(&new_config);
	mpo_genid++;
	mpo_wr_unlock();
	mblock_free(&old_config);
}