17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 77c478bd9Sstevel@tonic-gate * with the License. 87c478bd9Sstevel@tonic-gate * 97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 127c478bd9Sstevel@tonic-gate * and limitations under the License. 137c478bd9Sstevel@tonic-gate * 147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 197c478bd9Sstevel@tonic-gate * 207c478bd9Sstevel@tonic-gate * CDDL HEADER END 217c478bd9Sstevel@tonic-gate */ 227c478bd9Sstevel@tonic-gate /* 237c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #include <sys/archsystm.h> /* for {in,out}{b,w,l}() */ 317c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 327c478bd9Sstevel@tonic-gate #include <sys/cpupart.h> 337c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 347c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 357c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 367c478bd9Sstevel@tonic-gate #include <sys/memlist.h> 377c478bd9Sstevel@tonic-gate #include <sys/memnode.h> 387c478bd9Sstevel@tonic-gate #include <sys/mman.h> 397c478bd9Sstevel@tonic-gate #include <sys/pci_impl.h> /* for PCI configuration space macros */ 407c478bd9Sstevel@tonic-gate #include <sys/param.h> 417c478bd9Sstevel@tonic-gate #include <sys/promif.h> /* for prom_printf() */ 427c478bd9Sstevel@tonic-gate #include <sys/systm.h> 437c478bd9Sstevel@tonic-gate #include <sys/thread.h> 447c478bd9Sstevel@tonic-gate #include <sys/types.h> 457c478bd9Sstevel@tonic-gate #include <sys/var.h> 467c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h> /* for x86_feature and X86_AMD */ 477c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h> 487c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 497c478bd9Sstevel@tonic-gate 507c478bd9Sstevel@tonic-gate 517c478bd9Sstevel@tonic-gate 527c478bd9Sstevel@tonic-gate /* 537c478bd9Sstevel@tonic-gate * lgroup platform support for x86 platforms. 
547c478bd9Sstevel@tonic-gate */ 557c478bd9Sstevel@tonic-gate 567c478bd9Sstevel@tonic-gate #define MAX_NODES 8 577c478bd9Sstevel@tonic-gate #define NLGRP (MAX_NODES * (MAX_NODES - 1) + 1) 587c478bd9Sstevel@tonic-gate 597c478bd9Sstevel@tonic-gate #define LGRP_PLAT_CPU_TO_NODE(cpu) (chip_plat_get_chipid(cpu)) 607c478bd9Sstevel@tonic-gate 617c478bd9Sstevel@tonic-gate #define LGRP_PLAT_PROBE_NROUNDS 64 /* default laps for probing */ 627c478bd9Sstevel@tonic-gate #define LGRP_PLAT_PROBE_NSAMPLES 1 /* default samples to take */ 637c478bd9Sstevel@tonic-gate 647c478bd9Sstevel@tonic-gate 657c478bd9Sstevel@tonic-gate /* 667c478bd9Sstevel@tonic-gate * Multiprocessor Opteron machines have Non Uniform Memory Access (NUMA). 677c478bd9Sstevel@tonic-gate * 687c478bd9Sstevel@tonic-gate * Until System Affinity Resource Table (SRAT) becomes part of ACPI standard, 697c478bd9Sstevel@tonic-gate * we need to examine registers in PCI configuration space to determine how 707c478bd9Sstevel@tonic-gate * many nodes are in the system and which CPUs and memory are in each node. 717c478bd9Sstevel@tonic-gate * This could be determined by probing all memory from each CPU, but that is 727c478bd9Sstevel@tonic-gate * too expensive to do while booting the kernel. 737c478bd9Sstevel@tonic-gate * 747c478bd9Sstevel@tonic-gate * NOTE: Using these PCI configuration space registers to determine this 757c478bd9Sstevel@tonic-gate * locality info is Opteron K8 specific and not guaranteed to work on 767c478bd9Sstevel@tonic-gate * the next generation Opteron processor. Furthermore, we assume that 777c478bd9Sstevel@tonic-gate * there is one CPU per node and CPU 0 is in node 0, CPU 1 is in node 1, 787c478bd9Sstevel@tonic-gate * etc. which should be true for Opteron K8.... 
797c478bd9Sstevel@tonic-gate */ 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate /* 827c478bd9Sstevel@tonic-gate * Opteron DRAM Address Map in PCI configuration space gives base and limit 837c478bd9Sstevel@tonic-gate * of physical memory in each node for Opteron K8. The following constants 847c478bd9Sstevel@tonic-gate * and macros define their contents, structure, and access. 857c478bd9Sstevel@tonic-gate */ 867c478bd9Sstevel@tonic-gate 877c478bd9Sstevel@tonic-gate /* 887c478bd9Sstevel@tonic-gate * How many bits to shift Opteron DRAM Address Map base and limit registers 897c478bd9Sstevel@tonic-gate * to get actual value 907c478bd9Sstevel@tonic-gate */ 917c478bd9Sstevel@tonic-gate #define OPT_DRAMADDR_LSHIFT_ADDR 8 /* shift left for address */ 927c478bd9Sstevel@tonic-gate 937c478bd9Sstevel@tonic-gate #define OPT_DRAMADDR_MASK_OFF 0xFFFFFF /* offset for address */ 947c478bd9Sstevel@tonic-gate 957c478bd9Sstevel@tonic-gate /* 967c478bd9Sstevel@tonic-gate * Bit masks defining what's in Opteron DRAM Address Map base register 977c478bd9Sstevel@tonic-gate */ 987c478bd9Sstevel@tonic-gate #define OPT_DRAMBASE_MASK_RE 0x1 /* read enable */ 997c478bd9Sstevel@tonic-gate #define OPT_DRAMBASE_MASK_WE 0x2 /* write enable */ 1007c478bd9Sstevel@tonic-gate #define OPT_DRAMBASE_MASK_INTRLVEN 0x700 /* interleave */ 1017c478bd9Sstevel@tonic-gate 1027c478bd9Sstevel@tonic-gate #define OPT_DRAMBASE_MASK_ADDR 0xFFFF0000 /* address bits 39-24 */ 1037c478bd9Sstevel@tonic-gate 1047c478bd9Sstevel@tonic-gate /* 1057c478bd9Sstevel@tonic-gate * Macros to get values from Opteron DRAM Address Map base register 1067c478bd9Sstevel@tonic-gate */ 1077c478bd9Sstevel@tonic-gate #define OPT_DRAMBASE(reg) \ 1087c478bd9Sstevel@tonic-gate (((u_longlong_t)reg & OPT_DRAMBASE_MASK_ADDR) << \ 1097c478bd9Sstevel@tonic-gate OPT_DRAMADDR_LSHIFT_ADDR) 1107c478bd9Sstevel@tonic-gate 1117c478bd9Sstevel@tonic-gate 1127c478bd9Sstevel@tonic-gate /* 1137c478bd9Sstevel@tonic-gate * Bit masks defining what's in 
Opteron DRAM Address Map limit register 1147c478bd9Sstevel@tonic-gate */ 1157c478bd9Sstevel@tonic-gate #define OPT_DRAMLIMIT_MASK_DSTNODE 0x7 /* destination node */ 1167c478bd9Sstevel@tonic-gate #define OPT_DRAMLIMIT_MASK_INTRLVSEL 0x70 /* interleave select */ 1177c478bd9Sstevel@tonic-gate #define OPT_DRAMLIMIT_MASK_ADDR 0xFFFF0000 /* addr bits 39-24 */ 1187c478bd9Sstevel@tonic-gate 1197c478bd9Sstevel@tonic-gate /* 1207c478bd9Sstevel@tonic-gate * Macros to get values from Opteron DRAM Address Map limit register 1217c478bd9Sstevel@tonic-gate */ 1227c478bd9Sstevel@tonic-gate #define OPT_DRAMLIMIT(reg) \ 1237c478bd9Sstevel@tonic-gate (((u_longlong_t)reg & OPT_DRAMLIMIT_MASK_ADDR) << \ 1247c478bd9Sstevel@tonic-gate OPT_DRAMADDR_LSHIFT_ADDR) 1257c478bd9Sstevel@tonic-gate 1267c478bd9Sstevel@tonic-gate 1277c478bd9Sstevel@tonic-gate /* 1287c478bd9Sstevel@tonic-gate * Opteron Node ID register in PCI configuration space contains 1297c478bd9Sstevel@tonic-gate * number of nodes in system, etc. for Opteron K8. The following 1307c478bd9Sstevel@tonic-gate * constants and macros define its contents, structure, and access. 
1317c478bd9Sstevel@tonic-gate */ 1327c478bd9Sstevel@tonic-gate 1337c478bd9Sstevel@tonic-gate /* 1347c478bd9Sstevel@tonic-gate * Bit masks defining what's in Opteron Node ID register 1357c478bd9Sstevel@tonic-gate */ 1367c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_ID 0x7 /* node ID */ 1377c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_CNT 0x70 /* node count */ 1387c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_IONODE 0x700 /* Hypertransport I/O hub node ID */ 1397c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_LCKNODE 0x7000 /* lock controller node ID */ 1407c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_CPUCNT 0xF0000 /* CPUs in system (0 means 1 CPU) */ 1417c478bd9Sstevel@tonic-gate 1427c478bd9Sstevel@tonic-gate /* 1437c478bd9Sstevel@tonic-gate * How many bits in Opteron Node ID register to shift right to get actual value 1447c478bd9Sstevel@tonic-gate */ 1457c478bd9Sstevel@tonic-gate #define OPT_NODE_RSHIFT_CNT 0x4 /* shift right for node count value */ 1467c478bd9Sstevel@tonic-gate 1477c478bd9Sstevel@tonic-gate /* 1487c478bd9Sstevel@tonic-gate * Macros to get values from Opteron Node ID register 1497c478bd9Sstevel@tonic-gate */ 1507c478bd9Sstevel@tonic-gate #define OPT_NODE_CNT(reg) \ 1517c478bd9Sstevel@tonic-gate ((reg & OPT_NODE_MASK_CNT) >> OPT_NODE_RSHIFT_CNT) 1527c478bd9Sstevel@tonic-gate 1537c478bd9Sstevel@tonic-gate 1547c478bd9Sstevel@tonic-gate /* 1557c478bd9Sstevel@tonic-gate * PCI configuration space registers accessed by specifying 1567c478bd9Sstevel@tonic-gate * a bus, device, function, and offset. 
The following constants 1577c478bd9Sstevel@tonic-gate * define the values needed to access Opteron K8 configuration 1587c478bd9Sstevel@tonic-gate * info to determine its node topology 1597c478bd9Sstevel@tonic-gate */ 1607c478bd9Sstevel@tonic-gate 1617c478bd9Sstevel@tonic-gate #define OPT_PCS_BUS_CONFIG 0 /* Hypertransport config space bus */ 1627c478bd9Sstevel@tonic-gate 1637c478bd9Sstevel@tonic-gate /* 1647c478bd9Sstevel@tonic-gate * Opteron PCI configuration space register function values 1657c478bd9Sstevel@tonic-gate */ 1667c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_HT 0 /* Hypertransport configuration */ 1677c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_ADDRMAP 1 /* Address map configuration */ 1687c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_DRAM 2 /* DRAM configuration */ 1697c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_MISC 3 /* Miscellaneous configuration */ 1707c478bd9Sstevel@tonic-gate 1717c478bd9Sstevel@tonic-gate /* 1727c478bd9Sstevel@tonic-gate * PCI Configuration Space register offsets 1737c478bd9Sstevel@tonic-gate */ 1747c478bd9Sstevel@tonic-gate #define OPT_PCS_OFF_VENDOR 0x0 /* device/vendor ID register */ 1757c478bd9Sstevel@tonic-gate #define OPT_PCS_OFF_DRAMBASE 0x40 /* DRAM Base register (node 0) */ 1767c478bd9Sstevel@tonic-gate #define OPT_PCS_OFF_NODEID 0x60 /* Node ID register */ 1777c478bd9Sstevel@tonic-gate 1787c478bd9Sstevel@tonic-gate /* 1797c478bd9Sstevel@tonic-gate * Opteron PCI Configuration Space device IDs for nodes 1807c478bd9Sstevel@tonic-gate */ 1817c478bd9Sstevel@tonic-gate #define OPT_PCS_DEV_NODE0 24 /* device number for node 0 */ 1827c478bd9Sstevel@tonic-gate 1837c478bd9Sstevel@tonic-gate 1847c478bd9Sstevel@tonic-gate /* 1857c478bd9Sstevel@tonic-gate * Bookkeeping for latencies seen during probing (used for verification) 1867c478bd9Sstevel@tonic-gate */ 1877c478bd9Sstevel@tonic-gate typedef struct lgrp_plat_latency_acct { 1887c478bd9Sstevel@tonic-gate hrtime_t la_value; /* latency value */ 
1897c478bd9Sstevel@tonic-gate int la_count; /* occurrences */ 1907c478bd9Sstevel@tonic-gate } lgrp_plat_latency_acct_t; 1917c478bd9Sstevel@tonic-gate 1927c478bd9Sstevel@tonic-gate 1937c478bd9Sstevel@tonic-gate /* 1947c478bd9Sstevel@tonic-gate * Choices for probing to determine lgroup topology 1957c478bd9Sstevel@tonic-gate */ 1967c478bd9Sstevel@tonic-gate typedef enum lgrp_plat_probe_op { 1977c478bd9Sstevel@tonic-gate LGRP_PLAT_PROBE_PGCPY, /* Use page copy */ 1987c478bd9Sstevel@tonic-gate LGRP_PLAT_PROBE_VENDOR /* Read vendor ID on Northbridge */ 1997c478bd9Sstevel@tonic-gate } lgrp_plat_probe_op_t; 2007c478bd9Sstevel@tonic-gate 2017c478bd9Sstevel@tonic-gate 2027c478bd9Sstevel@tonic-gate /* 2037c478bd9Sstevel@tonic-gate * Opteron DRAM address map gives base and limit for physical memory in a node 2047c478bd9Sstevel@tonic-gate */ 2057c478bd9Sstevel@tonic-gate typedef struct opt_dram_addr_map { 2067c478bd9Sstevel@tonic-gate uint32_t base; 2077c478bd9Sstevel@tonic-gate uint32_t limit; 2087c478bd9Sstevel@tonic-gate } opt_dram_addr_map_t; 2097c478bd9Sstevel@tonic-gate 2107c478bd9Sstevel@tonic-gate 2117c478bd9Sstevel@tonic-gate /* 2127c478bd9Sstevel@tonic-gate * Starting and ending page for physical memory in node 2137c478bd9Sstevel@tonic-gate */ 2147c478bd9Sstevel@tonic-gate typedef struct phys_addr_map { 2157c478bd9Sstevel@tonic-gate pfn_t start; 2167c478bd9Sstevel@tonic-gate pfn_t end; 2177c478bd9Sstevel@tonic-gate } phys_addr_map_t; 2187c478bd9Sstevel@tonic-gate 2197c478bd9Sstevel@tonic-gate 2207c478bd9Sstevel@tonic-gate /* 2217c478bd9Sstevel@tonic-gate * Opteron DRAM address map for each node 2227c478bd9Sstevel@tonic-gate */ 2237c478bd9Sstevel@tonic-gate struct opt_dram_addr_map opt_dram_map[MAX_NODES]; 2247c478bd9Sstevel@tonic-gate 2257c478bd9Sstevel@tonic-gate /* 2267c478bd9Sstevel@tonic-gate * Node ID register contents for each node 2277c478bd9Sstevel@tonic-gate */ 2287c478bd9Sstevel@tonic-gate uint_t opt_node_info[MAX_NODES]; 2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate /* 2317c478bd9Sstevel@tonic-gate * Whether memory is interleaved across nodes causing MPO to be disabled 2327c478bd9Sstevel@tonic-gate */ 2337c478bd9Sstevel@tonic-gate int lgrp_plat_mem_intrlv = 0; 2347c478bd9Sstevel@tonic-gate 2357c478bd9Sstevel@tonic-gate /* 2367c478bd9Sstevel@tonic-gate * Number of nodes in system 2377c478bd9Sstevel@tonic-gate */ 2387c478bd9Sstevel@tonic-gate uint_t lgrp_plat_node_cnt = 1; 2397c478bd9Sstevel@tonic-gate 2407c478bd9Sstevel@tonic-gate /* 2417c478bd9Sstevel@tonic-gate * Physical address range for memory in each node 2427c478bd9Sstevel@tonic-gate */ 2437c478bd9Sstevel@tonic-gate phys_addr_map_t lgrp_plat_node_memory[MAX_NODES]; 2447c478bd9Sstevel@tonic-gate 2457c478bd9Sstevel@tonic-gate /* 2467c478bd9Sstevel@tonic-gate * Probe costs (individual and total) and flush cost 2477c478bd9Sstevel@tonic-gate */ 2487c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_flush_cost = 0; 2497c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_cost = 0; 2507c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_cost_total = 0; 2517c478bd9Sstevel@tonic-gate 2527c478bd9Sstevel@tonic-gate /* 2537c478bd9Sstevel@tonic-gate * Error code for latency adjustment and verification 2547c478bd9Sstevel@tonic-gate */ 2557c478bd9Sstevel@tonic-gate int lgrp_plat_probe_error_code = 0; 2567c478bd9Sstevel@tonic-gate 2577c478bd9Sstevel@tonic-gate /* 2587c478bd9Sstevel@tonic-gate * How much latencies were off from minimum values gotten 2597c478bd9Sstevel@tonic-gate */ 2607c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_errors[MAX_NODES][MAX_NODES]; 2617c478bd9Sstevel@tonic-gate 2627c478bd9Sstevel@tonic-gate /* 2637c478bd9Sstevel@tonic-gate * Unique probe latencies and number of occurrences of each 2647c478bd9Sstevel@tonic-gate */ 2657c478bd9Sstevel@tonic-gate lgrp_plat_latency_acct_t lgrp_plat_probe_lat_acct[MAX_NODES]; 2667c478bd9Sstevel@tonic-gate 2677c478bd9Sstevel@tonic-gate /* 2687c478bd9Sstevel@tonic-gate * Size of memory buffer in each node 
for probing 2697c478bd9Sstevel@tonic-gate */ 2707c478bd9Sstevel@tonic-gate size_t lgrp_plat_probe_memsize = 0; 2717c478bd9Sstevel@tonic-gate 2727c478bd9Sstevel@tonic-gate /* 2737c478bd9Sstevel@tonic-gate * Virtual address of page in each node for probing 2747c478bd9Sstevel@tonic-gate */ 2757c478bd9Sstevel@tonic-gate caddr_t lgrp_plat_probe_memory[MAX_NODES]; 2767c478bd9Sstevel@tonic-gate 2777c478bd9Sstevel@tonic-gate /* 2787c478bd9Sstevel@tonic-gate * Number of unique latencies in probe times 2797c478bd9Sstevel@tonic-gate */ 2807c478bd9Sstevel@tonic-gate int lgrp_plat_probe_nlatencies = 0; 2817c478bd9Sstevel@tonic-gate 2827c478bd9Sstevel@tonic-gate /* 2837c478bd9Sstevel@tonic-gate * How many rounds of probing to do 2847c478bd9Sstevel@tonic-gate */ 2857c478bd9Sstevel@tonic-gate int lgrp_plat_probe_nrounds = LGRP_PLAT_PROBE_NROUNDS; 2867c478bd9Sstevel@tonic-gate 2877c478bd9Sstevel@tonic-gate /* 2887c478bd9Sstevel@tonic-gate * Number of samples to take when probing each node 2897c478bd9Sstevel@tonic-gate */ 2907c478bd9Sstevel@tonic-gate int lgrp_plat_probe_nsamples = LGRP_PLAT_PROBE_NSAMPLES; 2917c478bd9Sstevel@tonic-gate 2927c478bd9Sstevel@tonic-gate /* 2937c478bd9Sstevel@tonic-gate * How to probe to determine lgroup topology 2947c478bd9Sstevel@tonic-gate */ 2957c478bd9Sstevel@tonic-gate lgrp_plat_probe_op_t lgrp_plat_probe_op = LGRP_PLAT_PROBE_VENDOR; 2967c478bd9Sstevel@tonic-gate 2977c478bd9Sstevel@tonic-gate /* 2987c478bd9Sstevel@tonic-gate * PFN of page in each node for probing 2997c478bd9Sstevel@tonic-gate */ 3007c478bd9Sstevel@tonic-gate pfn_t lgrp_plat_probe_pfn[MAX_NODES]; 3017c478bd9Sstevel@tonic-gate 3027c478bd9Sstevel@tonic-gate /* 3037c478bd9Sstevel@tonic-gate * Whether probe time was suspect (ie. 
not within tolerance of value that it 3047c478bd9Sstevel@tonic-gate * should match) 3057c478bd9Sstevel@tonic-gate */ 3067c478bd9Sstevel@tonic-gate int lgrp_plat_probe_suspect[MAX_NODES][MAX_NODES]; 3077c478bd9Sstevel@tonic-gate 3087c478bd9Sstevel@tonic-gate /* 3097c478bd9Sstevel@tonic-gate * How long it takes to access memory from each node 3107c478bd9Sstevel@tonic-gate */ 3117c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_times[MAX_NODES][MAX_NODES]; 3127c478bd9Sstevel@tonic-gate 3137c478bd9Sstevel@tonic-gate /* 3147c478bd9Sstevel@tonic-gate * Min and max node memory probe times seen 3157c478bd9Sstevel@tonic-gate */ 3167c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_time_max = 0; 3177c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_time_min = -1; 3187c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_max[MAX_NODES][MAX_NODES]; 3197c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_min[MAX_NODES][MAX_NODES]; 3207c478bd9Sstevel@tonic-gate 3217c478bd9Sstevel@tonic-gate 3227c478bd9Sstevel@tonic-gate /* 3237c478bd9Sstevel@tonic-gate * Allocate lgrp and lgrp stat arrays statically. 
3247c478bd9Sstevel@tonic-gate */ 3257c478bd9Sstevel@tonic-gate static lgrp_t lgrp_space[NLGRP]; 3267c478bd9Sstevel@tonic-gate static int nlgrps_alloc; 3277c478bd9Sstevel@tonic-gate 3287c478bd9Sstevel@tonic-gate struct lgrp_stats lgrp_stats[NLGRP]; 3297c478bd9Sstevel@tonic-gate 3307c478bd9Sstevel@tonic-gate #define CPUID_FAMILY_OPTERON 15 3317c478bd9Sstevel@tonic-gate 3327c478bd9Sstevel@tonic-gate uint_t opt_family = 0; 3337c478bd9Sstevel@tonic-gate uint_t opt_model = 0; 3347c478bd9Sstevel@tonic-gate uint_t opt_probe_func = OPT_PCS_FUNC_DRAM; 3357c478bd9Sstevel@tonic-gate 3367c478bd9Sstevel@tonic-gate 3377c478bd9Sstevel@tonic-gate /* 3387c478bd9Sstevel@tonic-gate * Determine whether we're running on an AMD Opteron K8 machine 3397c478bd9Sstevel@tonic-gate */ 3407c478bd9Sstevel@tonic-gate int 3417c478bd9Sstevel@tonic-gate is_opteron(void) 3427c478bd9Sstevel@tonic-gate { 3437c478bd9Sstevel@tonic-gate if (x86_vendor != X86_VENDOR_AMD) 3447c478bd9Sstevel@tonic-gate return (0); 3457c478bd9Sstevel@tonic-gate 3467c478bd9Sstevel@tonic-gate if (cpuid_getfamily(CPU) == CPUID_FAMILY_OPTERON) 3477c478bd9Sstevel@tonic-gate return (1); 3487c478bd9Sstevel@tonic-gate else 3497c478bd9Sstevel@tonic-gate return (0); 3507c478bd9Sstevel@tonic-gate } 3517c478bd9Sstevel@tonic-gate 3527c478bd9Sstevel@tonic-gate int 3537c478bd9Sstevel@tonic-gate plat_lgrphand_to_mem_node(lgrp_handle_t hand) 3547c478bd9Sstevel@tonic-gate { 3557c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 3567c478bd9Sstevel@tonic-gate return (0); 3577c478bd9Sstevel@tonic-gate 3587c478bd9Sstevel@tonic-gate return ((int)hand); 3597c478bd9Sstevel@tonic-gate } 3607c478bd9Sstevel@tonic-gate 3617c478bd9Sstevel@tonic-gate lgrp_handle_t 3627c478bd9Sstevel@tonic-gate plat_mem_node_to_lgrphand(int mnode) 3637c478bd9Sstevel@tonic-gate { 3647c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 3657c478bd9Sstevel@tonic-gate return (LGRP_DEFAULT_HANDLE); 3667c478bd9Sstevel@tonic-gate 3677c478bd9Sstevel@tonic-gate return 
((lgrp_handle_t)mnode); 3687c478bd9Sstevel@tonic-gate } 3697c478bd9Sstevel@tonic-gate 3707c478bd9Sstevel@tonic-gate int 3717c478bd9Sstevel@tonic-gate plat_pfn_to_mem_node(pfn_t pfn) 3727c478bd9Sstevel@tonic-gate { 3737c478bd9Sstevel@tonic-gate int node; 3747c478bd9Sstevel@tonic-gate 3757c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 3767c478bd9Sstevel@tonic-gate return (0); 3777c478bd9Sstevel@tonic-gate 3787c478bd9Sstevel@tonic-gate for (node = 0; node < lgrp_plat_node_cnt; node++) { 3797c478bd9Sstevel@tonic-gate if (pfn >= lgrp_plat_node_memory[node].start && 3807c478bd9Sstevel@tonic-gate pfn <= lgrp_plat_node_memory[node].end) 3817c478bd9Sstevel@tonic-gate return (node); 3827c478bd9Sstevel@tonic-gate } 3837c478bd9Sstevel@tonic-gate 3847c478bd9Sstevel@tonic-gate ASSERT(node < lgrp_plat_node_cnt); 3857c478bd9Sstevel@tonic-gate return (-1); 3867c478bd9Sstevel@tonic-gate } 3877c478bd9Sstevel@tonic-gate 3887c478bd9Sstevel@tonic-gate /* 3897c478bd9Sstevel@tonic-gate * Configure memory nodes for machines with more than one node (ie NUMA) 3907c478bd9Sstevel@tonic-gate */ 3917c478bd9Sstevel@tonic-gate void 3927c478bd9Sstevel@tonic-gate plat_build_mem_nodes(struct memlist *list) 3937c478bd9Sstevel@tonic-gate { 3947c478bd9Sstevel@tonic-gate pfn_t cur_start, cur_end; /* start & end addr of subrange */ 3957c478bd9Sstevel@tonic-gate pfn_t start, end; /* start & end addr of whole range */ 3967c478bd9Sstevel@tonic-gate 3977c478bd9Sstevel@tonic-gate /* 3987c478bd9Sstevel@tonic-gate * Boot install lists are arranged <addr, len>, ... 
3997c478bd9Sstevel@tonic-gate */ 4007c478bd9Sstevel@tonic-gate while (list) { 4017c478bd9Sstevel@tonic-gate int node; 4027c478bd9Sstevel@tonic-gate 4037c478bd9Sstevel@tonic-gate start = list->address >> PAGESHIFT; 4047c478bd9Sstevel@tonic-gate end = (list->address + list->size - 1) >> PAGESHIFT; 4057c478bd9Sstevel@tonic-gate 4067c478bd9Sstevel@tonic-gate if (start > physmax) { 4077c478bd9Sstevel@tonic-gate list = list->next; 4087c478bd9Sstevel@tonic-gate continue; 4097c478bd9Sstevel@tonic-gate } 4107c478bd9Sstevel@tonic-gate if (end > physmax) 4117c478bd9Sstevel@tonic-gate end = physmax; 4127c478bd9Sstevel@tonic-gate 4137c478bd9Sstevel@tonic-gate /* 4147c478bd9Sstevel@tonic-gate * When there is only one memnode, just add memory to memnode 4157c478bd9Sstevel@tonic-gate */ 4167c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) { 4177c478bd9Sstevel@tonic-gate mem_node_add_slice(start, end); 4187c478bd9Sstevel@tonic-gate list = list->next; 4197c478bd9Sstevel@tonic-gate continue; 4207c478bd9Sstevel@tonic-gate } 4217c478bd9Sstevel@tonic-gate 4227c478bd9Sstevel@tonic-gate /* 4237c478bd9Sstevel@tonic-gate * mem_node_add_slice() expects to get a memory range that 4247c478bd9Sstevel@tonic-gate * is within one memnode, so need to split any memory range 4257c478bd9Sstevel@tonic-gate * that spans multiple memnodes into subranges that are each 4267c478bd9Sstevel@tonic-gate * contained within one memnode when feeding them to 4277c478bd9Sstevel@tonic-gate * mem_node_add_slice() 4287c478bd9Sstevel@tonic-gate */ 4297c478bd9Sstevel@tonic-gate cur_start = start; 4307c478bd9Sstevel@tonic-gate do { 4317c478bd9Sstevel@tonic-gate node = plat_pfn_to_mem_node(cur_start); 4327c478bd9Sstevel@tonic-gate ASSERT(cur_start >= 4337c478bd9Sstevel@tonic-gate lgrp_plat_node_memory[node].start && 4347c478bd9Sstevel@tonic-gate cur_start <= lgrp_plat_node_memory[node].end); 4357c478bd9Sstevel@tonic-gate 4367c478bd9Sstevel@tonic-gate cur_end = end; 4377c478bd9Sstevel@tonic-gate 
4387c478bd9Sstevel@tonic-gate /* 4397c478bd9Sstevel@tonic-gate * End of current subrange should not span memnodes 4407c478bd9Sstevel@tonic-gate */ 4417c478bd9Sstevel@tonic-gate if (cur_end > lgrp_plat_node_memory[node].end) 4427c478bd9Sstevel@tonic-gate cur_end = lgrp_plat_node_memory[node].end; 4437c478bd9Sstevel@tonic-gate 4447c478bd9Sstevel@tonic-gate mem_node_add_slice(cur_start, cur_end); 4457c478bd9Sstevel@tonic-gate 4467c478bd9Sstevel@tonic-gate /* 4477c478bd9Sstevel@tonic-gate * Next subrange starts after end of current one 4487c478bd9Sstevel@tonic-gate */ 4497c478bd9Sstevel@tonic-gate cur_start = cur_end + 1; 4507c478bd9Sstevel@tonic-gate } while (cur_end < end); 4517c478bd9Sstevel@tonic-gate 4527c478bd9Sstevel@tonic-gate list = list->next; 4537c478bd9Sstevel@tonic-gate } 4547c478bd9Sstevel@tonic-gate mem_node_physalign = 0; 4557c478bd9Sstevel@tonic-gate mem_node_pfn_shift = 0; 4567c478bd9Sstevel@tonic-gate } 4577c478bd9Sstevel@tonic-gate 4587c478bd9Sstevel@tonic-gate 4597c478bd9Sstevel@tonic-gate /* 4607c478bd9Sstevel@tonic-gate * Platform-specific initialization of lgroups 4617c478bd9Sstevel@tonic-gate */ 4627c478bd9Sstevel@tonic-gate void 4637c478bd9Sstevel@tonic-gate lgrp_plat_init(void) 4647c478bd9Sstevel@tonic-gate { 4657c478bd9Sstevel@tonic-gate uint_t bus; 4667c478bd9Sstevel@tonic-gate uint_t dev; 4677c478bd9Sstevel@tonic-gate uint_t node; 4687c478bd9Sstevel@tonic-gate uint_t off; 4697c478bd9Sstevel@tonic-gate 4707c478bd9Sstevel@tonic-gate extern lgrp_load_t lgrp_expand_proc_thresh; 4717c478bd9Sstevel@tonic-gate extern lgrp_load_t lgrp_expand_proc_diff; 4727c478bd9Sstevel@tonic-gate 4737c478bd9Sstevel@tonic-gate /* 4747c478bd9Sstevel@tonic-gate * Initialize as a UMA machine if this isn't an Opteron 4757c478bd9Sstevel@tonic-gate */ 4767c478bd9Sstevel@tonic-gate if (!is_opteron() || lgrp_topo_ht_limit() == 1) { 4777c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt = max_mem_nodes = 1; 4787c478bd9Sstevel@tonic-gate return; 4797c478bd9Sstevel@tonic-gate } 
4807c478bd9Sstevel@tonic-gate 4817c478bd9Sstevel@tonic-gate /* 4827c478bd9Sstevel@tonic-gate * Read configuration registers from PCI configuration space to 4837c478bd9Sstevel@tonic-gate * determine node information, which memory is in each node, etc. 4847c478bd9Sstevel@tonic-gate * 4857c478bd9Sstevel@tonic-gate * Write to PCI configuration space address register to specify 4867c478bd9Sstevel@tonic-gate * which configuration register to read and read/write PCI 4877c478bd9Sstevel@tonic-gate * configuration space data register to get/set contents 4887c478bd9Sstevel@tonic-gate */ 4897c478bd9Sstevel@tonic-gate bus = OPT_PCS_BUS_CONFIG; 4907c478bd9Sstevel@tonic-gate dev = OPT_PCS_DEV_NODE0; 4917c478bd9Sstevel@tonic-gate off = OPT_PCS_OFF_DRAMBASE; 4927c478bd9Sstevel@tonic-gate 4937c478bd9Sstevel@tonic-gate /* 4947c478bd9Sstevel@tonic-gate * Read node ID register for node 0 to get node count 4957c478bd9Sstevel@tonic-gate */ 4967c478bd9Sstevel@tonic-gate outl(PCI_CONFADD, PCI_CADDR1(bus, dev, OPT_PCS_FUNC_HT, 4977c478bd9Sstevel@tonic-gate OPT_PCS_OFF_NODEID)); 4987c478bd9Sstevel@tonic-gate opt_node_info[0] = inl(PCI_CONFDATA); 4997c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt = OPT_NODE_CNT(opt_node_info[0]) + 1; 5007c478bd9Sstevel@tonic-gate 5017c478bd9Sstevel@tonic-gate for (node = 0; node < lgrp_plat_node_cnt; node++) { 5027c478bd9Sstevel@tonic-gate /* 5037c478bd9Sstevel@tonic-gate * Read node ID register (except for node 0 which we just read) 5047c478bd9Sstevel@tonic-gate */ 5057c478bd9Sstevel@tonic-gate if (node > 0) { 5067c478bd9Sstevel@tonic-gate outl(PCI_CONFADD, PCI_CADDR1(bus, dev, 5077c478bd9Sstevel@tonic-gate OPT_PCS_FUNC_HT, OPT_PCS_OFF_NODEID)); 5087c478bd9Sstevel@tonic-gate opt_node_info[node] = inl(PCI_CONFDATA); 5097c478bd9Sstevel@tonic-gate } 5107c478bd9Sstevel@tonic-gate 5117c478bd9Sstevel@tonic-gate /* 5127c478bd9Sstevel@tonic-gate * Read DRAM base and limit registers which specify 5137c478bd9Sstevel@tonic-gate * physical memory range of each node 
5147c478bd9Sstevel@tonic-gate */ 5157c478bd9Sstevel@tonic-gate outl(PCI_CONFADD, PCI_CADDR1(bus, dev, OPT_PCS_FUNC_ADDRMAP, 5167c478bd9Sstevel@tonic-gate off)); 5177c478bd9Sstevel@tonic-gate opt_dram_map[node].base = inl(PCI_CONFDATA); 5187c478bd9Sstevel@tonic-gate if (opt_dram_map[node].base & OPT_DRAMBASE_MASK_INTRLVEN) 5197c478bd9Sstevel@tonic-gate lgrp_plat_mem_intrlv++; 5207c478bd9Sstevel@tonic-gate 5217c478bd9Sstevel@tonic-gate off += 4; /* limit register offset */ 5227c478bd9Sstevel@tonic-gate outl(PCI_CONFADD, PCI_CADDR1(bus, dev, OPT_PCS_FUNC_ADDRMAP, 5237c478bd9Sstevel@tonic-gate off)); 5247c478bd9Sstevel@tonic-gate opt_dram_map[node].limit = inl(PCI_CONFDATA); 5257c478bd9Sstevel@tonic-gate 5267c478bd9Sstevel@tonic-gate /* 5277c478bd9Sstevel@tonic-gate * Increment device number to next node and register offset for 5287c478bd9Sstevel@tonic-gate * DRAM base register of next node 5297c478bd9Sstevel@tonic-gate */ 5307c478bd9Sstevel@tonic-gate off += 4; 5317c478bd9Sstevel@tonic-gate dev++; 5327c478bd9Sstevel@tonic-gate 5337c478bd9Sstevel@tonic-gate /* 5347c478bd9Sstevel@tonic-gate * Get PFN for first page in each node, 5357c478bd9Sstevel@tonic-gate * so we can probe memory to determine latency topology 5367c478bd9Sstevel@tonic-gate */ 5377c478bd9Sstevel@tonic-gate lgrp_plat_probe_pfn[node] = 5387c478bd9Sstevel@tonic-gate btop(OPT_DRAMBASE(opt_dram_map[node].base)); 5397c478bd9Sstevel@tonic-gate 5407c478bd9Sstevel@tonic-gate /* 5417c478bd9Sstevel@tonic-gate * Remember physical address range of each node for use later 5427c478bd9Sstevel@tonic-gate */ 5437c478bd9Sstevel@tonic-gate lgrp_plat_node_memory[node].start = 5447c478bd9Sstevel@tonic-gate btop(OPT_DRAMBASE(opt_dram_map[node].base)); 5457c478bd9Sstevel@tonic-gate lgrp_plat_node_memory[node].end = 5467c478bd9Sstevel@tonic-gate btop(OPT_DRAMLIMIT(opt_dram_map[node].limit) | 5477c478bd9Sstevel@tonic-gate OPT_DRAMADDR_MASK_OFF); 5487c478bd9Sstevel@tonic-gate } 5497c478bd9Sstevel@tonic-gate 
5507c478bd9Sstevel@tonic-gate /* 5517c478bd9Sstevel@tonic-gate * Only use one memory node if memory is interleaved between any nodes 5527c478bd9Sstevel@tonic-gate */ 5537c478bd9Sstevel@tonic-gate if (lgrp_plat_mem_intrlv) { 5547c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt = max_mem_nodes = 1; 5557c478bd9Sstevel@tonic-gate (void) lgrp_topo_ht_limit_set(1); 5567c478bd9Sstevel@tonic-gate } else { 5577c478bd9Sstevel@tonic-gate max_mem_nodes = lgrp_plat_node_cnt; 5587c478bd9Sstevel@tonic-gate 5597c478bd9Sstevel@tonic-gate /* 5607c478bd9Sstevel@tonic-gate * Probing errors can mess up the lgroup topology and force us 5617c478bd9Sstevel@tonic-gate * fall back to a 2 level lgroup topology. Here we bound how 5627c478bd9Sstevel@tonic-gate * tall the lgroup topology can grow in hopes of avoiding any 5637c478bd9Sstevel@tonic-gate * anamolies in probing from messing up the lgroup topology 5647c478bd9Sstevel@tonic-gate * by limiting the accuracy of the latency topology. 5657c478bd9Sstevel@tonic-gate * 5667c478bd9Sstevel@tonic-gate * Assume that nodes will at least be configured in a ring, 5677c478bd9Sstevel@tonic-gate * so limit height of lgroup topology to be less than number 5687c478bd9Sstevel@tonic-gate * of nodes on a system with 4 or more nodes 5697c478bd9Sstevel@tonic-gate */ 5707c478bd9Sstevel@tonic-gate if (lgrp_plat_node_cnt >= 4 && 5717c478bd9Sstevel@tonic-gate lgrp_topo_ht_limit() == lgrp_topo_ht_limit_default()) 5727c478bd9Sstevel@tonic-gate (void) lgrp_topo_ht_limit_set(lgrp_plat_node_cnt - 1); 5737c478bd9Sstevel@tonic-gate } 5747c478bd9Sstevel@tonic-gate 5757c478bd9Sstevel@tonic-gate /* 5767c478bd9Sstevel@tonic-gate * Lgroups on Opteron architectures have but a single physical 5777c478bd9Sstevel@tonic-gate * processor. Tune lgrp_expand_proc_thresh and lgrp_expand_proc_diff 5787c478bd9Sstevel@tonic-gate * so that lgrp_choose() will spread things out aggressively. 
5797c478bd9Sstevel@tonic-gate */ 5807c478bd9Sstevel@tonic-gate lgrp_expand_proc_thresh = LGRP_LOADAVG_THREAD_MAX / 2; 5817c478bd9Sstevel@tonic-gate lgrp_expand_proc_diff = 0; 5827c478bd9Sstevel@tonic-gate } 5837c478bd9Sstevel@tonic-gate 5847c478bd9Sstevel@tonic-gate 5857c478bd9Sstevel@tonic-gate /* 5867c478bd9Sstevel@tonic-gate * Latencies must be within 1/(2**LGRP_LAT_TOLERANCE_SHIFT) of each other to 5877c478bd9Sstevel@tonic-gate * be considered same 5887c478bd9Sstevel@tonic-gate */ 5897c478bd9Sstevel@tonic-gate #define LGRP_LAT_TOLERANCE_SHIFT 4 5907c478bd9Sstevel@tonic-gate 5917c478bd9Sstevel@tonic-gate int lgrp_plat_probe_lt_shift = LGRP_LAT_TOLERANCE_SHIFT; 5927c478bd9Sstevel@tonic-gate 5937c478bd9Sstevel@tonic-gate 5947c478bd9Sstevel@tonic-gate /* 5957c478bd9Sstevel@tonic-gate * Adjust latencies between nodes to be symmetric, normalize latencies between 5967c478bd9Sstevel@tonic-gate * any nodes that are within some tolerance to be same, and make local 5977c478bd9Sstevel@tonic-gate * latencies be same 5987c478bd9Sstevel@tonic-gate */ 5997c478bd9Sstevel@tonic-gate static void 6007c478bd9Sstevel@tonic-gate lgrp_plat_latency_adjust(void) 6017c478bd9Sstevel@tonic-gate { 6027c478bd9Sstevel@tonic-gate int i; 6037c478bd9Sstevel@tonic-gate int j; 6047c478bd9Sstevel@tonic-gate int k; 6057c478bd9Sstevel@tonic-gate int l; 6067c478bd9Sstevel@tonic-gate u_longlong_t max; 6077c478bd9Sstevel@tonic-gate u_longlong_t min; 6087c478bd9Sstevel@tonic-gate u_longlong_t t; 6097c478bd9Sstevel@tonic-gate u_longlong_t t1; 6107c478bd9Sstevel@tonic-gate u_longlong_t t2; 6117c478bd9Sstevel@tonic-gate const lgrp_config_flag_t cflag = LGRP_CONFIG_LATENCY_CHANGE; 6127c478bd9Sstevel@tonic-gate int lat_corrected[MAX_NODES][MAX_NODES]; 6137c478bd9Sstevel@tonic-gate 6147c478bd9Sstevel@tonic-gate /* 6157c478bd9Sstevel@tonic-gate * Nothing to do when this is an UMA machine 6167c478bd9Sstevel@tonic-gate */ 6177c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 6187c478bd9Sstevel@tonic-gate return; 
6197c478bd9Sstevel@tonic-gate 6207c478bd9Sstevel@tonic-gate /* 6217c478bd9Sstevel@tonic-gate * Make sure that latencies are symmetric between any two nodes 6227c478bd9Sstevel@tonic-gate * (ie. latency(node0, node1) == latency(node1, node0)) 6237c478bd9Sstevel@tonic-gate */ 6247c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 6257c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 6267c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[i][j]; 6277c478bd9Sstevel@tonic-gate t2 = lgrp_plat_probe_times[j][i]; 6287c478bd9Sstevel@tonic-gate 6297c478bd9Sstevel@tonic-gate if (t1 == 0 || t2 == 0 || t1 == t2) 6307c478bd9Sstevel@tonic-gate continue; 6317c478bd9Sstevel@tonic-gate 6327c478bd9Sstevel@tonic-gate /* 6337c478bd9Sstevel@tonic-gate * Latencies should be same 6347c478bd9Sstevel@tonic-gate * - Use minimum of two latencies which should be same 6357c478bd9Sstevel@tonic-gate * - Track suspect probe times not within tolerance of 6367c478bd9Sstevel@tonic-gate * min value 6377c478bd9Sstevel@tonic-gate * - Remember how much values are corrected by 6387c478bd9Sstevel@tonic-gate */ 6397c478bd9Sstevel@tonic-gate if (t1 > t2) { 6407c478bd9Sstevel@tonic-gate t = t2; 6417c478bd9Sstevel@tonic-gate lgrp_plat_probe_errors[i][j] += t1 - t2; 6427c478bd9Sstevel@tonic-gate if (t1 - t2 > t2 >> lgrp_plat_probe_lt_shift) { 6437c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[i][j]++; 6447c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[j][i]++; 6457c478bd9Sstevel@tonic-gate } 6467c478bd9Sstevel@tonic-gate } else if (t2 > t1) { 6477c478bd9Sstevel@tonic-gate t = t1; 6487c478bd9Sstevel@tonic-gate lgrp_plat_probe_errors[j][i] += t2 - t1; 6497c478bd9Sstevel@tonic-gate if (t2 - t1 > t1 >> lgrp_plat_probe_lt_shift) { 6507c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[i][j]++; 6517c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[j][i]++; 6527c478bd9Sstevel@tonic-gate } 6537c478bd9Sstevel@tonic-gate } 6547c478bd9Sstevel@tonic-gate 6557c478bd9Sstevel@tonic-gate 
lgrp_plat_probe_times[i][j] = 6567c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[j][i] = t; 6577c478bd9Sstevel@tonic-gate lgrp_config(cflag, t1, t); 6587c478bd9Sstevel@tonic-gate lgrp_config(cflag, t2, t); 6597c478bd9Sstevel@tonic-gate } 6607c478bd9Sstevel@tonic-gate 6617c478bd9Sstevel@tonic-gate /* 6627c478bd9Sstevel@tonic-gate * Keep track of which latencies get corrected 6637c478bd9Sstevel@tonic-gate */ 6647c478bd9Sstevel@tonic-gate for (i = 0; i < MAX_NODES; i++) 6657c478bd9Sstevel@tonic-gate for (j = 0; j < MAX_NODES; j++) 6667c478bd9Sstevel@tonic-gate lat_corrected[i][j] = 0; 6677c478bd9Sstevel@tonic-gate 6687c478bd9Sstevel@tonic-gate /* 6697c478bd9Sstevel@tonic-gate * For every two nodes, see whether there is another pair of nodes which 6707c478bd9Sstevel@tonic-gate * are about the same distance apart and make the latencies be the same 6717c478bd9Sstevel@tonic-gate * if they are close enough together 6727c478bd9Sstevel@tonic-gate */ 6737c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 6747c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 6757c478bd9Sstevel@tonic-gate /* 6767c478bd9Sstevel@tonic-gate * Pick one pair of nodes (i, j) 6777c478bd9Sstevel@tonic-gate * and get latency between them 6787c478bd9Sstevel@tonic-gate */ 6797c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[i][j]; 6807c478bd9Sstevel@tonic-gate 6817c478bd9Sstevel@tonic-gate /* 6827c478bd9Sstevel@tonic-gate * Skip this pair of nodes if there isn't a latency 6837c478bd9Sstevel@tonic-gate * for it yet 6847c478bd9Sstevel@tonic-gate */ 6857c478bd9Sstevel@tonic-gate if (t1 == 0) 6867c478bd9Sstevel@tonic-gate continue; 6877c478bd9Sstevel@tonic-gate 6887c478bd9Sstevel@tonic-gate for (k = 0; k < lgrp_plat_node_cnt; k++) 6897c478bd9Sstevel@tonic-gate for (l = 0; l < lgrp_plat_node_cnt; l++) { 6907c478bd9Sstevel@tonic-gate /* 6917c478bd9Sstevel@tonic-gate * Pick another pair of nodes (k, l) 6927c478bd9Sstevel@tonic-gate * not same as (i, j) and get latency 
6937c478bd9Sstevel@tonic-gate * between them 6947c478bd9Sstevel@tonic-gate */ 6957c478bd9Sstevel@tonic-gate if (k == i && l == j) 6967c478bd9Sstevel@tonic-gate continue; 6977c478bd9Sstevel@tonic-gate 6987c478bd9Sstevel@tonic-gate t2 = lgrp_plat_probe_times[k][l]; 6997c478bd9Sstevel@tonic-gate 7007c478bd9Sstevel@tonic-gate /* 7017c478bd9Sstevel@tonic-gate * Skip this pair of nodes if there 7027c478bd9Sstevel@tonic-gate * isn't a latency for it yet 7037c478bd9Sstevel@tonic-gate */ 7047c478bd9Sstevel@tonic-gate 7057c478bd9Sstevel@tonic-gate if (t2 == 0) 7067c478bd9Sstevel@tonic-gate continue; 7077c478bd9Sstevel@tonic-gate 7087c478bd9Sstevel@tonic-gate /* 7097c478bd9Sstevel@tonic-gate * Skip nodes (k, l) if they already 7107c478bd9Sstevel@tonic-gate * have same latency as (i, j) or 7117c478bd9Sstevel@tonic-gate * their latency isn't close enough to 7127c478bd9Sstevel@tonic-gate * be considered/made the same 7137c478bd9Sstevel@tonic-gate */ 7147c478bd9Sstevel@tonic-gate if (t1 == t2 || (t1 > t2 && t1 - t2 > 7157c478bd9Sstevel@tonic-gate t1 >> lgrp_plat_probe_lt_shift) || 7167c478bd9Sstevel@tonic-gate (t2 > t1 && t2 - t1 > 7177c478bd9Sstevel@tonic-gate t2 >> lgrp_plat_probe_lt_shift)) 7187c478bd9Sstevel@tonic-gate continue; 7197c478bd9Sstevel@tonic-gate 7207c478bd9Sstevel@tonic-gate /* 7217c478bd9Sstevel@tonic-gate * Make latency(i, j) same as 7227c478bd9Sstevel@tonic-gate * latency(k, l), try to use latency 7237c478bd9Sstevel@tonic-gate * that has been adjusted already to get 7247c478bd9Sstevel@tonic-gate * more consistency (if possible), and 7257c478bd9Sstevel@tonic-gate * remember which latencies were 7267c478bd9Sstevel@tonic-gate * adjusted for next time 7277c478bd9Sstevel@tonic-gate */ 7287c478bd9Sstevel@tonic-gate if (lat_corrected[i][j]) { 7297c478bd9Sstevel@tonic-gate t = t1; 7307c478bd9Sstevel@tonic-gate lgrp_config(cflag, t2, t); 7317c478bd9Sstevel@tonic-gate t2 = t; 7327c478bd9Sstevel@tonic-gate } else if (lat_corrected[k][l]) { 7337c478bd9Sstevel@tonic-gate t 
= t2; 7347c478bd9Sstevel@tonic-gate lgrp_config(cflag, t1, t); 7357c478bd9Sstevel@tonic-gate t1 = t; 7367c478bd9Sstevel@tonic-gate } else { 7377c478bd9Sstevel@tonic-gate if (t1 > t2) 7387c478bd9Sstevel@tonic-gate t = t2; 7397c478bd9Sstevel@tonic-gate else 7407c478bd9Sstevel@tonic-gate t = t1; 7417c478bd9Sstevel@tonic-gate lgrp_config(cflag, t1, t); 7427c478bd9Sstevel@tonic-gate lgrp_config(cflag, t2, t); 7437c478bd9Sstevel@tonic-gate t1 = t2 = t; 7447c478bd9Sstevel@tonic-gate } 7457c478bd9Sstevel@tonic-gate 7467c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][j] = 7477c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[k][l] = t; 7487c478bd9Sstevel@tonic-gate 7497c478bd9Sstevel@tonic-gate lat_corrected[i][j] = 7507c478bd9Sstevel@tonic-gate lat_corrected[k][l] = 1; 7517c478bd9Sstevel@tonic-gate } 7527c478bd9Sstevel@tonic-gate } 7537c478bd9Sstevel@tonic-gate 7547c478bd9Sstevel@tonic-gate /* 7557c478bd9Sstevel@tonic-gate * Local latencies should be same 7567c478bd9Sstevel@tonic-gate * - Find min and max local latencies 7577c478bd9Sstevel@tonic-gate * - Make all local latencies be minimum 7587c478bd9Sstevel@tonic-gate */ 7597c478bd9Sstevel@tonic-gate min = -1; 7607c478bd9Sstevel@tonic-gate max = 0; 7617c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 7627c478bd9Sstevel@tonic-gate t = lgrp_plat_probe_times[i][i]; 7637c478bd9Sstevel@tonic-gate if (t == 0) 7647c478bd9Sstevel@tonic-gate continue; 7657c478bd9Sstevel@tonic-gate if (min == -1 || t < min) 7667c478bd9Sstevel@tonic-gate min = t; 7677c478bd9Sstevel@tonic-gate if (t > max) 7687c478bd9Sstevel@tonic-gate max = t; 7697c478bd9Sstevel@tonic-gate } 7707c478bd9Sstevel@tonic-gate if (min != max) { 7717c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 7727c478bd9Sstevel@tonic-gate int local; 7737c478bd9Sstevel@tonic-gate 7747c478bd9Sstevel@tonic-gate local = lgrp_plat_probe_times[i][i]; 7757c478bd9Sstevel@tonic-gate if (local == 0) 7767c478bd9Sstevel@tonic-gate continue; 
7777c478bd9Sstevel@tonic-gate 7787c478bd9Sstevel@tonic-gate /* 7797c478bd9Sstevel@tonic-gate * Track suspect probe times that aren't within 7807c478bd9Sstevel@tonic-gate * tolerance of minimum local latency and how much 7817c478bd9Sstevel@tonic-gate * probe times are corrected by 7827c478bd9Sstevel@tonic-gate */ 7837c478bd9Sstevel@tonic-gate if (local - min > min >> lgrp_plat_probe_lt_shift) 7847c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[i][i]++; 7857c478bd9Sstevel@tonic-gate 7867c478bd9Sstevel@tonic-gate lgrp_plat_probe_errors[i][i] += local - min; 7877c478bd9Sstevel@tonic-gate 7887c478bd9Sstevel@tonic-gate /* 7897c478bd9Sstevel@tonic-gate * Make local latencies be minimum 7907c478bd9Sstevel@tonic-gate */ 7917c478bd9Sstevel@tonic-gate lgrp_config(cflag, local, min); 7927c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][i] = min; 7937c478bd9Sstevel@tonic-gate } 7947c478bd9Sstevel@tonic-gate } 7957c478bd9Sstevel@tonic-gate 7967c478bd9Sstevel@tonic-gate /* 7977c478bd9Sstevel@tonic-gate * Determine max probe time again since just adjusted latencies 7987c478bd9Sstevel@tonic-gate */ 7997c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max = 0; 8007c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 8017c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 8027c478bd9Sstevel@tonic-gate t = lgrp_plat_probe_times[i][j]; 8037c478bd9Sstevel@tonic-gate if (t > lgrp_plat_probe_time_max) 8047c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max = t; 8057c478bd9Sstevel@tonic-gate } 8067c478bd9Sstevel@tonic-gate } 8077c478bd9Sstevel@tonic-gate 8087c478bd9Sstevel@tonic-gate 8097c478bd9Sstevel@tonic-gate /* 8107c478bd9Sstevel@tonic-gate * Verify following about latencies between nodes: 8117c478bd9Sstevel@tonic-gate * 8127c478bd9Sstevel@tonic-gate * - Latencies should be symmetric (ie. 
 * latency(a, b) == latency(b, a))
 * - Local latencies same
 * - Local < remote
 * - Number of latencies seen is reasonable
 * - Number of occurrences of a given latency should be more than 1
 *
 * Returns:
 *	0	Success
 *	-1	Not symmetric
 *	-2	Local latencies not same
 *	-3	Local >= remote
 *	-4	Wrong number of latencies
 *	-5	Not enough occurrences of given latency
 */
static int
lgrp_plat_latency_verify(void)
{
	int				i;
	int				j;
	lgrp_plat_latency_acct_t	*l;
	int				probed;
	u_longlong_t			t1;
	u_longlong_t			t2;

	/*
	 * Nothing to do when this is an UMA machine, lgroup topology is
	 * limited to 2 levels, or there aren't any probe times yet
	 */
	if (max_mem_nodes == 1 || lgrp_topo_levels < 2 ||
	    (lgrp_plat_probe_time_max == 0 && lgrp_plat_probe_time_min == -1))
		return (0);

	/*
	 * Make sure that latencies are symmetric between any two nodes
	 * (ie. latency(node0, node1) == latency(node1, node0))
	 */
	for (i = 0; i < lgrp_plat_node_cnt; i++)
		for (j = 0; j < lgrp_plat_node_cnt; j++) {
			t1 = lgrp_plat_probe_times[i][j];
			t2 = lgrp_plat_probe_times[j][i];

			/*
			 * Unprobed (0) or already-equal pairs are fine;
			 * anything else is an asymmetry.
			 */
			if (t1 == 0 || t2 == 0 || t1 == t2)
				continue;

			return (-1);
		}

	/*
	 * Local latencies should be same
	 * (t1 holds the first nonzero local latency seen so far)
	 */
	t1 = lgrp_plat_probe_times[0][0];
	for (i = 1; i < lgrp_plat_node_cnt; i++) {
		t2 = lgrp_plat_probe_times[i][i];
		if (t2 == 0)
			continue;

		if (t1 == 0) {
			t1 = t2;
			continue;
		}

		if (t1 != t2)
			return (-2);
	}

	/*
	 * Local latencies should be less than remote
	 * (only checkable once some local latency t1 is known)
	 */
	if (t1) {
		for (i = 0; i < lgrp_plat_node_cnt; i++)
			for (j = 0; j < lgrp_plat_node_cnt; j++) {
				t2 = lgrp_plat_probe_times[i][j];
				if (i == j || t2 == 0)
					continue;

				if (t1 >= t2)
					return (-3);
			}
	}

	/*
	 * Rest of checks are not very useful for machines with less than
	 * 4 nodes (which means less than 3 latencies on Opteron)
	 */
	if (lgrp_plat_node_cnt < 4)
		return (0);

	/*
	 * Need to see whether done probing in order to verify number of
	 * latencies are correct
	 */
	probed = 0;
	for (i = 0; i < lgrp_plat_node_cnt; i++)
		if (lgrp_plat_probe_times[i][i])
			probed++;

	if (probed != lgrp_plat_node_cnt)
		return (0);

	/*
	 * Determine number of unique latencies seen in probe times,
	 * their values, and number of occurrences of each
	 */
	lgrp_plat_probe_nlatencies = 0;
	bzero(lgrp_plat_probe_lat_acct,
	    MAX_NODES * sizeof (lgrp_plat_latency_acct_t));
	for (i = 0; i < lgrp_plat_node_cnt; i++) {
		for (j = 0; j < lgrp_plat_node_cnt; j++) {
			int	k;

			/*
			 * Look at each probe time
			 */
			t1 = lgrp_plat_probe_times[i][j];
			if (t1 == 0)
				continue;

			/*
			 * Account for unique latencies
			 * (linear scan of the accounting table; a zero
			 * la_value entry marks the first unused slot)
			 */
			for (k = 0; k < lgrp_plat_node_cnt; k++) {
				l = &lgrp_plat_probe_lat_acct[k];
				if (t1 == l->la_value) {
					/*
					 * Increment number of occurrences
					 * if seen before
					 */
					l->la_count++;
					break;
				} else if (l->la_value == 0) {
					/*
					 * Record latency if haven't seen before
					 */
					l->la_value = t1;
					l->la_count++;
					lgrp_plat_probe_nlatencies++;
					break;
				}
			}
		}
	}

	/*
	 * Number of latencies should be relative to number of
	 * nodes in system:
	 * - Same as nodes when nodes <= 2
	 * - Less than nodes when nodes > 2
	 * - Greater than 2 when nodes >= 4
	 */
	if ((lgrp_plat_node_cnt <= 2 &&
	    lgrp_plat_probe_nlatencies != lgrp_plat_node_cnt) ||
	    (lgrp_plat_node_cnt > 2 &&
	    lgrp_plat_probe_nlatencies >= lgrp_plat_node_cnt) ||
	    (lgrp_plat_node_cnt >= 4 && lgrp_topo_levels >= 3 &&
	    lgrp_plat_probe_nlatencies <= 2))
		return (-4);

	/*
	 * There should be more than one occurrence of every latency
	 * as long as probing is complete
	 */
	for (i = 0; i < lgrp_plat_probe_nlatencies; i++) {
		l = &lgrp_plat_probe_lat_acct[i];
		if (l->la_count <= 1)
			return (-5);
	}
	return (0);
}


/*
 * Set lgroup latencies for 2 level lgroup topology
 *
 * Overwrites the probe time matrix with synthetic values: 2 for local,
 * 3 for remote, then tells the lgroup framework to flatten the topology.
 */
static void
lgrp_plat_2level_setup(void)
{
	int	i;

	if (lgrp_plat_node_cnt >= 4)
		cmn_err(CE_NOTE,
		    "MPO only optimizing for local and remote\n");
	for (i = 0; i < lgrp_plat_node_cnt; i++) {
		int	j;

		for (j = 0; j < lgrp_plat_node_cnt; j++) {
			if (i == j)
				lgrp_plat_probe_times[i][j] = 2;
			else
				lgrp_plat_probe_times[i][j] = 3;
		}
	}
	lgrp_plat_probe_time_min = 2;
	lgrp_plat_probe_time_max = 3;
	lgrp_config(LGRP_CONFIG_FLATTEN, 2, 0);
}


/*
 * Return time needed to probe from current CPU to memory in given node
 */
static hrtime_t
lgrp_plat_probe_time(int to)
{
	caddr_t		buf;
	uint_t		dev;
	/* LINTED: set but not used in function */
	volatile uint_t	dev_vendor;
	hrtime_t	elapsed;
	hrtime_t	end;
	int		from;
	int		i;
	int		ipl;
	hrtime_t	max;
	hrtime_t	min;
	hrtime_t	start;
	extern int	use_sse_pagecopy;

	/*
	 * Determine ID of node containing current CPU
	 */
	from = LGRP_PLAT_CPU_TO_NODE(CPU);

	/*
	 * Do common work for probing main memory
	 */
	if (lgrp_plat_probe_op == LGRP_PLAT_PROBE_PGCPY) {
		/*
		 * Skip probing any nodes without memory and
		 * set probe time to 0
		 */
		if (lgrp_plat_probe_memory[to] == NULL) {
			lgrp_plat_probe_times[from][to] = 0;
			return (0);
		}

		/*
		 * Invalidate caches once instead of once every sample
		 * which should cut cost of probing by a lot
		 */
		lgrp_plat_flush_cost = gethrtime();
		invalidate_cache();
		lgrp_plat_flush_cost = gethrtime() - lgrp_plat_flush_cost;
		lgrp_plat_probe_cost_total += lgrp_plat_flush_cost;
	}

	/*
	 * Probe from current CPU to given memory using specified operation
	 * and take specified number of samples
	 */
	max = 0;
	min = -1;	/* all-ones sentinel for "no sample yet" */
	for (i = 0; i < lgrp_plat_probe_nsamples; i++) {
		lgrp_plat_probe_cost = gethrtime();

		/*
		 * Can't measure probe time if gethrtime() isn't working yet
		 */
		if (lgrp_plat_probe_cost == 0 && gethrtime() == 0)
			return (0);

		switch (lgrp_plat_probe_op) {

		case LGRP_PLAT_PROBE_PGCPY:
		default:
			/*
			 * Measure how long it takes to copy page
			 * on top of itself
			 * (timed section runs with preemption disabled
			 * and interrupts blocked at splhigh so only the
			 * copy is measured)
			 */
			buf = lgrp_plat_probe_memory[to] + (i * PAGESIZE);

			kpreempt_disable();
			ipl = splhigh();
			start = gethrtime();
			if (use_sse_pagecopy)
				hwblkpagecopy(buf, buf);
			else
				bcopy(buf, buf, PAGESIZE);
			end = gethrtime();
			elapsed = end - start;
			splx(ipl);
			kpreempt_enable();
			break;

		case LGRP_PLAT_PROBE_VENDOR:
			/*
			 * Measure how long it takes to read vendor ID from
			 * Northbridge
			 * (PCI config mechanism #1: address to PCI_CONFADD,
			 * data from PCI_CONFDATA; only the inl() is timed)
			 */
			dev = OPT_PCS_DEV_NODE0 + to;
			kpreempt_disable();
			ipl = spl8();
			outl(PCI_CONFADD, PCI_CADDR1(0, dev, opt_probe_func,
			    OPT_PCS_OFF_VENDOR));
			start = gethrtime();
			dev_vendor = inl(PCI_CONFDATA);
			end = gethrtime();
			elapsed = end - start;
			splx(ipl);
			kpreempt_enable();
			break;
		}

		lgrp_plat_probe_cost = gethrtime() - lgrp_plat_probe_cost;
		lgrp_plat_probe_cost_total += lgrp_plat_probe_cost;

		if (min == -1 || elapsed < min)
			min = elapsed;
		if (elapsed > max)
			max = elapsed;
	}

	/*
	 * Update minimum and maximum probe times between
	 * these two nodes
	 */
	if (min <
	    lgrp_plat_probe_min[from][to] ||
	    lgrp_plat_probe_min[from][to] == 0)
		lgrp_plat_probe_min[from][to] = min;

	if (max > lgrp_plat_probe_max[from][to])
		lgrp_plat_probe_max[from][to] = max;

	/* Minimum sample is the best estimate of the true latency */
	return (min);
}


/*
 * Probe memory in each node from current CPU to determine latency topology
 */
void
lgrp_plat_probe(void)
{
	int		from;
	int		i;
	hrtime_t	probe_time;
	int		to;

	/* Probing is pointless on UMA or a flattened (<= 2 level) topology */
	if (max_mem_nodes == 1 || lgrp_topo_ht_limit() <= 2)
		return;

	/*
	 * Determine ID of node containing current CPU
	 */
	from = LGRP_PLAT_CPU_TO_NODE(CPU);

	/*
	 * Don't need to probe if got times already
	 */
	if (lgrp_plat_probe_times[from][from] != 0)
		return;

	/*
	 * Read vendor ID in Northbridge or read and write page(s)
	 * in each node from current CPU and remember how long it takes,
	 * so we can build latency topology of machine later.
	 * This should approximate the memory latency between each node.
	 */
	for (i = 0; i < lgrp_plat_probe_nrounds; i++)
		for (to = 0; to < lgrp_plat_node_cnt; to++) {
			/*
			 * Get probe time and bail out if can't get it yet
			 * (probe_time == 0 means gethrtime() isn't ready)
			 */
			probe_time = lgrp_plat_probe_time(to);
			if (probe_time == 0)
				return;

			/*
			 * Keep lowest probe time as latency between nodes
			 */
			if (lgrp_plat_probe_times[from][to] == 0 ||
			    probe_time < lgrp_plat_probe_times[from][to])
				lgrp_plat_probe_times[from][to] = probe_time;

			/*
			 * Update overall minimum and maximum probe times
			 * across all nodes
			 */
			if (probe_time < lgrp_plat_probe_time_min ||
			    lgrp_plat_probe_time_min == -1)
				lgrp_plat_probe_time_min = probe_time;
			if (probe_time > lgrp_plat_probe_time_max)
				lgrp_plat_probe_time_max = probe_time;
		}

	/*
	 * - Fix up latencies such that local latencies are same,
	 *   latency(i, j) == latency(j, i), etc.
(if possible) 11997c478bd9Sstevel@tonic-gate * 12007c478bd9Sstevel@tonic-gate * - Verify that latencies look ok 12017c478bd9Sstevel@tonic-gate * 12027c478bd9Sstevel@tonic-gate * - Fallback to just optimizing for local and remote if 12037c478bd9Sstevel@tonic-gate * latencies didn't look right 12047c478bd9Sstevel@tonic-gate */ 12057c478bd9Sstevel@tonic-gate lgrp_plat_latency_adjust(); 12067c478bd9Sstevel@tonic-gate lgrp_plat_probe_error_code = lgrp_plat_latency_verify(); 12077c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_error_code) 12087c478bd9Sstevel@tonic-gate lgrp_plat_2level_setup(); 12097c478bd9Sstevel@tonic-gate } 12107c478bd9Sstevel@tonic-gate 12117c478bd9Sstevel@tonic-gate 12127c478bd9Sstevel@tonic-gate /* 12137c478bd9Sstevel@tonic-gate * Platform-specific initialization 12147c478bd9Sstevel@tonic-gate */ 12157c478bd9Sstevel@tonic-gate void 12167c478bd9Sstevel@tonic-gate lgrp_plat_main_init(void) 12177c478bd9Sstevel@tonic-gate { 12187c478bd9Sstevel@tonic-gate int curnode; 12197c478bd9Sstevel@tonic-gate int ht_limit; 12207c478bd9Sstevel@tonic-gate int i; 12217c478bd9Sstevel@tonic-gate 12227c478bd9Sstevel@tonic-gate /* 12237c478bd9Sstevel@tonic-gate * Print a notice that MPO is disabled when memory is interleaved 12247c478bd9Sstevel@tonic-gate * across nodes....Would do this when it is discovered, but can't 12257c478bd9Sstevel@tonic-gate * because it happens way too early during boot.... 
12267c478bd9Sstevel@tonic-gate */ 12277c478bd9Sstevel@tonic-gate if (lgrp_plat_mem_intrlv) 12287c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, 12297c478bd9Sstevel@tonic-gate "MPO disabled because memory is interleaved\n"); 12307c478bd9Sstevel@tonic-gate 12317c478bd9Sstevel@tonic-gate /* 12327c478bd9Sstevel@tonic-gate * Don't bother to do any probing if there is only one node or the 12337c478bd9Sstevel@tonic-gate * height of the lgroup topology less than or equal to 2 12347c478bd9Sstevel@tonic-gate */ 12357c478bd9Sstevel@tonic-gate ht_limit = lgrp_topo_ht_limit(); 12367c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1 || ht_limit <= 2) { 12377c478bd9Sstevel@tonic-gate /* 12387c478bd9Sstevel@tonic-gate * Setup lgroup latencies for 2 level lgroup topology 12397c478bd9Sstevel@tonic-gate * (ie. local and remote only) if they haven't been set yet 12407c478bd9Sstevel@tonic-gate */ 12417c478bd9Sstevel@tonic-gate if (ht_limit == 2 && lgrp_plat_probe_time_min == -1 && 12427c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max == 0) 12437c478bd9Sstevel@tonic-gate lgrp_plat_2level_setup(); 12447c478bd9Sstevel@tonic-gate return; 12457c478bd9Sstevel@tonic-gate } 12467c478bd9Sstevel@tonic-gate 12477c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_op == LGRP_PLAT_PROBE_VENDOR) { 12487c478bd9Sstevel@tonic-gate /* 12497c478bd9Sstevel@tonic-gate * Should have been able to probe from CPU 0 when it was added 12507c478bd9Sstevel@tonic-gate * to lgroup hierarchy, but may not have been able to then 12517c478bd9Sstevel@tonic-gate * because it happens so early in boot that gethrtime() hasn't 12527c478bd9Sstevel@tonic-gate * been initialized. 
(:-( 12537c478bd9Sstevel@tonic-gate */ 12547c478bd9Sstevel@tonic-gate curnode = LGRP_PLAT_CPU_TO_NODE(CPU); 12557c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_times[curnode][curnode] == 0) 12567c478bd9Sstevel@tonic-gate lgrp_plat_probe(); 12577c478bd9Sstevel@tonic-gate 12587c478bd9Sstevel@tonic-gate return; 12597c478bd9Sstevel@tonic-gate } 12607c478bd9Sstevel@tonic-gate 12617c478bd9Sstevel@tonic-gate /* 12627c478bd9Sstevel@tonic-gate * When probing memory, use one page for every sample to determine 12637c478bd9Sstevel@tonic-gate * lgroup topology and taking multiple samples 12647c478bd9Sstevel@tonic-gate */ 12657c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_memsize == 0) 12667c478bd9Sstevel@tonic-gate lgrp_plat_probe_memsize = PAGESIZE * 12677c478bd9Sstevel@tonic-gate lgrp_plat_probe_nsamples; 12687c478bd9Sstevel@tonic-gate 12697c478bd9Sstevel@tonic-gate /* 12707c478bd9Sstevel@tonic-gate * Map memory in each node needed for probing to determine latency 12717c478bd9Sstevel@tonic-gate * topology 12727c478bd9Sstevel@tonic-gate */ 12737c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 12747c478bd9Sstevel@tonic-gate int mnode; 12757c478bd9Sstevel@tonic-gate 12767c478bd9Sstevel@tonic-gate /* 12777c478bd9Sstevel@tonic-gate * Skip this node and leave its probe page NULL 12787c478bd9Sstevel@tonic-gate * if it doesn't have any memory 12797c478bd9Sstevel@tonic-gate */ 12807c478bd9Sstevel@tonic-gate mnode = plat_lgrphand_to_mem_node((lgrp_handle_t)i); 12817c478bd9Sstevel@tonic-gate if (!mem_node_config[mnode].exists) { 12827c478bd9Sstevel@tonic-gate lgrp_plat_probe_memory[i] = NULL; 12837c478bd9Sstevel@tonic-gate continue; 12847c478bd9Sstevel@tonic-gate } 12857c478bd9Sstevel@tonic-gate 12867c478bd9Sstevel@tonic-gate /* 12877c478bd9Sstevel@tonic-gate * Allocate one kernel virtual page 12887c478bd9Sstevel@tonic-gate */ 12897c478bd9Sstevel@tonic-gate lgrp_plat_probe_memory[i] = vmem_alloc(heap_arena, 12907c478bd9Sstevel@tonic-gate lgrp_plat_probe_memsize, 
VM_NOSLEEP); 12917c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_memory[i] == NULL) { 12927c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 12937c478bd9Sstevel@tonic-gate "lgrp_plat_main_init: couldn't allocate memory"); 12947c478bd9Sstevel@tonic-gate return; 12957c478bd9Sstevel@tonic-gate } 12967c478bd9Sstevel@tonic-gate 12977c478bd9Sstevel@tonic-gate /* 12987c478bd9Sstevel@tonic-gate * Map virtual page to first page in node 12997c478bd9Sstevel@tonic-gate */ 13007c478bd9Sstevel@tonic-gate hat_devload(kas.a_hat, lgrp_plat_probe_memory[i], 13017c478bd9Sstevel@tonic-gate lgrp_plat_probe_memsize, 13027c478bd9Sstevel@tonic-gate lgrp_plat_probe_pfn[i], 13037c478bd9Sstevel@tonic-gate PROT_READ | PROT_WRITE | HAT_PLAT_NOCACHE, 13047c478bd9Sstevel@tonic-gate HAT_LOAD_NOCONSIST); 13057c478bd9Sstevel@tonic-gate } 13067c478bd9Sstevel@tonic-gate 13077c478bd9Sstevel@tonic-gate /* 13087c478bd9Sstevel@tonic-gate * Probe from current CPU 13097c478bd9Sstevel@tonic-gate */ 13107c478bd9Sstevel@tonic-gate lgrp_plat_probe(); 13117c478bd9Sstevel@tonic-gate } 13127c478bd9Sstevel@tonic-gate 13137c478bd9Sstevel@tonic-gate /* 13147c478bd9Sstevel@tonic-gate * Allocate additional space for an lgroup. 
13157c478bd9Sstevel@tonic-gate */ 13167c478bd9Sstevel@tonic-gate /* ARGSUSED */ 13177c478bd9Sstevel@tonic-gate lgrp_t * 13187c478bd9Sstevel@tonic-gate lgrp_plat_alloc(lgrp_id_t lgrpid) 13197c478bd9Sstevel@tonic-gate { 13207c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 13217c478bd9Sstevel@tonic-gate 13227c478bd9Sstevel@tonic-gate lgrp = &lgrp_space[nlgrps_alloc++]; 13237c478bd9Sstevel@tonic-gate if (lgrpid >= NLGRP || nlgrps_alloc > NLGRP) 13247c478bd9Sstevel@tonic-gate return (NULL); 13257c478bd9Sstevel@tonic-gate return (lgrp); 13267c478bd9Sstevel@tonic-gate } 13277c478bd9Sstevel@tonic-gate 13287c478bd9Sstevel@tonic-gate /* 13297c478bd9Sstevel@tonic-gate * Platform handling for (re)configuration changes 13307c478bd9Sstevel@tonic-gate */ 13317c478bd9Sstevel@tonic-gate /* ARGSUSED */ 13327c478bd9Sstevel@tonic-gate void 13337c478bd9Sstevel@tonic-gate lgrp_plat_config(lgrp_config_flag_t flag, uintptr_t arg) 13347c478bd9Sstevel@tonic-gate { 13357c478bd9Sstevel@tonic-gate } 13367c478bd9Sstevel@tonic-gate 13377c478bd9Sstevel@tonic-gate /* 13387c478bd9Sstevel@tonic-gate * Return the platform handle for the lgroup containing the given CPU 13397c478bd9Sstevel@tonic-gate */ 13407c478bd9Sstevel@tonic-gate /* ARGSUSED */ 13417c478bd9Sstevel@tonic-gate lgrp_handle_t 13427c478bd9Sstevel@tonic-gate lgrp_plat_cpu_to_hand(processorid_t id) 13437c478bd9Sstevel@tonic-gate { 13447c478bd9Sstevel@tonic-gate if (lgrp_plat_node_cnt == 1) 13457c478bd9Sstevel@tonic-gate return (LGRP_DEFAULT_HANDLE); 13467c478bd9Sstevel@tonic-gate 13477c478bd9Sstevel@tonic-gate return ((lgrp_handle_t)LGRP_PLAT_CPU_TO_NODE(cpu[id])); 13487c478bd9Sstevel@tonic-gate } 13497c478bd9Sstevel@tonic-gate 13507c478bd9Sstevel@tonic-gate /* 13517c478bd9Sstevel@tonic-gate * Return the platform handle of the lgroup that contains the physical memory 13527c478bd9Sstevel@tonic-gate * corresponding to the given page frame number 13537c478bd9Sstevel@tonic-gate */ 13547c478bd9Sstevel@tonic-gate /* ARGSUSED */ 
13557c478bd9Sstevel@tonic-gate lgrp_handle_t 13567c478bd9Sstevel@tonic-gate lgrp_plat_pfn_to_hand(pfn_t pfn) 13577c478bd9Sstevel@tonic-gate { 13587c478bd9Sstevel@tonic-gate int mnode; 13597c478bd9Sstevel@tonic-gate 13607c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 13617c478bd9Sstevel@tonic-gate return (LGRP_DEFAULT_HANDLE); 13627c478bd9Sstevel@tonic-gate 13637c478bd9Sstevel@tonic-gate mnode = plat_pfn_to_mem_node(pfn); 13647c478bd9Sstevel@tonic-gate return (MEM_NODE_2_LGRPHAND(mnode)); 13657c478bd9Sstevel@tonic-gate } 13667c478bd9Sstevel@tonic-gate 13677c478bd9Sstevel@tonic-gate /* 13687c478bd9Sstevel@tonic-gate * Return the maximum number of lgrps supported by the platform. 13697c478bd9Sstevel@tonic-gate * Before lgrp topology is known it returns an estimate based on the number of 13707c478bd9Sstevel@tonic-gate * nodes. Once topology is known it returns the actual maximim number of lgrps 13717c478bd9Sstevel@tonic-gate * created. Since x86 doesn't support dynamic addition of new nodes, this number 13727c478bd9Sstevel@tonic-gate * may not grow during system lifetime. 13737c478bd9Sstevel@tonic-gate */ 13747c478bd9Sstevel@tonic-gate int 13757c478bd9Sstevel@tonic-gate lgrp_plat_max_lgrps() 13767c478bd9Sstevel@tonic-gate { 13777c478bd9Sstevel@tonic-gate return (lgrp_topo_initialized ? 13787c478bd9Sstevel@tonic-gate lgrp_alloc_max + 1 : 13797c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt * (lgrp_plat_node_cnt - 1) + 1); 13807c478bd9Sstevel@tonic-gate } 13817c478bd9Sstevel@tonic-gate 13827c478bd9Sstevel@tonic-gate /* 13837c478bd9Sstevel@tonic-gate * Return the number of free, allocatable, or installed 13847c478bd9Sstevel@tonic-gate * pages in an lgroup 13857c478bd9Sstevel@tonic-gate * This is a copy of the MAX_MEM_NODES == 1 version of the routine 13867c478bd9Sstevel@tonic-gate * used when MPO is disabled (i.e. 
single lgroup) or this is the root lgroup 13877c478bd9Sstevel@tonic-gate */ 13887c478bd9Sstevel@tonic-gate /* ARGSUSED */ 13897c478bd9Sstevel@tonic-gate static pgcnt_t 13907c478bd9Sstevel@tonic-gate lgrp_plat_mem_size_default(lgrp_handle_t lgrphand, lgrp_mem_query_t query) 13917c478bd9Sstevel@tonic-gate { 13927c478bd9Sstevel@tonic-gate struct memlist *mlist; 13937c478bd9Sstevel@tonic-gate pgcnt_t npgs = 0; 13947c478bd9Sstevel@tonic-gate extern struct memlist *phys_avail; 13957c478bd9Sstevel@tonic-gate extern struct memlist *phys_install; 13967c478bd9Sstevel@tonic-gate 13977c478bd9Sstevel@tonic-gate switch (query) { 13987c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_FREE: 13997c478bd9Sstevel@tonic-gate return ((pgcnt_t)freemem); 14007c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_AVAIL: 14017c478bd9Sstevel@tonic-gate memlist_read_lock(); 14027c478bd9Sstevel@tonic-gate for (mlist = phys_avail; mlist; mlist = mlist->next) 14037c478bd9Sstevel@tonic-gate npgs += btop(mlist->size); 14047c478bd9Sstevel@tonic-gate memlist_read_unlock(); 14057c478bd9Sstevel@tonic-gate return (npgs); 14067c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_INSTALL: 14077c478bd9Sstevel@tonic-gate memlist_read_lock(); 14087c478bd9Sstevel@tonic-gate for (mlist = phys_install; mlist; mlist = mlist->next) 14097c478bd9Sstevel@tonic-gate npgs += btop(mlist->size); 14107c478bd9Sstevel@tonic-gate memlist_read_unlock(); 14117c478bd9Sstevel@tonic-gate return (npgs); 14127c478bd9Sstevel@tonic-gate default: 14137c478bd9Sstevel@tonic-gate return ((pgcnt_t)0); 14147c478bd9Sstevel@tonic-gate } 14157c478bd9Sstevel@tonic-gate } 14167c478bd9Sstevel@tonic-gate 14177c478bd9Sstevel@tonic-gate /* 14187c478bd9Sstevel@tonic-gate * Return the number of free pages in an lgroup. 14197c478bd9Sstevel@tonic-gate * 14207c478bd9Sstevel@tonic-gate * For query of LGRP_MEM_SIZE_FREE, return the number of base pagesize 14217c478bd9Sstevel@tonic-gate * pages on freelists. 
For query of LGRP_MEM_SIZE_AVAIL, return the 14227c478bd9Sstevel@tonic-gate * number of allocatable base pagesize pages corresponding to the 14237c478bd9Sstevel@tonic-gate * lgroup (e.g. do not include page_t's, BOP_ALLOC()'ed memory, ..) 14247c478bd9Sstevel@tonic-gate * For query of LGRP_MEM_SIZE_INSTALL, return the amount of physical 14257c478bd9Sstevel@tonic-gate * memory installed, regardless of whether or not it's usable. 14267c478bd9Sstevel@tonic-gate */ 14277c478bd9Sstevel@tonic-gate pgcnt_t 14287c478bd9Sstevel@tonic-gate lgrp_plat_mem_size(lgrp_handle_t plathand, lgrp_mem_query_t query) 14297c478bd9Sstevel@tonic-gate { 14307c478bd9Sstevel@tonic-gate int mnode; 14317c478bd9Sstevel@tonic-gate pgcnt_t npgs = (pgcnt_t)0; 14327c478bd9Sstevel@tonic-gate extern struct memlist *phys_avail; 14337c478bd9Sstevel@tonic-gate extern struct memlist *phys_install; 14347c478bd9Sstevel@tonic-gate 14357c478bd9Sstevel@tonic-gate 14367c478bd9Sstevel@tonic-gate if (plathand == LGRP_DEFAULT_HANDLE) 14377c478bd9Sstevel@tonic-gate return (lgrp_plat_mem_size_default(plathand, query)); 14387c478bd9Sstevel@tonic-gate 14397c478bd9Sstevel@tonic-gate if (plathand != LGRP_NULL_HANDLE) { 14407c478bd9Sstevel@tonic-gate mnode = plat_lgrphand_to_mem_node(plathand); 14417c478bd9Sstevel@tonic-gate if (mnode >= 0 && mem_node_config[mnode].exists) { 14427c478bd9Sstevel@tonic-gate switch (query) { 14437c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_FREE: 14447c478bd9Sstevel@tonic-gate npgs = mem_node_config[mnode].cursize; 14457c478bd9Sstevel@tonic-gate break; 14467c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_AVAIL: 14477c478bd9Sstevel@tonic-gate npgs = mem_node_memlist_pages(mnode, 14487c478bd9Sstevel@tonic-gate phys_avail); 14497c478bd9Sstevel@tonic-gate break; 14507c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_INSTALL: 14517c478bd9Sstevel@tonic-gate npgs = mem_node_memlist_pages(mnode, 14527c478bd9Sstevel@tonic-gate phys_install); 14537c478bd9Sstevel@tonic-gate break; 
14547c478bd9Sstevel@tonic-gate default: 14557c478bd9Sstevel@tonic-gate break; 14567c478bd9Sstevel@tonic-gate } 14577c478bd9Sstevel@tonic-gate } 14587c478bd9Sstevel@tonic-gate } 14597c478bd9Sstevel@tonic-gate return (npgs); 14607c478bd9Sstevel@tonic-gate } 14617c478bd9Sstevel@tonic-gate 14627c478bd9Sstevel@tonic-gate /* 14637c478bd9Sstevel@tonic-gate * Return latency between "from" and "to" lgroups 14647c478bd9Sstevel@tonic-gate * 14657c478bd9Sstevel@tonic-gate * This latency number can only be used for relative comparison 14667c478bd9Sstevel@tonic-gate * between lgroups on the running system, cannot be used across platforms, 14677c478bd9Sstevel@tonic-gate * and may not reflect the actual latency. It is platform and implementation 14687c478bd9Sstevel@tonic-gate * specific, so platform gets to decide its value. It would be nice if the 14697c478bd9Sstevel@tonic-gate * number was at least proportional to make comparisons more meaningful though. 14707c478bd9Sstevel@tonic-gate */ 14717c478bd9Sstevel@tonic-gate /* ARGSUSED */ 14727c478bd9Sstevel@tonic-gate int 14737c478bd9Sstevel@tonic-gate lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to) 14747c478bd9Sstevel@tonic-gate { 14757c478bd9Sstevel@tonic-gate lgrp_handle_t src, dest; 14767c478bd9Sstevel@tonic-gate 14777c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 14787c478bd9Sstevel@tonic-gate return (0); 14797c478bd9Sstevel@tonic-gate 14807c478bd9Sstevel@tonic-gate /* 14817c478bd9Sstevel@tonic-gate * Return max latency for root lgroup 14827c478bd9Sstevel@tonic-gate */ 14837c478bd9Sstevel@tonic-gate if (from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE) 14847c478bd9Sstevel@tonic-gate return (lgrp_plat_probe_time_max); 14857c478bd9Sstevel@tonic-gate 14867c478bd9Sstevel@tonic-gate src = from; 14877c478bd9Sstevel@tonic-gate dest = to; 14887c478bd9Sstevel@tonic-gate 14897c478bd9Sstevel@tonic-gate /* 14907c478bd9Sstevel@tonic-gate * Return 0 for nodes (lgroup platform handles) out of range 
14917c478bd9Sstevel@tonic-gate */ 14927c478bd9Sstevel@tonic-gate if (src < 0 || src >= MAX_NODES || dest < 0 || dest >= MAX_NODES) 14937c478bd9Sstevel@tonic-gate return (0); 14947c478bd9Sstevel@tonic-gate 14957c478bd9Sstevel@tonic-gate /* 14967c478bd9Sstevel@tonic-gate * Probe from current CPU if its lgroup latencies haven't been set yet 14977c478bd9Sstevel@tonic-gate * and we are trying to get latency from current CPU to some node 14987c478bd9Sstevel@tonic-gate */ 14997c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_times[src][src] == 0 && 15007c478bd9Sstevel@tonic-gate LGRP_PLAT_CPU_TO_NODE(CPU) == src) 15017c478bd9Sstevel@tonic-gate lgrp_plat_probe(); 15027c478bd9Sstevel@tonic-gate 15037c478bd9Sstevel@tonic-gate return (lgrp_plat_probe_times[src][dest]); 15047c478bd9Sstevel@tonic-gate } 15057c478bd9Sstevel@tonic-gate 15067c478bd9Sstevel@tonic-gate /* 15077c478bd9Sstevel@tonic-gate * Return platform handle for root lgroup 15087c478bd9Sstevel@tonic-gate */ 15097c478bd9Sstevel@tonic-gate lgrp_handle_t 15107c478bd9Sstevel@tonic-gate lgrp_plat_root_hand(void) 15117c478bd9Sstevel@tonic-gate { 15127c478bd9Sstevel@tonic-gate return (LGRP_DEFAULT_HANDLE); 15137c478bd9Sstevel@tonic-gate } 1514