17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5c39996a7Sstevel * Common Development and Distribution License (the "License"). 6c39996a7Sstevel * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 21c39996a7Sstevel 227c478bd9Sstevel@tonic-gate /* 23*fb2f18f8Sesaxe * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #include <sys/archsystm.h> /* for {in,out}{b,w,l}() */ 317c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 327c478bd9Sstevel@tonic-gate #include <sys/cpupart.h> 337c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 347c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 357c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 367c478bd9Sstevel@tonic-gate #include <sys/memlist.h> 377c478bd9Sstevel@tonic-gate #include <sys/memnode.h> 387c478bd9Sstevel@tonic-gate #include <sys/mman.h> 39ef50d8c0Sesaxe #include <sys/pci_cfgspace.h> 40ef50d8c0Sesaxe #include <sys/pci_impl.h> 417c478bd9Sstevel@tonic-gate #include <sys/param.h> 42*fb2f18f8Sesaxe #include <sys/pghw.h> 437c478bd9Sstevel@tonic-gate #include <sys/promif.h> /* for prom_printf() */ 447c478bd9Sstevel@tonic-gate #include <sys/systm.h> 457c478bd9Sstevel@tonic-gate #include <sys/thread.h> 467c478bd9Sstevel@tonic-gate #include <sys/types.h> 477c478bd9Sstevel@tonic-gate #include <sys/var.h> 487c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h> /* for x86_feature and X86_AMD */ 497c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h> 507c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 51affbd3ccSkchow #include <vm/vm_dep.h> 527c478bd9Sstevel@tonic-gate 537c478bd9Sstevel@tonic-gate 547c478bd9Sstevel@tonic-gate /* 557c478bd9Sstevel@tonic-gate * lgroup platform support for x86 platforms. 567c478bd9Sstevel@tonic-gate */ 577c478bd9Sstevel@tonic-gate 587c478bd9Sstevel@tonic-gate #define MAX_NODES 8 597c478bd9Sstevel@tonic-gate #define NLGRP (MAX_NODES * (MAX_NODES - 1) + 1) 607c478bd9Sstevel@tonic-gate 61*fb2f18f8Sesaxe #define LGRP_PLAT_CPU_TO_NODE(cpu) (pg_plat_hw_instance_id(cpu, PGHW_CHIP)) 627c478bd9Sstevel@tonic-gate 637c478bd9Sstevel@tonic-gate #define LGRP_PLAT_PROBE_NROUNDS 64 /* default laps for probing */ 647c478bd9Sstevel@tonic-gate #define LGRP_PLAT_PROBE_NSAMPLES 1 /* default samples to take */ 658949bcd6Sandrei #define LGRP_PLAT_PROBE_NREADS 256 /* number of vendor ID reads */ 667c478bd9Sstevel@tonic-gate 677c478bd9Sstevel@tonic-gate /* 687c478bd9Sstevel@tonic-gate * Multiprocessor Opteron machines have Non Uniform Memory Access (NUMA). 697c478bd9Sstevel@tonic-gate * 707c478bd9Sstevel@tonic-gate * Until System Affinity Resource Table (SRAT) becomes part of ACPI standard, 717c478bd9Sstevel@tonic-gate * we need to examine registers in PCI configuration space to determine how 727c478bd9Sstevel@tonic-gate * many nodes are in the system and which CPUs and memory are in each node. 737c478bd9Sstevel@tonic-gate * This could be determined by probing all memory from each CPU, but that is 747c478bd9Sstevel@tonic-gate * too expensive to do while booting the kernel. 757c478bd9Sstevel@tonic-gate * 767c478bd9Sstevel@tonic-gate * NOTE: Using these PCI configuration space registers to determine this 777c478bd9Sstevel@tonic-gate * locality info is Opteron K8 specific and not guaranteed to work on 787c478bd9Sstevel@tonic-gate * the next generation Opteron processor. Furthermore, we assume that 797c478bd9Sstevel@tonic-gate * there is one CPU per node and CPU 0 is in node 0, CPU 1 is in node 1, 807c478bd9Sstevel@tonic-gate * etc. which should be true for Opteron K8.... 817c478bd9Sstevel@tonic-gate */ 827c478bd9Sstevel@tonic-gate 837c478bd9Sstevel@tonic-gate /* 847c478bd9Sstevel@tonic-gate * Opteron DRAM Address Map in PCI configuration space gives base and limit 857c478bd9Sstevel@tonic-gate * of physical memory in each node for Opteron K8. The following constants 867c478bd9Sstevel@tonic-gate * and macros define their contents, structure, and access. 877c478bd9Sstevel@tonic-gate */ 887c478bd9Sstevel@tonic-gate 897c478bd9Sstevel@tonic-gate /* 907c478bd9Sstevel@tonic-gate * How many bits to shift Opteron DRAM Address Map base and limit registers 917c478bd9Sstevel@tonic-gate * to get actual value 927c478bd9Sstevel@tonic-gate */ 937c478bd9Sstevel@tonic-gate #define OPT_DRAMADDR_LSHIFT_ADDR 8 /* shift left for address */ 947c478bd9Sstevel@tonic-gate 957c478bd9Sstevel@tonic-gate #define OPT_DRAMADDR_MASK_OFF 0xFFFFFF /* offset for address */ 967c478bd9Sstevel@tonic-gate 977c478bd9Sstevel@tonic-gate /* 987c478bd9Sstevel@tonic-gate * Bit masks defining what's in Opteron DRAM Address Map base register 997c478bd9Sstevel@tonic-gate */ 1007c478bd9Sstevel@tonic-gate #define OPT_DRAMBASE_MASK_RE 0x1 /* read enable */ 1017c478bd9Sstevel@tonic-gate #define OPT_DRAMBASE_MASK_WE 0x2 /* write enable */ 1027c478bd9Sstevel@tonic-gate #define OPT_DRAMBASE_MASK_INTRLVEN 0x700 /* interleave */ 1037c478bd9Sstevel@tonic-gate 1047c478bd9Sstevel@tonic-gate #define OPT_DRAMBASE_MASK_ADDR 0xFFFF0000 /* address bits 39-24 */ 1057c478bd9Sstevel@tonic-gate 1067c478bd9Sstevel@tonic-gate /* 1077c478bd9Sstevel@tonic-gate * Macros to get values from Opteron DRAM Address Map base register 1087c478bd9Sstevel@tonic-gate */ 1097c478bd9Sstevel@tonic-gate #define OPT_DRAMBASE(reg) \ 1107c478bd9Sstevel@tonic-gate (((u_longlong_t)reg & OPT_DRAMBASE_MASK_ADDR) << \ 1117c478bd9Sstevel@tonic-gate OPT_DRAMADDR_LSHIFT_ADDR) 1127c478bd9Sstevel@tonic-gate 1137c478bd9Sstevel@tonic-gate 1147c478bd9Sstevel@tonic-gate /* 1157c478bd9Sstevel@tonic-gate * Bit masks defining what's in Opteron DRAM Address Map limit register 1167c478bd9Sstevel@tonic-gate */ 1177c478bd9Sstevel@tonic-gate #define OPT_DRAMLIMIT_MASK_DSTNODE 0x7 /* destination node */ 1187c478bd9Sstevel@tonic-gate #define OPT_DRAMLIMIT_MASK_INTRLVSEL 0x70 /* interleave select */ 1197c478bd9Sstevel@tonic-gate #define OPT_DRAMLIMIT_MASK_ADDR 0xFFFF0000 /* addr bits 39-24 */ 1207c478bd9Sstevel@tonic-gate 1217c478bd9Sstevel@tonic-gate /* 1227c478bd9Sstevel@tonic-gate * Macros to get values from Opteron DRAM Address Map limit register 1237c478bd9Sstevel@tonic-gate */ 1247c478bd9Sstevel@tonic-gate #define OPT_DRAMLIMIT(reg) \ 1257c478bd9Sstevel@tonic-gate (((u_longlong_t)reg & OPT_DRAMLIMIT_MASK_ADDR) << \ 1267c478bd9Sstevel@tonic-gate OPT_DRAMADDR_LSHIFT_ADDR) 1277c478bd9Sstevel@tonic-gate 1287c478bd9Sstevel@tonic-gate 1297c478bd9Sstevel@tonic-gate /* 1307c478bd9Sstevel@tonic-gate * Opteron Node ID register in PCI configuration space contains 1317c478bd9Sstevel@tonic-gate * number of nodes in system, etc. for Opteron K8. The following 1327c478bd9Sstevel@tonic-gate * constants and macros define its contents, structure, and access. 1337c478bd9Sstevel@tonic-gate */ 1347c478bd9Sstevel@tonic-gate 1357c478bd9Sstevel@tonic-gate /* 1367c478bd9Sstevel@tonic-gate * Bit masks defining what's in Opteron Node ID register 1377c478bd9Sstevel@tonic-gate */ 1387c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_ID 0x7 /* node ID */ 1397c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_CNT 0x70 /* node count */ 1407c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_IONODE 0x700 /* Hypertransport I/O hub node ID */ 1417c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_LCKNODE 0x7000 /* lock controller node ID */ 1427c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_CPUCNT 0xF0000 /* CPUs in system (0 means 1 CPU) */ 1437c478bd9Sstevel@tonic-gate 1447c478bd9Sstevel@tonic-gate /* 1457c478bd9Sstevel@tonic-gate * How many bits in Opteron Node ID register to shift right to get actual value 1467c478bd9Sstevel@tonic-gate */ 1477c478bd9Sstevel@tonic-gate #define OPT_NODE_RSHIFT_CNT 0x4 /* shift right for node count value */ 1487c478bd9Sstevel@tonic-gate 1497c478bd9Sstevel@tonic-gate /* 1507c478bd9Sstevel@tonic-gate * Macros to get values from Opteron Node ID register 1517c478bd9Sstevel@tonic-gate */ 1527c478bd9Sstevel@tonic-gate #define OPT_NODE_CNT(reg) \ 1537c478bd9Sstevel@tonic-gate ((reg & OPT_NODE_MASK_CNT) >> OPT_NODE_RSHIFT_CNT) 1547c478bd9Sstevel@tonic-gate 1557c478bd9Sstevel@tonic-gate 1567c478bd9Sstevel@tonic-gate /* 1577c478bd9Sstevel@tonic-gate * PCI configuration space registers accessed by specifying 1587c478bd9Sstevel@tonic-gate * a bus, device, function, and offset. The following constants 1597c478bd9Sstevel@tonic-gate * define the values needed to access Opteron K8 configuration 1607c478bd9Sstevel@tonic-gate * info to determine its node topology 1617c478bd9Sstevel@tonic-gate */ 1627c478bd9Sstevel@tonic-gate 1637c478bd9Sstevel@tonic-gate #define OPT_PCS_BUS_CONFIG 0 /* Hypertransport config space bus */ 1647c478bd9Sstevel@tonic-gate 1657c478bd9Sstevel@tonic-gate /* 1667c478bd9Sstevel@tonic-gate * Opteron PCI configuration space register function values 1677c478bd9Sstevel@tonic-gate */ 1687c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_HT 0 /* Hypertransport configuration */ 1697c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_ADDRMAP 1 /* Address map configuration */ 1707c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_DRAM 2 /* DRAM configuration */ 1717c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_MISC 3 /* Miscellaneous configuration */ 1727c478bd9Sstevel@tonic-gate 1737c478bd9Sstevel@tonic-gate /* 1747c478bd9Sstevel@tonic-gate * PCI Configuration Space register offsets 1757c478bd9Sstevel@tonic-gate */ 1767c478bd9Sstevel@tonic-gate #define OPT_PCS_OFF_VENDOR 0x0 /* device/vendor ID register */ 1777c478bd9Sstevel@tonic-gate #define OPT_PCS_OFF_DRAMBASE 0x40 /* DRAM Base register (node 0) */ 1787c478bd9Sstevel@tonic-gate #define OPT_PCS_OFF_NODEID 0x60 /* Node ID register */ 1797c478bd9Sstevel@tonic-gate 1807c478bd9Sstevel@tonic-gate /* 1817c478bd9Sstevel@tonic-gate * Opteron PCI Configuration Space device IDs for nodes 1827c478bd9Sstevel@tonic-gate */ 1837c478bd9Sstevel@tonic-gate #define OPT_PCS_DEV_NODE0 24 /* device number for node 0 */ 1847c478bd9Sstevel@tonic-gate 1857c478bd9Sstevel@tonic-gate 1867c478bd9Sstevel@tonic-gate /* 1877c478bd9Sstevel@tonic-gate * Bookkeeping for latencies seen during probing (used for verification) 1887c478bd9Sstevel@tonic-gate */ 1897c478bd9Sstevel@tonic-gate typedef struct lgrp_plat_latency_acct { 1907c478bd9Sstevel@tonic-gate hrtime_t la_value; /* latency value */ 1917c478bd9Sstevel@tonic-gate int la_count; /* occurrences */ 1927c478bd9Sstevel@tonic-gate } lgrp_plat_latency_acct_t; 1937c478bd9Sstevel@tonic-gate 1947c478bd9Sstevel@tonic-gate 1957c478bd9Sstevel@tonic-gate /* 1967c478bd9Sstevel@tonic-gate * Choices for probing to determine lgroup topology 1977c478bd9Sstevel@tonic-gate */ 1987c478bd9Sstevel@tonic-gate typedef enum lgrp_plat_probe_op { 1997c478bd9Sstevel@tonic-gate LGRP_PLAT_PROBE_PGCPY, /* Use page copy */ 2007c478bd9Sstevel@tonic-gate LGRP_PLAT_PROBE_VENDOR /* Read vendor ID on Northbridge */ 2017c478bd9Sstevel@tonic-gate } lgrp_plat_probe_op_t; 2027c478bd9Sstevel@tonic-gate 2037c478bd9Sstevel@tonic-gate 2047c478bd9Sstevel@tonic-gate /* 2057c478bd9Sstevel@tonic-gate * Opteron DRAM address map gives base and limit for physical memory in a node 2067c478bd9Sstevel@tonic-gate */ 2077c478bd9Sstevel@tonic-gate typedef struct opt_dram_addr_map { 2087c478bd9Sstevel@tonic-gate uint32_t base; 2097c478bd9Sstevel@tonic-gate uint32_t limit; 2107c478bd9Sstevel@tonic-gate } opt_dram_addr_map_t; 2117c478bd9Sstevel@tonic-gate 2127c478bd9Sstevel@tonic-gate 2137c478bd9Sstevel@tonic-gate /* 2147c478bd9Sstevel@tonic-gate * Starting and ending page for physical memory in node 2157c478bd9Sstevel@tonic-gate */ 2167c478bd9Sstevel@tonic-gate typedef struct phys_addr_map { 2177c478bd9Sstevel@tonic-gate pfn_t start; 2187c478bd9Sstevel@tonic-gate pfn_t end; 219a940d195Sjjc int exists; 2207c478bd9Sstevel@tonic-gate } phys_addr_map_t; 2217c478bd9Sstevel@tonic-gate 2227c478bd9Sstevel@tonic-gate 2237c478bd9Sstevel@tonic-gate /* 2247c478bd9Sstevel@tonic-gate * Opteron DRAM address map for each node 2257c478bd9Sstevel@tonic-gate */ 2267c478bd9Sstevel@tonic-gate struct opt_dram_addr_map opt_dram_map[MAX_NODES]; 2277c478bd9Sstevel@tonic-gate 2287c478bd9Sstevel@tonic-gate /* 2297c478bd9Sstevel@tonic-gate * Node ID register contents for each node 2307c478bd9Sstevel@tonic-gate */ 2317c478bd9Sstevel@tonic-gate uint_t opt_node_info[MAX_NODES]; 2327c478bd9Sstevel@tonic-gate 2337c478bd9Sstevel@tonic-gate /* 2347c478bd9Sstevel@tonic-gate * Whether memory is interleaved across nodes causing MPO to be disabled 2357c478bd9Sstevel@tonic-gate */ 2367c478bd9Sstevel@tonic-gate int lgrp_plat_mem_intrlv = 0; 2377c478bd9Sstevel@tonic-gate 2387c478bd9Sstevel@tonic-gate /* 2397c478bd9Sstevel@tonic-gate * Number of nodes in system 2407c478bd9Sstevel@tonic-gate */ 2417c478bd9Sstevel@tonic-gate uint_t lgrp_plat_node_cnt = 1; 2427c478bd9Sstevel@tonic-gate 2437c478bd9Sstevel@tonic-gate /* 2447c478bd9Sstevel@tonic-gate * Physical address range for memory in each node 2457c478bd9Sstevel@tonic-gate */ 2467c478bd9Sstevel@tonic-gate phys_addr_map_t lgrp_plat_node_memory[MAX_NODES]; 2477c478bd9Sstevel@tonic-gate 2487c478bd9Sstevel@tonic-gate /* 2497c478bd9Sstevel@tonic-gate * Probe costs (individual and total) and flush cost 2507c478bd9Sstevel@tonic-gate */ 2517c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_flush_cost = 0; 2527c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_cost = 0; 2537c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_cost_total = 0; 2547c478bd9Sstevel@tonic-gate 2557c478bd9Sstevel@tonic-gate /* 2567c478bd9Sstevel@tonic-gate * Error code for latency adjustment and verification 2577c478bd9Sstevel@tonic-gate */ 2587c478bd9Sstevel@tonic-gate int lgrp_plat_probe_error_code = 0; 2597c478bd9Sstevel@tonic-gate 2607c478bd9Sstevel@tonic-gate /* 2617c478bd9Sstevel@tonic-gate * How much latencies were off from minimum values gotten 2627c478bd9Sstevel@tonic-gate */ 2637c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_errors[MAX_NODES][MAX_NODES]; 2647c478bd9Sstevel@tonic-gate 2657c478bd9Sstevel@tonic-gate /* 2667c478bd9Sstevel@tonic-gate * Unique probe latencies and number of occurrences of each 2677c478bd9Sstevel@tonic-gate */ 2687c478bd9Sstevel@tonic-gate lgrp_plat_latency_acct_t lgrp_plat_probe_lat_acct[MAX_NODES]; 2697c478bd9Sstevel@tonic-gate 2707c478bd9Sstevel@tonic-gate /* 2717c478bd9Sstevel@tonic-gate * Size of memory buffer in each node for probing 2727c478bd9Sstevel@tonic-gate */ 2737c478bd9Sstevel@tonic-gate size_t lgrp_plat_probe_memsize = 0; 2747c478bd9Sstevel@tonic-gate 2757c478bd9Sstevel@tonic-gate /* 2767c478bd9Sstevel@tonic-gate * Virtual address of page in each node for probing 2777c478bd9Sstevel@tonic-gate */ 2787c478bd9Sstevel@tonic-gate caddr_t lgrp_plat_probe_memory[MAX_NODES]; 2797c478bd9Sstevel@tonic-gate 2807c478bd9Sstevel@tonic-gate /* 2817c478bd9Sstevel@tonic-gate * Number of unique latencies in probe times 2827c478bd9Sstevel@tonic-gate */ 2837c478bd9Sstevel@tonic-gate int lgrp_plat_probe_nlatencies = 0; 2847c478bd9Sstevel@tonic-gate 2857c478bd9Sstevel@tonic-gate /* 2867c478bd9Sstevel@tonic-gate * How many rounds of probing to do 2877c478bd9Sstevel@tonic-gate */ 2887c478bd9Sstevel@tonic-gate int lgrp_plat_probe_nrounds = LGRP_PLAT_PROBE_NROUNDS; 2897c478bd9Sstevel@tonic-gate 2907c478bd9Sstevel@tonic-gate /* 2917c478bd9Sstevel@tonic-gate * Number of samples to take when probing each node 2927c478bd9Sstevel@tonic-gate */ 2937c478bd9Sstevel@tonic-gate int lgrp_plat_probe_nsamples = LGRP_PLAT_PROBE_NSAMPLES; 2947c478bd9Sstevel@tonic-gate 2957c478bd9Sstevel@tonic-gate /* 2968949bcd6Sandrei * Number of times to read vendor ID from Northbridge for each probe. 2978949bcd6Sandrei */ 2988949bcd6Sandrei int lgrp_plat_probe_nreads = LGRP_PLAT_PROBE_NREADS; 2998949bcd6Sandrei 3008949bcd6Sandrei /* 3017c478bd9Sstevel@tonic-gate * How to probe to determine lgroup topology 3027c478bd9Sstevel@tonic-gate */ 3037c478bd9Sstevel@tonic-gate lgrp_plat_probe_op_t lgrp_plat_probe_op = LGRP_PLAT_PROBE_VENDOR; 3047c478bd9Sstevel@tonic-gate 3057c478bd9Sstevel@tonic-gate /* 3067c478bd9Sstevel@tonic-gate * PFN of page in each node for probing 3077c478bd9Sstevel@tonic-gate */ 3087c478bd9Sstevel@tonic-gate pfn_t lgrp_plat_probe_pfn[MAX_NODES]; 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate /* 3117c478bd9Sstevel@tonic-gate * Whether probe time was suspect (ie. not within tolerance of value that it 3127c478bd9Sstevel@tonic-gate * should match) 3137c478bd9Sstevel@tonic-gate */ 3147c478bd9Sstevel@tonic-gate int lgrp_plat_probe_suspect[MAX_NODES][MAX_NODES]; 3157c478bd9Sstevel@tonic-gate 3167c478bd9Sstevel@tonic-gate /* 3177c478bd9Sstevel@tonic-gate * How long it takes to access memory from each node 3187c478bd9Sstevel@tonic-gate */ 3197c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_times[MAX_NODES][MAX_NODES]; 3207c478bd9Sstevel@tonic-gate 3217c478bd9Sstevel@tonic-gate /* 3227c478bd9Sstevel@tonic-gate * Min and max node memory probe times seen 3237c478bd9Sstevel@tonic-gate */ 3247c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_time_max = 0; 3257c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_time_min = -1; 3267c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_max[MAX_NODES][MAX_NODES]; 3277c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_min[MAX_NODES][MAX_NODES]; 3287c478bd9Sstevel@tonic-gate 3297c478bd9Sstevel@tonic-gate 3307c478bd9Sstevel@tonic-gate /* 3317c478bd9Sstevel@tonic-gate * Allocate lgrp and lgrp stat arrays statically. 3327c478bd9Sstevel@tonic-gate */ 3337c478bd9Sstevel@tonic-gate static lgrp_t lgrp_space[NLGRP]; 3347c478bd9Sstevel@tonic-gate static int nlgrps_alloc; 3357c478bd9Sstevel@tonic-gate 3367c478bd9Sstevel@tonic-gate struct lgrp_stats lgrp_stats[NLGRP]; 3377c478bd9Sstevel@tonic-gate 3387c478bd9Sstevel@tonic-gate #define CPUID_FAMILY_OPTERON 15 3397c478bd9Sstevel@tonic-gate 3407c478bd9Sstevel@tonic-gate uint_t opt_family = 0; 3417c478bd9Sstevel@tonic-gate uint_t opt_model = 0; 3427c478bd9Sstevel@tonic-gate uint_t opt_probe_func = OPT_PCS_FUNC_DRAM; 3437c478bd9Sstevel@tonic-gate 3447c478bd9Sstevel@tonic-gate 3457c478bd9Sstevel@tonic-gate /* 3467c478bd9Sstevel@tonic-gate * Determine whether we're running on an AMD Opteron K8 machine 3477c478bd9Sstevel@tonic-gate */ 3487c478bd9Sstevel@tonic-gate int 3497c478bd9Sstevel@tonic-gate is_opteron(void) 3507c478bd9Sstevel@tonic-gate { 3517c478bd9Sstevel@tonic-gate if (x86_vendor != X86_VENDOR_AMD) 3527c478bd9Sstevel@tonic-gate return (0); 3537c478bd9Sstevel@tonic-gate 3547c478bd9Sstevel@tonic-gate if (cpuid_getfamily(CPU) == CPUID_FAMILY_OPTERON) 3557c478bd9Sstevel@tonic-gate return (1); 3567c478bd9Sstevel@tonic-gate else 3577c478bd9Sstevel@tonic-gate return (0); 3587c478bd9Sstevel@tonic-gate } 3597c478bd9Sstevel@tonic-gate 3607c478bd9Sstevel@tonic-gate int 3617c478bd9Sstevel@tonic-gate plat_lgrphand_to_mem_node(lgrp_handle_t hand) 3627c478bd9Sstevel@tonic-gate { 3637c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 3647c478bd9Sstevel@tonic-gate return (0); 3657c478bd9Sstevel@tonic-gate 3667c478bd9Sstevel@tonic-gate return ((int)hand); 3677c478bd9Sstevel@tonic-gate } 3687c478bd9Sstevel@tonic-gate 3697c478bd9Sstevel@tonic-gate lgrp_handle_t 3707c478bd9Sstevel@tonic-gate plat_mem_node_to_lgrphand(int mnode) 3717c478bd9Sstevel@tonic-gate { 3727c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 3737c478bd9Sstevel@tonic-gate return (LGRP_DEFAULT_HANDLE); 3747c478bd9Sstevel@tonic-gate 3757c478bd9Sstevel@tonic-gate return ((lgrp_handle_t)mnode); 3767c478bd9Sstevel@tonic-gate } 3777c478bd9Sstevel@tonic-gate 3787c478bd9Sstevel@tonic-gate int 3797c478bd9Sstevel@tonic-gate plat_pfn_to_mem_node(pfn_t pfn) 3807c478bd9Sstevel@tonic-gate { 3817c478bd9Sstevel@tonic-gate int node; 3827c478bd9Sstevel@tonic-gate 3837c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 3847c478bd9Sstevel@tonic-gate return (0); 3857c478bd9Sstevel@tonic-gate 3867c478bd9Sstevel@tonic-gate for (node = 0; node < lgrp_plat_node_cnt; node++) { 387a940d195Sjjc /* 388a940d195Sjjc * Skip nodes with no memory 389a940d195Sjjc */ 390a940d195Sjjc if (!lgrp_plat_node_memory[node].exists) 391a940d195Sjjc continue; 392a940d195Sjjc 3937c478bd9Sstevel@tonic-gate if (pfn >= lgrp_plat_node_memory[node].start && 3947c478bd9Sstevel@tonic-gate pfn <= lgrp_plat_node_memory[node].end) 3957c478bd9Sstevel@tonic-gate return (node); 3967c478bd9Sstevel@tonic-gate } 3977c478bd9Sstevel@tonic-gate 3987c478bd9Sstevel@tonic-gate ASSERT(node < lgrp_plat_node_cnt); 3997c478bd9Sstevel@tonic-gate return (-1); 4007c478bd9Sstevel@tonic-gate } 4017c478bd9Sstevel@tonic-gate 4027c478bd9Sstevel@tonic-gate /* 4037c478bd9Sstevel@tonic-gate * Configure memory nodes for machines with more than one node (ie NUMA) 4047c478bd9Sstevel@tonic-gate */ 4057c478bd9Sstevel@tonic-gate void 4067c478bd9Sstevel@tonic-gate plat_build_mem_nodes(struct memlist *list) 4077c478bd9Sstevel@tonic-gate { 408a940d195Sjjc pfn_t cur_start; /* start addr of subrange */ 409a940d195Sjjc pfn_t cur_end; /* end addr of subrange */ 410a940d195Sjjc pfn_t start; /* start addr of whole range */ 411a940d195Sjjc pfn_t end; /* end addr of whole range */ 4127c478bd9Sstevel@tonic-gate 4137c478bd9Sstevel@tonic-gate /* 4147c478bd9Sstevel@tonic-gate * Boot install lists are arranged <addr, len>, ... 4157c478bd9Sstevel@tonic-gate */ 4167c478bd9Sstevel@tonic-gate while (list) { 4177c478bd9Sstevel@tonic-gate int node; 4187c478bd9Sstevel@tonic-gate 4197c478bd9Sstevel@tonic-gate start = list->address >> PAGESHIFT; 4207c478bd9Sstevel@tonic-gate end = (list->address + list->size - 1) >> PAGESHIFT; 4217c478bd9Sstevel@tonic-gate 4227c478bd9Sstevel@tonic-gate if (start > physmax) { 4237c478bd9Sstevel@tonic-gate list = list->next; 4247c478bd9Sstevel@tonic-gate continue; 4257c478bd9Sstevel@tonic-gate } 4267c478bd9Sstevel@tonic-gate if (end > physmax) 4277c478bd9Sstevel@tonic-gate end = physmax; 4287c478bd9Sstevel@tonic-gate 4297c478bd9Sstevel@tonic-gate /* 4307c478bd9Sstevel@tonic-gate * When there is only one memnode, just add memory to memnode 4317c478bd9Sstevel@tonic-gate */ 4327c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) { 4337c478bd9Sstevel@tonic-gate mem_node_add_slice(start, end); 4347c478bd9Sstevel@tonic-gate list = list->next; 4357c478bd9Sstevel@tonic-gate continue; 4367c478bd9Sstevel@tonic-gate } 4377c478bd9Sstevel@tonic-gate 4387c478bd9Sstevel@tonic-gate /* 4397c478bd9Sstevel@tonic-gate * mem_node_add_slice() expects to get a memory range that 4407c478bd9Sstevel@tonic-gate * is within one memnode, so need to split any memory range 4417c478bd9Sstevel@tonic-gate * that spans multiple memnodes into subranges that are each 4427c478bd9Sstevel@tonic-gate * contained within one memnode when feeding them to 4437c478bd9Sstevel@tonic-gate * mem_node_add_slice() 4447c478bd9Sstevel@tonic-gate */ 4457c478bd9Sstevel@tonic-gate cur_start = start; 4467c478bd9Sstevel@tonic-gate do { 4477c478bd9Sstevel@tonic-gate node = plat_pfn_to_mem_node(cur_start); 4487c478bd9Sstevel@tonic-gate 449a940d195Sjjc /* 450a940d195Sjjc * Panic if DRAM address map registers or SRAT say 451a940d195Sjjc * memory in node doesn't exist or address from 452a940d195Sjjc * boot installed memory list entry isn't in this node. 453a940d195Sjjc * This shouldn't happen and rest of code can't deal 454a940d195Sjjc * with this if it does. 455a940d195Sjjc */ 456a940d195Sjjc if (node < 0 || node >= lgrp_plat_node_cnt || 457a940d195Sjjc !lgrp_plat_node_memory[node].exists || 458a940d195Sjjc cur_start < lgrp_plat_node_memory[node].start || 459a940d195Sjjc cur_start > lgrp_plat_node_memory[node].end) { 460a940d195Sjjc cmn_err(CE_PANIC, "Don't know which memnode " 461a940d195Sjjc "to add installed memory address 0x%lx\n", 462a940d195Sjjc cur_start); 463a940d195Sjjc } 4647c478bd9Sstevel@tonic-gate 4657c478bd9Sstevel@tonic-gate /* 4667c478bd9Sstevel@tonic-gate * End of current subrange should not span memnodes 4677c478bd9Sstevel@tonic-gate */ 468a940d195Sjjc cur_end = end; 469a940d195Sjjc if (lgrp_plat_node_memory[node].exists && 470a940d195Sjjc cur_end > lgrp_plat_node_memory[node].end) 4717c478bd9Sstevel@tonic-gate cur_end = lgrp_plat_node_memory[node].end; 4727c478bd9Sstevel@tonic-gate 4737c478bd9Sstevel@tonic-gate mem_node_add_slice(cur_start, cur_end); 4747c478bd9Sstevel@tonic-gate 4757c478bd9Sstevel@tonic-gate /* 4767c478bd9Sstevel@tonic-gate * Next subrange starts after end of current one 4777c478bd9Sstevel@tonic-gate */ 4787c478bd9Sstevel@tonic-gate cur_start = cur_end + 1; 4797c478bd9Sstevel@tonic-gate } while (cur_end < end); 4807c478bd9Sstevel@tonic-gate 4817c478bd9Sstevel@tonic-gate list = list->next; 4827c478bd9Sstevel@tonic-gate } 4837c478bd9Sstevel@tonic-gate mem_node_physalign = 0; 4847c478bd9Sstevel@tonic-gate mem_node_pfn_shift = 0; 4857c478bd9Sstevel@tonic-gate } 4867c478bd9Sstevel@tonic-gate 4877c478bd9Sstevel@tonic-gate 4887c478bd9Sstevel@tonic-gate /* 4897c478bd9Sstevel@tonic-gate * Platform-specific initialization of lgroups 4907c478bd9Sstevel@tonic-gate */ 4917c478bd9Sstevel@tonic-gate void 4927c478bd9Sstevel@tonic-gate lgrp_plat_init(void) 4937c478bd9Sstevel@tonic-gate { 4947c478bd9Sstevel@tonic-gate uint_t bus; 4957c478bd9Sstevel@tonic-gate uint_t dev; 4967c478bd9Sstevel@tonic-gate uint_t node; 4977c478bd9Sstevel@tonic-gate uint_t off; 4987c478bd9Sstevel@tonic-gate 4997c478bd9Sstevel@tonic-gate extern lgrp_load_t lgrp_expand_proc_thresh; 5007c478bd9Sstevel@tonic-gate extern lgrp_load_t lgrp_expand_proc_diff; 5017c478bd9Sstevel@tonic-gate 5027c478bd9Sstevel@tonic-gate /* 5037c478bd9Sstevel@tonic-gate * Initialize as a UMA machine if this isn't an Opteron 5047c478bd9Sstevel@tonic-gate */ 5057c478bd9Sstevel@tonic-gate if (!is_opteron() || lgrp_topo_ht_limit() == 1) { 5067c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt = max_mem_nodes = 1; 5077c478bd9Sstevel@tonic-gate return; 5087c478bd9Sstevel@tonic-gate } 5097c478bd9Sstevel@tonic-gate 5107c478bd9Sstevel@tonic-gate /* 5117c478bd9Sstevel@tonic-gate * Read configuration registers from PCI configuration space to 5127c478bd9Sstevel@tonic-gate * determine node information, which memory is in each node, etc. 5137c478bd9Sstevel@tonic-gate * 5147c478bd9Sstevel@tonic-gate * Write to PCI configuration space address register to specify 5157c478bd9Sstevel@tonic-gate * which configuration register to read and read/write PCI 5167c478bd9Sstevel@tonic-gate * configuration space data register to get/set contents 5177c478bd9Sstevel@tonic-gate */ 5187c478bd9Sstevel@tonic-gate bus = OPT_PCS_BUS_CONFIG; 5197c478bd9Sstevel@tonic-gate dev = OPT_PCS_DEV_NODE0; 5207c478bd9Sstevel@tonic-gate off = OPT_PCS_OFF_DRAMBASE; 5217c478bd9Sstevel@tonic-gate 5227c478bd9Sstevel@tonic-gate /* 5237c478bd9Sstevel@tonic-gate * Read node ID register for node 0 to get node count 5247c478bd9Sstevel@tonic-gate */ 525ef50d8c0Sesaxe opt_node_info[0] = pci_getl_func(bus, dev, OPT_PCS_FUNC_HT, 526ef50d8c0Sesaxe OPT_PCS_OFF_NODEID); 5277c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt = OPT_NODE_CNT(opt_node_info[0]) + 1; 5287c478bd9Sstevel@tonic-gate 5297c478bd9Sstevel@tonic-gate for (node = 0; node < lgrp_plat_node_cnt; node++) { 5307c478bd9Sstevel@tonic-gate /* 5317c478bd9Sstevel@tonic-gate * Read node ID register (except for node 0 which we just read) 5327c478bd9Sstevel@tonic-gate */ 5337c478bd9Sstevel@tonic-gate if (node > 0) { 534ef50d8c0Sesaxe opt_node_info[node] = pci_getl_func(bus, dev, 535ef50d8c0Sesaxe OPT_PCS_FUNC_HT, OPT_PCS_OFF_NODEID); 5367c478bd9Sstevel@tonic-gate } 5377c478bd9Sstevel@tonic-gate 5387c478bd9Sstevel@tonic-gate /* 5397c478bd9Sstevel@tonic-gate * Read DRAM base and limit registers which specify 5407c478bd9Sstevel@tonic-gate * physical memory range of each node 5417c478bd9Sstevel@tonic-gate */ 542ef50d8c0Sesaxe opt_dram_map[node].base = pci_getl_func(bus, dev, 543ef50d8c0Sesaxe OPT_PCS_FUNC_ADDRMAP, off); 5447c478bd9Sstevel@tonic-gate if (opt_dram_map[node].base & OPT_DRAMBASE_MASK_INTRLVEN) 5457c478bd9Sstevel@tonic-gate lgrp_plat_mem_intrlv++; 5467c478bd9Sstevel@tonic-gate 5477c478bd9Sstevel@tonic-gate off += 4; /* limit register offset */ 548ef50d8c0Sesaxe opt_dram_map[node].limit = pci_getl_func(bus, dev, 549ef50d8c0Sesaxe OPT_PCS_FUNC_ADDRMAP, off); 5507c478bd9Sstevel@tonic-gate 5517c478bd9Sstevel@tonic-gate /* 5527c478bd9Sstevel@tonic-gate * Increment device number to next node and register offset for 5537c478bd9Sstevel@tonic-gate * DRAM base register of next node 5547c478bd9Sstevel@tonic-gate */ 5557c478bd9Sstevel@tonic-gate off += 4; 5567c478bd9Sstevel@tonic-gate dev++; 5577c478bd9Sstevel@tonic-gate 5587c478bd9Sstevel@tonic-gate /* 559a940d195Sjjc * Both read and write enable bits must be enabled in DRAM 560a940d195Sjjc * address map base register for physical memory to exist in 561a940d195Sjjc * node 562a940d195Sjjc */ 563a940d195Sjjc if ((opt_dram_map[node].base & OPT_DRAMBASE_MASK_RE) == 0 || 564a940d195Sjjc (opt_dram_map[node].base & OPT_DRAMBASE_MASK_WE) == 0) { 565a940d195Sjjc /* 566a940d195Sjjc * Mark node memory as non-existent and set start and 567a940d195Sjjc * end addresses to be same in lgrp_plat_node_memory[] 568a940d195Sjjc */ 569a940d195Sjjc lgrp_plat_node_memory[node].exists = 0; 570a940d195Sjjc lgrp_plat_node_memory[node].start = 571a940d195Sjjc lgrp_plat_node_memory[node].end = (pfn_t)-1; 572a940d195Sjjc continue; 573a940d195Sjjc } 574a940d195Sjjc 575a940d195Sjjc /* 5767c478bd9Sstevel@tonic-gate * Get PFN for first page in each node, 5777c478bd9Sstevel@tonic-gate * so we can probe memory to determine latency topology 5787c478bd9Sstevel@tonic-gate */ 5797c478bd9Sstevel@tonic-gate lgrp_plat_probe_pfn[node] = 5807c478bd9Sstevel@tonic-gate btop(OPT_DRAMBASE(opt_dram_map[node].base)); 5817c478bd9Sstevel@tonic-gate 5827c478bd9Sstevel@tonic-gate /* 583a940d195Sjjc * Mark node memory as existing and remember physical address 584a940d195Sjjc * range of each node for use later 5857c478bd9Sstevel@tonic-gate */ 586a940d195Sjjc lgrp_plat_node_memory[node].exists = 1; 5877c478bd9Sstevel@tonic-gate lgrp_plat_node_memory[node].start = 5887c478bd9Sstevel@tonic-gate btop(OPT_DRAMBASE(opt_dram_map[node].base)); 5897c478bd9Sstevel@tonic-gate lgrp_plat_node_memory[node].end = 5907c478bd9Sstevel@tonic-gate btop(OPT_DRAMLIMIT(opt_dram_map[node].limit) | 5917c478bd9Sstevel@tonic-gate OPT_DRAMADDR_MASK_OFF); 5927c478bd9Sstevel@tonic-gate } 5937c478bd9Sstevel@tonic-gate 5947c478bd9Sstevel@tonic-gate /* 5957c478bd9Sstevel@tonic-gate * Only use one memory node if memory is interleaved between any nodes 5967c478bd9Sstevel@tonic-gate */ 5977c478bd9Sstevel@tonic-gate if (lgrp_plat_mem_intrlv) { 5987c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt = max_mem_nodes = 1; 5997c478bd9Sstevel@tonic-gate (void) lgrp_topo_ht_limit_set(1); 6007c478bd9Sstevel@tonic-gate } else { 6017c478bd9Sstevel@tonic-gate max_mem_nodes = lgrp_plat_node_cnt; 6027c478bd9Sstevel@tonic-gate 6037c478bd9Sstevel@tonic-gate /* 6047c478bd9Sstevel@tonic-gate * Probing errors can mess up the lgroup topology and force us 6057c478bd9Sstevel@tonic-gate * fall back to a 2 level lgroup topology. Here we bound how 6067c478bd9Sstevel@tonic-gate * tall the lgroup topology can grow in hopes of avoiding any 6077c478bd9Sstevel@tonic-gate * anamolies in probing from messing up the lgroup topology 6087c478bd9Sstevel@tonic-gate * by limiting the accuracy of the latency topology. 6097c478bd9Sstevel@tonic-gate * 6107c478bd9Sstevel@tonic-gate * Assume that nodes will at least be configured in a ring, 6117c478bd9Sstevel@tonic-gate * so limit height of lgroup topology to be less than number 6127c478bd9Sstevel@tonic-gate * of nodes on a system with 4 or more nodes 6137c478bd9Sstevel@tonic-gate */ 6147c478bd9Sstevel@tonic-gate if (lgrp_plat_node_cnt >= 4 && 6157c478bd9Sstevel@tonic-gate lgrp_topo_ht_limit() == lgrp_topo_ht_limit_default()) 6167c478bd9Sstevel@tonic-gate (void) lgrp_topo_ht_limit_set(lgrp_plat_node_cnt - 1); 6177c478bd9Sstevel@tonic-gate } 6187c478bd9Sstevel@tonic-gate 6197c478bd9Sstevel@tonic-gate /* 6207c478bd9Sstevel@tonic-gate * Lgroups on Opteron architectures have but a single physical 6217c478bd9Sstevel@tonic-gate * processor. Tune lgrp_expand_proc_thresh and lgrp_expand_proc_diff 6227c478bd9Sstevel@tonic-gate * so that lgrp_choose() will spread things out aggressively. 6237c478bd9Sstevel@tonic-gate */ 6247c478bd9Sstevel@tonic-gate lgrp_expand_proc_thresh = LGRP_LOADAVG_THREAD_MAX / 2; 6257c478bd9Sstevel@tonic-gate lgrp_expand_proc_diff = 0; 6267c478bd9Sstevel@tonic-gate } 6277c478bd9Sstevel@tonic-gate 6287c478bd9Sstevel@tonic-gate 6297c478bd9Sstevel@tonic-gate /* 6307c478bd9Sstevel@tonic-gate * Latencies must be within 1/(2**LGRP_LAT_TOLERANCE_SHIFT) of each other to 6317c478bd9Sstevel@tonic-gate * be considered same 6327c478bd9Sstevel@tonic-gate */ 6337c478bd9Sstevel@tonic-gate #define LGRP_LAT_TOLERANCE_SHIFT 4 6347c478bd9Sstevel@tonic-gate 6357c478bd9Sstevel@tonic-gate int lgrp_plat_probe_lt_shift = LGRP_LAT_TOLERANCE_SHIFT; 6367c478bd9Sstevel@tonic-gate 6377c478bd9Sstevel@tonic-gate 6387c478bd9Sstevel@tonic-gate /* 6397c478bd9Sstevel@tonic-gate * Adjust latencies between nodes to be symmetric, normalize latencies between 6407c478bd9Sstevel@tonic-gate * any nodes that are within some tolerance to be same, and make local 6417c478bd9Sstevel@tonic-gate * latencies be same 6427c478bd9Sstevel@tonic-gate */ 6437c478bd9Sstevel@tonic-gate static void 6447c478bd9Sstevel@tonic-gate lgrp_plat_latency_adjust(void) 6457c478bd9Sstevel@tonic-gate { 6467c478bd9Sstevel@tonic-gate int i; 6477c478bd9Sstevel@tonic-gate int j; 6487c478bd9Sstevel@tonic-gate int k; 6497c478bd9Sstevel@tonic-gate int l; 6507c478bd9Sstevel@tonic-gate u_longlong_t max; 6517c478bd9Sstevel@tonic-gate u_longlong_t min; 6527c478bd9Sstevel@tonic-gate u_longlong_t t; 6537c478bd9Sstevel@tonic-gate u_longlong_t t1; 6547c478bd9Sstevel@tonic-gate u_longlong_t t2; 65503400a71Sjjc const lgrp_config_flag_t cflag = LGRP_CONFIG_LAT_CHANGE_ALL; 6567c478bd9Sstevel@tonic-gate int lat_corrected[MAX_NODES][MAX_NODES]; 6577c478bd9Sstevel@tonic-gate 6587c478bd9Sstevel@tonic-gate /* 6597c478bd9Sstevel@tonic-gate * Nothing to do when this is an UMA machine 6607c478bd9Sstevel@tonic-gate */ 6617c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 6627c478bd9Sstevel@tonic-gate return; 6637c478bd9Sstevel@tonic-gate 6647c478bd9Sstevel@tonic-gate /* 6657c478bd9Sstevel@tonic-gate * Make sure that latencies are symmetric between any two nodes 6667c478bd9Sstevel@tonic-gate * (ie. latency(node0, node1) == latency(node1, node0)) 6677c478bd9Sstevel@tonic-gate */ 6687c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 6697c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 6707c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[i][j]; 6717c478bd9Sstevel@tonic-gate t2 = lgrp_plat_probe_times[j][i]; 6727c478bd9Sstevel@tonic-gate 6737c478bd9Sstevel@tonic-gate if (t1 == 0 || t2 == 0 || t1 == t2) 6747c478bd9Sstevel@tonic-gate continue; 6757c478bd9Sstevel@tonic-gate 6767c478bd9Sstevel@tonic-gate /* 6777c478bd9Sstevel@tonic-gate * Latencies should be same 6787c478bd9Sstevel@tonic-gate * - Use minimum of two latencies which should be same 6797c478bd9Sstevel@tonic-gate * - Track suspect probe times not within tolerance of 6807c478bd9Sstevel@tonic-gate * min value 6817c478bd9Sstevel@tonic-gate * - Remember how much values are corrected by 6827c478bd9Sstevel@tonic-gate */ 6837c478bd9Sstevel@tonic-gate if (t1 > t2) { 6847c478bd9Sstevel@tonic-gate t = t2; 6857c478bd9Sstevel@tonic-gate lgrp_plat_probe_errors[i][j] += t1 - t2; 6867c478bd9Sstevel@tonic-gate if (t1 - t2 > t2 >> lgrp_plat_probe_lt_shift) { 6877c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[i][j]++; 6887c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[j][i]++; 6897c478bd9Sstevel@tonic-gate } 6907c478bd9Sstevel@tonic-gate } else if (t2 > t1) { 6917c478bd9Sstevel@tonic-gate t = t1; 6927c478bd9Sstevel@tonic-gate lgrp_plat_probe_errors[j][i] += t2 - t1; 6937c478bd9Sstevel@tonic-gate if (t2 - t1 > t1 >> lgrp_plat_probe_lt_shift) { 6947c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[i][j]++; 6957c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[j][i]++; 6967c478bd9Sstevel@tonic-gate } 6977c478bd9Sstevel@tonic-gate } 6987c478bd9Sstevel@tonic-gate 6997c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][j] = 7007c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[j][i] = t; 7017c478bd9Sstevel@tonic-gate lgrp_config(cflag, t1, t); 7027c478bd9Sstevel@tonic-gate lgrp_config(cflag, t2, t); 7037c478bd9Sstevel@tonic-gate } 7047c478bd9Sstevel@tonic-gate 7057c478bd9Sstevel@tonic-gate /* 7067c478bd9Sstevel@tonic-gate * Keep track of which latencies get corrected 7077c478bd9Sstevel@tonic-gate */ 7087c478bd9Sstevel@tonic-gate for (i = 0; i < MAX_NODES; i++) 7097c478bd9Sstevel@tonic-gate for (j = 0; j < MAX_NODES; j++) 7107c478bd9Sstevel@tonic-gate lat_corrected[i][j] = 0; 7117c478bd9Sstevel@tonic-gate 7127c478bd9Sstevel@tonic-gate /* 7137c478bd9Sstevel@tonic-gate * For every two nodes, see whether there is another pair of nodes which 7147c478bd9Sstevel@tonic-gate * are about the same distance apart and make the latencies be the same 7157c478bd9Sstevel@tonic-gate * if they are close enough together 7167c478bd9Sstevel@tonic-gate */ 7177c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 7187c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 7197c478bd9Sstevel@tonic-gate /* 7207c478bd9Sstevel@tonic-gate * Pick one pair of nodes (i, j) 7217c478bd9Sstevel@tonic-gate * and get latency between them 7227c478bd9Sstevel@tonic-gate */ 7237c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[i][j]; 7247c478bd9Sstevel@tonic-gate 7257c478bd9Sstevel@tonic-gate /* 7267c478bd9Sstevel@tonic-gate * Skip this pair of nodes if there isn't a latency 7277c478bd9Sstevel@tonic-gate * for it yet 7287c478bd9Sstevel@tonic-gate */ 7297c478bd9Sstevel@tonic-gate if (t1 == 0) 7307c478bd9Sstevel@tonic-gate continue; 7317c478bd9Sstevel@tonic-gate 7327c478bd9Sstevel@tonic-gate for (k = 0; k < lgrp_plat_node_cnt; k++) 7337c478bd9Sstevel@tonic-gate for (l = 0; l < lgrp_plat_node_cnt; l++) { 7347c478bd9Sstevel@tonic-gate /* 7357c478bd9Sstevel@tonic-gate * Pick another pair of nodes (k, l) 7367c478bd9Sstevel@tonic-gate * not same as (i, j) and get latency 7377c478bd9Sstevel@tonic-gate * between them 7387c478bd9Sstevel@tonic-gate */ 7397c478bd9Sstevel@tonic-gate if (k == i && l == j) 7407c478bd9Sstevel@tonic-gate continue; 7417c478bd9Sstevel@tonic-gate 7427c478bd9Sstevel@tonic-gate t2 = lgrp_plat_probe_times[k][l]; 7437c478bd9Sstevel@tonic-gate 7447c478bd9Sstevel@tonic-gate /* 7457c478bd9Sstevel@tonic-gate * Skip this pair of nodes if there 7467c478bd9Sstevel@tonic-gate * isn't a latency for it yet 7477c478bd9Sstevel@tonic-gate */ 7487c478bd9Sstevel@tonic-gate 7497c478bd9Sstevel@tonic-gate if (t2 == 0) 7507c478bd9Sstevel@tonic-gate continue; 7517c478bd9Sstevel@tonic-gate 7527c478bd9Sstevel@tonic-gate /* 7537c478bd9Sstevel@tonic-gate * Skip nodes (k, l) if they already 7547c478bd9Sstevel@tonic-gate * have same latency as (i, j) or 7557c478bd9Sstevel@tonic-gate * their latency isn't close enough to 7567c478bd9Sstevel@tonic-gate * be considered/made the same 7577c478bd9Sstevel@tonic-gate */ 7587c478bd9Sstevel@tonic-gate if (t1 == t2 || (t1 > t2 && t1 - t2 > 7597c478bd9Sstevel@tonic-gate t1 >> lgrp_plat_probe_lt_shift) || 7607c478bd9Sstevel@tonic-gate (t2 > t1 && t2 - t1 > 7617c478bd9Sstevel@tonic-gate t2 >> lgrp_plat_probe_lt_shift)) 7627c478bd9Sstevel@tonic-gate continue; 7637c478bd9Sstevel@tonic-gate 7647c478bd9Sstevel@tonic-gate /* 7657c478bd9Sstevel@tonic-gate * Make latency(i, j) same as 7667c478bd9Sstevel@tonic-gate * latency(k, l), try to use latency 7677c478bd9Sstevel@tonic-gate * that has been adjusted already to get 7687c478bd9Sstevel@tonic-gate * more consistency (if possible), and 7697c478bd9Sstevel@tonic-gate * remember which latencies were 7707c478bd9Sstevel@tonic-gate * adjusted for next time 7717c478bd9Sstevel@tonic-gate */ 7727c478bd9Sstevel@tonic-gate if (lat_corrected[i][j]) { 7737c478bd9Sstevel@tonic-gate t = t1; 7747c478bd9Sstevel@tonic-gate lgrp_config(cflag, t2, t); 7757c478bd9Sstevel@tonic-gate t2 = t; 7767c478bd9Sstevel@tonic-gate } else if (lat_corrected[k][l]) { 7777c478bd9Sstevel@tonic-gate t = t2; 7787c478bd9Sstevel@tonic-gate lgrp_config(cflag, t1, t); 7797c478bd9Sstevel@tonic-gate t1 = t; 7807c478bd9Sstevel@tonic-gate } else { 7817c478bd9Sstevel@tonic-gate if (t1 > t2) 7827c478bd9Sstevel@tonic-gate t = t2; 7837c478bd9Sstevel@tonic-gate else 7847c478bd9Sstevel@tonic-gate t = t1; 7857c478bd9Sstevel@tonic-gate lgrp_config(cflag, t1, t); 7867c478bd9Sstevel@tonic-gate lgrp_config(cflag, t2, t); 7877c478bd9Sstevel@tonic-gate t1 = t2 = t; 7887c478bd9Sstevel@tonic-gate } 7897c478bd9Sstevel@tonic-gate 7907c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][j] = 7917c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[k][l] = t; 7927c478bd9Sstevel@tonic-gate 7937c478bd9Sstevel@tonic-gate lat_corrected[i][j] = 7947c478bd9Sstevel@tonic-gate lat_corrected[k][l] = 1; 7957c478bd9Sstevel@tonic-gate } 7967c478bd9Sstevel@tonic-gate } 7977c478bd9Sstevel@tonic-gate 7987c478bd9Sstevel@tonic-gate /* 7997c478bd9Sstevel@tonic-gate * Local latencies should be same 8007c478bd9Sstevel@tonic-gate * - Find min and max local latencies 8017c478bd9Sstevel@tonic-gate * - Make all local latencies be minimum 8027c478bd9Sstevel@tonic-gate */ 8037c478bd9Sstevel@tonic-gate min = -1; 8047c478bd9Sstevel@tonic-gate max = 0; 8057c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 8067c478bd9Sstevel@tonic-gate t = lgrp_plat_probe_times[i][i]; 8077c478bd9Sstevel@tonic-gate if (t == 0) 8087c478bd9Sstevel@tonic-gate continue; 8097c478bd9Sstevel@tonic-gate if (min == -1 || t < min) 8107c478bd9Sstevel@tonic-gate min = t; 8117c478bd9Sstevel@tonic-gate if (t > max) 8127c478bd9Sstevel@tonic-gate max = t; 8137c478bd9Sstevel@tonic-gate } 8147c478bd9Sstevel@tonic-gate if (min != max) { 8157c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 8167c478bd9Sstevel@tonic-gate int local; 8177c478bd9Sstevel@tonic-gate 8187c478bd9Sstevel@tonic-gate local = lgrp_plat_probe_times[i][i]; 8197c478bd9Sstevel@tonic-gate if (local == 0) 8207c478bd9Sstevel@tonic-gate continue; 8217c478bd9Sstevel@tonic-gate 8227c478bd9Sstevel@tonic-gate /* 8237c478bd9Sstevel@tonic-gate * Track suspect probe times that aren't within 8247c478bd9Sstevel@tonic-gate * tolerance of minimum local latency and how much 8257c478bd9Sstevel@tonic-gate * probe times are corrected by 8267c478bd9Sstevel@tonic-gate */ 8277c478bd9Sstevel@tonic-gate if (local - min > min >> lgrp_plat_probe_lt_shift) 8287c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[i][i]++; 8297c478bd9Sstevel@tonic-gate 8307c478bd9Sstevel@tonic-gate lgrp_plat_probe_errors[i][i] += local - min; 8317c478bd9Sstevel@tonic-gate 8327c478bd9Sstevel@tonic-gate /* 8337c478bd9Sstevel@tonic-gate * Make local latencies be minimum 8347c478bd9Sstevel@tonic-gate */ 83503400a71Sjjc lgrp_config(LGRP_CONFIG_LAT_CHANGE, i, min); 8367c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][i] = min; 8377c478bd9Sstevel@tonic-gate } 8387c478bd9Sstevel@tonic-gate } 8397c478bd9Sstevel@tonic-gate 8407c478bd9Sstevel@tonic-gate /* 8417c478bd9Sstevel@tonic-gate * Determine max probe time again since just adjusted latencies 8427c478bd9Sstevel@tonic-gate */ 8437c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max = 0; 8447c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 8457c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 8467c478bd9Sstevel@tonic-gate t = lgrp_plat_probe_times[i][j]; 8477c478bd9Sstevel@tonic-gate if (t > lgrp_plat_probe_time_max) 8487c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max = t; 8497c478bd9Sstevel@tonic-gate } 8507c478bd9Sstevel@tonic-gate } 8517c478bd9Sstevel@tonic-gate 8527c478bd9Sstevel@tonic-gate 8537c478bd9Sstevel@tonic-gate /* 8547c478bd9Sstevel@tonic-gate * Verify following about latencies between nodes: 8557c478bd9Sstevel@tonic-gate * 8567c478bd9Sstevel@tonic-gate * - Latencies should be symmetric (ie. latency(a, b) == latency(b, a)) 8577c478bd9Sstevel@tonic-gate * - Local latencies same 8587c478bd9Sstevel@tonic-gate * - Local < remote 8597c478bd9Sstevel@tonic-gate * - Number of latencies seen is reasonable 8607c478bd9Sstevel@tonic-gate * - Number of occurrences of a given latency should be more than 1 8617c478bd9Sstevel@tonic-gate * 8627c478bd9Sstevel@tonic-gate * Returns: 8637c478bd9Sstevel@tonic-gate * 0 Success 8647c478bd9Sstevel@tonic-gate * -1 Not symmetric 8657c478bd9Sstevel@tonic-gate * -2 Local latencies not same 8667c478bd9Sstevel@tonic-gate * -3 Local >= remote 8677c478bd9Sstevel@tonic-gate * -4 Wrong number of latencies 8687c478bd9Sstevel@tonic-gate * -5 Not enough occurrences of given latency 8697c478bd9Sstevel@tonic-gate */ 8707c478bd9Sstevel@tonic-gate static int 8717c478bd9Sstevel@tonic-gate lgrp_plat_latency_verify(void) 8727c478bd9Sstevel@tonic-gate { 8737c478bd9Sstevel@tonic-gate int i; 8747c478bd9Sstevel@tonic-gate int j; 8757c478bd9Sstevel@tonic-gate lgrp_plat_latency_acct_t *l; 8767c478bd9Sstevel@tonic-gate int probed; 8777c478bd9Sstevel@tonic-gate u_longlong_t t1; 8787c478bd9Sstevel@tonic-gate u_longlong_t t2; 8797c478bd9Sstevel@tonic-gate 8807c478bd9Sstevel@tonic-gate /* 8812dae3fb5Sjjc * Nothing to do when this is an UMA machine, lgroup topology is 8822dae3fb5Sjjc * limited to 2 levels, or there aren't any probe times yet 8837c478bd9Sstevel@tonic-gate */ 8847c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1 || lgrp_topo_levels < 2 || 8852dae3fb5Sjjc (lgrp_plat_probe_time_max == 0 && lgrp_plat_probe_time_min == -1)) 8867c478bd9Sstevel@tonic-gate return (0); 8877c478bd9Sstevel@tonic-gate 8887c478bd9Sstevel@tonic-gate /* 8897c478bd9Sstevel@tonic-gate * Make sure that latencies are symmetric between any two nodes 8907c478bd9Sstevel@tonic-gate * (ie. latency(node0, node1) == latency(node1, node0)) 8917c478bd9Sstevel@tonic-gate */ 8927c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 8937c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 8947c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[i][j]; 8957c478bd9Sstevel@tonic-gate t2 = lgrp_plat_probe_times[j][i]; 8967c478bd9Sstevel@tonic-gate 8977c478bd9Sstevel@tonic-gate if (t1 == 0 || t2 == 0 || t1 == t2) 8987c478bd9Sstevel@tonic-gate continue; 8997c478bd9Sstevel@tonic-gate 9007c478bd9Sstevel@tonic-gate return (-1); 9017c478bd9Sstevel@tonic-gate } 9027c478bd9Sstevel@tonic-gate 9037c478bd9Sstevel@tonic-gate /* 9047c478bd9Sstevel@tonic-gate * Local latencies should be same 9057c478bd9Sstevel@tonic-gate */ 9067c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[0][0]; 9077c478bd9Sstevel@tonic-gate for (i = 1; i < lgrp_plat_node_cnt; i++) { 9087c478bd9Sstevel@tonic-gate t2 = lgrp_plat_probe_times[i][i]; 9097c478bd9Sstevel@tonic-gate if (t2 == 0) 9107c478bd9Sstevel@tonic-gate continue; 9117c478bd9Sstevel@tonic-gate 9122dae3fb5Sjjc if (t1 == 0) { 9132dae3fb5Sjjc t1 = t2; 9142dae3fb5Sjjc continue; 9152dae3fb5Sjjc } 9162dae3fb5Sjjc 9177c478bd9Sstevel@tonic-gate if (t1 != t2) 9187c478bd9Sstevel@tonic-gate return (-2); 9197c478bd9Sstevel@tonic-gate } 9207c478bd9Sstevel@tonic-gate 9217c478bd9Sstevel@tonic-gate /* 9227c478bd9Sstevel@tonic-gate * Local latencies should be less than remote 9237c478bd9Sstevel@tonic-gate */ 9242dae3fb5Sjjc if (t1) { 9257c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 9267c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 9272dae3fb5Sjjc t2 = lgrp_plat_probe_times[i][j]; 9287c478bd9Sstevel@tonic-gate if (i == j || t2 == 0) 9297c478bd9Sstevel@tonic-gate continue; 9307c478bd9Sstevel@tonic-gate 9317c478bd9Sstevel@tonic-gate if (t1 >= t2) 9327c478bd9Sstevel@tonic-gate return (-3); 9337c478bd9Sstevel@tonic-gate } 9342dae3fb5Sjjc } 9357c478bd9Sstevel@tonic-gate 9367c478bd9Sstevel@tonic-gate /* 9377c478bd9Sstevel@tonic-gate * Rest of checks are not very useful for machines with less than 9387c478bd9Sstevel@tonic-gate * 4 nodes (which means less than 3 latencies on Opteron) 9397c478bd9Sstevel@tonic-gate */ 9407c478bd9Sstevel@tonic-gate if (lgrp_plat_node_cnt < 4) 9417c478bd9Sstevel@tonic-gate return (0); 9427c478bd9Sstevel@tonic-gate 9437c478bd9Sstevel@tonic-gate /* 9447c478bd9Sstevel@tonic-gate * Need to see whether done probing in order to verify number of 9457c478bd9Sstevel@tonic-gate * latencies are correct 9467c478bd9Sstevel@tonic-gate */ 9477c478bd9Sstevel@tonic-gate probed = 0; 9487c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 9497c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_times[i][i]) 9507c478bd9Sstevel@tonic-gate probed++; 9517c478bd9Sstevel@tonic-gate 9527c478bd9Sstevel@tonic-gate if (probed != lgrp_plat_node_cnt) 9537c478bd9Sstevel@tonic-gate return (0); 9547c478bd9Sstevel@tonic-gate 9557c478bd9Sstevel@tonic-gate /* 9567c478bd9Sstevel@tonic-gate * Determine number of unique latencies seen in probe times, 9577c478bd9Sstevel@tonic-gate * their values, and number of occurrences of each 9587c478bd9Sstevel@tonic-gate */ 9597c478bd9Sstevel@tonic-gate lgrp_plat_probe_nlatencies = 0; 9607c478bd9Sstevel@tonic-gate bzero(lgrp_plat_probe_lat_acct, 9617c478bd9Sstevel@tonic-gate MAX_NODES * sizeof (lgrp_plat_latency_acct_t)); 9627c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 9637c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 9647c478bd9Sstevel@tonic-gate int k; 9657c478bd9Sstevel@tonic-gate 9667c478bd9Sstevel@tonic-gate /* 9677c478bd9Sstevel@tonic-gate * Look at each probe time 9687c478bd9Sstevel@tonic-gate */ 9697c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[i][j]; 9707c478bd9Sstevel@tonic-gate if (t1 == 0) 9717c478bd9Sstevel@tonic-gate continue; 9727c478bd9Sstevel@tonic-gate 9737c478bd9Sstevel@tonic-gate /* 9747c478bd9Sstevel@tonic-gate * Account for unique latencies 9757c478bd9Sstevel@tonic-gate */ 9767c478bd9Sstevel@tonic-gate for (k = 0; k < lgrp_plat_node_cnt; k++) { 9777c478bd9Sstevel@tonic-gate l = &lgrp_plat_probe_lat_acct[k]; 9787c478bd9Sstevel@tonic-gate if (t1 == l->la_value) { 9797c478bd9Sstevel@tonic-gate /* 9807c478bd9Sstevel@tonic-gate * Increment number of occurrences 9817c478bd9Sstevel@tonic-gate * if seen before 9827c478bd9Sstevel@tonic-gate */ 9837c478bd9Sstevel@tonic-gate l->la_count++; 9847c478bd9Sstevel@tonic-gate break; 9857c478bd9Sstevel@tonic-gate } else if (l->la_value == 0) { 9867c478bd9Sstevel@tonic-gate /* 9877c478bd9Sstevel@tonic-gate * Record latency if haven't seen before 9887c478bd9Sstevel@tonic-gate */ 9897c478bd9Sstevel@tonic-gate l->la_value = t1; 9907c478bd9Sstevel@tonic-gate l->la_count++; 9917c478bd9Sstevel@tonic-gate lgrp_plat_probe_nlatencies++; 9927c478bd9Sstevel@tonic-gate break; 9937c478bd9Sstevel@tonic-gate } 9947c478bd9Sstevel@tonic-gate } 9957c478bd9Sstevel@tonic-gate } 9967c478bd9Sstevel@tonic-gate } 9977c478bd9Sstevel@tonic-gate 9987c478bd9Sstevel@tonic-gate /* 9997c478bd9Sstevel@tonic-gate * Number of latencies should be relative to number of 10007c478bd9Sstevel@tonic-gate * nodes in system: 10017c478bd9Sstevel@tonic-gate * - Same as nodes when nodes <= 2 10027c478bd9Sstevel@tonic-gate * - Less than nodes when nodes > 2 10037c478bd9Sstevel@tonic-gate * - Greater than 2 when nodes >= 4 10047c478bd9Sstevel@tonic-gate */ 10057c478bd9Sstevel@tonic-gate if ((lgrp_plat_node_cnt <= 2 && 10067c478bd9Sstevel@tonic-gate lgrp_plat_probe_nlatencies != lgrp_plat_node_cnt) || 10077c478bd9Sstevel@tonic-gate (lgrp_plat_node_cnt > 2 && 10087c478bd9Sstevel@tonic-gate lgrp_plat_probe_nlatencies >= lgrp_plat_node_cnt) || 10097c478bd9Sstevel@tonic-gate (lgrp_plat_node_cnt >= 4 && lgrp_topo_levels >= 3 && 10107c478bd9Sstevel@tonic-gate lgrp_plat_probe_nlatencies <= 2)) 10117c478bd9Sstevel@tonic-gate return (-4); 10127c478bd9Sstevel@tonic-gate 10137c478bd9Sstevel@tonic-gate /* 10147c478bd9Sstevel@tonic-gate * There should be more than one occurrence of every latency 10157c478bd9Sstevel@tonic-gate * as long as probing is complete 10167c478bd9Sstevel@tonic-gate */ 10177c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_probe_nlatencies; i++) { 10187c478bd9Sstevel@tonic-gate l = &lgrp_plat_probe_lat_acct[i]; 10197c478bd9Sstevel@tonic-gate if (l->la_count <= 1) 10207c478bd9Sstevel@tonic-gate return (-5); 10217c478bd9Sstevel@tonic-gate } 10227c478bd9Sstevel@tonic-gate return (0); 10237c478bd9Sstevel@tonic-gate } 10247c478bd9Sstevel@tonic-gate 10257c478bd9Sstevel@tonic-gate 10267c478bd9Sstevel@tonic-gate /* 10277c478bd9Sstevel@tonic-gate * Set lgroup latencies for 2 level lgroup topology 10287c478bd9Sstevel@tonic-gate */ 10297c478bd9Sstevel@tonic-gate static void 10307c478bd9Sstevel@tonic-gate lgrp_plat_2level_setup(void) 10317c478bd9Sstevel@tonic-gate { 10327c478bd9Sstevel@tonic-gate int i; 10337c478bd9Sstevel@tonic-gate 10347c478bd9Sstevel@tonic-gate if (lgrp_plat_node_cnt >= 4) 10357c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, 10367c478bd9Sstevel@tonic-gate "MPO only optimizing for local and remote\n"); 10377c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 10387c478bd9Sstevel@tonic-gate int j; 10397c478bd9Sstevel@tonic-gate 10407c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 10417c478bd9Sstevel@tonic-gate if (i == j) 10427c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][j] = 2; 10437c478bd9Sstevel@tonic-gate else 10447c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][j] = 3; 10457c478bd9Sstevel@tonic-gate } 10467c478bd9Sstevel@tonic-gate } 10477c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_min = 2; 10487c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max = 3; 10497c478bd9Sstevel@tonic-gate lgrp_config(LGRP_CONFIG_FLATTEN, 2, 0); 10507c478bd9Sstevel@tonic-gate } 10517c478bd9Sstevel@tonic-gate 10527c478bd9Sstevel@tonic-gate 10537c478bd9Sstevel@tonic-gate /* 10547c478bd9Sstevel@tonic-gate * Return time needed to probe from current CPU to memory in given node 10557c478bd9Sstevel@tonic-gate */ 10567c478bd9Sstevel@tonic-gate static hrtime_t 10577c478bd9Sstevel@tonic-gate lgrp_plat_probe_time(int to) 10587c478bd9Sstevel@tonic-gate { 10597c478bd9Sstevel@tonic-gate caddr_t buf; 10607c478bd9Sstevel@tonic-gate uint_t dev; 10617c478bd9Sstevel@tonic-gate /* LINTED: set but not used in function */ 10627c478bd9Sstevel@tonic-gate volatile uint_t dev_vendor; 10637c478bd9Sstevel@tonic-gate hrtime_t elapsed; 10647c478bd9Sstevel@tonic-gate hrtime_t end; 10657c478bd9Sstevel@tonic-gate int from; 10667c478bd9Sstevel@tonic-gate int i; 10677c478bd9Sstevel@tonic-gate int ipl; 10687c478bd9Sstevel@tonic-gate hrtime_t max; 10697c478bd9Sstevel@tonic-gate hrtime_t min; 10707c478bd9Sstevel@tonic-gate hrtime_t start; 10718949bcd6Sandrei int cnt; 10727c478bd9Sstevel@tonic-gate extern int use_sse_pagecopy; 10737c478bd9Sstevel@tonic-gate 10747c478bd9Sstevel@tonic-gate /* 10757c478bd9Sstevel@tonic-gate * Determine ID of node containing current CPU 10767c478bd9Sstevel@tonic-gate */ 10777c478bd9Sstevel@tonic-gate from = LGRP_PLAT_CPU_TO_NODE(CPU); 10787c478bd9Sstevel@tonic-gate 10797c478bd9Sstevel@tonic-gate /* 10807c478bd9Sstevel@tonic-gate * Do common work for probing main memory 10817c478bd9Sstevel@tonic-gate */ 10827c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_op == LGRP_PLAT_PROBE_PGCPY) { 10837c478bd9Sstevel@tonic-gate /* 10847c478bd9Sstevel@tonic-gate * Skip probing any nodes without memory and 10857c478bd9Sstevel@tonic-gate * set probe time to 0 10867c478bd9Sstevel@tonic-gate */ 10877c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_memory[to] == NULL) { 10887c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[from][to] = 0; 10897c478bd9Sstevel@tonic-gate return (0); 10907c478bd9Sstevel@tonic-gate } 10917c478bd9Sstevel@tonic-gate 10927c478bd9Sstevel@tonic-gate /* 10937c478bd9Sstevel@tonic-gate * Invalidate caches once instead of once every sample 10947c478bd9Sstevel@tonic-gate * which should cut cost of probing by a lot 10957c478bd9Sstevel@tonic-gate */ 10967c478bd9Sstevel@tonic-gate lgrp_plat_flush_cost = gethrtime(); 10977c478bd9Sstevel@tonic-gate invalidate_cache(); 10987c478bd9Sstevel@tonic-gate lgrp_plat_flush_cost = gethrtime() - lgrp_plat_flush_cost; 10997c478bd9Sstevel@tonic-gate lgrp_plat_probe_cost_total += lgrp_plat_flush_cost; 11007c478bd9Sstevel@tonic-gate } 11017c478bd9Sstevel@tonic-gate 11027c478bd9Sstevel@tonic-gate /* 11037c478bd9Sstevel@tonic-gate * Probe from current CPU to given memory using specified operation 11047c478bd9Sstevel@tonic-gate * and take specified number of samples 11057c478bd9Sstevel@tonic-gate */ 11067c478bd9Sstevel@tonic-gate max = 0; 11077c478bd9Sstevel@tonic-gate min = -1; 11087c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_probe_nsamples; i++) { 11097c478bd9Sstevel@tonic-gate lgrp_plat_probe_cost = gethrtime(); 11107c478bd9Sstevel@tonic-gate 11117c478bd9Sstevel@tonic-gate /* 11127c478bd9Sstevel@tonic-gate * Can't measure probe time if gethrtime() isn't working yet 11137c478bd9Sstevel@tonic-gate */ 11147c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_cost == 0 && gethrtime() == 0) 11157c478bd9Sstevel@tonic-gate return (0); 11167c478bd9Sstevel@tonic-gate 11177c478bd9Sstevel@tonic-gate switch (lgrp_plat_probe_op) { 11187c478bd9Sstevel@tonic-gate 11197c478bd9Sstevel@tonic-gate case LGRP_PLAT_PROBE_PGCPY: 11207c478bd9Sstevel@tonic-gate default: 11217c478bd9Sstevel@tonic-gate /* 11227c478bd9Sstevel@tonic-gate * Measure how long it takes to copy page 11237c478bd9Sstevel@tonic-gate * on top of itself 11247c478bd9Sstevel@tonic-gate */ 11257c478bd9Sstevel@tonic-gate buf = lgrp_plat_probe_memory[to] + (i * PAGESIZE); 11267c478bd9Sstevel@tonic-gate 11277c478bd9Sstevel@tonic-gate kpreempt_disable(); 11287c478bd9Sstevel@tonic-gate ipl = splhigh(); 11297c478bd9Sstevel@tonic-gate start = gethrtime(); 11307c478bd9Sstevel@tonic-gate if (use_sse_pagecopy) 11317c478bd9Sstevel@tonic-gate hwblkpagecopy(buf, buf); 11327c478bd9Sstevel@tonic-gate else 11337c478bd9Sstevel@tonic-gate bcopy(buf, buf, PAGESIZE); 11347c478bd9Sstevel@tonic-gate end = gethrtime(); 11357c478bd9Sstevel@tonic-gate elapsed = end - start; 11367c478bd9Sstevel@tonic-gate splx(ipl); 11377c478bd9Sstevel@tonic-gate kpreempt_enable(); 11387c478bd9Sstevel@tonic-gate break; 11397c478bd9Sstevel@tonic-gate 11407c478bd9Sstevel@tonic-gate case LGRP_PLAT_PROBE_VENDOR: 11417c478bd9Sstevel@tonic-gate /* 11427c478bd9Sstevel@tonic-gate * Measure how long it takes to read vendor ID from 11437c478bd9Sstevel@tonic-gate * Northbridge 11447c478bd9Sstevel@tonic-gate */ 11457c478bd9Sstevel@tonic-gate dev = OPT_PCS_DEV_NODE0 + to; 11467c478bd9Sstevel@tonic-gate kpreempt_disable(); 11477c478bd9Sstevel@tonic-gate ipl = spl8(); 11487c478bd9Sstevel@tonic-gate outl(PCI_CONFADD, PCI_CADDR1(0, dev, opt_probe_func, 11497c478bd9Sstevel@tonic-gate OPT_PCS_OFF_VENDOR)); 11507c478bd9Sstevel@tonic-gate start = gethrtime(); 11518949bcd6Sandrei for (cnt = 0; cnt < lgrp_plat_probe_nreads; cnt++) 11527c478bd9Sstevel@tonic-gate dev_vendor = inl(PCI_CONFDATA); 11537c478bd9Sstevel@tonic-gate end = gethrtime(); 11548949bcd6Sandrei elapsed = (end - start) / lgrp_plat_probe_nreads; 11557c478bd9Sstevel@tonic-gate splx(ipl); 11567c478bd9Sstevel@tonic-gate kpreempt_enable(); 11577c478bd9Sstevel@tonic-gate break; 11587c478bd9Sstevel@tonic-gate } 11597c478bd9Sstevel@tonic-gate 11607c478bd9Sstevel@tonic-gate lgrp_plat_probe_cost = gethrtime() - lgrp_plat_probe_cost; 11617c478bd9Sstevel@tonic-gate lgrp_plat_probe_cost_total += lgrp_plat_probe_cost; 11627c478bd9Sstevel@tonic-gate 11637c478bd9Sstevel@tonic-gate if (min == -1 || elapsed < min) 11647c478bd9Sstevel@tonic-gate min = elapsed; 11657c478bd9Sstevel@tonic-gate if (elapsed > max) 11667c478bd9Sstevel@tonic-gate max = elapsed; 11677c478bd9Sstevel@tonic-gate } 11687c478bd9Sstevel@tonic-gate 11697c478bd9Sstevel@tonic-gate /* 11707c478bd9Sstevel@tonic-gate * Update minimum and maximum probe times between 11717c478bd9Sstevel@tonic-gate * these two nodes 11727c478bd9Sstevel@tonic-gate */ 11737c478bd9Sstevel@tonic-gate if (min < lgrp_plat_probe_min[from][to] || 11747c478bd9Sstevel@tonic-gate lgrp_plat_probe_min[from][to] == 0) 11757c478bd9Sstevel@tonic-gate lgrp_plat_probe_min[from][to] = min; 11767c478bd9Sstevel@tonic-gate 11777c478bd9Sstevel@tonic-gate if (max > lgrp_plat_probe_max[from][to]) 11787c478bd9Sstevel@tonic-gate lgrp_plat_probe_max[from][to] = max; 11797c478bd9Sstevel@tonic-gate 11807c478bd9Sstevel@tonic-gate return (min); 11817c478bd9Sstevel@tonic-gate } 11827c478bd9Sstevel@tonic-gate 11837c478bd9Sstevel@tonic-gate 11847c478bd9Sstevel@tonic-gate /* 11857c478bd9Sstevel@tonic-gate * Probe memory in each node from current CPU to determine latency topology 11867c478bd9Sstevel@tonic-gate */ 11877c478bd9Sstevel@tonic-gate void 11887c478bd9Sstevel@tonic-gate lgrp_plat_probe(void) 11897c478bd9Sstevel@tonic-gate { 11907c478bd9Sstevel@tonic-gate int from; 11917c478bd9Sstevel@tonic-gate int i; 11927c478bd9Sstevel@tonic-gate hrtime_t probe_time; 11937c478bd9Sstevel@tonic-gate int to; 11947c478bd9Sstevel@tonic-gate 11957c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1 || lgrp_topo_ht_limit() <= 2) 11967c478bd9Sstevel@tonic-gate return; 11977c478bd9Sstevel@tonic-gate 11987c478bd9Sstevel@tonic-gate /* 11997c478bd9Sstevel@tonic-gate * Determine ID of node containing current CPU 12007c478bd9Sstevel@tonic-gate */ 12017c478bd9Sstevel@tonic-gate from = LGRP_PLAT_CPU_TO_NODE(CPU); 12027c478bd9Sstevel@tonic-gate 12037c478bd9Sstevel@tonic-gate /* 12047c478bd9Sstevel@tonic-gate * Don't need to probe if got times already 12057c478bd9Sstevel@tonic-gate */ 12067c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_times[from][from] != 0) 12077c478bd9Sstevel@tonic-gate return; 12087c478bd9Sstevel@tonic-gate 12097c478bd9Sstevel@tonic-gate /* 12107c478bd9Sstevel@tonic-gate * Read vendor ID in Northbridge or read and write page(s) 12117c478bd9Sstevel@tonic-gate * in each node from current CPU and remember how long it takes, 12127c478bd9Sstevel@tonic-gate * so we can build latency topology of machine later. 12137c478bd9Sstevel@tonic-gate * This should approximate the memory latency between each node. 12147c478bd9Sstevel@tonic-gate */ 12157c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_probe_nrounds; i++) 12167c478bd9Sstevel@tonic-gate for (to = 0; to < lgrp_plat_node_cnt; to++) { 12177c478bd9Sstevel@tonic-gate /* 12187c478bd9Sstevel@tonic-gate * Get probe time and bail out if can't get it yet 12197c478bd9Sstevel@tonic-gate */ 12207c478bd9Sstevel@tonic-gate probe_time = lgrp_plat_probe_time(to); 12217c478bd9Sstevel@tonic-gate if (probe_time == 0) 12227c478bd9Sstevel@tonic-gate return; 12237c478bd9Sstevel@tonic-gate 12247c478bd9Sstevel@tonic-gate /* 12257c478bd9Sstevel@tonic-gate * Keep lowest probe time as latency between nodes 12267c478bd9Sstevel@tonic-gate */ 12277c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_times[from][to] == 0 || 12287c478bd9Sstevel@tonic-gate probe_time < lgrp_plat_probe_times[from][to]) 12297c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[from][to] = probe_time; 12307c478bd9Sstevel@tonic-gate 12317c478bd9Sstevel@tonic-gate /* 12327c478bd9Sstevel@tonic-gate * Update overall minimum and maximum probe times 12337c478bd9Sstevel@tonic-gate * across all nodes 12347c478bd9Sstevel@tonic-gate */ 12357c478bd9Sstevel@tonic-gate if (probe_time < lgrp_plat_probe_time_min || 12367c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_min == -1) 12377c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_min = probe_time; 12387c478bd9Sstevel@tonic-gate if (probe_time > lgrp_plat_probe_time_max) 12397c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max = probe_time; 12407c478bd9Sstevel@tonic-gate } 12417c478bd9Sstevel@tonic-gate 12427c478bd9Sstevel@tonic-gate /* 12437c478bd9Sstevel@tonic-gate * - Fix up latencies such that local latencies are same, 12447c478bd9Sstevel@tonic-gate * latency(i, j) == latency(j, i), etc. (if possible) 12457c478bd9Sstevel@tonic-gate * 12467c478bd9Sstevel@tonic-gate * - Verify that latencies look ok 12477c478bd9Sstevel@tonic-gate * 12487c478bd9Sstevel@tonic-gate * - Fallback to just optimizing for local and remote if 12497c478bd9Sstevel@tonic-gate * latencies didn't look right 12507c478bd9Sstevel@tonic-gate */ 12517c478bd9Sstevel@tonic-gate lgrp_plat_latency_adjust(); 12527c478bd9Sstevel@tonic-gate lgrp_plat_probe_error_code = lgrp_plat_latency_verify(); 12537c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_error_code) 12547c478bd9Sstevel@tonic-gate lgrp_plat_2level_setup(); 12557c478bd9Sstevel@tonic-gate } 12567c478bd9Sstevel@tonic-gate 12577c478bd9Sstevel@tonic-gate 12587c478bd9Sstevel@tonic-gate /* 12597c478bd9Sstevel@tonic-gate * Platform-specific initialization 12607c478bd9Sstevel@tonic-gate */ 12617c478bd9Sstevel@tonic-gate void 12627c478bd9Sstevel@tonic-gate lgrp_plat_main_init(void) 12637c478bd9Sstevel@tonic-gate { 12647c478bd9Sstevel@tonic-gate int curnode; 12657c478bd9Sstevel@tonic-gate int ht_limit; 12667c478bd9Sstevel@tonic-gate int i; 12677c478bd9Sstevel@tonic-gate 12687c478bd9Sstevel@tonic-gate /* 12697c478bd9Sstevel@tonic-gate * Print a notice that MPO is disabled when memory is interleaved 12707c478bd9Sstevel@tonic-gate * across nodes....Would do this when it is discovered, but can't 12717c478bd9Sstevel@tonic-gate * because it happens way too early during boot.... 12727c478bd9Sstevel@tonic-gate */ 12737c478bd9Sstevel@tonic-gate if (lgrp_plat_mem_intrlv) 12747c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, 12757c478bd9Sstevel@tonic-gate "MPO disabled because memory is interleaved\n"); 12767c478bd9Sstevel@tonic-gate 12777c478bd9Sstevel@tonic-gate /* 12787c478bd9Sstevel@tonic-gate * Don't bother to do any probing if there is only one node or the 12797c478bd9Sstevel@tonic-gate * height of the lgroup topology less than or equal to 2 12807c478bd9Sstevel@tonic-gate */ 12817c478bd9Sstevel@tonic-gate ht_limit = lgrp_topo_ht_limit(); 12827c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1 || ht_limit <= 2) { 12837c478bd9Sstevel@tonic-gate /* 12847c478bd9Sstevel@tonic-gate * Setup lgroup latencies for 2 level lgroup topology 12857c478bd9Sstevel@tonic-gate * (ie. local and remote only) if they haven't been set yet 12867c478bd9Sstevel@tonic-gate */ 12877c478bd9Sstevel@tonic-gate if (ht_limit == 2 && lgrp_plat_probe_time_min == -1 && 12887c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max == 0) 12897c478bd9Sstevel@tonic-gate lgrp_plat_2level_setup(); 12907c478bd9Sstevel@tonic-gate return; 12917c478bd9Sstevel@tonic-gate } 12927c478bd9Sstevel@tonic-gate 12937c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_op == LGRP_PLAT_PROBE_VENDOR) { 12947c478bd9Sstevel@tonic-gate /* 12957c478bd9Sstevel@tonic-gate * Should have been able to probe from CPU 0 when it was added 12967c478bd9Sstevel@tonic-gate * to lgroup hierarchy, but may not have been able to then 12977c478bd9Sstevel@tonic-gate * because it happens so early in boot that gethrtime() hasn't 12987c478bd9Sstevel@tonic-gate * been initialized. (:-( 12997c478bd9Sstevel@tonic-gate */ 13007c478bd9Sstevel@tonic-gate curnode = LGRP_PLAT_CPU_TO_NODE(CPU); 13017c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_times[curnode][curnode] == 0) 13027c478bd9Sstevel@tonic-gate lgrp_plat_probe(); 13037c478bd9Sstevel@tonic-gate 13047c478bd9Sstevel@tonic-gate return; 13057c478bd9Sstevel@tonic-gate } 13067c478bd9Sstevel@tonic-gate 13077c478bd9Sstevel@tonic-gate /* 13087c478bd9Sstevel@tonic-gate * When probing memory, use one page for every sample to determine 13097c478bd9Sstevel@tonic-gate * lgroup topology and taking multiple samples 13107c478bd9Sstevel@tonic-gate */ 13117c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_memsize == 0) 13127c478bd9Sstevel@tonic-gate lgrp_plat_probe_memsize = PAGESIZE * 13137c478bd9Sstevel@tonic-gate lgrp_plat_probe_nsamples; 13147c478bd9Sstevel@tonic-gate 13157c478bd9Sstevel@tonic-gate /* 13167c478bd9Sstevel@tonic-gate * Map memory in each node needed for probing to determine latency 13177c478bd9Sstevel@tonic-gate * topology 13187c478bd9Sstevel@tonic-gate */ 13197c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 13207c478bd9Sstevel@tonic-gate int mnode; 13217c478bd9Sstevel@tonic-gate 13227c478bd9Sstevel@tonic-gate /* 13237c478bd9Sstevel@tonic-gate * Skip this node and leave its probe page NULL 13247c478bd9Sstevel@tonic-gate * if it doesn't have any memory 13257c478bd9Sstevel@tonic-gate */ 13267c478bd9Sstevel@tonic-gate mnode = plat_lgrphand_to_mem_node((lgrp_handle_t)i); 13277c478bd9Sstevel@tonic-gate if (!mem_node_config[mnode].exists) { 13287c478bd9Sstevel@tonic-gate lgrp_plat_probe_memory[i] = NULL; 13297c478bd9Sstevel@tonic-gate continue; 13307c478bd9Sstevel@tonic-gate } 13317c478bd9Sstevel@tonic-gate 13327c478bd9Sstevel@tonic-gate /* 13337c478bd9Sstevel@tonic-gate * Allocate one kernel virtual page 13347c478bd9Sstevel@tonic-gate */ 13357c478bd9Sstevel@tonic-gate lgrp_plat_probe_memory[i] = vmem_alloc(heap_arena, 13367c478bd9Sstevel@tonic-gate lgrp_plat_probe_memsize, VM_NOSLEEP); 13377c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_memory[i] == NULL) { 13387c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 13397c478bd9Sstevel@tonic-gate "lgrp_plat_main_init: couldn't allocate memory"); 13407c478bd9Sstevel@tonic-gate return; 13417c478bd9Sstevel@tonic-gate } 13427c478bd9Sstevel@tonic-gate 13437c478bd9Sstevel@tonic-gate /* 13447c478bd9Sstevel@tonic-gate * Map virtual page to first page in node 13457c478bd9Sstevel@tonic-gate */ 13467c478bd9Sstevel@tonic-gate hat_devload(kas.a_hat, lgrp_plat_probe_memory[i], 13477c478bd9Sstevel@tonic-gate lgrp_plat_probe_memsize, 13487c478bd9Sstevel@tonic-gate lgrp_plat_probe_pfn[i], 13497c478bd9Sstevel@tonic-gate PROT_READ | PROT_WRITE | HAT_PLAT_NOCACHE, 13507c478bd9Sstevel@tonic-gate HAT_LOAD_NOCONSIST); 13517c478bd9Sstevel@tonic-gate } 13527c478bd9Sstevel@tonic-gate 13537c478bd9Sstevel@tonic-gate /* 13547c478bd9Sstevel@tonic-gate * Probe from current CPU 13557c478bd9Sstevel@tonic-gate */ 13567c478bd9Sstevel@tonic-gate lgrp_plat_probe(); 13577c478bd9Sstevel@tonic-gate } 13587c478bd9Sstevel@tonic-gate 13597c478bd9Sstevel@tonic-gate /* 13607c478bd9Sstevel@tonic-gate * Allocate additional space for an lgroup. 13617c478bd9Sstevel@tonic-gate */ 13627c478bd9Sstevel@tonic-gate /* ARGSUSED */ 13637c478bd9Sstevel@tonic-gate lgrp_t * 13647c478bd9Sstevel@tonic-gate lgrp_plat_alloc(lgrp_id_t lgrpid) 13657c478bd9Sstevel@tonic-gate { 13667c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 13677c478bd9Sstevel@tonic-gate 13687c478bd9Sstevel@tonic-gate lgrp = &lgrp_space[nlgrps_alloc++]; 13697c478bd9Sstevel@tonic-gate if (lgrpid >= NLGRP || nlgrps_alloc > NLGRP) 13707c478bd9Sstevel@tonic-gate return (NULL); 13717c478bd9Sstevel@tonic-gate return (lgrp); 13727c478bd9Sstevel@tonic-gate } 13737c478bd9Sstevel@tonic-gate 13747c478bd9Sstevel@tonic-gate /* 13757c478bd9Sstevel@tonic-gate * Platform handling for (re)configuration changes 13767c478bd9Sstevel@tonic-gate */ 13777c478bd9Sstevel@tonic-gate /* ARGSUSED */ 13787c478bd9Sstevel@tonic-gate void 13797c478bd9Sstevel@tonic-gate lgrp_plat_config(lgrp_config_flag_t flag, uintptr_t arg) 13807c478bd9Sstevel@tonic-gate { 13817c478bd9Sstevel@tonic-gate } 13827c478bd9Sstevel@tonic-gate 13837c478bd9Sstevel@tonic-gate /* 13847c478bd9Sstevel@tonic-gate * Return the platform handle for the lgroup containing the given CPU 13857c478bd9Sstevel@tonic-gate */ 13867c478bd9Sstevel@tonic-gate /* ARGSUSED */ 13877c478bd9Sstevel@tonic-gate lgrp_handle_t 13887c478bd9Sstevel@tonic-gate lgrp_plat_cpu_to_hand(processorid_t id) 13897c478bd9Sstevel@tonic-gate { 13907c478bd9Sstevel@tonic-gate if (lgrp_plat_node_cnt == 1) 13917c478bd9Sstevel@tonic-gate return (LGRP_DEFAULT_HANDLE); 13927c478bd9Sstevel@tonic-gate 13937c478bd9Sstevel@tonic-gate return ((lgrp_handle_t)LGRP_PLAT_CPU_TO_NODE(cpu[id])); 13947c478bd9Sstevel@tonic-gate } 13957c478bd9Sstevel@tonic-gate 13967c478bd9Sstevel@tonic-gate /* 13977c478bd9Sstevel@tonic-gate * Return the platform handle of the lgroup that contains the physical memory 13987c478bd9Sstevel@tonic-gate * corresponding to the given page frame number 13997c478bd9Sstevel@tonic-gate */ 14007c478bd9Sstevel@tonic-gate /* ARGSUSED */ 14017c478bd9Sstevel@tonic-gate lgrp_handle_t 14027c478bd9Sstevel@tonic-gate lgrp_plat_pfn_to_hand(pfn_t pfn) 14037c478bd9Sstevel@tonic-gate { 14047c478bd9Sstevel@tonic-gate int mnode; 14057c478bd9Sstevel@tonic-gate 14067c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 14077c478bd9Sstevel@tonic-gate return (LGRP_DEFAULT_HANDLE); 14087c478bd9Sstevel@tonic-gate 1409c39996a7Sstevel if (pfn > physmax) 1410c39996a7Sstevel return (LGRP_NULL_HANDLE); 1411c39996a7Sstevel 14127c478bd9Sstevel@tonic-gate mnode = plat_pfn_to_mem_node(pfn); 1413c39996a7Sstevel if (mnode < 0) 1414c39996a7Sstevel return (LGRP_NULL_HANDLE); 1415c39996a7Sstevel 14167c478bd9Sstevel@tonic-gate return (MEM_NODE_2_LGRPHAND(mnode)); 14177c478bd9Sstevel@tonic-gate } 14187c478bd9Sstevel@tonic-gate 14197c478bd9Sstevel@tonic-gate /* 14207c478bd9Sstevel@tonic-gate * Return the maximum number of lgrps supported by the platform. 14217c478bd9Sstevel@tonic-gate * Before lgrp topology is known it returns an estimate based on the number of 14227c478bd9Sstevel@tonic-gate * nodes. Once topology is known it returns the actual maximim number of lgrps 14237c478bd9Sstevel@tonic-gate * created. Since x86 doesn't support dynamic addition of new nodes, this number 14247c478bd9Sstevel@tonic-gate * may not grow during system lifetime. 14257c478bd9Sstevel@tonic-gate */ 14267c478bd9Sstevel@tonic-gate int 14277c478bd9Sstevel@tonic-gate lgrp_plat_max_lgrps() 14287c478bd9Sstevel@tonic-gate { 14297c478bd9Sstevel@tonic-gate return (lgrp_topo_initialized ? 14307c478bd9Sstevel@tonic-gate lgrp_alloc_max + 1 : 14317c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt * (lgrp_plat_node_cnt - 1) + 1); 14327c478bd9Sstevel@tonic-gate } 14337c478bd9Sstevel@tonic-gate 14347c478bd9Sstevel@tonic-gate /* 14357c478bd9Sstevel@tonic-gate * Return the number of free, allocatable, or installed 14367c478bd9Sstevel@tonic-gate * pages in an lgroup 14377c478bd9Sstevel@tonic-gate * This is a copy of the MAX_MEM_NODES == 1 version of the routine 14387c478bd9Sstevel@tonic-gate * used when MPO is disabled (i.e. single lgroup) or this is the root lgroup 14397c478bd9Sstevel@tonic-gate */ 14407c478bd9Sstevel@tonic-gate /* ARGSUSED */ 14417c478bd9Sstevel@tonic-gate static pgcnt_t 14427c478bd9Sstevel@tonic-gate lgrp_plat_mem_size_default(lgrp_handle_t lgrphand, lgrp_mem_query_t query) 14437c478bd9Sstevel@tonic-gate { 14447c478bd9Sstevel@tonic-gate struct memlist *mlist; 14457c478bd9Sstevel@tonic-gate pgcnt_t npgs = 0; 14467c478bd9Sstevel@tonic-gate extern struct memlist *phys_avail; 14477c478bd9Sstevel@tonic-gate extern struct memlist *phys_install; 14487c478bd9Sstevel@tonic-gate 14497c478bd9Sstevel@tonic-gate switch (query) { 14507c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_FREE: 14517c478bd9Sstevel@tonic-gate return ((pgcnt_t)freemem); 14527c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_AVAIL: 14537c478bd9Sstevel@tonic-gate memlist_read_lock(); 14547c478bd9Sstevel@tonic-gate for (mlist = phys_avail; mlist; mlist = mlist->next) 14557c478bd9Sstevel@tonic-gate npgs += btop(mlist->size); 14567c478bd9Sstevel@tonic-gate memlist_read_unlock(); 14577c478bd9Sstevel@tonic-gate return (npgs); 14587c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_INSTALL: 14597c478bd9Sstevel@tonic-gate memlist_read_lock(); 14607c478bd9Sstevel@tonic-gate for (mlist = phys_install; mlist; mlist = mlist->next) 14617c478bd9Sstevel@tonic-gate npgs += btop(mlist->size); 14627c478bd9Sstevel@tonic-gate memlist_read_unlock(); 14637c478bd9Sstevel@tonic-gate return (npgs); 14647c478bd9Sstevel@tonic-gate default: 14657c478bd9Sstevel@tonic-gate return ((pgcnt_t)0); 14667c478bd9Sstevel@tonic-gate } 14677c478bd9Sstevel@tonic-gate } 14687c478bd9Sstevel@tonic-gate 14697c478bd9Sstevel@tonic-gate /* 14707c478bd9Sstevel@tonic-gate * Return the number of free pages in an lgroup. 14717c478bd9Sstevel@tonic-gate * 14727c478bd9Sstevel@tonic-gate * For query of LGRP_MEM_SIZE_FREE, return the number of base pagesize 14737c478bd9Sstevel@tonic-gate * pages on freelists. For query of LGRP_MEM_SIZE_AVAIL, return the 14747c478bd9Sstevel@tonic-gate * number of allocatable base pagesize pages corresponding to the 14757c478bd9Sstevel@tonic-gate * lgroup (e.g. do not include page_t's, BOP_ALLOC()'ed memory, ..) 14767c478bd9Sstevel@tonic-gate * For query of LGRP_MEM_SIZE_INSTALL, return the amount of physical 14777c478bd9Sstevel@tonic-gate * memory installed, regardless of whether or not it's usable. 14787c478bd9Sstevel@tonic-gate */ 14797c478bd9Sstevel@tonic-gate pgcnt_t 14807c478bd9Sstevel@tonic-gate lgrp_plat_mem_size(lgrp_handle_t plathand, lgrp_mem_query_t query) 14817c478bd9Sstevel@tonic-gate { 14827c478bd9Sstevel@tonic-gate int mnode; 14837c478bd9Sstevel@tonic-gate pgcnt_t npgs = (pgcnt_t)0; 14847c478bd9Sstevel@tonic-gate extern struct memlist *phys_avail; 14857c478bd9Sstevel@tonic-gate extern struct memlist *phys_install; 14867c478bd9Sstevel@tonic-gate 14877c478bd9Sstevel@tonic-gate 14887c478bd9Sstevel@tonic-gate if (plathand == LGRP_DEFAULT_HANDLE) 14897c478bd9Sstevel@tonic-gate return (lgrp_plat_mem_size_default(plathand, query)); 14907c478bd9Sstevel@tonic-gate 14917c478bd9Sstevel@tonic-gate if (plathand != LGRP_NULL_HANDLE) { 14927c478bd9Sstevel@tonic-gate mnode = plat_lgrphand_to_mem_node(plathand); 14937c478bd9Sstevel@tonic-gate if (mnode >= 0 && mem_node_config[mnode].exists) { 14947c478bd9Sstevel@tonic-gate switch (query) { 14957c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_FREE: 1496affbd3ccSkchow npgs = MNODE_PGCNT(mnode); 14977c478bd9Sstevel@tonic-gate break; 14987c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_AVAIL: 14997c478bd9Sstevel@tonic-gate npgs = mem_node_memlist_pages(mnode, 15007c478bd9Sstevel@tonic-gate phys_avail); 15017c478bd9Sstevel@tonic-gate break; 15027c478bd9Sstevel@tonic-gate case LGRP_MEM_SIZE_INSTALL: 15037c478bd9Sstevel@tonic-gate npgs = mem_node_memlist_pages(mnode, 15047c478bd9Sstevel@tonic-gate phys_install); 15057c478bd9Sstevel@tonic-gate break; 15067c478bd9Sstevel@tonic-gate default: 15077c478bd9Sstevel@tonic-gate break; 15087c478bd9Sstevel@tonic-gate } 15097c478bd9Sstevel@tonic-gate } 15107c478bd9Sstevel@tonic-gate } 15117c478bd9Sstevel@tonic-gate return (npgs); 15127c478bd9Sstevel@tonic-gate } 15137c478bd9Sstevel@tonic-gate 15147c478bd9Sstevel@tonic-gate /* 15157c478bd9Sstevel@tonic-gate * Return latency between "from" and "to" lgroups 15167c478bd9Sstevel@tonic-gate * 15177c478bd9Sstevel@tonic-gate * This latency number can only be used for relative comparison 15187c478bd9Sstevel@tonic-gate * between lgroups on the running system, cannot be used across platforms, 15197c478bd9Sstevel@tonic-gate * and may not reflect the actual latency. It is platform and implementation 15207c478bd9Sstevel@tonic-gate * specific, so platform gets to decide its value. It would be nice if the 15217c478bd9Sstevel@tonic-gate * number was at least proportional to make comparisons more meaningful though. 15227c478bd9Sstevel@tonic-gate */ 15237c478bd9Sstevel@tonic-gate /* ARGSUSED */ 15247c478bd9Sstevel@tonic-gate int 15257c478bd9Sstevel@tonic-gate lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to) 15267c478bd9Sstevel@tonic-gate { 15277c478bd9Sstevel@tonic-gate lgrp_handle_t src, dest; 15287c478bd9Sstevel@tonic-gate 15297c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 15307c478bd9Sstevel@tonic-gate return (0); 15317c478bd9Sstevel@tonic-gate 15327c478bd9Sstevel@tonic-gate /* 15337c478bd9Sstevel@tonic-gate * Return max latency for root lgroup 15347c478bd9Sstevel@tonic-gate */ 15357c478bd9Sstevel@tonic-gate if (from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE) 15367c478bd9Sstevel@tonic-gate return (lgrp_plat_probe_time_max); 15377c478bd9Sstevel@tonic-gate 15387c478bd9Sstevel@tonic-gate src = from; 15397c478bd9Sstevel@tonic-gate dest = to; 15407c478bd9Sstevel@tonic-gate 15417c478bd9Sstevel@tonic-gate /* 15427c478bd9Sstevel@tonic-gate * Return 0 for nodes (lgroup platform handles) out of range 15437c478bd9Sstevel@tonic-gate */ 15447c478bd9Sstevel@tonic-gate if (src < 0 || src >= MAX_NODES || dest < 0 || dest >= MAX_NODES) 15457c478bd9Sstevel@tonic-gate return (0); 15467c478bd9Sstevel@tonic-gate 15477c478bd9Sstevel@tonic-gate /* 15487c478bd9Sstevel@tonic-gate * Probe from current CPU if its lgroup latencies haven't been set yet 15497c478bd9Sstevel@tonic-gate * and we are trying to get latency from current CPU to some node 15507c478bd9Sstevel@tonic-gate */ 15517c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_times[src][src] == 0 && 15527c478bd9Sstevel@tonic-gate LGRP_PLAT_CPU_TO_NODE(CPU) == src) 15537c478bd9Sstevel@tonic-gate lgrp_plat_probe(); 15547c478bd9Sstevel@tonic-gate 15557c478bd9Sstevel@tonic-gate return (lgrp_plat_probe_times[src][dest]); 15567c478bd9Sstevel@tonic-gate } 15577c478bd9Sstevel@tonic-gate 15587c478bd9Sstevel@tonic-gate /* 15597c478bd9Sstevel@tonic-gate * Return platform handle for root lgroup 15607c478bd9Sstevel@tonic-gate */ 15617c478bd9Sstevel@tonic-gate lgrp_handle_t 15627c478bd9Sstevel@tonic-gate lgrp_plat_root_hand(void) 15637c478bd9Sstevel@tonic-gate { 15647c478bd9Sstevel@tonic-gate return (LGRP_DEFAULT_HANDLE); 15657c478bd9Sstevel@tonic-gate } 1566