17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5c39996a7Sstevel * Common Development and Distribution License (the "License"). 6c39996a7Sstevel * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 21c39996a7Sstevel 227c478bd9Sstevel@tonic-gate /* 23*472714d6Skchow * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #include <sys/archsystm.h> /* for {in,out}{b,w,l}() */ 317c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 32f78a91cdSjjc #include <sys/controlregs.h> 337c478bd9Sstevel@tonic-gate #include <sys/cpupart.h> 347c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 357c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 367c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 377c478bd9Sstevel@tonic-gate #include <sys/memlist.h> 387c478bd9Sstevel@tonic-gate #include <sys/memnode.h> 397c478bd9Sstevel@tonic-gate #include <sys/mman.h> 40ef50d8c0Sesaxe #include <sys/pci_cfgspace.h> 41ef50d8c0Sesaxe #include <sys/pci_impl.h> 427c478bd9Sstevel@tonic-gate #include <sys/param.h> 43fb2f18f8Sesaxe #include <sys/pghw.h> 447c478bd9Sstevel@tonic-gate #include <sys/promif.h> /* for prom_printf() */ 457c478bd9Sstevel@tonic-gate #include <sys/systm.h> 467c478bd9Sstevel@tonic-gate #include <sys/thread.h> 477c478bd9Sstevel@tonic-gate #include <sys/types.h> 487c478bd9Sstevel@tonic-gate #include <sys/var.h> 497c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h> /* for x86_feature and X86_AMD */ 50*472714d6Skchow #include <sys/sysmacros.h> 517c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h> 527c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 53affbd3ccSkchow #include <vm/vm_dep.h> 547c478bd9Sstevel@tonic-gate 557c478bd9Sstevel@tonic-gate 567c478bd9Sstevel@tonic-gate /* 577c478bd9Sstevel@tonic-gate * lgroup platform support for x86 platforms. 
587c478bd9Sstevel@tonic-gate */ 597c478bd9Sstevel@tonic-gate 607c478bd9Sstevel@tonic-gate #define MAX_NODES 8 617c478bd9Sstevel@tonic-gate #define NLGRP (MAX_NODES * (MAX_NODES - 1) + 1) 627c478bd9Sstevel@tonic-gate 63fb2f18f8Sesaxe #define LGRP_PLAT_CPU_TO_NODE(cpu) (pg_plat_hw_instance_id(cpu, PGHW_CHIP)) 647c478bd9Sstevel@tonic-gate 657c478bd9Sstevel@tonic-gate #define LGRP_PLAT_PROBE_NROUNDS 64 /* default laps for probing */ 667c478bd9Sstevel@tonic-gate #define LGRP_PLAT_PROBE_NSAMPLES 1 /* default samples to take */ 678949bcd6Sandrei #define LGRP_PLAT_PROBE_NREADS 256 /* number of vendor ID reads */ 687c478bd9Sstevel@tonic-gate 697c478bd9Sstevel@tonic-gate /* 707c478bd9Sstevel@tonic-gate * Multiprocessor Opteron machines have Non Uniform Memory Access (NUMA). 717c478bd9Sstevel@tonic-gate * 72f78a91cdSjjc * Until this code supports reading System Resource Affinity Table (SRAT), 737c478bd9Sstevel@tonic-gate * we need to examine registers in PCI configuration space to determine how 747c478bd9Sstevel@tonic-gate * many nodes are in the system and which CPUs and memory are in each node. 757c478bd9Sstevel@tonic-gate * This could be determined by probing all memory from each CPU, but that is 767c478bd9Sstevel@tonic-gate * too expensive to do while booting the kernel. 777c478bd9Sstevel@tonic-gate * 787c478bd9Sstevel@tonic-gate * NOTE: Using these PCI configuration space registers to determine this 79f78a91cdSjjc * locality info is not guaranteed to work on future generations of 80f78a91cdSjjc * Opteron processor. 817c478bd9Sstevel@tonic-gate */ 827c478bd9Sstevel@tonic-gate 837c478bd9Sstevel@tonic-gate /* 847c478bd9Sstevel@tonic-gate * Opteron DRAM Address Map in PCI configuration space gives base and limit 85f78a91cdSjjc * of physical memory in each node. The following constants and macros define 86f78a91cdSjjc * their contents, structure, and access. 
877c478bd9Sstevel@tonic-gate */ 887c478bd9Sstevel@tonic-gate 897c478bd9Sstevel@tonic-gate /* 907c478bd9Sstevel@tonic-gate * How many bits to shift Opteron DRAM Address Map base and limit registers 917c478bd9Sstevel@tonic-gate * to get actual value 927c478bd9Sstevel@tonic-gate */ 93f78a91cdSjjc #define OPT_DRAMADDR_HI_LSHIFT_ADDR 40 /* shift left for address */ 94f78a91cdSjjc #define OPT_DRAMADDR_LO_LSHIFT_ADDR 8 /* shift left for address */ 957c478bd9Sstevel@tonic-gate 96f78a91cdSjjc #define OPT_DRAMADDR_HI_MASK_ADDR 0x000000FF /* address bits 47-40 */ 97f78a91cdSjjc #define OPT_DRAMADDR_LO_MASK_ADDR 0xFFFF0000 /* address bits 39-24 */ 98f78a91cdSjjc 99f78a91cdSjjc #define OPT_DRAMADDR_LO_MASK_OFF 0xFFFFFF /* offset for address */ 100f78a91cdSjjc 101f78a91cdSjjc /* 102f78a91cdSjjc * Macros to derive addresses from Opteron DRAM Address Map registers 103f78a91cdSjjc */ 104f78a91cdSjjc #define OPT_DRAMADDR_HI(reg) \ 105f78a91cdSjjc (((u_longlong_t)reg & OPT_DRAMADDR_HI_MASK_ADDR) << \ 106f78a91cdSjjc OPT_DRAMADDR_HI_LSHIFT_ADDR) 107f78a91cdSjjc 108f78a91cdSjjc #define OPT_DRAMADDR_LO(reg) \ 109f78a91cdSjjc (((u_longlong_t)reg & OPT_DRAMADDR_LO_MASK_ADDR) << \ 110f78a91cdSjjc OPT_DRAMADDR_LO_LSHIFT_ADDR) 111f78a91cdSjjc 112f78a91cdSjjc #define OPT_DRAMADDR(high, low) \ 113f78a91cdSjjc (OPT_DRAMADDR_HI(high) | OPT_DRAMADDR_LO(low)) 1147c478bd9Sstevel@tonic-gate 1157c478bd9Sstevel@tonic-gate /* 1167c478bd9Sstevel@tonic-gate * Bit masks defining what's in Opteron DRAM Address Map base register 1177c478bd9Sstevel@tonic-gate */ 118f78a91cdSjjc #define OPT_DRAMBASE_LO_MASK_RE 0x1 /* read enable */ 119f78a91cdSjjc #define OPT_DRAMBASE_LO_MASK_WE 0x2 /* write enable */ 120f78a91cdSjjc #define OPT_DRAMBASE_LO_MASK_INTRLVEN 0x700 /* interleave */ 1217c478bd9Sstevel@tonic-gate 1227c478bd9Sstevel@tonic-gate /* 1237c478bd9Sstevel@tonic-gate * Bit masks defining what's in Opteron DRAM Address Map limit register 1247c478bd9Sstevel@tonic-gate */ 125f78a91cdSjjc #define 
OPT_DRAMLIMIT_LO_MASK_DSTNODE 0x7 /* destination node */ 126f78a91cdSjjc #define OPT_DRAMLIMIT_LO_MASK_INTRLVSEL 0x700 /* interleave select */ 1277c478bd9Sstevel@tonic-gate 1287c478bd9Sstevel@tonic-gate 1297c478bd9Sstevel@tonic-gate /* 1307c478bd9Sstevel@tonic-gate * Opteron Node ID register in PCI configuration space contains 1317c478bd9Sstevel@tonic-gate * number of nodes in system, etc. for Opteron K8. The following 1327c478bd9Sstevel@tonic-gate * constants and macros define its contents, structure, and access. 1337c478bd9Sstevel@tonic-gate */ 1347c478bd9Sstevel@tonic-gate 1357c478bd9Sstevel@tonic-gate /* 1367c478bd9Sstevel@tonic-gate * Bit masks defining what's in Opteron Node ID register 1377c478bd9Sstevel@tonic-gate */ 1387c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_ID 0x7 /* node ID */ 1397c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_CNT 0x70 /* node count */ 1407c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_IONODE 0x700 /* Hypertransport I/O hub node ID */ 1417c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_LCKNODE 0x7000 /* lock controller node ID */ 1427c478bd9Sstevel@tonic-gate #define OPT_NODE_MASK_CPUCNT 0xF0000 /* CPUs in system (0 means 1 CPU) */ 1437c478bd9Sstevel@tonic-gate 1447c478bd9Sstevel@tonic-gate /* 1457c478bd9Sstevel@tonic-gate * How many bits in Opteron Node ID register to shift right to get actual value 1467c478bd9Sstevel@tonic-gate */ 1477c478bd9Sstevel@tonic-gate #define OPT_NODE_RSHIFT_CNT 0x4 /* shift right for node count value */ 1487c478bd9Sstevel@tonic-gate 1497c478bd9Sstevel@tonic-gate /* 1507c478bd9Sstevel@tonic-gate * Macros to get values from Opteron Node ID register 1517c478bd9Sstevel@tonic-gate */ 1527c478bd9Sstevel@tonic-gate #define OPT_NODE_CNT(reg) \ 1537c478bd9Sstevel@tonic-gate ((reg & OPT_NODE_MASK_CNT) >> OPT_NODE_RSHIFT_CNT) 1547c478bd9Sstevel@tonic-gate 155f78a91cdSjjc /* 156f78a91cdSjjc * Macro to setup PCI Extended Configuration Space (ECS) address to give to 157f78a91cdSjjc * "in/out" instructions 
158f78a91cdSjjc * 159f78a91cdSjjc * NOTE: Should only be used in lgrp_plat_init() before MMIO setup because any 160f78a91cdSjjc * other uses should just do MMIO to access PCI ECS. 161f78a91cdSjjc * Must enable special bit in Northbridge Configuration Register on 162f78a91cdSjjc * Greyhound for extended CF8 space access to be able to access PCI ECS 163f78a91cdSjjc * using "in/out" instructions and restore special bit after done 164f78a91cdSjjc * accessing PCI ECS. 165f78a91cdSjjc */ 166f78a91cdSjjc #define OPT_PCI_ECS_ADDR(bus, device, function, reg) \ 167f78a91cdSjjc (PCI_CONE | (((bus) & 0xff) << 16) | (((device & 0x1f)) << 11) | \ 168f78a91cdSjjc (((function) & 0x7) << 8) | ((reg) & 0xfc) | \ 169f78a91cdSjjc ((((reg) >> 8) & 0xf) << 24)) 1707c478bd9Sstevel@tonic-gate 1717c478bd9Sstevel@tonic-gate /* 1727c478bd9Sstevel@tonic-gate * PCI configuration space registers accessed by specifying 1737c478bd9Sstevel@tonic-gate * a bus, device, function, and offset. The following constants 1747c478bd9Sstevel@tonic-gate * define the values needed to access Opteron K8 configuration 1757c478bd9Sstevel@tonic-gate * info to determine its node topology 1767c478bd9Sstevel@tonic-gate */ 1777c478bd9Sstevel@tonic-gate 1787c478bd9Sstevel@tonic-gate #define OPT_PCS_BUS_CONFIG 0 /* Hypertransport config space bus */ 1797c478bd9Sstevel@tonic-gate 1807c478bd9Sstevel@tonic-gate /* 1817c478bd9Sstevel@tonic-gate * Opteron PCI configuration space register function values 1827c478bd9Sstevel@tonic-gate */ 1837c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_HT 0 /* Hypertransport configuration */ 1847c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_ADDRMAP 1 /* Address map configuration */ 1857c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_DRAM 2 /* DRAM configuration */ 1867c478bd9Sstevel@tonic-gate #define OPT_PCS_FUNC_MISC 3 /* Miscellaneous configuration */ 1877c478bd9Sstevel@tonic-gate 1887c478bd9Sstevel@tonic-gate /* 1897c478bd9Sstevel@tonic-gate * PCI Configuration Space register offsets 
1907c478bd9Sstevel@tonic-gate */ 1917c478bd9Sstevel@tonic-gate #define OPT_PCS_OFF_VENDOR 0x0 /* device/vendor ID register */ 192f78a91cdSjjc #define OPT_PCS_OFF_DRAMBASE_HI 0x140 /* DRAM Base register (node 0) */ 193f78a91cdSjjc #define OPT_PCS_OFF_DRAMBASE_LO 0x40 /* DRAM Base register (node 0) */ 1947c478bd9Sstevel@tonic-gate #define OPT_PCS_OFF_NODEID 0x60 /* Node ID register */ 1957c478bd9Sstevel@tonic-gate 1967c478bd9Sstevel@tonic-gate /* 1977c478bd9Sstevel@tonic-gate * Opteron PCI Configuration Space device IDs for nodes 1987c478bd9Sstevel@tonic-gate */ 1997c478bd9Sstevel@tonic-gate #define OPT_PCS_DEV_NODE0 24 /* device number for node 0 */ 2007c478bd9Sstevel@tonic-gate 2017c478bd9Sstevel@tonic-gate 2027c478bd9Sstevel@tonic-gate /* 2037c478bd9Sstevel@tonic-gate * Bookkeeping for latencies seen during probing (used for verification) 2047c478bd9Sstevel@tonic-gate */ 2057c478bd9Sstevel@tonic-gate typedef struct lgrp_plat_latency_acct { 2067c478bd9Sstevel@tonic-gate hrtime_t la_value; /* latency value */ 2077c478bd9Sstevel@tonic-gate int la_count; /* occurrences */ 2087c478bd9Sstevel@tonic-gate } lgrp_plat_latency_acct_t; 2097c478bd9Sstevel@tonic-gate 2107c478bd9Sstevel@tonic-gate 2117c478bd9Sstevel@tonic-gate /* 2127c478bd9Sstevel@tonic-gate * Choices for probing to determine lgroup topology 2137c478bd9Sstevel@tonic-gate */ 2147c478bd9Sstevel@tonic-gate typedef enum lgrp_plat_probe_op { 2157c478bd9Sstevel@tonic-gate LGRP_PLAT_PROBE_PGCPY, /* Use page copy */ 2167c478bd9Sstevel@tonic-gate LGRP_PLAT_PROBE_VENDOR /* Read vendor ID on Northbridge */ 2177c478bd9Sstevel@tonic-gate } lgrp_plat_probe_op_t; 2187c478bd9Sstevel@tonic-gate 2197c478bd9Sstevel@tonic-gate 2207c478bd9Sstevel@tonic-gate /* 2217c478bd9Sstevel@tonic-gate * Opteron DRAM address map gives base and limit for physical memory in a node 2227c478bd9Sstevel@tonic-gate */ 2237c478bd9Sstevel@tonic-gate typedef struct opt_dram_addr_map { 224f78a91cdSjjc uint32_t base_hi; 225f78a91cdSjjc uint32_t base_lo; 
226f78a91cdSjjc uint32_t limit_hi; 227f78a91cdSjjc uint32_t limit_lo; 2287c478bd9Sstevel@tonic-gate } opt_dram_addr_map_t; 2297c478bd9Sstevel@tonic-gate 2307c478bd9Sstevel@tonic-gate 2317c478bd9Sstevel@tonic-gate /* 2327c478bd9Sstevel@tonic-gate * Starting and ending page for physical memory in node 2337c478bd9Sstevel@tonic-gate */ 2347c478bd9Sstevel@tonic-gate typedef struct phys_addr_map { 2357c478bd9Sstevel@tonic-gate pfn_t start; 2367c478bd9Sstevel@tonic-gate pfn_t end; 237a940d195Sjjc int exists; 2387c478bd9Sstevel@tonic-gate } phys_addr_map_t; 2397c478bd9Sstevel@tonic-gate 2407c478bd9Sstevel@tonic-gate 2417c478bd9Sstevel@tonic-gate /* 2427c478bd9Sstevel@tonic-gate * Opteron DRAM address map for each node 2437c478bd9Sstevel@tonic-gate */ 2447c478bd9Sstevel@tonic-gate struct opt_dram_addr_map opt_dram_map[MAX_NODES]; 2457c478bd9Sstevel@tonic-gate 2467c478bd9Sstevel@tonic-gate /* 2477c478bd9Sstevel@tonic-gate * Node ID register contents for each node 2487c478bd9Sstevel@tonic-gate */ 2497c478bd9Sstevel@tonic-gate uint_t opt_node_info[MAX_NODES]; 2507c478bd9Sstevel@tonic-gate 2517c478bd9Sstevel@tonic-gate /* 2527c478bd9Sstevel@tonic-gate * Whether memory is interleaved across nodes causing MPO to be disabled 2537c478bd9Sstevel@tonic-gate */ 2547c478bd9Sstevel@tonic-gate int lgrp_plat_mem_intrlv = 0; 2557c478bd9Sstevel@tonic-gate 2567c478bd9Sstevel@tonic-gate /* 2577c478bd9Sstevel@tonic-gate * Number of nodes in system 2587c478bd9Sstevel@tonic-gate */ 2597c478bd9Sstevel@tonic-gate uint_t lgrp_plat_node_cnt = 1; 2607c478bd9Sstevel@tonic-gate 2617c478bd9Sstevel@tonic-gate /* 2627c478bd9Sstevel@tonic-gate * Physical address range for memory in each node 2637c478bd9Sstevel@tonic-gate */ 2647c478bd9Sstevel@tonic-gate phys_addr_map_t lgrp_plat_node_memory[MAX_NODES]; 2657c478bd9Sstevel@tonic-gate 2667c478bd9Sstevel@tonic-gate /* 2677c478bd9Sstevel@tonic-gate * Probe costs (individual and total) and flush cost 2687c478bd9Sstevel@tonic-gate */ 2697c478bd9Sstevel@tonic-gate 
hrtime_t lgrp_plat_flush_cost = 0; 2707c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_cost = 0; 2717c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_cost_total = 0; 2727c478bd9Sstevel@tonic-gate 2737c478bd9Sstevel@tonic-gate /* 2747c478bd9Sstevel@tonic-gate * Error code for latency adjustment and verification 2757c478bd9Sstevel@tonic-gate */ 2767c478bd9Sstevel@tonic-gate int lgrp_plat_probe_error_code = 0; 2777c478bd9Sstevel@tonic-gate 2787c478bd9Sstevel@tonic-gate /* 2797c478bd9Sstevel@tonic-gate * How much latencies were off from minimum values gotten 2807c478bd9Sstevel@tonic-gate */ 2817c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_errors[MAX_NODES][MAX_NODES]; 2827c478bd9Sstevel@tonic-gate 2837c478bd9Sstevel@tonic-gate /* 2847c478bd9Sstevel@tonic-gate * Unique probe latencies and number of occurrences of each 2857c478bd9Sstevel@tonic-gate */ 2867c478bd9Sstevel@tonic-gate lgrp_plat_latency_acct_t lgrp_plat_probe_lat_acct[MAX_NODES]; 2877c478bd9Sstevel@tonic-gate 2887c478bd9Sstevel@tonic-gate /* 2897c478bd9Sstevel@tonic-gate * Size of memory buffer in each node for probing 2907c478bd9Sstevel@tonic-gate */ 2917c478bd9Sstevel@tonic-gate size_t lgrp_plat_probe_memsize = 0; 2927c478bd9Sstevel@tonic-gate 2937c478bd9Sstevel@tonic-gate /* 2947c478bd9Sstevel@tonic-gate * Virtual address of page in each node for probing 2957c478bd9Sstevel@tonic-gate */ 2967c478bd9Sstevel@tonic-gate caddr_t lgrp_plat_probe_memory[MAX_NODES]; 2977c478bd9Sstevel@tonic-gate 2987c478bd9Sstevel@tonic-gate /* 2997c478bd9Sstevel@tonic-gate * Number of unique latencies in probe times 3007c478bd9Sstevel@tonic-gate */ 3017c478bd9Sstevel@tonic-gate int lgrp_plat_probe_nlatencies = 0; 3027c478bd9Sstevel@tonic-gate 3037c478bd9Sstevel@tonic-gate /* 3047c478bd9Sstevel@tonic-gate * How many rounds of probing to do 3057c478bd9Sstevel@tonic-gate */ 3067c478bd9Sstevel@tonic-gate int lgrp_plat_probe_nrounds = LGRP_PLAT_PROBE_NROUNDS; 3077c478bd9Sstevel@tonic-gate 3087c478bd9Sstevel@tonic-gate /* 
3097c478bd9Sstevel@tonic-gate * Number of samples to take when probing each node 3107c478bd9Sstevel@tonic-gate */ 3117c478bd9Sstevel@tonic-gate int lgrp_plat_probe_nsamples = LGRP_PLAT_PROBE_NSAMPLES; 3127c478bd9Sstevel@tonic-gate 3137c478bd9Sstevel@tonic-gate /* 3148949bcd6Sandrei * Number of times to read vendor ID from Northbridge for each probe. 3158949bcd6Sandrei */ 3168949bcd6Sandrei int lgrp_plat_probe_nreads = LGRP_PLAT_PROBE_NREADS; 3178949bcd6Sandrei 3188949bcd6Sandrei /* 3197c478bd9Sstevel@tonic-gate * How to probe to determine lgroup topology 3207c478bd9Sstevel@tonic-gate */ 3217c478bd9Sstevel@tonic-gate lgrp_plat_probe_op_t lgrp_plat_probe_op = LGRP_PLAT_PROBE_VENDOR; 3227c478bd9Sstevel@tonic-gate 3237c478bd9Sstevel@tonic-gate /* 3247c478bd9Sstevel@tonic-gate * PFN of page in each node for probing 3257c478bd9Sstevel@tonic-gate */ 3267c478bd9Sstevel@tonic-gate pfn_t lgrp_plat_probe_pfn[MAX_NODES]; 3277c478bd9Sstevel@tonic-gate 3287c478bd9Sstevel@tonic-gate /* 3297c478bd9Sstevel@tonic-gate * Whether probe time was suspect (ie. 
not within tolerance of value that it 3307c478bd9Sstevel@tonic-gate * should match) 3317c478bd9Sstevel@tonic-gate */ 3327c478bd9Sstevel@tonic-gate int lgrp_plat_probe_suspect[MAX_NODES][MAX_NODES]; 3337c478bd9Sstevel@tonic-gate 3347c478bd9Sstevel@tonic-gate /* 3357c478bd9Sstevel@tonic-gate * How long it takes to access memory from each node 3367c478bd9Sstevel@tonic-gate */ 3377c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_times[MAX_NODES][MAX_NODES]; 3387c478bd9Sstevel@tonic-gate 3397c478bd9Sstevel@tonic-gate /* 3407c478bd9Sstevel@tonic-gate * Min and max node memory probe times seen 3417c478bd9Sstevel@tonic-gate */ 3427c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_time_max = 0; 3437c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_time_min = -1; 3447c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_max[MAX_NODES][MAX_NODES]; 3457c478bd9Sstevel@tonic-gate hrtime_t lgrp_plat_probe_min[MAX_NODES][MAX_NODES]; 3467c478bd9Sstevel@tonic-gate 3477c478bd9Sstevel@tonic-gate 3487c478bd9Sstevel@tonic-gate /* 3497c478bd9Sstevel@tonic-gate * Allocate lgrp and lgrp stat arrays statically. 
3507c478bd9Sstevel@tonic-gate */ 3517c478bd9Sstevel@tonic-gate static lgrp_t lgrp_space[NLGRP]; 3527c478bd9Sstevel@tonic-gate static int nlgrps_alloc; 3537c478bd9Sstevel@tonic-gate 3547c478bd9Sstevel@tonic-gate struct lgrp_stats lgrp_stats[NLGRP]; 3557c478bd9Sstevel@tonic-gate 356f78a91cdSjjc /* 357f78a91cdSjjc * Supported AMD processor families 358f78a91cdSjjc */ 359f78a91cdSjjc #define AMD_FAMILY_HAMMER 15 360f78a91cdSjjc #define AMD_FAMILY_GREYHOUND 16 3617c478bd9Sstevel@tonic-gate 362f78a91cdSjjc /* 363f78a91cdSjjc * Whether to have is_opteron() return 1 even when processor isn't 364f78a91cdSjjc * supported 365f78a91cdSjjc */ 366f78a91cdSjjc uint_t is_opteron_override = 0; 367f78a91cdSjjc 368f78a91cdSjjc /* 369f78a91cdSjjc * AMD processor family for current CPU 370f78a91cdSjjc */ 3717c478bd9Sstevel@tonic-gate uint_t opt_family = 0; 372f78a91cdSjjc 3737c478bd9Sstevel@tonic-gate uint_t opt_probe_func = OPT_PCS_FUNC_DRAM; 3747c478bd9Sstevel@tonic-gate 3757c478bd9Sstevel@tonic-gate 3767c478bd9Sstevel@tonic-gate /* 377f78a91cdSjjc * Determine whether we're running on a supported AMD Opteron since reading 378f78a91cdSjjc * node count and DRAM address map registers may have different format or 379f78a91cdSjjc * may not be supported in future processor families 3807c478bd9Sstevel@tonic-gate */ 3817c478bd9Sstevel@tonic-gate int 3827c478bd9Sstevel@tonic-gate is_opteron(void) 3837c478bd9Sstevel@tonic-gate { 384f78a91cdSjjc 3857c478bd9Sstevel@tonic-gate if (x86_vendor != X86_VENDOR_AMD) 3867c478bd9Sstevel@tonic-gate return (0); 3877c478bd9Sstevel@tonic-gate 388f78a91cdSjjc opt_family = cpuid_getfamily(CPU); 389f78a91cdSjjc if (opt_family == AMD_FAMILY_HAMMER || 390f78a91cdSjjc opt_family == AMD_FAMILY_GREYHOUND || is_opteron_override) 3917c478bd9Sstevel@tonic-gate return (1); 3927c478bd9Sstevel@tonic-gate else 3937c478bd9Sstevel@tonic-gate return (0); 3947c478bd9Sstevel@tonic-gate } 3957c478bd9Sstevel@tonic-gate 3967c478bd9Sstevel@tonic-gate int 
3977c478bd9Sstevel@tonic-gate plat_lgrphand_to_mem_node(lgrp_handle_t hand) 3987c478bd9Sstevel@tonic-gate { 3997c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 4007c478bd9Sstevel@tonic-gate return (0); 4017c478bd9Sstevel@tonic-gate 4027c478bd9Sstevel@tonic-gate return ((int)hand); 4037c478bd9Sstevel@tonic-gate } 4047c478bd9Sstevel@tonic-gate 4057c478bd9Sstevel@tonic-gate lgrp_handle_t 4067c478bd9Sstevel@tonic-gate plat_mem_node_to_lgrphand(int mnode) 4077c478bd9Sstevel@tonic-gate { 4087c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 4097c478bd9Sstevel@tonic-gate return (LGRP_DEFAULT_HANDLE); 4107c478bd9Sstevel@tonic-gate 4117c478bd9Sstevel@tonic-gate return ((lgrp_handle_t)mnode); 4127c478bd9Sstevel@tonic-gate } 4137c478bd9Sstevel@tonic-gate 414*472714d6Skchow 415*472714d6Skchow /* 416*472714d6Skchow * plat_mnode_xcheck: checks the node memory ranges to see if there is a pfncnt 417*472714d6Skchow * range of pages aligned on pfncnt that crosses an node boundary. Returns 1 if 418*472714d6Skchow * a crossing is found and returns 0 otherwise. 
419*472714d6Skchow */ 420*472714d6Skchow int 421*472714d6Skchow plat_mnode_xcheck(pfn_t pfncnt) 422*472714d6Skchow { 423*472714d6Skchow int node, prevnode = -1, basenode; 424*472714d6Skchow pfn_t ea, sa; 425*472714d6Skchow 426*472714d6Skchow for (node = 0; node < lgrp_plat_node_cnt; node++) { 427*472714d6Skchow 428*472714d6Skchow if (lgrp_plat_node_memory[node].exists == 0) 429*472714d6Skchow continue; 430*472714d6Skchow 431*472714d6Skchow if (prevnode == -1) { 432*472714d6Skchow prevnode = node; 433*472714d6Skchow basenode = node; 434*472714d6Skchow continue; 435*472714d6Skchow } 436*472714d6Skchow 437*472714d6Skchow /* assume x86 node pfn ranges are in increasing order */ 438*472714d6Skchow ASSERT(lgrp_plat_node_memory[node].start > 439*472714d6Skchow lgrp_plat_node_memory[prevnode].end); 440*472714d6Skchow 441*472714d6Skchow /* 442*472714d6Skchow * continue if the starting address of node is not contiguous 443*472714d6Skchow * with the previous node. 444*472714d6Skchow */ 445*472714d6Skchow 446*472714d6Skchow if (lgrp_plat_node_memory[node].start != 447*472714d6Skchow (lgrp_plat_node_memory[prevnode].end + 1)) { 448*472714d6Skchow basenode = node; 449*472714d6Skchow prevnode = node; 450*472714d6Skchow continue; 451*472714d6Skchow } 452*472714d6Skchow 453*472714d6Skchow /* check if the starting address of node is pfncnt aligned */ 454*472714d6Skchow if ((lgrp_plat_node_memory[node].start & (pfncnt - 1)) != 0) { 455*472714d6Skchow 456*472714d6Skchow /* 457*472714d6Skchow * at this point, node starts at an unaligned boundary 458*472714d6Skchow * and is contiguous with the previous node(s) to 459*472714d6Skchow * basenode. Check if there is an aligned contiguous 460*472714d6Skchow * range of length pfncnt that crosses this boundary. 
461*472714d6Skchow */ 462*472714d6Skchow 463*472714d6Skchow sa = P2ALIGN(lgrp_plat_node_memory[prevnode].end, 464*472714d6Skchow pfncnt); 465*472714d6Skchow ea = P2ROUNDUP((lgrp_plat_node_memory[node].start), 466*472714d6Skchow pfncnt); 467*472714d6Skchow 468*472714d6Skchow ASSERT((ea - sa) == pfncnt); 469*472714d6Skchow if (sa >= lgrp_plat_node_memory[basenode].start && 470*472714d6Skchow ea <= (lgrp_plat_node_memory[node].end + 1)) 471*472714d6Skchow return (1); 472*472714d6Skchow } 473*472714d6Skchow prevnode = node; 474*472714d6Skchow } 475*472714d6Skchow return (0); 476*472714d6Skchow } 477*472714d6Skchow 4787c478bd9Sstevel@tonic-gate int 4797c478bd9Sstevel@tonic-gate plat_pfn_to_mem_node(pfn_t pfn) 4807c478bd9Sstevel@tonic-gate { 4817c478bd9Sstevel@tonic-gate int node; 4827c478bd9Sstevel@tonic-gate 4837c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 4847c478bd9Sstevel@tonic-gate return (0); 4857c478bd9Sstevel@tonic-gate 4867c478bd9Sstevel@tonic-gate for (node = 0; node < lgrp_plat_node_cnt; node++) { 487a940d195Sjjc /* 488a940d195Sjjc * Skip nodes with no memory 489a940d195Sjjc */ 490a940d195Sjjc if (!lgrp_plat_node_memory[node].exists) 491a940d195Sjjc continue; 492a940d195Sjjc 4937c478bd9Sstevel@tonic-gate if (pfn >= lgrp_plat_node_memory[node].start && 4947c478bd9Sstevel@tonic-gate pfn <= lgrp_plat_node_memory[node].end) 4957c478bd9Sstevel@tonic-gate return (node); 4967c478bd9Sstevel@tonic-gate } 4977c478bd9Sstevel@tonic-gate 4987c478bd9Sstevel@tonic-gate ASSERT(node < lgrp_plat_node_cnt); 4997c478bd9Sstevel@tonic-gate return (-1); 5007c478bd9Sstevel@tonic-gate } 5017c478bd9Sstevel@tonic-gate 5027c478bd9Sstevel@tonic-gate /* 5037c478bd9Sstevel@tonic-gate * Configure memory nodes for machines with more than one node (ie NUMA) 5047c478bd9Sstevel@tonic-gate */ 5057c478bd9Sstevel@tonic-gate void 5067c478bd9Sstevel@tonic-gate plat_build_mem_nodes(struct memlist *list) 5077c478bd9Sstevel@tonic-gate { 508a940d195Sjjc pfn_t cur_start; /* start addr of subrange */ 
509a940d195Sjjc pfn_t cur_end; /* end addr of subrange */ 510a940d195Sjjc pfn_t start; /* start addr of whole range */ 511a940d195Sjjc pfn_t end; /* end addr of whole range */ 5127c478bd9Sstevel@tonic-gate 5137c478bd9Sstevel@tonic-gate /* 5147c478bd9Sstevel@tonic-gate * Boot install lists are arranged <addr, len>, ... 5157c478bd9Sstevel@tonic-gate */ 5167c478bd9Sstevel@tonic-gate while (list) { 5177c478bd9Sstevel@tonic-gate int node; 5187c478bd9Sstevel@tonic-gate 5197c478bd9Sstevel@tonic-gate start = list->address >> PAGESHIFT; 5207c478bd9Sstevel@tonic-gate end = (list->address + list->size - 1) >> PAGESHIFT; 5217c478bd9Sstevel@tonic-gate 5227c478bd9Sstevel@tonic-gate if (start > physmax) { 5237c478bd9Sstevel@tonic-gate list = list->next; 5247c478bd9Sstevel@tonic-gate continue; 5257c478bd9Sstevel@tonic-gate } 5267c478bd9Sstevel@tonic-gate if (end > physmax) 5277c478bd9Sstevel@tonic-gate end = physmax; 5287c478bd9Sstevel@tonic-gate 5297c478bd9Sstevel@tonic-gate /* 5307c478bd9Sstevel@tonic-gate * When there is only one memnode, just add memory to memnode 5317c478bd9Sstevel@tonic-gate */ 5327c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) { 5337c478bd9Sstevel@tonic-gate mem_node_add_slice(start, end); 5347c478bd9Sstevel@tonic-gate list = list->next; 5357c478bd9Sstevel@tonic-gate continue; 5367c478bd9Sstevel@tonic-gate } 5377c478bd9Sstevel@tonic-gate 5387c478bd9Sstevel@tonic-gate /* 5397c478bd9Sstevel@tonic-gate * mem_node_add_slice() expects to get a memory range that 5407c478bd9Sstevel@tonic-gate * is within one memnode, so need to split any memory range 5417c478bd9Sstevel@tonic-gate * that spans multiple memnodes into subranges that are each 5427c478bd9Sstevel@tonic-gate * contained within one memnode when feeding them to 5437c478bd9Sstevel@tonic-gate * mem_node_add_slice() 5447c478bd9Sstevel@tonic-gate */ 5457c478bd9Sstevel@tonic-gate cur_start = start; 5467c478bd9Sstevel@tonic-gate do { 5477c478bd9Sstevel@tonic-gate node = plat_pfn_to_mem_node(cur_start); 
5487c478bd9Sstevel@tonic-gate 549a940d195Sjjc /* 550a940d195Sjjc * Panic if DRAM address map registers or SRAT say 551a940d195Sjjc * memory in node doesn't exist or address from 552a940d195Sjjc * boot installed memory list entry isn't in this node. 553a940d195Sjjc * This shouldn't happen and rest of code can't deal 554a940d195Sjjc * with this if it does. 555a940d195Sjjc */ 556a940d195Sjjc if (node < 0 || node >= lgrp_plat_node_cnt || 557a940d195Sjjc !lgrp_plat_node_memory[node].exists || 558a940d195Sjjc cur_start < lgrp_plat_node_memory[node].start || 559a940d195Sjjc cur_start > lgrp_plat_node_memory[node].end) { 560a940d195Sjjc cmn_err(CE_PANIC, "Don't know which memnode " 561a940d195Sjjc "to add installed memory address 0x%lx\n", 562a940d195Sjjc cur_start); 563a940d195Sjjc } 5647c478bd9Sstevel@tonic-gate 5657c478bd9Sstevel@tonic-gate /* 5667c478bd9Sstevel@tonic-gate * End of current subrange should not span memnodes 5677c478bd9Sstevel@tonic-gate */ 568a940d195Sjjc cur_end = end; 569a940d195Sjjc if (lgrp_plat_node_memory[node].exists && 570a940d195Sjjc cur_end > lgrp_plat_node_memory[node].end) 5717c478bd9Sstevel@tonic-gate cur_end = lgrp_plat_node_memory[node].end; 5727c478bd9Sstevel@tonic-gate 5737c478bd9Sstevel@tonic-gate mem_node_add_slice(cur_start, cur_end); 5747c478bd9Sstevel@tonic-gate 5757c478bd9Sstevel@tonic-gate /* 5767c478bd9Sstevel@tonic-gate * Next subrange starts after end of current one 5777c478bd9Sstevel@tonic-gate */ 5787c478bd9Sstevel@tonic-gate cur_start = cur_end + 1; 5797c478bd9Sstevel@tonic-gate } while (cur_end < end); 5807c478bd9Sstevel@tonic-gate 5817c478bd9Sstevel@tonic-gate list = list->next; 5827c478bd9Sstevel@tonic-gate } 5837c478bd9Sstevel@tonic-gate mem_node_physalign = 0; 5847c478bd9Sstevel@tonic-gate mem_node_pfn_shift = 0; 5857c478bd9Sstevel@tonic-gate } 5867c478bd9Sstevel@tonic-gate 5877c478bd9Sstevel@tonic-gate 5887c478bd9Sstevel@tonic-gate /* 5897c478bd9Sstevel@tonic-gate * Platform-specific initialization of lgroups 
5907c478bd9Sstevel@tonic-gate */ 5917c478bd9Sstevel@tonic-gate void 5927c478bd9Sstevel@tonic-gate lgrp_plat_init(void) 5937c478bd9Sstevel@tonic-gate { 594843e1988Sjohnlev #if defined(__xpv) 595843e1988Sjohnlev /* 596843e1988Sjohnlev * XXPV For now, the hypervisor treats all memory equally. 597843e1988Sjohnlev */ 598843e1988Sjohnlev lgrp_plat_node_cnt = max_mem_nodes = 1; 599843e1988Sjohnlev #else /* __xpv */ 6007c478bd9Sstevel@tonic-gate uint_t bus; 6017c478bd9Sstevel@tonic-gate uint_t dev; 6027c478bd9Sstevel@tonic-gate uint_t node; 603f78a91cdSjjc uint_t off_hi; 604f78a91cdSjjc uint_t off_lo; 605f78a91cdSjjc uint64_t nb_cfg_reg; 6067c478bd9Sstevel@tonic-gate 6077c478bd9Sstevel@tonic-gate extern lgrp_load_t lgrp_expand_proc_thresh; 6087c478bd9Sstevel@tonic-gate extern lgrp_load_t lgrp_expand_proc_diff; 6097c478bd9Sstevel@tonic-gate 6107c478bd9Sstevel@tonic-gate /* 6117c478bd9Sstevel@tonic-gate * Initialize as a UMA machine if this isn't an Opteron 6127c478bd9Sstevel@tonic-gate */ 6137c478bd9Sstevel@tonic-gate if (!is_opteron() || lgrp_topo_ht_limit() == 1) { 6147c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt = max_mem_nodes = 1; 6157c478bd9Sstevel@tonic-gate return; 6167c478bd9Sstevel@tonic-gate } 6177c478bd9Sstevel@tonic-gate 6187c478bd9Sstevel@tonic-gate /* 6197c478bd9Sstevel@tonic-gate * Read configuration registers from PCI configuration space to 6207c478bd9Sstevel@tonic-gate * determine node information, which memory is in each node, etc. 
6217c478bd9Sstevel@tonic-gate * 6227c478bd9Sstevel@tonic-gate * Write to PCI configuration space address register to specify 6237c478bd9Sstevel@tonic-gate * which configuration register to read and read/write PCI 6247c478bd9Sstevel@tonic-gate * configuration space data register to get/set contents 6257c478bd9Sstevel@tonic-gate */ 6267c478bd9Sstevel@tonic-gate bus = OPT_PCS_BUS_CONFIG; 6277c478bd9Sstevel@tonic-gate dev = OPT_PCS_DEV_NODE0; 628f78a91cdSjjc off_hi = OPT_PCS_OFF_DRAMBASE_HI; 629f78a91cdSjjc off_lo = OPT_PCS_OFF_DRAMBASE_LO; 6307c478bd9Sstevel@tonic-gate 6317c478bd9Sstevel@tonic-gate /* 6327c478bd9Sstevel@tonic-gate * Read node ID register for node 0 to get node count 6337c478bd9Sstevel@tonic-gate */ 634ef50d8c0Sesaxe opt_node_info[0] = pci_getl_func(bus, dev, OPT_PCS_FUNC_HT, 635ef50d8c0Sesaxe OPT_PCS_OFF_NODEID); 6367c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt = OPT_NODE_CNT(opt_node_info[0]) + 1; 6377c478bd9Sstevel@tonic-gate 638f78a91cdSjjc /* 639f78a91cdSjjc * For Greyhound, PCI Extended Configuration Space must be enabled to 640f78a91cdSjjc * read high DRAM address map base and limit registers 641f78a91cdSjjc */ 642f78a91cdSjjc if (opt_family == AMD_FAMILY_GREYHOUND) { 643f78a91cdSjjc nb_cfg_reg = rdmsr(MSR_AMD_NB_CFG); 644f78a91cdSjjc if ((nb_cfg_reg & AMD_GH_NB_CFG_EN_ECS) == 0) 645f78a91cdSjjc wrmsr(MSR_AMD_NB_CFG, 646f78a91cdSjjc nb_cfg_reg | AMD_GH_NB_CFG_EN_ECS); 647f78a91cdSjjc } 648f78a91cdSjjc 6497c478bd9Sstevel@tonic-gate for (node = 0; node < lgrp_plat_node_cnt; node++) { 650f78a91cdSjjc uint32_t base_hi; 651f78a91cdSjjc uint32_t base_lo; 652f78a91cdSjjc uint32_t limit_hi; 653f78a91cdSjjc uint32_t limit_lo; 654f78a91cdSjjc 6557c478bd9Sstevel@tonic-gate /* 6567c478bd9Sstevel@tonic-gate * Read node ID register (except for node 0 which we just read) 6577c478bd9Sstevel@tonic-gate */ 6587c478bd9Sstevel@tonic-gate if (node > 0) { 659ef50d8c0Sesaxe opt_node_info[node] = pci_getl_func(bus, dev, 660ef50d8c0Sesaxe OPT_PCS_FUNC_HT, 
OPT_PCS_OFF_NODEID); 6617c478bd9Sstevel@tonic-gate } 6627c478bd9Sstevel@tonic-gate 6637c478bd9Sstevel@tonic-gate /* 6647c478bd9Sstevel@tonic-gate * Read DRAM base and limit registers which specify 6657c478bd9Sstevel@tonic-gate * physical memory range of each node 6667c478bd9Sstevel@tonic-gate */ 667f78a91cdSjjc if (opt_family != AMD_FAMILY_GREYHOUND) 668f78a91cdSjjc base_hi = 0; 669f78a91cdSjjc else { 670f78a91cdSjjc outl(PCI_CONFADD, OPT_PCI_ECS_ADDR(bus, dev, 671f78a91cdSjjc OPT_PCS_FUNC_ADDRMAP, off_hi)); 672f78a91cdSjjc base_hi = opt_dram_map[node].base_hi = 673f78a91cdSjjc inl(PCI_CONFDATA); 674f78a91cdSjjc } 675f78a91cdSjjc base_lo = opt_dram_map[node].base_lo = pci_getl_func(bus, dev, 676f78a91cdSjjc OPT_PCS_FUNC_ADDRMAP, off_lo); 677f78a91cdSjjc 678f78a91cdSjjc if (opt_dram_map[node].base_lo & OPT_DRAMBASE_LO_MASK_INTRLVEN) 6797c478bd9Sstevel@tonic-gate lgrp_plat_mem_intrlv++; 6807c478bd9Sstevel@tonic-gate 681f78a91cdSjjc off_hi += 4; /* high limit register offset */ 682f78a91cdSjjc if (opt_family != AMD_FAMILY_GREYHOUND) 683f78a91cdSjjc limit_hi = 0; 684f78a91cdSjjc else { 685f78a91cdSjjc outl(PCI_CONFADD, OPT_PCI_ECS_ADDR(bus, dev, 686f78a91cdSjjc OPT_PCS_FUNC_ADDRMAP, off_hi)); 687f78a91cdSjjc limit_hi = opt_dram_map[node].limit_hi = 688f78a91cdSjjc inl(PCI_CONFDATA); 689f78a91cdSjjc } 690f78a91cdSjjc 691f78a91cdSjjc off_lo += 4; /* low limit register offset */ 692f78a91cdSjjc limit_lo = opt_dram_map[node].limit_lo = pci_getl_func(bus, 693f78a91cdSjjc dev, OPT_PCS_FUNC_ADDRMAP, off_lo); 6947c478bd9Sstevel@tonic-gate 6957c478bd9Sstevel@tonic-gate /* 696f78a91cdSjjc * Increment device number to next node and register offsets 697f78a91cdSjjc * for DRAM base register of next node 6987c478bd9Sstevel@tonic-gate */ 699f78a91cdSjjc off_hi += 4; 700f78a91cdSjjc off_lo += 4; 7017c478bd9Sstevel@tonic-gate dev++; 7027c478bd9Sstevel@tonic-gate 7037c478bd9Sstevel@tonic-gate /* 704a940d195Sjjc * Both read and write enable bits must be enabled in DRAM 705a940d195Sjjc * 
address map base register for physical memory to exist in 706a940d195Sjjc * node 707a940d195Sjjc */ 708f78a91cdSjjc if ((base_lo & OPT_DRAMBASE_LO_MASK_RE) == 0 || 709f78a91cdSjjc (base_lo & OPT_DRAMBASE_LO_MASK_WE) == 0) { 710a940d195Sjjc /* 711a940d195Sjjc * Mark node memory as non-existent and set start and 712a940d195Sjjc * end addresses to be same in lgrp_plat_node_memory[] 713a940d195Sjjc */ 714a940d195Sjjc lgrp_plat_node_memory[node].exists = 0; 715a940d195Sjjc lgrp_plat_node_memory[node].start = 716a940d195Sjjc lgrp_plat_node_memory[node].end = (pfn_t)-1; 717a940d195Sjjc continue; 718a940d195Sjjc } 719a940d195Sjjc 720a940d195Sjjc /* 7217c478bd9Sstevel@tonic-gate * Get PFN for first page in each node, 7227c478bd9Sstevel@tonic-gate * so we can probe memory to determine latency topology 7237c478bd9Sstevel@tonic-gate */ 7247c478bd9Sstevel@tonic-gate lgrp_plat_probe_pfn[node] = 725f78a91cdSjjc btop(OPT_DRAMADDR(base_hi, base_lo)); 7267c478bd9Sstevel@tonic-gate 7277c478bd9Sstevel@tonic-gate /* 728a940d195Sjjc * Mark node memory as existing and remember physical address 729a940d195Sjjc * range of each node for use later 7307c478bd9Sstevel@tonic-gate */ 731a940d195Sjjc lgrp_plat_node_memory[node].exists = 1; 732f78a91cdSjjc 7337c478bd9Sstevel@tonic-gate lgrp_plat_node_memory[node].start = 734f78a91cdSjjc btop(OPT_DRAMADDR(base_hi, base_lo)); 735f78a91cdSjjc 7367c478bd9Sstevel@tonic-gate lgrp_plat_node_memory[node].end = 737f78a91cdSjjc btop(OPT_DRAMADDR(limit_hi, limit_lo) | 738f78a91cdSjjc OPT_DRAMADDR_LO_MASK_OFF); 739f78a91cdSjjc } 740f78a91cdSjjc 741f78a91cdSjjc /* 742f78a91cdSjjc * Restore PCI Extended Configuration Space enable bit 743f78a91cdSjjc */ 744f78a91cdSjjc if (opt_family == AMD_FAMILY_GREYHOUND) { 745f78a91cdSjjc if ((nb_cfg_reg & AMD_GH_NB_CFG_EN_ECS) == 0) 746f78a91cdSjjc wrmsr(MSR_AMD_NB_CFG, nb_cfg_reg); 7477c478bd9Sstevel@tonic-gate } 7487c478bd9Sstevel@tonic-gate 7497c478bd9Sstevel@tonic-gate /* 7507c478bd9Sstevel@tonic-gate * Only use one 
memory node if memory is interleaved between any nodes 7517c478bd9Sstevel@tonic-gate */ 7527c478bd9Sstevel@tonic-gate if (lgrp_plat_mem_intrlv) { 7537c478bd9Sstevel@tonic-gate lgrp_plat_node_cnt = max_mem_nodes = 1; 7547c478bd9Sstevel@tonic-gate (void) lgrp_topo_ht_limit_set(1); 7557c478bd9Sstevel@tonic-gate } else { 7567c478bd9Sstevel@tonic-gate max_mem_nodes = lgrp_plat_node_cnt; 7577c478bd9Sstevel@tonic-gate 7587c478bd9Sstevel@tonic-gate /* 7597c478bd9Sstevel@tonic-gate * Probing errors can mess up the lgroup topology and force us 7607c478bd9Sstevel@tonic-gate * fall back to a 2 level lgroup topology. Here we bound how 7617c478bd9Sstevel@tonic-gate * tall the lgroup topology can grow in hopes of avoiding any 7627c478bd9Sstevel@tonic-gate * anamolies in probing from messing up the lgroup topology 7637c478bd9Sstevel@tonic-gate * by limiting the accuracy of the latency topology. 7647c478bd9Sstevel@tonic-gate * 7657c478bd9Sstevel@tonic-gate * Assume that nodes will at least be configured in a ring, 7667c478bd9Sstevel@tonic-gate * so limit height of lgroup topology to be less than number 7677c478bd9Sstevel@tonic-gate * of nodes on a system with 4 or more nodes 7687c478bd9Sstevel@tonic-gate */ 7697c478bd9Sstevel@tonic-gate if (lgrp_plat_node_cnt >= 4 && 7707c478bd9Sstevel@tonic-gate lgrp_topo_ht_limit() == lgrp_topo_ht_limit_default()) 7717c478bd9Sstevel@tonic-gate (void) lgrp_topo_ht_limit_set(lgrp_plat_node_cnt - 1); 7727c478bd9Sstevel@tonic-gate } 7737c478bd9Sstevel@tonic-gate 7747c478bd9Sstevel@tonic-gate /* 7757c478bd9Sstevel@tonic-gate * Lgroups on Opteron architectures have but a single physical 7767c478bd9Sstevel@tonic-gate * processor. Tune lgrp_expand_proc_thresh and lgrp_expand_proc_diff 7777c478bd9Sstevel@tonic-gate * so that lgrp_choose() will spread things out aggressively. 
7787c478bd9Sstevel@tonic-gate */ 7797c478bd9Sstevel@tonic-gate lgrp_expand_proc_thresh = LGRP_LOADAVG_THREAD_MAX / 2; 7807c478bd9Sstevel@tonic-gate lgrp_expand_proc_diff = 0; 781843e1988Sjohnlev #endif /* __xpv */ 7827c478bd9Sstevel@tonic-gate } 7837c478bd9Sstevel@tonic-gate 7847c478bd9Sstevel@tonic-gate 7857c478bd9Sstevel@tonic-gate /* 7867c478bd9Sstevel@tonic-gate * Latencies must be within 1/(2**LGRP_LAT_TOLERANCE_SHIFT) of each other to 7877c478bd9Sstevel@tonic-gate * be considered same 7887c478bd9Sstevel@tonic-gate */ 7897c478bd9Sstevel@tonic-gate #define LGRP_LAT_TOLERANCE_SHIFT 4 7907c478bd9Sstevel@tonic-gate 7917c478bd9Sstevel@tonic-gate int lgrp_plat_probe_lt_shift = LGRP_LAT_TOLERANCE_SHIFT; 7927c478bd9Sstevel@tonic-gate 7937c478bd9Sstevel@tonic-gate 7947c478bd9Sstevel@tonic-gate /* 7957c478bd9Sstevel@tonic-gate * Adjust latencies between nodes to be symmetric, normalize latencies between 7967c478bd9Sstevel@tonic-gate * any nodes that are within some tolerance to be same, and make local 7977c478bd9Sstevel@tonic-gate * latencies be same 7987c478bd9Sstevel@tonic-gate */ 7997c478bd9Sstevel@tonic-gate static void 8007c478bd9Sstevel@tonic-gate lgrp_plat_latency_adjust(void) 8017c478bd9Sstevel@tonic-gate { 8027c478bd9Sstevel@tonic-gate int i; 8037c478bd9Sstevel@tonic-gate int j; 8047c478bd9Sstevel@tonic-gate int k; 8057c478bd9Sstevel@tonic-gate int l; 8067c478bd9Sstevel@tonic-gate u_longlong_t max; 8077c478bd9Sstevel@tonic-gate u_longlong_t min; 8087c478bd9Sstevel@tonic-gate u_longlong_t t; 8097c478bd9Sstevel@tonic-gate u_longlong_t t1; 8107c478bd9Sstevel@tonic-gate u_longlong_t t2; 81103400a71Sjjc const lgrp_config_flag_t cflag = LGRP_CONFIG_LAT_CHANGE_ALL; 8127c478bd9Sstevel@tonic-gate int lat_corrected[MAX_NODES][MAX_NODES]; 8137c478bd9Sstevel@tonic-gate 8147c478bd9Sstevel@tonic-gate /* 8157c478bd9Sstevel@tonic-gate * Nothing to do when this is an UMA machine 8167c478bd9Sstevel@tonic-gate */ 8177c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1) 
8187c478bd9Sstevel@tonic-gate return; 8197c478bd9Sstevel@tonic-gate 8207c478bd9Sstevel@tonic-gate /* 8217c478bd9Sstevel@tonic-gate * Make sure that latencies are symmetric between any two nodes 8227c478bd9Sstevel@tonic-gate * (ie. latency(node0, node1) == latency(node1, node0)) 8237c478bd9Sstevel@tonic-gate */ 8247c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 8257c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 8267c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[i][j]; 8277c478bd9Sstevel@tonic-gate t2 = lgrp_plat_probe_times[j][i]; 8287c478bd9Sstevel@tonic-gate 8297c478bd9Sstevel@tonic-gate if (t1 == 0 || t2 == 0 || t1 == t2) 8307c478bd9Sstevel@tonic-gate continue; 8317c478bd9Sstevel@tonic-gate 8327c478bd9Sstevel@tonic-gate /* 8337c478bd9Sstevel@tonic-gate * Latencies should be same 8347c478bd9Sstevel@tonic-gate * - Use minimum of two latencies which should be same 8357c478bd9Sstevel@tonic-gate * - Track suspect probe times not within tolerance of 8367c478bd9Sstevel@tonic-gate * min value 8377c478bd9Sstevel@tonic-gate * - Remember how much values are corrected by 8387c478bd9Sstevel@tonic-gate */ 8397c478bd9Sstevel@tonic-gate if (t1 > t2) { 8407c478bd9Sstevel@tonic-gate t = t2; 8417c478bd9Sstevel@tonic-gate lgrp_plat_probe_errors[i][j] += t1 - t2; 8427c478bd9Sstevel@tonic-gate if (t1 - t2 > t2 >> lgrp_plat_probe_lt_shift) { 8437c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[i][j]++; 8447c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[j][i]++; 8457c478bd9Sstevel@tonic-gate } 8467c478bd9Sstevel@tonic-gate } else if (t2 > t1) { 8477c478bd9Sstevel@tonic-gate t = t1; 8487c478bd9Sstevel@tonic-gate lgrp_plat_probe_errors[j][i] += t2 - t1; 8497c478bd9Sstevel@tonic-gate if (t2 - t1 > t1 >> lgrp_plat_probe_lt_shift) { 8507c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[i][j]++; 8517c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[j][i]++; 8527c478bd9Sstevel@tonic-gate } 8537c478bd9Sstevel@tonic-gate } 
8547c478bd9Sstevel@tonic-gate 8557c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][j] = 8567c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[j][i] = t; 8577c478bd9Sstevel@tonic-gate lgrp_config(cflag, t1, t); 8587c478bd9Sstevel@tonic-gate lgrp_config(cflag, t2, t); 8597c478bd9Sstevel@tonic-gate } 8607c478bd9Sstevel@tonic-gate 8617c478bd9Sstevel@tonic-gate /* 8627c478bd9Sstevel@tonic-gate * Keep track of which latencies get corrected 8637c478bd9Sstevel@tonic-gate */ 8647c478bd9Sstevel@tonic-gate for (i = 0; i < MAX_NODES; i++) 8657c478bd9Sstevel@tonic-gate for (j = 0; j < MAX_NODES; j++) 8667c478bd9Sstevel@tonic-gate lat_corrected[i][j] = 0; 8677c478bd9Sstevel@tonic-gate 8687c478bd9Sstevel@tonic-gate /* 8697c478bd9Sstevel@tonic-gate * For every two nodes, see whether there is another pair of nodes which 8707c478bd9Sstevel@tonic-gate * are about the same distance apart and make the latencies be the same 8717c478bd9Sstevel@tonic-gate * if they are close enough together 8727c478bd9Sstevel@tonic-gate */ 8737c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 8747c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 8757c478bd9Sstevel@tonic-gate /* 8767c478bd9Sstevel@tonic-gate * Pick one pair of nodes (i, j) 8777c478bd9Sstevel@tonic-gate * and get latency between them 8787c478bd9Sstevel@tonic-gate */ 8797c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[i][j]; 8807c478bd9Sstevel@tonic-gate 8817c478bd9Sstevel@tonic-gate /* 8827c478bd9Sstevel@tonic-gate * Skip this pair of nodes if there isn't a latency 8837c478bd9Sstevel@tonic-gate * for it yet 8847c478bd9Sstevel@tonic-gate */ 8857c478bd9Sstevel@tonic-gate if (t1 == 0) 8867c478bd9Sstevel@tonic-gate continue; 8877c478bd9Sstevel@tonic-gate 8887c478bd9Sstevel@tonic-gate for (k = 0; k < lgrp_plat_node_cnt; k++) 8897c478bd9Sstevel@tonic-gate for (l = 0; l < lgrp_plat_node_cnt; l++) { 8907c478bd9Sstevel@tonic-gate /* 8917c478bd9Sstevel@tonic-gate * Pick another pair of nodes (k, l) 
8927c478bd9Sstevel@tonic-gate * not same as (i, j) and get latency 8937c478bd9Sstevel@tonic-gate * between them 8947c478bd9Sstevel@tonic-gate */ 8957c478bd9Sstevel@tonic-gate if (k == i && l == j) 8967c478bd9Sstevel@tonic-gate continue; 8977c478bd9Sstevel@tonic-gate 8987c478bd9Sstevel@tonic-gate t2 = lgrp_plat_probe_times[k][l]; 8997c478bd9Sstevel@tonic-gate 9007c478bd9Sstevel@tonic-gate /* 9017c478bd9Sstevel@tonic-gate * Skip this pair of nodes if there 9027c478bd9Sstevel@tonic-gate * isn't a latency for it yet 9037c478bd9Sstevel@tonic-gate */ 9047c478bd9Sstevel@tonic-gate 9057c478bd9Sstevel@tonic-gate if (t2 == 0) 9067c478bd9Sstevel@tonic-gate continue; 9077c478bd9Sstevel@tonic-gate 9087c478bd9Sstevel@tonic-gate /* 9097c478bd9Sstevel@tonic-gate * Skip nodes (k, l) if they already 9107c478bd9Sstevel@tonic-gate * have same latency as (i, j) or 9117c478bd9Sstevel@tonic-gate * their latency isn't close enough to 9127c478bd9Sstevel@tonic-gate * be considered/made the same 9137c478bd9Sstevel@tonic-gate */ 9147c478bd9Sstevel@tonic-gate if (t1 == t2 || (t1 > t2 && t1 - t2 > 9157c478bd9Sstevel@tonic-gate t1 >> lgrp_plat_probe_lt_shift) || 9167c478bd9Sstevel@tonic-gate (t2 > t1 && t2 - t1 > 9177c478bd9Sstevel@tonic-gate t2 >> lgrp_plat_probe_lt_shift)) 9187c478bd9Sstevel@tonic-gate continue; 9197c478bd9Sstevel@tonic-gate 9207c478bd9Sstevel@tonic-gate /* 9217c478bd9Sstevel@tonic-gate * Make latency(i, j) same as 9227c478bd9Sstevel@tonic-gate * latency(k, l), try to use latency 9237c478bd9Sstevel@tonic-gate * that has been adjusted already to get 9247c478bd9Sstevel@tonic-gate * more consistency (if possible), and 9257c478bd9Sstevel@tonic-gate * remember which latencies were 9267c478bd9Sstevel@tonic-gate * adjusted for next time 9277c478bd9Sstevel@tonic-gate */ 9287c478bd9Sstevel@tonic-gate if (lat_corrected[i][j]) { 9297c478bd9Sstevel@tonic-gate t = t1; 9307c478bd9Sstevel@tonic-gate lgrp_config(cflag, t2, t); 9317c478bd9Sstevel@tonic-gate t2 = t; 
9327c478bd9Sstevel@tonic-gate } else if (lat_corrected[k][l]) { 9337c478bd9Sstevel@tonic-gate t = t2; 9347c478bd9Sstevel@tonic-gate lgrp_config(cflag, t1, t); 9357c478bd9Sstevel@tonic-gate t1 = t; 9367c478bd9Sstevel@tonic-gate } else { 9377c478bd9Sstevel@tonic-gate if (t1 > t2) 9387c478bd9Sstevel@tonic-gate t = t2; 9397c478bd9Sstevel@tonic-gate else 9407c478bd9Sstevel@tonic-gate t = t1; 9417c478bd9Sstevel@tonic-gate lgrp_config(cflag, t1, t); 9427c478bd9Sstevel@tonic-gate lgrp_config(cflag, t2, t); 9437c478bd9Sstevel@tonic-gate t1 = t2 = t; 9447c478bd9Sstevel@tonic-gate } 9457c478bd9Sstevel@tonic-gate 9467c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][j] = 9477c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[k][l] = t; 9487c478bd9Sstevel@tonic-gate 9497c478bd9Sstevel@tonic-gate lat_corrected[i][j] = 9507c478bd9Sstevel@tonic-gate lat_corrected[k][l] = 1; 9517c478bd9Sstevel@tonic-gate } 9527c478bd9Sstevel@tonic-gate } 9537c478bd9Sstevel@tonic-gate 9547c478bd9Sstevel@tonic-gate /* 9557c478bd9Sstevel@tonic-gate * Local latencies should be same 9567c478bd9Sstevel@tonic-gate * - Find min and max local latencies 9577c478bd9Sstevel@tonic-gate * - Make all local latencies be minimum 9587c478bd9Sstevel@tonic-gate */ 9597c478bd9Sstevel@tonic-gate min = -1; 9607c478bd9Sstevel@tonic-gate max = 0; 9617c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 9627c478bd9Sstevel@tonic-gate t = lgrp_plat_probe_times[i][i]; 9637c478bd9Sstevel@tonic-gate if (t == 0) 9647c478bd9Sstevel@tonic-gate continue; 9657c478bd9Sstevel@tonic-gate if (min == -1 || t < min) 9667c478bd9Sstevel@tonic-gate min = t; 9677c478bd9Sstevel@tonic-gate if (t > max) 9687c478bd9Sstevel@tonic-gate max = t; 9697c478bd9Sstevel@tonic-gate } 9707c478bd9Sstevel@tonic-gate if (min != max) { 9717c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 9727c478bd9Sstevel@tonic-gate int local; 9737c478bd9Sstevel@tonic-gate 9747c478bd9Sstevel@tonic-gate local = lgrp_plat_probe_times[i][i]; 
9757c478bd9Sstevel@tonic-gate if (local == 0) 9767c478bd9Sstevel@tonic-gate continue; 9777c478bd9Sstevel@tonic-gate 9787c478bd9Sstevel@tonic-gate /* 9797c478bd9Sstevel@tonic-gate * Track suspect probe times that aren't within 9807c478bd9Sstevel@tonic-gate * tolerance of minimum local latency and how much 9817c478bd9Sstevel@tonic-gate * probe times are corrected by 9827c478bd9Sstevel@tonic-gate */ 9837c478bd9Sstevel@tonic-gate if (local - min > min >> lgrp_plat_probe_lt_shift) 9847c478bd9Sstevel@tonic-gate lgrp_plat_probe_suspect[i][i]++; 9857c478bd9Sstevel@tonic-gate 9867c478bd9Sstevel@tonic-gate lgrp_plat_probe_errors[i][i] += local - min; 9877c478bd9Sstevel@tonic-gate 9887c478bd9Sstevel@tonic-gate /* 9897c478bd9Sstevel@tonic-gate * Make local latencies be minimum 9907c478bd9Sstevel@tonic-gate */ 99103400a71Sjjc lgrp_config(LGRP_CONFIG_LAT_CHANGE, i, min); 9927c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][i] = min; 9937c478bd9Sstevel@tonic-gate } 9947c478bd9Sstevel@tonic-gate } 9957c478bd9Sstevel@tonic-gate 9967c478bd9Sstevel@tonic-gate /* 9977c478bd9Sstevel@tonic-gate * Determine max probe time again since just adjusted latencies 9987c478bd9Sstevel@tonic-gate */ 9997c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max = 0; 10007c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 10017c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 10027c478bd9Sstevel@tonic-gate t = lgrp_plat_probe_times[i][j]; 10037c478bd9Sstevel@tonic-gate if (t > lgrp_plat_probe_time_max) 10047c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max = t; 10057c478bd9Sstevel@tonic-gate } 10067c478bd9Sstevel@tonic-gate } 10077c478bd9Sstevel@tonic-gate 10087c478bd9Sstevel@tonic-gate 10097c478bd9Sstevel@tonic-gate /* 10107c478bd9Sstevel@tonic-gate * Verify following about latencies between nodes: 10117c478bd9Sstevel@tonic-gate * 10127c478bd9Sstevel@tonic-gate * - Latencies should be symmetric (ie. 
latency(a, b) == latency(b, a)) 10137c478bd9Sstevel@tonic-gate * - Local latencies same 10147c478bd9Sstevel@tonic-gate * - Local < remote 10157c478bd9Sstevel@tonic-gate * - Number of latencies seen is reasonable 10167c478bd9Sstevel@tonic-gate * - Number of occurrences of a given latency should be more than 1 10177c478bd9Sstevel@tonic-gate * 10187c478bd9Sstevel@tonic-gate * Returns: 10197c478bd9Sstevel@tonic-gate * 0 Success 10207c478bd9Sstevel@tonic-gate * -1 Not symmetric 10217c478bd9Sstevel@tonic-gate * -2 Local latencies not same 10227c478bd9Sstevel@tonic-gate * -3 Local >= remote 10237c478bd9Sstevel@tonic-gate * -4 Wrong number of latencies 10247c478bd9Sstevel@tonic-gate * -5 Not enough occurrences of given latency 10257c478bd9Sstevel@tonic-gate */ 10267c478bd9Sstevel@tonic-gate static int 10277c478bd9Sstevel@tonic-gate lgrp_plat_latency_verify(void) 10287c478bd9Sstevel@tonic-gate { 10297c478bd9Sstevel@tonic-gate int i; 10307c478bd9Sstevel@tonic-gate int j; 10317c478bd9Sstevel@tonic-gate lgrp_plat_latency_acct_t *l; 10327c478bd9Sstevel@tonic-gate int probed; 10337c478bd9Sstevel@tonic-gate u_longlong_t t1; 10347c478bd9Sstevel@tonic-gate u_longlong_t t2; 10357c478bd9Sstevel@tonic-gate 10367c478bd9Sstevel@tonic-gate /* 10372dae3fb5Sjjc * Nothing to do when this is an UMA machine, lgroup topology is 10382dae3fb5Sjjc * limited to 2 levels, or there aren't any probe times yet 10397c478bd9Sstevel@tonic-gate */ 10407c478bd9Sstevel@tonic-gate if (max_mem_nodes == 1 || lgrp_topo_levels < 2 || 10412dae3fb5Sjjc (lgrp_plat_probe_time_max == 0 && lgrp_plat_probe_time_min == -1)) 10427c478bd9Sstevel@tonic-gate return (0); 10437c478bd9Sstevel@tonic-gate 10447c478bd9Sstevel@tonic-gate /* 10457c478bd9Sstevel@tonic-gate * Make sure that latencies are symmetric between any two nodes 10467c478bd9Sstevel@tonic-gate * (ie. 
latency(node0, node1) == latency(node1, node0)) 10477c478bd9Sstevel@tonic-gate */ 10487c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 10497c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 10507c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[i][j]; 10517c478bd9Sstevel@tonic-gate t2 = lgrp_plat_probe_times[j][i]; 10527c478bd9Sstevel@tonic-gate 10537c478bd9Sstevel@tonic-gate if (t1 == 0 || t2 == 0 || t1 == t2) 10547c478bd9Sstevel@tonic-gate continue; 10557c478bd9Sstevel@tonic-gate 10567c478bd9Sstevel@tonic-gate return (-1); 10577c478bd9Sstevel@tonic-gate } 10587c478bd9Sstevel@tonic-gate 10597c478bd9Sstevel@tonic-gate /* 10607c478bd9Sstevel@tonic-gate * Local latencies should be same 10617c478bd9Sstevel@tonic-gate */ 10627c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[0][0]; 10637c478bd9Sstevel@tonic-gate for (i = 1; i < lgrp_plat_node_cnt; i++) { 10647c478bd9Sstevel@tonic-gate t2 = lgrp_plat_probe_times[i][i]; 10657c478bd9Sstevel@tonic-gate if (t2 == 0) 10667c478bd9Sstevel@tonic-gate continue; 10677c478bd9Sstevel@tonic-gate 10682dae3fb5Sjjc if (t1 == 0) { 10692dae3fb5Sjjc t1 = t2; 10702dae3fb5Sjjc continue; 10712dae3fb5Sjjc } 10722dae3fb5Sjjc 10737c478bd9Sstevel@tonic-gate if (t1 != t2) 10747c478bd9Sstevel@tonic-gate return (-2); 10757c478bd9Sstevel@tonic-gate } 10767c478bd9Sstevel@tonic-gate 10777c478bd9Sstevel@tonic-gate /* 10787c478bd9Sstevel@tonic-gate * Local latencies should be less than remote 10797c478bd9Sstevel@tonic-gate */ 10802dae3fb5Sjjc if (t1) { 10817c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 10827c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 10832dae3fb5Sjjc t2 = lgrp_plat_probe_times[i][j]; 10847c478bd9Sstevel@tonic-gate if (i == j || t2 == 0) 10857c478bd9Sstevel@tonic-gate continue; 10867c478bd9Sstevel@tonic-gate 10877c478bd9Sstevel@tonic-gate if (t1 >= t2) 10887c478bd9Sstevel@tonic-gate return (-3); 10897c478bd9Sstevel@tonic-gate } 10902dae3fb5Sjjc } 
10917c478bd9Sstevel@tonic-gate 10927c478bd9Sstevel@tonic-gate /* 10937c478bd9Sstevel@tonic-gate * Rest of checks are not very useful for machines with less than 10947c478bd9Sstevel@tonic-gate * 4 nodes (which means less than 3 latencies on Opteron) 10957c478bd9Sstevel@tonic-gate */ 10967c478bd9Sstevel@tonic-gate if (lgrp_plat_node_cnt < 4) 10977c478bd9Sstevel@tonic-gate return (0); 10987c478bd9Sstevel@tonic-gate 10997c478bd9Sstevel@tonic-gate /* 11007c478bd9Sstevel@tonic-gate * Need to see whether done probing in order to verify number of 11017c478bd9Sstevel@tonic-gate * latencies are correct 11027c478bd9Sstevel@tonic-gate */ 11037c478bd9Sstevel@tonic-gate probed = 0; 11047c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) 11057c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_times[i][i]) 11067c478bd9Sstevel@tonic-gate probed++; 11077c478bd9Sstevel@tonic-gate 11087c478bd9Sstevel@tonic-gate if (probed != lgrp_plat_node_cnt) 11097c478bd9Sstevel@tonic-gate return (0); 11107c478bd9Sstevel@tonic-gate 11117c478bd9Sstevel@tonic-gate /* 11127c478bd9Sstevel@tonic-gate * Determine number of unique latencies seen in probe times, 11137c478bd9Sstevel@tonic-gate * their values, and number of occurrences of each 11147c478bd9Sstevel@tonic-gate */ 11157c478bd9Sstevel@tonic-gate lgrp_plat_probe_nlatencies = 0; 11167c478bd9Sstevel@tonic-gate bzero(lgrp_plat_probe_lat_acct, 11177c478bd9Sstevel@tonic-gate MAX_NODES * sizeof (lgrp_plat_latency_acct_t)); 11187c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 11197c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 11207c478bd9Sstevel@tonic-gate int k; 11217c478bd9Sstevel@tonic-gate 11227c478bd9Sstevel@tonic-gate /* 11237c478bd9Sstevel@tonic-gate * Look at each probe time 11247c478bd9Sstevel@tonic-gate */ 11257c478bd9Sstevel@tonic-gate t1 = lgrp_plat_probe_times[i][j]; 11267c478bd9Sstevel@tonic-gate if (t1 == 0) 11277c478bd9Sstevel@tonic-gate continue; 11287c478bd9Sstevel@tonic-gate 
11297c478bd9Sstevel@tonic-gate /* 11307c478bd9Sstevel@tonic-gate * Account for unique latencies 11317c478bd9Sstevel@tonic-gate */ 11327c478bd9Sstevel@tonic-gate for (k = 0; k < lgrp_plat_node_cnt; k++) { 11337c478bd9Sstevel@tonic-gate l = &lgrp_plat_probe_lat_acct[k]; 11347c478bd9Sstevel@tonic-gate if (t1 == l->la_value) { 11357c478bd9Sstevel@tonic-gate /* 11367c478bd9Sstevel@tonic-gate * Increment number of occurrences 11377c478bd9Sstevel@tonic-gate * if seen before 11387c478bd9Sstevel@tonic-gate */ 11397c478bd9Sstevel@tonic-gate l->la_count++; 11407c478bd9Sstevel@tonic-gate break; 11417c478bd9Sstevel@tonic-gate } else if (l->la_value == 0) { 11427c478bd9Sstevel@tonic-gate /* 11437c478bd9Sstevel@tonic-gate * Record latency if haven't seen before 11447c478bd9Sstevel@tonic-gate */ 11457c478bd9Sstevel@tonic-gate l->la_value = t1; 11467c478bd9Sstevel@tonic-gate l->la_count++; 11477c478bd9Sstevel@tonic-gate lgrp_plat_probe_nlatencies++; 11487c478bd9Sstevel@tonic-gate break; 11497c478bd9Sstevel@tonic-gate } 11507c478bd9Sstevel@tonic-gate } 11517c478bd9Sstevel@tonic-gate } 11527c478bd9Sstevel@tonic-gate } 11537c478bd9Sstevel@tonic-gate 11547c478bd9Sstevel@tonic-gate /* 11557c478bd9Sstevel@tonic-gate * Number of latencies should be relative to number of 11567c478bd9Sstevel@tonic-gate * nodes in system: 11577c478bd9Sstevel@tonic-gate * - Same as nodes when nodes <= 2 11587c478bd9Sstevel@tonic-gate * - Less than nodes when nodes > 2 11597c478bd9Sstevel@tonic-gate * - Greater than 2 when nodes >= 4 11607c478bd9Sstevel@tonic-gate */ 11617c478bd9Sstevel@tonic-gate if ((lgrp_plat_node_cnt <= 2 && 11627c478bd9Sstevel@tonic-gate lgrp_plat_probe_nlatencies != lgrp_plat_node_cnt) || 11637c478bd9Sstevel@tonic-gate (lgrp_plat_node_cnt > 2 && 11647c478bd9Sstevel@tonic-gate lgrp_plat_probe_nlatencies >= lgrp_plat_node_cnt) || 11657c478bd9Sstevel@tonic-gate (lgrp_plat_node_cnt >= 4 && lgrp_topo_levels >= 3 && 11667c478bd9Sstevel@tonic-gate lgrp_plat_probe_nlatencies <= 2)) 
11677c478bd9Sstevel@tonic-gate return (-4); 11687c478bd9Sstevel@tonic-gate 11697c478bd9Sstevel@tonic-gate /* 11707c478bd9Sstevel@tonic-gate * There should be more than one occurrence of every latency 11717c478bd9Sstevel@tonic-gate * as long as probing is complete 11727c478bd9Sstevel@tonic-gate */ 11737c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_probe_nlatencies; i++) { 11747c478bd9Sstevel@tonic-gate l = &lgrp_plat_probe_lat_acct[i]; 11757c478bd9Sstevel@tonic-gate if (l->la_count <= 1) 11767c478bd9Sstevel@tonic-gate return (-5); 11777c478bd9Sstevel@tonic-gate } 11787c478bd9Sstevel@tonic-gate return (0); 11797c478bd9Sstevel@tonic-gate } 11807c478bd9Sstevel@tonic-gate 11817c478bd9Sstevel@tonic-gate 11827c478bd9Sstevel@tonic-gate /* 11837c478bd9Sstevel@tonic-gate * Set lgroup latencies for 2 level lgroup topology 11847c478bd9Sstevel@tonic-gate */ 11857c478bd9Sstevel@tonic-gate static void 11867c478bd9Sstevel@tonic-gate lgrp_plat_2level_setup(void) 11877c478bd9Sstevel@tonic-gate { 11887c478bd9Sstevel@tonic-gate int i; 11897c478bd9Sstevel@tonic-gate 11907c478bd9Sstevel@tonic-gate if (lgrp_plat_node_cnt >= 4) 11917c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, 11927c478bd9Sstevel@tonic-gate "MPO only optimizing for local and remote\n"); 11937c478bd9Sstevel@tonic-gate for (i = 0; i < lgrp_plat_node_cnt; i++) { 11947c478bd9Sstevel@tonic-gate int j; 11957c478bd9Sstevel@tonic-gate 11967c478bd9Sstevel@tonic-gate for (j = 0; j < lgrp_plat_node_cnt; j++) { 11977c478bd9Sstevel@tonic-gate if (i == j) 11987c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][j] = 2; 11997c478bd9Sstevel@tonic-gate else 12007c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[i][j] = 3; 12017c478bd9Sstevel@tonic-gate } 12027c478bd9Sstevel@tonic-gate } 12037c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_min = 2; 12047c478bd9Sstevel@tonic-gate lgrp_plat_probe_time_max = 3; 12057c478bd9Sstevel@tonic-gate lgrp_config(LGRP_CONFIG_FLATTEN, 2, 0); 12067c478bd9Sstevel@tonic-gate } 
12077c478bd9Sstevel@tonic-gate 12087c478bd9Sstevel@tonic-gate 12097c478bd9Sstevel@tonic-gate /* 12107c478bd9Sstevel@tonic-gate * Return time needed to probe from current CPU to memory in given node 12117c478bd9Sstevel@tonic-gate */ 12127c478bd9Sstevel@tonic-gate static hrtime_t 12137c478bd9Sstevel@tonic-gate lgrp_plat_probe_time(int to) 12147c478bd9Sstevel@tonic-gate { 12157c478bd9Sstevel@tonic-gate caddr_t buf; 12167c478bd9Sstevel@tonic-gate uint_t dev; 12177c478bd9Sstevel@tonic-gate /* LINTED: set but not used in function */ 12187c478bd9Sstevel@tonic-gate volatile uint_t dev_vendor; 12197c478bd9Sstevel@tonic-gate hrtime_t elapsed; 12207c478bd9Sstevel@tonic-gate hrtime_t end; 12217c478bd9Sstevel@tonic-gate int from; 12227c478bd9Sstevel@tonic-gate int i; 12237c478bd9Sstevel@tonic-gate int ipl; 12247c478bd9Sstevel@tonic-gate hrtime_t max; 12257c478bd9Sstevel@tonic-gate hrtime_t min; 12267c478bd9Sstevel@tonic-gate hrtime_t start; 12278949bcd6Sandrei int cnt; 12287c478bd9Sstevel@tonic-gate extern int use_sse_pagecopy; 12297c478bd9Sstevel@tonic-gate 12307c478bd9Sstevel@tonic-gate /* 12317c478bd9Sstevel@tonic-gate * Determine ID of node containing current CPU 12327c478bd9Sstevel@tonic-gate */ 12337c478bd9Sstevel@tonic-gate from = LGRP_PLAT_CPU_TO_NODE(CPU); 12347c478bd9Sstevel@tonic-gate 12357c478bd9Sstevel@tonic-gate /* 12367c478bd9Sstevel@tonic-gate * Do common work for probing main memory 12377c478bd9Sstevel@tonic-gate */ 12387c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_op == LGRP_PLAT_PROBE_PGCPY) { 12397c478bd9Sstevel@tonic-gate /* 12407c478bd9Sstevel@tonic-gate * Skip probing any nodes without memory and 12417c478bd9Sstevel@tonic-gate * set probe time to 0 12427c478bd9Sstevel@tonic-gate */ 12437c478bd9Sstevel@tonic-gate if (lgrp_plat_probe_memory[to] == NULL) { 12447c478bd9Sstevel@tonic-gate lgrp_plat_probe_times[from][to] = 0; 12457c478bd9Sstevel@tonic-gate return (0); 12467c478bd9Sstevel@tonic-gate } 12477c478bd9Sstevel@tonic-gate 
		/*
		 * Invalidate caches once instead of once every sample
		 * which should cut cost of probing by a lot
		 */
		lgrp_plat_flush_cost = gethrtime();
		invalidate_cache();
		lgrp_plat_flush_cost = gethrtime() - lgrp_plat_flush_cost;
		lgrp_plat_probe_cost_total += lgrp_plat_flush_cost;
	}

	/*
	 * Probe from current CPU to given memory using specified operation
	 * and take specified number of samples.  Track the minimum and
	 * maximum elapsed time across the samples; the minimum is what
	 * gets reported as the latency.
	 */
	max = 0;
	min = -1;
	for (i = 0; i < lgrp_plat_probe_nsamples; i++) {
		lgrp_plat_probe_cost = gethrtime();

		/*
		 * Can't measure probe time if gethrtime() isn't working yet
		 */
		if (lgrp_plat_probe_cost == 0 && gethrtime() == 0)
			return (0);

		switch (lgrp_plat_probe_op) {

		case LGRP_PLAT_PROBE_PGCPY:
		default:
			/*
			 * Measure how long it takes to copy page
			 * on top of itself.  Preemption is disabled and the
			 * IPL raised around the timed section so the
			 * measurement isn't perturbed.
			 */
			buf = lgrp_plat_probe_memory[to] + (i * PAGESIZE);

			kpreempt_disable();
			ipl = splhigh();
			start = gethrtime();
			if (use_sse_pagecopy)
				hwblkpagecopy(buf, buf);
			else
				bcopy(buf, buf, PAGESIZE);
			end = gethrtime();
			elapsed = end - start;
			splx(ipl);
			kpreempt_enable();
			break;

		case LGRP_PLAT_PROBE_VENDOR:
			/*
			 * Measure how long it takes to read vendor ID from
			 * Northbridge (via PCI config space address/data
			 * ports).  The elapsed time is averaged over
			 * lgrp_plat_probe_nreads reads of the data port.
			 */
			dev = OPT_PCS_DEV_NODE0 + to;
			kpreempt_disable();
			ipl = spl8();
			outl(PCI_CONFADD, PCI_CADDR1(0, dev, opt_probe_func,
			    OPT_PCS_OFF_VENDOR));
			start = gethrtime();
			for (cnt = 0; cnt < lgrp_plat_probe_nreads; cnt++)
				dev_vendor = inl(PCI_CONFDATA);
			end = gethrtime();
			elapsed = (end - start) / lgrp_plat_probe_nreads;
			splx(ipl);
			kpreempt_enable();
			break;
		}

		/* Account total probing overhead for diagnostics */
		lgrp_plat_probe_cost = gethrtime() - lgrp_plat_probe_cost;
		lgrp_plat_probe_cost_total += lgrp_plat_probe_cost;

		if (min == -1 || elapsed < min)
			min = elapsed;
		if (elapsed > max)
			max = elapsed;
	}

	/*
	 * Update minimum and maximum probe times between
	 * these two nodes
	 */
	if (min < lgrp_plat_probe_min[from][to] ||
	    lgrp_plat_probe_min[from][to] == 0)
		lgrp_plat_probe_min[from][to] = min;

	if (max > lgrp_plat_probe_max[from][to])
		lgrp_plat_probe_max[from][to] = max;

	return (min);
}


/*
 * Probe memory in each node from current CPU to determine latency topology.
 *
 * Fills in lgrp_plat_probe_times[][] for the node containing the current
 * CPU, then adjusts and verifies the resulting latency matrix, falling
 * back to a simple 2-level (local/remote) topology if it doesn't look
 * consistent.  Returns early (without touching the tables) if probing
 * isn't possible yet.
 */
void
lgrp_plat_probe(void)
{
	int	from;
	int	i;
	hrtime_t	probe_time;
	int	to;

	/* Nothing to probe on a single-node or flat (<= 2 level) topology */
	if (max_mem_nodes == 1 || lgrp_topo_ht_limit() <= 2)
		return;

	/*
	 * Determine ID of node containing current CPU
	 */
	from = LGRP_PLAT_CPU_TO_NODE(CPU);

	/*
	 * Don't need to probe if got times already
	 */
	if (lgrp_plat_probe_times[from][from] != 0)
		return;

	/*
	 * Read vendor ID in Northbridge or read and write page(s)
	 * in each node from current CPU and remember how long it takes,
	 * so we can build latency topology of machine later.
	 * This should approximate the memory latency between each node.
	 */
	for (i = 0; i < lgrp_plat_probe_nrounds; i++)
		for (to = 0; to < lgrp_plat_node_cnt; to++) {
			/*
			 * Get probe time and bail out if can't get it yet
			 * (e.g. gethrtime() not initialized this early
			 * in boot)
			 */
			probe_time = lgrp_plat_probe_time(to);
			if (probe_time == 0)
				return;

			/*
			 * Keep lowest probe time as latency between nodes
			 */
			if (lgrp_plat_probe_times[from][to] == 0 ||
			    probe_time < lgrp_plat_probe_times[from][to])
				lgrp_plat_probe_times[from][to] = probe_time;

			/*
			 * Update overall minimum and maximum probe times
			 * across all nodes
			 */
			if (probe_time < lgrp_plat_probe_time_min ||
			    lgrp_plat_probe_time_min == -1)
				lgrp_plat_probe_time_min = probe_time;
			if (probe_time > lgrp_plat_probe_time_max)
				lgrp_plat_probe_time_max = probe_time;
		}

	/*
	 * - Fix up latencies such that local latencies are same,
	 *   latency(i, j) == latency(j, i), etc. (if possible)
	 *
	 * - Verify that latencies look ok
	 *
	 * - Fallback to just optimizing for local and remote if
	 *   latencies didn't look right
	 */
	lgrp_plat_latency_adjust();
	lgrp_plat_probe_error_code = lgrp_plat_latency_verify();
	if (lgrp_plat_probe_error_code)
		lgrp_plat_2level_setup();
}


/*
 * Platform-specific initialization.
 *
 * Sets up whatever the configured probe operation needs (vendor-ID reads
 * need nothing; page-copy probing needs one mapped page per sample in
 * each node) and then performs the initial latency probe from the
 * current CPU.
 */
void
lgrp_plat_main_init(void)
{
	int	curnode;
	int	ht_limit;
	int	i;

	/*
	 * Print a notice that MPO is disabled when memory is interleaved
	 * across nodes....Would do this when it is discovered, but can't
	 * because it happens way too early during boot....
	 */
	if (lgrp_plat_mem_intrlv)
		cmn_err(CE_NOTE,
		    "MPO disabled because memory is interleaved\n");

	/*
	 * Don't bother to do any probing if there is only one node or the
	 * height of the lgroup topology less than or equal to 2
	 */
	ht_limit = lgrp_topo_ht_limit();
	if (max_mem_nodes == 1 || ht_limit <= 2) {
		/*
		 * Setup lgroup latencies for 2 level lgroup topology
		 * (ie. local and remote only) if they haven't been set yet
		 */
		if (ht_limit == 2 && lgrp_plat_probe_time_min == -1 &&
		    lgrp_plat_probe_time_max == 0)
			lgrp_plat_2level_setup();
		return;
	}

	if (lgrp_plat_probe_op == LGRP_PLAT_PROBE_VENDOR) {
		/*
		 * Should have been able to probe from CPU 0 when it was added
		 * to lgroup hierarchy, but may not have been able to then
		 * because it happens so early in boot that gethrtime() hasn't
		 * been initialized.  (:-(
		 */
		curnode = LGRP_PLAT_CPU_TO_NODE(CPU);
		if (lgrp_plat_probe_times[curnode][curnode] == 0)
			lgrp_plat_probe();

		return;
	}

	/*
	 * When probing memory, use one page for every sample to determine
	 * lgroup topology and taking multiple samples
	 */
	if (lgrp_plat_probe_memsize == 0)
		lgrp_plat_probe_memsize = PAGESIZE *
		    lgrp_plat_probe_nsamples;

	/*
	 * Map memory in each node needed for probing to determine latency
	 * topology
	 */
	for (i = 0; i < lgrp_plat_node_cnt; i++) {
		int	mnode;

		/*
		 * Skip this node and leave its probe page NULL
		 * if it doesn't have any memory
		 */
		mnode = plat_lgrphand_to_mem_node((lgrp_handle_t)i);
		if (!mem_node_config[mnode].exists) {
			lgrp_plat_probe_memory[i] = NULL;
			continue;
		}

		/*
		 * Allocate one kernel virtual page
		 * (VM_NOSLEEP: fail rather than block this early in boot)
		 */
		lgrp_plat_probe_memory[i] = vmem_alloc(heap_arena,
		    lgrp_plat_probe_memsize, VM_NOSLEEP);
		if (lgrp_plat_probe_memory[i] == NULL) {
			cmn_err(CE_WARN,
			    "lgrp_plat_main_init: couldn't allocate memory");
			return;
		}

		/*
		 * Map virtual page to first page in node
		 * (uncached so the probe measures memory, not cache)
		 */
		hat_devload(kas.a_hat, lgrp_plat_probe_memory[i],
		    lgrp_plat_probe_memsize,
		    lgrp_plat_probe_pfn[i],
		    PROT_READ | PROT_WRITE | HAT_PLAT_NOCACHE,
		    HAT_LOAD_NOCONSIST);
	}

	/*
	 * Probe from current CPU
	 */
	lgrp_plat_probe();
}

/*
 * Allocate additional space for an lgroup.
15177c478bd9Sstevel@tonic-gate */ 15187c478bd9Sstevel@tonic-gate /* ARGSUSED */ 15197c478bd9Sstevel@tonic-gate lgrp_t * 15207c478bd9Sstevel@tonic-gate lgrp_plat_alloc(lgrp_id_t lgrpid) 15217c478bd9Sstevel@tonic-gate { 15227c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 15237c478bd9Sstevel@tonic-gate 15247c478bd9Sstevel@tonic-gate lgrp = &lgrp_space[nlgrps_alloc++]; 15257c478bd9Sstevel@tonic-gate if (lgrpid >= NLGRP || nlgrps_alloc > NLGRP) 15267c478bd9Sstevel@tonic-gate return (NULL); 15277c478bd9Sstevel@tonic-gate return (lgrp); 15287c478bd9Sstevel@tonic-gate } 15297c478bd9Sstevel@tonic-gate 15307c478bd9Sstevel@tonic-gate /* 15317c478bd9Sstevel@tonic-gate * Platform handling for (re)configuration changes 15327c478bd9Sstevel@tonic-gate */ 15337c478bd9Sstevel@tonic-gate /* ARGSUSED */ 15347c478bd9Sstevel@tonic-gate void 15357c478bd9Sstevel@tonic-gate lgrp_plat_config(lgrp_config_flag_t flag, uintptr_t arg) 15367c478bd9Sstevel@tonic-gate { 15377c478bd9Sstevel@tonic-gate } 15387c478bd9Sstevel@tonic-gate 15397c478bd9Sstevel@tonic-gate /* 15407c478bd9Sstevel@tonic-gate * Return the platform handle for the lgroup containing the given CPU 15417c478bd9Sstevel@tonic-gate */ 15427c478bd9Sstevel@tonic-gate /* ARGSUSED */ 15437c478bd9Sstevel@tonic-gate lgrp_handle_t 15447c478bd9Sstevel@tonic-gate lgrp_plat_cpu_to_hand(processorid_t id) 15457c478bd9Sstevel@tonic-gate { 15467c478bd9Sstevel@tonic-gate if (lgrp_plat_node_cnt == 1) 15477c478bd9Sstevel@tonic-gate return (LGRP_DEFAULT_HANDLE); 15487c478bd9Sstevel@tonic-gate 15497c478bd9Sstevel@tonic-gate return ((lgrp_handle_t)LGRP_PLAT_CPU_TO_NODE(cpu[id])); 15507c478bd9Sstevel@tonic-gate } 15517c478bd9Sstevel@tonic-gate 15527c478bd9Sstevel@tonic-gate /* 15537c478bd9Sstevel@tonic-gate * Return the platform handle of the lgroup that contains the physical memory 15547c478bd9Sstevel@tonic-gate * corresponding to the given page frame number 15557c478bd9Sstevel@tonic-gate */ 15567c478bd9Sstevel@tonic-gate /* ARGSUSED */ 
lgrp_handle_t
lgrp_plat_pfn_to_hand(pfn_t pfn)
{
	int	mnode;

	/* Single memory node: everything belongs to the root lgroup */
	if (max_mem_nodes == 1)
		return (LGRP_DEFAULT_HANDLE);

	/* PFN beyond installed physical memory maps to no lgroup */
	if (pfn > physmax)
		return (LGRP_NULL_HANDLE);

	mnode = plat_pfn_to_mem_node(pfn);
	if (mnode < 0)
		return (LGRP_NULL_HANDLE);

	return (MEM_NODE_2_LGRPHAND(mnode));
}

/*
 * Return the maximum number of lgrps supported by the platform.
 * Before lgrp topology is known it returns an estimate based on the number of
 * nodes. Once topology is known it returns the actual maximim number of lgrps
 * created. Since x86 doesn't support dynamic addition of new nodes, this number
 * may not grow during system lifetime.
 */
int
lgrp_plat_max_lgrps()
{
	return (lgrp_topo_initialized ?
	    lgrp_alloc_max + 1 :
	    lgrp_plat_node_cnt * (lgrp_plat_node_cnt - 1) + 1);
}

/*
 * Return the number of free, allocatable, or installed
 * pages in an lgroup
 * This is a copy of the MAX_MEM_NODES == 1 version of the routine
 * used when MPO is disabled (i.e. single lgroup) or this is the root lgroup
 */
/* ARGSUSED */
static pgcnt_t
lgrp_plat_mem_size_default(lgrp_handle_t lgrphand, lgrp_mem_query_t query)
{
	struct memlist *mlist;
	pgcnt_t npgs = 0;
	extern struct memlist *phys_avail;
	extern struct memlist *phys_install;

	switch (query) {
	case LGRP_MEM_SIZE_FREE:
		return ((pgcnt_t)freemem);
	case LGRP_MEM_SIZE_AVAIL:
		/* Walk the available-memory list under the read lock */
		memlist_read_lock();
		for (mlist = phys_avail; mlist; mlist = mlist->next)
			npgs += btop(mlist->size);
		memlist_read_unlock();
		return (npgs);
	case LGRP_MEM_SIZE_INSTALL:
		/* Walk the installed-memory list under the read lock */
		memlist_read_lock();
		for (mlist = phys_install; mlist; mlist = mlist->next)
			npgs += btop(mlist->size);
		memlist_read_unlock();
		return (npgs);
	default:
		return ((pgcnt_t)0);
	}
}

/*
 * Return the number of free pages in an lgroup.
 *
 * For query of LGRP_MEM_SIZE_FREE, return the number of base pagesize
 * pages on freelists.  For query of LGRP_MEM_SIZE_AVAIL, return the
 * number of allocatable base pagesize pages corresponding to the
 * lgroup (e.g. do not include page_t's, BOP_ALLOC()'ed memory, ..)
 * For query of LGRP_MEM_SIZE_INSTALL, return the amount of physical
 * memory installed, regardless of whether or not it's usable.
 *
 * Returns 0 for LGRP_NULL_HANDLE, nonexistent memory nodes, and
 * unrecognized query types.
 */
pgcnt_t
lgrp_plat_mem_size(lgrp_handle_t plathand, lgrp_mem_query_t query)
{
	int	mnode;
	pgcnt_t	npgs = (pgcnt_t)0;
	extern struct memlist *phys_avail;
	extern struct memlist *phys_install;


	/* Root lgroup covers the whole machine; use the machine-wide totals */
	if (plathand == LGRP_DEFAULT_HANDLE)
		return (lgrp_plat_mem_size_default(plathand, query));

	if (plathand != LGRP_NULL_HANDLE) {
		mnode = plat_lgrphand_to_mem_node(plathand);
		if (mnode >= 0 && mem_node_config[mnode].exists) {
			switch (query) {
			case LGRP_MEM_SIZE_FREE:
				npgs = MNODE_PGCNT(mnode);
				break;
			case LGRP_MEM_SIZE_AVAIL:
				npgs = mem_node_memlist_pages(mnode,
				    phys_avail);
				break;
			case LGRP_MEM_SIZE_INSTALL:
				npgs = mem_node_memlist_pages(mnode,
				    phys_install);
				break;
			default:
				break;
			}
		}
	}
	return (npgs);
}

/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system, cannot be used across platforms,
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so platform gets to decide its value.  It would be nice if the
 * number was at least proportional to make comparisons more meaningful though.
 */
/* ARGSUSED */
int
lgrp_plat_latency(lgrp_handle_t from, lgrp_handle_t to)
{
	lgrp_handle_t	src, dest;

	if (max_mem_nodes == 1)
		return (0);

	/*
	 * Return max latency for root lgroup
	 */
	if (from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE)
		return (lgrp_plat_probe_time_max);

	src = from;
	dest = to;

	/*
	 * Return 0 for nodes (lgroup platform handles) out of range.
	 * NOTE(review): the "src < 0" / "dest < 0" tests are only
	 * meaningful if lgrp_handle_t is a signed type -- confirm
	 * against its typedef.
	 */
	if (src < 0 || src >= MAX_NODES || dest < 0 || dest >= MAX_NODES)
		return (0);

	/*
	 * Probe from current CPU if its lgroup latencies haven't been set yet
	 * and we are trying to get latency from current CPU to some node
	 */
	if (lgrp_plat_probe_times[src][src] == 0 &&
	    LGRP_PLAT_CPU_TO_NODE(CPU) == src)
		lgrp_plat_probe();

	return (lgrp_plat_probe_times[src][dest]);
}

/*
 * Return platform handle for root lgroup
 */
lgrp_handle_t
lgrp_plat_root_hand(void)
{
	/* The root lgroup always uses the default platform handle. */
	return (LGRP_DEFAULT_HANDLE);
}