1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <cma.h> 29 30 #include <fcntl.h> 31 #include <unistd.h> 32 #include <strings.h> 33 #include <errno.h> 34 #include <time.h> 35 #include <fm/fmd_api.h> 36 #include <sys/fm/protocol.h> 37 #include <sys/bl.h> 38 #include <sys/processor.h> 39 40 static int 41 cpu_blacklist(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru) 42 { 43 bl_req_t blr; 44 nvlist_t *fmri; 45 char *fmribuf; 46 size_t fmrisz; 47 int fd, rc, err; 48 char *class; 49 50 /* 51 * Some platforms have special unums for the E$ DIMMs. If we're dealing 52 * with a platform that has these unums, one will have been added to the 53 * fault as the resource. We'll use that for the blacklisting. If we 54 * can't find a resource, we'll fall back to the ASRU. 55 */ 56 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &fmri) != 0) 57 fmri = asru; 58 59 if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) || 60 (class == NULL) || (*class == '\0')) { 61 fmd_hdl_debug(hdl, "failed to get the fault class name\n"); 62 errno = EINVAL; 63 return (-1); 64 } 65 66 if ((fd = open("/dev/bl", O_RDONLY)) < 0) 67 return (-1); /* errno is set for us */ 68 69 if ((errno = nvlist_size(fmri, &fmrisz, NV_ENCODE_NATIVE)) != 0 || 70 (fmribuf = fmd_hdl_alloc(hdl, fmrisz, FMD_SLEEP)) == NULL) { 71 (void) close(fd); 72 return (-1); /* errno is set for us */ 73 } 74 75 if ((errno = nvlist_pack(fmri, &fmribuf, &fmrisz, 76 NV_ENCODE_NATIVE, 0)) != 0) { 77 fmd_hdl_free(hdl, fmribuf, fmrisz); 78 (void) close(fd); 79 return (-1); /* errno is set for us */ 80 } 81 82 blr.bl_fmri = fmribuf; 83 blr.bl_fmrisz = fmrisz; 84 blr.bl_class = class; 85 86 rc = ioctl(fd, BLIOC_INSERT, &blr); 87 err = errno; 88 89 fmd_hdl_free(hdl, fmribuf, fmrisz); 90 (void) close(fd); 91 92 if (rc < 0 && err != ENOTSUP) { 93 errno = err; 94 return (-1); 95 } 96 97 return (0); 98 } 99 100 int 101 cpu_offline(fmd_hdl_t *hdl, uint_t cpuid, int cpustate) 102 { 103 int i; 104 105 for (i = 0; i < cma.cma_cpu_tries; 106 i++, (void) nanosleep(&cma.cma_cpu_delay, NULL)) { 107 if (p_online(cpuid, cpustate) != -1) { 108 fmd_hdl_debug(hdl, "offlined cpu %u\n", cpuid); 109 cma_stats.cpu_flts.fmds_value.ui64++; 110 return (CMA_RA_SUCCESS); 111 } 112 } 113 114 fmd_hdl_debug(hdl, "failed to offline %u: %s\n", cpuid, 115 strerror(errno)); 116 cma_stats.cpu_fails.fmds_value.ui64++; 117 return (CMA_RA_FAILURE); 118 } 119 120 int 121 /* ARGSUSED 3 */ 122 cma_cpu_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru, const char *uuid) 123 { 124 uint_t cpuid, cpuvid; 125 int err = CMA_RA_FAILURE; 126 127 /* 128 * This added expansion is needed to cover the situation where a 129 * cpu fault from the resource cache is replayed at fmd restart, 130 * and the cpu resource has been remapped or replaced. The stored 131 * FMRI is expanded, but may have stale data. 132 */ 133 if (fmd_nvl_fmri_expand(hdl, asru) < 0) { 134 fmd_hdl_debug(hdl, "failed to expand cpu asru\n"); 135 cma_stats.bad_flts.fmds_value.ui64++; 136 return (CMA_RA_FAILURE); 137 } 138 139 if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_ID, &cpuid) != 0) { 140 fmd_hdl_debug(hdl, "cpu fault missing '%s'\n", FM_FMRI_CPU_ID); 141 cma_stats.bad_flts.fmds_value.ui64++; 142 return (CMA_RA_FAILURE); 143 } 144 145 /* 146 * If this asru's FMRI contains a virtual CPU id, use that value for 147 * p_online() call instead of (physical) cpu id. 148 */ 149 150 if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_VID, &cpuvid) == 0) 151 cpuid = cpuvid; 152 153 if (cma.cma_cpu_dooffline) { 154 int cpustate = P_FAULTED; 155 156 if (cma.cma_cpu_forcedoffline) 157 cpustate |= P_FORCED; 158 159 err = cpu_offline(hdl, cpuid, cpustate); 160 } else { 161 fmd_hdl_debug(hdl, "suppressed offline of CPU %u\n", cpuid); 162 cma_stats.cpu_supp.fmds_value.ui64++; 163 } 164 165 if (cma.cma_cpu_doblacklist) { 166 if (cpu_blacklist(hdl, nvl, asru) < 0) 167 cma_stats.cpu_blfails.fmds_value.ui64++; 168 } else { 169 fmd_hdl_debug(hdl, "suppressed blacklist of CPU %u\n", cpuid); 170 cma_stats.cpu_blsupp.fmds_value.ui64++; 171 } 172 173 return (err); 174 } 175