1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <cma.h> 30 31 #include <fcntl.h> 32 #include <unistd.h> 33 #include <strings.h> 34 #include <errno.h> 35 #include <time.h> 36 #include <fm/fmd_api.h> 37 #include <sys/fm/protocol.h> 38 #include <sys/bl.h> 39 #include <sys/processor.h> 40 41 static int 42 cpu_blacklist(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru) 43 { 44 bl_req_t blr; 45 nvlist_t *fmri; 46 char *fmribuf; 47 size_t fmrisz; 48 int fd, rc, err; 49 char *class; 50 51 /* 52 * Some platforms have special unums for the E$ DIMMs. If we're dealing 53 * with a platform that has these unums, one will have been added to the 54 * fault as the resource. We'll use that for the blacklisting. If we 55 * can't find a resource, we'll fall back to the ASRU. 56 */ 57 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &fmri) != 0) 58 fmri = asru; 59 60 if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) || 61 (class == NULL) || (*class == '\0')) { 62 fmd_hdl_debug(hdl, "failed to get the fault class name\n"); 63 errno = EINVAL; 64 return (-1); 65 } 66 67 if ((fd = open("/dev/bl", O_RDONLY)) < 0) 68 return (-1); /* errno is set for us */ 69 70 if ((errno = nvlist_size(fmri, &fmrisz, NV_ENCODE_NATIVE)) != 0 || 71 (fmribuf = fmd_hdl_alloc(hdl, fmrisz, FMD_SLEEP)) == NULL) { 72 (void) close(fd); 73 return (-1); /* errno is set for us */ 74 } 75 76 if ((errno = nvlist_pack(fmri, &fmribuf, &fmrisz, 77 NV_ENCODE_NATIVE, 0)) != 0) { 78 fmd_hdl_free(hdl, fmribuf, fmrisz); 79 (void) close(fd); 80 return (-1); /* errno is set for us */ 81 } 82 83 blr.bl_fmri = fmribuf; 84 blr.bl_fmrisz = fmrisz; 85 blr.bl_class = class; 86 87 rc = ioctl(fd, BLIOC_INSERT, &blr); 88 err = errno; 89 90 fmd_hdl_free(hdl, fmribuf, fmrisz); 91 (void) close(fd); 92 93 if (rc < 0 && err != ENOTSUP) { 94 errno = err; 95 return (-1); 96 } 97 98 return (0); 99 } 100 101 static void 102 cpu_offline(fmd_hdl_t *hdl, const char *uuid, uint_t cpuid, int cpustate) 103 { 104 int i; 105 106 for (i = 0; i < cma.cma_cpu_tries; 107 i++, (void) nanosleep(&cma.cma_cpu_delay, NULL)) { 108 if (p_online(cpuid, cpustate) != -1) { 109 fmd_hdl_debug(hdl, "offlined cpu %u\n", cpuid); 110 cma_stats.cpu_flts.fmds_value.ui64++; 111 if (uuid != NULL) 112 fmd_case_uuclose(hdl, uuid); 113 return; 114 } 115 } 116 117 fmd_hdl_debug(hdl, "failed to offline %u: %s\n", cpuid, 118 strerror(errno)); 119 cma_stats.cpu_fails.fmds_value.ui64++; 120 } 121 122 void 123 cma_cpu_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru, const char *uuid) 124 { 125 uint_t cpuid, cpuvid; 126 127 /* 128 * This added expansion is needed to cover the situation where a 129 * cpu fault from the resource cache is replayed at fmd restart, 130 * and the cpu resource has been remapped or replaced. The stored 131 * FMRI is expanded, but may have stale data. 132 */ 133 if (fmd_nvl_fmri_expand(hdl, asru) < 0) { 134 fmd_hdl_debug(hdl, "failed to expand cpu asru\n"); 135 cma_stats.bad_flts.fmds_value.ui64++; 136 return; 137 } 138 139 if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_ID, &cpuid) != 0) { 140 fmd_hdl_debug(hdl, "cpu fault missing '%s'\n", FM_FMRI_CPU_ID); 141 cma_stats.bad_flts.fmds_value.ui64++; 142 return; 143 } 144 145 /* 146 * If this asru's FMRI contains a virtual CPU id, use that value for 147 * p_online() call instead of (physical) cpu id. 148 */ 149 150 if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_VID, &cpuvid) == 0) 151 cpuid = cpuvid; 152 153 if (cma.cma_cpu_dooffline) { 154 int cpustate = P_FAULTED; 155 156 if (cma.cma_cpu_forcedoffline) 157 cpustate |= P_FORCED; 158 159 cpu_offline(hdl, uuid, cpuid, cpustate); 160 } else { 161 fmd_hdl_debug(hdl, "suppressed offline of CPU %u\n", cpuid); 162 cma_stats.cpu_supp.fmds_value.ui64++; 163 } 164 165 if (cma.cma_cpu_doblacklist) { 166 if (cpu_blacklist(hdl, nvl, asru) < 0) 167 cma_stats.cpu_blfails.fmds_value.ui64++; 168 } else { 169 fmd_hdl_debug(hdl, "suppressed blacklist of CPU %u\n", cpuid); 170 cma_stats.cpu_blsupp.fmds_value.ui64++; 171 } 172 } 173