1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <cma.h> 30 31 #include <fcntl.h> 32 #include <unistd.h> 33 #include <strings.h> 34 #include <errno.h> 35 #include <time.h> 36 #include <fm/fmd_api.h> 37 #include <sys/fm/protocol.h> 38 #include <sys/bl.h> 39 #include <sys/processor.h> 40 41 static int 42 cpu_blacklist(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru) 43 { 44 bl_req_t blr; 45 nvlist_t *fmri; 46 char *fmribuf; 47 size_t fmrisz; 48 int fd, rc, err; 49 char *class; 50 51 /* 52 * Some platforms have special unums for the E$ DIMMs. If we're dealing 53 * with a platform that has these unums, one will have been added to the 54 * fault as the resource. We'll use that for the blacklisting. If we 55 * can't find a resource, we'll fall back to the ASRU. 56 */ 57 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &fmri) != 0) 58 fmri = asru; 59 60 if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) || 61 (class == NULL) || (*class == '\0')) { 62 fmd_hdl_debug(hdl, "failed to get the fault class name\n"); 63 errno = EINVAL; 64 return (-1); 65 } 66 67 if ((fd = open("/dev/bl", O_RDONLY)) < 0) 68 return (-1); /* errno is set for us */ 69 70 if ((errno = nvlist_size(fmri, &fmrisz, NV_ENCODE_NATIVE)) != 0 || 71 (fmribuf = fmd_hdl_alloc(hdl, fmrisz, FMD_SLEEP)) == NULL) { 72 (void) close(fd); 73 return (-1); /* errno is set for us */ 74 } 75 76 if ((errno = nvlist_pack(fmri, &fmribuf, &fmrisz, 77 NV_ENCODE_NATIVE, 0)) != 0) { 78 fmd_hdl_free(hdl, fmribuf, fmrisz); 79 (void) close(fd); 80 return (-1); /* errno is set for us */ 81 } 82 83 blr.bl_fmri = fmribuf; 84 blr.bl_fmrisz = fmrisz; 85 blr.bl_class = class; 86 87 rc = ioctl(fd, BLIOC_INSERT, &blr); 88 err = errno; 89 90 fmd_hdl_free(hdl, fmribuf, fmrisz); 91 (void) close(fd); 92 93 if (rc < 0 && err != ENOTSUP) { 94 errno = err; 95 return (-1); 96 } 97 98 return (0); 99 } 100 101 static void 102 cpu_offline(fmd_hdl_t *hdl, const char *uuid, uint_t cpuid, int cpustate) 103 { 104 int i; 105 106 for (i = 0; i < cma.cma_cpu_tries; 107 i++, (void) nanosleep(&cma.cma_cpu_delay, NULL)) { 108 if (p_online(cpuid, cpustate) != -1) { 109 fmd_hdl_debug(hdl, "offlined cpu %u\n", cpuid); 110 cma_stats.cpu_flts.fmds_value.ui64++; 111 if (uuid != NULL) 112 fmd_case_uuclose(hdl, uuid); 113 return; 114 } 115 } 116 117 fmd_hdl_debug(hdl, "failed to offline %u: %s\n", cpuid, 118 strerror(errno)); 119 cma_stats.cpu_fails.fmds_value.ui64++; 120 } 121 122 void 123 cma_cpu_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru, const char *uuid) 124 { 125 uint_t cpuid; 126 127 if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_ID, &cpuid) != 0) { 128 fmd_hdl_debug(hdl, "cpu fault missing '%s'\n", FM_FMRI_CPU_ID); 129 cma_stats.bad_flts.fmds_value.ui64++; 130 return; 131 } 132 133 if (cma.cma_cpu_dooffline) { 134 int cpustate = P_FAULTED; 135 136 if (cma.cma_cpu_forcedoffline) 137 cpustate |= P_FORCED; 138 139 cpu_offline(hdl, uuid, cpuid, cpustate); 140 } else { 141 fmd_hdl_debug(hdl, "suppressed offline of CPU %u\n", cpuid); 142 cma_stats.cpu_supp.fmds_value.ui64++; 143 } 144 145 if (cma.cma_cpu_doblacklist) { 146 if (cpu_blacklist(hdl, nvl, asru) < 0) 147 cma_stats.cpu_blfails.fmds_value.ui64++; 148 } else { 149 fmd_hdl_debug(hdl, "suppressed blacklist of CPU %u\n", cpuid); 150 cma_stats.cpu_blsupp.fmds_value.ui64++; 151 } 152 } 153