1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Page retirement can be an extended process due to the fact that a retirement 29 * may not be possible when the original request is made. The kernel will 30 * repeatedly attempt to retire a given page, but will not let us know when the 31 * page has been retired. We therefore have to poll to see if the retirement 32 * has been completed. This poll is implemented with a bounded exponential 33 * backoff to reduce the burden which we impose upon the system. 34 * 35 * To reduce the burden on fmd in the face of retirement storms, we schedule 36 * all retries as a group. In the simplest case, we attempt to retire a single 37 * page. When forced to retry, we initially schedule a retry at a configurable 38 * interval t. If the retry fails, we schedule another at 2 * t, and so on, 39 * until t reaches the maximum interval (also configurable). Future retries 40 * for that page will occur with t equal to the maximum interval value. We 41 * will never give up on a retirement. 42 * 43 * With multiple retirements, the situation gets slightly more complicated. As 44 * indicated above, we schedule retries as a group. We don't want to deny new 45 * pages their short retry intervals, so we'll (re)set the retry interval to the 46 * value appropriate for the newest page. 47 */ 48 49 #include <cma.h> 50 51 #include <time.h> 52 #include <errno.h> 53 #include <unistd.h> 54 #include <strings.h> 55 #include <fm/fmd_api.h> 56 #include <fm/libtopo.h> 57 #include <fm/fmd_fmri.h> 58 #include <fm/fmd_agent.h> 59 #include <sys/fm/protocol.h> 60 61 static void 62 cma_page_free(fmd_hdl_t *hdl, cma_page_t *page) 63 { 64 nvlist_free(page->pg_asru); 65 nvlist_free(page->pg_rsrc); 66 fmd_hdl_free(hdl, page, sizeof (cma_page_t)); 67 } 68 69 /* 70 * Retire the specified ASRU, referring to a memory page by PA or by DIMM 71 * offset (i.e. the encoded coordinates internal bank, row, and column). 72 * In the initial FMA implementation, fault.memory.page exported an ASRU 73 * with an explicit physical address, which is valid at the initial time of 74 * diagnosis but may not be later following DR, DIMM removal, or interleave 75 * changes. On SPARC, this issue was solved by exporting the DIMM offset 76 * and pushing the entire FMRI to the platform memory controller through 77 * /dev/fm so it can derive the current PA from the DIMM and offset. 78 * On x86, we also encode DIMM and offset in hc-specific, which is then used 79 * by the x64 memory controller driver. 80 * At some point these three approaches need to be rationalized: all platforms 81 * should use the same scheme, either with decoding in the kernel or decoding 82 * in userland (i.e. with a libtopo method to compute and update the PA). 83 */ 84 /*ARGSUSED*/ 85 int 86 cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru, 87 const char *uuid, boolean_t repair) 88 { 89 cma_page_t *page; 90 uint64_t pageaddr; 91 const char *action = repair ? "unretire" : "retire"; 92 int rc; 93 nvlist_t *rsrc = NULL, *asrucp = NULL, *hcsp; 94 95 (void) nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc); 96 97 if (nvlist_dup(asru, &asrucp, 0) != 0) { 98 fmd_hdl_debug(hdl, "page retire nvlist dup failed\n"); 99 return (CMA_RA_FAILURE); 100 } 101 102 /* It should already be expanded, but we'll do it again anyway */ 103 if (fmd_nvl_fmri_expand(hdl, asrucp) < 0) { 104 fmd_hdl_debug(hdl, "failed to expand page asru\n"); 105 cma_stats.bad_flts.fmds_value.ui64++; 106 nvlist_free(asrucp); 107 return (CMA_RA_FAILURE); 108 } 109 110 if (!repair && !fmd_nvl_fmri_present(hdl, asrucp)) { 111 fmd_hdl_debug(hdl, "page retire overtaken by events\n"); 112 cma_stats.page_nonent.fmds_value.ui64++; 113 nvlist_free(asrucp); 114 return (CMA_RA_SUCCESS); 115 } 116 117 /* Figure out physaddr from resource or asru */ 118 if (rsrc == NULL || 119 nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcsp) != 0 || 120 (nvlist_lookup_uint64(hcsp, "asru-" FM_FMRI_HC_SPECIFIC_PHYSADDR, 121 &pageaddr) != 0 && nvlist_lookup_uint64(hcsp, 122 FM_FMRI_HC_SPECIFIC_PHYSADDR, &pageaddr) != 0)) { 123 if (nvlist_lookup_uint64(asrucp, FM_FMRI_MEM_PHYSADDR, 124 &pageaddr) != 0) { 125 fmd_hdl_debug(hdl, "mem fault missing 'physaddr'\n"); 126 cma_stats.bad_flts.fmds_value.ui64++; 127 nvlist_free(asrucp); 128 return (CMA_RA_FAILURE); 129 } 130 } 131 132 if (repair) { 133 if (!cma.cma_page_dounretire) { 134 fmd_hdl_debug(hdl, "suppressed unretire of page %llx\n", 135 (u_longlong_t)pageaddr); 136 cma_stats.page_supp.fmds_value.ui64++; 137 nvlist_free(asrucp); 138 return (CMA_RA_SUCCESS); 139 } 140 /* If unretire via topo fails, we fall back to legacy way */ 141 if (rsrc == NULL || (rc = fmd_nvl_fmri_unretire(hdl, rsrc)) < 0) 142 rc = cma_fmri_page_unretire(hdl, asrucp); 143 } else { 144 if (!cma.cma_page_doretire) { 145 fmd_hdl_debug(hdl, "suppressed retire of page %llx\n", 146 (u_longlong_t)pageaddr); 147 cma_stats.page_supp.fmds_value.ui64++; 148 nvlist_free(asrucp); 149 return (CMA_RA_FAILURE); 150 } 151 /* If retire via topo fails, we fall back to legacy way */ 152 if (rsrc == NULL || (rc = fmd_nvl_fmri_retire(hdl, rsrc)) < 0) 153 rc = cma_fmri_page_retire(hdl, asrucp); 154 } 155 156 if (rc == FMD_AGENT_RETIRE_DONE) { 157 fmd_hdl_debug(hdl, "%sd page 0x%llx\n", 158 action, (u_longlong_t)pageaddr); 159 if (repair) 160 cma_stats.page_repairs.fmds_value.ui64++; 161 else 162 cma_stats.page_flts.fmds_value.ui64++; 163 nvlist_free(asrucp); 164 return (CMA_RA_SUCCESS); 165 } else if (repair || rc != FMD_AGENT_RETIRE_ASYNC) { 166 fmd_hdl_debug(hdl, "%s of page 0x%llx failed, will not " 167 "retry: %s\n", action, (u_longlong_t)pageaddr, 168 strerror(errno)); 169 170 cma_stats.page_fails.fmds_value.ui64++; 171 nvlist_free(asrucp); 172 return (CMA_RA_FAILURE); 173 } 174 175 /* 176 * The page didn't immediately retire. We'll need to periodically 177 * check to see if it has been retired. 178 */ 179 fmd_hdl_debug(hdl, "page didn't retire - sleeping\n"); 180 181 page = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP); 182 page->pg_addr = pageaddr; 183 if (rsrc != NULL) 184 (void) nvlist_dup(rsrc, &page->pg_rsrc, 0); 185 page->pg_asru = asrucp; 186 if (uuid != NULL) 187 page->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP); 188 189 page->pg_next = cma.cma_pages; 190 cma.cma_pages = page; 191 192 if (cma.cma_page_timerid != 0) 193 fmd_timer_remove(hdl, cma.cma_page_timerid); 194 195 cma.cma_page_curdelay = cma.cma_page_mindelay; 196 197 cma.cma_page_timerid = 198 fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay); 199 200 /* Don't free asrucp here. This FMRI will be needed for retry. */ 201 return (CMA_RA_FAILURE); 202 } 203 204 static int 205 page_retry(fmd_hdl_t *hdl, cma_page_t *page) 206 { 207 int rc; 208 209 if (page->pg_asru != NULL && 210 !fmd_nvl_fmri_present(hdl, page->pg_asru)) { 211 fmd_hdl_debug(hdl, "page retire overtaken by events"); 212 cma_stats.page_nonent.fmds_value.ui64++; 213 214 if (page->pg_uuid != NULL) 215 fmd_case_uuclose(hdl, page->pg_uuid); 216 return (1); /* no longer a page to retire */ 217 } 218 219 if (page->pg_rsrc == NULL || 220 (rc = fmd_nvl_fmri_service_state(hdl, page->pg_rsrc)) < 0) 221 rc = cma_fmri_page_service_state(hdl, page->pg_asru); 222 223 if (rc == FMD_SERVICE_STATE_UNUSABLE) { 224 fmd_hdl_debug(hdl, "retired page 0x%llx on retry %u\n", 225 page->pg_addr, page->pg_nretries); 226 cma_stats.page_flts.fmds_value.ui64++; 227 228 if (page->pg_uuid != NULL) 229 fmd_case_uuclose(hdl, page->pg_uuid); 230 return (1); /* page retired */ 231 } 232 233 if (rc == FMD_SERVICE_STATE_ISOLATE_PENDING) { 234 fmd_hdl_debug(hdl, "scheduling another retry for 0x%llx\n", 235 page->pg_addr); 236 return (0); /* schedule another retry */ 237 } else { 238 fmd_hdl_debug(hdl, "failed to retry page 0x%llx " 239 "retirement: %s\n", page->pg_addr, 240 strerror(errno)); 241 242 cma_stats.page_fails.fmds_value.ui64++; 243 return (1); /* give up */ 244 } 245 } 246 247 void 248 cma_page_retry(fmd_hdl_t *hdl) 249 { 250 cma_page_t **pagep; 251 252 cma.cma_page_timerid = 0; 253 254 fmd_hdl_debug(hdl, "page_retry: timer fired\n"); 255 256 pagep = &cma.cma_pages; 257 while (*pagep != NULL) { 258 cma_page_t *page = *pagep; 259 260 if (page_retry(hdl, page)) { 261 /* 262 * Successful retry or we're giving up - remove from 263 * the list 264 */ 265 *pagep = page->pg_next; 266 267 if (page->pg_uuid != NULL) 268 fmd_hdl_strfree(hdl, page->pg_uuid); 269 270 cma_page_free(hdl, page); 271 } else { 272 page->pg_nretries++; 273 pagep = &page->pg_next; 274 } 275 } 276 277 if (cma.cma_pages == NULL) 278 return; /* no more retirements */ 279 280 /* 281 * We still have retirements that haven't completed. Back the delay 282 * off, and schedule a retry. 283 */ 284 cma.cma_page_curdelay = MIN(cma.cma_page_curdelay * 2, 285 cma.cma_page_maxdelay); 286 287 fmd_hdl_debug(hdl, "scheduled page retirement retry for %llu secs\n", 288 (u_longlong_t)(cma.cma_page_curdelay / NANOSEC)); 289 290 cma.cma_page_timerid = 291 fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay); 292 } 293 294 void 295 cma_page_fini(fmd_hdl_t *hdl) 296 { 297 cma_page_t *page; 298 299 while ((page = cma.cma_pages) != NULL) { 300 cma.cma_pages = page->pg_next; 301 if (page->pg_uuid != NULL) 302 fmd_hdl_strfree(hdl, page->pg_uuid); 303 cma_page_free(hdl, page); 304 } 305 } 306