/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Page retirement can be an extended process due to the fact that a retirement
 * may not be possible when the original request is made.  The kernel will
 * repeatedly attempt to retire a given page, but will not let us know when the
 * page has been retired.  We therefore have to poll to see if the retirement
 * has been completed.  This poll is implemented with a bounded exponential
 * backoff to reduce the burden which we impose upon the system.
 *
 * To reduce the burden on fmd in the face of retirement storms, we schedule
 * all retries as a group.  In the simplest case, we attempt to retire a single
 * page.  When forced to retry, we initially schedule a retry at a configurable
 * interval t.  If the retry fails, we schedule another at 2 * t, and so on,
 * until t reaches the maximum interval (also configurable).  Future retries
 * for that page will occur with t equal to the maximum interval value.  We
 * will never give up on a retirement.
 *
 * With multiple retirements, the situation gets slightly more complicated.  As
 * indicated above, we schedule retries as a group.  We don't want to deny new
 * pages their short retry intervals, so we'll (re)set the retry interval to the
 * value appropriate for the newest page.
 */

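/*
 * For illustration only: with a hypothetical minimum interval of one second
 * and a maximum of eight seconds, a page that repeatedly fails to retire
 * would see retries scheduled roughly as follows:
 *
 *	retry:	1	2	3	4	5	6	...
 *	delay:	1s	2s	4s	8s	8s	8s	...
 *
 * That is, the delay doubles after each failed pass until it is capped at
 * the maximum; cma_page_retry() below implements this by doubling
 * cma_page_curdelay and clamping it to cma_page_maxdelay.
 */
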
#include <cma.h>

#include <time.h>
#include <errno.h>
#include <unistd.h>
#include <strings.h>
#include <fm/fmd_api.h>
#include <fm/libtopo.h>
#include <sys/fm/protocol.h>
#include <sys/mem.h>

static void
cma_page_free(fmd_hdl_t *hdl, cma_page_t *page)
{
	if (page->pg_fmri != NULL)
		nvlist_free(page->pg_fmri);
	fmd_hdl_free(hdl, page, sizeof (cma_page_t));
}

/*
 * Retire the specified ASRU, referring to a memory page by PA or by DIMM
 * offset (i.e. the encoded coordinates of the internal bank, row, and column).
 * In the initial FMA implementation, fault.memory.page exported an ASRU
 * with an explicit physical address, which is valid at the initial time of
 * diagnosis but may not be later following DR, DIMM removal, or interleave
 * changes.  On SPARC, this issue was solved by exporting the DIMM offset
 * and pushing the entire FMRI to the platform memory controller through
 * /dev/mem so it can derive the current PA from the DIMM and offset.
 * On x64, we also use DIMM and offset, but the mem:/// unum string is an
 * encoded hc:/// FMRI that is then used by the x64 memory controller driver.
 * At some point these three approaches need to be rationalized: all platforms
 * should use the same scheme, either with decoding in the kernel or decoding
 * in userland (i.e. with a libtopo method to compute and update the PA).
 */
/*ARGSUSED*/
int
cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru,
    const char *uuid, boolean_t repair)
{
	cma_page_t *page;
	uint64_t pageaddr;
	char *unumstr;
	nvlist_t *asrucp = NULL;
	const char *action = repair ? "unretire" : "retire";

	if (nvlist_dup(asru, &asrucp, 0) != 0) {
		fmd_hdl_debug(hdl, "page retire nvlist dup failed\n");
		return (CMA_RA_FAILURE);
	}

	/* It should already be expanded, but we'll do it again anyway */
	if (fmd_nvl_fmri_expand(hdl, asrucp) < 0) {
		fmd_hdl_debug(hdl, "failed to expand page asru\n");
		cma_stats.bad_flts.fmds_value.ui64++;
		nvlist_free(asrucp);
		return (CMA_RA_FAILURE);
	}

	if (!repair && !fmd_nvl_fmri_present(hdl, asrucp)) {
		fmd_hdl_debug(hdl, "page retire overtaken by events\n");
		cma_stats.page_nonent.fmds_value.ui64++;
		nvlist_free(asrucp);
		return (CMA_RA_SUCCESS);
	}

	if (nvlist_lookup_uint64(asrucp, FM_FMRI_MEM_PHYSADDR, &pageaddr)
	    != 0) {
		fmd_hdl_debug(hdl, "mem fault missing '%s'\n",
		    FM_FMRI_MEM_PHYSADDR);
		cma_stats.bad_flts.fmds_value.ui64++;
		nvlist_free(asrucp);
		return (CMA_RA_FAILURE);
	}

	if (repair) {
		if (!cma.cma_page_dounretire) {
			fmd_hdl_debug(hdl, "suppressed unretire of page %llx\n",
			    (u_longlong_t)pageaddr);
			cma_stats.page_supp.fmds_value.ui64++;
			nvlist_free(asrucp);
			return (CMA_RA_SUCCESS);
		}
	} else {
		if (!cma.cma_page_doretire) {
			fmd_hdl_debug(hdl, "suppressed retire of page %llx\n",
			    (u_longlong_t)pageaddr);
			cma_stats.page_supp.fmds_value.ui64++;
			nvlist_free(asrucp);
			return (CMA_RA_FAILURE);
		}
	}

	/*
	 * If the unum is an hc fmri string, expand it to an fmri and include
	 * that in a modified asru nvlist.
	 */
	if (nvlist_lookup_string(asrucp, FM_FMRI_MEM_UNUM, &unumstr) == 0 &&
	    strncmp(unumstr, "hc:/", 4) == 0) {
		int err;
		nvlist_t *unumfmri;
		struct topo_hdl *thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);

		if (topo_fmri_str2nvl(thp, unumstr, &unumfmri, &err) != 0) {
			fmd_hdl_debug(hdl, "page retire str2nvl failed: %s\n",
			    topo_strerror(err));
			fmd_hdl_topo_rele(hdl, thp);
			nvlist_free(asrucp);
			return (CMA_RA_FAILURE);
		}

		fmd_hdl_topo_rele(hdl, thp);

		if (nvlist_add_nvlist(asrucp, FM_FMRI_MEM_UNUM "-fmri",
		    unumfmri) != 0) {
			fmd_hdl_debug(hdl, "page retire failed to add "
			    "unumfmri to modified asru");
			nvlist_free(unumfmri);
			nvlist_free(asrucp);
			return (CMA_RA_FAILURE);
		}
		nvlist_free(unumfmri);
	}

	if (cma_page_cmd(hdl,
	    repair ? MEM_PAGE_FMRI_UNRETIRE : MEM_PAGE_FMRI_RETIRE, asrucp)
	    == 0) {
		fmd_hdl_debug(hdl, "%sd page 0x%llx\n",
		    action, (u_longlong_t)pageaddr);
		if (repair)
			cma_stats.page_repairs.fmds_value.ui64++;
		else
			cma_stats.page_flts.fmds_value.ui64++;
		nvlist_free(asrucp);
		return (CMA_RA_SUCCESS);
	} else if (repair || errno != EAGAIN) {
		fmd_hdl_debug(hdl, "%s of page 0x%llx failed, will not "
		    "retry: %s\n", action, (u_longlong_t)pageaddr,
		    strerror(errno));

		cma_stats.page_fails.fmds_value.ui64++;

		nvlist_free(asrucp);
		if (uuid != NULL && cma.cma_page_maxretries != 0)
			return (CMA_RA_SUCCESS);
		return (CMA_RA_FAILURE);
	}

	/*
	 * The page didn't immediately retire.  We'll need to periodically
	 * check to see if it has been retired.
	 */
	fmd_hdl_debug(hdl, "page didn't retire - sleeping\n");

	page = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP);
	page->pg_addr = pageaddr;
	page->pg_fmri = asrucp;
	if (uuid != NULL)
		page->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP);

	page->pg_next = cma.cma_pages;
	cma.cma_pages = page;

	if (cma.cma_page_timerid != 0)
		fmd_timer_remove(hdl, cma.cma_page_timerid);

	cma.cma_page_curdelay = cma.cma_page_mindelay;

	cma.cma_page_timerid =
	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);

	/* Don't free asrucp here.  This FMRI will be needed for retry. */
	return (CMA_RA_FAILURE);
}

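/*
 * Attempt to complete the retirement of a single pending page.  Returns
 * nonzero if the page should be removed from the retry list (it has been
 * retired, is no longer present, or we are giving up), or zero if another
 * retry should be scheduled.
 */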
static int
page_retry(fmd_hdl_t *hdl, cma_page_t *page)
{
	if (page->pg_fmri != NULL && !fmd_nvl_fmri_present(hdl,
	    page->pg_fmri)) {
		fmd_hdl_debug(hdl, "page retire overtaken by events");
		cma_stats.page_nonent.fmds_value.ui64++;

		if (page->pg_uuid != NULL)
			fmd_case_uuclose(hdl, page->pg_uuid);
		return (1);	/* no longer a page to retire */
	}

	if (cma_page_cmd(hdl, MEM_PAGE_FMRI_ISRETIRED, page->pg_fmri) == 0) {
		fmd_hdl_debug(hdl, "retired page 0x%llx on retry %u\n",
		    page->pg_addr, page->pg_nretries);
		cma_stats.page_flts.fmds_value.ui64++;

		if (page->pg_uuid != NULL)
			fmd_case_uuclose(hdl, page->pg_uuid);
		return (1);	/* page retired */
	}

	if (errno == EAGAIN) {
		fmd_hdl_debug(hdl, "scheduling another retry for 0x%llx\n",
		    page->pg_addr);
		return (0);	/* schedule another retry */
	} else {
		if (errno == EIO) {
			fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
			    "retirement: page isn't scheduled for retirement "
			    "(request made beyond page_retire limit?)\n",
			    page->pg_addr);
		} else {
			fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
			    "retirement: %s\n", page->pg_addr,
			    strerror(errno));
		}

		if (page->pg_uuid != NULL && cma.cma_page_maxretries != 0)
			fmd_case_uuclose(hdl, page->pg_uuid);

		cma_stats.page_fails.fmds_value.ui64++;
		return (1);	/* give up */
	}
}

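/*
 * Timer callback: walk the list of pending retirements, dropping pages whose
 * retirement has completed, been overtaken by events, or been abandoned.  If
 * any pages remain, double the retry delay (bounded by cma_page_maxdelay) and
 * reinstall the timer.
 */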
void
cma_page_retry(fmd_hdl_t *hdl)
{
	cma_page_t **pagep;

	cma.cma_page_timerid = 0;

	fmd_hdl_debug(hdl, "page_retry: timer fired\n");

	pagep = &cma.cma_pages;
	while (*pagep != NULL) {
		cma_page_t *page = *pagep;

		if (page_retry(hdl, page)) {
			/*
			 * Successful retry or we're giving up - remove from
			 * the list
			 */
			*pagep = page->pg_next;

			if (page->pg_uuid != NULL)
				fmd_hdl_strfree(hdl, page->pg_uuid);

			cma_page_free(hdl, page);
		} else if (cma.cma_page_maxretries == 0 ||
		    page->pg_nretries < cma.cma_page_maxretries) {
			page->pg_nretries++;
			pagep = &page->pg_next;
		} else {
			/*
			 * Tunable maxretries was set and we reached
			 * the max, so just close the case.
			 */
			fmd_hdl_debug(hdl,
			    "giving up page retire 0x%llx on retry %u\n",
			    page->pg_addr, page->pg_nretries);
			cma_stats.page_retmax.fmds_value.ui64++;

			if (page->pg_uuid != NULL) {
				fmd_case_uuclose(hdl, page->pg_uuid);
				fmd_hdl_strfree(hdl, page->pg_uuid);
			}

			*pagep = page->pg_next;

			cma_page_free(hdl, page);
		}
	}

	if (cma.cma_pages == NULL)
		return;	/* no more retirements */

	/*
	 * We still have retirements that haven't completed.  Back the delay
	 * off, and schedule a retry.
	 */
	cma.cma_page_curdelay = MIN(cma.cma_page_curdelay * 2,
	    cma.cma_page_maxdelay);

	fmd_hdl_debug(hdl, "scheduled page retirement retry for %llu secs\n",
	    (u_longlong_t)(cma.cma_page_curdelay / NANOSEC));

	cma.cma_page_timerid =
	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
}

void
cma_page_fini(fmd_hdl_t *hdl)
{
	cma_page_t *page;

	while ((page = cma.cma_pages) != NULL) {
		cma.cma_pages = page->pg_next;
		cma_page_free(hdl, page);
	}
}