1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Page retirement can be an extended process due to the fact that a retirement 31 * may not be possible when the original request is made. The kernel will 32 * repeatedly attempt to retire a given page, but will not let us know when the 33 * page has been retired. We therefore have to poll to see if the retirement 34 * has been completed. This poll is implemented with a bounded exponential 35 * backoff to reduce the burden which we impose upon the system. 36 * 37 * To reduce the burden on fmd in the face of retirement storms, we schedule 38 * all retries as a group. In the simplest case, we attempt to retire a single 39 * page. When forced to retry, we initially schedule a retry at a configurable 40 * interval t. If the retry fails, we schedule another at 2 * t, and so on, 41 * until t reaches the maximum interval (also configurable). Future retries 42 * for that page will occur with t equal to the maximum interval value. We 43 * will never give up on a retirement. 44 * 45 * With multiple retirements, the situation gets slightly more complicated. As 46 * indicated above, we schedule retries as a group. We don't want to deny new 47 * pages their short retry intervals, so we'll (re)set the retry interval to the 48 * value appropriate for the newest page. 49 */ 50 51 #include <cma.h> 52 53 #include <time.h> 54 #include <fcntl.h> 55 #include <errno.h> 56 #include <unistd.h> 57 #include <strings.h> 58 #include <fm/fmd_api.h> 59 #include <sys/fm/protocol.h> 60 #include <sys/mem.h> 61 62 static int 63 cma_page_cmd(fmd_hdl_t *hdl, int cmd, nvlist_t *nvl) 64 { 65 mem_page_t mpage; 66 char *fmribuf; 67 size_t fmrisz; 68 int fd, rc, err; 69 70 if ((fd = open("/dev/mem", O_RDONLY)) < 0) 71 return (-1); /* errno is set for us */ 72 73 if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 || 74 fmrisz > MEM_FMRI_MAX_BUFSIZE || 75 (fmribuf = fmd_hdl_alloc(hdl, fmrisz, FMD_SLEEP)) == NULL) { 76 (void) close(fd); 77 return (-1); /* errno is set for us */ 78 } 79 80 if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz, 81 NV_ENCODE_NATIVE, 0)) != 0) { 82 fmd_hdl_free(hdl, fmribuf, fmrisz); 83 (void) close(fd); 84 return (-1); /* errno is set for us */ 85 } 86 87 mpage.m_fmri = fmribuf; 88 mpage.m_fmrisz = fmrisz; 89 90 if ((rc = ioctl(fd, cmd, &mpage)) < 0) 91 err = errno; 92 93 fmd_hdl_free(hdl, fmribuf, fmrisz); 94 95 (void) close(fd); 96 97 if (rc < 0) { 98 errno = err; 99 return (-1); 100 } 101 102 return (0); 103 } 104 105 static void 106 cma_page_free(fmd_hdl_t *hdl, cma_page_t *page) 107 { 108 if (page->pg_fmri != NULL) 109 nvlist_free(page->pg_fmri); 110 fmd_hdl_free(hdl, page, sizeof (cma_page_t)); 111 } 112 113 /*ARGSUSED*/ 114 void 115 cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru, const char *uuid) 116 { 117 cma_page_t *page; 118 uint64_t pageaddr; 119 120 /* It should already be expanded, but we'll do it again anyway */ 121 if (fmd_nvl_fmri_expand(hdl, asru) < 0) { 122 fmd_hdl_debug(hdl, "failed to expand page asru\n"); 123 cma_stats.bad_flts.fmds_value.ui64++; 124 return; 125 } 126 127 if (nvlist_lookup_uint64(asru, FM_FMRI_MEM_PHYSADDR, &pageaddr) != 0) { 128 fmd_hdl_debug(hdl, "mem fault missing '%s'\n", 129 FM_FMRI_MEM_PHYSADDR); 130 cma_stats.bad_flts.fmds_value.ui64++; 131 return; 132 } 133 134 if (!cma.cma_page_doretire) { 135 fmd_hdl_debug(hdl, "suppressed retire of page %llx\n", 136 (u_longlong_t)pageaddr); 137 cma_stats.page_supp.fmds_value.ui64++; 138 return; 139 } 140 141 if (!fmd_nvl_fmri_present(hdl, asru)) { 142 fmd_hdl_debug(hdl, "page retire overtaken by events\n"); 143 cma_stats.page_nonent.fmds_value.ui64++; 144 if (uuid != NULL) 145 fmd_case_uuclose(hdl, uuid); 146 return; 147 } 148 149 if (cma_page_cmd(hdl, MEM_PAGE_FMRI_RETIRE, asru) == 0) { 150 fmd_hdl_debug(hdl, "retired page 0x%llx\n", 151 (u_longlong_t)pageaddr); 152 cma_stats.page_flts.fmds_value.ui64++; 153 if (uuid != NULL) 154 fmd_case_uuclose(hdl, uuid); 155 return; 156 } 157 158 /* 159 * The page didn't immediately retire. We'll need to periodically 160 * check to see if it has been retired. 161 */ 162 fmd_hdl_debug(hdl, "page didn't retire - sleeping\n"); 163 164 page = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP); 165 page->pg_addr = pageaddr; 166 (void) nvlist_dup(asru, &page->pg_fmri, 0); 167 if (uuid != NULL) 168 page->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP); 169 170 page->pg_next = cma.cma_pages; 171 cma.cma_pages = page; 172 173 if (cma.cma_page_timerid != 0) 174 fmd_timer_remove(hdl, cma.cma_page_timerid); 175 176 cma.cma_page_curdelay = cma.cma_page_mindelay; 177 178 cma.cma_page_timerid = 179 fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay); 180 } 181 182 static int 183 page_retry(fmd_hdl_t *hdl, cma_page_t *page) 184 { 185 if (page->pg_fmri != NULL && !fmd_nvl_fmri_present(hdl, 186 page->pg_fmri)) { 187 fmd_hdl_debug(hdl, "page retire overtaken by events"); 188 cma_stats.page_nonent.fmds_value.ui64++; 189 190 if (page->pg_uuid != NULL) 191 fmd_case_uuclose(hdl, page->pg_uuid); 192 return (1); /* no longer a page to retire */ 193 } 194 195 if (cma_page_cmd(hdl, MEM_PAGE_FMRI_ISRETIRED, page->pg_fmri) == 0) { 196 fmd_hdl_debug(hdl, "retired page 0x%llx on retry %u\n", 197 page->pg_addr, page->pg_nretries); 198 cma_stats.page_flts.fmds_value.ui64++; 199 200 if (page->pg_uuid != NULL) 201 fmd_case_uuclose(hdl, page->pg_uuid); 202 return (1); /* page retired */ 203 } 204 205 if (errno == EAGAIN) { 206 fmd_hdl_debug(hdl, "scheduling another retry for 0x%llx\n", 207 page->pg_addr); 208 return (0); /* schedule another retry */ 209 } else { 210 if (errno == EIO) { 211 fmd_hdl_debug(hdl, "failed to retry page 0x%llx " 212 "retirement: page isn't scheduled for retirement\n", 213 page->pg_addr); 214 } else { 215 fmd_hdl_debug(hdl, "failed to retry page 0x%llx " 216 "retirement: %s\n", page->pg_addr, 217 strerror(errno)); 218 } 219 220 cma_stats.page_fails.fmds_value.ui64++; 221 return (1); /* give up */ 222 } 223 } 224 225 void 226 cma_page_retry(fmd_hdl_t *hdl) 227 { 228 cma_page_t **pagep; 229 230 cma.cma_page_timerid = 0; 231 232 fmd_hdl_debug(hdl, "page_retry: timer fired\n"); 233 234 pagep = &cma.cma_pages; 235 while (*pagep != NULL) { 236 cma_page_t *page = *pagep; 237 238 if (page_retry(hdl, page)) { 239 /* 240 * Successful retry or we're giving up - remove from 241 * the list 242 */ 243 *pagep = page->pg_next; 244 245 if (page->pg_uuid != NULL) 246 fmd_hdl_strfree(hdl, page->pg_uuid); 247 248 cma_page_free(hdl, page); 249 } else { 250 page->pg_nretries++; 251 pagep = &page->pg_next; 252 } 253 } 254 255 if (cma.cma_pages == NULL) 256 return; /* no more retirements */ 257 258 /* 259 * We still have retirements that haven't completed. Back the delay 260 * off, and schedule a retry. 261 */ 262 cma.cma_page_curdelay = MIN(cma.cma_page_curdelay * 2, 263 cma.cma_page_maxdelay); 264 265 fmd_hdl_debug(hdl, "scheduled page retirement retry for %llu secs\n", 266 (u_longlong_t)(cma.cma_page_curdelay / NANOSEC)); 267 268 cma.cma_page_timerid = 269 fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay); 270 } 271 272 void 273 cma_page_fini(fmd_hdl_t *hdl) 274 { 275 cma_page_t *page; 276 277 while ((page = cma.cma_pages) != NULL) { 278 cma.cma_pages = page->pg_next; 279 cma_page_free(hdl, page); 280 } 281 } 282