xref: /illumos-gate/usr/src/cmd/fm/modules/common/cpumem-retire/cma_page.c (revision 24da5b34f49324ed742a340010ed5bd3d4e06625)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Page retirement can be an extended process due to the fact that a retirement
31  * may not be possible when the original request is made.  The kernel will
32  * repeatedly attempt to retire a given page, but will not let us know when the
33  * page has been retired.  We therefore have to poll to see if the retirement
34  * has been completed.  This poll is implemented with a bounded exponential
35  * backoff to reduce the burden which we impose upon the system.
36  *
37  * To reduce the burden on fmd in the face of retirement storms, we schedule
38  * all retries as a group.  In the simplest case, we attempt to retire a single
39  * page.  When forced to retry, we initially schedule a retry at a configurable
40  * interval t.  If the retry fails, we schedule another at 2 * t, and so on,
41  * until t reaches the maximum interval (also configurable).  Future retries
42  * for that page will occur with t equal to the maximum interval value.  We
43  * will never give up on a retirement.
44  *
45  * With multiple retirements, the situation gets slightly more complicated.  As
46  * indicated above, we schedule retries as a group.  We don't want to deny new
47  * pages their short retry intervals, so we'll (re)set the retry interval to the
48  * value appropriate for the newest page.
49  */
50 
51 #include <cma.h>
52 
53 #include <time.h>
54 #include <fcntl.h>
55 #include <errno.h>
56 #include <unistd.h>
57 #include <strings.h>
58 #include <fm/fmd_api.h>
59 #include <fm/libtopo.h>
60 #include <sys/fm/protocol.h>
61 #include <sys/mem.h>
62 
63 static int
64 cma_page_cmd(fmd_hdl_t *hdl, int cmd, nvlist_t *nvl)
65 {
66 	mem_page_t mpage;
67 	char *fmribuf;
68 	size_t fmrisz;
69 	int fd, rc, err;
70 
71 	if ((fd = open("/dev/mem", O_RDONLY)) < 0)
72 		return (-1); /* errno is set for us */
73 
74 	if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 ||
75 	    fmrisz > MEM_FMRI_MAX_BUFSIZE ||
76 	    (fmribuf = fmd_hdl_alloc(hdl, fmrisz, FMD_SLEEP)) == NULL) {
77 		(void) close(fd);
78 		return (-1); /* errno is set for us */
79 	}
80 
81 	if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz,
82 	    NV_ENCODE_NATIVE, 0)) != 0) {
83 		fmd_hdl_free(hdl, fmribuf, fmrisz);
84 		(void) close(fd);
85 		return (-1); /* errno is set for us */
86 	}
87 
88 	mpage.m_fmri = fmribuf;
89 	mpage.m_fmrisz = fmrisz;
90 
91 	if ((rc = ioctl(fd, cmd, &mpage)) < 0)
92 		err = errno;
93 
94 	fmd_hdl_free(hdl, fmribuf, fmrisz);
95 
96 	(void) close(fd);
97 
98 	if (rc < 0) {
99 		errno = err;
100 		return (-1);
101 	}
102 
103 	return (0);
104 }
105 
106 static void
107 cma_page_free(fmd_hdl_t *hdl, cma_page_t *page)
108 {
109 	if (page->pg_fmri != NULL)
110 		nvlist_free(page->pg_fmri);
111 	fmd_hdl_free(hdl, page, sizeof (cma_page_t));
112 }
113 
114 /*
115  * Retire the specified ASRU, referring to a memory page by PA or by DIMM
116  * offset (i.e. the encoded coordinates internal bank, row, and column).
117  * In the initial FMA implementation, fault.memory.page exported an ASRU
118  * with an explicit physical address, which is valid at the initial time of
119  * diagnosis but may not be later following DR, DIMM removal, or interleave
120  * changes.  On SPARC, this issue was solved by exporting the DIMM offset
121  * and pushing the entire FMRI to the platform memory controller through
122  * /dev/mem so it can derive the current PA from the DIMM and offset.
123  * On x64, we also use DIMM and offset, but the mem:/// unum string is an
124  * encoded hc:/// FMRI that is then used by the x64 memory controller driver.
125  * At some point these three approaches need to be rationalized: all platforms
126  * should use the same scheme, either with decoding in the kernel or decoding
127  * in userland (i.e. with a libtopo method to compute and update the PA).
128  */
/*ARGSUSED*/
int
cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru, const char *uuid)
{
	cma_page_t *page;
	uint64_t pageaddr;
	char *unumstr;
	nvlist_t *asrucp = NULL;	/* modified asru copy, hc-unum case only */

	/* It should already be expanded, but we'll do it again anyway */
	if (fmd_nvl_fmri_expand(hdl, asru) < 0) {
		fmd_hdl_debug(hdl, "failed to expand page asru\n");
		cma_stats.bad_flts.fmds_value.ui64++;
		return (CMA_RA_FAILURE);
	}

	/* The physical address is required for logging and retry tracking. */
	if (nvlist_lookup_uint64(asru, FM_FMRI_MEM_PHYSADDR, &pageaddr) != 0) {
		fmd_hdl_debug(hdl, "mem fault missing '%s'\n",
		    FM_FMRI_MEM_PHYSADDR);
		cma_stats.bad_flts.fmds_value.ui64++;
		return (CMA_RA_FAILURE);
	}

	/* Page retirement can be disabled via the cma_page_doretire setting. */
	if (!cma.cma_page_doretire) {
		fmd_hdl_debug(hdl, "suppressed retire of page %llx\n",
		    (u_longlong_t)pageaddr);
		cma_stats.page_supp.fmds_value.ui64++;
		return (CMA_RA_FAILURE);
	}

	/* If the page is no longer present (e.g. after DR), nothing to do. */
	if (!fmd_nvl_fmri_present(hdl, asru)) {
		fmd_hdl_debug(hdl, "page retire overtaken by events\n");
		cma_stats.page_nonent.fmds_value.ui64++;
		return (CMA_RA_SUCCESS);
	}

	/*
	 * If the unum is an hc fmri string expand it to an fmri and include
	 * that in a modified asru nvlist.
	 */
	if (nvlist_lookup_string(asru, FM_FMRI_MEM_UNUM, &unumstr) == 0 &&
	    strncmp(unumstr, "hc:/", 4) == 0) {
		int err;
		nvlist_t *unumfmri;
		struct topo_hdl *thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);

		if (topo_fmri_str2nvl(thp, unumstr, &unumfmri, &err) != 0) {
			fmd_hdl_topo_rele(hdl, thp);
			fmd_hdl_debug(hdl, "page retire str2nvl failed: %s\n",
			    topo_strerror(err));
			return (CMA_RA_FAILURE);
		}
		fmd_hdl_topo_rele(hdl, thp);

		/* Modify a copy so the caller's asru is left untouched. */
		if (nvlist_dup(asru, &asrucp, 0) != 0) {
			fmd_hdl_debug(hdl, "page retire nvlist dup failed\n");
			nvlist_free(unumfmri);
			return (CMA_RA_FAILURE);
		}

		if (nvlist_add_nvlist(asrucp, FM_FMRI_MEM_UNUM "-fmri",
		    unumfmri) != 0) {
			fmd_hdl_debug(hdl, "page retire failed to add "
			    "unumfmri to modified asru");
			nvlist_free(unumfmri);
			nvlist_free(asrucp);
			return (CMA_RA_FAILURE);
		}
		/* nvlist_add_nvlist copied unumfmri, so drop our reference */
		nvlist_free(unumfmri);
	}

	if (cma_page_cmd(hdl, MEM_PAGE_FMRI_RETIRE,
	    asrucp ? asrucp : asru) == 0) {
		/* Immediate success -- no retries required. */
		fmd_hdl_debug(hdl, "retired page 0x%llx\n",
		    (u_longlong_t)pageaddr);
		cma_stats.page_flts.fmds_value.ui64++;
		if (asrucp)
			nvlist_free(asrucp);
		return (CMA_RA_SUCCESS);
	} else if (errno != EAGAIN) {
		/* Hard failure: anything other than EAGAIN will not improve */
		fmd_hdl_debug(hdl, "retire of page 0x%llx failed, will not "
		    "retry: %s\n", (u_longlong_t)pageaddr, strerror(errno));
		if (asrucp)
			nvlist_free(asrucp);
		if (uuid != NULL && cma.cma_page_maxretries != 0)
			return (CMA_RA_SUCCESS);
		return (CMA_RA_FAILURE);
	}

	/*
	 * The page didn't immediately retire.  We'll need to periodically
	 * check to see if it has been retired.
	 */
	fmd_hdl_debug(hdl, "page didn't retire - sleeping\n");

	page = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP);
	page->pg_addr = pageaddr;
	if (asrucp) {
		/* The list entry takes ownership of the modified asru copy. */
		page->pg_fmri = asrucp;
	} else {
		(void) nvlist_dup(asru, &page->pg_fmri, 0);
	}
	if (uuid != NULL)
		page->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP);

	/* Link onto the head of the pending-retirement list. */
	page->pg_next = cma.cma_pages;
	cma.cma_pages = page;

	/*
	 * Restart the shared retry timer at the minimum interval so the
	 * newest page gets its short initial retry (see block comment at
	 * the top of this file).
	 */
	if (cma.cma_page_timerid != 0)
		fmd_timer_remove(hdl, cma.cma_page_timerid);

	cma.cma_page_curdelay = cma.cma_page_mindelay;

	cma.cma_page_timerid =
	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);

	return (CMA_RA_FAILURE);
}
247 
248 static int
249 page_retry(fmd_hdl_t *hdl, cma_page_t *page)
250 {
251 	if (page->pg_fmri != NULL && !fmd_nvl_fmri_present(hdl,
252 	    page->pg_fmri)) {
253 		fmd_hdl_debug(hdl, "page retire overtaken by events");
254 		cma_stats.page_nonent.fmds_value.ui64++;
255 
256 		if (page->pg_uuid != NULL)
257 			fmd_case_uuclose(hdl, page->pg_uuid);
258 		return (1); /* no longer a page to retire */
259 	}
260 
261 	if (cma_page_cmd(hdl, MEM_PAGE_FMRI_ISRETIRED, page->pg_fmri) == 0) {
262 		fmd_hdl_debug(hdl, "retired page 0x%llx on retry %u\n",
263 		    page->pg_addr, page->pg_nretries);
264 		cma_stats.page_flts.fmds_value.ui64++;
265 
266 		if (page->pg_uuid != NULL)
267 			fmd_case_uuclose(hdl, page->pg_uuid);
268 		return (1); /* page retired */
269 	}
270 
271 	if (errno == EAGAIN) {
272 		fmd_hdl_debug(hdl, "scheduling another retry for 0x%llx\n",
273 		    page->pg_addr);
274 		return (0); /* schedule another retry */
275 	} else {
276 		if (errno == EIO) {
277 			fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
278 			    "retirement: page isn't scheduled for retirement"
279 			    "(request made beyond page_retire limit?)\n",
280 			    page->pg_addr);
281 		} else {
282 			fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
283 			    "retirement: %s\n", page->pg_addr,
284 			    strerror(errno));
285 		}
286 
287 		if (page->pg_uuid != NULL && cma.cma_page_maxretries != 0)
288 			fmd_case_uuclose(hdl, page->pg_uuid);
289 
290 		cma_stats.page_fails.fmds_value.ui64++;
291 		return (1); /* give up */
292 	}
293 }
294 
/*
 * Timer callback: sweep the pending-retirement list, removing entries
 * whose retirement has completed (or permanently failed) and counting a
 * retry for those still pending.  If any entries remain, reschedule the
 * timer with an exponentially backed-off delay, bounded by the
 * configured maximum (see the block comment at the top of this file).
 */
void
cma_page_retry(fmd_hdl_t *hdl)
{
	cma_page_t **pagep;

	/* The timer has fired, so it is no longer outstanding. */
	cma.cma_page_timerid = 0;

	fmd_hdl_debug(hdl, "page_retry: timer fired\n");

	/*
	 * Walk with a pointer-to-pointer so that unlinking an entry and
	 * advancing past one are handled uniformly, including at the head.
	 */
	pagep = &cma.cma_pages;
	while (*pagep != NULL) {
		cma_page_t *page = *pagep;

		if (page_retry(hdl, page)) {
			/*
			 * Successful retry or we're giving up - remove from
			 * the list
			 */
			*pagep = page->pg_next;

			if (page->pg_uuid != NULL)
				fmd_hdl_strfree(hdl, page->pg_uuid);

			cma_page_free(hdl, page);
		} else if (cma.cma_page_maxretries == 0 ||
		    page->pg_nretries < cma.cma_page_maxretries) {
			/* Still pending and under the retry limit (0 = no */
			/* limit): count the retry and keep the entry. */
			page->pg_nretries++;
			pagep = &page->pg_next;
		} else {
			/*
			 * Tunable maxretries was set and we reached
			 * the max, so just close the case.
			 */
			fmd_hdl_debug(hdl,
			    "giving up page retire 0x%llx on retry %u\n",
			    page->pg_addr, page->pg_nretries);
			cma_stats.page_retmax.fmds_value.ui64++;

			if (page->pg_uuid != NULL) {
				fmd_case_uuclose(hdl, page->pg_uuid);
				fmd_hdl_strfree(hdl, page->pg_uuid);
			}

			*pagep = page->pg_next;

			cma_page_free(hdl, page);
		}
	}

	if (cma.cma_pages == NULL)
		return; /* no more retirements */

	/*
	 * We still have retirements that haven't completed.  Back the delay
	 * off, and schedule a retry.
	 */
	cma.cma_page_curdelay = MIN(cma.cma_page_curdelay * 2,
	    cma.cma_page_maxdelay);

	fmd_hdl_debug(hdl, "scheduled page retirement retry for %llu secs\n",
	    (u_longlong_t)(cma.cma_page_curdelay / NANOSEC));

	cma.cma_page_timerid =
	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
}
360 
361 void
362 cma_page_fini(fmd_hdl_t *hdl)
363 {
364 	cma_page_t *page;
365 
366 	while ((page = cma.cma_pages) != NULL) {
367 		cma.cma_pages = page->pg_next;
368 		cma_page_free(hdl, page);
369 	}
370 }
371