/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Page retirement can be an extended process due to the fact that a retirement
 * may not be possible when the original request is made.  The kernel will
 * repeatedly attempt to retire a given page, but will not let us know when the
 * page has been retired.  We therefore have to poll to see if the retirement
 * has been completed.  This poll is implemented with a bounded exponential
 * backoff to reduce the burden which we impose upon the system.
 *
 * To reduce the burden on fmd in the face of retirement storms, we schedule
 * all retries as a group.  In the simplest case, we attempt to retire a single
 * page.  When forced to retry, we initially schedule a retry at a configurable
 * interval t.  If the retry fails, we schedule another at 2 * t, and so on,
 * until t reaches the maximum interval (also configurable).  Future retries
 * for that page will occur with t equal to the maximum interval value.  We
 * will never give up on a retirement.
 *
 * With multiple retirements, the situation gets slightly more complicated.  As
 * indicated above, we schedule retries as a group.  We don't want to deny new
 * pages their short retry intervals, so we'll (re)set the retry interval to the
 * value appropriate for the newest page.
 */
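
/*
 * For example (illustrative values only, not necessarily the tunable
 * defaults): with a minimum retry interval of 1 second and a maximum of
 * 8 seconds, a page that repeatedly fails to retire would be retried
 * roughly 1, 2, 4, 8, 8, 8, ... seconds after the initial attempt.
 */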

#include <cma.h>

#include <time.h>
#include <errno.h>
#include <unistd.h>
#include <strings.h>
#include <fm/fmd_api.h>
#include <fm/libtopo.h>
#include <sys/fm/protocol.h>
#include <sys/mem.h>

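/*
 * Free a pending page-retirement record, along with the page FMRI (if any)
 * that it references.
 */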
static void
cma_page_free(fmd_hdl_t *hdl, cma_page_t *page)
{
	if (page->pg_fmri != NULL)
		nvlist_free(page->pg_fmri);
	fmd_hdl_free(hdl, page, sizeof (cma_page_t));
}

/*
 * Retire the specified ASRU, referring to a memory page by PA or by DIMM
 * offset (i.e. the encoded coordinates of internal bank, row, and column).
 * In the initial FMA implementation, fault.memory.page exported an ASRU
 * with an explicit physical address, which is valid at the initial time of
 * diagnosis but may not be later following DR, DIMM removal, or interleave
 * changes.  On SPARC, this issue was solved by exporting the DIMM offset
 * and pushing the entire FMRI to the platform memory controller through
 * /dev/mem so it can derive the current PA from the DIMM and offset.
 * On x64, we also use DIMM and offset, but the mem:/// unum string is an
 * encoded hc:/// FMRI that is then used by the x64 memory controller driver.
 * At some point these three approaches need to be rationalized: all platforms
 * should use the same scheme, either with decoding in the kernel or decoding
 * in userland (i.e. with a libtopo method to compute and update the PA).
 */
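/*
 * As a hypothetical illustration only (the exact component names depend on
 * the platform topology), an x64 unum of this form might look like
 * "hc:///motherboard=0/chip=0/memory-controller=0/dimm=1"; the code below
 * converts such a string to an FMRI nvlist and attaches it to the ASRU
 * under FM_FMRI_MEM_UNUM "-fmri".
 */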
/*ARGSUSED*/
int
cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru,
    const char *uuid, boolean_t repair)
{
	cma_page_t *page;
	uint64_t pageaddr;
	char *unumstr;
	nvlist_t *asrucp = NULL;
	const char *action = repair ? "unretire" : "retire";

	if (nvlist_dup(asru, &asrucp, 0) != 0) {
		fmd_hdl_debug(hdl, "page retire nvlist dup failed\n");
		return (CMA_RA_FAILURE);
	}

	/* It should already be expanded, but we'll do it again anyway */
	if (fmd_nvl_fmri_expand(hdl, asrucp) < 0) {
		fmd_hdl_debug(hdl, "failed to expand page asru\n");
		cma_stats.bad_flts.fmds_value.ui64++;
		nvlist_free(asrucp);
		return (CMA_RA_FAILURE);
	}

	if (!repair && !fmd_nvl_fmri_present(hdl, asrucp)) {
		fmd_hdl_debug(hdl, "page retire overtaken by events\n");
		cma_stats.page_nonent.fmds_value.ui64++;
		nvlist_free(asrucp);
		return (CMA_RA_SUCCESS);
	}

	if (nvlist_lookup_uint64(asrucp, FM_FMRI_MEM_PHYSADDR, &pageaddr)
	    != 0) {
		fmd_hdl_debug(hdl, "mem fault missing '%s'\n",
		    FM_FMRI_MEM_PHYSADDR);
		cma_stats.bad_flts.fmds_value.ui64++;
		nvlist_free(asrucp);
		return (CMA_RA_FAILURE);
	}

	if (repair) {
		if (!cma.cma_page_dounretire) {
			fmd_hdl_debug(hdl, "suppressed unretire of page %llx\n",
			    (u_longlong_t)pageaddr);
			cma_stats.page_supp.fmds_value.ui64++;
			nvlist_free(asrucp);
			return (CMA_RA_SUCCESS);
		}
	} else {
		if (!cma.cma_page_doretire) {
			fmd_hdl_debug(hdl, "suppressed retire of page %llx\n",
			    (u_longlong_t)pageaddr);
			cma_stats.page_supp.fmds_value.ui64++;
			nvlist_free(asrucp);
			return (CMA_RA_FAILURE);
		}
	}
	/*
	 * If the unum is an hc fmri string, expand it to an fmri and include
	 * that in a modified asru nvlist.
	 */
	if (nvlist_lookup_string(asrucp, FM_FMRI_MEM_UNUM, &unumstr) == 0 &&
	    strncmp(unumstr, "hc:/", 4) == 0) {
		int err;
		nvlist_t *unumfmri;
		struct topo_hdl *thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);

		if (topo_fmri_str2nvl(thp, unumstr, &unumfmri, &err) != 0) {
			fmd_hdl_debug(hdl, "page retire str2nvl failed: %s\n",
			    topo_strerror(err));
			fmd_hdl_topo_rele(hdl, thp);
			nvlist_free(asrucp);
			return (CMA_RA_FAILURE);
		}

		fmd_hdl_topo_rele(hdl, thp);

		if (nvlist_add_nvlist(asrucp, FM_FMRI_MEM_UNUM "-fmri",
		    unumfmri) != 0) {
			fmd_hdl_debug(hdl, "page retire failed to add "
			    "unumfmri to modified asru\n");
			nvlist_free(unumfmri);
			nvlist_free(asrucp);
			return (CMA_RA_FAILURE);
		}
		nvlist_free(unumfmri);
	}

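	/*
	 * Ask the kernel to retire (or unretire) the page.  A return of 0
	 * means the request completed immediately.  EAGAIN means the kernel
	 * has accepted the retirement request but cannot complete it yet
	 * (e.g. the page is still in use), so we must poll for completion
	 * below.
	 */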
	if (cma_page_cmd(hdl,
	    repair ? MEM_PAGE_FMRI_UNRETIRE : MEM_PAGE_FMRI_RETIRE, asrucp)
	    == 0) {
		fmd_hdl_debug(hdl, "%sd page 0x%llx\n",
		    action, (u_longlong_t)pageaddr);
		if (repair)
			cma_stats.page_repairs.fmds_value.ui64++;
		else
			cma_stats.page_flts.fmds_value.ui64++;
		nvlist_free(asrucp);
		return (CMA_RA_SUCCESS);
	} else if (repair || errno != EAGAIN) {
		fmd_hdl_debug(hdl, "%s of page 0x%llx failed, will not "
		    "retry: %s\n", action, (u_longlong_t)pageaddr,
		    strerror(errno));

		cma_stats.page_fails.fmds_value.ui64++;

		nvlist_free(asrucp);
		if (uuid != NULL && cma.cma_page_maxretries != 0)
			return (CMA_RA_SUCCESS);
		return (CMA_RA_FAILURE);
	}

	/*
	 * The page didn't immediately retire.  We'll need to periodically
	 * check to see if it has been retired.
	 */
	fmd_hdl_debug(hdl, "page didn't retire - sleeping\n");

	page = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP);
	page->pg_addr = pageaddr;
	page->pg_fmri = asrucp;
	if (uuid != NULL)
		page->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP);

	page->pg_next = cma.cma_pages;
	cma.cma_pages = page;

	if (cma.cma_page_timerid != 0)
		fmd_timer_remove(hdl, cma.cma_page_timerid);

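	/*
	 * Per the comment at the top of this file, a newly pended page resets
	 * the shared retry interval back to the minimum so that new pages are
	 * not denied their short initial retry intervals.
	 */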
	cma.cma_page_curdelay = cma.cma_page_mindelay;

	cma.cma_page_timerid =
	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);

	/* Don't free asrucp here.  This FMRI will be needed for retry. */
	return (CMA_RA_FAILURE);
}

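/*
 * Check whether a single pending page has now been retired.  Returns 1 if the
 * page can be removed from the retry list (it has been retired, is no longer
 * present, or we are giving up), or 0 if another retry should be scheduled.
 */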
static int
page_retry(fmd_hdl_t *hdl, cma_page_t *page)
{
	if (page->pg_fmri != NULL && !fmd_nvl_fmri_present(hdl,
	    page->pg_fmri)) {
		fmd_hdl_debug(hdl, "page retire overtaken by events\n");
		cma_stats.page_nonent.fmds_value.ui64++;

		if (page->pg_uuid != NULL)
			fmd_case_uuclose(hdl, page->pg_uuid);
		return (1); /* no longer a page to retire */
	}

	if (cma_page_cmd(hdl, MEM_PAGE_FMRI_ISRETIRED, page->pg_fmri) == 0) {
		fmd_hdl_debug(hdl, "retired page 0x%llx on retry %u\n",
		    page->pg_addr, page->pg_nretries);
		cma_stats.page_flts.fmds_value.ui64++;

		if (page->pg_uuid != NULL)
			fmd_case_uuclose(hdl, page->pg_uuid);
		return (1); /* page retired */
	}

	if (errno == EAGAIN) {
		fmd_hdl_debug(hdl, "scheduling another retry for 0x%llx\n",
		    page->pg_addr);
		return (0); /* schedule another retry */
	} else {
		if (errno == EIO) {
			fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
			    "retirement: page isn't scheduled for retirement "
			    "(request made beyond page_retire limit?)\n",
			    page->pg_addr);
		} else {
			fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
			    "retirement: %s\n", page->pg_addr,
			    strerror(errno));
		}

		if (page->pg_uuid != NULL && cma.cma_page_maxretries != 0)
			fmd_case_uuclose(hdl, page->pg_uuid);

		cma_stats.page_fails.fmds_value.ui64++;
		return (1); /* give up */
	}
}

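/*
 * Timer callback: walk the list of pending page retirements, retry each one,
 * and reschedule the timer (with exponential backoff, bounded by the maximum
 * delay) if any retirements remain outstanding.
 */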
void
cma_page_retry(fmd_hdl_t *hdl)
{
	cma_page_t **pagep;

	cma.cma_page_timerid = 0;

	fmd_hdl_debug(hdl, "page_retry: timer fired\n");

	pagep = &cma.cma_pages;
	while (*pagep != NULL) {
		cma_page_t *page = *pagep;

		if (page_retry(hdl, page)) {
			/*
			 * Successful retry or we're giving up - remove from
			 * the list
			 */
			*pagep = page->pg_next;

			if (page->pg_uuid != NULL)
				fmd_hdl_strfree(hdl, page->pg_uuid);

			cma_page_free(hdl, page);
		} else if (cma.cma_page_maxretries == 0 ||
		    page->pg_nretries < cma.cma_page_maxretries) {
			page->pg_nretries++;
			pagep = &page->pg_next;
		} else {
			/*
			 * Tunable maxretries was set and we reached
			 * the max, so just close the case.
			 */
			fmd_hdl_debug(hdl,
			    "giving up page retire 0x%llx on retry %u\n",
			    page->pg_addr, page->pg_nretries);
			cma_stats.page_retmax.fmds_value.ui64++;

			if (page->pg_uuid != NULL) {
				fmd_case_uuclose(hdl, page->pg_uuid);
				fmd_hdl_strfree(hdl, page->pg_uuid);
			}

			*pagep = page->pg_next;

			cma_page_free(hdl, page);
		}
	}

	if (cma.cma_pages == NULL)
		return; /* no more retirements */

	/*
	 * We still have retirements that haven't completed.  Back the delay
	 * off, and schedule a retry.
	 */
	cma.cma_page_curdelay = MIN(cma.cma_page_curdelay * 2,
	    cma.cma_page_maxdelay);

	fmd_hdl_debug(hdl, "scheduled page retirement retry for %llu secs\n",
	    (u_longlong_t)(cma.cma_page_curdelay / NANOSEC));

	cma.cma_page_timerid =
	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
}

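/*
 * Module teardown: discard any page retirement records that are still
 * pending; the kernel will continue to attempt the retirements on its own.
 */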
void
cma_page_fini(fmd_hdl_t *hdl)
{
	cma_page_t *page;

	while ((page = cma.cma_pages) != NULL) {
		cma.cma_pages = page->pg_next;
		cma_page_free(hdl, page);
	}
}