xref: /illumos-gate/usr/src/cmd/fm/modules/common/cpumem-retire/cma_page.c (revision 7f7322febbcfe774b7270abc3b191c094bfcc517)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Page retirement can be an extended process due to the fact that a retirement
31  * may not be possible when the original request is made.  The kernel will
32  * repeatedly attempt to retire a given page, but will not let us know when the
33  * page has been retired.  We therefore have to poll to see if the retirement
34  * has been completed.  This poll is implemented with a bounded exponential
35  * backoff to reduce the burden which we impose upon the system.
36  *
37  * To reduce the burden on fmd in the face of retirement storms, we schedule
38  * all retries as a group.  In the simplest case, we attempt to retire a single
39  * page.  When forced to retry, we initially schedule a retry at a configurable
40  * interval t.  If the retry fails, we schedule another at 2 * t, and so on,
41  * until t reaches the maximum interval (also configurable).  Future retries
 * for that page will occur with t equal to the maximum interval value.  We
 * keep retrying for as long as the retirement check reports EAGAIN; any other
 * error ends the retries for that page.
44  *
45  * With multiple retirements, the situation gets slightly more complicated.  As
46  * indicated above, we schedule retries as a group.  We don't want to deny new
47  * pages their short retry intervals, so we'll (re)set the retry interval to the
48  * value appropriate for the newest page.
49  */
50 
51 #include <cma.h>
52 
53 #include <time.h>
54 #include <fcntl.h>
55 #include <errno.h>
56 #include <unistd.h>
57 #include <strings.h>
58 #include <fm/fmd_api.h>
59 #include <sys/fm/protocol.h>
60 #include <sys/mem.h>
61 
62 static int
63 cma_page_cmd(fmd_hdl_t *hdl, int cmd, nvlist_t *nvl)
64 {
65 	mem_page_t mpage;
66 	char *fmribuf;
67 	size_t fmrisz;
68 	int fd, rc, err;
69 
70 	if ((fd = open("/dev/mem", O_RDONLY)) < 0)
71 		return (-1); /* errno is set for us */
72 
73 	if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 ||
74 	    fmrisz > MEM_FMRI_MAX_BUFSIZE ||
75 	    (fmribuf = fmd_hdl_alloc(hdl, fmrisz, FMD_SLEEP)) == NULL) {
76 		(void) close(fd);
77 		return (-1); /* errno is set for us */
78 	}
79 
80 	if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz,
81 	    NV_ENCODE_NATIVE, 0)) != 0) {
82 		fmd_hdl_free(hdl, fmribuf, fmrisz);
83 		(void) close(fd);
84 		return (-1); /* errno is set for us */
85 	}
86 
87 	mpage.m_fmri = fmribuf;
88 	mpage.m_fmrisz = fmrisz;
89 
90 	if ((rc = ioctl(fd, cmd, &mpage)) < 0)
91 		err = errno;
92 
93 	fmd_hdl_free(hdl, fmribuf, fmrisz);
94 
95 	(void) close(fd);
96 
97 	if (rc < 0) {
98 		errno = err;
99 		return (-1);
100 	}
101 
102 	return (0);
103 }
104 
105 static void
106 cma_page_free(fmd_hdl_t *hdl, cma_page_t *page)
107 {
108 	if (page->pg_fmri != NULL)
109 		nvlist_free(page->pg_fmri);
110 	fmd_hdl_free(hdl, page, sizeof (cma_page_t));
111 }
112 
113 /*ARGSUSED*/
114 void
115 cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru, const char *uuid)
116 {
117 	cma_page_t *page;
118 	uint64_t pageaddr;
119 
120 	/* It should already be expanded, but we'll do it again anyway */
121 	if (fmd_nvl_fmri_expand(hdl, asru) < 0) {
122 		fmd_hdl_debug(hdl, "failed to expand page asru\n");
123 		cma_stats.bad_flts.fmds_value.ui64++;
124 		return;
125 	}
126 
127 	if (nvlist_lookup_uint64(asru, FM_FMRI_MEM_PHYSADDR, &pageaddr) != 0) {
128 		fmd_hdl_debug(hdl, "mem fault missing '%s'\n",
129 		    FM_FMRI_MEM_PHYSADDR);
130 		cma_stats.bad_flts.fmds_value.ui64++;
131 		return;
132 	}
133 
134 	if (!cma.cma_page_doretire) {
135 		fmd_hdl_debug(hdl, "suppressed retire of page %llx\n",
136 		    (u_longlong_t)pageaddr);
137 		cma_stats.page_supp.fmds_value.ui64++;
138 		return;
139 	}
140 
141 	if (!fmd_nvl_fmri_present(hdl, asru)) {
142 		fmd_hdl_debug(hdl, "page retire overtaken by events\n");
143 		cma_stats.page_nonent.fmds_value.ui64++;
144 		if (uuid != NULL)
145 			fmd_case_uuclose(hdl, uuid);
146 		return;
147 	}
148 
149 	if (cma_page_cmd(hdl, MEM_PAGE_FMRI_RETIRE, asru) == 0) {
150 		fmd_hdl_debug(hdl, "retired page 0x%llx\n",
151 		    (u_longlong_t)pageaddr);
152 		cma_stats.page_flts.fmds_value.ui64++;
153 		if (uuid != NULL)
154 			fmd_case_uuclose(hdl, uuid);
155 		return;
156 	}
157 
158 	/*
159 	 * The page didn't immediately retire.  We'll need to periodically
160 	 * check to see if it has been retired.
161 	 */
162 	fmd_hdl_debug(hdl, "page didn't retire - sleeping\n");
163 
164 	page = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP);
165 	page->pg_addr = pageaddr;
166 	(void) nvlist_dup(asru, &page->pg_fmri, 0);
167 	if (uuid != NULL)
168 		page->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP);
169 
170 	page->pg_next = cma.cma_pages;
171 	cma.cma_pages = page;
172 
173 	if (cma.cma_page_timerid != 0)
174 		fmd_timer_remove(hdl, cma.cma_page_timerid);
175 
176 	cma.cma_page_curdelay = cma.cma_page_mindelay;
177 
178 	cma.cma_page_timerid =
179 	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
180 }
181 
182 static int
183 page_retry(fmd_hdl_t *hdl, cma_page_t *page)
184 {
185 	if (page->pg_fmri != NULL && !fmd_nvl_fmri_present(hdl,
186 	    page->pg_fmri)) {
187 		fmd_hdl_debug(hdl, "page retire overtaken by events");
188 		cma_stats.page_nonent.fmds_value.ui64++;
189 
190 		if (page->pg_uuid != NULL)
191 			fmd_case_uuclose(hdl, page->pg_uuid);
192 		return (1); /* no longer a page to retire */
193 	}
194 
195 	if (cma_page_cmd(hdl, MEM_PAGE_FMRI_ISRETIRED, page->pg_fmri) == 0) {
196 		fmd_hdl_debug(hdl, "retired page 0x%llx on retry %u\n",
197 		    page->pg_addr, page->pg_nretries);
198 		cma_stats.page_flts.fmds_value.ui64++;
199 
200 		if (page->pg_uuid != NULL)
201 			fmd_case_uuclose(hdl, page->pg_uuid);
202 		return (1); /* page retired */
203 	}
204 
205 	if (errno == EAGAIN) {
206 		fmd_hdl_debug(hdl, "scheduling another retry for 0x%llx\n",
207 		    page->pg_addr);
208 		return (0); /* schedule another retry */
209 	} else {
210 		if (errno == EIO) {
211 			fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
212 			    "retirement: page isn't scheduled for retirement\n",
213 			    page->pg_addr);
214 		} else {
215 			fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
216 			    "retirement: %s\n", page->pg_addr,
217 			    strerror(errno));
218 		}
219 
220 		cma_stats.page_fails.fmds_value.ui64++;
221 		return (1); /* give up */
222 	}
223 }
224 
225 void
226 cma_page_retry(fmd_hdl_t *hdl)
227 {
228 	cma_page_t **pagep;
229 
230 	cma.cma_page_timerid = 0;
231 
232 	fmd_hdl_debug(hdl, "page_retry: timer fired\n");
233 
234 	pagep = &cma.cma_pages;
235 	while (*pagep != NULL) {
236 		cma_page_t *page = *pagep;
237 
238 		if (page_retry(hdl, page)) {
239 			/*
240 			 * Successful retry or we're giving up - remove from
241 			 * the list
242 			 */
243 			*pagep = page->pg_next;
244 
245 			if (page->pg_uuid != NULL)
246 				fmd_hdl_strfree(hdl, page->pg_uuid);
247 
248 			cma_page_free(hdl, page);
249 		} else {
250 			page->pg_nretries++;
251 			pagep = &page->pg_next;
252 		}
253 	}
254 
255 	if (cma.cma_pages == NULL)
256 		return; /* no more retirements */
257 
258 	/*
259 	 * We still have retirements that haven't completed.  Back the delay
260 	 * off, and schedule a retry.
261 	 */
262 	cma.cma_page_curdelay = MIN(cma.cma_page_curdelay * 2,
263 	    cma.cma_page_maxdelay);
264 
265 	fmd_hdl_debug(hdl, "scheduled page retirement retry for %llu secs\n",
266 	    (u_longlong_t)(cma.cma_page_curdelay / NANOSEC));
267 
268 	cma.cma_page_timerid =
269 	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
270 }
271 
272 void
273 cma_page_fini(fmd_hdl_t *hdl)
274 {
275 	cma_page_t *page;
276 
277 	while ((page = cma.cma_pages) != NULL) {
278 		cma.cma_pages = page->pg_next;
279 		cma_page_free(hdl, page);
280 	}
281 }
282