xref: /titanic_44/usr/src/uts/sun4v/io/fault_iso.c (revision f500b19684bd0346ac05bec02a50af07f369da1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * sun4v Fault Isolation Services Module
29  */
30 
31 #include <sys/modctl.h>
32 #include <sys/cmn_err.h>
33 #include <sys/machsystm.h>
34 #include <sys/processor.h>
35 #include <sys/mem.h>
36 #include <vm/page.h>
37 #include <sys/note.h>
38 #include <sys/ds.h>
39 #include <sys/fault_iso.h>
40 
/*
 * Debugging routines
 *
 * FI_DBG() takes the same arguments as cmn_err().  On DEBUG builds the
 * message is emitted only while the fi_debug variable is non-zero; on
 * non-DEBUG builds the call sits behind a constant-false test so that it
 * is never executed.
 *
 * The macro expands to an unbraced `if', so do not use it as the only
 * statement of an if-branch that is followed by an `else'.
 */
#ifndef DEBUG
#define	FI_DBG	_NOTE(CONSTCOND) if (0) cmn_err
#else /* DEBUG */
uint_t fi_debug = 0x0;
#define	FI_DBG	if (fi_debug) cmn_err
#endif /* DEBUG */
50 
51 /*
52  * Domains Services interaction
53  */
54 static ds_svc_hdl_t	cpu_handle;
55 static ds_svc_hdl_t	mem_handle;
56 
57 static ds_ver_t		fi_vers[] = { { 1, 0 } };
58 #define	FI_NVERS	(sizeof (fi_vers) / sizeof (fi_vers[0]))
59 
60 static ds_capability_t cpu_cap = {
61 	"fma-cpu-service",	/* svc_id */
62 	fi_vers,		/* vers */
63 	FI_NVERS		/* nvers */
64 };
65 
66 static ds_capability_t mem_cap = {
67 	"fma-mem-service",	/* svc_id */
68 	fi_vers,		/* vers */
69 	FI_NVERS		/* nvers */
70 };
71 
72 static void fi_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl);
73 static void fi_unreg_handler(ds_cb_arg_t arg);
74 
75 static void cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
76 static void mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
77 
78 static ds_clnt_ops_t cpu_ops = {
79 	fi_reg_handler,		/* ds_reg_cb */
80 	fi_unreg_handler,	/* ds_unreg_cb */
81 	cpu_data_handler,	/* ds_data_cb */
82 	&cpu_handle		/* cb_arg */
83 };
84 
85 static ds_clnt_ops_t mem_ops = {
86 	fi_reg_handler,		/* ds_reg_cb */
87 	fi_unreg_handler,	/* ds_unreg_cb */
88 	mem_data_handler,	/* ds_data_cb */
89 	&mem_handle		/* cb_arg */
90 };
91 
92 static int fi_init(void);
93 static void fi_fini(void);
94 
95 static struct modlmisc modlmisc = {
96 	&mod_miscops,
97 	"sun4v Fault Isolation Services"
98 };
99 
100 static struct modlinkage modlinkage = {
101 	MODREV_1,
102 	(void *)&modlmisc,
103 	NULL
104 };
105 
106 int
_init(void)107 _init(void)
108 {
109 	int	rv;
110 
111 	if ((rv = fi_init()) != 0)
112 		return (rv);
113 
114 	if ((rv = mod_install(&modlinkage)) != 0)
115 		fi_fini();
116 
117 	return (rv);
118 }
119 
120 int
_info(struct modinfo * modinfop)121 _info(struct modinfo *modinfop)
122 {
123 	return (mod_info(&modlinkage, modinfop));
124 }
125 
126 int fi_allow_unload;
127 
128 int
_fini(void)129 _fini(void)
130 {
131 	int	status;
132 
133 	if (fi_allow_unload == 0)
134 		return (EBUSY);
135 
136 	if ((status = mod_remove(&modlinkage)) == 0)
137 		fi_fini();
138 
139 	return (status);
140 }
141 
142 static int
fi_init(void)143 fi_init(void)
144 {
145 	int	rv;
146 
147 	/* register CPU service with domain services framework */
148 	rv = ds_cap_init(&cpu_cap, &cpu_ops);
149 	if (rv != 0) {
150 		FI_DBG(CE_CONT, "ds_cap_init failed: %d", rv);
151 		return (rv);
152 	}
153 
154 	/* register MEM servicewith domain services framework */
155 	rv = ds_cap_init(&mem_cap, &mem_ops);
156 	if (rv != 0) {
157 		FI_DBG(CE_CONT, "ds_cap_init failed: %d", rv);
158 		(void) ds_cap_fini(&cpu_cap);
159 		return (rv);
160 	}
161 
162 	return (rv);
163 }
164 
165 static void
fi_fini(void)166 fi_fini(void)
167 {
168 	/*
169 	 * Stop incoming requests from Zeus
170 	 */
171 	(void) ds_cap_fini(&cpu_cap);
172 	(void) ds_cap_fini(&mem_cap);
173 }
174 
175 static void
cpu_data_handler(ds_cb_arg_t arg,void * buf,size_t buflen)176 cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
177 {
178 	_NOTE(ARGUNUSED(arg))
179 
180 	fma_cpu_service_req_t	*msg = buf;
181 	fma_cpu_resp_t		resp_msg;
182 	int			rv = 0;
183 	int			cpu_status;
184 	int			resp_back = 0;
185 
186 	/*
187 	 * If the buffer is the wrong size for CPU calls or is NULL then
188 	 * do not return any message. The call from the ldom mgr. will time out
189 	 * and the response will be NULL.
190 	 */
191 	if (msg == NULL || buflen != sizeof (fma_cpu_service_req_t)) {
192 		return;
193 	}
194 
195 	FI_DBG(CE_CONT, "req_num = %ld, msg_type = %d, cpu_id = %d\n",
196 	    msg->req_num, msg->msg_type, msg->cpu_id);
197 
198 	resp_msg.req_num = msg->req_num;
199 
200 	switch (msg->msg_type) {
201 	case FMA_CPU_REQ_STATUS:
202 		rv = p_online_internal(msg->cpu_id, P_STATUS,
203 		    &cpu_status);
204 		if (rv == EINVAL) {
205 			FI_DBG(CE_CONT, "Failed p_online call failed."
206 			    "Invalid CPU\n");
207 			resp_msg.result = FMA_CPU_RESP_FAILURE;
208 			resp_msg.status = FMA_CPU_STAT_ILLEGAL;
209 			resp_back = 1;
210 		}
211 		break;
212 	case FMA_CPU_REQ_OFFLINE:
213 		rv = p_online_internal(msg->cpu_id, P_FAULTED,
214 		    &cpu_status);
215 		if (rv == EINVAL) {
216 			FI_DBG(CE_CONT, "Failed p_online call failed."
217 			    "Invalid CPU\n");
218 			resp_msg.result = FMA_CPU_RESP_FAILURE;
219 			resp_msg.status = FMA_CPU_STAT_ILLEGAL;
220 			resp_back = 1;
221 		} else if (rv == EBUSY) {
222 			FI_DBG(CE_CONT, "Failed p_online call failed."
223 			    "Tried to offline while busy\n");
224 			resp_msg.result = FMA_CPU_RESP_FAILURE;
225 			resp_msg.status = FMA_CPU_STAT_ONLINE;
226 			resp_back = 1;
227 		}
228 		break;
229 	case FMA_CPU_REQ_ONLINE:
230 		rv = p_online_internal(msg->cpu_id, P_ONLINE,
231 		    &cpu_status);
232 		if (rv == EINVAL) {
233 			FI_DBG(CE_CONT, "Failed p_online call failed."
234 			    "Invalid CPU\n");
235 			resp_msg.result = FMA_CPU_RESP_FAILURE;
236 			resp_msg.status = FMA_CPU_STAT_ILLEGAL;
237 			resp_back = 1;
238 		} else if (rv == ENOTSUP) {
239 			FI_DBG(CE_CONT, "Failed p_online call failed."
240 			    "Online not supported for single CPU\n");
241 			resp_msg.result = FMA_CPU_RESP_FAILURE;
242 			resp_msg.status = FMA_CPU_STAT_OFFLINE;
243 			resp_back = 1;
244 		}
245 		break;
246 	default:
247 		/*
248 		 * If the msg_type was of unknown type simply return and
249 		 * have the ldom mgr. time out with a NULL response.
250 		 */
251 		return;
252 	}
253 
254 	if (rv != 0) {
255 		if (resp_back) {
256 			if ((rv = ds_cap_send(cpu_handle, &resp_msg,
257 			    sizeof (resp_msg))) != 0) {
258 				FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n",
259 				    rv);
260 			}
261 			return;
262 		}
263 		ASSERT((rv == EINVAL) || ((rv == EBUSY) &&
264 		    (msg->msg_type == FMA_CPU_REQ_OFFLINE)) ||
265 		    ((rv == ENOTSUP) && (msg->msg_type == FMA_CPU_REQ_ONLINE)));
266 
267 		cmn_err(CE_WARN, "p_online_internal error not handled "
268 		    "rv = %d\n", rv);
269 	}
270 
271 	resp_msg.req_num = msg->req_num;
272 	resp_msg.result = FMA_CPU_RESP_OK;
273 
274 	switch (cpu_status) {
275 	case P_OFFLINE:
276 	case P_FAULTED:
277 	case P_POWEROFF:
278 	case P_SPARE:
279 		resp_msg.status = FMA_CPU_STAT_OFFLINE;
280 		break;
281 	case P_ONLINE:
282 	case P_NOINTR:
283 		resp_msg.status = FMA_CPU_STAT_ONLINE;
284 		break;
285 	default:
286 		resp_msg.status = FMA_CPU_STAT_ILLEGAL;
287 	}
288 
289 	if ((rv = ds_cap_send(cpu_handle, &resp_msg,
290 	    sizeof (resp_msg))) != 0) {
291 		FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n", rv);
292 	}
293 }
294 
295 static void
mem_data_handler(ds_cb_arg_t arg,void * buf,size_t buflen)296 mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
297 {
298 	_NOTE(ARGUNUSED(arg))
299 
300 	fma_mem_service_req_t	*msg = buf;
301 	fma_mem_resp_t		resp_msg;
302 	int			rv = 0;
303 
304 	/*
305 	 * If the buffer is the wrong size for Mem calls or is NULL then
306 	 * do not return any message. The call from the ldom mgr. will time out
307 	 * and the response will be NULL.
308 	 */
309 	if (msg == NULL || buflen != sizeof (fma_mem_service_req_t)) {
310 		return;
311 	}
312 
313 	FI_DBG(CE_CONT, "req_num = %ld, msg_type = %d, memory addr = 0x%lx"
314 	"memory length = 0x%lx\n", msg->req_num, msg->msg_type,
315 	    msg->real_addr, msg->length);
316 
317 	resp_msg.req_num = msg->req_num;
318 	resp_msg.res_addr = msg->real_addr;
319 	resp_msg.res_length = msg->length;
320 
321 	/*
322 	 * Information about return values for page calls can be referenced
323 	 * in usr/src/uts/common/vm/page_retire.c
324 	 */
325 	switch (msg->msg_type) {
326 	case FMA_MEM_REQ_STATUS:
327 		rv = page_retire_check(msg->real_addr, NULL);
328 		switch (rv) {
329 		/* Page is retired */
330 		case 0:
331 			resp_msg.result = FMA_MEM_RESP_OK;
332 			resp_msg.status = FMA_MEM_STAT_RETIRED;
333 			break;
334 		/* Page is pending. Send back failure and not retired */
335 		case EAGAIN:
336 			resp_msg.result = FMA_MEM_RESP_FAILURE;
337 			resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
338 			break;
339 		/* Page is not retired. */
340 		case EIO:
341 			resp_msg.result = FMA_MEM_RESP_OK;
342 			resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
343 			break;
344 		/* PA is not valid */
345 		case EINVAL:
346 			resp_msg.result = FMA_MEM_RESP_FAILURE;
347 			resp_msg.status = FMA_MEM_STAT_ILLEGAL;
348 			break;
349 		default:
350 			ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) ||
351 			    (rv ==  EINVAL));
352 			cmn_err(CE_WARN, "fault_iso: return value from "
353 			    "page_retire_check invalid: %d\n", rv);
354 		}
355 		break;
356 	case FMA_MEM_REQ_RETIRE:
357 		rv = page_retire(msg->real_addr, PR_FMA);
358 		switch (rv) {
359 		/* Page retired successfully */
360 		case 0:
361 			resp_msg.result = FMA_MEM_RESP_OK;
362 			resp_msg.status = FMA_MEM_STAT_RETIRED;
363 			break;
364 		/* Tried to retire and now Pending retirement */
365 		case EAGAIN:
366 			resp_msg.result = FMA_MEM_RESP_FAILURE;
367 			resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
368 			break;
369 		/* Did not try to retire. Page already retired */
370 		case EIO:
371 			resp_msg.result = FMA_MEM_RESP_FAILURE;
372 			resp_msg.status = FMA_MEM_STAT_RETIRED;
373 			break;
374 		/* PA is not valid */
375 		case EINVAL:
376 			resp_msg.result = FMA_MEM_RESP_FAILURE;
377 			resp_msg.status = FMA_MEM_STAT_ILLEGAL;
378 			break;
379 		default:
380 			ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) ||
381 			    (rv ==  EINVAL));
382 			cmn_err(CE_WARN, "fault_iso: return value from "
383 			    "page_retire invalid: %d\n", rv);
384 		}
385 		break;
386 	case FMA_MEM_REQ_RESURRECT:
387 		rv = page_unretire(msg->real_addr);
388 		switch (rv) {
389 		/* Page succesfullly unretired */
390 		case 0:
391 			resp_msg.result = FMA_MEM_RESP_OK;
392 			resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
393 			break;
394 		/* Page could not be locked. Still retired */
395 		case EAGAIN:
396 			resp_msg.result = FMA_MEM_RESP_FAILURE;
397 			resp_msg.status = FMA_MEM_STAT_RETIRED;
398 			break;
399 		/* Page was not retired already */
400 		case EIO:
401 			resp_msg.result = FMA_MEM_RESP_FAILURE;
402 			resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
403 			break;
404 		/* PA is not valid */
405 		case EINVAL:
406 			resp_msg.result = FMA_MEM_RESP_FAILURE;
407 			resp_msg.status = FMA_MEM_STAT_ILLEGAL;
408 			break;
409 		default:
410 			ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) ||
411 			    (rv ==  EINVAL));
412 			cmn_err(CE_WARN, "fault_iso: return value from "
413 			    "page_unretire invalid: %d\n", rv);
414 		}
415 		break;
416 	default:
417 		/*
418 		 * If the msg_type was of unknown type simply return and
419 		 * have the ldom mgr. time out with a NULL response.
420 		 */
421 		return;
422 	}
423 
424 	if ((rv = ds_cap_send(mem_handle, &resp_msg, sizeof (resp_msg))) != 0) {
425 		FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n", rv);
426 	}
427 }
428 
429 static void
fi_reg_handler(ds_cb_arg_t arg,ds_ver_t * ver,ds_svc_hdl_t hdl)430 fi_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
431 {
432 	FI_DBG(CE_CONT, "fi_reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n",
433 	    arg, ver->major, ver->minor, hdl);
434 
435 	if ((ds_svc_hdl_t *)arg == &cpu_handle)
436 		cpu_handle = hdl;
437 	if ((ds_svc_hdl_t *)arg == &mem_handle)
438 		mem_handle = hdl;
439 }
440 
441 static void
fi_unreg_handler(ds_cb_arg_t arg)442 fi_unreg_handler(ds_cb_arg_t arg)
443 {
444 	FI_DBG(CE_CONT, "fi_unreg_handler: arg=0x%p\n", arg);
445 
446 	if ((ds_svc_hdl_t *)arg == &cpu_handle)
447 		cpu_handle = DS_INVALID_HDL;
448 	if ((ds_svc_hdl_t *)arg == &mem_handle)
449 		mem_handle = DS_INVALID_HDL;
450 }
451