1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * sun4v Fault Isolation Services Module 29 */ 30 31 #include <sys/modctl.h> 32 #include <sys/cmn_err.h> 33 #include <sys/machsystm.h> 34 #include <sys/processor.h> 35 #include <sys/mem.h> 36 #include <vm/page.h> 37 #include <sys/note.h> 38 #include <sys/ds.h> 39 #include <sys/fault_iso.h> 40 41 /* 42 * Debugging routines 43 */ 44 #ifdef DEBUG 45 uint_t fi_debug = 0x0; 46 #define FI_DBG if (fi_debug) cmn_err 47 #else /* DEBUG */ 48 #define FI_DBG _NOTE(CONSTCOND) if (0) cmn_err 49 #endif /* DEBUG */ 50 51 /* 52 * Domains Services interaction 53 */ 54 static ds_svc_hdl_t cpu_handle; 55 static ds_svc_hdl_t mem_handle; 56 57 static ds_ver_t fi_vers[] = { { 1, 0 } }; 58 #define FI_NVERS (sizeof (fi_vers) / sizeof (fi_vers[0])) 59 60 static ds_capability_t cpu_cap = { 61 "fma-cpu-service", /* svc_id */ 62 fi_vers, /* vers */ 63 FI_NVERS /* nvers */ 64 }; 65 66 static ds_capability_t mem_cap = { 67 "fma-mem-service", /* svc_id */ 68 fi_vers, /* vers */ 69 FI_NVERS /* nvers */ 70 }; 71 72 static void fi_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl); 73 static void fi_unreg_handler(ds_cb_arg_t arg); 74 75 static void cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen); 76 static void mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen); 77 78 static ds_clnt_ops_t cpu_ops = { 79 fi_reg_handler, /* ds_reg_cb */ 80 fi_unreg_handler, /* ds_unreg_cb */ 81 cpu_data_handler, /* ds_data_cb */ 82 &cpu_handle /* cb_arg */ 83 }; 84 85 static ds_clnt_ops_t mem_ops = { 86 fi_reg_handler, /* ds_reg_cb */ 87 fi_unreg_handler, /* ds_unreg_cb */ 88 mem_data_handler, /* ds_data_cb */ 89 &mem_handle /* cb_arg */ 90 }; 91 92 static int fi_init(void); 93 static void fi_fini(void); 94 95 static struct modlmisc modlmisc = { 96 &mod_miscops, 97 "sun4v Fault Isolation Services" 98 }; 99 100 static struct modlinkage modlinkage = { 101 MODREV_1, 102 (void *)&modlmisc, 103 NULL 104 }; 105 106 int 107 _init(void) 108 { 109 int rv; 110 111 if ((rv = fi_init()) != 0) 112 return (rv); 113 114 if ((rv = mod_install(&modlinkage)) != 0) 115 fi_fini(); 116 117 return (rv); 118 } 119 120 int 121 _info(struct modinfo *modinfop) 122 { 123 return (mod_info(&modlinkage, modinfop)); 124 } 125 126 int fi_allow_unload; 127 128 int 129 _fini(void) 130 { 131 int status; 132 133 if (fi_allow_unload == 0) 134 return (EBUSY); 135 136 if ((status = mod_remove(&modlinkage)) == 0) 137 fi_fini(); 138 139 return (status); 140 } 141 142 static int 143 fi_init(void) 144 { 145 int rv; 146 147 /* register CPU service with domain services framework */ 148 rv = ds_cap_init(&cpu_cap, &cpu_ops); 149 if (rv != 0) { 150 FI_DBG(CE_CONT, "ds_cap_init failed: %d", rv); 151 return (rv); 152 } 153 154 /* register MEM servicewith domain services framework */ 155 rv = ds_cap_init(&mem_cap, &mem_ops); 156 if (rv != 0) { 157 FI_DBG(CE_CONT, "ds_cap_init failed: %d", rv); 158 (void) ds_cap_fini(&cpu_cap); 159 return (rv); 160 } 161 162 return (rv); 163 } 164 165 static void 166 fi_fini(void) 167 { 168 /* 169 * Stop incoming requests from Zeus 170 */ 171 (void) ds_cap_fini(&cpu_cap); 172 (void) ds_cap_fini(&mem_cap); 173 } 174 175 static void 176 cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen) 177 { 178 _NOTE(ARGUNUSED(arg)) 179 180 fma_cpu_service_req_t *msg = buf; 181 fma_cpu_resp_t resp_msg; 182 int rv = 0; 183 int cpu_status; 184 int resp_back = 0; 185 186 /* 187 * If the buffer is the wrong size for CPU calls or is NULL then 188 * do not return any message. The call from the ldom mgr. will time out 189 * and the response will be NULL. 190 */ 191 if (msg == NULL || buflen != sizeof (fma_cpu_service_req_t)) { 192 return; 193 } 194 195 FI_DBG(CE_CONT, "req_num = %ld, msg_type = %d, cpu_id = %d\n", 196 msg->req_num, msg->msg_type, msg->cpu_id); 197 198 resp_msg.req_num = msg->req_num; 199 200 switch (msg->msg_type) { 201 case FMA_CPU_REQ_STATUS: 202 rv = p_online_internal(msg->cpu_id, P_STATUS, 203 &cpu_status); 204 if (rv == EINVAL) { 205 FI_DBG(CE_CONT, "Failed p_online call failed." 206 "Invalid CPU\n"); 207 resp_msg.result = FMA_CPU_RESP_FAILURE; 208 resp_msg.status = FMA_CPU_STAT_ILLEGAL; 209 resp_back = 1; 210 } 211 break; 212 case FMA_CPU_REQ_OFFLINE: 213 rv = p_online_internal(msg->cpu_id, P_FAULTED, 214 &cpu_status); 215 if (rv == EINVAL) { 216 FI_DBG(CE_CONT, "Failed p_online call failed." 217 "Invalid CPU\n"); 218 resp_msg.result = FMA_CPU_RESP_FAILURE; 219 resp_msg.status = FMA_CPU_STAT_ILLEGAL; 220 resp_back = 1; 221 } else if (rv == EBUSY) { 222 FI_DBG(CE_CONT, "Failed p_online call failed." 223 "Tried to offline while busy\n"); 224 resp_msg.result = FMA_CPU_RESP_FAILURE; 225 resp_msg.status = FMA_CPU_STAT_ONLINE; 226 resp_back = 1; 227 } 228 break; 229 case FMA_CPU_REQ_ONLINE: 230 rv = p_online_internal(msg->cpu_id, P_ONLINE, 231 &cpu_status); 232 if (rv == EINVAL) { 233 FI_DBG(CE_CONT, "Failed p_online call failed." 234 "Invalid CPU\n"); 235 resp_msg.result = FMA_CPU_RESP_FAILURE; 236 resp_msg.status = FMA_CPU_STAT_ILLEGAL; 237 resp_back = 1; 238 } else if (rv == ENOTSUP) { 239 FI_DBG(CE_CONT, "Failed p_online call failed." 240 "Online not supported for single CPU\n"); 241 resp_msg.result = FMA_CPU_RESP_FAILURE; 242 resp_msg.status = FMA_CPU_STAT_OFFLINE; 243 resp_back = 1; 244 } 245 break; 246 default: 247 /* 248 * If the msg_type was of unknown type simply return and 249 * have the ldom mgr. time out with a NULL response. 250 */ 251 return; 252 } 253 254 if (rv != 0) { 255 if (resp_back) { 256 if ((rv = ds_cap_send(cpu_handle, &resp_msg, 257 sizeof (resp_msg))) != 0) { 258 FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n", 259 rv); 260 } 261 return; 262 } 263 ASSERT((rv == EINVAL) || ((rv == EBUSY) && 264 (msg->msg_type == FMA_CPU_REQ_OFFLINE)) || 265 ((rv == ENOTSUP) && (msg->msg_type == FMA_CPU_REQ_ONLINE))); 266 267 cmn_err(CE_WARN, "p_online_internal error not handled " 268 "rv = %d\n", rv); 269 } 270 271 resp_msg.req_num = msg->req_num; 272 resp_msg.result = FMA_CPU_RESP_OK; 273 274 switch (cpu_status) { 275 case P_OFFLINE: 276 case P_FAULTED: 277 case P_POWEROFF: 278 case P_SPARE: 279 resp_msg.status = FMA_CPU_STAT_OFFLINE; 280 break; 281 case P_ONLINE: 282 case P_NOINTR: 283 resp_msg.status = FMA_CPU_STAT_ONLINE; 284 break; 285 default: 286 resp_msg.status = FMA_CPU_STAT_ILLEGAL; 287 } 288 289 if ((rv = ds_cap_send(cpu_handle, &resp_msg, 290 sizeof (resp_msg))) != 0) { 291 FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n", rv); 292 } 293 } 294 295 static void 296 mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen) 297 { 298 _NOTE(ARGUNUSED(arg)) 299 300 fma_mem_service_req_t *msg = buf; 301 fma_mem_resp_t resp_msg; 302 int rv = 0; 303 304 /* 305 * If the buffer is the wrong size for Mem calls or is NULL then 306 * do not return any message. The call from the ldom mgr. will time out 307 * and the response will be NULL. 308 */ 309 if (msg == NULL || buflen != sizeof (fma_mem_service_req_t)) { 310 return; 311 } 312 313 FI_DBG(CE_CONT, "req_num = %ld, msg_type = %d, memory addr = 0x%lx" 314 "memory length = 0x%lx\n", msg->req_num, msg->msg_type, 315 msg->real_addr, msg->length); 316 317 resp_msg.req_num = msg->req_num; 318 resp_msg.res_addr = msg->real_addr; 319 resp_msg.res_length = msg->length; 320 321 /* 322 * Information about return values for page calls can be referenced 323 * in usr/src/uts/common/vm/page_retire.c 324 */ 325 switch (msg->msg_type) { 326 case FMA_MEM_REQ_STATUS: 327 rv = page_retire_check(msg->real_addr, NULL); 328 switch (rv) { 329 /* Page is retired */ 330 case 0: 331 resp_msg.result = FMA_MEM_RESP_OK; 332 resp_msg.status = FMA_MEM_STAT_RETIRED; 333 break; 334 /* Page is pending. Send back failure and not retired */ 335 case EAGAIN: 336 resp_msg.result = FMA_MEM_RESP_FAILURE; 337 resp_msg.status = FMA_MEM_STAT_NOTRETIRED; 338 break; 339 /* Page is not retired. */ 340 case EIO: 341 resp_msg.result = FMA_MEM_RESP_OK; 342 resp_msg.status = FMA_MEM_STAT_NOTRETIRED; 343 break; 344 /* PA is not valid */ 345 case EINVAL: 346 resp_msg.result = FMA_MEM_RESP_FAILURE; 347 resp_msg.status = FMA_MEM_STAT_ILLEGAL; 348 break; 349 default: 350 ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) || 351 (rv == EINVAL)); 352 cmn_err(CE_WARN, "fault_iso: return value from " 353 "page_retire_check invalid: %d\n", rv); 354 } 355 break; 356 case FMA_MEM_REQ_RETIRE: 357 rv = page_retire(msg->real_addr, PR_FMA); 358 switch (rv) { 359 /* Page retired successfully */ 360 case 0: 361 resp_msg.result = FMA_MEM_RESP_OK; 362 resp_msg.status = FMA_MEM_STAT_RETIRED; 363 break; 364 /* Tried to retire and now Pending retirement */ 365 case EAGAIN: 366 resp_msg.result = FMA_MEM_RESP_FAILURE; 367 resp_msg.status = FMA_MEM_STAT_NOTRETIRED; 368 break; 369 /* Did not try to retire. Page already retired */ 370 case EIO: 371 resp_msg.result = FMA_MEM_RESP_FAILURE; 372 resp_msg.status = FMA_MEM_STAT_RETIRED; 373 break; 374 /* PA is not valid */ 375 case EINVAL: 376 resp_msg.result = FMA_MEM_RESP_FAILURE; 377 resp_msg.status = FMA_MEM_STAT_ILLEGAL; 378 break; 379 default: 380 ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) || 381 (rv == EINVAL)); 382 cmn_err(CE_WARN, "fault_iso: return value from " 383 "page_retire invalid: %d\n", rv); 384 } 385 break; 386 case FMA_MEM_REQ_RESURRECT: 387 rv = page_unretire(msg->real_addr); 388 switch (rv) { 389 /* Page succesfullly unretired */ 390 case 0: 391 resp_msg.result = FMA_MEM_RESP_OK; 392 resp_msg.status = FMA_MEM_STAT_NOTRETIRED; 393 break; 394 /* Page could not be locked. Still retired */ 395 case EAGAIN: 396 resp_msg.result = FMA_MEM_RESP_FAILURE; 397 resp_msg.status = FMA_MEM_STAT_RETIRED; 398 break; 399 /* Page was not retired already */ 400 case EIO: 401 resp_msg.result = FMA_MEM_RESP_FAILURE; 402 resp_msg.status = FMA_MEM_STAT_NOTRETIRED; 403 break; 404 /* PA is not valid */ 405 case EINVAL: 406 resp_msg.result = FMA_MEM_RESP_FAILURE; 407 resp_msg.status = FMA_MEM_STAT_ILLEGAL; 408 break; 409 default: 410 ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) || 411 (rv == EINVAL)); 412 cmn_err(CE_WARN, "fault_iso: return value from " 413 "page_unretire invalid: %d\n", rv); 414 } 415 break; 416 default: 417 /* 418 * If the msg_type was of unknown type simply return and 419 * have the ldom mgr. time out with a NULL response. 420 */ 421 return; 422 } 423 424 if ((rv = ds_cap_send(mem_handle, &resp_msg, sizeof (resp_msg))) != 0) { 425 FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n", rv); 426 } 427 } 428 429 static void 430 fi_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl) 431 { 432 FI_DBG(CE_CONT, "fi_reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", 433 arg, ver->major, ver->minor, hdl); 434 435 if ((ds_svc_hdl_t *)arg == &cpu_handle) 436 cpu_handle = hdl; 437 if ((ds_svc_hdl_t *)arg == &mem_handle) 438 mem_handle = hdl; 439 } 440 441 static void 442 fi_unreg_handler(ds_cb_arg_t arg) 443 { 444 FI_DBG(CE_CONT, "fi_unreg_handler: arg=0x%p\n", arg); 445 446 if ((ds_svc_hdl_t *)arg == &cpu_handle) 447 cpu_handle = DS_INVALID_HDL; 448 if ((ds_svc_hdl_t *)arg == &mem_handle) 449 mem_handle = DS_INVALID_HDL; 450 } 451