1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <strings.h> 29 #include <string.h> 30 #include <errno.h> 31 #include <fm/fmd_api.h> 32 #include <sys/fm/protocol.h> 33 #include <sys/async.h> 34 #include <sys/time.h> 35 #include <cmd.h> 36 #include <cmd_state.h> 37 #include <cmd_mem.h> 38 #include <cmd_dp.h> 39 #include <cmd_dp_page.h> 40 #include <libnvpair.h> 41 #include <fcntl.h> 42 #include <unistd.h> 43 #include <sys/mem.h> 44 #include <sys/plat_datapath.h> 45 46 /*ARGSUSED*/ 47 static nvlist_t * 48 dp_cpu_fmri(fmd_hdl_t *hdl, uint32_t cpuid, uint64_t serial_id) 49 { 50 nvlist_t *nvl = NULL; 51 int err; 52 char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */ 53 54 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) 55 return (NULL); 56 57 err = nvlist_add_string(nvl, FM_FMRI_SCHEME, FM_FMRI_SCHEME_CPU); 58 err |= nvlist_add_uint8(nvl, FM_VERSION, FM_CPU_SCHEME_VERSION); 59 err |= nvlist_add_uint32(nvl, FM_FMRI_CPU_ID, cpuid); 60 61 /* 62 * Version 1 calls for a string-based serial number 63 */ 64 (void) snprintf(sbuf, sizeof (sbuf), "%llX", (u_longlong_t)serial_id); 65 err |= nvlist_add_string(nvl, FM_FMRI_CPU_SERIAL_ID, sbuf); 66 if (err != 0) { 67 nvlist_free(nvl); 68 return (NULL); 69 } 70 return (nvl); 71 } 72 73 cmd_dp_t * 74 cmd_dp_lookup_fault(fmd_hdl_t *hdl, uint32_t cpuid) 75 { 76 cmd_dp_t *ptr; 77 int i, found = 0; 78 79 /* 80 * Scan the cmd.cmd_datapaths list to see if there is 81 * a fault event present that impacts 'cpuid' 82 */ 83 for (ptr = cmd_list_next(&cmd.cmd_datapaths); ptr != NULL; 84 ptr = cmd_list_next(ptr)) { 85 if (ptr->dp_erpt_type == DP_FAULT) { 86 for (i = 0; i < ptr->dp_ncpus; i++) { 87 if (ptr->dp_cpuid_list[i] == cpuid) { 88 found = 1; 89 break; 90 } 91 } 92 } 93 if (found) 94 break; 95 } 96 97 /* 98 * Check if the FMRI for the found cpuid exists in the domain. 99 * If it does not, it implies a DR has been done and this DP_FAULT 100 * is no longer needed. 101 */ 102 if (ptr != NULL) { 103 nvlist_t *nvl; 104 105 nvl = dp_cpu_fmri(hdl, ptr->dp_cpuid_list[i], 106 ptr->dp_serid_list[i]); 107 108 if (nvl != NULL) { 109 if (!fmd_nvl_fmri_present(hdl, nvl)) { 110 cmd_dp_destroy(hdl, ptr); 111 ptr = NULL; 112 } 113 nvlist_free(nvl); 114 } 115 } 116 return (ptr); 117 } 118 119 cmd_dp_t * 120 cmd_dp_lookup_error(cmd_dp_t *dp) 121 { 122 cmd_dp_t *ptr; 123 124 /* 125 * Scan the cmd.cmd_datapaths list to see if there is 126 * an existing error that matches 'dp'. A match is if 127 * both dp_err and the base cpuid are identical 128 */ 129 for (ptr = cmd_list_next(&cmd.cmd_datapaths); ptr != NULL; 130 ptr = cmd_list_next(ptr)) { 131 if (ptr->dp_erpt_type == DP_ERROR) { 132 if ((ptr->dp_err == dp->dp_err) && 133 (ptr->dp_cpuid_list[0] == dp->dp_cpuid_list[0])) 134 return (ptr); 135 } 136 } 137 return (NULL); 138 } 139 140 /* 141 * Allocates an nvlist_t, and sets ASRU information according to 142 * the cmd_dp_t provided. 143 */ 144 /*ARGSUSED*/ 145 nvlist_t * 146 cmd_dp_setasru(fmd_hdl_t *hdl, cmd_dp_t *dpt) 147 { 148 nvlist_t *asru, *hcelem[DP_MAX_ASRUS]; 149 int i, j, sz, err; 150 char buf[DP_MAX_BUF]; 151 152 sz = dpt->dp_ncpus; 153 154 /* put ASRUs in an nvlist */ 155 for (i = 0; i < sz; i++) { 156 (void) snprintf(buf, DP_MAX_BUF, "%d", dpt->dp_cpuid_list[i]); 157 if (nvlist_alloc(&hcelem[i], NV_UNIQUE_NAME, 0) != 0) 158 return (NULL); 159 160 err = nvlist_add_string(hcelem[i], FM_FMRI_HC_NAME, 161 FM_FMRI_CPU_ID); 162 err |= nvlist_add_string(hcelem[i], FM_FMRI_HC_ID, buf); 163 if (err != 0) { 164 for (j = 0; j < i + 1; j++) 165 nvlist_free(hcelem[j]); 166 return (NULL); 167 } 168 } 169 170 /* put it in an HC scheme */ 171 if (nvlist_alloc(&asru, NV_UNIQUE_NAME, 0) != 0) { 172 for (j = 0; j < sz; j++) 173 nvlist_free(hcelem[j]); 174 return (NULL); 175 } 176 err = nvlist_add_uint8(asru, FM_VERSION, FM_HC_SCHEME_VERSION); 177 err |= nvlist_add_string(asru, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 178 err |= nvlist_add_string(asru, FM_FMRI_HC_ROOT, ""); 179 err |= nvlist_add_uint32(asru, FM_FMRI_HC_LIST_SZ, sz); 180 err |= nvlist_add_nvlist_array(asru, FM_FMRI_HC_LIST, &hcelem[0], 181 dpt->dp_ncpus); 182 if (err != 0) { 183 for (j = 0; j < sz; j++) 184 nvlist_free(hcelem[j]); 185 nvlist_free(asru); 186 return (NULL); 187 } 188 189 /* free up memory */ 190 for (j = 0; j < sz; j++) 191 nvlist_free(hcelem[j]); 192 193 /* return the ASRU */ 194 return (asru); 195 } 196 197 void 198 dp_buf_write(fmd_hdl_t *hdl, cmd_dp_t *dp) 199 { 200 size_t sz; 201 202 if ((sz = fmd_buf_size(hdl, NULL, dp->dp_bufname)) != 0 && 203 sz != sizeof (cmd_dp_pers_t)) 204 fmd_buf_destroy(hdl, NULL, dp->dp_bufname); 205 206 fmd_buf_write(hdl, NULL, dp->dp_bufname, &dp->dp_pers, 207 sizeof (cmd_dp_pers_t)); 208 } 209 210 static cmd_dp_t * 211 dp_wrapv0(fmd_hdl_t *hdl, cmd_dp_pers_t *pers, size_t psz) 212 { 213 cmd_dp_t *dp; 214 215 if (psz != sizeof (cmd_dp_pers_t)) { 216 fmd_hdl_abort(hdl, "size of state doesn't match size of " 217 "version 1 state (%u bytes).\n", sizeof (cmd_dp_pers_t)); 218 } 219 220 dp = fmd_hdl_zalloc(hdl, sizeof (cmd_dp_t), FMD_SLEEP); 221 bcopy(pers, dp, sizeof (cmd_dp_pers_t)); 222 fmd_hdl_free(hdl, pers, psz); 223 return (dp); 224 } 225 226 void * 227 cmd_dp_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr) 228 { 229 cmd_dp_t *dp; 230 231 for (dp = cmd_list_next(&cmd.cmd_datapaths); dp != NULL; 232 dp = cmd_list_next(dp)) { 233 if (dp->dp_case == cp) 234 break; 235 } 236 237 if (dp == NULL) { 238 size_t dpsz; 239 240 fmd_hdl_debug(hdl, "restoring dp from %s\n", ptr->ptr_name); 241 242 if ((dpsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) { 243 if (fmd_case_solved(hdl, cp) || 244 fmd_case_closed(hdl, cp)) { 245 fmd_hdl_debug(hdl, "dp %s from case %s not " 246 "found. Case is already solved or closed\n", 247 ptr->ptr_name, fmd_case_uuid(hdl, cp)); 248 return (NULL); 249 } else { 250 fmd_hdl_abort(hdl, "dp referenced by case %s " 251 "does not exist in saved state\n", 252 fmd_case_uuid(hdl, cp)); 253 } 254 } else if (dpsz > CMD_DP_MAXSIZE || 255 dpsz < CMD_DP_MINSIZE) { 256 fmd_hdl_abort(hdl, "dp buffer referenced by " 257 "case %s is out of bounds (is %u bytes, " 258 "max %u, min %u)\n", fmd_case_uuid(hdl, cp), 259 dpsz, CMD_DP_MAXSIZE, CMD_DP_MINSIZE); 260 } 261 262 if ((dp = cmd_buf_read(hdl, NULL, ptr->ptr_name, dpsz)) == NULL) 263 fmd_hdl_abort(hdl, "failed to read dp buf %s", 264 ptr->ptr_name); 265 266 switch (dp->dp_version) { 267 case CMD_DP_VERSION_0: 268 dp = dp_wrapv0(hdl, (cmd_dp_pers_t *)dp, dpsz); 269 break; 270 default: 271 fmd_hdl_abort(hdl, "unknown version (found %d) " 272 "for dp state referenced by case %s.\n", 273 dp->dp_version, fmd_case_uuid(hdl, cp)); 274 break; 275 } 276 277 dp->dp_case = cp; 278 279 if (dp->dp_erpt_type == DP_ERROR) { 280 fmd_event_t *ep = fmd_case_getprincipal(hdl, cp); 281 282 ++cmd.cmd_dp_flag; 283 284 dp->dp_id = fmd_timer_install(hdl, 285 (void *)CMD_TIMERTYPE_DP, ep, 286 (hrtime_t)NANOSEC * (dp->dp_t_value + 120)); 287 } 288 289 cmd_list_append(&cmd.cmd_datapaths, dp); 290 } 291 292 return (dp); 293 } 294 295 void 296 cmd_dp_close(fmd_hdl_t *hdl, void *arg) 297 { 298 cmd_dp_destroy(hdl, arg); 299 } 300 301 void 302 cmd_dp_timeout(fmd_hdl_t *hdl, id_t id) 303 { 304 cmd_dp_t *dp; 305 306 /* close case associated with the timer */ 307 for (dp = cmd_list_next(&cmd.cmd_datapaths); dp != NULL; 308 dp = cmd_list_next(dp)) { 309 if (dp->dp_id == id) { 310 cmd_dp_destroy(hdl, dp); 311 break; 312 } 313 } 314 315 fmd_hdl_debug(hdl, "cmd_dp_timeout() complete\n"); 316 } 317 318 /* 319 * Validate by matching each cmd_dp_t cpu and serial id to what is 320 * installed and active on this machine or domain. Delete the cmd_dp_t 321 * if no match is made. 322 */ 323 void 324 cmd_dp_validate(fmd_hdl_t *hdl) 325 { 326 cmd_dp_t *dp, *next; 327 nvlist_t *nvl; 328 int i, no_match; 329 330 for (dp = cmd_list_next(&cmd.cmd_datapaths); dp != NULL; dp = next) { 331 next = cmd_list_next(dp); 332 333 for (i = 0, no_match = 0; i < dp->dp_ncpus; i++) { 334 nvl = dp_cpu_fmri(hdl, dp->dp_cpuid_list[i], 335 dp->dp_serid_list[i]); 336 337 if (nvl == NULL) 338 fmd_hdl_abort(hdl, "could not make CPU fmri"); 339 340 if (!fmd_nvl_fmri_present(hdl, nvl)) 341 no_match = 1; 342 343 nvlist_free(nvl); 344 345 if (no_match) { 346 cmd_dp_destroy(hdl, dp); 347 break; 348 } 349 } 350 } 351 } 352 353 static void 354 cmd_dp_free(fmd_hdl_t *hdl, cmd_dp_t *dp, int destroy) 355 { 356 if (dp->dp_case != NULL) 357 cmd_case_fini(hdl, dp->dp_case, destroy); 358 359 if (destroy && dp->dp_erpt_type == DP_ERROR) { 360 --cmd.cmd_dp_flag; 361 /* 362 * If there are no active datapath events, replay any 363 * pages that were deferred. 364 */ 365 if (cmd.cmd_dp_flag == 0) 366 cmd_dp_page_replay(hdl); 367 } 368 369 if (destroy) 370 fmd_buf_destroy(hdl, NULL, dp->dp_bufname); 371 372 cmd_list_delete(&cmd.cmd_datapaths, dp); 373 fmd_hdl_free(hdl, dp, sizeof (cmd_dp_t)); 374 } 375 376 void 377 cmd_dp_destroy(fmd_hdl_t *hdl, cmd_dp_t *dp) 378 { 379 cmd_dp_free(hdl, dp, FMD_B_TRUE); 380 } 381 382 /*ARGSUSED*/ 383 int 384 cmd_dp_error(fmd_hdl_t *hdl) 385 { 386 if (cmd.cmd_dp_flag) 387 return (1); 388 else 389 return (0); 390 } 391 392 int 393 cmd_dp_get_mcid(uint64_t addr, int *mcid) 394 { 395 int fd, rc; 396 mem_info_t data; 397 398 if ((fd = open("/dev/mem", O_RDONLY)) < 0) 399 return (-1); 400 401 data.m_addr = addr; 402 data.m_synd = 0; 403 if ((rc = ioctl(fd, MEM_INFO, &data)) < 0) { 404 (void) close(fd); 405 return (rc); 406 } 407 408 (void) close(fd); 409 *mcid = data.m_mcid; 410 411 return (0); 412 } 413 414 /*ARGSUSED*/ 415 int 416 cmd_dp_fault(fmd_hdl_t *hdl, uint64_t addr) 417 { 418 int mcid; 419 420 if (cmd_dp_get_mcid(addr, &mcid) < 0) 421 fmd_hdl_abort(hdl, "cmd_dp_get_mcid failed"); 422 423 if (cmd_dp_lookup_fault(hdl, mcid) != NULL) 424 return (1); 425 else 426 return (0); 427 } 428 429 void 430 cmd_dp_fini(fmd_hdl_t *hdl) 431 { 432 cmd_dp_t *dp; 433 cmd_dp_defer_t *dpage; 434 435 while ((dp = cmd_list_next(&cmd.cmd_datapaths)) != NULL) 436 cmd_dp_free(hdl, dp, FMD_B_FALSE); 437 438 while ((dpage = cmd_list_next(&cmd.cmd_deferred_pages)) != NULL) { 439 cmd_list_delete(&cmd.cmd_deferred_pages, dpage); 440 fmd_hdl_free(hdl, dpage, sizeof (cmd_dp_defer_t)); 441 } 442 } 443