1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/cred.h> 27 #include <sys/kstat.h> 28 #include <sys/list.h> 29 #include <sys/systm.h> 30 #include <sys/vfs.h> 31 #include <sys/vnode.h> 32 #include <sys/cmn_err.h> 33 34 #include <nfs/nfs4_clnt.h> 35 #include <nfs/rnode4.h> 36 37 /* 38 * Recovery kstats 39 */ 40 typedef struct rkstat { 41 kstat_named_t badhandle; 42 kstat_named_t badowner; 43 kstat_named_t clientid; 44 kstat_named_t dead_file; 45 kstat_named_t delay; 46 kstat_named_t fail_relock; 47 kstat_named_t file_diff; 48 kstat_named_t no_grace; 49 kstat_named_t not_responding; 50 kstat_named_t opens_changed; 51 kstat_named_t siglost; 52 kstat_named_t unexp_action; 53 kstat_named_t unexp_errno; 54 kstat_named_t unexp_status; 55 kstat_named_t wrongsec; 56 kstat_named_t lost_state_bad_op; 57 } rkstat_t; 58 59 static rkstat_t rkstat_template = { 60 { "badhandle", KSTAT_DATA_ULONG }, 61 { "badowner", KSTAT_DATA_ULONG }, 62 { "clientid", KSTAT_DATA_ULONG }, 63 { "dead_file", KSTAT_DATA_ULONG }, 64 { "delay", KSTAT_DATA_ULONG }, 65 { "fail_relock", KSTAT_DATA_ULONG }, 66 { "file_diff", KSTAT_DATA_ULONG }, 67 { "no_grace", KSTAT_DATA_ULONG }, 68 { "not_responding", KSTAT_DATA_ULONG }, 69 { "opens_changed", KSTAT_DATA_ULONG }, 70 { "siglost", KSTAT_DATA_ULONG }, 71 { "unexp_action", KSTAT_DATA_ULONG }, 72 { "unexp_errno", KSTAT_DATA_ULONG }, 73 { "unexp_status", KSTAT_DATA_ULONG }, 74 { "wrongsec", KSTAT_DATA_ULONG }, 75 { "bad_op", KSTAT_DATA_ULONG }, 76 }; 77 78 /* maximum number of messages allowed on the mi's mi_msg_list */ 79 int nfs4_msg_max = NFS4_MSG_MAX; 80 #define DEFAULT_LEASE 180 81 82 /* 83 * Sets the appropiate fields of "ep", given "id" and various parameters. 84 * Assumes that ep's fields have been initialized to zero/null, except for 85 * re_type and mount point info, which are already set. 86 */ 87 static void 88 set_event(nfs4_event_type_t id, nfs4_revent_t *ep, mntinfo4_t *mi, 89 rnode4_t *rp1, rnode4_t *rp2, uint_t count, pid_t pid, nfsstat4 nfs4_error, 90 char *server1, char *why, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2, 91 seqid4 seqid1, seqid4 seqid2) 92 { 93 int len; 94 95 switch (id) { 96 case RE_BAD_SEQID: 97 ep->re_mi = mi; 98 99 /* bad seqid'd file <path/component name> */ 100 if (rp1 && rp1->r_svnode.sv_name) 101 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 102 else 103 ep->re_char1 = NULL; 104 ep->re_rp1 = rp1; 105 106 /* for LOCK/LOCKU */ 107 ep->re_pid = pid; 108 109 ep->re_stat4 = nfs4_error; 110 ep->re_tag1 = tag1; 111 ep->re_tag2 = tag2; 112 ep->re_seqid1 = seqid1; 113 ep->re_seqid2 = seqid2; 114 break; 115 case RE_BADHANDLE: 116 ASSERT(rp1 != NULL); 117 118 /* dead file <path/component name> */ 119 if (rp1->r_svnode.sv_name) 120 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 121 else 122 ep->re_char1 = NULL; 123 ep->re_rp1 = rp1; 124 break; 125 case RE_CLIENTID: 126 ep->re_mi = mi; 127 128 /* the error we failed with */ 129 ep->re_uint = count; 130 ep->re_stat4 = nfs4_error; 131 break; 132 case RE_DEAD_FILE: 133 ASSERT(rp1 != NULL); 134 135 /* dead file <path/component name> */ 136 if (rp1->r_svnode.sv_name) 137 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 138 else 139 ep->re_char1 = NULL; 140 ep->re_rp1 = rp1; 141 142 /* why the file got killed */ 143 if (why) { 144 len = strlen(why); 145 ep->re_char2 = kmem_alloc(len + 1, KM_SLEEP); 146 bcopy(why, ep->re_char2, len); 147 ep->re_char2[len] = '\0'; 148 } else 149 ep->re_char2 = NULL; 150 151 ep->re_stat4 = nfs4_error; 152 break; 153 case RE_END: 154 /* first rnode */ 155 if (rp1 && rp1->r_svnode.sv_name) 156 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 157 else 158 ep->re_char1 = NULL; 159 ep->re_rp1 = rp1; 160 161 /* second rnode */ 162 if (rp2 && rp2->r_svnode.sv_name) 163 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 164 else 165 ep->re_char2 = NULL; 166 ep->re_rp2 = rp2; 167 168 ep->re_mi = mi; 169 break; 170 case RE_FAIL_RELOCK: 171 ASSERT(rp1 != NULL); 172 173 /* error on fail relock */ 174 ep->re_uint = count; 175 176 /* process that failed */ 177 ep->re_pid = pid; 178 179 /* nfs4 error */ 180 ep->re_stat4 = nfs4_error; 181 182 /* file <path/component name> */ 183 if (rp1->r_svnode.sv_name) 184 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 185 else 186 ep->re_char1 = NULL; 187 ep->re_rp1 = rp1; 188 break; 189 case RE_FAIL_REMAP_LEN: 190 /* length of returned filehandle */ 191 ep->re_uint = count; 192 break; 193 case RE_FAIL_REMAP_OP: 194 break; 195 case RE_FAILOVER: 196 /* server we're failing over to (if not picking original) */ 197 if (server1 != NULL) { 198 len = strlen(server1); 199 ep->re_char1 = kmem_alloc(len + 1, KM_SLEEP); 200 bcopy(server1, ep->re_char1, len); 201 ep->re_char1[len] = '\0'; 202 } else { 203 ep->re_char1 = NULL; 204 } 205 break; 206 case RE_FILE_DIFF: 207 ASSERT(rp1 != NULL); 208 209 /* dead file <path/component name> */ 210 if (rp1->r_svnode.sv_name) 211 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 212 else 213 ep->re_char1 = NULL; 214 ep->re_rp1 = rp1; 215 break; 216 case RE_LOST_STATE: 217 ep->re_uint = count; /* op number */ 218 if (rp1 && rp1->r_svnode.sv_name) 219 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 220 else 221 ep->re_char1 = NULL; 222 ep->re_rp1 = rp1; 223 if (rp2 && rp2->r_svnode.sv_name) 224 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 225 else 226 ep->re_char2 = NULL; 227 ep->re_rp2 = rp2; 228 break; 229 case RE_OPENS_CHANGED: 230 ep->re_mi = mi; 231 232 /* original number of open files */ 233 ep->re_uint = count; 234 /* new number of open files */ 235 ep->re_pid = pid; 236 break; 237 case RE_SIGLOST: 238 case RE_SIGLOST_NO_DUMP: 239 ASSERT(rp1 != NULL); 240 241 /* file <path/component name> */ 242 if (rp1->r_svnode.sv_name) 243 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 244 else 245 ep->re_char1 = NULL; 246 ep->re_rp1 = rp1; 247 ep->re_pid = pid; 248 ep->re_uint = count; 249 ep->re_stat4 = nfs4_error; 250 break; 251 case RE_START: 252 /* file <path/component name> */ 253 if (rp1 && rp1->r_svnode.sv_name) 254 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 255 else 256 ep->re_char1 = NULL; 257 ep->re_rp1 = rp1; 258 259 /* file <path/component name> */ 260 if (rp2 && rp2->r_svnode.sv_name) 261 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 262 else 263 ep->re_char2 = NULL; 264 ep->re_rp2 = rp2; 265 266 ep->re_mi = mi; 267 ep->re_uint = count; 268 break; 269 case RE_UNEXPECTED_ACTION: 270 case RE_UNEXPECTED_ERRNO: 271 /* the error that is unexpected */ 272 ep->re_uint = count; 273 break; 274 case RE_UNEXPECTED_STATUS: 275 /* nfsstat4 error */ 276 ep->re_stat4 = nfs4_error; 277 break; 278 case RE_WRONGSEC: 279 /* the error we failed with */ 280 ep->re_uint = count; 281 282 /* file <path/component name> */ 283 if (rp1 && rp1->r_svnode.sv_name) 284 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 285 else 286 ep->re_char1 = NULL; 287 ep->re_rp1 = rp1; 288 289 /* file <path/component name> */ 290 if (rp2 && rp2->r_svnode.sv_name) 291 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 292 else 293 ep->re_char2 = NULL; 294 ep->re_rp2 = rp2; 295 break; 296 case RE_LOST_STATE_BAD_OP: 297 ep->re_uint = count; /* the unexpected op */ 298 ep->re_pid = pid; 299 ep->re_rp1 = rp1; 300 if (rp1 != NULL && rp1->r_svnode.sv_name != NULL) 301 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 302 ep->re_rp2 = rp2; 303 if (rp2 != NULL && rp2->r_svnode.sv_name != NULL) 304 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 305 break; 306 default: 307 break; 308 } 309 } 310 311 /* 312 * Sets the appropiate fields of the 'fact' for this 'id'. 313 */ 314 static void 315 set_fact(nfs4_fact_type_t id, nfs4_rfact_t *fp, nfsstat4 stat4, 316 nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, int error, 317 vnode_t *vp) 318 { 319 rnode4_t *rp1; 320 321 switch (id) { 322 case RF_BADOWNER: 323 fp->rf_op = op; 324 fp->rf_reboot = reboot; 325 fp->rf_stat4 = stat4; 326 break; 327 case RF_RENEW_EXPIRED: 328 break; 329 case RF_ERR: 330 fp->rf_op = op; 331 fp->rf_reboot = reboot; 332 fp->rf_stat4 = stat4; 333 fp->rf_action = raction; 334 fp->rf_error = error; 335 break; 336 case RF_SRV_OK: 337 break; 338 case RF_SRV_NOT_RESPOND: 339 break; 340 case RF_SRVS_OK: 341 break; 342 case RF_SRVS_NOT_RESPOND: 343 gethrestime(&fp->rf_time); 344 break; 345 case RF_DELMAP_CB_ERR: 346 fp->rf_op = op; 347 fp->rf_stat4 = stat4; 348 349 rp1 = VTOR4(vp); 350 fp->rf_rp1 = rp1; 351 if (rp1 && rp1->r_svnode.sv_name) 352 fp->rf_char1 = fn_path(rp1->r_svnode.sv_name); 353 else 354 fp->rf_char1 = NULL; 355 break; 356 case RF_SENDQ_FULL: 357 break; 358 default: 359 zcmn_err(getzoneid(), CE_NOTE, "illegal fact %d", id); 360 break; 361 } 362 } 363 364 /* 365 * Returns 1 if the event/fact is of a successful communication 366 * from the server; 0 otherwise. 367 */ 368 static int 369 successful_comm(nfs4_debug_msg_t *msgp) 370 { 371 if (msgp->msg_type == RM_EVENT) { 372 switch (msgp->rmsg_u.msg_event.re_type) { 373 case RE_BAD_SEQID: 374 case RE_BADHANDLE: 375 case RE_FAIL_REMAP_LEN: 376 case RE_FAIL_REMAP_OP: 377 case RE_FILE_DIFF: 378 case RE_START: 379 case RE_UNEXPECTED_ACTION: 380 case RE_UNEXPECTED_ERRNO: 381 case RE_UNEXPECTED_STATUS: 382 case RE_WRONGSEC: 383 return (1); 384 case RE_CLIENTID: 385 case RE_DEAD_FILE: 386 case RE_END: 387 case RE_FAIL_RELOCK: 388 case RE_FAILOVER: 389 case RE_LOST_STATE: 390 case RE_OPENS_CHANGED: 391 case RE_SIGLOST: 392 case RE_SIGLOST_NO_DUMP: 393 case RE_LOST_STATE_BAD_OP: 394 return (0); 395 default: 396 return (0); 397 } 398 } else { 399 switch (msgp->rmsg_u.msg_fact.rf_type) { 400 case RF_BADOWNER: 401 case RF_ERR: 402 case RF_RENEW_EXPIRED: 403 case RF_SRV_OK: 404 case RF_SRVS_OK: 405 case RF_DELMAP_CB_ERR: 406 return (1); 407 case RF_SRV_NOT_RESPOND: 408 case RF_SRVS_NOT_RESPOND: 409 case RF_SENDQ_FULL: 410 return (0); 411 default: 412 return (0); 413 } 414 } 415 } 416 417 /* 418 * Iterate backwards through the mi's mi_msg_list to find the earliest 419 * message that we should find relevant facts to investigate. 420 */ 421 static nfs4_debug_msg_t * 422 find_beginning(nfs4_debug_msg_t *first_msg, mntinfo4_t *mi) 423 { 424 nfs4_debug_msg_t *oldest_msg, *cur_msg; 425 time_t lease; 426 427 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 428 if (mi->mi_lease_period > 0) 429 lease = 2 * mi->mi_lease_period; 430 else 431 lease = DEFAULT_LEASE; 432 433 oldest_msg = first_msg; 434 cur_msg = list_prev(&mi->mi_msg_list, first_msg); 435 while (cur_msg && 436 first_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) { 437 oldest_msg = cur_msg; 438 if ((cur_msg->msg_type == RM_FACT) && 439 (cur_msg->rmsg_u.msg_fact.rf_type == RF_SRV_OK)) { 440 /* find where we lost contact with the server */ 441 while (cur_msg) { 442 if ((cur_msg->msg_type == RM_FACT) && 443 (cur_msg->rmsg_u.msg_fact.rf_type == 444 RF_SRV_NOT_RESPOND)) 445 break; 446 oldest_msg = cur_msg; 447 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 448 } 449 /* 450 * Find the first successful message before 451 * we lost contact with the server. 452 */ 453 if (cur_msg) { 454 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 455 while (cur_msg && !successful_comm(cur_msg)) { 456 oldest_msg = cur_msg; 457 cur_msg = list_prev(&mi->mi_msg_list, 458 cur_msg); 459 } 460 } 461 /* 462 * If we're not at the dummy head pointer, 463 * set the oldest and current message. 464 */ 465 if (cur_msg) { 466 first_msg = cur_msg; 467 oldest_msg = cur_msg; 468 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 469 } 470 } else 471 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 472 } 473 474 return (oldest_msg); 475 } 476 477 /* 478 * Returns 1 if facts have been found; 0 otherwise. 479 */ 480 static int 481 get_facts(nfs4_debug_msg_t *msgp, nfs4_rfact_t *ret_fp, char **mnt_pt, 482 mntinfo4_t *mi) 483 { 484 nfs4_debug_msg_t *cur_msg, *oldest_msg; 485 nfs4_rfact_t *cur_fp; 486 int found_a_fact = 0; 487 int len; 488 489 cur_msg = msgp; 490 491 /* find the oldest msg to search backwards to */ 492 oldest_msg = find_beginning(cur_msg, mi); 493 ASSERT(oldest_msg != NULL); 494 495 /* 496 * Create a fact sheet by searching from our current message 497 * backwards to the 'oldest_msg', recording facts along the way 498 * until we found facts that have been inspected by another time. 499 */ 500 while (cur_msg && cur_msg != list_prev(&mi->mi_msg_list, oldest_msg)) { 501 if (cur_msg->msg_type != RM_FACT) { 502 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 503 continue; 504 } 505 506 cur_fp = &cur_msg->rmsg_u.msg_fact; 507 /* 508 * If this fact has already been looked at, then so 509 * have all preceding facts. Return Now. 510 */ 511 if (cur_fp->rf_status == RFS_INSPECT) 512 return (found_a_fact); 513 514 cur_fp->rf_status = RFS_INSPECT; 515 found_a_fact = 1; 516 switch (cur_fp->rf_type) { 517 case RF_BADOWNER: 518 break; 519 case RF_ERR: 520 /* 521 * Don't want to overwrite a fact that was 522 * previously found during our current search. 523 */ 524 if (!ret_fp->rf_reboot) 525 ret_fp->rf_reboot = cur_fp->rf_reboot; 526 if (!ret_fp->rf_stat4) 527 ret_fp->rf_stat4 = cur_fp->rf_stat4; 528 if (!ret_fp->rf_action) 529 ret_fp->rf_action = cur_fp->rf_action; 530 break; 531 case RF_RENEW_EXPIRED: 532 if (cur_msg->msg_mntpt && !(*mnt_pt)) { 533 len = strlen(cur_msg->msg_mntpt) + 1; 534 *mnt_pt = kmem_alloc(len, KM_SLEEP); 535 bcopy(cur_msg->msg_mntpt, *mnt_pt, len); 536 } 537 break; 538 case RF_SRV_OK: 539 break; 540 case RF_SRV_NOT_RESPOND: 541 /* 542 * Okay to overwrite this fact as 543 * we want the earliest time. 544 */ 545 ret_fp->rf_time = cur_fp->rf_time; 546 break; 547 case RF_SRVS_OK: 548 break; 549 case RF_SRVS_NOT_RESPOND: 550 break; 551 case RF_DELMAP_CB_ERR: 552 break; 553 case RF_SENDQ_FULL: 554 break; 555 default: 556 zcmn_err(getzoneid(), CE_NOTE, 557 "get facts: illegal fact %d", cur_fp->rf_type); 558 break; 559 } 560 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 561 } 562 563 return (found_a_fact); 564 } 565 566 /* 567 * Returns 1 if this fact is identical to the last fact recorded 568 * (only checks for a match within the last 2 lease periods). 569 */ 570 static int 571 facts_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg, 572 mntinfo4_t *mi) 573 { 574 nfs4_rfact_t *fp1, *fp2; 575 int lease, len; 576 577 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 578 if (mi->mi_lease_period > 0) 579 lease = 2 * mi->mi_lease_period; 580 else 581 lease = DEFAULT_LEASE; 582 583 fp2 = &new_msg->rmsg_u.msg_fact; 584 585 while (cur_msg && 586 new_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) { 587 if (cur_msg->msg_type != RM_FACT) { 588 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 589 continue; 590 } 591 fp1 = &cur_msg->rmsg_u.msg_fact; 592 if (fp1->rf_type != fp2->rf_type) 593 return (0); 594 595 /* now actually compare the facts */ 596 if (fp1->rf_action != fp2->rf_action) 597 return (0); 598 if (fp1->rf_stat4 != fp2->rf_stat4) 599 return (0); 600 if (fp1->rf_reboot != fp2->rf_reboot) 601 return (0); 602 if (fp1->rf_op != fp2->rf_op) 603 return (0); 604 if (fp1->rf_time.tv_sec != fp2->rf_time.tv_sec) 605 return (0); 606 if (fp1->rf_error != fp2->rf_error) 607 return (0); 608 if (fp1->rf_rp1 != fp2->rf_rp1) 609 return (0); 610 if (cur_msg->msg_srv != NULL) { 611 if (new_msg->msg_srv == NULL) 612 return (0); 613 len = strlen(cur_msg->msg_srv); 614 if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, 615 len) != 0) 616 return (0); 617 } else if (new_msg->msg_srv != NULL) { 618 return (0); 619 } 620 if (cur_msg->msg_mntpt != NULL) { 621 if (new_msg->msg_mntpt == NULL) 622 return (0); 623 len = strlen(cur_msg->msg_mntpt); 624 if (strncmp(cur_msg->msg_mntpt, new_msg->msg_mntpt, 625 len) != 0) 626 return (0); 627 } else if (new_msg->msg_mntpt != NULL) { 628 return (0); 629 } 630 if (fp1->rf_char1 != NULL) { 631 if (fp2->rf_char1 == NULL) 632 return (0); 633 len = strlen(fp1->rf_char1); 634 if (strncmp(fp1->rf_char1, fp2->rf_char1, len) != 0) 635 return (0); 636 } else if (fp2->rf_char1 != NULL) { 637 return (0); 638 } 639 return (1); 640 } 641 642 return (0); 643 } 644 645 /* 646 * Returns 1 if these two messages are identical; 0 otherwise. 647 */ 648 static int 649 events_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg, 650 mntinfo4_t *mi) 651 { 652 nfs4_revent_t *ep1, *ep2; 653 int len; 654 655 /* find the last event, bypassing all facts */ 656 while (cur_msg && cur_msg->msg_type != RM_EVENT) 657 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 658 659 if (!cur_msg) 660 return (0); 661 662 if (cur_msg->msg_type != RM_EVENT) 663 return (0); 664 665 ep1 = &cur_msg->rmsg_u.msg_event; 666 ep2 = &new_msg->rmsg_u.msg_event; 667 if (ep1->re_type != ep2->re_type) 668 return (0); 669 670 /* 671 * Since we zalloc the buffer, then the two nfs4_debug_msg's 672 * must match up even if all the fields weren't filled in 673 * the first place. 674 */ 675 if (ep1->re_mi != ep2->re_mi) 676 return (0); 677 if (ep1->re_uint != ep2->re_uint) 678 return (0); 679 if (ep1->re_stat4 != ep2->re_stat4) 680 return (0); 681 if (ep1->re_pid != ep2->re_pid) 682 return (0); 683 if (ep1->re_rp1 != ep2->re_rp1) 684 return (0); 685 if (ep1->re_rp2 != ep2->re_rp2) 686 return (0); 687 if (ep1->re_tag1 != ep2->re_tag1) 688 return (0); 689 if (ep1->re_tag2 != ep2->re_tag2) 690 return (0); 691 if (ep1->re_seqid1 != ep2->re_seqid1) 692 return (0); 693 if (ep1->re_seqid2 != ep2->re_seqid2) 694 return (0); 695 696 if (cur_msg->msg_srv != NULL) { 697 if (new_msg->msg_srv == NULL) 698 return (0); 699 len = strlen(cur_msg->msg_srv); 700 if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, len) != 0) 701 return (0); 702 } else if (new_msg->msg_srv != NULL) { 703 return (0); 704 } 705 706 if (ep1->re_char1 != NULL) { 707 if (ep2->re_char1 == NULL) 708 return (0); 709 len = strlen(ep1->re_char1); 710 if (strncmp(ep1->re_char1, ep2->re_char1, len) != 0) 711 return (0); 712 } else if (ep2->re_char1 != NULL) { 713 return (0); 714 } 715 716 if (ep1->re_char2 != NULL) { 717 if (ep2->re_char2 == NULL) 718 return (0); 719 len = strlen(ep1->re_char2); 720 if (strncmp(ep1->re_char2, ep2->re_char2, len) != 0) 721 return (0); 722 } else if (ep2->re_char2 != NULL) { 723 return (0); 724 } 725 726 if (cur_msg->msg_mntpt != NULL) { 727 if (new_msg->msg_mntpt == NULL) 728 return (0); 729 len = strlen(cur_msg->msg_mntpt); 730 if (strncmp(cur_msg->msg_mntpt, cur_msg->msg_mntpt, len) != 0) 731 return (0); 732 } else if (new_msg->msg_mntpt != NULL) { 733 return (0); 734 } 735 736 return (1); 737 } 738 739 /* 740 * Free up a recovery event. 741 */ 742 static void 743 free_event(nfs4_revent_t *ep) 744 { 745 int len; 746 747 if (ep->re_char1) { 748 len = strlen(ep->re_char1) + 1; 749 kmem_free(ep->re_char1, len); 750 } 751 if (ep->re_char2) { 752 len = strlen(ep->re_char2) + 1; 753 kmem_free(ep->re_char2, len); 754 } 755 } 756 757 /* 758 * Free up a recovery fact. 759 */ 760 static void 761 free_fact(nfs4_rfact_t *fp) 762 { 763 int len; 764 765 if (fp->rf_char1) { 766 len = strlen(fp->rf_char1) + 1; 767 kmem_free(fp->rf_char1, len); 768 } 769 } 770 771 /* 772 * Free up the message. 773 */ 774 void 775 nfs4_free_msg(nfs4_debug_msg_t *msg) 776 { 777 int len; 778 779 if (msg->msg_type == RM_EVENT) 780 free_event(&msg->rmsg_u.msg_event); 781 else 782 free_fact(&msg->rmsg_u.msg_fact); 783 784 if (msg->msg_srv) { 785 len = strlen(msg->msg_srv) + 1; 786 kmem_free(msg->msg_srv, len); 787 } 788 789 if (msg->msg_mntpt) { 790 len = strlen(msg->msg_mntpt) + 1; 791 kmem_free(msg->msg_mntpt, len); 792 } 793 794 /* free up the data structure itself */ 795 kmem_free(msg, sizeof (*msg)); 796 } 797 798 /* 799 * Prints out the interesting facts for recovery events: 800 * -DEAD_FILE 801 * -SIGLOST(_NO_DUMP) 802 */ 803 static void 804 print_facts(nfs4_debug_msg_t *msg, mntinfo4_t *mi) 805 { 806 nfs4_rfact_t *fp; 807 char *mount_pt; 808 int len; 809 810 if (msg->rmsg_u.msg_event.re_type != RE_DEAD_FILE && 811 msg->rmsg_u.msg_event.re_type != RE_SIGLOST && 812 msg->rmsg_u.msg_event.re_type != RE_SIGLOST_NO_DUMP) 813 return; 814 815 fp = kmem_zalloc(sizeof (*fp), KM_SLEEP); 816 mount_pt = NULL; 817 818 if (get_facts(msg, fp, &mount_pt, mi)) { 819 char time[256]; 820 821 822 if (fp->rf_time.tv_sec) 823 (void) snprintf(time, 256, "%ld", 824 (gethrestime_sec() - fp->rf_time.tv_sec)/60); 825 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 826 "!NFS4 FACT SHEET: %s%s %s%s %s %s%s%s %s%s", 827 fp->rf_action ? "\n Action: " : "", 828 fp->rf_action ? nfs4_recov_action_to_str(fp->rf_action) : 829 "", 830 fp->rf_stat4 ? "\n NFS4 error: " : "", 831 fp->rf_stat4 ? nfs4_stat_to_str(fp->rf_stat4) : "", 832 fp->rf_reboot ? "\n Suspected server reboot. " : "", 833 fp->rf_time.tv_sec ? "\n Server was down for " : "", 834 fp->rf_time.tv_sec ? time : "", 835 fp->rf_time.tv_sec ? " minutes." : "", 836 mount_pt ? " \n Client's lease expired on mount " : "", 837 mount_pt ? mount_pt : ""); 838 } 839 840 if (mount_pt) { 841 len = strlen(mount_pt) + 1; 842 kmem_free(mount_pt, len); 843 } 844 845 /* free the fact struct itself */ 846 if (fp) 847 kmem_free(fp, sizeof (*fp)); 848 } 849 850 /* 851 * Print an event message to /var/adm/messages 852 * The last argument to this fuction dictates the repeat status 853 * of the event. If set to 1, it means that we are dumping this 854 * event and it will _never_ be printed after this time. Else if 855 * set to 0 it will be printed again. 856 */ 857 static void 858 queue_print_event(nfs4_debug_msg_t *msg, mntinfo4_t *mi, int dump) 859 { 860 nfs4_revent_t *ep; 861 zoneid_t zoneid; 862 863 ep = &msg->rmsg_u.msg_event; 864 zoneid = mi->mi_zone->zone_id; 865 866 switch (ep->re_type) { 867 case RE_BAD_SEQID: 868 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 869 "Operation %s for file %s (rnode_pt 0x%p), pid %d using " 870 "seqid %d got %s. Last good seqid was %d for " 871 "operation %s.", 872 msg->msg_srv, msg->msg_mntpt, 873 nfs4_ctags[ep->re_tag1].ct_str, ep->re_char1, 874 (void *)ep->re_rp1, ep->re_pid, ep->re_seqid1, 875 nfs4_stat_to_str(ep->re_stat4), ep->re_seqid2, 876 nfs4_ctags[ep->re_tag2].ct_str); 877 break; 878 case RE_BADHANDLE: 879 ASSERT(ep->re_rp1 != NULL); 880 if (ep->re_char1 != NULL) { 881 zcmn_err(zoneid, CE_NOTE, 882 "![NFS4][Server: %s][Mntpt: %s]" 883 "server %s said filehandle was " 884 "invalid for file: %s (rnode_pt 0x%p) on mount %s", 885 msg->msg_srv, msg->msg_mntpt, msg->msg_srv, 886 ep->re_char1, (void *)ep->re_rp1, msg->msg_mntpt); 887 } else { 888 zcmn_err(zoneid, CE_NOTE, 889 "![NFS4][Server: %s][Mntpt: %s]" 890 "server %s said filehandle was " 891 "invalid for file: (rnode_pt 0x%p) on mount %s" 892 " for fh:", msg->msg_srv, msg->msg_mntpt, 893 msg->msg_srv, (void *)ep->re_rp1, msg->msg_mntpt); 894 sfh4_printfhandle(ep->re_rp1->r_fh); 895 } 896 break; 897 case RE_CLIENTID: 898 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 899 "Can't recover clientid on mount point %s " 900 "(mi 0x%p) due to error %d (%s), for server %s. Marking " 901 "file system as unusable.", 902 msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt, 903 (void *)ep->re_mi, ep->re_uint, 904 nfs4_stat_to_str(ep->re_stat4), 905 msg->msg_srv); 906 break; 907 case RE_DEAD_FILE: 908 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 909 "File %s (rnode_pt: %p) was closed due to NFS " 910 "recovery error on server %s(%s %s)", msg->msg_srv, 911 msg->msg_mntpt, ep->re_char1, (void *)ep->re_rp1, 912 msg->msg_srv, ep->re_char2 ? ep->re_char2 : "", 913 ep->re_stat4 ? nfs4_stat_to_str(ep->re_stat4) : ""); 914 break; 915 case RE_END: 916 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 917 "NFS Recovery done for mount %s (mi 0x%p) " 918 "on server %s, rnode_pt1 %s (0x%p), " 919 "rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt, 920 msg->msg_mntpt, (void *)ep->re_mi, msg->msg_srv, 921 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 922 (void *)ep->re_rp2); 923 break; 924 case RE_FAIL_RELOCK: 925 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 926 "Couldn't reclaim lock for pid %d for " 927 "file %s (rnode_pt 0x%p) on (server %s): error %d", 928 msg->msg_srv, msg->msg_mntpt, ep->re_pid, ep->re_char1, 929 (void *)ep->re_rp1, msg->msg_srv, 930 ep->re_uint ? ep->re_uint : ep->re_stat4); 931 break; 932 case RE_FAIL_REMAP_LEN: 933 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 934 "remap_lookup: server %s returned bad " 935 "fhandle length (%d)", msg->msg_srv, msg->msg_mntpt, 936 msg->msg_srv, ep->re_uint); 937 break; 938 case RE_FAIL_REMAP_OP: 939 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 940 "remap_lookup: didn't get expected OP_GETFH" 941 " for server %s", msg->msg_srv, msg->msg_mntpt, 942 msg->msg_srv); 943 break; 944 case RE_FAILOVER: 945 if (ep->re_char1) 946 zcmn_err(zoneid, CE_NOTE, 947 "![NFS4][Server: %s][Mntpt: %s]" 948 "failing over from %s to %s", msg->msg_srv, 949 msg->msg_mntpt, msg->msg_srv, ep->re_char1); 950 else 951 zcmn_err(zoneid, CE_NOTE, 952 "![NFS4][Server: %s][Mntpt: %s]" 953 "NFS4: failing over: selecting " 954 "original server %s", msg->msg_srv, msg->msg_mntpt, 955 msg->msg_srv); 956 break; 957 case RE_FILE_DIFF: 958 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 959 "File %s (rnode_pt: %p) on server %s was closed " 960 "and failed attempted failover since its is different than " 961 "the original file", msg->msg_srv, msg->msg_mntpt, 962 ep->re_char1, (void *)ep->re_rp1, msg->msg_srv); 963 break; 964 case RE_LOST_STATE: 965 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 966 "Lost %s request for fs %s, file %s (rnode_pt: 0x%p), " 967 "dir %s (0x%p) for server %s", msg->msg_srv, msg->msg_mntpt, 968 nfs4_op_to_str(ep->re_uint), msg->msg_mntpt, 969 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 970 (void *)ep->re_rp2, msg->msg_srv); 971 break; 972 case RE_OPENS_CHANGED: 973 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 974 "The number of open files to reopen changed " 975 "for mount %s mi 0x%p (old %d, new %d) on server %s", 976 msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt, 977 (void *)ep->re_mi, ep->re_uint, ep->re_pid, msg->msg_srv); 978 break; 979 case RE_SIGLOST: 980 case RE_SIGLOST_NO_DUMP: 981 if (ep->re_uint) 982 zcmn_err(zoneid, CE_NOTE, 983 "![NFS4][Server: %s][Mntpt: %s]" 984 "Process %d lost its locks on " 985 "file %s (rnode_pt: %p) due to NFS recovery error " 986 "(%d) on server %s.", msg->msg_srv, msg->msg_mntpt, 987 ep->re_pid, ep->re_char1, (void *)ep->re_rp1, 988 ep->re_uint, msg->msg_srv); 989 else 990 zcmn_err(zoneid, CE_NOTE, 991 "![NFS4][Server: %s][Mntpt: %s]" 992 "Process %d lost its locks on " 993 "file %s (rnode_pt: %p) due to NFS recovery error " 994 "(%s) on server %s.", msg->msg_srv, msg->msg_mntpt, 995 ep->re_pid, ep->re_char1, (void *)ep->re_rp1, 996 nfs4_stat_to_str(ep->re_stat4), msg->msg_srv); 997 break; 998 case RE_START: 999 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1000 "NFS Starting recovery for mount %s " 1001 "(mi 0x%p mi_recovflags [0x%x]) on server %s, " 1002 "rnode_pt1 %s (0x%p), rnode_pt2 %s (0x%p)", msg->msg_srv, 1003 msg->msg_mntpt, msg->msg_mntpt, (void *)ep->re_mi, 1004 ep->re_uint, msg->msg_srv, ep->re_char1, (void *)ep->re_rp1, 1005 ep->re_char2, (void *)ep->re_rp2); 1006 break; 1007 case RE_UNEXPECTED_ACTION: 1008 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1009 "NFS recovery: unexpected action (%s) on server %s", 1010 msg->msg_srv, msg->msg_mntpt, 1011 nfs4_recov_action_to_str(ep->re_uint), msg->msg_srv); 1012 break; 1013 case RE_UNEXPECTED_ERRNO: 1014 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1015 "NFS recovery: unexpected errno (%d) on server %s", 1016 msg->msg_srv, msg->msg_mntpt, ep->re_uint, msg->msg_srv); 1017 break; 1018 case RE_UNEXPECTED_STATUS: 1019 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1020 "NFS recovery: unexpected NFS status code (%s) " 1021 "on server %s", msg->msg_srv, msg->msg_mntpt, 1022 nfs4_stat_to_str(ep->re_stat4), 1023 msg->msg_srv); 1024 break; 1025 case RE_WRONGSEC: 1026 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1027 "NFS can't recover from NFS4ERR_WRONGSEC." 1028 " error %d for server %s: rnode_pt1 %s (0x%p)" 1029 " rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt, 1030 ep->re_uint, msg->msg_srv, ep->re_char1, (void *)ep->re_rp1, 1031 ep->re_char2, (void *)ep->re_rp2); 1032 break; 1033 case RE_LOST_STATE_BAD_OP: 1034 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1035 "NFS lost state with unrecognized op (%d)." 1036 " fs %s, server %s, pid %d, file %s (rnode_pt: 0x%p), " 1037 "dir %s (0x%p)", msg->msg_srv, msg->msg_mntpt, 1038 ep->re_uint, msg->msg_mntpt, msg->msg_srv, ep->re_pid, 1039 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 1040 (void *)ep->re_rp2); 1041 break; 1042 default: 1043 zcmn_err(zoneid, CE_WARN, 1044 "!queue_print_event: illegal event %d", ep->re_type); 1045 break; 1046 } 1047 1048 print_facts(msg, mi); 1049 1050 /* 1051 * If set this event will not be printed again and is considered 1052 * dumped. 1053 */ 1054 if (dump) 1055 msg->msg_status = NFS4_MS_NO_DUMP; 1056 } 1057 1058 /* 1059 * Print a fact message to /var/adm/messages 1060 */ 1061 static void 1062 queue_print_fact(nfs4_debug_msg_t *msg, int dump) 1063 { 1064 nfs4_rfact_t *fp; 1065 zoneid_t zoneid; 1066 1067 fp = &msg->rmsg_u.msg_fact; 1068 zoneid = getzoneid(); 1069 1070 switch (fp->rf_type) { 1071 case RF_BADOWNER: 1072 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1073 "NFSMAPID_DOMAIN does not match the server: %s domain\n" 1074 "Please check configuration", msg->msg_srv, msg->msg_mntpt, 1075 msg->msg_srv); 1076 break; 1077 case RF_ERR: 1078 if (fp->rf_error) 1079 zcmn_err(zoneid, CE_NOTE, 1080 "![NFS4][Server: %s][Mntpt: %s]NFS op %s got " 1081 "error %d causing recovery action %s.%s", 1082 msg->msg_srv, msg->msg_mntpt, 1083 nfs4_op_to_str(fp->rf_op), fp->rf_error, 1084 nfs4_recov_action_to_str(fp->rf_action), 1085 fp->rf_reboot ? 1086 " Client also suspects that the server rebooted," 1087 " or experienced a network partition." : ""); 1088 else 1089 zcmn_err(zoneid, CE_NOTE, 1090 "![NFS4][Server: %s][Mntpt: %s]NFS op %s got " 1091 "error %s causing recovery action %s.%s", 1092 msg->msg_srv, msg->msg_mntpt, 1093 nfs4_op_to_str(fp->rf_op), 1094 nfs4_stat_to_str(fp->rf_stat4), 1095 nfs4_recov_action_to_str(fp->rf_action), 1096 fp->rf_reboot ? 1097 " Client also suspects that the server rebooted," 1098 " or experienced a network partition." : ""); 1099 break; 1100 case RF_RENEW_EXPIRED: 1101 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1102 "NFS4 renew thread detected client's " 1103 "lease has expired. Current open files/locks/IO may fail", 1104 msg->msg_srv, msg->msg_mntpt); 1105 break; 1106 case RF_SRV_NOT_RESPOND: 1107 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1108 "NFS server %s not responding; still trying\n", 1109 msg->msg_srv, msg->msg_mntpt, msg->msg_srv); 1110 break; 1111 case RF_SRV_OK: 1112 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1113 "NFS server %s ok", msg->msg_srv, msg->msg_mntpt, 1114 msg->msg_srv); 1115 break; 1116 case RF_SRVS_NOT_RESPOND: 1117 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1118 "NFS servers %s not responding; still trying", msg->msg_srv, 1119 msg->msg_mntpt, msg->msg_srv); 1120 break; 1121 case RF_SRVS_OK: 1122 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1123 "NFS servers %s ok", msg->msg_srv, msg->msg_mntpt, 1124 msg->msg_srv); 1125 break; 1126 case RF_DELMAP_CB_ERR: 1127 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1128 "NFS op %s got error %s when executing delmap on file %s " 1129 "(rnode_pt 0x%p).", 1130 msg->msg_srv, msg->msg_mntpt, nfs4_op_to_str(fp->rf_op), 1131 nfs4_stat_to_str(fp->rf_stat4), fp->rf_char1, 1132 (void *)fp->rf_rp1); 1133 break; 1134 case RF_SENDQ_FULL: 1135 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1136 "send queue to NFS server %s is full; still trying\n", 1137 msg->msg_srv, msg->msg_mntpt, msg->msg_srv); 1138 break; 1139 1140 default: 1141 zcmn_err(zoneid, CE_WARN, "!queue_print_fact: illegal fact %d", 1142 fp->rf_type); 1143 } 1144 1145 /* 1146 * If set this fact will not be printed again and is considered 1147 * dumped. 1148 */ 1149 if (dump) 1150 msg->msg_status = NFS4_MS_NO_DUMP; 1151 } 1152 1153 /* 1154 * Returns 1 if the entire queue should be dumped, 0 otherwise. 1155 */ 1156 static int 1157 id_to_dump_queue(nfs4_event_type_t id) 1158 { 1159 switch (id) { 1160 case RE_DEAD_FILE: 1161 case RE_SIGLOST: 1162 case RE_WRONGSEC: 1163 case RE_CLIENTID: 1164 return (1); 1165 default: 1166 return (0); 1167 } 1168 } 1169 1170 /* 1171 * Returns 1 if the event (but not the entire queue) should be printed; 1172 * 0 otherwise. 1173 */ 1174 static int 1175 id_to_dump_solo_event(nfs4_event_type_t id) 1176 { 1177 switch (id) { 1178 case RE_BAD_SEQID: 1179 case RE_BADHANDLE: 1180 case RE_FAIL_REMAP_LEN: 1181 case RE_FAIL_REMAP_OP: 1182 case RE_FAILOVER: 1183 case RE_OPENS_CHANGED: 1184 case RE_SIGLOST_NO_DUMP: 1185 case RE_UNEXPECTED_ACTION: 1186 case RE_UNEXPECTED_ERRNO: 1187 case RE_UNEXPECTED_STATUS: 1188 case RE_LOST_STATE_BAD_OP: 1189 return (1); 1190 default: 1191 return (0); 1192 } 1193 } 1194 1195 /* 1196 * Returns 1 if the fact (but not the entire queue) should be printed; 1197 * 0 otherwise. 1198 */ 1199 static int 1200 id_to_dump_solo_fact(nfs4_fact_type_t id) 1201 { 1202 switch (id) { 1203 case RF_SRV_NOT_RESPOND: 1204 case RF_SRV_OK: 1205 case RF_SRVS_NOT_RESPOND: 1206 case RF_SRVS_OK: 1207 case RF_SENDQ_FULL: 1208 return (1); 1209 default: 1210 return (0); 1211 } 1212 } 1213 1214 /* 1215 * Update a kernel stat 1216 */ 1217 static void 1218 update_recov_kstats(nfs4_debug_msg_t *msg, mntinfo4_t *mi) 1219 { 1220 rkstat_t *rsp; 1221 1222 if (!mi->mi_recov_ksp) 1223 return; 1224 1225 rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data; 1226 1227 if (msg->msg_type == RM_EVENT) { 1228 switch (msg->rmsg_u.msg_event.re_type) { 1229 case RE_BADHANDLE: 1230 rsp->badhandle.value.ul++; 1231 break; 1232 case RE_CLIENTID: 1233 rsp->clientid.value.ul++; 1234 break; 1235 case RE_DEAD_FILE: 1236 rsp->dead_file.value.ul++; 1237 break; 1238 case RE_FAIL_RELOCK: 1239 rsp->fail_relock.value.ul++; 1240 break; 1241 case RE_FILE_DIFF: 1242 rsp->file_diff.value.ul++; 1243 break; 1244 case RE_OPENS_CHANGED: 1245 rsp->opens_changed.value.ul++; 1246 break; 1247 case RE_SIGLOST: 1248 case RE_SIGLOST_NO_DUMP: 1249 rsp->siglost.value.ul++; 1250 break; 1251 case RE_UNEXPECTED_ACTION: 1252 rsp->unexp_action.value.ul++; 1253 break; 1254 case RE_UNEXPECTED_ERRNO: 1255 rsp->unexp_errno.value.ul++; 1256 break; 1257 case RE_UNEXPECTED_STATUS: 1258 rsp->unexp_status.value.ul++; 1259 break; 1260 case RE_WRONGSEC: 1261 rsp->wrongsec.value.ul++; 1262 break; 1263 case RE_LOST_STATE_BAD_OP: 1264 rsp->lost_state_bad_op.value.ul++; 1265 break; 1266 default: 1267 break; 1268 } 1269 } else if (msg->msg_type == RM_FACT) { 1270 switch (msg->rmsg_u.msg_fact.rf_type) { 1271 case RF_BADOWNER: 1272 rsp->badowner.value.ul++; 1273 break; 1274 case RF_SRV_NOT_RESPOND: 1275 rsp->not_responding.value.ul++; 1276 break; 1277 default: 1278 break; 1279 } 1280 } 1281 } 1282 1283 /* 1284 * Dump the mi's mi_msg_list of recovery messages. 1285 */ 1286 static void 1287 dump_queue(mntinfo4_t *mi, nfs4_debug_msg_t *msg) 1288 { 1289 nfs4_debug_msg_t *tmp_msg; 1290 1291 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 1292 1293 /* update kstats */ 1294 update_recov_kstats(msg, mi); 1295 1296 /* 1297 * If we aren't supposed to dump the queue then see if we 1298 * should just print this single message, then return. 1299 */ 1300 if (!id_to_dump_queue(msg->rmsg_u.msg_event.re_type)) { 1301 if (id_to_dump_solo_event(msg->rmsg_u.msg_event.re_type)) 1302 queue_print_event(msg, mi, 0); 1303 return; 1304 } 1305 1306 /* 1307 * Write all events/facts in the queue that haven't been 1308 * previously written to disk. 1309 */ 1310 tmp_msg = list_head(&mi->mi_msg_list); 1311 while (tmp_msg) { 1312 if (tmp_msg->msg_status == NFS4_MS_DUMP) { 1313 if (tmp_msg->msg_type == RM_EVENT) 1314 queue_print_event(tmp_msg, mi, 1); 1315 else if (tmp_msg->msg_type == RM_FACT) 1316 queue_print_fact(tmp_msg, 1); 1317 } 1318 tmp_msg = list_next(&mi->mi_msg_list, tmp_msg); 1319 } 1320 } 1321 1322 /* 1323 * Places the event into mi's debug recovery message queue. Some of the 1324 * fields can be overloaded to be a generic value, depending on the event 1325 * type. These include "count", "why". 1326 */ 1327 void 1328 nfs4_queue_event(nfs4_event_type_t id, mntinfo4_t *mi, char *server1, 1329 uint_t count, vnode_t *vp1, vnode_t *vp2, nfsstat4 nfs4_error, 1330 char *why, pid_t pid, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2, 1331 seqid4 seqid1, seqid4 seqid2) 1332 { 1333 nfs4_debug_msg_t *msg; 1334 nfs4_revent_t *ep; 1335 char *cur_srv; 1336 rnode4_t *rp1 = NULL, *rp2 = NULL; 1337 refstr_t *mntpt; 1338 1339 ASSERT(mi != NULL); 1340 if (vp1) 1341 rp1 = VTOR4(vp1); 1342 if (vp2) 1343 rp2 = VTOR4(vp2); 1344 1345 /* 1346 * Initialize the message with the relevant server/mount_pt/time 1347 * information. Also place the relevent event related info. 1348 */ 1349 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); 1350 msg->msg_type = RM_EVENT; 1351 msg->msg_status = NFS4_MS_DUMP; 1352 ep = &msg->rmsg_u.msg_event; 1353 ep->re_type = id; 1354 gethrestime(&msg->msg_time); 1355 1356 cur_srv = mi->mi_curr_serv->sv_hostname; 1357 msg->msg_srv = strdup(cur_srv); 1358 mntpt = vfs_getmntpoint(mi->mi_vfsp); 1359 msg->msg_mntpt = strdup(refstr_value(mntpt)); 1360 refstr_rele(mntpt); 1361 1362 set_event(id, ep, mi, rp1, rp2, count, pid, nfs4_error, server1, 1363 why, tag1, tag2, seqid1, seqid2); 1364 1365 mutex_enter(&mi->mi_msg_list_lock); 1366 1367 /* if this event is the same as the last event, drop it */ 1368 if (events_same(list_tail(&mi->mi_msg_list), msg, mi)) { 1369 mutex_exit(&mi->mi_msg_list_lock); 1370 nfs4_free_msg(msg); 1371 return; 1372 } 1373 1374 /* queue the message at the end of the list */ 1375 list_insert_tail(&mi->mi_msg_list, msg); 1376 1377 dump_queue(mi, msg); 1378 1379 if (mi->mi_msg_count == nfs4_msg_max) { 1380 nfs4_debug_msg_t *rm_msg; 1381 1382 /* remove the queue'd message at the front of the list */ 1383 rm_msg = list_head(&mi->mi_msg_list); 1384 list_remove(&mi->mi_msg_list, rm_msg); 1385 mutex_exit(&mi->mi_msg_list_lock); 1386 nfs4_free_msg(rm_msg); 1387 } else { 1388 mi->mi_msg_count++; 1389 mutex_exit(&mi->mi_msg_list_lock); 1390 } 1391 } 1392 1393 /* 1394 * Places the fact into mi's debug recovery messages queue. 1395 */ 1396 void 1397 nfs4_queue_fact(nfs4_fact_type_t fid, mntinfo4_t *mi, nfsstat4 stat4, 1398 nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, char *srvname, 1399 int error, vnode_t *vp) 1400 { 1401 nfs4_debug_msg_t *msg; 1402 nfs4_rfact_t *fp; 1403 char *cur_srv; 1404 refstr_t *mntpt; 1405 1406 /* 1407 * Initialize the message with the relevant server/mount_pt/time 1408 * information. Also place the relevant fact related info. 1409 */ 1410 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); 1411 msg->msg_type = RM_FACT; 1412 msg->msg_status = NFS4_MS_DUMP; 1413 gethrestime(&msg->msg_time); 1414 1415 if (srvname) 1416 cur_srv = srvname; 1417 else 1418 cur_srv = mi->mi_curr_serv->sv_hostname; 1419 1420 msg->msg_srv = strdup(cur_srv); 1421 mntpt = vfs_getmntpoint(mi->mi_vfsp); 1422 msg->msg_mntpt = strdup(refstr_value(mntpt)); 1423 refstr_rele(mntpt); 1424 1425 fp = &msg->rmsg_u.msg_fact; 1426 fp->rf_type = fid; 1427 fp->rf_status = RFS_NO_INSPECT; 1428 set_fact(fid, fp, stat4, raction, op, reboot, error, vp); 1429 1430 update_recov_kstats(msg, mi); 1431 1432 mutex_enter(&mi->mi_msg_list_lock); 1433 1434 /* if this fact is the same as the last fact, drop it */ 1435 if (facts_same(list_tail(&mi->mi_msg_list), msg, mi)) { 1436 mutex_exit(&mi->mi_msg_list_lock); 1437 nfs4_free_msg(msg); 1438 return; 1439 } 1440 1441 /* queue the message at the end of the list */ 1442 list_insert_tail(&mi->mi_msg_list, msg); 1443 1444 if (id_to_dump_solo_fact(msg->rmsg_u.msg_fact.rf_type)) 1445 queue_print_fact(msg, 0); 1446 1447 if (mi->mi_msg_count == nfs4_msg_max) { 1448 nfs4_debug_msg_t *rm_msg; 1449 1450 /* remove the queue'd message at the front of the list */ 1451 rm_msg = list_head(&mi->mi_msg_list); 1452 list_remove(&mi->mi_msg_list, rm_msg); 1453 mutex_exit(&mi->mi_msg_list_lock); 1454 nfs4_free_msg(rm_msg); 1455 } else { 1456 mi->mi_msg_count++; 1457 mutex_exit(&mi->mi_msg_list_lock); 1458 } 1459 } 1460 1461 /* 1462 * Initialize the 'mi_recov_kstat' kstat. 1463 */ 1464 void 1465 nfs4_mnt_recov_kstat_init(vfs_t *vfsp) 1466 { 1467 mntinfo4_t *mi = VFTOMI4(vfsp); 1468 kstat_t *ksp; 1469 zoneid_t zoneid = mi->mi_zone->zone_id; 1470 1471 /* 1472 * Create the version specific kstats. 1473 * 1474 * PSARC 2001/697 Contract Private Interface 1475 * All nfs kstats are under SunMC contract 1476 * Please refer to the PSARC listed above and contact 1477 * SunMC before making any changes! 1478 * 1479 * Changes must be reviewed by Solaris File Sharing 1480 * Changes must be communicated to contract-2001-697@sun.com 1481 * 1482 */ 1483 1484 if ((ksp = kstat_create_zone("nfs", getminor(vfsp->vfs_dev), 1485 "mi_recov_kstat", "misc", KSTAT_TYPE_NAMED, 1486 sizeof (rkstat_t) / sizeof (kstat_named_t), 1487 KSTAT_FLAG_WRITABLE, zoneid)) == NULL) { 1488 mi->mi_recov_ksp = NULL; 1489 zcmn_err(GLOBAL_ZONEID, CE_NOTE, 1490 "!mi_recov_kstat for mi %p failed\n", 1491 (void *)mi); 1492 return; 1493 } 1494 if (zoneid != GLOBAL_ZONEID) 1495 kstat_zone_add(ksp, GLOBAL_ZONEID); 1496 mi->mi_recov_ksp = ksp; 1497 bcopy(&rkstat_template, ksp->ks_data, sizeof (rkstat_t)); 1498 kstat_install(ksp); 1499 } 1500 1501 /* 1502 * Increment the "delay" kstat. 1503 */ 1504 void 1505 nfs4_mi_kstat_inc_delay(mntinfo4_t *mi) 1506 { 1507 rkstat_t *rsp; 1508 1509 if (!mi->mi_recov_ksp) 1510 return; 1511 1512 rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data; 1513 rsp->delay.value.ul++; 1514 } 1515 1516 /* 1517 * Increment the "no_grace" kstat. 1518 */ 1519 void 1520 nfs4_mi_kstat_inc_no_grace(mntinfo4_t *mi) 1521 { 1522 rkstat_t *rsp; 1523 1524 if (!mi->mi_recov_ksp) 1525 return; 1526 1527 rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data; 1528 rsp->no_grace.value.ul++; 1529 } 1530