1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/cred.h> 27 #include <sys/kstat.h> 28 #include <sys/list.h> 29 #include <sys/systm.h> 30 #include <sys/vfs.h> 31 #include <sys/vnode.h> 32 #include <sys/cmn_err.h> 33 34 #include <nfs/nfs4_clnt.h> 35 #include <nfs/rnode4.h> 36 37 /* 38 * Recovery kstats 39 */ 40 typedef struct rkstat { 41 kstat_named_t badhandle; 42 kstat_named_t badowner; 43 kstat_named_t clientid; 44 kstat_named_t dead_file; 45 kstat_named_t delay; 46 kstat_named_t fail_relock; 47 kstat_named_t file_diff; 48 kstat_named_t no_grace; 49 kstat_named_t not_responding; 50 kstat_named_t opens_changed; 51 kstat_named_t siglost; 52 kstat_named_t unexp_action; 53 kstat_named_t unexp_errno; 54 kstat_named_t unexp_status; 55 kstat_named_t wrongsec; 56 kstat_named_t lost_state_bad_op; 57 } rkstat_t; 58 59 static rkstat_t rkstat_template = { 60 { "badhandle", KSTAT_DATA_ULONG }, 61 { "badowner", KSTAT_DATA_ULONG }, 62 { "clientid", KSTAT_DATA_ULONG }, 63 { "dead_file", KSTAT_DATA_ULONG }, 64 { "delay", KSTAT_DATA_ULONG }, 65 { "fail_relock", KSTAT_DATA_ULONG }, 66 { "file_diff", KSTAT_DATA_ULONG }, 67 { "no_grace", KSTAT_DATA_ULONG }, 68 { "not_responding", KSTAT_DATA_ULONG }, 69 { "opens_changed", KSTAT_DATA_ULONG }, 70 { "siglost", KSTAT_DATA_ULONG }, 71 { "unexp_action", KSTAT_DATA_ULONG }, 72 { "unexp_errno", KSTAT_DATA_ULONG }, 73 { "unexp_status", KSTAT_DATA_ULONG }, 74 { "wrongsec", KSTAT_DATA_ULONG }, 75 { "bad_op", KSTAT_DATA_ULONG }, 76 }; 77 78 /* maximum number of messages allowed on the mi's mi_msg_list */ 79 int nfs4_msg_max = NFS4_MSG_MAX; 80 #define DEFAULT_LEASE 180 81 82 /* 83 * Sets the appropiate fields of "ep", given "id" and various parameters. 84 * Assumes that ep's fields have been initialized to zero/null, except for 85 * re_type and mount point info, which are already set. 86 */ 87 static void 88 set_event(nfs4_event_type_t id, nfs4_revent_t *ep, mntinfo4_t *mi, 89 rnode4_t *rp1, rnode4_t *rp2, uint_t count, pid_t pid, nfsstat4 nfs4_error, 90 char *server1, char *why, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2, 91 seqid4 seqid1, seqid4 seqid2) 92 { 93 int len; 94 95 switch (id) { 96 case RE_BAD_SEQID: 97 ep->re_mi = mi; 98 99 /* bad seqid'd file <path/component name> */ 100 if (rp1 && rp1->r_svnode.sv_name) 101 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 102 else 103 ep->re_char1 = NULL; 104 ep->re_rp1 = rp1; 105 106 /* for LOCK/LOCKU */ 107 ep->re_pid = pid; 108 109 ep->re_stat4 = nfs4_error; 110 ep->re_tag1 = tag1; 111 ep->re_tag2 = tag2; 112 ep->re_seqid1 = seqid1; 113 ep->re_seqid2 = seqid2; 114 break; 115 case RE_BADHANDLE: 116 ASSERT(rp1 != NULL); 117 118 /* dead file <path/component name> */ 119 if (rp1->r_svnode.sv_name) 120 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 121 else 122 ep->re_char1 = NULL; 123 ep->re_rp1 = rp1; 124 break; 125 case RE_CLIENTID: 126 ep->re_mi = mi; 127 128 /* the error we failed with */ 129 ep->re_uint = count; 130 ep->re_stat4 = nfs4_error; 131 break; 132 case RE_DEAD_FILE: 133 ASSERT(rp1 != NULL); 134 135 /* dead file <path/component name> */ 136 if (rp1->r_svnode.sv_name) 137 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 138 else 139 ep->re_char1 = NULL; 140 ep->re_rp1 = rp1; 141 142 /* why the file got killed */ 143 if (why) { 144 len = strlen(why); 145 ep->re_char2 = kmem_alloc(len + 1, KM_SLEEP); 146 bcopy(why, ep->re_char2, len); 147 ep->re_char2[len] = '\0'; 148 } else 149 ep->re_char2 = NULL; 150 151 ep->re_stat4 = nfs4_error; 152 break; 153 case RE_END: 154 /* first rnode */ 155 if (rp1 && rp1->r_svnode.sv_name) 156 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 157 else 158 ep->re_char1 = NULL; 159 ep->re_rp1 = rp1; 160 161 /* second rnode */ 162 if (rp2 && rp2->r_svnode.sv_name) 163 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 164 else 165 ep->re_char2 = NULL; 166 ep->re_rp2 = rp2; 167 168 ep->re_mi = mi; 169 break; 170 case RE_FAIL_RELOCK: 171 ASSERT(rp1 != NULL); 172 173 /* error on fail relock */ 174 ep->re_uint = count; 175 176 /* process that failed */ 177 ep->re_pid = pid; 178 179 /* nfs4 error */ 180 ep->re_stat4 = nfs4_error; 181 182 /* file <path/component name> */ 183 if (rp1->r_svnode.sv_name) 184 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 185 else 186 ep->re_char1 = NULL; 187 ep->re_rp1 = rp1; 188 break; 189 case RE_FAIL_REMAP_LEN: 190 /* length of returned filehandle */ 191 ep->re_uint = count; 192 break; 193 case RE_FAIL_REMAP_OP: 194 break; 195 case RE_FAILOVER: 196 /* server we're failing over to (if not picking original) */ 197 if (server1 != NULL) { 198 len = strlen(server1); 199 ep->re_char1 = kmem_alloc(len + 1, KM_SLEEP); 200 bcopy(server1, ep->re_char1, len); 201 ep->re_char1[len] = '\0'; 202 } else { 203 ep->re_char1 = NULL; 204 } 205 break; 206 case RE_FILE_DIFF: 207 ASSERT(rp1 != NULL); 208 209 /* dead file <path/component name> */ 210 if (rp1->r_svnode.sv_name) 211 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 212 else 213 ep->re_char1 = NULL; 214 ep->re_rp1 = rp1; 215 break; 216 case RE_LOST_STATE: 217 ep->re_uint = count; /* op number */ 218 if (rp1 && rp1->r_svnode.sv_name) 219 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 220 else 221 ep->re_char1 = NULL; 222 ep->re_rp1 = rp1; 223 if (rp2 && rp2->r_svnode.sv_name) 224 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 225 else 226 ep->re_char2 = NULL; 227 ep->re_rp2 = rp2; 228 break; 229 case RE_OPENS_CHANGED: 230 ep->re_mi = mi; 231 232 /* original number of open files */ 233 ep->re_uint = count; 234 /* new number of open files */ 235 ep->re_pid = pid; 236 break; 237 case RE_SIGLOST: 238 case RE_SIGLOST_NO_DUMP: 239 ASSERT(rp1 != NULL); 240 241 /* file <path/component name> */ 242 if (rp1->r_svnode.sv_name) 243 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 244 else 245 ep->re_char1 = NULL; 246 ep->re_rp1 = rp1; 247 ep->re_pid = pid; 248 ep->re_uint = count; 249 ep->re_stat4 = nfs4_error; 250 break; 251 case RE_START: 252 /* file <path/component name> */ 253 if (rp1 && rp1->r_svnode.sv_name) 254 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 255 else 256 ep->re_char1 = NULL; 257 ep->re_rp1 = rp1; 258 259 /* file <path/component name> */ 260 if (rp2 && rp2->r_svnode.sv_name) 261 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 262 else 263 ep->re_char2 = NULL; 264 ep->re_rp2 = rp2; 265 266 ep->re_mi = mi; 267 ep->re_uint = count; 268 break; 269 case RE_UNEXPECTED_ACTION: 270 case RE_UNEXPECTED_ERRNO: 271 /* the error that is unexpected */ 272 ep->re_uint = count; 273 break; 274 case RE_UNEXPECTED_STATUS: 275 /* nfsstat4 error */ 276 ep->re_stat4 = nfs4_error; 277 break; 278 case RE_WRONGSEC: 279 /* the error we failed with */ 280 ep->re_uint = count; 281 282 /* file <path/component name> */ 283 if (rp1 && rp1->r_svnode.sv_name) 284 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 285 else 286 ep->re_char1 = NULL; 287 ep->re_rp1 = rp1; 288 289 /* file <path/component name> */ 290 if (rp2 && rp2->r_svnode.sv_name) 291 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 292 else 293 ep->re_char2 = NULL; 294 ep->re_rp2 = rp2; 295 break; 296 case RE_LOST_STATE_BAD_OP: 297 ep->re_uint = count; /* the unexpected op */ 298 ep->re_pid = pid; 299 ep->re_rp1 = rp1; 300 if (rp1 != NULL && rp1->r_svnode.sv_name != NULL) 301 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 302 ep->re_rp2 = rp2; 303 if (rp2 != NULL && rp2->r_svnode.sv_name != NULL) 304 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 305 break; 306 case RE_REFERRAL: 307 /* server we're being referred to */ 308 if (server1 != NULL) { 309 len = strlen(server1); 310 ep->re_char1 = kmem_alloc(len + 1, KM_SLEEP); 311 bcopy(server1, ep->re_char1, len); 312 ep->re_char1[len] = '\0'; 313 } else { 314 ep->re_char1 = NULL; 315 } 316 break; 317 default: 318 break; 319 } 320 } 321 322 /* 323 * Sets the appropiate fields of the 'fact' for this 'id'. 324 */ 325 static void 326 set_fact(nfs4_fact_type_t id, nfs4_rfact_t *fp, nfsstat4 stat4, 327 nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, int error, 328 vnode_t *vp) 329 { 330 rnode4_t *rp1; 331 332 switch (id) { 333 case RF_BADOWNER: 334 fp->rf_op = op; 335 fp->rf_reboot = reboot; 336 fp->rf_stat4 = stat4; 337 break; 338 case RF_RENEW_EXPIRED: 339 break; 340 case RF_ERR: 341 fp->rf_op = op; 342 fp->rf_reboot = reboot; 343 fp->rf_stat4 = stat4; 344 fp->rf_action = raction; 345 fp->rf_error = error; 346 break; 347 case RF_SRV_OK: 348 break; 349 case RF_SRV_NOT_RESPOND: 350 break; 351 case RF_SRVS_OK: 352 break; 353 case RF_SRVS_NOT_RESPOND: 354 gethrestime(&fp->rf_time); 355 break; 356 case RF_DELMAP_CB_ERR: 357 fp->rf_op = op; 358 fp->rf_stat4 = stat4; 359 360 rp1 = VTOR4(vp); 361 fp->rf_rp1 = rp1; 362 if (rp1 && rp1->r_svnode.sv_name) 363 fp->rf_char1 = fn_path(rp1->r_svnode.sv_name); 364 else 365 fp->rf_char1 = NULL; 366 break; 367 case RF_SENDQ_FULL: 368 break; 369 default: 370 zcmn_err(getzoneid(), CE_NOTE, "illegal fact %d", id); 371 break; 372 } 373 } 374 375 /* 376 * Returns 1 if the event/fact is of a successful communication 377 * from the server; 0 otherwise. 378 */ 379 static int 380 successful_comm(nfs4_debug_msg_t *msgp) 381 { 382 if (msgp->msg_type == RM_EVENT) { 383 switch (msgp->rmsg_u.msg_event.re_type) { 384 case RE_BAD_SEQID: 385 case RE_BADHANDLE: 386 case RE_FAIL_REMAP_LEN: 387 case RE_FAIL_REMAP_OP: 388 case RE_FILE_DIFF: 389 case RE_START: 390 case RE_UNEXPECTED_ACTION: 391 case RE_UNEXPECTED_ERRNO: 392 case RE_UNEXPECTED_STATUS: 393 case RE_WRONGSEC: 394 return (1); 395 case RE_CLIENTID: 396 case RE_DEAD_FILE: 397 case RE_END: 398 case RE_FAIL_RELOCK: 399 case RE_FAILOVER: 400 case RE_LOST_STATE: 401 case RE_OPENS_CHANGED: 402 case RE_SIGLOST: 403 case RE_SIGLOST_NO_DUMP: 404 case RE_LOST_STATE_BAD_OP: 405 case RE_REFERRAL: 406 /* placeholder */ 407 return (0); 408 default: 409 return (0); 410 } 411 } else { 412 switch (msgp->rmsg_u.msg_fact.rf_type) { 413 case RF_BADOWNER: 414 case RF_ERR: 415 case RF_RENEW_EXPIRED: 416 case RF_SRV_OK: 417 case RF_SRVS_OK: 418 case RF_DELMAP_CB_ERR: 419 return (1); 420 case RF_SRV_NOT_RESPOND: 421 case RF_SRVS_NOT_RESPOND: 422 case RF_SENDQ_FULL: 423 return (0); 424 default: 425 return (0); 426 } 427 } 428 } 429 430 /* 431 * Iterate backwards through the mi's mi_msg_list to find the earliest 432 * message that we should find relevant facts to investigate. 433 */ 434 static nfs4_debug_msg_t * 435 find_beginning(nfs4_debug_msg_t *first_msg, mntinfo4_t *mi) 436 { 437 nfs4_debug_msg_t *oldest_msg, *cur_msg; 438 time_t lease; 439 440 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 441 if (mi->mi_lease_period > 0) 442 lease = 2 * mi->mi_lease_period; 443 else 444 lease = DEFAULT_LEASE; 445 446 oldest_msg = first_msg; 447 cur_msg = list_prev(&mi->mi_msg_list, first_msg); 448 while (cur_msg && 449 first_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) { 450 oldest_msg = cur_msg; 451 if ((cur_msg->msg_type == RM_FACT) && 452 (cur_msg->rmsg_u.msg_fact.rf_type == RF_SRV_OK)) { 453 /* find where we lost contact with the server */ 454 while (cur_msg) { 455 if ((cur_msg->msg_type == RM_FACT) && 456 (cur_msg->rmsg_u.msg_fact.rf_type == 457 RF_SRV_NOT_RESPOND)) 458 break; 459 oldest_msg = cur_msg; 460 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 461 } 462 /* 463 * Find the first successful message before 464 * we lost contact with the server. 465 */ 466 if (cur_msg) { 467 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 468 while (cur_msg && !successful_comm(cur_msg)) { 469 oldest_msg = cur_msg; 470 cur_msg = list_prev(&mi->mi_msg_list, 471 cur_msg); 472 } 473 } 474 /* 475 * If we're not at the dummy head pointer, 476 * set the oldest and current message. 477 */ 478 if (cur_msg) { 479 first_msg = cur_msg; 480 oldest_msg = cur_msg; 481 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 482 } 483 } else 484 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 485 } 486 487 return (oldest_msg); 488 } 489 490 /* 491 * Returns 1 if facts have been found; 0 otherwise. 492 */ 493 static int 494 get_facts(nfs4_debug_msg_t *msgp, nfs4_rfact_t *ret_fp, char **mnt_pt, 495 mntinfo4_t *mi) 496 { 497 nfs4_debug_msg_t *cur_msg, *oldest_msg; 498 nfs4_rfact_t *cur_fp; 499 int found_a_fact = 0; 500 int len; 501 502 cur_msg = msgp; 503 504 /* find the oldest msg to search backwards to */ 505 oldest_msg = find_beginning(cur_msg, mi); 506 ASSERT(oldest_msg != NULL); 507 508 /* 509 * Create a fact sheet by searching from our current message 510 * backwards to the 'oldest_msg', recording facts along the way 511 * until we found facts that have been inspected by another time. 512 */ 513 while (cur_msg && cur_msg != list_prev(&mi->mi_msg_list, oldest_msg)) { 514 if (cur_msg->msg_type != RM_FACT) { 515 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 516 continue; 517 } 518 519 cur_fp = &cur_msg->rmsg_u.msg_fact; 520 /* 521 * If this fact has already been looked at, then so 522 * have all preceding facts. Return Now. 523 */ 524 if (cur_fp->rf_status == RFS_INSPECT) 525 return (found_a_fact); 526 527 cur_fp->rf_status = RFS_INSPECT; 528 found_a_fact = 1; 529 switch (cur_fp->rf_type) { 530 case RF_BADOWNER: 531 break; 532 case RF_ERR: 533 /* 534 * Don't want to overwrite a fact that was 535 * previously found during our current search. 536 */ 537 if (!ret_fp->rf_reboot) 538 ret_fp->rf_reboot = cur_fp->rf_reboot; 539 if (!ret_fp->rf_stat4) 540 ret_fp->rf_stat4 = cur_fp->rf_stat4; 541 if (!ret_fp->rf_action) 542 ret_fp->rf_action = cur_fp->rf_action; 543 break; 544 case RF_RENEW_EXPIRED: 545 if (cur_msg->msg_mntpt && !(*mnt_pt)) { 546 len = strlen(cur_msg->msg_mntpt) + 1; 547 *mnt_pt = kmem_alloc(len, KM_SLEEP); 548 bcopy(cur_msg->msg_mntpt, *mnt_pt, len); 549 } 550 break; 551 case RF_SRV_OK: 552 break; 553 case RF_SRV_NOT_RESPOND: 554 /* 555 * Okay to overwrite this fact as 556 * we want the earliest time. 557 */ 558 ret_fp->rf_time = cur_fp->rf_time; 559 break; 560 case RF_SRVS_OK: 561 break; 562 case RF_SRVS_NOT_RESPOND: 563 break; 564 case RF_DELMAP_CB_ERR: 565 break; 566 case RF_SENDQ_FULL: 567 break; 568 default: 569 zcmn_err(getzoneid(), CE_NOTE, 570 "get facts: illegal fact %d", cur_fp->rf_type); 571 break; 572 } 573 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 574 } 575 576 return (found_a_fact); 577 } 578 579 /* 580 * Returns 1 if this fact is identical to the last fact recorded 581 * (only checks for a match within the last 2 lease periods). 582 */ 583 static int 584 facts_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg, 585 mntinfo4_t *mi) 586 { 587 nfs4_rfact_t *fp1, *fp2; 588 int lease, len; 589 590 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 591 if (mi->mi_lease_period > 0) 592 lease = 2 * mi->mi_lease_period; 593 else 594 lease = DEFAULT_LEASE; 595 596 fp2 = &new_msg->rmsg_u.msg_fact; 597 598 while (cur_msg && 599 new_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) { 600 if (cur_msg->msg_type != RM_FACT) { 601 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 602 continue; 603 } 604 fp1 = &cur_msg->rmsg_u.msg_fact; 605 if (fp1->rf_type != fp2->rf_type) 606 return (0); 607 608 /* now actually compare the facts */ 609 if (fp1->rf_action != fp2->rf_action) 610 return (0); 611 if (fp1->rf_stat4 != fp2->rf_stat4) 612 return (0); 613 if (fp1->rf_reboot != fp2->rf_reboot) 614 return (0); 615 if (fp1->rf_op != fp2->rf_op) 616 return (0); 617 if (fp1->rf_time.tv_sec != fp2->rf_time.tv_sec) 618 return (0); 619 if (fp1->rf_error != fp2->rf_error) 620 return (0); 621 if (fp1->rf_rp1 != fp2->rf_rp1) 622 return (0); 623 if (cur_msg->msg_srv != NULL) { 624 if (new_msg->msg_srv == NULL) 625 return (0); 626 len = strlen(cur_msg->msg_srv); 627 if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, 628 len) != 0) 629 return (0); 630 } else if (new_msg->msg_srv != NULL) { 631 return (0); 632 } 633 if (cur_msg->msg_mntpt != NULL) { 634 if (new_msg->msg_mntpt == NULL) 635 return (0); 636 len = strlen(cur_msg->msg_mntpt); 637 if (strncmp(cur_msg->msg_mntpt, new_msg->msg_mntpt, 638 len) != 0) 639 return (0); 640 } else if (new_msg->msg_mntpt != NULL) { 641 return (0); 642 } 643 if (fp1->rf_char1 != NULL) { 644 if (fp2->rf_char1 == NULL) 645 return (0); 646 len = strlen(fp1->rf_char1); 647 if (strncmp(fp1->rf_char1, fp2->rf_char1, len) != 0) 648 return (0); 649 } else if (fp2->rf_char1 != NULL) { 650 return (0); 651 } 652 return (1); 653 } 654 655 return (0); 656 } 657 658 /* 659 * Returns 1 if these two messages are identical; 0 otherwise. 660 */ 661 static int 662 events_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg, 663 mntinfo4_t *mi) 664 { 665 nfs4_revent_t *ep1, *ep2; 666 int len; 667 668 /* find the last event, bypassing all facts */ 669 while (cur_msg && cur_msg->msg_type != RM_EVENT) 670 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 671 672 if (!cur_msg) 673 return (0); 674 675 if (cur_msg->msg_type != RM_EVENT) 676 return (0); 677 678 ep1 = &cur_msg->rmsg_u.msg_event; 679 ep2 = &new_msg->rmsg_u.msg_event; 680 if (ep1->re_type != ep2->re_type) 681 return (0); 682 683 /* 684 * Since we zalloc the buffer, then the two nfs4_debug_msg's 685 * must match up even if all the fields weren't filled in 686 * the first place. 687 */ 688 if (ep1->re_mi != ep2->re_mi) 689 return (0); 690 if (ep1->re_uint != ep2->re_uint) 691 return (0); 692 if (ep1->re_stat4 != ep2->re_stat4) 693 return (0); 694 if (ep1->re_pid != ep2->re_pid) 695 return (0); 696 if (ep1->re_rp1 != ep2->re_rp1) 697 return (0); 698 if (ep1->re_rp2 != ep2->re_rp2) 699 return (0); 700 if (ep1->re_tag1 != ep2->re_tag1) 701 return (0); 702 if (ep1->re_tag2 != ep2->re_tag2) 703 return (0); 704 if (ep1->re_seqid1 != ep2->re_seqid1) 705 return (0); 706 if (ep1->re_seqid2 != ep2->re_seqid2) 707 return (0); 708 709 if (cur_msg->msg_srv != NULL) { 710 if (new_msg->msg_srv == NULL) 711 return (0); 712 len = strlen(cur_msg->msg_srv); 713 if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, len) != 0) 714 return (0); 715 } else if (new_msg->msg_srv != NULL) { 716 return (0); 717 } 718 719 if (ep1->re_char1 != NULL) { 720 if (ep2->re_char1 == NULL) 721 return (0); 722 len = strlen(ep1->re_char1); 723 if (strncmp(ep1->re_char1, ep2->re_char1, len) != 0) 724 return (0); 725 } else if (ep2->re_char1 != NULL) { 726 return (0); 727 } 728 729 if (ep1->re_char2 != NULL) { 730 if (ep2->re_char2 == NULL) 731 return (0); 732 len = strlen(ep1->re_char2); 733 if (strncmp(ep1->re_char2, ep2->re_char2, len) != 0) 734 return (0); 735 } else if (ep2->re_char2 != NULL) { 736 return (0); 737 } 738 739 if (cur_msg->msg_mntpt != NULL) { 740 if (new_msg->msg_mntpt == NULL) 741 return (0); 742 len = strlen(cur_msg->msg_mntpt); 743 if (strncmp(cur_msg->msg_mntpt, cur_msg->msg_mntpt, len) != 0) 744 return (0); 745 } else if (new_msg->msg_mntpt != NULL) { 746 return (0); 747 } 748 749 return (1); 750 } 751 752 /* 753 * Free up a recovery event. 754 */ 755 static void 756 free_event(nfs4_revent_t *ep) 757 { 758 int len; 759 760 if (ep->re_char1) { 761 len = strlen(ep->re_char1) + 1; 762 kmem_free(ep->re_char1, len); 763 } 764 if (ep->re_char2) { 765 len = strlen(ep->re_char2) + 1; 766 kmem_free(ep->re_char2, len); 767 } 768 } 769 770 /* 771 * Free up a recovery fact. 772 */ 773 static void 774 free_fact(nfs4_rfact_t *fp) 775 { 776 int len; 777 778 if (fp->rf_char1) { 779 len = strlen(fp->rf_char1) + 1; 780 kmem_free(fp->rf_char1, len); 781 } 782 } 783 784 /* 785 * Free up the message. 786 */ 787 void 788 nfs4_free_msg(nfs4_debug_msg_t *msg) 789 { 790 int len; 791 792 if (msg->msg_type == RM_EVENT) 793 free_event(&msg->rmsg_u.msg_event); 794 else 795 free_fact(&msg->rmsg_u.msg_fact); 796 797 if (msg->msg_srv) { 798 len = strlen(msg->msg_srv) + 1; 799 kmem_free(msg->msg_srv, len); 800 } 801 802 if (msg->msg_mntpt) { 803 len = strlen(msg->msg_mntpt) + 1; 804 kmem_free(msg->msg_mntpt, len); 805 } 806 807 /* free up the data structure itself */ 808 kmem_free(msg, sizeof (*msg)); 809 } 810 811 /* 812 * Prints out the interesting facts for recovery events: 813 * -DEAD_FILE 814 * -SIGLOST(_NO_DUMP) 815 */ 816 static void 817 print_facts(nfs4_debug_msg_t *msg, mntinfo4_t *mi) 818 { 819 nfs4_rfact_t *fp; 820 char *mount_pt; 821 int len; 822 823 if (msg->rmsg_u.msg_event.re_type != RE_DEAD_FILE && 824 msg->rmsg_u.msg_event.re_type != RE_SIGLOST && 825 msg->rmsg_u.msg_event.re_type != RE_SIGLOST_NO_DUMP) 826 return; 827 828 fp = kmem_zalloc(sizeof (*fp), KM_SLEEP); 829 mount_pt = NULL; 830 831 if (get_facts(msg, fp, &mount_pt, mi)) { 832 char time[256]; 833 834 835 if (fp->rf_time.tv_sec) 836 (void) snprintf(time, 256, "%ld", 837 (gethrestime_sec() - fp->rf_time.tv_sec)/60); 838 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 839 "!NFS4 FACT SHEET: %s%s %s%s %s %s%s%s %s%s", 840 fp->rf_action ? "\n Action: " : "", 841 fp->rf_action ? nfs4_recov_action_to_str(fp->rf_action) : 842 "", 843 fp->rf_stat4 ? "\n NFS4 error: " : "", 844 fp->rf_stat4 ? nfs4_stat_to_str(fp->rf_stat4) : "", 845 fp->rf_reboot ? "\n Suspected server reboot. " : "", 846 fp->rf_time.tv_sec ? "\n Server was down for " : "", 847 fp->rf_time.tv_sec ? time : "", 848 fp->rf_time.tv_sec ? " minutes." : "", 849 mount_pt ? " \n Client's lease expired on mount " : "", 850 mount_pt ? mount_pt : ""); 851 } 852 853 if (mount_pt) { 854 len = strlen(mount_pt) + 1; 855 kmem_free(mount_pt, len); 856 } 857 858 /* free the fact struct itself */ 859 if (fp) 860 kmem_free(fp, sizeof (*fp)); 861 } 862 863 /* 864 * Print an event message to /var/adm/messages 865 * The last argument to this fuction dictates the repeat status 866 * of the event. If set to 1, it means that we are dumping this 867 * event and it will _never_ be printed after this time. Else if 868 * set to 0 it will be printed again. 869 */ 870 static void 871 queue_print_event(nfs4_debug_msg_t *msg, mntinfo4_t *mi, int dump) 872 { 873 nfs4_revent_t *ep; 874 zoneid_t zoneid; 875 876 ep = &msg->rmsg_u.msg_event; 877 zoneid = mi->mi_zone->zone_id; 878 879 switch (ep->re_type) { 880 case RE_BAD_SEQID: 881 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 882 "Operation %s for file %s (rnode_pt 0x%p), pid %d using " 883 "seqid %d got %s. Last good seqid was %d for " 884 "operation %s.", 885 msg->msg_srv, msg->msg_mntpt, 886 nfs4_ctags[ep->re_tag1].ct_str, ep->re_char1, 887 (void *)ep->re_rp1, ep->re_pid, ep->re_seqid1, 888 nfs4_stat_to_str(ep->re_stat4), ep->re_seqid2, 889 nfs4_ctags[ep->re_tag2].ct_str); 890 break; 891 case RE_BADHANDLE: 892 ASSERT(ep->re_rp1 != NULL); 893 if (ep->re_char1 != NULL) { 894 zcmn_err(zoneid, CE_NOTE, 895 "![NFS4][Server: %s][Mntpt: %s]" 896 "server %s said filehandle was " 897 "invalid for file: %s (rnode_pt 0x%p) on mount %s", 898 msg->msg_srv, msg->msg_mntpt, msg->msg_srv, 899 ep->re_char1, (void *)ep->re_rp1, msg->msg_mntpt); 900 } else { 901 zcmn_err(zoneid, CE_NOTE, 902 "![NFS4][Server: %s][Mntpt: %s]" 903 "server %s said filehandle was " 904 "invalid for file: (rnode_pt 0x%p) on mount %s" 905 " for fh:", msg->msg_srv, msg->msg_mntpt, 906 msg->msg_srv, (void *)ep->re_rp1, msg->msg_mntpt); 907 sfh4_printfhandle(ep->re_rp1->r_fh); 908 } 909 break; 910 case RE_CLIENTID: 911 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 912 "Can't recover clientid on mount point %s " 913 "(mi 0x%p) due to error %d (%s), for server %s. Marking " 914 "file system as unusable.", 915 msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt, 916 (void *)ep->re_mi, ep->re_uint, 917 nfs4_stat_to_str(ep->re_stat4), 918 msg->msg_srv); 919 break; 920 case RE_DEAD_FILE: 921 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 922 "File %s (rnode_pt: %p) was closed due to NFS " 923 "recovery error on server %s(%s %s)", msg->msg_srv, 924 msg->msg_mntpt, ep->re_char1, (void *)ep->re_rp1, 925 msg->msg_srv, ep->re_char2 ? ep->re_char2 : "", 926 ep->re_stat4 ? nfs4_stat_to_str(ep->re_stat4) : ""); 927 break; 928 case RE_END: 929 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 930 "NFS Recovery done for mount %s (mi 0x%p) " 931 "on server %s, rnode_pt1 %s (0x%p), " 932 "rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt, 933 msg->msg_mntpt, (void *)ep->re_mi, msg->msg_srv, 934 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 935 (void *)ep->re_rp2); 936 break; 937 case RE_FAIL_RELOCK: 938 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 939 "Couldn't reclaim lock for pid %d for " 940 "file %s (rnode_pt 0x%p) on (server %s): error %d", 941 msg->msg_srv, msg->msg_mntpt, ep->re_pid, ep->re_char1, 942 (void *)ep->re_rp1, msg->msg_srv, 943 ep->re_uint ? ep->re_uint : ep->re_stat4); 944 break; 945 case RE_FAIL_REMAP_LEN: 946 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 947 "remap_lookup: server %s returned bad " 948 "fhandle length (%d)", msg->msg_srv, msg->msg_mntpt, 949 msg->msg_srv, ep->re_uint); 950 break; 951 case RE_FAIL_REMAP_OP: 952 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 953 "remap_lookup: didn't get expected OP_GETFH" 954 " for server %s", msg->msg_srv, msg->msg_mntpt, 955 msg->msg_srv); 956 break; 957 case RE_FAILOVER: 958 if (ep->re_char1) 959 zcmn_err(zoneid, CE_NOTE, 960 "![NFS4][Server: %s][Mntpt: %s]" 961 "failing over from %s to %s", msg->msg_srv, 962 msg->msg_mntpt, msg->msg_srv, ep->re_char1); 963 else 964 zcmn_err(zoneid, CE_NOTE, 965 "![NFS4][Server: %s][Mntpt: %s]" 966 "NFS4: failing over: selecting " 967 "original server %s", msg->msg_srv, msg->msg_mntpt, 968 msg->msg_srv); 969 break; 970 case RE_FILE_DIFF: 971 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 972 "File %s (rnode_pt: %p) on server %s was closed " 973 "and failed attempted failover since its is different than " 974 "the original file", msg->msg_srv, msg->msg_mntpt, 975 ep->re_char1, (void *)ep->re_rp1, msg->msg_srv); 976 break; 977 case RE_LOST_STATE: 978 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 979 "Lost %s request for fs %s, file %s (rnode_pt: 0x%p), " 980 "dir %s (0x%p) for server %s", msg->msg_srv, msg->msg_mntpt, 981 nfs4_op_to_str(ep->re_uint), msg->msg_mntpt, 982 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 983 (void *)ep->re_rp2, msg->msg_srv); 984 break; 985 case RE_OPENS_CHANGED: 986 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 987 "The number of open files to reopen changed " 988 "for mount %s mi 0x%p (old %d, new %d) on server %s", 989 msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt, 990 (void *)ep->re_mi, ep->re_uint, ep->re_pid, msg->msg_srv); 991 break; 992 case RE_SIGLOST: 993 case RE_SIGLOST_NO_DUMP: 994 if (ep->re_uint) 995 zcmn_err(zoneid, CE_NOTE, 996 "![NFS4][Server: %s][Mntpt: %s]" 997 "Process %d lost its locks on " 998 "file %s (rnode_pt: %p) due to NFS recovery error " 999 "(%d) on server %s.", msg->msg_srv, msg->msg_mntpt, 1000 ep->re_pid, ep->re_char1, (void *)ep->re_rp1, 1001 ep->re_uint, msg->msg_srv); 1002 else 1003 zcmn_err(zoneid, CE_NOTE, 1004 "![NFS4][Server: %s][Mntpt: %s]" 1005 "Process %d lost its locks on " 1006 "file %s (rnode_pt: %p) due to NFS recovery error " 1007 "(%s) on server %s.", msg->msg_srv, msg->msg_mntpt, 1008 ep->re_pid, ep->re_char1, (void *)ep->re_rp1, 1009 nfs4_stat_to_str(ep->re_stat4), msg->msg_srv); 1010 break; 1011 case RE_START: 1012 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1013 "NFS Starting recovery for mount %s " 1014 "(mi 0x%p mi_recovflags [0x%x]) on server %s, " 1015 "rnode_pt1 %s (0x%p), rnode_pt2 %s (0x%p)", msg->msg_srv, 1016 msg->msg_mntpt, msg->msg_mntpt, (void *)ep->re_mi, 1017 ep->re_uint, msg->msg_srv, ep->re_char1, (void *)ep->re_rp1, 1018 ep->re_char2, (void *)ep->re_rp2); 1019 break; 1020 case RE_UNEXPECTED_ACTION: 1021 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1022 "NFS recovery: unexpected action (%s) on server %s", 1023 msg->msg_srv, msg->msg_mntpt, 1024 nfs4_recov_action_to_str(ep->re_uint), msg->msg_srv); 1025 break; 1026 case RE_UNEXPECTED_ERRNO: 1027 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1028 "NFS recovery: unexpected errno (%d) on server %s", 1029 msg->msg_srv, msg->msg_mntpt, ep->re_uint, msg->msg_srv); 1030 break; 1031 case RE_UNEXPECTED_STATUS: 1032 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1033 "NFS recovery: unexpected NFS status code (%s) " 1034 "on server %s", msg->msg_srv, msg->msg_mntpt, 1035 nfs4_stat_to_str(ep->re_stat4), 1036 msg->msg_srv); 1037 break; 1038 case RE_WRONGSEC: 1039 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1040 "NFS can't recover from NFS4ERR_WRONGSEC." 1041 " error %d for server %s: rnode_pt1 %s (0x%p)" 1042 " rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt, 1043 ep->re_uint, msg->msg_srv, ep->re_char1, (void *)ep->re_rp1, 1044 ep->re_char2, (void *)ep->re_rp2); 1045 break; 1046 case RE_LOST_STATE_BAD_OP: 1047 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1048 "NFS lost state with unrecognized op (%d)." 1049 " fs %s, server %s, pid %d, file %s (rnode_pt: 0x%p), " 1050 "dir %s (0x%p)", msg->msg_srv, msg->msg_mntpt, 1051 ep->re_uint, msg->msg_mntpt, msg->msg_srv, ep->re_pid, 1052 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 1053 (void *)ep->re_rp2); 1054 break; 1055 case RE_REFERRAL: 1056 if (ep->re_char1) 1057 zcmn_err(zoneid, CE_NOTE, 1058 "![NFS4][Server: %s][Mntpt: %s]" 1059 "being referred from %s to %s", msg->msg_srv, 1060 msg->msg_mntpt, msg->msg_srv, ep->re_char1); 1061 else 1062 zcmn_err(zoneid, CE_NOTE, 1063 "![NFS4][Server: %s][Mntpt: %s]" 1064 "NFS4: being referred from %s to unknown server", 1065 msg->msg_srv, msg->msg_mntpt, msg->msg_srv); 1066 break; 1067 default: 1068 zcmn_err(zoneid, CE_WARN, 1069 "!queue_print_event: illegal event %d", ep->re_type); 1070 break; 1071 } 1072 1073 print_facts(msg, mi); 1074 1075 /* 1076 * If set this event will not be printed again and is considered 1077 * dumped. 1078 */ 1079 if (dump) 1080 msg->msg_status = NFS4_MS_NO_DUMP; 1081 } 1082 1083 /* 1084 * Print a fact message to /var/adm/messages 1085 */ 1086 static void 1087 queue_print_fact(nfs4_debug_msg_t *msg, int dump) 1088 { 1089 nfs4_rfact_t *fp; 1090 zoneid_t zoneid; 1091 1092 fp = &msg->rmsg_u.msg_fact; 1093 zoneid = getzoneid(); 1094 1095 switch (fp->rf_type) { 1096 case RF_BADOWNER: 1097 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1098 "NFSMAPID_DOMAIN does not match the server: %s domain\n" 1099 "Please check configuration", msg->msg_srv, msg->msg_mntpt, 1100 msg->msg_srv); 1101 break; 1102 case RF_ERR: 1103 if (fp->rf_error) 1104 zcmn_err(zoneid, CE_NOTE, 1105 "![NFS4][Server: %s][Mntpt: %s]NFS op %s got " 1106 "error %d causing recovery action %s.%s", 1107 msg->msg_srv, msg->msg_mntpt, 1108 nfs4_op_to_str(fp->rf_op), fp->rf_error, 1109 nfs4_recov_action_to_str(fp->rf_action), 1110 fp->rf_reboot ? 1111 " Client also suspects that the server rebooted," 1112 " or experienced a network partition." : ""); 1113 else 1114 zcmn_err(zoneid, CE_NOTE, 1115 "![NFS4][Server: %s][Mntpt: %s]NFS op %s got " 1116 "error %s causing recovery action %s.%s", 1117 msg->msg_srv, msg->msg_mntpt, 1118 nfs4_op_to_str(fp->rf_op), 1119 nfs4_stat_to_str(fp->rf_stat4), 1120 nfs4_recov_action_to_str(fp->rf_action), 1121 fp->rf_reboot ? 1122 " Client also suspects that the server rebooted," 1123 " or experienced a network partition." : ""); 1124 break; 1125 case RF_RENEW_EXPIRED: 1126 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1127 "NFS4 renew thread detected client's " 1128 "lease has expired. Current open files/locks/IO may fail", 1129 msg->msg_srv, msg->msg_mntpt); 1130 break; 1131 case RF_SRV_NOT_RESPOND: 1132 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1133 "NFS server %s not responding; still trying\n", 1134 msg->msg_srv, msg->msg_mntpt, msg->msg_srv); 1135 break; 1136 case RF_SRV_OK: 1137 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1138 "NFS server %s ok", msg->msg_srv, msg->msg_mntpt, 1139 msg->msg_srv); 1140 break; 1141 case RF_SRVS_NOT_RESPOND: 1142 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1143 "NFS servers %s not responding; still trying", msg->msg_srv, 1144 msg->msg_mntpt, msg->msg_srv); 1145 break; 1146 case RF_SRVS_OK: 1147 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1148 "NFS servers %s ok", msg->msg_srv, msg->msg_mntpt, 1149 msg->msg_srv); 1150 break; 1151 case RF_DELMAP_CB_ERR: 1152 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1153 "NFS op %s got error %s when executing delmap on file %s " 1154 "(rnode_pt 0x%p).", 1155 msg->msg_srv, msg->msg_mntpt, nfs4_op_to_str(fp->rf_op), 1156 nfs4_stat_to_str(fp->rf_stat4), fp->rf_char1, 1157 (void *)fp->rf_rp1); 1158 break; 1159 case RF_SENDQ_FULL: 1160 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1161 "send queue to NFS server %s is full; still trying\n", 1162 msg->msg_srv, msg->msg_mntpt, msg->msg_srv); 1163 break; 1164 1165 default: 1166 zcmn_err(zoneid, CE_WARN, "!queue_print_fact: illegal fact %d", 1167 fp->rf_type); 1168 } 1169 1170 /* 1171 * If set this fact will not be printed again and is considered 1172 * dumped. 1173 */ 1174 if (dump) 1175 msg->msg_status = NFS4_MS_NO_DUMP; 1176 } 1177 1178 /* 1179 * Returns 1 if the entire queue should be dumped, 0 otherwise. 1180 */ 1181 static int 1182 id_to_dump_queue(nfs4_event_type_t id) 1183 { 1184 switch (id) { 1185 case RE_DEAD_FILE: 1186 case RE_SIGLOST: 1187 case RE_WRONGSEC: 1188 case RE_CLIENTID: 1189 return (1); 1190 default: 1191 return (0); 1192 } 1193 } 1194 1195 /* 1196 * Returns 1 if the event (but not the entire queue) should be printed; 1197 * 0 otherwise. 1198 */ 1199 static int 1200 id_to_dump_solo_event(nfs4_event_type_t id) 1201 { 1202 switch (id) { 1203 case RE_BAD_SEQID: 1204 case RE_BADHANDLE: 1205 case RE_FAIL_REMAP_LEN: 1206 case RE_FAIL_REMAP_OP: 1207 case RE_FAILOVER: 1208 case RE_OPENS_CHANGED: 1209 case RE_SIGLOST_NO_DUMP: 1210 case RE_UNEXPECTED_ACTION: 1211 case RE_UNEXPECTED_ERRNO: 1212 case RE_UNEXPECTED_STATUS: 1213 case RE_LOST_STATE_BAD_OP: 1214 case RE_REFERRAL: 1215 return (1); 1216 default: 1217 return (0); 1218 } 1219 } 1220 1221 /* 1222 * Returns 1 if the fact (but not the entire queue) should be printed; 1223 * 0 otherwise. 1224 */ 1225 static int 1226 id_to_dump_solo_fact(nfs4_fact_type_t id) 1227 { 1228 switch (id) { 1229 case RF_SRV_NOT_RESPOND: 1230 case RF_SRV_OK: 1231 case RF_SRVS_NOT_RESPOND: 1232 case RF_SRVS_OK: 1233 case RF_SENDQ_FULL: 1234 return (1); 1235 default: 1236 return (0); 1237 } 1238 } 1239 1240 /* 1241 * Update a kernel stat 1242 */ 1243 static void 1244 update_recov_kstats(nfs4_debug_msg_t *msg, mntinfo4_t *mi) 1245 { 1246 rkstat_t *rsp; 1247 1248 if (!mi->mi_recov_ksp) 1249 return; 1250 1251 rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data; 1252 1253 if (msg->msg_type == RM_EVENT) { 1254 switch (msg->rmsg_u.msg_event.re_type) { 1255 case RE_BADHANDLE: 1256 rsp->badhandle.value.ul++; 1257 break; 1258 case RE_CLIENTID: 1259 rsp->clientid.value.ul++; 1260 break; 1261 case RE_DEAD_FILE: 1262 rsp->dead_file.value.ul++; 1263 break; 1264 case RE_FAIL_RELOCK: 1265 rsp->fail_relock.value.ul++; 1266 break; 1267 case RE_FILE_DIFF: 1268 rsp->file_diff.value.ul++; 1269 break; 1270 case RE_OPENS_CHANGED: 1271 rsp->opens_changed.value.ul++; 1272 break; 1273 case RE_SIGLOST: 1274 case RE_SIGLOST_NO_DUMP: 1275 rsp->siglost.value.ul++; 1276 break; 1277 case RE_UNEXPECTED_ACTION: 1278 rsp->unexp_action.value.ul++; 1279 break; 1280 case RE_UNEXPECTED_ERRNO: 1281 rsp->unexp_errno.value.ul++; 1282 break; 1283 case RE_UNEXPECTED_STATUS: 1284 rsp->unexp_status.value.ul++; 1285 break; 1286 case RE_WRONGSEC: 1287 rsp->wrongsec.value.ul++; 1288 break; 1289 case RE_LOST_STATE_BAD_OP: 1290 rsp->lost_state_bad_op.value.ul++; 1291 break; 1292 default: 1293 break; 1294 } 1295 } else if (msg->msg_type == RM_FACT) { 1296 switch (msg->rmsg_u.msg_fact.rf_type) { 1297 case RF_BADOWNER: 1298 rsp->badowner.value.ul++; 1299 break; 1300 case RF_SRV_NOT_RESPOND: 1301 rsp->not_responding.value.ul++; 1302 break; 1303 default: 1304 break; 1305 } 1306 } 1307 } 1308 1309 /* 1310 * Dump the mi's mi_msg_list of recovery messages. 1311 */ 1312 static void 1313 dump_queue(mntinfo4_t *mi, nfs4_debug_msg_t *msg) 1314 { 1315 nfs4_debug_msg_t *tmp_msg; 1316 1317 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 1318 1319 /* update kstats */ 1320 update_recov_kstats(msg, mi); 1321 1322 /* 1323 * If we aren't supposed to dump the queue then see if we 1324 * should just print this single message, then return. 1325 */ 1326 if (!id_to_dump_queue(msg->rmsg_u.msg_event.re_type)) { 1327 if (id_to_dump_solo_event(msg->rmsg_u.msg_event.re_type)) 1328 queue_print_event(msg, mi, 0); 1329 return; 1330 } 1331 1332 /* 1333 * Write all events/facts in the queue that haven't been 1334 * previously written to disk. 1335 */ 1336 tmp_msg = list_head(&mi->mi_msg_list); 1337 while (tmp_msg) { 1338 if (tmp_msg->msg_status == NFS4_MS_DUMP) { 1339 if (tmp_msg->msg_type == RM_EVENT) 1340 queue_print_event(tmp_msg, mi, 1); 1341 else if (tmp_msg->msg_type == RM_FACT) 1342 queue_print_fact(tmp_msg, 1); 1343 } 1344 tmp_msg = list_next(&mi->mi_msg_list, tmp_msg); 1345 } 1346 } 1347 1348 /* 1349 * Places the event into mi's debug recovery message queue. Some of the 1350 * fields can be overloaded to be a generic value, depending on the event 1351 * type. These include "count", "why". 1352 */ 1353 void 1354 nfs4_queue_event(nfs4_event_type_t id, mntinfo4_t *mi, char *server1, 1355 uint_t count, vnode_t *vp1, vnode_t *vp2, nfsstat4 nfs4_error, 1356 char *why, pid_t pid, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2, 1357 seqid4 seqid1, seqid4 seqid2) 1358 { 1359 nfs4_debug_msg_t *msg; 1360 nfs4_revent_t *ep; 1361 char *cur_srv; 1362 rnode4_t *rp1 = NULL, *rp2 = NULL; 1363 refstr_t *mntpt; 1364 1365 ASSERT(mi != NULL); 1366 if (vp1) 1367 rp1 = VTOR4(vp1); 1368 if (vp2) 1369 rp2 = VTOR4(vp2); 1370 1371 /* 1372 * Initialize the message with the relevant server/mount_pt/time 1373 * information. Also place the relevent event related info. 1374 */ 1375 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); 1376 msg->msg_type = RM_EVENT; 1377 msg->msg_status = NFS4_MS_DUMP; 1378 ep = &msg->rmsg_u.msg_event; 1379 ep->re_type = id; 1380 gethrestime(&msg->msg_time); 1381 1382 cur_srv = mi->mi_curr_serv->sv_hostname; 1383 msg->msg_srv = strdup(cur_srv); 1384 mntpt = vfs_getmntpoint(mi->mi_vfsp); 1385 msg->msg_mntpt = strdup(refstr_value(mntpt)); 1386 refstr_rele(mntpt); 1387 1388 set_event(id, ep, mi, rp1, rp2, count, pid, nfs4_error, server1, 1389 why, tag1, tag2, seqid1, seqid2); 1390 1391 mutex_enter(&mi->mi_msg_list_lock); 1392 1393 /* if this event is the same as the last event, drop it */ 1394 if (events_same(list_tail(&mi->mi_msg_list), msg, mi)) { 1395 mutex_exit(&mi->mi_msg_list_lock); 1396 nfs4_free_msg(msg); 1397 return; 1398 } 1399 1400 /* queue the message at the end of the list */ 1401 list_insert_tail(&mi->mi_msg_list, msg); 1402 1403 dump_queue(mi, msg); 1404 1405 if (mi->mi_msg_count == nfs4_msg_max) { 1406 nfs4_debug_msg_t *rm_msg; 1407 1408 /* remove the queue'd message at the front of the list */ 1409 rm_msg = list_head(&mi->mi_msg_list); 1410 list_remove(&mi->mi_msg_list, rm_msg); 1411 mutex_exit(&mi->mi_msg_list_lock); 1412 nfs4_free_msg(rm_msg); 1413 } else { 1414 mi->mi_msg_count++; 1415 mutex_exit(&mi->mi_msg_list_lock); 1416 } 1417 } 1418 1419 /* 1420 * Places the fact into mi's debug recovery messages queue. 1421 */ 1422 void 1423 nfs4_queue_fact(nfs4_fact_type_t fid, mntinfo4_t *mi, nfsstat4 stat4, 1424 nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, char *srvname, 1425 int error, vnode_t *vp) 1426 { 1427 nfs4_debug_msg_t *msg; 1428 nfs4_rfact_t *fp; 1429 char *cur_srv; 1430 refstr_t *mntpt; 1431 1432 /* 1433 * Initialize the message with the relevant server/mount_pt/time 1434 * information. Also place the relevant fact related info. 1435 */ 1436 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); 1437 msg->msg_type = RM_FACT; 1438 msg->msg_status = NFS4_MS_DUMP; 1439 gethrestime(&msg->msg_time); 1440 1441 if (srvname) 1442 cur_srv = srvname; 1443 else 1444 cur_srv = mi->mi_curr_serv->sv_hostname; 1445 1446 msg->msg_srv = strdup(cur_srv); 1447 mntpt = vfs_getmntpoint(mi->mi_vfsp); 1448 msg->msg_mntpt = strdup(refstr_value(mntpt)); 1449 refstr_rele(mntpt); 1450 1451 fp = &msg->rmsg_u.msg_fact; 1452 fp->rf_type = fid; 1453 fp->rf_status = RFS_NO_INSPECT; 1454 set_fact(fid, fp, stat4, raction, op, reboot, error, vp); 1455 1456 update_recov_kstats(msg, mi); 1457 1458 mutex_enter(&mi->mi_msg_list_lock); 1459 1460 /* if this fact is the same as the last fact, drop it */ 1461 if (facts_same(list_tail(&mi->mi_msg_list), msg, mi)) { 1462 mutex_exit(&mi->mi_msg_list_lock); 1463 nfs4_free_msg(msg); 1464 return; 1465 } 1466 1467 /* queue the message at the end of the list */ 1468 list_insert_tail(&mi->mi_msg_list, msg); 1469 1470 if (id_to_dump_solo_fact(msg->rmsg_u.msg_fact.rf_type)) 1471 queue_print_fact(msg, 0); 1472 1473 if (mi->mi_msg_count == nfs4_msg_max) { 1474 nfs4_debug_msg_t *rm_msg; 1475 1476 /* remove the queue'd message at the front of the list */ 1477 rm_msg = list_head(&mi->mi_msg_list); 1478 list_remove(&mi->mi_msg_list, rm_msg); 1479 mutex_exit(&mi->mi_msg_list_lock); 1480 nfs4_free_msg(rm_msg); 1481 } else { 1482 mi->mi_msg_count++; 1483 mutex_exit(&mi->mi_msg_list_lock); 1484 } 1485 } 1486 1487 /* 1488 * Initialize the 'mi_recov_kstat' kstat. 1489 */ 1490 void 1491 nfs4_mnt_recov_kstat_init(vfs_t *vfsp) 1492 { 1493 mntinfo4_t *mi = VFTOMI4(vfsp); 1494 kstat_t *ksp; 1495 zoneid_t zoneid = mi->mi_zone->zone_id; 1496 1497 /* 1498 * Create the version specific kstats. 1499 * 1500 * PSARC 2001/697 Contract Private Interface 1501 * All nfs kstats are under SunMC contract 1502 * Please refer to the PSARC listed above and contact 1503 * SunMC before making any changes! 1504 * 1505 * Changes must be reviewed by Solaris File Sharing 1506 * Changes must be communicated to contract-2001-697@sun.com 1507 * 1508 */ 1509 1510 if ((ksp = kstat_create_zone("nfs", getminor(vfsp->vfs_dev), 1511 "mi_recov_kstat", "misc", KSTAT_TYPE_NAMED, 1512 sizeof (rkstat_t) / sizeof (kstat_named_t), 1513 KSTAT_FLAG_WRITABLE, zoneid)) == NULL) { 1514 mi->mi_recov_ksp = NULL; 1515 zcmn_err(GLOBAL_ZONEID, CE_NOTE, 1516 "!mi_recov_kstat for mi %p failed\n", 1517 (void *)mi); 1518 return; 1519 } 1520 if (zoneid != GLOBAL_ZONEID) 1521 kstat_zone_add(ksp, GLOBAL_ZONEID); 1522 mi->mi_recov_ksp = ksp; 1523 bcopy(&rkstat_template, ksp->ks_data, sizeof (rkstat_t)); 1524 kstat_install(ksp); 1525 } 1526 1527 /* 1528 * Increment the "delay" kstat. 1529 */ 1530 void 1531 nfs4_mi_kstat_inc_delay(mntinfo4_t *mi) 1532 { 1533 rkstat_t *rsp; 1534 1535 if (!mi->mi_recov_ksp) 1536 return; 1537 1538 rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data; 1539 rsp->delay.value.ul++; 1540 } 1541 1542 /* 1543 * Increment the "no_grace" kstat. 1544 */ 1545 void 1546 nfs4_mi_kstat_inc_no_grace(mntinfo4_t *mi) 1547 { 1548 rkstat_t *rsp; 1549 1550 if (!mi->mi_recov_ksp) 1551 return; 1552 1553 rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data; 1554 rsp->no_grace.value.ul++; 1555 } 1556