1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/cred.h> 27 #include <sys/kstat.h> 28 #include <sys/list.h> 29 #include <sys/systm.h> 30 #include <sys/vfs.h> 31 #include <sys/vnode.h> 32 #include <sys/cmn_err.h> 33 34 #include <nfs/nfs4_clnt.h> 35 #include <nfs/rnode4.h> 36 37 /* 38 * Recovery kstats 39 */ 40 typedef struct rkstat { 41 kstat_named_t badhandle; 42 kstat_named_t badowner; 43 kstat_named_t clientid; 44 kstat_named_t dead_file; 45 kstat_named_t delay; 46 kstat_named_t fail_relock; 47 kstat_named_t file_diff; 48 kstat_named_t no_grace; 49 kstat_named_t not_responding; 50 kstat_named_t opens_changed; 51 kstat_named_t siglost; 52 kstat_named_t unexp_action; 53 kstat_named_t unexp_errno; 54 kstat_named_t unexp_status; 55 kstat_named_t wrongsec; 56 kstat_named_t lost_state_bad_op; 57 } rkstat_t; 58 59 static rkstat_t rkstat_template = { 60 { "badhandle", KSTAT_DATA_ULONG }, 61 { "badowner", KSTAT_DATA_ULONG }, 62 { "clientid", KSTAT_DATA_ULONG }, 63 { "dead_file", KSTAT_DATA_ULONG }, 64 { "delay", KSTAT_DATA_ULONG }, 65 { "fail_relock", KSTAT_DATA_ULONG }, 66 { "file_diff", KSTAT_DATA_ULONG }, 67 { "no_grace", KSTAT_DATA_ULONG }, 68 { "not_responding", KSTAT_DATA_ULONG }, 69 { "opens_changed", KSTAT_DATA_ULONG }, 70 { "siglost", KSTAT_DATA_ULONG }, 71 { "unexp_action", KSTAT_DATA_ULONG }, 72 { "unexp_errno", KSTAT_DATA_ULONG }, 73 { "unexp_status", KSTAT_DATA_ULONG }, 74 { "wrongsec", KSTAT_DATA_ULONG }, 75 { "bad_op", KSTAT_DATA_ULONG }, 76 }; 77 78 /* maximum number of messages allowed on the mi's mi_msg_list */ 79 int nfs4_msg_max = NFS4_MSG_MAX; 80 #define DEFAULT_LEASE 180 81 82 /* 83 * Sets the appropiate fields of "ep", given "id" and various parameters. 84 * Assumes that ep's fields have been initialized to zero/null, except for 85 * re_type and mount point info, which are already set. 86 */ 87 static void 88 set_event(nfs4_event_type_t id, nfs4_revent_t *ep, mntinfo4_t *mi, 89 rnode4_t *rp1, rnode4_t *rp2, uint_t count, pid_t pid, nfsstat4 nfs4_error, 90 char *server1, char *why, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2, 91 seqid4 seqid1, seqid4 seqid2) 92 { 93 int len; 94 95 switch (id) { 96 case RE_BAD_SEQID: 97 ep->re_mi = mi; 98 99 /* bad seqid'd file <path/component name> */ 100 if (rp1 && rp1->r_svnode.sv_name) 101 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 102 else 103 ep->re_char1 = NULL; 104 ep->re_rp1 = rp1; 105 106 /* for LOCK/LOCKU */ 107 ep->re_pid = pid; 108 109 ep->re_stat4 = nfs4_error; 110 ep->re_tag1 = tag1; 111 ep->re_tag2 = tag2; 112 ep->re_seqid1 = seqid1; 113 ep->re_seqid2 = seqid2; 114 break; 115 case RE_BADHANDLE: 116 ASSERT(rp1 != NULL); 117 118 /* dead file <path/component name> */ 119 if (rp1->r_svnode.sv_name) 120 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 121 else 122 ep->re_char1 = NULL; 123 ep->re_rp1 = rp1; 124 break; 125 case RE_CLIENTID: 126 ep->re_mi = mi; 127 128 /* the error we failed with */ 129 ep->re_uint = count; 130 ep->re_stat4 = nfs4_error; 131 break; 132 case RE_DEAD_FILE: 133 ASSERT(rp1 != NULL); 134 135 /* dead file <path/component name> */ 136 if (rp1->r_svnode.sv_name) 137 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 138 else 139 ep->re_char1 = NULL; 140 ep->re_rp1 = rp1; 141 142 /* why the file got killed */ 143 if (why) { 144 len = strlen(why); 145 ep->re_char2 = kmem_alloc(len + 1, KM_SLEEP); 146 bcopy(why, ep->re_char2, len); 147 ep->re_char2[len] = '\0'; 148 } else 149 ep->re_char2 = NULL; 150 151 ep->re_stat4 = nfs4_error; 152 break; 153 case RE_END: 154 /* first rnode */ 155 if (rp1 && rp1->r_svnode.sv_name) 156 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 157 else 158 ep->re_char1 = NULL; 159 ep->re_rp1 = rp1; 160 161 /* second rnode */ 162 if (rp2 && rp2->r_svnode.sv_name) 163 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 164 else 165 ep->re_char2 = NULL; 166 ep->re_rp2 = rp2; 167 168 ep->re_mi = mi; 169 break; 170 case RE_FAIL_RELOCK: 171 ASSERT(rp1 != NULL); 172 173 /* error on fail relock */ 174 ep->re_uint = count; 175 176 /* process that failed */ 177 ep->re_pid = pid; 178 179 /* nfs4 error */ 180 ep->re_stat4 = nfs4_error; 181 182 /* file <path/component name> */ 183 if (rp1->r_svnode.sv_name) 184 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 185 else 186 ep->re_char1 = NULL; 187 ep->re_rp1 = rp1; 188 break; 189 case RE_FAIL_REMAP_LEN: 190 /* length of returned filehandle */ 191 ep->re_uint = count; 192 break; 193 case RE_FAIL_REMAP_OP: 194 break; 195 case RE_FAILOVER: 196 /* server we're failing over to (if not picking original) */ 197 if (server1 != NULL) { 198 len = strlen(server1); 199 ep->re_char1 = kmem_alloc(len + 1, KM_SLEEP); 200 bcopy(server1, ep->re_char1, len); 201 ep->re_char1[len] = '\0'; 202 } else { 203 ep->re_char1 = NULL; 204 } 205 break; 206 case RE_FILE_DIFF: 207 ASSERT(rp1 != NULL); 208 209 /* dead file <path/component name> */ 210 if (rp1->r_svnode.sv_name) 211 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 212 else 213 ep->re_char1 = NULL; 214 ep->re_rp1 = rp1; 215 break; 216 case RE_LOST_STATE: 217 ep->re_uint = count; /* op number */ 218 if (rp1 && rp1->r_svnode.sv_name) 219 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 220 else 221 ep->re_char1 = NULL; 222 ep->re_rp1 = rp1; 223 if (rp2 && rp2->r_svnode.sv_name) 224 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 225 else 226 ep->re_char2 = NULL; 227 ep->re_rp2 = rp2; 228 break; 229 case RE_OPENS_CHANGED: 230 ep->re_mi = mi; 231 232 /* original number of open files */ 233 ep->re_uint = count; 234 /* new number of open files */ 235 ep->re_pid = pid; 236 break; 237 case RE_SIGLOST: 238 case RE_SIGLOST_NO_DUMP: 239 ASSERT(rp1 != NULL); 240 241 /* file <path/component name> */ 242 if (rp1->r_svnode.sv_name) 243 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 244 else 245 ep->re_char1 = NULL; 246 ep->re_rp1 = rp1; 247 ep->re_pid = pid; 248 ep->re_uint = count; 249 ep->re_stat4 = nfs4_error; 250 break; 251 case RE_START: 252 /* file <path/component name> */ 253 if (rp1 && rp1->r_svnode.sv_name) 254 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 255 else 256 ep->re_char1 = NULL; 257 ep->re_rp1 = rp1; 258 259 /* file <path/component name> */ 260 if (rp2 && rp2->r_svnode.sv_name) 261 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 262 else 263 ep->re_char2 = NULL; 264 ep->re_rp2 = rp2; 265 266 ep->re_mi = mi; 267 ep->re_uint = count; 268 break; 269 case RE_UNEXPECTED_ACTION: 270 case RE_UNEXPECTED_ERRNO: 271 /* the error that is unexpected */ 272 ep->re_uint = count; 273 break; 274 case RE_UNEXPECTED_STATUS: 275 /* nfsstat4 error */ 276 ep->re_stat4 = nfs4_error; 277 break; 278 case RE_WRONGSEC: 279 /* the error we failed with */ 280 ep->re_uint = count; 281 282 /* file <path/component name> */ 283 if (rp1 && rp1->r_svnode.sv_name) 284 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 285 else 286 ep->re_char1 = NULL; 287 ep->re_rp1 = rp1; 288 289 /* file <path/component name> */ 290 if (rp2 && rp2->r_svnode.sv_name) 291 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 292 else 293 ep->re_char2 = NULL; 294 ep->re_rp2 = rp2; 295 break; 296 case RE_LOST_STATE_BAD_OP: 297 ep->re_uint = count; /* the unexpected op */ 298 ep->re_pid = pid; 299 ep->re_rp1 = rp1; 300 if (rp1 != NULL && rp1->r_svnode.sv_name != NULL) 301 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 302 ep->re_rp2 = rp2; 303 if (rp2 != NULL && rp2->r_svnode.sv_name != NULL) 304 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 305 break; 306 default: 307 break; 308 } 309 } 310 311 /* 312 * Sets the appropiate fields of the 'fact' for this 'id'. 313 */ 314 static void 315 set_fact(nfs4_fact_type_t id, nfs4_rfact_t *fp, nfsstat4 stat4, 316 nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, int error, 317 vnode_t *vp) 318 { 319 rnode4_t *rp1; 320 321 switch (id) { 322 case RF_BADOWNER: 323 fp->rf_op = op; 324 fp->rf_reboot = reboot; 325 fp->rf_stat4 = stat4; 326 break; 327 case RF_RENEW_EXPIRED: 328 break; 329 case RF_ERR: 330 fp->rf_op = op; 331 fp->rf_reboot = reboot; 332 fp->rf_stat4 = stat4; 333 fp->rf_action = raction; 334 fp->rf_error = error; 335 break; 336 case RF_SRV_OK: 337 break; 338 case RF_SRV_NOT_RESPOND: 339 break; 340 case RF_SRVS_OK: 341 break; 342 case RF_SRVS_NOT_RESPOND: 343 gethrestime(&fp->rf_time); 344 break; 345 case RF_DELMAP_CB_ERR: 346 fp->rf_op = op; 347 fp->rf_stat4 = stat4; 348 349 rp1 = VTOR4(vp); 350 fp->rf_rp1 = rp1; 351 if (rp1 && rp1->r_svnode.sv_name) 352 fp->rf_char1 = fn_path(rp1->r_svnode.sv_name); 353 else 354 fp->rf_char1 = NULL; 355 break; 356 default: 357 zcmn_err(getzoneid(), CE_NOTE, "illegal fact %d", id); 358 break; 359 } 360 } 361 362 /* 363 * Returns 1 if the event/fact is of a successful communication 364 * from the server; 0 otherwise. 365 */ 366 static int 367 successful_comm(nfs4_debug_msg_t *msgp) 368 { 369 if (msgp->msg_type == RM_EVENT) { 370 switch (msgp->rmsg_u.msg_event.re_type) { 371 case RE_BAD_SEQID: 372 case RE_BADHANDLE: 373 case RE_FAIL_REMAP_LEN: 374 case RE_FAIL_REMAP_OP: 375 case RE_FILE_DIFF: 376 case RE_START: 377 case RE_UNEXPECTED_ACTION: 378 case RE_UNEXPECTED_ERRNO: 379 case RE_UNEXPECTED_STATUS: 380 case RE_WRONGSEC: 381 return (1); 382 case RE_CLIENTID: 383 case RE_DEAD_FILE: 384 case RE_END: 385 case RE_FAIL_RELOCK: 386 case RE_FAILOVER: 387 case RE_LOST_STATE: 388 case RE_OPENS_CHANGED: 389 case RE_SIGLOST: 390 case RE_SIGLOST_NO_DUMP: 391 case RE_LOST_STATE_BAD_OP: 392 return (0); 393 default: 394 return (0); 395 } 396 } else { 397 switch (msgp->rmsg_u.msg_fact.rf_type) { 398 case RF_BADOWNER: 399 case RF_ERR: 400 case RF_RENEW_EXPIRED: 401 case RF_SRV_OK: 402 case RF_SRVS_OK: 403 case RF_DELMAP_CB_ERR: 404 return (1); 405 case RF_SRV_NOT_RESPOND: 406 case RF_SRVS_NOT_RESPOND: 407 return (0); 408 default: 409 return (0); 410 } 411 } 412 } 413 414 /* 415 * Iterate backwards through the mi's mi_msg_list to find the earliest 416 * message that we should find relevant facts to investigate. 417 */ 418 static nfs4_debug_msg_t * 419 find_beginning(nfs4_debug_msg_t *first_msg, mntinfo4_t *mi) 420 { 421 nfs4_debug_msg_t *oldest_msg, *cur_msg; 422 time_t lease; 423 424 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 425 if (mi->mi_lease_period > 0) 426 lease = 2 * mi->mi_lease_period; 427 else 428 lease = DEFAULT_LEASE; 429 430 oldest_msg = first_msg; 431 cur_msg = list_prev(&mi->mi_msg_list, first_msg); 432 while (cur_msg && 433 first_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) { 434 oldest_msg = cur_msg; 435 if ((cur_msg->msg_type == RM_FACT) && 436 (cur_msg->rmsg_u.msg_fact.rf_type == RF_SRV_OK)) { 437 /* find where we lost contact with the server */ 438 while (cur_msg) { 439 if ((cur_msg->msg_type == RM_FACT) && 440 (cur_msg->rmsg_u.msg_fact.rf_type == 441 RF_SRV_NOT_RESPOND)) 442 break; 443 oldest_msg = cur_msg; 444 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 445 } 446 /* 447 * Find the first successful message before 448 * we lost contact with the server. 449 */ 450 if (cur_msg) { 451 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 452 while (cur_msg && !successful_comm(cur_msg)) { 453 oldest_msg = cur_msg; 454 cur_msg = list_prev(&mi->mi_msg_list, 455 cur_msg); 456 } 457 } 458 /* 459 * If we're not at the dummy head pointer, 460 * set the oldest and current message. 461 */ 462 if (cur_msg) { 463 first_msg = cur_msg; 464 oldest_msg = cur_msg; 465 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 466 } 467 } else 468 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 469 } 470 471 return (oldest_msg); 472 } 473 474 /* 475 * Returns 1 if facts have been found; 0 otherwise. 476 */ 477 static int 478 get_facts(nfs4_debug_msg_t *msgp, nfs4_rfact_t *ret_fp, char **mnt_pt, 479 mntinfo4_t *mi) 480 { 481 nfs4_debug_msg_t *cur_msg, *oldest_msg; 482 nfs4_rfact_t *cur_fp; 483 int found_a_fact = 0; 484 int len; 485 486 cur_msg = msgp; 487 488 /* find the oldest msg to search backwards to */ 489 oldest_msg = find_beginning(cur_msg, mi); 490 ASSERT(oldest_msg != NULL); 491 492 /* 493 * Create a fact sheet by searching from our current message 494 * backwards to the 'oldest_msg', recording facts along the way 495 * until we found facts that have been inspected by another time. 496 */ 497 while (cur_msg && cur_msg != list_prev(&mi->mi_msg_list, oldest_msg)) { 498 if (cur_msg->msg_type != RM_FACT) { 499 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 500 continue; 501 } 502 503 cur_fp = &cur_msg->rmsg_u.msg_fact; 504 /* 505 * If this fact has already been looked at, then so 506 * have all preceding facts. Return Now. 507 */ 508 if (cur_fp->rf_status == RFS_INSPECT) 509 return (found_a_fact); 510 511 cur_fp->rf_status = RFS_INSPECT; 512 found_a_fact = 1; 513 switch (cur_fp->rf_type) { 514 case RF_BADOWNER: 515 break; 516 case RF_ERR: 517 /* 518 * Don't want to overwrite a fact that was 519 * previously found during our current search. 520 */ 521 if (!ret_fp->rf_reboot) 522 ret_fp->rf_reboot = cur_fp->rf_reboot; 523 if (!ret_fp->rf_stat4) 524 ret_fp->rf_stat4 = cur_fp->rf_stat4; 525 if (!ret_fp->rf_action) 526 ret_fp->rf_action = cur_fp->rf_action; 527 break; 528 case RF_RENEW_EXPIRED: 529 if (cur_msg->msg_mntpt && !(*mnt_pt)) { 530 len = strlen(cur_msg->msg_mntpt) + 1; 531 *mnt_pt = kmem_alloc(len, KM_SLEEP); 532 bcopy(cur_msg->msg_mntpt, *mnt_pt, len); 533 } 534 break; 535 case RF_SRV_OK: 536 break; 537 case RF_SRV_NOT_RESPOND: 538 /* 539 * Okay to overwrite this fact as 540 * we want the earliest time. 541 */ 542 ret_fp->rf_time = cur_fp->rf_time; 543 break; 544 case RF_SRVS_OK: 545 break; 546 case RF_SRVS_NOT_RESPOND: 547 break; 548 case RF_DELMAP_CB_ERR: 549 break; 550 default: 551 zcmn_err(getzoneid(), CE_NOTE, 552 "get facts: illegal fact %d", cur_fp->rf_type); 553 break; 554 } 555 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 556 } 557 558 return (found_a_fact); 559 } 560 561 /* 562 * Returns 1 if this fact is identical to the last fact recorded 563 * (only checks for a match within the last 2 lease periods). 564 */ 565 static int 566 facts_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg, 567 mntinfo4_t *mi) 568 { 569 nfs4_rfact_t *fp1, *fp2; 570 int lease, len; 571 572 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 573 if (mi->mi_lease_period > 0) 574 lease = 2 * mi->mi_lease_period; 575 else 576 lease = DEFAULT_LEASE; 577 578 fp2 = &new_msg->rmsg_u.msg_fact; 579 580 while (cur_msg && 581 new_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) { 582 if (cur_msg->msg_type != RM_FACT) { 583 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 584 continue; 585 } 586 fp1 = &cur_msg->rmsg_u.msg_fact; 587 if (fp1->rf_type != fp2->rf_type) 588 return (0); 589 590 /* now actually compare the facts */ 591 if (fp1->rf_action != fp2->rf_action) 592 return (0); 593 if (fp1->rf_stat4 != fp2->rf_stat4) 594 return (0); 595 if (fp1->rf_reboot != fp2->rf_reboot) 596 return (0); 597 if (fp1->rf_op != fp2->rf_op) 598 return (0); 599 if (fp1->rf_time.tv_sec != fp2->rf_time.tv_sec) 600 return (0); 601 if (fp1->rf_error != fp2->rf_error) 602 return (0); 603 if (fp1->rf_rp1 != fp2->rf_rp1) 604 return (0); 605 if (cur_msg->msg_srv != NULL) { 606 if (new_msg->msg_srv == NULL) 607 return (0); 608 len = strlen(cur_msg->msg_srv); 609 if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, 610 len) != 0) 611 return (0); 612 } else if (new_msg->msg_srv != NULL) { 613 return (0); 614 } 615 if (cur_msg->msg_mntpt != NULL) { 616 if (new_msg->msg_mntpt == NULL) 617 return (0); 618 len = strlen(cur_msg->msg_mntpt); 619 if (strncmp(cur_msg->msg_mntpt, new_msg->msg_mntpt, 620 len) != 0) 621 return (0); 622 } else if (new_msg->msg_mntpt != NULL) { 623 return (0); 624 } 625 if (fp1->rf_char1 != NULL) { 626 if (fp2->rf_char1 == NULL) 627 return (0); 628 len = strlen(fp1->rf_char1); 629 if (strncmp(fp1->rf_char1, fp2->rf_char1, len) != 0) 630 return (0); 631 } else if (fp2->rf_char1 != NULL) { 632 return (0); 633 } 634 return (1); 635 } 636 637 return (0); 638 } 639 640 /* 641 * Returns 1 if these two messages are identical; 0 otherwise. 642 */ 643 static int 644 events_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg, 645 mntinfo4_t *mi) 646 { 647 nfs4_revent_t *ep1, *ep2; 648 int len; 649 650 /* find the last event, bypassing all facts */ 651 while (cur_msg && cur_msg->msg_type != RM_EVENT) 652 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 653 654 if (!cur_msg) 655 return (0); 656 657 if (cur_msg->msg_type != RM_EVENT) 658 return (0); 659 660 ep1 = &cur_msg->rmsg_u.msg_event; 661 ep2 = &new_msg->rmsg_u.msg_event; 662 if (ep1->re_type != ep2->re_type) 663 return (0); 664 665 /* 666 * Since we zalloc the buffer, then the two nfs4_debug_msg's 667 * must match up even if all the fields weren't filled in 668 * the first place. 669 */ 670 if (ep1->re_mi != ep2->re_mi) 671 return (0); 672 if (ep1->re_uint != ep2->re_uint) 673 return (0); 674 if (ep1->re_stat4 != ep2->re_stat4) 675 return (0); 676 if (ep1->re_pid != ep2->re_pid) 677 return (0); 678 if (ep1->re_rp1 != ep2->re_rp1) 679 return (0); 680 if (ep1->re_rp2 != ep2->re_rp2) 681 return (0); 682 if (ep1->re_tag1 != ep2->re_tag1) 683 return (0); 684 if (ep1->re_tag2 != ep2->re_tag2) 685 return (0); 686 if (ep1->re_seqid1 != ep2->re_seqid1) 687 return (0); 688 if (ep1->re_seqid2 != ep2->re_seqid2) 689 return (0); 690 691 if (cur_msg->msg_srv != NULL) { 692 if (new_msg->msg_srv == NULL) 693 return (0); 694 len = strlen(cur_msg->msg_srv); 695 if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, len) != 0) 696 return (0); 697 } else if (new_msg->msg_srv != NULL) { 698 return (0); 699 } 700 701 if (ep1->re_char1 != NULL) { 702 if (ep2->re_char1 == NULL) 703 return (0); 704 len = strlen(ep1->re_char1); 705 if (strncmp(ep1->re_char1, ep2->re_char1, len) != 0) 706 return (0); 707 } else if (ep2->re_char1 != NULL) { 708 return (0); 709 } 710 711 if (ep1->re_char2 != NULL) { 712 if (ep2->re_char2 == NULL) 713 return (0); 714 len = strlen(ep1->re_char2); 715 if (strncmp(ep1->re_char2, ep2->re_char2, len) != 0) 716 return (0); 717 } else if (ep2->re_char2 != NULL) { 718 return (0); 719 } 720 721 if (cur_msg->msg_mntpt != NULL) { 722 if (new_msg->msg_mntpt == NULL) 723 return (0); 724 len = strlen(cur_msg->msg_mntpt); 725 if (strncmp(cur_msg->msg_mntpt, cur_msg->msg_mntpt, len) != 0) 726 return (0); 727 } else if (new_msg->msg_mntpt != NULL) { 728 return (0); 729 } 730 731 return (1); 732 } 733 734 /* 735 * Free up a recovery event. 736 */ 737 static void 738 free_event(nfs4_revent_t *ep) 739 { 740 int len; 741 742 if (ep->re_char1) { 743 len = strlen(ep->re_char1) + 1; 744 kmem_free(ep->re_char1, len); 745 } 746 if (ep->re_char2) { 747 len = strlen(ep->re_char2) + 1; 748 kmem_free(ep->re_char2, len); 749 } 750 } 751 752 /* 753 * Free up a recovery fact. 754 */ 755 static void 756 free_fact(nfs4_rfact_t *fp) 757 { 758 int len; 759 760 if (fp->rf_char1) { 761 len = strlen(fp->rf_char1) + 1; 762 kmem_free(fp->rf_char1, len); 763 } 764 } 765 766 /* 767 * Free up the message. 768 */ 769 void 770 nfs4_free_msg(nfs4_debug_msg_t *msg) 771 { 772 int len; 773 774 if (msg->msg_type == RM_EVENT) 775 free_event(&msg->rmsg_u.msg_event); 776 else 777 free_fact(&msg->rmsg_u.msg_fact); 778 779 if (msg->msg_srv) { 780 len = strlen(msg->msg_srv) + 1; 781 kmem_free(msg->msg_srv, len); 782 } 783 784 if (msg->msg_mntpt) { 785 len = strlen(msg->msg_mntpt) + 1; 786 kmem_free(msg->msg_mntpt, len); 787 } 788 789 /* free up the data structure itself */ 790 kmem_free(msg, sizeof (*msg)); 791 } 792 793 /* 794 * Prints out the interesting facts for recovery events: 795 * -DEAD_FILE 796 * -SIGLOST(_NO_DUMP) 797 */ 798 static void 799 print_facts(nfs4_debug_msg_t *msg, mntinfo4_t *mi) 800 { 801 nfs4_rfact_t *fp; 802 char *mount_pt; 803 int len; 804 805 if (msg->rmsg_u.msg_event.re_type != RE_DEAD_FILE && 806 msg->rmsg_u.msg_event.re_type != RE_SIGLOST && 807 msg->rmsg_u.msg_event.re_type != RE_SIGLOST_NO_DUMP) 808 return; 809 810 fp = kmem_zalloc(sizeof (*fp), KM_SLEEP); 811 mount_pt = NULL; 812 813 if (get_facts(msg, fp, &mount_pt, mi)) { 814 char time[256]; 815 816 817 if (fp->rf_time.tv_sec) 818 (void) snprintf(time, 256, "%ld", 819 (gethrestime_sec() - fp->rf_time.tv_sec)/60); 820 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 821 "!NFS4 FACT SHEET: %s%s %s%s %s %s%s%s %s%s", 822 fp->rf_action ? "\n Action: " : "", 823 fp->rf_action ? nfs4_recov_action_to_str(fp->rf_action) : 824 "", 825 fp->rf_stat4 ? "\n NFS4 error: " : "", 826 fp->rf_stat4 ? nfs4_stat_to_str(fp->rf_stat4) : "", 827 fp->rf_reboot ? "\n Suspected server reboot. " : "", 828 fp->rf_time.tv_sec ? "\n Server was down for " : "", 829 fp->rf_time.tv_sec ? time : "", 830 fp->rf_time.tv_sec ? " minutes." : "", 831 mount_pt ? " \n Client's lease expired on mount " : "", 832 mount_pt ? mount_pt : ""); 833 } 834 835 if (mount_pt) { 836 len = strlen(mount_pt) + 1; 837 kmem_free(mount_pt, len); 838 } 839 840 /* free the fact struct itself */ 841 if (fp) 842 kmem_free(fp, sizeof (*fp)); 843 } 844 845 /* 846 * Print an event message to /var/adm/messages 847 * The last argument to this fuction dictates the repeat status 848 * of the event. If set to 1, it means that we are dumping this 849 * event and it will _never_ be printed after this time. Else if 850 * set to 0 it will be printed again. 851 */ 852 static void 853 queue_print_event(nfs4_debug_msg_t *msg, mntinfo4_t *mi, int dump) 854 { 855 nfs4_revent_t *ep; 856 zoneid_t zoneid; 857 858 ep = &msg->rmsg_u.msg_event; 859 zoneid = mi->mi_zone->zone_id; 860 861 switch (ep->re_type) { 862 case RE_BAD_SEQID: 863 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 864 "Operation %s for file %s (rnode_pt 0x%p), pid %d using " 865 "seqid %d got %s. Last good seqid was %d for " 866 "operation %s.", 867 msg->msg_srv, msg->msg_mntpt, 868 nfs4_ctags[ep->re_tag1].ct_str, ep->re_char1, 869 (void *)ep->re_rp1, ep->re_pid, ep->re_seqid1, 870 nfs4_stat_to_str(ep->re_stat4), ep->re_seqid2, 871 nfs4_ctags[ep->re_tag2].ct_str); 872 break; 873 case RE_BADHANDLE: 874 ASSERT(ep->re_rp1 != NULL); 875 if (ep->re_char1 != NULL) { 876 zcmn_err(zoneid, CE_NOTE, 877 "![NFS4][Server: %s][Mntpt: %s]" 878 "server %s said filehandle was " 879 "invalid for file: %s (rnode_pt 0x%p) on mount %s", 880 msg->msg_srv, msg->msg_mntpt, msg->msg_srv, 881 ep->re_char1, (void *)ep->re_rp1, msg->msg_mntpt); 882 } else { 883 zcmn_err(zoneid, CE_NOTE, 884 "![NFS4][Server: %s][Mntpt: %s]" 885 "server %s said filehandle was " 886 "invalid for file: (rnode_pt 0x%p) on mount %s" 887 " for fh:", msg->msg_srv, msg->msg_mntpt, 888 msg->msg_srv, (void *)ep->re_rp1, msg->msg_mntpt); 889 sfh4_printfhandle(ep->re_rp1->r_fh); 890 } 891 break; 892 case RE_CLIENTID: 893 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 894 "Can't recover clientid on mount point %s " 895 "(mi 0x%p) due to error %d (%s), for server %s. Marking " 896 "file system as unusable.", 897 msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt, 898 (void *)ep->re_mi, ep->re_uint, 899 nfs4_stat_to_str(ep->re_stat4), 900 msg->msg_srv); 901 break; 902 case RE_DEAD_FILE: 903 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 904 "File %s (rnode_pt: %p) was closed due to NFS " 905 "recovery error on server %s(%s %s)", msg->msg_srv, 906 msg->msg_mntpt, ep->re_char1, (void *)ep->re_rp1, 907 msg->msg_srv, ep->re_char2 ? ep->re_char2 : "", 908 ep->re_stat4 ? nfs4_stat_to_str(ep->re_stat4) : ""); 909 break; 910 case RE_END: 911 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 912 "NFS Recovery done for mount %s (mi 0x%p) " 913 "on server %s, rnode_pt1 %s (0x%p), " 914 "rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt, 915 msg->msg_mntpt, (void *)ep->re_mi, msg->msg_srv, 916 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 917 (void *)ep->re_rp2); 918 break; 919 case RE_FAIL_RELOCK: 920 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 921 "Couldn't reclaim lock for pid %d for " 922 "file %s (rnode_pt 0x%p) on (server %s): error %d", 923 msg->msg_srv, msg->msg_mntpt, ep->re_pid, ep->re_char1, 924 (void *)ep->re_rp1, msg->msg_srv, 925 ep->re_uint ? ep->re_uint : ep->re_stat4); 926 break; 927 case RE_FAIL_REMAP_LEN: 928 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 929 "remap_lookup: server %s returned bad " 930 "fhandle length (%d)", msg->msg_srv, msg->msg_mntpt, 931 msg->msg_srv, ep->re_uint); 932 break; 933 case RE_FAIL_REMAP_OP: 934 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 935 "remap_lookup: didn't get expected OP_GETFH" 936 " for server %s", msg->msg_srv, msg->msg_mntpt, 937 msg->msg_srv); 938 break; 939 case RE_FAILOVER: 940 if (ep->re_char1) 941 zcmn_err(zoneid, CE_NOTE, 942 "![NFS4][Server: %s][Mntpt: %s]" 943 "failing over from %s to %s", msg->msg_srv, 944 msg->msg_mntpt, msg->msg_srv, ep->re_char1); 945 else 946 zcmn_err(zoneid, CE_NOTE, 947 "![NFS4][Server: %s][Mntpt: %s]" 948 "NFS4: failing over: selecting " 949 "original server %s", msg->msg_srv, msg->msg_mntpt, 950 msg->msg_srv); 951 break; 952 case RE_FILE_DIFF: 953 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 954 "File %s (rnode_pt: %p) on server %s was closed " 955 "and failed attempted failover since its is different than " 956 "the original file", msg->msg_srv, msg->msg_mntpt, 957 ep->re_char1, (void *)ep->re_rp1, msg->msg_srv); 958 break; 959 case RE_LOST_STATE: 960 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 961 "Lost %s request for fs %s, file %s (rnode_pt: 0x%p), " 962 "dir %s (0x%p) for server %s", msg->msg_srv, msg->msg_mntpt, 963 nfs4_op_to_str(ep->re_uint), msg->msg_mntpt, 964 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 965 (void *)ep->re_rp2, msg->msg_srv); 966 break; 967 case RE_OPENS_CHANGED: 968 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 969 "The number of open files to reopen changed " 970 "for mount %s mi 0x%p (old %d, new %d) on server %s", 971 msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt, 972 (void *)ep->re_mi, ep->re_uint, ep->re_pid, msg->msg_srv); 973 break; 974 case RE_SIGLOST: 975 case RE_SIGLOST_NO_DUMP: 976 if (ep->re_uint) 977 zcmn_err(zoneid, CE_NOTE, 978 "![NFS4][Server: %s][Mntpt: %s]" 979 "Process %d lost its locks on " 980 "file %s (rnode_pt: %p) due to NFS recovery error " 981 "(%d) on server %s.", msg->msg_srv, msg->msg_mntpt, 982 ep->re_pid, ep->re_char1, (void *)ep->re_rp1, 983 ep->re_uint, msg->msg_srv); 984 else 985 zcmn_err(zoneid, CE_NOTE, 986 "![NFS4][Server: %s][Mntpt: %s]" 987 "Process %d lost its locks on " 988 "file %s (rnode_pt: %p) due to NFS recovery error " 989 "(%s) on server %s.", msg->msg_srv, msg->msg_mntpt, 990 ep->re_pid, ep->re_char1, (void *)ep->re_rp1, 991 nfs4_stat_to_str(ep->re_stat4), msg->msg_srv); 992 break; 993 case RE_START: 994 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 995 "NFS Starting recovery for mount %s " 996 "(mi 0x%p mi_recovflags [0x%x]) on server %s, " 997 "rnode_pt1 %s (0x%p), rnode_pt2 %s (0x%p)", msg->msg_srv, 998 msg->msg_mntpt, msg->msg_mntpt, (void *)ep->re_mi, 999 ep->re_uint, msg->msg_srv, ep->re_char1, (void *)ep->re_rp1, 1000 ep->re_char2, (void *)ep->re_rp2); 1001 break; 1002 case RE_UNEXPECTED_ACTION: 1003 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1004 "NFS recovery: unexpected action (%s) on server %s", 1005 msg->msg_srv, msg->msg_mntpt, 1006 nfs4_recov_action_to_str(ep->re_uint), msg->msg_srv); 1007 break; 1008 case RE_UNEXPECTED_ERRNO: 1009 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1010 "NFS recovery: unexpected errno (%d) on server %s", 1011 msg->msg_srv, msg->msg_mntpt, ep->re_uint, msg->msg_srv); 1012 break; 1013 case RE_UNEXPECTED_STATUS: 1014 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1015 "NFS recovery: unexpected NFS status code (%s) " 1016 "on server %s", msg->msg_srv, msg->msg_mntpt, 1017 nfs4_stat_to_str(ep->re_stat4), 1018 msg->msg_srv); 1019 break; 1020 case RE_WRONGSEC: 1021 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1022 "NFS can't recover from NFS4ERR_WRONGSEC." 1023 " error %d for server %s: rnode_pt1 %s (0x%p)" 1024 " rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt, 1025 ep->re_uint, msg->msg_srv, ep->re_char1, (void *)ep->re_rp1, 1026 ep->re_char2, (void *)ep->re_rp2); 1027 break; 1028 case RE_LOST_STATE_BAD_OP: 1029 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1030 "NFS lost state with unrecognized op (%d)." 1031 " fs %s, server %s, pid %d, file %s (rnode_pt: 0x%p), " 1032 "dir %s (0x%p)", msg->msg_srv, msg->msg_mntpt, 1033 ep->re_uint, msg->msg_mntpt, msg->msg_srv, ep->re_pid, 1034 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 1035 (void *)ep->re_rp2); 1036 break; 1037 default: 1038 zcmn_err(zoneid, CE_WARN, 1039 "!queue_print_event: illegal event %d", ep->re_type); 1040 break; 1041 } 1042 1043 print_facts(msg, mi); 1044 1045 /* 1046 * If set this event will not be printed again and is considered 1047 * dumped. 1048 */ 1049 if (dump) 1050 msg->msg_status = NFS4_MS_NO_DUMP; 1051 } 1052 1053 /* 1054 * Print a fact message to /var/adm/messages 1055 */ 1056 static void 1057 queue_print_fact(nfs4_debug_msg_t *msg, int dump) 1058 { 1059 nfs4_rfact_t *fp; 1060 zoneid_t zoneid; 1061 1062 fp = &msg->rmsg_u.msg_fact; 1063 zoneid = getzoneid(); 1064 1065 switch (fp->rf_type) { 1066 case RF_BADOWNER: 1067 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1068 "NFSMAPID_DOMAIN does not match the server: %s domain\n" 1069 "Please check configuration", msg->msg_srv, msg->msg_mntpt, 1070 msg->msg_srv); 1071 break; 1072 case RF_ERR: 1073 if (fp->rf_error) 1074 zcmn_err(zoneid, CE_NOTE, 1075 "![NFS4][Server: %s][Mntpt: %s]NFS op %s got " 1076 "error %d causing recovery action %s.%s", 1077 msg->msg_srv, msg->msg_mntpt, 1078 nfs4_op_to_str(fp->rf_op), fp->rf_error, 1079 nfs4_recov_action_to_str(fp->rf_action), 1080 fp->rf_reboot ? 1081 " Client also suspects that the server rebooted," 1082 " or experienced a network partition." : ""); 1083 else 1084 zcmn_err(zoneid, CE_NOTE, 1085 "![NFS4][Server: %s][Mntpt: %s]NFS op %s got " 1086 "error %s causing recovery action %s.%s", 1087 msg->msg_srv, msg->msg_mntpt, 1088 nfs4_op_to_str(fp->rf_op), 1089 nfs4_stat_to_str(fp->rf_stat4), 1090 nfs4_recov_action_to_str(fp->rf_action), 1091 fp->rf_reboot ? 1092 " Client also suspects that the server rebooted," 1093 " or experienced a network partition." : ""); 1094 break; 1095 case RF_RENEW_EXPIRED: 1096 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1097 "NFS4 renew thread detected client's " 1098 "lease has expired. Current open files/locks/IO may fail", 1099 msg->msg_srv, msg->msg_mntpt); 1100 break; 1101 case RF_SRV_NOT_RESPOND: 1102 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1103 "NFS server %s not responding; still trying\n", 1104 msg->msg_srv, msg->msg_mntpt, msg->msg_srv); 1105 break; 1106 case RF_SRV_OK: 1107 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1108 "NFS server %s ok", msg->msg_srv, msg->msg_mntpt, 1109 msg->msg_srv); 1110 break; 1111 case RF_SRVS_NOT_RESPOND: 1112 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1113 "NFS servers %s not responding; still trying", msg->msg_srv, 1114 msg->msg_mntpt, msg->msg_srv); 1115 break; 1116 case RF_SRVS_OK: 1117 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1118 "NFS servers %s ok", msg->msg_srv, msg->msg_mntpt, 1119 msg->msg_srv); 1120 break; 1121 case RF_DELMAP_CB_ERR: 1122 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 1123 "NFS op %s got error %s when executing delmap on file %s " 1124 "(rnode_pt 0x%p).", 1125 msg->msg_srv, msg->msg_mntpt, nfs4_op_to_str(fp->rf_op), 1126 nfs4_stat_to_str(fp->rf_stat4), fp->rf_char1, 1127 (void *)fp->rf_rp1); 1128 break; 1129 default: 1130 zcmn_err(zoneid, CE_WARN, "!queue_print_fact: illegal fact %d", 1131 fp->rf_type); 1132 } 1133 1134 /* 1135 * If set this fact will not be printed again and is considered 1136 * dumped. 1137 */ 1138 if (dump) 1139 msg->msg_status = NFS4_MS_NO_DUMP; 1140 } 1141 1142 /* 1143 * Returns 1 if the entire queue should be dumped, 0 otherwise. 1144 */ 1145 static int 1146 id_to_dump_queue(nfs4_event_type_t id) 1147 { 1148 switch (id) { 1149 case RE_DEAD_FILE: 1150 case RE_SIGLOST: 1151 case RE_WRONGSEC: 1152 case RE_CLIENTID: 1153 return (1); 1154 default: 1155 return (0); 1156 } 1157 } 1158 1159 /* 1160 * Returns 1 if the event (but not the entire queue) should be printed; 1161 * 0 otherwise. 1162 */ 1163 static int 1164 id_to_dump_solo_event(nfs4_event_type_t id) 1165 { 1166 switch (id) { 1167 case RE_BAD_SEQID: 1168 case RE_BADHANDLE: 1169 case RE_FAIL_REMAP_LEN: 1170 case RE_FAIL_REMAP_OP: 1171 case RE_FAILOVER: 1172 case RE_OPENS_CHANGED: 1173 case RE_SIGLOST_NO_DUMP: 1174 case RE_UNEXPECTED_ACTION: 1175 case RE_UNEXPECTED_ERRNO: 1176 case RE_UNEXPECTED_STATUS: 1177 case RE_LOST_STATE_BAD_OP: 1178 return (1); 1179 default: 1180 return (0); 1181 } 1182 } 1183 1184 /* 1185 * Returns 1 if the fact (but not the entire queue) should be printed; 1186 * 0 otherwise. 1187 */ 1188 static int 1189 id_to_dump_solo_fact(nfs4_fact_type_t id) 1190 { 1191 switch (id) { 1192 case RF_SRV_NOT_RESPOND: 1193 case RF_SRV_OK: 1194 case RF_SRVS_NOT_RESPOND: 1195 case RF_SRVS_OK: 1196 return (1); 1197 default: 1198 return (0); 1199 } 1200 } 1201 1202 /* 1203 * Update a kernel stat 1204 */ 1205 static void 1206 update_recov_kstats(nfs4_debug_msg_t *msg, mntinfo4_t *mi) 1207 { 1208 rkstat_t *rsp; 1209 1210 if (!mi->mi_recov_ksp) 1211 return; 1212 1213 rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data; 1214 1215 if (msg->msg_type == RM_EVENT) { 1216 switch (msg->rmsg_u.msg_event.re_type) { 1217 case RE_BADHANDLE: 1218 rsp->badhandle.value.ul++; 1219 break; 1220 case RE_CLIENTID: 1221 rsp->clientid.value.ul++; 1222 break; 1223 case RE_DEAD_FILE: 1224 rsp->dead_file.value.ul++; 1225 break; 1226 case RE_FAIL_RELOCK: 1227 rsp->fail_relock.value.ul++; 1228 break; 1229 case RE_FILE_DIFF: 1230 rsp->file_diff.value.ul++; 1231 break; 1232 case RE_OPENS_CHANGED: 1233 rsp->opens_changed.value.ul++; 1234 break; 1235 case RE_SIGLOST: 1236 case RE_SIGLOST_NO_DUMP: 1237 rsp->siglost.value.ul++; 1238 break; 1239 case RE_UNEXPECTED_ACTION: 1240 rsp->unexp_action.value.ul++; 1241 break; 1242 case RE_UNEXPECTED_ERRNO: 1243 rsp->unexp_errno.value.ul++; 1244 break; 1245 case RE_UNEXPECTED_STATUS: 1246 rsp->unexp_status.value.ul++; 1247 break; 1248 case RE_WRONGSEC: 1249 rsp->wrongsec.value.ul++; 1250 break; 1251 case RE_LOST_STATE_BAD_OP: 1252 rsp->lost_state_bad_op.value.ul++; 1253 break; 1254 default: 1255 break; 1256 } 1257 } else if (msg->msg_type == RM_FACT) { 1258 switch (msg->rmsg_u.msg_fact.rf_type) { 1259 case RF_BADOWNER: 1260 rsp->badowner.value.ul++; 1261 break; 1262 case RF_SRV_NOT_RESPOND: 1263 rsp->not_responding.value.ul++; 1264 break; 1265 default: 1266 break; 1267 } 1268 } 1269 } 1270 1271 /* 1272 * Dump the mi's mi_msg_list of recovery messages. 1273 */ 1274 static void 1275 dump_queue(mntinfo4_t *mi, nfs4_debug_msg_t *msg) 1276 { 1277 nfs4_debug_msg_t *tmp_msg; 1278 1279 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 1280 1281 /* update kstats */ 1282 update_recov_kstats(msg, mi); 1283 1284 /* 1285 * If we aren't supposed to dump the queue then see if we 1286 * should just print this single message, then return. 1287 */ 1288 if (!id_to_dump_queue(msg->rmsg_u.msg_event.re_type)) { 1289 if (id_to_dump_solo_event(msg->rmsg_u.msg_event.re_type)) 1290 queue_print_event(msg, mi, 0); 1291 return; 1292 } 1293 1294 /* 1295 * Write all events/facts in the queue that haven't been 1296 * previously written to disk. 1297 */ 1298 tmp_msg = list_head(&mi->mi_msg_list); 1299 while (tmp_msg) { 1300 if (tmp_msg->msg_status == NFS4_MS_DUMP) { 1301 if (tmp_msg->msg_type == RM_EVENT) 1302 queue_print_event(tmp_msg, mi, 1); 1303 else if (tmp_msg->msg_type == RM_FACT) 1304 queue_print_fact(tmp_msg, 1); 1305 } 1306 tmp_msg = list_next(&mi->mi_msg_list, tmp_msg); 1307 } 1308 } 1309 1310 /* 1311 * Places the event into mi's debug recovery message queue. Some of the 1312 * fields can be overloaded to be a generic value, depending on the event 1313 * type. These include "count", "why". 1314 */ 1315 void 1316 nfs4_queue_event(nfs4_event_type_t id, mntinfo4_t *mi, char *server1, 1317 uint_t count, vnode_t *vp1, vnode_t *vp2, nfsstat4 nfs4_error, 1318 char *why, pid_t pid, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2, 1319 seqid4 seqid1, seqid4 seqid2) 1320 { 1321 nfs4_debug_msg_t *msg; 1322 nfs4_revent_t *ep; 1323 char *cur_srv; 1324 rnode4_t *rp1 = NULL, *rp2 = NULL; 1325 refstr_t *mntpt; 1326 1327 ASSERT(mi != NULL); 1328 if (vp1) 1329 rp1 = VTOR4(vp1); 1330 if (vp2) 1331 rp2 = VTOR4(vp2); 1332 1333 /* 1334 * Initialize the message with the relevant server/mount_pt/time 1335 * information. Also place the relevent event related info. 1336 */ 1337 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); 1338 msg->msg_type = RM_EVENT; 1339 msg->msg_status = NFS4_MS_DUMP; 1340 ep = &msg->rmsg_u.msg_event; 1341 ep->re_type = id; 1342 gethrestime(&msg->msg_time); 1343 1344 cur_srv = mi->mi_curr_serv->sv_hostname; 1345 msg->msg_srv = strdup(cur_srv); 1346 mntpt = vfs_getmntpoint(mi->mi_vfsp); 1347 msg->msg_mntpt = strdup(refstr_value(mntpt)); 1348 refstr_rele(mntpt); 1349 1350 set_event(id, ep, mi, rp1, rp2, count, pid, nfs4_error, server1, 1351 why, tag1, tag2, seqid1, seqid2); 1352 1353 mutex_enter(&mi->mi_msg_list_lock); 1354 1355 /* if this event is the same as the last event, drop it */ 1356 if (events_same(list_tail(&mi->mi_msg_list), msg, mi)) { 1357 mutex_exit(&mi->mi_msg_list_lock); 1358 nfs4_free_msg(msg); 1359 return; 1360 } 1361 1362 /* queue the message at the end of the list */ 1363 list_insert_tail(&mi->mi_msg_list, msg); 1364 1365 dump_queue(mi, msg); 1366 1367 if (mi->mi_msg_count == nfs4_msg_max) { 1368 nfs4_debug_msg_t *rm_msg; 1369 1370 /* remove the queue'd message at the front of the list */ 1371 rm_msg = list_head(&mi->mi_msg_list); 1372 list_remove(&mi->mi_msg_list, rm_msg); 1373 mutex_exit(&mi->mi_msg_list_lock); 1374 nfs4_free_msg(rm_msg); 1375 } else { 1376 mi->mi_msg_count++; 1377 mutex_exit(&mi->mi_msg_list_lock); 1378 } 1379 } 1380 1381 /* 1382 * Places the fact into mi's debug recovery messages queue. 1383 */ 1384 void 1385 nfs4_queue_fact(nfs4_fact_type_t fid, mntinfo4_t *mi, nfsstat4 stat4, 1386 nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, char *srvname, 1387 int error, vnode_t *vp) 1388 { 1389 nfs4_debug_msg_t *msg; 1390 nfs4_rfact_t *fp; 1391 char *cur_srv; 1392 refstr_t *mntpt; 1393 1394 /* 1395 * Initialize the message with the relevant server/mount_pt/time 1396 * information. Also place the relevant fact related info. 1397 */ 1398 msg = kmem_zalloc(sizeof (*msg), KM_SLEEP); 1399 msg->msg_type = RM_FACT; 1400 msg->msg_status = NFS4_MS_DUMP; 1401 gethrestime(&msg->msg_time); 1402 1403 if (srvname) 1404 cur_srv = srvname; 1405 else 1406 cur_srv = mi->mi_curr_serv->sv_hostname; 1407 1408 msg->msg_srv = strdup(cur_srv); 1409 mntpt = vfs_getmntpoint(mi->mi_vfsp); 1410 msg->msg_mntpt = strdup(refstr_value(mntpt)); 1411 refstr_rele(mntpt); 1412 1413 fp = &msg->rmsg_u.msg_fact; 1414 fp->rf_type = fid; 1415 fp->rf_status = RFS_NO_INSPECT; 1416 set_fact(fid, fp, stat4, raction, op, reboot, error, vp); 1417 1418 update_recov_kstats(msg, mi); 1419 1420 mutex_enter(&mi->mi_msg_list_lock); 1421 1422 /* if this fact is the same as the last fact, drop it */ 1423 if (facts_same(list_tail(&mi->mi_msg_list), msg, mi)) { 1424 mutex_exit(&mi->mi_msg_list_lock); 1425 nfs4_free_msg(msg); 1426 return; 1427 } 1428 1429 /* queue the message at the end of the list */ 1430 list_insert_tail(&mi->mi_msg_list, msg); 1431 1432 if (id_to_dump_solo_fact(msg->rmsg_u.msg_fact.rf_type)) 1433 queue_print_fact(msg, 0); 1434 1435 if (mi->mi_msg_count == nfs4_msg_max) { 1436 nfs4_debug_msg_t *rm_msg; 1437 1438 /* remove the queue'd message at the front of the list */ 1439 rm_msg = list_head(&mi->mi_msg_list); 1440 list_remove(&mi->mi_msg_list, rm_msg); 1441 mutex_exit(&mi->mi_msg_list_lock); 1442 nfs4_free_msg(rm_msg); 1443 } else { 1444 mi->mi_msg_count++; 1445 mutex_exit(&mi->mi_msg_list_lock); 1446 } 1447 } 1448 1449 /* 1450 * Initialize the 'mi_recov_kstat' kstat. 1451 */ 1452 void 1453 nfs4_mnt_recov_kstat_init(vfs_t *vfsp) 1454 { 1455 mntinfo4_t *mi = VFTOMI4(vfsp); 1456 kstat_t *ksp; 1457 zoneid_t zoneid = mi->mi_zone->zone_id; 1458 1459 /* 1460 * Create the version specific kstats. 1461 * 1462 * PSARC 2001/697 Contract Private Interface 1463 * All nfs kstats are under SunMC contract 1464 * Please refer to the PSARC listed above and contact 1465 * SunMC before making any changes! 1466 * 1467 * Changes must be reviewed by Solaris File Sharing 1468 * Changes must be communicated to contract-2001-697@sun.com 1469 * 1470 */ 1471 1472 if ((ksp = kstat_create_zone("nfs", getminor(vfsp->vfs_dev), 1473 "mi_recov_kstat", "misc", KSTAT_TYPE_NAMED, 1474 sizeof (rkstat_t) / sizeof (kstat_named_t), 1475 KSTAT_FLAG_WRITABLE, zoneid)) == NULL) { 1476 mi->mi_recov_ksp = NULL; 1477 zcmn_err(GLOBAL_ZONEID, CE_NOTE, 1478 "!mi_recov_kstat for mi %p failed\n", 1479 (void *)mi); 1480 return; 1481 } 1482 if (zoneid != GLOBAL_ZONEID) 1483 kstat_zone_add(ksp, GLOBAL_ZONEID); 1484 mi->mi_recov_ksp = ksp; 1485 bcopy(&rkstat_template, ksp->ks_data, sizeof (rkstat_t)); 1486 kstat_install(ksp); 1487 } 1488 1489 /* 1490 * Increment the "delay" kstat. 1491 */ 1492 void 1493 nfs4_mi_kstat_inc_delay(mntinfo4_t *mi) 1494 { 1495 rkstat_t *rsp; 1496 1497 if (!mi->mi_recov_ksp) 1498 return; 1499 1500 rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data; 1501 rsp->delay.value.ul++; 1502 } 1503 1504 /* 1505 * Increment the "no_grace" kstat. 1506 */ 1507 void 1508 nfs4_mi_kstat_inc_no_grace(mntinfo4_t *mi) 1509 { 1510 rkstat_t *rsp; 1511 1512 if (!mi->mi_recov_ksp) 1513 return; 1514 1515 rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data; 1516 rsp->no_grace.value.ul++; 1517 } 1518