/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * nfs log - read buffer file and return structs in usable form
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/utsname.h>
#include <sys/mman.h>
#include <strings.h>
#include <errno.h>
#include <syslog.h>
#include <time.h>
#include <limits.h>
#include <libintl.h>
#include <values.h>
#include <search.h>
#include <pwd.h>
#include <netdb.h>
#include <rpc/rpc.h>
#include <netconfig.h>
#include <netdir.h>
#include <nfs/nfs_sec.h>
#include <nfs/export.h>
#include <rpc/auth.h>
#include <rpc/svc.h>
#include <rpc/xdr.h>
#include <rpc/clnt.h>
#include <nfs/nfs.h>
#include <nfs/nfs_log.h>
#include "nfslogd.h"

#define	MAX_LRS_READ_AHEAD	2048
#define	MAX_RECS_TO_DELAY	32768

static int nfslog_init_buf(char *, struct nfslog_buf *, int *);
static void nfslog_free_buf(struct nfslog_buf *, int);
static struct nfslog_lr *nfslog_read_buffer(struct nfslog_buf *);
static void free_lrp(struct nfslog_lr *);
static struct nfslog_lr *remove_lrp_from_lb(struct nfslog_buf *,
	struct nfslog_lr *);
static void insert_lrp_to_lb(struct nfslog_buf *,
	struct nfslog_lr *);
static void nfslog_rewrite_bufheader(struct nfslog_buf *);

/*
 * Treat the provided path name as an NFS log buffer file.
 * Allocate a data structure for its handling and initialize it.
 * *error contains the previous error condition encountered for
 * this object.  This value can be used to avoid printing the last
 * error endlessly.
 * It will set *error appropriately after processing.
 */
struct nfslog_buf *
nfslog_open_buf(char *bufpath, int *error)
{
	struct nfslog_buf *lbp = NULL;

	if (bufpath == NULL) {
		*error = EINVAL;
		return (NULL);
	}

	if ((lbp = malloc(sizeof (struct nfslog_buf))) == NULL) {
		*error = ENOMEM;
		return (NULL);
	}
	bzero(lbp, sizeof (struct nfslog_buf));

	if (nfslog_init_buf(bufpath, lbp, error)) {
		free(lbp);
		return (NULL);
	}
	return (lbp);
}

/*
 * Free the log buffer struct with all of its baggage and free the data struct
 */
void
nfslog_close_buf(struct nfslog_buf *lbp, int close_quick)
{
	nfslog_free_buf(lbp, close_quick);
	free(lbp);
}

/*
 * Set up the log buffer struct; simple things are opening and locking
 * the buffer file and then on to mmap()ing it for later use by the
 * XDR decode path.  Make sure to read the buffer header before
 * returning so that we will be at the first true log record.
 *
 * *error contains the last error encountered on this object.  It can
 * be used to avoid reporting the same error endlessly.  It is reset
 * to the current error code on return.
 */
static int
nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp, int *error)
{
	struct stat sb;
	int preverror = *error;

	lbp->next = lbp;
	lbp->prev = lbp;
	/*
	 * set these values so that the free routine will know what to do
	 */
	lbp->mmap_addr = (intptr_t)MAP_FAILED;
	lbp->last_rec_id = MAXINT - 1;
	lbp->bh.bh_length = 0;
	lbp->bh_lrp = NULL;
	lbp->num_lrps = 0;
	lbp->lrps = NULL;
	lbp->last_record_offset = 0;
	lbp->prp = NULL;
	lbp->num_pr_queued = 0;

	lbp->bufpath = strdup(bufpath);
	if (lbp->bufpath == NULL) {
		*error = ENOMEM;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext("Cannot strdup '%s': %s"),
			    bufpath, strerror(*error));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}

	if ((lbp->fd = open(bufpath, O_RDWR)) < 0) {
		*error = errno;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext("Cannot open '%s': %s"),
			    bufpath, strerror(*error));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}

	/*
	 * Lock the entire buffer file to prevent conflicting access.
	 * We get a write lock because we want only 1 process to be
	 * generating records from it.
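	 *
	 * Note: F_SETLKW blocks until the write lock can be granted, and
	 * an l_len of 0 extends the lock over the entire file.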
	 */
	lbp->fl.l_type = F_WRLCK;
	lbp->fl.l_whence = SEEK_SET;	/* beginning of file */
	lbp->fl.l_start = (offset_t)0;
	lbp->fl.l_len = 0;		/* entire file */
	lbp->fl.l_sysid = 0;
	lbp->fl.l_pid = 0;
	if (fcntl(lbp->fd, F_SETLKW, &lbp->fl) == -1) {
		*error = errno;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext("Cannot lock (%s): %s"),
			    bufpath, strerror(*error));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}

	if (fstat(lbp->fd, &sb)) {
		*error = errno;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext("Cannot stat (%s): %s"),
			    bufpath, strerror(*error));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}
	lbp->filesize = sb.st_size;

	lbp->mmap_addr = (intptr_t)mmap(0, lbp->filesize, PROT_READ|PROT_WRITE,
	    MAP_SHARED|MAP_NORESERVE, lbp->fd, 0);

	/* This is part of the duality of the use of either mmap()|read() */
	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
		lbp->next_rec = 0;
	} else {
		lbp->next_rec = lbp->mmap_addr;
	}

	/* Read the header */
	if ((lbp->bh_lrp = nfslog_read_buffer(lbp)) == NULL) {
		*error = EIO;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext(
			    "error in reading file '%s': %s"),
			    bufpath, strerror(EIO));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}

	if (!xdr_nfslog_buffer_header(&lbp->bh_lrp->xdrs, &lbp->bh)) {
		*error = EIO;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext(
			    "error in reading file '%s': %s"),
			    bufpath, strerror(*error));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}

	/*
	 * Set the pointer to the next record based on the buffer header.
	 * 'lbp->bh.bh_offset' contains the offset of where to begin
	 * processing relative to the buffer header.
	 */
	lbp->next_rec += lbp->bh.bh_offset;

	/*
	 * If we are going to be using read() for file data, then we may
	 * have to adjust the current file pointer to take into account
	 * a starting point other than the beginning of the file.
	 * If mmap is being used, this is taken care of as a side effect of
	 * setting up the value of next_rec.
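	 *
	 * In the mmap() case, next_rec holds an absolute address within
	 * the mapping (mmap_addr plus the header offset), while in the
	 * read() case it is a plain file offset; the two branches below
	 * account for that difference when setting last_record_offset.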
	 */
	if (lbp->mmap_addr == (intptr_t)MAP_FAILED && lbp->next_rec != 0) {
		(void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
		/* This is a special case of setting the last_record_offset */
		lbp->last_record_offset = lbp->next_rec;
	} else {
		lbp->last_record_offset = lbp->next_rec - lbp->mmap_addr;
	}

	return (*error = 0);
}

/*
 * Free the nfslog buffer and its associated allocations
 */
static void
nfslog_free_buf(struct nfslog_buf *lbp, int close_quick)
{
	XDR xdrs;
	int error;
	caddr_t buffer;
	struct nfslog_lr *lrp, *lrp_next;
	struct processed_records *prp, *tprp;

	/* work to free the offset records and rewrite header */
	if (lbp->prp) {
		if (lbp->last_record_offset == lbp->prp->start_offset) {

			/* adjust the offset for the entire buffer */
			lbp->last_record_offset =
			    lbp->prp->start_offset + lbp->prp->len;

			nfslog_rewrite_bufheader(lbp);
		}
		if (close_quick)
			return;
		prp = lbp->prp;
		do {
			tprp = prp->next;
			free(prp);
			prp = tprp;
		} while (lbp->prp != prp);
	}

	if (close_quick)
		return;

	/* Take care of the queued log records first */
	if (lbp->lrps != NULL) {
		lrp = lbp->lrps;
		do {
			lrp_next = lrp->next;
			nfslog_free_logrecord(lrp, FALSE);
			lrp = lrp_next;
		} while (lrp != lbp->lrps);
		lbp->lrps = NULL;
	}

	/* The buffer header was decoded and needs to be freed */
	if (lbp->bh.bh_length != 0) {
		buffer = (lbp->bh_lrp->buffer != NULL ?
		    lbp->bh_lrp->buffer : (caddr_t)lbp->mmap_addr);
		xdrmem_create(&xdrs, buffer, lbp->bh_lrp->recsize, XDR_FREE);
		(void) xdr_nfslog_buffer_header(&xdrs, &lbp->bh);
		lbp->bh.bh_length = 0;
	}

	/* get rid of the bufheader lrp */
	if (lbp->bh_lrp != NULL) {
		free_lrp(lbp->bh_lrp);
		lbp->bh_lrp = NULL;
	}

	/* Clean up for mmap() usage */
	if (lbp->mmap_addr != (intptr_t)MAP_FAILED) {
		if (munmap((void *)lbp->mmap_addr, lbp->filesize)) {
			error = errno;
			syslog(LOG_ERR, gettext("munmap failed: %s: %s"),
			    (lbp->bufpath != NULL ? lbp->bufpath : ""),
			    strerror(error));
		}
		lbp->mmap_addr = (intptr_t)MAP_FAILED;
	}

	/* Finally close the buffer file */
	if (lbp->fd >= 0) {
		lbp->fl.l_type = F_UNLCK;
		if (fcntl(lbp->fd, F_SETLK, &lbp->fl) == -1) {
			error = errno;
			syslog(LOG_ERR,
			    gettext("Cannot unlock file %s: %s"),
			    (lbp->bufpath != NULL ? lbp->bufpath : ""),
			    strerror(error));
		}
		(void) close(lbp->fd);
		lbp->fd = -1;
	}
	if (lbp->bufpath != NULL)
		free(lbp->bufpath);
}

/*
 * We are reading a record from the log buffer file.  Since we are reading
 * an XDR stream, we first have to read the first integer to determine
 * how much to read in whole for this record.  Our preference is to use
 * mmap() but if it failed initially we will be using read().  Need to be
 * careful about proper initialization of the log record both from a field
 * perspective and for XDR decoding.
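 *
 * Each record in the buffer file starts with an XDR-encoded unsigned int
 * that gives the total size of the record in bytes (that length word
 * included); the remainder of the record is decoded later, with
 * xdr_nfslog_buffer_header() for the header record and
 * xdr_nfslog_request_record() for regular log records.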
 */
static struct nfslog_lr *
nfslog_read_buffer(struct nfslog_buf *lbp)
{
	XDR xdrs;
	unsigned int record_size;
	struct nfslog_lr *lrp;
	char *sizebuf, tbuf[16];
	caddr_t buffer;
	offset_t next_rec;

	if ((lrp = (struct nfslog_lr *)malloc(sizeof (*lrp))) == NULL)
		return (NULL);
	bzero(lrp, sizeof (*lrp));

	/* Check to see if mmap worked */
	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
		/*
		 * EOF or other failure; we don't try to recover, just return
		 */
		if (read(lbp->fd, tbuf, BYTES_PER_XDR_UNIT) <= 0) {
			free_lrp(lrp);
			return (NULL);
		}
		sizebuf = tbuf;
	} else {
		/* EOF check for the mmap() case */
		if (lbp->filesize <= lbp->next_rec - lbp->mmap_addr) {
			free_lrp(lrp);
			return (NULL);
		}
		sizebuf = (char *)(uintptr_t)lbp->next_rec;
	}

	/* We have to XDR the first int so we know how much is in this record */
	xdrmem_create(&xdrs, sizebuf, sizeof (unsigned int), XDR_DECODE);

	if (!xdr_u_int(&xdrs, &record_size)) {
		free_lrp(lrp);
		return (NULL);
	}

	lrp->recsize = record_size;
	next_rec = lbp->next_rec + lrp->recsize;

	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
		/*
		 * Read() case - shouldn't be used very much.
		 * Note: The 'buffer' field is used later on
		 * to determine which method is being used mmap()|read()
		 */
		if (lbp->filesize < next_rec) {
			/* partial record from buffer */
			syslog(LOG_ERR, gettext(
			    "Last partial record in work buffer %s "
			    "discarded\n"), lbp->bufpath);
			free_lrp(lrp);
			return (NULL);
		}

		if ((lrp->buffer = malloc(lrp->recsize)) == NULL) {
			free_lrp(lrp);
			return (NULL);
		}
		bcopy(sizebuf, lrp->buffer, BYTES_PER_XDR_UNIT);
		if (read(lbp->fd, &lrp->buffer[BYTES_PER_XDR_UNIT],
		    lrp->recsize - BYTES_PER_XDR_UNIT) <= 0) {
			free_lrp(lrp);
			return (NULL);
		}
	} else if (lbp->filesize < next_rec - lbp->mmap_addr) {
		/* partial record from buffer */
		syslog(LOG_ERR, gettext(
		    "Last partial record in work buffer %s "
		    "discarded\n"), lbp->bufpath);
		free_lrp(lrp);
		return (NULL);
	}

	/* other initializations */
	lrp->next = lrp->prev = lrp;
	/* Keep track of the offset at which this record was read */
	if (lbp->mmap_addr == (intptr_t)MAP_FAILED)
		lrp->f_offset = lbp->next_rec;
	else
		lrp->f_offset = lbp->next_rec - lbp->mmap_addr;
	/* This is the true address of the record */
	lrp->record = lbp->next_rec;
	lrp->xdrargs = lrp->xdrres = NULL;
	lrp->lbp = lbp;

	/* Here is the logic for mmap() vs. read() */
	buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);

	/* Setup for the 'real' XDR decode of the entire record */
	xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_DECODE);

	/* calculate the offset for the next record */
	lbp->next_rec = next_rec;

	return (lrp);
}

/*
 * Simple removal of the log record from the log buffer queue.
 * Make sure to manage the count of records queued.
 */
static struct nfslog_lr *
remove_lrp_from_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
{
	if (lbp->lrps == lrp) {
		if (lbp->lrps == lbp->lrps->next) {
			lbp->lrps = NULL;
		} else {
			lbp->lrps = lrp->next;
			remque(lrp);
		}
	} else {
		remque(lrp);
	}
	lbp->num_lrps--;
	return (lrp);
}

/*
 * Insert a log record struct on the log buffer struct.  The log buffer
 * has a pointer to the head of a queue of log records that have been
 * read from the buffer file but have not been processed yet because
 * the record id did not match the sequence desired for processing.
 * The insertion must be in the 'correct'/sorted order which adds
 * to the complexity of this function.
 */
static void
insert_lrp_to_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
{
	int ins_rec_id = lrp->log_record.re_header.rh_rec_id;
	struct nfslog_lr *curlrp;

	if (lbp->lrps == NULL) {
		/* that was easy */
		lbp->lrps = lrp;
	} else {
		/*
		 * Does this lrp go before the first on the list?
		 * If so, do the insertion by hand since insque is not
		 * as flexible when queueing an element to the head of
		 * a list.
		 */
		if (ins_rec_id < lbp->lrps->log_record.re_header.rh_rec_id) {
			lrp->next = lbp->lrps;
			lrp->prev = lbp->lrps->prev;
			lbp->lrps->prev->next = lrp;
			lbp->lrps->prev = lrp;
			lbp->lrps = lrp;
		} else {
			/*
			 * Search the queue for the correct insertion point.
			 * Be careful about the insque so that the record
			 * ends up in the right place.
			 */
			curlrp = lbp->lrps;
			do {
				if (ins_rec_id <
				    curlrp->next->log_record.re_header.rh_rec_id)
					break;
				curlrp = curlrp->next;
			} while (curlrp != lbp->lrps);
			if (curlrp == lbp->lrps)
				insque(lrp, lbp->lrps->prev);
			else
				insque(lrp, curlrp);
		}
	}
	/* always keep track of how many we have */
	lbp->num_lrps++;
}

/*
 * We are rewriting the buffer header at the start of the log buffer
 * for the sole purpose of resetting the bh_offset field.  This is
 * supposed to represent the progress that the nfslogd daemon has made
 * in its processing of the log buffer file.
 * 'lbp->last_record_offset' contains the absolute offset of the end
 * of the last element processed.  The on-disk buffer offset is relative
 * to the buffer header, therefore we subtract the length of the buffer
 * header from the absolute offset.
 */
static void
nfslog_rewrite_bufheader(struct nfslog_buf *lbp)
{
	XDR xdrs;
	nfslog_buffer_header bh;
	/* size big enough for buffer header encode */
#define	XBUFSIZE 128
	char buffer[XBUFSIZE];
	unsigned int wsize;

	/*
	 * if version 1 buffer is large and the current offset cannot be
	 * represented, then don't update the offset in the buffer.
	 */
	if (lbp->bh.bh_flags & NFSLOG_BH_OFFSET_OVERFLOW) {
		/* No need to update the header - offset too big */
		return;
	}
	/*
	 * build the buffer header from the original that was saved
	 * on initialization; note that the offset is taken from the
	 * last record processed (the last offset that represents
	 * all records processed without any holes in the processing)
	 */
	bh = lbp->bh;

	/*
	 * if version 1 buffer is large and the current offset cannot be
	 * represented in 32 bits, then save only the last valid offset
	 * in the buffer and mark the flags to indicate that.
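	 *
	 * (Buffer headers with a version greater than 1 always take the
	 * update path below, so only version 1 headers are subject to
	 * this 32 bit overflow check.)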
	 */
	if ((bh.bh_version > 1) ||
	    (lbp->last_record_offset - bh.bh_length < UINT32_MAX)) {
		bh.bh_offset = lbp->last_record_offset - bh.bh_length;
	} else {
		/* don't update the offset in the buffer */
		bh.bh_flags |= NFSLOG_BH_OFFSET_OVERFLOW;
		lbp->bh.bh_flags = bh.bh_flags;
		syslog(LOG_ERR, gettext(
		    "nfslog_rewrite_bufheader: %s: offset does not fit "
		    "in a 32 bit field\n"), lbp->bufpath);
	}

	xdrmem_create(&xdrs, buffer, XBUFSIZE, XDR_ENCODE);

	if (!xdr_nfslog_buffer_header(&xdrs, &bh)) {
		syslog(LOG_ERR, gettext(
		    "error in re-writing buffer file %s header\n"),
		    lbp->bufpath);
		return;
	}

	wsize = xdr_getpos(&xdrs);

	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
		/* go to the beginning of the file */
		(void) lseek(lbp->fd, 0, SEEK_SET);
		(void) write(lbp->fd, buffer, wsize);
		(void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
		(void) fsync(lbp->fd);
	} else {
		bcopy(buffer, (void *)lbp->mmap_addr, wsize);
		(void) msync((void *)lbp->mmap_addr, wsize, MS_SYNC);
	}
}

/*
 * With the provided lrp, we will take and 'insert' the range that the
 * record covered in the buffer file into a list of processed ranges
 * for the buffer file.  These ranges represent the records processed
 * but not 'marked' in the buffer header as being processed.
 * This insertion process is being done for two reasons.  The first is
 * that we do not want to pay the performance penalty of re-writing the
 * buffer header for each record that we process.  The second reason is
 * that the records may be processed out of order because of the unique
 * ids.  This will occur if the kernel has written the records to the
 * buffer file out of order.  The read routine will 'sort' them as the
 * records are read.
 *
 * We do not want to re-write the buffer header such that a record is
 * represented as being processed when it has not been.  In the case
 * that the nfslogd daemon restarts processing and the buffer header
 * has been re-written improperly, some records could be skipped.
 * We will be taking the conservative approach and only writing buffer
 * header offsets when the entire offset range has been processed.
 */
static void
nfslog_ins_last_rec_processed(struct nfslog_lr *lrp)
{
	struct processed_records *prp, *tp;

	/* init the data struct as if it were the only one */
	if ((prp = malloc(sizeof (*prp))) == NULL)
		return;
	prp->next = prp->prev = prp;
	prp->start_offset = lrp->f_offset;
	prp->len = lrp->recsize;
	prp->num_recs = 1;

	/* always add since we know we are going to insert */
	lrp->lbp->num_pr_queued++;

	/* Is this the first one?  If so, take the easy way out */
	if (lrp->lbp->prp == NULL) {
		lrp->lbp->prp = prp;
	} else {
		/* sort on insertion... */
		tp = lrp->lbp->prp;
		do {
			if (prp->start_offset < tp->start_offset)
				break;
			tp = tp->next;
		} while (tp != lrp->lbp->prp);
		/* insert where appropriate (before the one we found) */
		insque(prp, tp->prev);
		/*
		 * special case where the insertion was done at the
		 * head of the list
		 */
		if (tp == lrp->lbp->prp && prp->start_offset < tp->start_offset)
			lrp->lbp->prp = prp;

		/*
		 * now that the entry is in place, we need to see if it can
		 * be combined with the previous or following entries.
		 * combination is done by adding to the length.
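		 *
		 * For example, if the previous entry covers offsets
		 * [1000, 1400) and this record starts at offset 1400 with
		 * a length of 200, the two collapse into a single entry
		 * covering [1000, 1600); the same merge is then attempted
		 * with the following entry.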
		 */
		if (prp->start_offset ==
		    (prp->prev->start_offset + prp->prev->len)) {
			tp = prp->prev;
			remque(prp);
			tp->len += prp->len;
			tp->num_recs += prp->num_recs;
			free(prp);
			prp = tp;
		}
		if (prp->next->start_offset ==
		    (prp->start_offset + prp->len)) {
			prp->len += prp->next->len;
			prp->num_recs += prp->next->num_recs;
			tp = prp->next;
			remque(tp);
			free(tp);
		}
	}

	if (lrp->lbp->num_pr_queued > MAX_RECS_TO_DELAY) {
		prp = lrp->lbp->prp;
		if (lrp->lbp->last_record_offset ==
		    prp->start_offset) {

			/* adjust the offset for the entire buffer */
			lrp->lbp->last_record_offset =
			    prp->start_offset + prp->len;

			nfslog_rewrite_bufheader(lrp->lbp);

			tp = prp->next;
			if (tp != prp)
				remque(prp);
			else
				tp = NULL;
			lrp->lbp->prp = tp;
			lrp->lbp->num_pr_queued -= prp->num_recs;
			free(prp);
		}
	}
}

/*
 * nfslog_get_logrecord is responsible for retrieving the next log record
 * from the buffer file.  This would normally be very straightforward but
 * there is the added complexity of attempting to order the requests coming
 * out of the buffer file.  The fundamental problem is that the kernel nfs
 * logging functionality does not guarantee that the records were written
 * to the file in the order that the NFS server processed them.  This can
 * cause a problem in the fh -> pathname mapping in the case where a lookup
 * for a file comes later in the buffer file than other operations on the
 * lookup's target.  The fh mapping database will not have an entry and will
 * therefore not be able to map the fh to a name.
 *
 * So to solve this problem, the kernel nfs logging code tags each record
 * with a monotonically increasing id that is guaranteed to be allocated
 * in the order that the requests were processed.  Realize however that
 * this processing guarantee is essentially for one thread on one client.
 * This id mechanism does not order all requests, since it is only the
 * single client/single thread case that is most concerning to us here.
 *
 * This function will do the 'sorting' of the requests as they are
 * read from the buffer file.  The sorting needs to take into account
 * that some ids may be missing (operations not logged but ids allocated)
 * and that the id field will eventually wrap over MAXINT.
 *
 * All of this complexity exists to solve the fh -> pathname mapping issue.
 */
struct nfslog_lr *
nfslog_get_logrecord(struct nfslog_buf *lbp)
{
	/* figure out what the next should be if the world were perfect */
	unsigned int next_rec_id = lbp->last_rec_id + 1;
	struct nfslog_lr *lrp = NULL;

	/*
	 * First we check the queued records on the log buffer struct
	 * to see if the one we want is there.  The records are sorted
	 * on the record id during the insertions to the queue so that
	 * this check is easy.
	 */
	if (lbp->lrps != NULL) {
		/* Does the first record match ? */
		if (lbp->lrps->log_record.re_header.rh_rec_id == next_rec_id) {
			lrp = remove_lrp_from_lb(lbp, lbp->lrps);
			lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
		} else {
			/*
			 * Here we are checking for wrap of the record id
			 * since it is an unsigned int.  The idea is that
			 * if there is a huge span between what we expect
			 * and what is queued then we need to flush/empty
			 * the queued records first.
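			 *
			 * For example, if the expected id has wrapped back
			 * to a small value while the head of the queue
			 * still holds an id near MAXINT, the difference
			 * exceeds MAXINT / 2 and the queued record is
			 * handed out rather than held any longer.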
			 */
			if (next_rec_id <
			    lbp->lrps->log_record.re_header.rh_rec_id &&
			    ((lbp->lrps->log_record.re_header.rh_rec_id -
			    next_rec_id) > (MAXINT / 2))) {

				lrp = remove_lrp_from_lb(lbp, lbp->lrps);
				lbp->last_rec_id =
				    lrp->log_record.re_header.rh_rec_id;
			}
		}
	}
	/*
	 * So the first queued record didn't match (or there were no queued
	 * records to look at).  Now we go to the buffer file looking for
	 * the expected log record based on its id.  We loop looking for
	 * a matching record and save/queue the records that don't match.
	 * Note that we will queue a maximum number to handle the case
	 * of a missing record id or a queue that is very confused.  We don't
	 * want to consume too much memory.
	 */
	while (lrp == NULL) {
		/* Have we queued too many for this buffer? */
		if (lbp->num_lrps >= MAX_LRS_READ_AHEAD) {
			lrp = remove_lrp_from_lb(lbp, lbp->lrps);
			lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
			break;
		}
		/*
		 * Get a record from the buffer file.  If none are available,
		 * this is probably an EOF condition (could be a read error
		 * as well but that is masked. :-().  No records in the
		 * file means that we need to pull any queued records
		 * so that we don't miss any in the processing.
		 */
		if ((lrp = nfslog_read_buffer(lbp)) == NULL) {
			if (lbp->lrps != NULL) {
				lrp = remove_lrp_from_lb(lbp, lbp->lrps);
				lbp->last_rec_id =
				    lrp->log_record.re_header.rh_rec_id;
			} else {
				return (NULL);	/* it was really an EOF */
			}
		} else {
			/*
			 * Just read a record from the buffer file and now we
			 * need to XDR the record header so that we can take
			 * a look at the record id.
			 */
			if (!xdr_nfslog_request_record(&lrp->xdrs,
			    &lrp->log_record)) {
				/* Free and return EOF/NULL on error */
				nfslog_free_logrecord(lrp, FALSE);
				return (NULL);
			}
			/*
			 * If the new record is less than or matches the
			 * expected record id, then we return this record
			 */
			if (lrp->log_record.re_header.rh_rec_id <=
			    next_rec_id) {

				lbp->last_rec_id =
				    lrp->log_record.re_header.rh_rec_id;
			} else {
				/*
				 * This is not the one we were looking
				 * for; queue it for later processing
				 * (queueing sorts on record id)
				 */
				insert_lrp_to_lb(lbp, lrp);
				lrp = NULL;
			}
		}
	}
	return (lrp);
}

/*
 * Free the log record provided.
 * This is complex because the associated XDR streams also need to be freed
 * since allocation could have occurred during the DECODE phase.  The record
 * header, args and results need to be XDR_FREEd.  The xdr functions will
 * be provided if a free needs to be done.
 *
 * Note that the caller tells us if the record being freed was processed.
 * If so, then the buffer header should be updated.  Updating the buffer
 * header keeps track of where the nfslogd daemon left off in its processing
 * if it is unable to complete the entire file.
 */
void
nfslog_free_logrecord(struct nfslog_lr *lrp, bool_t processing_complete)
{
	caddr_t buffer;
	nfslog_request_record *reqrec;

	if (processing_complete) {
		nfslog_ins_last_rec_processed(lrp);
	}

	reqrec = &lrp->log_record;

	buffer = (lrp->buffer != NULL ?
	    lrp->buffer : (caddr_t)lrp->record);

	xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_FREE);

	(void) xdr_nfslog_request_record(&lrp->xdrs, reqrec);

	if (lrp->xdrargs != NULL && reqrec->re_rpc_arg)
		(*lrp->xdrargs)(&lrp->xdrs, reqrec->re_rpc_arg);

	if (reqrec->re_rpc_arg)
		free(reqrec->re_rpc_arg);

	if (lrp->xdrres != NULL && reqrec->re_rpc_res)
		(*lrp->xdrres)(&lrp->xdrs, reqrec->re_rpc_res);

	if (reqrec->re_rpc_res)
		free(reqrec->re_rpc_res);

	free_lrp(lrp);
}

static void
free_lrp(struct nfslog_lr *lrp)
{
	if (lrp->buffer != NULL)
		free(lrp->buffer);
	free(lrp);
}

/*
 * Utility function used elsewhere
 */
void
nfslog_opaque_print_buf(void *buf, int len, char *outbuf, int *outbufoffsetp,
	int maxoffset)
{
	int i, j;
	uint_t *ip;
	uchar_t *u_buf = (uchar_t *)buf;
	int outbufoffset = *outbufoffsetp;

	outbufoffset += sprintf(&outbuf[outbufoffset], " \"");
	if (len <= sizeof (int)) {
		for (j = 0; (j < len) && (outbufoffset < maxoffset);
		    j++, u_buf++)
			outbufoffset += sprintf(&outbuf[outbufoffset],
			    "%02x", *u_buf);
		/* close the quote and update the caller's offset */
		if (outbufoffset < maxoffset)
			outbufoffset += sprintf(&outbuf[outbufoffset], "\"");
		*outbufoffsetp = outbufoffset;
		return;
	}
	/* More than 4 bytes, print with spaces in integer offsets */
	j = (int)((uintptr_t)buf % sizeof (int));
	i = 0;
	if (j > 0) {
		i = sizeof (int) - j;
		for (; (j < sizeof (int)) && (outbufoffset < maxoffset);
		    j++, u_buf++)
			outbufoffset += sprintf(&outbuf[outbufoffset],
			    "%02x", *u_buf);
	}
	/* LINTED */
	ip = (uint_t *)u_buf;
	for (; ((i + sizeof (int)) <= len) && (outbufoffset < maxoffset);
	    i += sizeof (int), ip++) {
		outbufoffset += sprintf(&outbuf[outbufoffset], " %08x", *ip);
	}
	if (i < len) {
		/* Last element not int */
		u_buf = (uchar_t *)ip;
		if (i > j)	/* not first element */
			outbufoffset += sprintf(&outbuf[outbufoffset], " ");
		for (; (i < len) && (outbufoffset < maxoffset); i++, u_buf++) {
			outbufoffset += sprintf(&outbuf[outbufoffset],
			    "%02x", *u_buf);
		}
	}
	if (outbufoffset < maxoffset)
		outbufoffset += sprintf(&outbuf[outbufoffset], "\"");
	*outbufoffsetp = outbufoffset;
}