/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * nfs log - read buffer file and return structs in usable form
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/utsname.h>
#include <sys/mman.h>
#include <strings.h>
#include <errno.h>
#include <syslog.h>
#include <time.h>
#include <limits.h>
#include <libintl.h>
#include <values.h>
#include <search.h>
#include <pwd.h>
#include <netdb.h>
#include <rpc/rpc.h>
#include <netconfig.h>
#include <netdir.h>
#include <nfs/nfs_sec.h>
#include <nfs/export.h>
#include <rpc/auth.h>
#include <rpc/svc.h>
#include <rpc/xdr.h>
#include <rpc/clnt.h>
#include <nfs/nfs.h>
#include <nfs/nfs_log.h>
#include "nfslogd.h"

#define	MAX_LRS_READ_AHEAD	2048
#define	MAX_RECS_TO_DELAY	32768

static int nfslog_init_buf(char *, struct nfslog_buf *, int *);
static void nfslog_free_buf(struct nfslog_buf *, int);
static struct nfslog_lr *nfslog_read_buffer(struct nfslog_buf *);
static void free_lrp(struct nfslog_lr *);
static struct nfslog_lr *remove_lrp_from_lb(struct nfslog_buf *,
	struct nfslog_lr *);
static void insert_lrp_to_lb(struct nfslog_buf *,
	struct nfslog_lr *);
static void nfslog_rewrite_bufheader(struct nfslog_buf *);

/*
 * Treat the provided path name as an NFS log buffer file.
 * Allocate a data structure for its handling and initialize it.
 * *error contains the previous error condition encountered for
 * this object.  This value can be used to avoid printing the last
 * error endlessly.
 * It will set *error appropriately after processing.
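 *
 * A minimal usage sketch of the interfaces in this file (hypothetical
 * caller; the buffer path shown is illustrative only):
 *
 *	int error = 0;
 *	struct nfslog_buf *lbp;
 *	struct nfslog_lr *lrp;
 *
 *	lbp = nfslog_open_buf("/var/nfs/nfslog_workbuffer", &error);
 *	if (lbp != NULL) {
 *		while ((lrp = nfslog_get_logrecord(lbp)) != NULL) {
 *			(process the decoded lrp->log_record here)
 *			nfslog_free_logrecord(lrp, TRUE);
 *		}
 *		nfslog_close_buf(lbp, FALSE);
 *	}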
 */
struct nfslog_buf *
nfslog_open_buf(char *bufpath, int *error)
{
	struct nfslog_buf *lbp = NULL;

	if (bufpath == NULL) {
		*error = EINVAL;
		return (NULL);
	}

	if ((lbp = malloc(sizeof (struct nfslog_buf))) == NULL) {
		*error = ENOMEM;
		return (NULL);
	}
	bzero(lbp, sizeof (struct nfslog_buf));

	if (nfslog_init_buf(bufpath, lbp, error)) {
		free(lbp);
		return (NULL);
	}
	return (lbp);
}

/*
 * Free the log buffer struct with all of its baggage and free the data struct
 */
void
nfslog_close_buf(struct nfslog_buf *lbp, int close_quick)
{
	nfslog_free_buf(lbp, close_quick);
	free(lbp);
}

/*
 * Set up the log buffer struct; the simple parts are opening and locking
 * the buffer file and then mmap()ing it for later use by the
 * XDR decode path.  Make sure to read the buffer header before
 * returning so that we will be at the first true log record.
 *
 * *error contains the last error encountered on this object.  It can
 * be used to avoid reporting the same error endlessly.  It is reset
 * to the current error code on return.
 */
static int
nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp, int *error)
{
	struct stat sb;
	int preverror = *error;

	lbp->next = lbp;
	lbp->prev = lbp;
	/*
	 * set these values so that the free routine will know what to do
	 */
	lbp->mmap_addr = (intptr_t)MAP_FAILED;
	lbp->last_rec_id = MAXINT - 1;
	lbp->bh.bh_length = 0;
	lbp->bh_lrp = NULL;
	lbp->num_lrps = 0;
	lbp->lrps = NULL;
	lbp->last_record_offset = 0;
	lbp->prp = NULL;
	lbp->num_pr_queued = 0;

	lbp->bufpath = strdup(bufpath);
	if (lbp->bufpath == NULL) {
		*error = ENOMEM;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext("Cannot strdup '%s': %s"),
			    bufpath, strerror(*error));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}

	if ((lbp->fd = open(bufpath, O_RDWR)) < 0) {
		*error = errno;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext("Cannot open '%s': %s"),
			    bufpath, strerror(*error));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}

	/*
	 * Lock the entire buffer file to prevent conflicting access.
	 * We get a write lock because we want only 1 process to be
	 * generating records from it.
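	 * F_SETLKW blocks until the write lock is granted, so a competing
	 * process waits here rather than failing; the lock is released with
	 * F_UNLCK in nfslog_free_buf() when the buffer is closed.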
	 */
	lbp->fl.l_type = F_WRLCK;
	lbp->fl.l_whence = SEEK_SET;	/* beginning of file */
	lbp->fl.l_start = (offset_t)0;
	lbp->fl.l_len = 0;		/* entire file */
	lbp->fl.l_sysid = 0;
	lbp->fl.l_pid = 0;
	if (fcntl(lbp->fd, F_SETLKW, &lbp->fl) == -1) {
		*error = errno;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext("Cannot lock (%s): %s"),
			    bufpath, strerror(*error));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}

	if (fstat(lbp->fd, &sb)) {
		*error = errno;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext("Cannot stat (%s): %s"),
			    bufpath, strerror(*error));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}
	lbp->filesize = sb.st_size;

	lbp->mmap_addr = (intptr_t)mmap(0, lbp->filesize, PROT_READ|PROT_WRITE,
	    MAP_SHARED|MAP_NORESERVE, lbp->fd, 0);

	/* next_rec does double duty: an address for mmap(), an offset for read() */
	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
		lbp->next_rec = 0;
	} else {
		lbp->next_rec = lbp->mmap_addr;
	}

	/* Read the header */
	if ((lbp->bh_lrp = nfslog_read_buffer(lbp)) == NULL) {
		*error = EIO;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext(
			    "error in reading file '%s': %s"),
			    bufpath, strerror(EIO));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}

	if (!xdr_nfslog_buffer_header(&lbp->bh_lrp->xdrs, &lbp->bh)) {
		*error = EIO;
		if (preverror != *error) {
			syslog(LOG_ERR, gettext(
			    "error in reading file '%s': %s"),
			    bufpath, strerror(*error));
		}
		nfslog_free_buf(lbp, FALSE);
		return (*error);
	}

	/*
	 * Set the pointer to the next record based on the buffer header.
	 * 'lbp->bh.bh_offset' contains the offset of where to begin
	 * processing relative to the buffer header.
	 */
	lbp->next_rec += lbp->bh.bh_offset;

	/*
	 * If we are going to be using read() for file data, then we may
	 * have to adjust the current file pointer to take into account
	 * a starting point other than the beginning of the file.
	 * If mmap is being used, this is taken care of as a side effect of
	 * setting up the value of next_rec.
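	 * (With mmap() in use, next_rec is an address inside the mapping;
	 * with read(), it is a plain file offset.  That difference is why
	 * last_record_offset is derived differently in the two branches
	 * below.)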
	 */
	if (lbp->mmap_addr == (intptr_t)MAP_FAILED && lbp->next_rec != 0) {
		(void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
		/* This is a special case of setting the last_record_offset */
		lbp->last_record_offset = lbp->next_rec;
	} else {
		lbp->last_record_offset = lbp->next_rec - lbp->mmap_addr;
	}

	return (*error = 0);
}

/*
 * Free the nfslog buffer and its associated allocations
 */
static void
nfslog_free_buf(struct nfslog_buf *lbp, int close_quick)
{
	XDR xdrs;
	int error;
	caddr_t buffer;
	struct nfslog_lr *lrp, *lrp_next;
	struct processed_records *prp, *tprp;

	/* work to free the offset records and rewrite header */
	if (lbp->prp) {
		if (lbp->last_record_offset == lbp->prp->start_offset) {

			/* adjust the offset for the entire buffer */
			lbp->last_record_offset =
			    lbp->prp->start_offset + lbp->prp->len;

			nfslog_rewrite_bufheader(lbp);
		}
		if (close_quick)
			return;
		prp = lbp->prp;
		do {
			tprp = prp->next;
			free(prp);
			prp = tprp;
		} while (lbp->prp != prp);
	}

	if (close_quick)
		return;

	/* Take care of the queued log records first */
	if (lbp->lrps != NULL) {
		lrp = lbp->lrps;
		do {
			lrp_next = lrp->next;
			nfslog_free_logrecord(lrp, FALSE);
			lrp = lrp_next;
		} while (lrp != lbp->lrps);
		lbp->lrps = NULL;
	}

	/* The buffer header was decoded and needs to be freed */
	if (lbp->bh.bh_length != 0) {
		buffer = (lbp->bh_lrp->buffer != NULL ?
		    lbp->bh_lrp->buffer : (caddr_t)lbp->mmap_addr);
		xdrmem_create(&xdrs, buffer, lbp->bh_lrp->recsize, XDR_FREE);
		(void) xdr_nfslog_buffer_header(&xdrs, &lbp->bh);
		lbp->bh.bh_length = 0;
	}

	/* get rid of the bufheader lrp */
	if (lbp->bh_lrp != NULL) {
		free_lrp(lbp->bh_lrp);
		lbp->bh_lrp = NULL;
	}

	/* Clean up for mmap() usage */
	if (lbp->mmap_addr != (intptr_t)MAP_FAILED) {
		if (munmap((void *)lbp->mmap_addr, lbp->filesize)) {
			error = errno;
			syslog(LOG_ERR, gettext("munmap failed: %s: %s"),
			    (lbp->bufpath != NULL ? lbp->bufpath : ""),
			    strerror(error));
		}
		lbp->mmap_addr = (intptr_t)MAP_FAILED;
	}

	/* Finally close the buffer file */
	if (lbp->fd >= 0) {
		lbp->fl.l_type = F_UNLCK;
		if (fcntl(lbp->fd, F_SETLK, &lbp->fl) == -1) {
			error = errno;
			syslog(LOG_ERR,
			    gettext("Cannot unlock file %s: %s"),
			    (lbp->bufpath != NULL ? lbp->bufpath : ""),
			    strerror(error));
		}
		(void) close(lbp->fd);
		lbp->fd = -1;
	}
	if (lbp->bufpath != NULL)
		free(lbp->bufpath);
}

/*
 * We are reading a record from the log buffer file.  Since we are reading
 * an XDR stream, we first have to read the first integer to determine
 * how much to read for this record as a whole.  Our preference is to use
 * mmap(), but if that failed at initialization we fall back to read().
 * Need to be careful about proper initialization of the log record, both
 * from a field perspective and for XDR decoding.
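 * (Each record is self-describing: its first XDR unsigned int holds the
 * total record size in bytes, including the length word itself, so the
 * read() path copies that word and then reads the remaining
 * recsize - BYTES_PER_XDR_UNIT bytes.)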
 */
static struct nfslog_lr *
nfslog_read_buffer(struct nfslog_buf *lbp)
{
	XDR xdrs;
	unsigned int record_size;
	struct nfslog_lr *lrp;
	char *sizebuf, tbuf[16];
	caddr_t buffer;
	offset_t next_rec;

	lrp = (struct nfslog_lr *)malloc(sizeof (*lrp));
	if (lrp == NULL)	/* treat allocation failure like EOF */
		return (NULL);
	bzero(lrp, sizeof (*lrp));

	/* Check to see if mmap worked */
	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
		/*
		 * EOF or other failure; we don't try to recover, just return
		 */
		if (read(lbp->fd, tbuf, BYTES_PER_XDR_UNIT) <= 0) {
			free_lrp(lrp);
			return (NULL);
		}
		sizebuf = tbuf;
	} else {
		/* EOF check for the mmap() case */
		if (lbp->filesize <= lbp->next_rec - lbp->mmap_addr) {
			free_lrp(lrp);
			return (NULL);
		}
		sizebuf = (char *)(uintptr_t)lbp->next_rec;
	}

	/* We have to XDR the first int so we know how much is in this record */
	xdrmem_create(&xdrs, sizebuf, sizeof (unsigned int), XDR_DECODE);

	if (!xdr_u_int(&xdrs, &record_size)) {
		free_lrp(lrp);
		return (NULL);
	}

	lrp->recsize = record_size;
	next_rec = lbp->next_rec + lrp->recsize;

	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
		/*
		 * Read() case - shouldn't be used very much.
		 * Note: The 'buffer' field is used later on
		 * to determine which method is being used, mmap() or read().
		 */
		if (lbp->filesize < next_rec) {
			/* partial record from buffer */
			syslog(LOG_ERR, gettext(
			    "Last partial record in work buffer %s "
			    "discarded\n"), lbp->bufpath);
			free_lrp(lrp);
			return (NULL);
		}

		if ((lrp->buffer = malloc(lrp->recsize)) == NULL) {
			free_lrp(lrp);
			return (NULL);
		}
		bcopy(sizebuf, lrp->buffer, BYTES_PER_XDR_UNIT);
		if (read(lbp->fd, &lrp->buffer[BYTES_PER_XDR_UNIT],
		    lrp->recsize - BYTES_PER_XDR_UNIT) <= 0) {
			free_lrp(lrp);
			return (NULL);
		}
	} else if (lbp->filesize < next_rec - lbp->mmap_addr) {
		/* partial record from buffer */
		syslog(LOG_ERR, gettext(
		    "Last partial record in work buffer %s "
		    "discarded\n"), lbp->bufpath);
		free_lrp(lrp);
		return (NULL);
	}

	/* other initializations */
	lrp->next = lrp->prev = lrp;
	/* Keep track of the offset at which this record was read */
	if (lbp->mmap_addr == (intptr_t)MAP_FAILED)
		lrp->f_offset = lbp->next_rec;
	else
		lrp->f_offset = lbp->next_rec - lbp->mmap_addr;
	/* This is the true address of the record */
	lrp->record = lbp->next_rec;
	lrp->xdrargs = lrp->xdrres = NULL;
	lrp->lbp = lbp;

	/* Here is the logic for mmap() vs. read() */
	buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);

	/* Setup for the 'real' XDR decode of the entire record */
	xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_DECODE);

	/* calculate the offset for the next record */
	lbp->next_rec = next_rec;

	return (lrp);
}

/*
 * Simple removal of the log record from the log buffer queue.
 * Make sure to manage the count of records queued.
 */
static struct nfslog_lr *
remove_lrp_from_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
{
	if (lbp->lrps == lrp) {
		if (lbp->lrps == lbp->lrps->next) {
			lbp->lrps = NULL;
		} else {
			lbp->lrps = lrp->next;
			remque(lrp);
		}
	} else {
		remque(lrp);
	}
	lbp->num_lrps--;
	return (lrp);
}

/*
 * Insert a log record struct on the log buffer struct.  The log buffer
 * has a pointer to the head of a queue of log records that have been
 * read from the buffer file but have not been processed yet because
 * the record id did not match the sequence desired for processing.
 * The insertion must be in the 'correct'/sorted order, which adds
 * to the complexity of this function.
 */
static void
insert_lrp_to_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
{
	int ins_rec_id = lrp->log_record.re_header.rh_rec_id;
	struct nfslog_lr *curlrp;

	if (lbp->lrps == NULL) {
		/* that was easy */
		lbp->lrps = lrp;
	} else {
		/*
		 * Does this lrp go before the first on the list?
		 * If so, do the insertion by hand since insque is not
		 * as flexible when queueing an element to the head of
		 * a list.
		 */
		if (ins_rec_id < lbp->lrps->log_record.re_header.rh_rec_id) {
			lrp->next = lbp->lrps;
			lrp->prev = lbp->lrps->prev;
			lbp->lrps->prev->next = lrp;
			lbp->lrps->prev = lrp;
			lbp->lrps = lrp;
		} else {
			/*
			 * Search the queue for the correct insertion point.
			 * Be careful about the insque so that the record
			 * ends up in the right place.
			 */
			curlrp = lbp->lrps;
			do {
				if (ins_rec_id <
				    curlrp->next->log_record.re_header.rh_rec_id)
					break;
				curlrp = curlrp->next;
			} while (curlrp != lbp->lrps);
			if (curlrp == lbp->lrps)
				insque(lrp, lbp->lrps->prev);
			else
				insque(lrp, curlrp);
		}
	}
	/* always keep track of how many we have */
	lbp->num_lrps++;
}

/*
 * We are rewriting the buffer header at the start of the log buffer
 * for the sole purpose of resetting the bh_offset field.  This is
 * supposed to represent the progress that the nfslogd daemon has made
 * in its processing of the log buffer file.
 * 'lbp->last_record_offset' contains the absolute offset of the end
 * of the last element processed.  The on-disk buffer offset is relative
 * to the buffer header, therefore we subtract the length of the buffer
 * header from the absolute offset.
 */
static void
nfslog_rewrite_bufheader(struct nfslog_buf *lbp)
{
	XDR xdrs;
	nfslog_buffer_header bh;
/* size big enough for buffer header encode */
#define	XBUFSIZE 128
	char buffer[XBUFSIZE];
	unsigned int wsize;

	/*
	 * if version 1 buffer is large and the current offset cannot be
	 * represented, then don't update the offset in the buffer.
	 */
	if (lbp->bh.bh_flags & NFSLOG_BH_OFFSET_OVERFLOW) {
		/* No need to update the header - offset too big */
		return;
	}
	/*
	 * build the buffer header from the original that was saved
	 * on initialization; note that the offset is taken from the
	 * last record processed (the last offset that represents
	 * all records processed without any holes in the processing)
	 */
	bh = lbp->bh;

	/*
	 * if version 1 buffer is large and the current offset cannot be
	 * represented in 32 bits, then save only the last valid offset
	 * in the buffer and mark the flags to indicate that.
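	 * (Only the version 1 header is limited to a 32-bit bh_offset;
	 * the version check below therefore skips the range test for
	 * later header versions.)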
	 */
	if ((bh.bh_version > 1) ||
	    (lbp->last_record_offset - bh.bh_length < UINT32_MAX)) {
		bh.bh_offset = lbp->last_record_offset - bh.bh_length;
	} else {
		/* don't update the offset in the buffer */
		bh.bh_flags |= NFSLOG_BH_OFFSET_OVERFLOW;
		lbp->bh.bh_flags = bh.bh_flags;
		syslog(LOG_ERR, gettext(
		    "nfslog_rewrite_bufheader: %s: offset does not fit "
		    "in a 32 bit field\n"), lbp->bufpath);
	}

	xdrmem_create(&xdrs, buffer, XBUFSIZE, XDR_ENCODE);

	if (!xdr_nfslog_buffer_header(&xdrs, &bh)) {
		syslog(LOG_ERR, gettext(
		    "error in re-writing buffer file %s header\n"),
		    lbp->bufpath);
		return;
	}

	wsize = xdr_getpos(&xdrs);

	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
		/* go to the beginning of the file */
		(void) lseek(lbp->fd, 0, SEEK_SET);
		(void) write(lbp->fd, buffer, wsize);
		(void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
		(void) fsync(lbp->fd);
	} else {
		bcopy(buffer, (void *)lbp->mmap_addr, wsize);
		(void) msync((void *)lbp->mmap_addr, wsize, MS_SYNC);
	}
}

/*
 * With the provided lrp, we 'insert' the range that the record covered
 * in the buffer file into a list of processed ranges for the buffer
 * file.  These ranges represent the records processed but not 'marked'
 * in the buffer header as being processed.
 * This insertion process is being done for two reasons.  The first is that
 * we do not want to pay the performance penalty of re-writing the buffer
 * header for each record that we process.  The second reason is that the
 * records may be processed out of order because of the unique ids.  This
 * will occur if the kernel has written the records to the buffer file out
 * of order.  The read routine will 'sort' them as the records are read.
 *
 * We do not want to re-write the buffer header such that a record is
 * represented as being processed when it has not been.  In the case
 * that the nfslogd daemon restarts processing and the buffer header
 * has been re-written improperly, some records could be skipped.
 * We will be taking the conservative approach and only writing buffer
 * header offsets when the entire offset range has been processed.
 */
static void
nfslog_ins_last_rec_processed(struct nfslog_lr *lrp)
{
	struct processed_records *prp, *tp;

	/* init the data struct as if it were the only one */
	prp = malloc(sizeof (*prp));
	if (prp == NULL)	/* skip recording; record may be reprocessed */
		return;
	prp->next = prp->prev = prp;
	prp->start_offset = lrp->f_offset;
	prp->len = lrp->recsize;
	prp->num_recs = 1;

	/* always add since we know we are going to insert */
	lrp->lbp->num_pr_queued++;

	/* Is this the first one? If so, take the easy way out */
	if (lrp->lbp->prp == NULL) {
		lrp->lbp->prp = prp;
	} else {
		/* sort on insertion... */
		tp = lrp->lbp->prp;
		do {
			if (prp->start_offset < tp->start_offset)
				break;
			tp = tp->next;
		} while (tp != lrp->lbp->prp);
		/* insert where appropriate (before the one we found) */
		insque(prp, tp->prev);
		/*
		 * special case where the insertion was done at the
		 * head of the list
		 */
		if (tp == lrp->lbp->prp && prp->start_offset < tp->start_offset)
			lrp->lbp->prp = prp;

		/*
		 * now that the entry is in place, we need to see if it can
		 * be combined with the previous or following entries.
		 * combination is done by adding to the length.
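		 * For example, an existing range {start 4096, len 512}
		 * and a newly inserted {start 4608, len 256} collapse
		 * into a single {start 4096, len 768} entry, with
		 * num_recs summed as well.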
		 */
		if (prp->start_offset ==
		    (prp->prev->start_offset + prp->prev->len)) {
			tp = prp->prev;
			remque(prp);
			tp->len += prp->len;
			tp->num_recs += prp->num_recs;
			free(prp);
			prp = tp;
		}
		if (prp->next->start_offset ==
		    (prp->start_offset + prp->len)) {
			prp->len += prp->next->len;
			prp->num_recs += prp->next->num_recs;
			tp = prp->next;
			remque(tp);
			free(tp);
		}
	}

	if (lrp->lbp->num_pr_queued > MAX_RECS_TO_DELAY) {
		prp = lrp->lbp->prp;
		if (lrp->lbp->last_record_offset ==
		    prp->start_offset) {

			/* adjust the offset for the entire buffer */
			lrp->lbp->last_record_offset =
			    prp->start_offset + prp->len;

			nfslog_rewrite_bufheader(lrp->lbp);

			tp = prp->next;
			if (tp != prp)
				remque(prp);
			else
				tp = NULL;
			lrp->lbp->prp = tp;
			lrp->lbp->num_pr_queued -= prp->num_recs;
			free(prp);
		}
	}
}

/*
 * nfslog_get_logrecord is responsible for retrieving the next log record
 * from the buffer file.  This would normally be very straightforward but there
 * is the added complexity of attempting to order the requests coming out of
 * the buffer file.  The fundamental problem is that the kernel nfs logging
 * functionality does not guarantee that the records were written to the file
 * in the order that the NFS server processed them.  This can cause a problem
 * in the fh -> pathname mapping in the case where a lookup for a file comes
 * later in the buffer file than other operations on the lookup's target.
 * The fh mapping database will not have an entry and will therefore not
 * be able to map the fh to a name.
 *
 * So to solve this problem, the kernel nfs logging code tags each record
 * with a monotonically increasing id that is guaranteed to be allocated
 * in the order that the requests were processed.  Realize however that
 * this processing guarantee is essentially for one thread on one client.
 * This id mechanism does not order all requests, since it is only the
 * single client/single thread case that is of most concern to us here.
 *
 * This function will do the 'sorting' of the requests as they are
 * read from the buffer file.  The sorting needs to take into account
 * that some ids may be missing (operations not logged but ids allocated)
 * and that the id field will eventually wrap over MAXINT.
 *
 * All of this complexity exists to solve the fh -> pathname mapping issue.
 */
struct nfslog_lr *
nfslog_get_logrecord(struct nfslog_buf *lbp)
{
	/* figure out what the next record id should be if the world were perfect */
	unsigned int next_rec_id = lbp->last_rec_id + 1;
	struct nfslog_lr *lrp = NULL;

	/*
	 * First we check the queued records on the log buffer struct
	 * to see if the one we want is there.  The records are sorted
	 * on the record id during the insertions to the queue so that
	 * this check is easy.
	 */
	if (lbp->lrps != NULL) {
		/* Does the first record match ? */
		if (lbp->lrps->log_record.re_header.rh_rec_id == next_rec_id) {
			lrp = remove_lrp_from_lb(lbp, lbp->lrps);
			lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
		} else {
			/*
			 * Here we are checking for wrap of the record id
			 * since it is an unsigned int.  The idea is that
			 * if there is a huge span between what we expect
			 * and what is queued then we need to flush/empty
			 * the queued records first.
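			 * For example, if the ids have wrapped, next_rec_id
			 * may be a small number while the queued records
			 * still carry ids near MAXINT; the difference then
			 * exceeds MAXINT / 2 and we hand back the queued
			 * record rather than wait for an id that has
			 * already passed.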
			 */
			if (next_rec_id <
			    lbp->lrps->log_record.re_header.rh_rec_id &&
			    ((lbp->lrps->log_record.re_header.rh_rec_id -
			    next_rec_id) > (MAXINT / 2))) {

				lrp = remove_lrp_from_lb(lbp, lbp->lrps);
				lbp->last_rec_id =
				    lrp->log_record.re_header.rh_rec_id;
			}
		}
	}
	/*
	 * So the first queued record didn't match (or there were no queued
	 * records to look at).  Now we go to the buffer file looking for
	 * the expected log record based on its id.  We loop looking for
	 * a matching record and save/queue the records that don't match.
	 * Note that we will queue a maximum number to handle the case
	 * of a missing record id or a queue that is very confused.  We don't
	 * want to consume too much memory.
	 */
	while (lrp == NULL) {
		/* Have we queued too many for this buffer? */
		if (lbp->num_lrps >= MAX_LRS_READ_AHEAD) {
			lrp = remove_lrp_from_lb(lbp, lbp->lrps);
			lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
			break;
		}
		/*
		 * Get a record from the buffer file.  If none are available,
		 * this is probably an EOF condition (could be a read error
		 * as well but that is masked. :-().  No records in the
		 * file means that we need to pull any queued records
		 * so that we don't miss any in the processing.
		 */
		if ((lrp = nfslog_read_buffer(lbp)) == NULL) {
			if (lbp->lrps != NULL) {
				lrp = remove_lrp_from_lb(lbp, lbp->lrps);
				lbp->last_rec_id =
				    lrp->log_record.re_header.rh_rec_id;
			} else {
				return (NULL);	/* it was really an EOF */
			}
		} else {
			/*
			 * Just read a record from the buffer file and now we
			 * need to XDR the record header so that we can take
			 * a look at the record id.
			 */
			if (!xdr_nfslog_request_record(&lrp->xdrs,
			    &lrp->log_record)) {
				/* Free and return EOF/NULL on error */
				nfslog_free_logrecord(lrp, FALSE);
				return (NULL);
			}
			/*
			 * If the new record is less than or matches the
			 * expected record id, then we return this record
			 */
			if (lrp->log_record.re_header.rh_rec_id <=
			    next_rec_id) {

				lbp->last_rec_id =
				    lrp->log_record.re_header.rh_rec_id;
			} else {
				/*
				 * This is not the one we were looking
				 * for; queue it for later processing
				 * (queueing sorts on record id)
				 */
				insert_lrp_to_lb(lbp, lrp);
				lrp = NULL;
			}
		}
	}
	return (lrp);
}

/*
 * Free the log record provided.
 * This is complex because the associated XDR streams also need to be freed
 * since allocation could have occurred during the DECODE phase.  The record
 * header, args and results need to be XDR_FREEd.  The xdr functions will
 * be provided if a free needs to be done.
 *
 * Note that the caller tells us if the record being freed was processed.
 * If so, then the buffer header should be updated.  Updating the buffer
 * header keeps track of where the nfslogd daemon left off in its processing
 * if it is unable to complete the entire file.
 */
void
nfslog_free_logrecord(struct nfslog_lr *lrp, bool_t processing_complete)
{
	caddr_t buffer;
	nfslog_request_record *reqrec;

	if (processing_complete) {
		nfslog_ins_last_rec_processed(lrp);
	}

	reqrec = &lrp->log_record;

	buffer = (lrp->buffer != NULL ?
	    lrp->buffer : (caddr_t)lrp->record);

	xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_FREE);

	(void) xdr_nfslog_request_record(&lrp->xdrs, reqrec);

	if (lrp->xdrargs != NULL && reqrec->re_rpc_arg)
		(*lrp->xdrargs)(&lrp->xdrs, reqrec->re_rpc_arg);

	if (reqrec->re_rpc_arg)
		free(reqrec->re_rpc_arg);

	if (lrp->xdrres != NULL && reqrec->re_rpc_res)
		(*lrp->xdrres)(&lrp->xdrs, reqrec->re_rpc_res);

	if (reqrec->re_rpc_res)
		free(reqrec->re_rpc_res);

	free_lrp(lrp);
}

static void
free_lrp(struct nfslog_lr *lrp)
{
	if (lrp->buffer != NULL)
		free(lrp->buffer);
	free(lrp);
}

/*
 * Utility function used elsewhere
 */
void
nfslog_opaque_print_buf(void *buf, int len, char *outbuf, int *outbufoffsetp,
	int maxoffset)
{
	int i, j;
	uint_t *ip;
	uchar_t *u_buf = (uchar_t *)buf;
	int outbufoffset = *outbufoffsetp;

	outbufoffset += sprintf(&outbuf[outbufoffset], " \"");
	if (len <= sizeof (int)) {
		for (j = 0; (j < len) && (outbufoffset < maxoffset);
		    j++, u_buf++)
			outbufoffset += sprintf(&outbuf[outbufoffset],
			    "%02x", *u_buf);
		/* close the quoting and report progress for short opaques */
		if (outbufoffset < maxoffset)
			outbufoffset += sprintf(&outbuf[outbufoffset], "\"");
		*outbufoffsetp = outbufoffset;
		return;
	}
	/* More than 4 bytes, print with spaces at integer offsets */
	j = (int)((uintptr_t)buf % sizeof (int));
	i = 0;
	if (j > 0) {
		i = sizeof (int) - j;
		for (; (j < sizeof (int)) && (outbufoffset < maxoffset);
		    j++, u_buf++)
			outbufoffset += sprintf(&outbuf[outbufoffset],
			    "%02x", *u_buf);
	}
	/* LINTED */
	ip = (uint_t *)u_buf;
	for (; ((i + sizeof (int)) <= len) && (outbufoffset < maxoffset);
	    i += sizeof (int), ip++) {
		outbufoffset += sprintf(&outbuf[outbufoffset], " %08x", *ip);
	}
	if (i < len) {
		/* Last element not int */
		u_buf = (uchar_t *)ip;
		if (i > j)	/* not first element */
			outbufoffset += sprintf(&outbuf[outbufoffset], " ");
		for (; (i < len) && (outbufoffset < maxoffset); i++, u_buf++) {
			outbufoffset += sprintf(&outbuf[outbufoffset],
			    "%02x", *u_buf);
		}
	}
	if (outbufoffset < maxoffset)
		outbufoffset += sprintf(&outbuf[outbufoffset], "\"");
	*outbufoffsetp = outbufoffset;
}