1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * nfs log - read buffer file and return structs in usable form
29 */
30
31 #include <ctype.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <stddef.h>
35 #include <string.h>
36 #include <fcntl.h>
37 #include <unistd.h>
38 #include <signal.h>
39 #include <sys/types.h>
40 #include <sys/param.h>
41 #include <sys/stat.h>
42 #include <sys/utsname.h>
43 #include <sys/mman.h>
44 #include <strings.h>
45 #include <errno.h>
46 #include <syslog.h>
47 #include <time.h>
48 #include <limits.h>
49 #include <libintl.h>
50 #include <values.h>
51 #include <search.h>
52 #include <pwd.h>
53 #include <netdb.h>
54 #include <rpc/rpc.h>
55 #include <netconfig.h>
56 #include <netdir.h>
57 #include <nfs/nfs_sec.h>
58 #include <nfs/export.h>
59 #include <rpc/auth.h>
60 #include <rpc/svc.h>
61 #include <rpc/xdr.h>
62 #include <rpc/clnt.h>
63 #include <nfs/nfs.h>
64 #include <nfs/nfs_log.h>
65 #include "nfslogd.h"
66
/*
 * Most log records that may sit on a buffer's out-of-order queue before
 * nfslog_get_logrecord() stops reading ahead and hands back the queue head.
 */
#define	MAX_LRS_READ_AHEAD	2048
/*
 * Number of processed-but-unrecorded records above which
 * nfslog_ins_last_rec_processed() tries to push the progress offset into
 * the on-disk buffer header.
 */
#define	MAX_RECS_TO_DELAY	32768

/* File-local helpers, defined further down. */
static int nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp,
	int *error);
static void nfslog_free_buf(struct nfslog_buf *lbp, int close_quick);
static struct nfslog_lr *nfslog_read_buffer(struct nfslog_buf *lbp);
static void free_lrp(struct nfslog_lr *lrp);
static struct nfslog_lr *remove_lrp_from_lb(struct nfslog_buf *lbp,
	struct nfslog_lr *lrp);
static void insert_lrp_to_lb(struct nfslog_buf *lbp,
	struct nfslog_lr *lrp);
static void nfslog_rewrite_bufheader(struct nfslog_buf *lbp);
79
80 /*
81 * Treat the provided path name as an NFS log buffer file.
82 * Allocate a data structure for its handling and initialize it.
83 * *error contains the previous error condition encountered for
84 * this object. This value can be used to avoid printing the last
85 * error endlessly.
86 * It will set *error appropriately after processing.
87 */
88 struct nfslog_buf *
nfslog_open_buf(char * bufpath,int * error)89 nfslog_open_buf(char *bufpath, int *error)
90 {
91 struct nfslog_buf *lbp = NULL;
92
93 if (bufpath == NULL) {
94 *error = EINVAL;
95 return (NULL);
96 }
97
98 if ((lbp = malloc(sizeof (struct nfslog_buf))) == NULL) {
99 *error = ENOMEM;
100 return (NULL);
101 }
102 bzero(lbp, sizeof (struct nfslog_buf));
103
104 if (nfslog_init_buf(bufpath, lbp, error)) {
105 free(lbp);
106 return (NULL);
107 }
108 return (lbp);
109 }
110
/*
 * Release a log buffer handle obtained from nfslog_open_buf().
 *
 * 'close_quick' is passed straight to nfslog_free_buf(): when it is
 * non-zero that routine only attempts the final header rewrite and returns
 * without releasing the queued records, the mapping or the descriptor.
 * The handle itself is always freed.
 *
 * A NULL handle is accepted and ignored, mirroring free(NULL).
 */
void
nfslog_close_buf(struct nfslog_buf *lbp, int close_quick)
{
	if (lbp == NULL)
		return;

	nfslog_free_buf(lbp, close_quick);
	free(lbp);
}
120
121 /*
122 * Set up the log buffer struct; simple things are opening and locking
123 * the buffer file and then on to mmap()ing it for later use by the
124 * XDR decode path. Make sure to read the buffer header before
125 * returning so that we will be at the first true log record.
126 *
127 * *error contains the last error encountered on this object. It can
128 * be used to avoid reporting the same error endlessly. It is reset
129 * to the current error code on return.
130 */
131 static int
nfslog_init_buf(char * bufpath,struct nfslog_buf * lbp,int * error)132 nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp, int *error)
133 {
134 struct stat sb;
135 int preverror = *error;
136
137 lbp->next = lbp;
138 lbp->prev = lbp;
139 /*
140 * set these values so that the free routine will know what to do
141 */
142 lbp->mmap_addr = (intptr_t)MAP_FAILED;
143 lbp->last_rec_id = MAXINT - 1;
144 lbp->bh.bh_length = 0;
145 lbp->bh_lrp = NULL;
146 lbp->num_lrps = 0;
147 lbp->lrps = NULL;
148 lbp->last_record_offset = 0;
149 lbp->prp = NULL;
150 lbp->num_pr_queued = 0;
151
152 lbp->bufpath = strdup(bufpath);
153 if (lbp->bufpath == NULL) {
154 *error = ENOMEM;
155 if (preverror != *error) {
156 syslog(LOG_ERR, gettext("Cannot strdup '%s': %s"),
157 bufpath, strerror(*error));
158 }
159 nfslog_free_buf(lbp, FALSE);
160 return (*error);
161 }
162
163 if ((lbp->fd = open(bufpath, O_RDWR)) < 0) {
164 *error = errno;
165 if (preverror != *error) {
166 syslog(LOG_ERR, gettext("Cannot open '%s': %s"),
167 bufpath, strerror(*error));
168 }
169 nfslog_free_buf(lbp, FALSE);
170 return (*error);
171 }
172
173 /*
174 * Lock the entire buffer file to prevent conflicting access.
175 * We get a write lock because we want only 1 process to be
176 * generating records from it.
177 */
178 lbp->fl.l_type = F_WRLCK;
179 lbp->fl.l_whence = SEEK_SET; /* beginning of file */
180 lbp->fl.l_start = (offset_t)0;
181 lbp->fl.l_len = 0; /* entire file */
182 lbp->fl.l_sysid = 0;
183 lbp->fl.l_pid = 0;
184 if (fcntl(lbp->fd, F_SETLKW, &lbp->fl) == -1) {
185 *error = errno;
186 if (preverror != *error) {
187 syslog(LOG_ERR, gettext("Cannot lock (%s): %s"),
188 bufpath, strerror(*error));
189 }
190 nfslog_free_buf(lbp, FALSE);
191 return (*error);
192 }
193
194 if (fstat(lbp->fd, &sb)) {
195 *error = errno;
196 if (preverror != *error) {
197 syslog(LOG_ERR, gettext("Cannot stat (%s): %s"),
198 bufpath, strerror(*error));
199 }
200 nfslog_free_buf(lbp, FALSE);
201 return (*error);
202 }
203 lbp->filesize = sb.st_size;
204
205 lbp->mmap_addr = (intptr_t)mmap(0, lbp->filesize, PROT_READ|PROT_WRITE,
206 MAP_SHARED|MAP_NORESERVE, lbp->fd, 0);
207
208 /* This is part of the duality of the use of either mmap()|read() */
209 if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
210 lbp->next_rec = 0;
211 } else {
212 lbp->next_rec = lbp->mmap_addr;
213 }
214
215 /* Read the header */
216 if ((lbp->bh_lrp = nfslog_read_buffer(lbp)) == NULL) {
217 *error = EIO;
218 if (preverror != *error) {
219 syslog(LOG_ERR, gettext(
220 "error in reading file '%s': %s"),
221 bufpath, strerror(EIO));
222 }
223 nfslog_free_buf(lbp, FALSE);
224 return (*error);
225 }
226
227 if (!xdr_nfslog_buffer_header(&lbp->bh_lrp->xdrs, &lbp->bh)) {
228 *error = EIO;
229 if (preverror != *error) {
230 syslog(LOG_ERR, gettext(
231 "error in reading file '%s': %s"),
232 bufpath, strerror(*error));
233 }
234 nfslog_free_buf(lbp, FALSE);
235 return (*error);
236 }
237
238 /*
239 * Set the pointer to the next record based on the buffer header.
240 * 'lbp->bh.bh_offset' contains the offset of where to begin
241 * processing relative to the buffer header.
242 */
243 lbp->next_rec += lbp->bh.bh_offset;
244
245 /*
246 * If we are going to be using read() for file data, then we may
247 * have to adjust the current file pointer to take into account
248 * a starting point other than the beginning of the file.
249 * If mmap is being used, this is taken care of as a side effect of
250 * setting up the value of next_rec.
251 */
252 if (lbp->mmap_addr == (intptr_t)MAP_FAILED && lbp->next_rec != 0) {
253 (void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
254 /* This is a special case of setting the last_record_offset */
255 lbp->last_record_offset = lbp->next_rec;
256 } else {
257 lbp->last_record_offset = lbp->next_rec - lbp->mmap_addr;
258 }
259
260 return (*error = 0);
261 }
262
263 /*
264 * Free the nfslog buffer and its associated allocations
265 */
266 static void
nfslog_free_buf(struct nfslog_buf * lbp,int close_quick)267 nfslog_free_buf(struct nfslog_buf *lbp, int close_quick)
268 {
269 XDR xdrs;
270 int error;
271 caddr_t buffer;
272 struct nfslog_lr *lrp, *lrp_next;
273 struct processed_records *prp, *tprp;
274
275 /* work to free the offset records and rewrite header */
276 if (lbp->prp) {
277 if (lbp->last_record_offset == lbp->prp->start_offset) {
278
279 /* adjust the offset for the entire buffer */
280 lbp->last_record_offset =
281 lbp->prp->start_offset + lbp->prp->len;
282
283 nfslog_rewrite_bufheader(lbp);
284 }
285 if (close_quick)
286 return;
287 prp = lbp->prp;
288 do {
289 tprp = prp->next;
290 free(prp);
291 prp = tprp;
292 } while (lbp->prp != prp);
293 }
294
295 if (close_quick)
296 return;
297
298 /* Take care of the queue log records first */
299 if (lbp->lrps != NULL) {
300 lrp = lbp->lrps;
301 do {
302 lrp_next = lrp->next;
303 nfslog_free_logrecord(lrp, FALSE);
304 lrp = lrp_next;
305 } while (lrp != lbp->lrps);
306 lbp->lrps = NULL;
307 }
308
309 /* The buffer header was decoded and needs to be freed */
310 if (lbp->bh.bh_length != 0) {
311 buffer = (lbp->bh_lrp->buffer != NULL ?
312 lbp->bh_lrp->buffer : (caddr_t)lbp->mmap_addr);
313 xdrmem_create(&xdrs, buffer, lbp->bh_lrp->recsize, XDR_FREE);
314 (void) xdr_nfslog_buffer_header(&xdrs, &lbp->bh);
315 lbp->bh.bh_length = 0;
316 }
317
318 /* get rid of the bufheader lrp */
319 if (lbp->bh_lrp != NULL) {
320 free_lrp(lbp->bh_lrp);
321 lbp->bh_lrp = NULL;
322 }
323
324 /* Clean up for mmap() usage */
325 if (lbp->mmap_addr != (intptr_t)MAP_FAILED) {
326 if (munmap((void *)lbp->mmap_addr, lbp->filesize)) {
327 error = errno;
328 syslog(LOG_ERR, gettext("munmap failed: %s: %s"),
329 (lbp->bufpath != NULL ? lbp->bufpath : ""),
330 strerror(error));
331 }
332 lbp->mmap_addr = (intptr_t)MAP_FAILED;
333 }
334
335 /* Finally close the buffer file */
336 if (lbp->fd >= 0) {
337 lbp->fl.l_type = F_UNLCK;
338 if (fcntl(lbp->fd, F_SETLK, &lbp->fl) == -1) {
339 error = errno;
340 syslog(LOG_ERR,
341 gettext("Cannot unlock file %s: %s"),
342 (lbp->bufpath != NULL ? lbp->bufpath : ""),
343 strerror(error));
344 }
345 (void) close(lbp->fd);
346 lbp->fd = -1;
347 }
348 if (lbp->bufpath != NULL)
349 free(lbp->bufpath);
350 }
351
352 /*
353 * We are reading a record from the log buffer file. Since we are reading
354 * an XDR stream, we first have to read the first integer to determine
355 * how much to read in whole for this record. Our preference is to use
356 * mmap() but if failed initially we will be using read(). Need to be
357 * careful about proper initialization of the log record both from a field
358 * perspective and for XDR decoding.
359 */
360 static struct nfslog_lr *
nfslog_read_buffer(struct nfslog_buf * lbp)361 nfslog_read_buffer(struct nfslog_buf *lbp)
362 {
363 XDR xdrs;
364 unsigned int record_size;
365 struct nfslog_lr *lrp;
366 char *sizebuf, tbuf[16];
367 caddr_t buffer;
368 offset_t next_rec;
369
370 lrp = (struct nfslog_lr *)malloc(sizeof (*lrp));
371 bzero(lrp, sizeof (*lrp));
372
373 /* Check to see if mmap worked */
374 if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
375 /*
376 * EOF or other failure; we don't try to recover, just return
377 */
378 if (read(lbp->fd, tbuf, BYTES_PER_XDR_UNIT) <= 0) {
379 free_lrp(lrp);
380 return (NULL);
381 }
382 sizebuf = tbuf;
383 } else {
384 /* EOF check for the mmap() case */
385 if (lbp->filesize <= lbp->next_rec - lbp->mmap_addr) {
386 free_lrp(lrp);
387 return (NULL);
388 }
389 sizebuf = (char *)(uintptr_t)lbp->next_rec;
390 }
391
392 /* We have to XDR the first int so we know how much is in this record */
393 xdrmem_create(&xdrs, sizebuf, sizeof (unsigned int), XDR_DECODE);
394
395 if (!xdr_u_int(&xdrs, &record_size)) {
396 free_lrp(lrp);
397 return (NULL);
398 }
399
400 lrp->recsize = record_size;
401 next_rec = lbp->next_rec + lrp->recsize;
402
403 if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
404 /*
405 * Read() case - shouldn't be used very much.
406 * Note: The 'buffer' field is used later on
407 * to determine which method is being used mmap()|read()
408 */
409 if (lbp->filesize < next_rec) {
410 /* partial record from buffer */
411 syslog(LOG_ERR, gettext(
412 "Last partial record in work buffer %s "
413 "discarded\n"), lbp->bufpath);
414 free_lrp(lrp);
415 return (NULL);
416 }
417
418 if ((lrp->buffer = malloc(lrp->recsize)) == NULL) {
419 free_lrp(lrp);
420 return (NULL);
421 }
422 bcopy(sizebuf, lrp->buffer, BYTES_PER_XDR_UNIT);
423 if (read(lbp->fd, &lrp->buffer[BYTES_PER_XDR_UNIT],
424 lrp->recsize - BYTES_PER_XDR_UNIT) <= 0) {
425 free_lrp(lrp);
426 return (NULL);
427 }
428 } else if (lbp->filesize < next_rec - lbp->mmap_addr) {
429 /* partial record from buffer */
430 syslog(LOG_ERR, gettext(
431 "Last partial record in work buffer %s "
432 "discarded\n"), lbp->bufpath);
433 free_lrp(lrp);
434 return (NULL);
435 }
436
437
438 /* other initializations */
439 lrp->next = lrp->prev = lrp;
440 /* Keep track of the offset at which this record was read */
441 if (lbp->mmap_addr == (intptr_t)MAP_FAILED)
442 lrp->f_offset = lbp->next_rec;
443 else
444 lrp->f_offset = lbp->next_rec - lbp->mmap_addr;
445 /* This is the true address of the record */
446 lrp->record = lbp->next_rec;
447 lrp->xdrargs = lrp->xdrres = NULL;
448 lrp->lbp = lbp;
449
450 /* Here is the logic for mmap() vs. read() */
451 buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);
452
453 /* Setup for the 'real' XDR decode of the entire record */
454 xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_DECODE);
455
456 /* calculate the offset for the next record */
457 lbp->next_rec = next_rec;
458
459 return (lrp);
460 }
461
462 /*
463 * Simple removal of the log record from the log buffer queue.
464 * Make sure to manage the count of records queued.
465 */
466 static struct nfslog_lr *
remove_lrp_from_lb(struct nfslog_buf * lbp,struct nfslog_lr * lrp)467 remove_lrp_from_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
468 {
469 if (lbp->lrps == lrp) {
470 if (lbp->lrps == lbp->lrps->next) {
471 lbp->lrps = NULL;
472 } else {
473 lbp->lrps = lrp->next;
474 remque(lrp);
475 }
476 } else {
477 remque(lrp);
478 }
479 lbp->num_lrps--;
480 return (lrp);
481 }
482
483 /*
484 * Insert a log record struct on the log buffer struct. The log buffer
485 * has a pointer to the head of a queue of log records that have been
486 * read from the buffer file but have not been processed yet because
487 * the record id did not match the sequence desired for processing.
488 * The insertion must be in the 'correct'/sorted order which adds
489 * to the complexity of this function.
490 */
491 static void
insert_lrp_to_lb(struct nfslog_buf * lbp,struct nfslog_lr * lrp)492 insert_lrp_to_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
493 {
494 int ins_rec_id = lrp->log_record.re_header.rh_rec_id;
495 struct nfslog_lr *curlrp;
496
497 if (lbp->lrps == NULL) {
498 /* that was easy */
499 lbp->lrps = lrp;
500 } else {
501 /*
502 * Does this lrp go before the first on the list?
503 * If so, do the insertion by hand since insque is not
504 * as flexible when queueing an element to the head of
505 * a list.
506 */
507 if (ins_rec_id < lbp->lrps->log_record.re_header.rh_rec_id) {
508 lrp->next = lbp->lrps;
509 lrp->prev = lbp->lrps->prev;
510 lbp->lrps->prev->next = lrp;
511 lbp->lrps->prev = lrp;
512 lbp->lrps = lrp;
513 } else {
514 /*
515 * Search the queue for the correct insertion point.
516 * Be careful about the insque so that the record
517 * ends up in the right place.
518 */
519 curlrp = lbp->lrps;
520 do {
521 if (ins_rec_id <
522 curlrp->next->log_record.re_header.rh_rec_id)
523 break;
524 curlrp = curlrp->next;
525 } while (curlrp != lbp->lrps);
526 if (curlrp == lbp->lrps)
527 insque(lrp, lbp->lrps->prev);
528 else
529 insque(lrp, curlrp);
530 }
531 }
532 /* always keep track of how many we have */
533 lbp->num_lrps++;
534 }
535
536 /*
537 * We are rewriting the buffer header at the start of the log buffer
538 * for the sole purpose of resetting the bh_offset field. This is
539 * supposed to represent the progress that the nfslogd daemon has made
540 * in its processing of the log buffer file.
541 * 'lbp->last_record_offset' contains the absolute offset of the end
542 * of the last element processed. The on-disk buffer offset is relative
543 * to the buffer header, therefore we subtract the length of the buffer
544 * header from the absolute offset.
545 */
546 static void
nfslog_rewrite_bufheader(struct nfslog_buf * lbp)547 nfslog_rewrite_bufheader(struct nfslog_buf *lbp)
548 {
549 XDR xdrs;
550 nfslog_buffer_header bh;
551 /* size big enough for buffer header encode */
552 #define XBUFSIZE 128
553 char buffer[XBUFSIZE];
554 unsigned int wsize;
555
556 /*
557 * if version 1 buffer is large and the current offset cannot be
558 * represented, then don't update the offset in the buffer.
559 */
560 if (lbp->bh.bh_flags & NFSLOG_BH_OFFSET_OVERFLOW) {
561 /* No need to update the header - offset too big */
562 return;
563 }
564 /*
565 * build the buffer header from the original that was saved
566 * on initialization; note that the offset is taken from the
567 * last record processed (the last offset that represents
568 * all records processed without any holes in the processing)
569 */
570 bh = lbp->bh;
571
572 /*
573 * if version 1 buffer is large and the current offset cannot be
574 * represented in 32 bits, then save only the last valid offset
575 * in the buffer and mark the flags to indicate that.
576 */
577 if ((bh.bh_version > 1) ||
578 (lbp->last_record_offset - bh.bh_length < UINT32_MAX)) {
579 bh.bh_offset = lbp->last_record_offset - bh.bh_length;
580 } else {
581 /* don't update the offset in the buffer */
582 bh.bh_flags |= NFSLOG_BH_OFFSET_OVERFLOW;
583 lbp->bh.bh_flags = bh.bh_flags;
584 syslog(LOG_ERR, gettext(
585 "nfslog_rewrite_bufheader: %s: offset does not fit "
586 "in a 32 bit field\n"), lbp->bufpath);
587 }
588
589 xdrmem_create(&xdrs, buffer, XBUFSIZE, XDR_ENCODE);
590
591 if (!xdr_nfslog_buffer_header(&xdrs, &bh)) {
592 syslog(LOG_ERR, gettext(
593 "error in re-writing buffer file %s header\n"),
594 lbp->bufpath);
595 return;
596 }
597
598 wsize = xdr_getpos(&xdrs);
599
600 if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
601 /* go to the beginning of the file */
602 (void) lseek(lbp->fd, 0, SEEK_SET);
603 (void) write(lbp->fd, buffer, wsize);
604 (void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
605 (void) fsync(lbp->fd);
606 } else {
607 bcopy(buffer, (void *)lbp->mmap_addr, wsize);
608 (void) msync((void *)lbp->mmap_addr, wsize, MS_SYNC);
609 }
610 }
611
612 /*
613 * With the provided lrp, we will take and 'insert' the range that the
614 * record covered in the buffer file into a list of processed ranges
615 * for the buffer file. These ranges represent the records processed
616 * but not 'marked' in the buffer header as being processed.
617 * This insertion process is being done for two reasons. The first is that
618 * we do not want to pay the performance penalty of re-writing the buffer header
619 * for each record that we process. The second reason is that the records
620 * may be processed out of order because of the unique ids. This will occur
621 * if the kernel has written the records to the buffer file out of order.
622 * The read routine will 'sort' them as the records are read.
623 *
624 * We do not want to re-write the buffer header such that a record is
625 * represented and being processed when it has not been. In the case
626 * that the nfslogd daemon restarts processing and the buffer header
627 * has been re-written improperly, some records could be skipped.
628 * We will be taking the conservative approach and only writing buffer
629 * header offsets when the entire offset range has been processed.
630 */
631 static void
nfslog_ins_last_rec_processed(struct nfslog_lr * lrp)632 nfslog_ins_last_rec_processed(struct nfslog_lr *lrp)
633 {
634 struct processed_records *prp, *tp;
635
636 /* init the data struct as if it were the only one */
637 prp = malloc(sizeof (*prp));
638 prp->next = prp->prev = prp;
639 prp->start_offset = lrp->f_offset;
640 prp->len = lrp->recsize;
641 prp->num_recs = 1;
642
643 /* always add since we know we are going to insert */
644 lrp->lbp->num_pr_queued++;
645
646 /* Is this the first one? If so, take the easy way out */
647 if (lrp->lbp->prp == NULL) {
648 lrp->lbp->prp = prp;
649 } else {
650 /* sort on insertion... */
651 tp = lrp->lbp->prp;
652 do {
653 if (prp->start_offset < tp->start_offset)
654 break;
655 tp = tp->next;
656 } while (tp != lrp->lbp->prp);
657 /* insert where appropriate (before the one we found */
658 insque(prp, tp->prev);
659 /*
660 * special case where the insertion was done at the
661 * head of the list
662 */
663 if (tp == lrp->lbp->prp && prp->start_offset < tp->start_offset)
664 lrp->lbp->prp = prp;
665
666 /*
667 * now that the entry is in place, we need to see if it can
668 * be combined with the previous or following entries.
669 * combination is done by adding to the length.
670 */
671 if (prp->start_offset ==
672 (prp->prev->start_offset + prp->prev->len)) {
673 tp = prp->prev;
674 remque(prp);
675 tp->len += prp->len;
676 tp->num_recs += prp->num_recs;
677 free(prp);
678 prp = tp;
679 }
680 if (prp->next->start_offset ==
681 (prp->start_offset + prp->len)) {
682 prp->len += prp->next->len;
683 prp->num_recs += prp->next->num_recs;
684 tp = prp->next;
685 remque(tp);
686 free(tp);
687 }
688 }
689
690 if (lrp->lbp->num_pr_queued > MAX_RECS_TO_DELAY) {
691 prp = lrp->lbp->prp;
692 if (lrp->lbp->last_record_offset ==
693 prp->start_offset) {
694
695 /* adjust the offset for the entire buffer */
696 lrp->lbp->last_record_offset =
697 prp->start_offset + prp->len;
698
699 nfslog_rewrite_bufheader(lrp->lbp);
700
701 tp = prp->next;
702 if (tp != prp)
703 remque(prp);
704 else
705 tp = NULL;
706 lrp->lbp->prp = tp;
707 lrp->lbp->num_pr_queued -= prp->num_recs;
708 free(prp);
709 }
710 }
711 }
712
713 /*
714 * nfslog_get_logrecord is responsible for retrieving the next log record
715 * from the buffer file. This would normally be very straightforward but there
716 * is the added complexity of attempting to order the requests coming out of
717 * the buffer file. The fundamental problems is that the kernel nfs logging
718 * functionality does not guarantee that the records were written to the file
719 * in the order that the NFS server processed them. This can cause a problem
720 * in the fh -> pathname mapping in the case were a lookup for a file comes
721 * later in the buffer file than other operations on the lookup's target.
722 * The fh mapping database will not have an entry and will therefore not
723 * be able to map the fh to a name.
724 *
725 * So to solve this problem, the kernel nfs logging code tags each record
726 * with a monotonically increasing id and is guaranteed to be allocated
727 * in the order that the requests were processed. Realize however that
728 * this processing guarantee is essentially for one thread on one client.
729 * This id mechanism does not order all requests since it is only the
730 * single client/single thread case that is most concerning to us here.
731 *
732 * This function will do the 'sorting' of the requests as they are
733 * read from the buffer file. The sorting needs to take into account
734 * that some ids may be missing (operations not logged but ids allocated)
735 * and that the id field will eventually wrap over MAXINT.
736 *
737 * Complexity to solve the fh -> pathname mapping issue.
738 */
739 struct nfslog_lr *
nfslog_get_logrecord(struct nfslog_buf * lbp)740 nfslog_get_logrecord(struct nfslog_buf *lbp)
741 {
742 /* figure out what the next should be if the world were perfect */
743 unsigned int next_rec_id = lbp->last_rec_id + 1;
744 struct nfslog_lr *lrp = NULL;
745
746 /*
747 * First we check the queued records on the log buffer struct
748 * to see if the one we want is there. The records are sorted
749 * on the record id during the insertions to the queue so that
750 * this check is easy.
751 */
752 if (lbp->lrps != NULL) {
753 /* Does the first record match ? */
754 if (lbp->lrps->log_record.re_header.rh_rec_id == next_rec_id) {
755 lrp = remove_lrp_from_lb(lbp, lbp->lrps);
756 lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
757 } else {
758 /*
759 * Here we are checking for wrap of the record id
760 * since it is an unsigned in. The idea is that
761 * if there is a huge span between what we expect
762 * and what is queued then we need to flush/empty
763 * the queued records first.
764 */
765 if (next_rec_id <
766 lbp->lrps->log_record.re_header.rh_rec_id &&
767 ((lbp->lrps->log_record.re_header.rh_rec_id -
768 next_rec_id) > (MAXINT / 2))) {
769
770 lrp = remove_lrp_from_lb(lbp, lbp->lrps);
771 lbp->last_rec_id =
772 lrp->log_record.re_header.rh_rec_id;
773 }
774 }
775 }
776 /*
777 * So the first queued record didn't match (or there were no queued
778 * records to look at). Now we go to the buffer file looking for
779 * the expected log record based on its id. We loop looking for
780 * a matching records and save/queue the records that don't match.
781 * Note that we will queue a maximum number to handle the case
782 * of a missing record id or a queue that is very confused. We don't
783 * want to consume too much memory.
784 */
785 while (lrp == NULL) {
786 /* Have we queued too many for this buffer? */
787 if (lbp->num_lrps >= MAX_LRS_READ_AHEAD) {
788 lrp = remove_lrp_from_lb(lbp, lbp->lrps);
789 lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
790 break;
791 }
792 /*
793 * Get a record from the buffer file. If none are available,
794 * this is probably and EOF condition (could be a read error
795 * as well but that is masked. :-(). No records in the
796 * file means that we need to pull any queued records
797 * so that we don't miss any in the processing.
798 */
799 if ((lrp = nfslog_read_buffer(lbp)) == NULL) {
800 if (lbp->lrps != NULL) {
801 lrp = remove_lrp_from_lb(lbp, lbp->lrps);
802 lbp->last_rec_id =
803 lrp->log_record.re_header.rh_rec_id;
804 } else {
805 return (NULL); /* it was really and EOF */
806 }
807 } else {
808 /*
809 * Just read a record from the buffer file and now we
810 * need to XDR the record header so that we can take
811 * a look at the record id.
812 */
813 if (!xdr_nfslog_request_record(&lrp->xdrs,
814 &lrp->log_record)) {
815 /* Free and return EOF/NULL on error */
816 nfslog_free_logrecord(lrp, FALSE);
817 return (NULL);
818 }
819 /*
820 * If the new record is less than or matches the
821 * expected record id, then we return this record
822 */
823 if (lrp->log_record.re_header.rh_rec_id <=
824 next_rec_id) {
825
826 lbp->last_rec_id =
827 lrp->log_record.re_header.rh_rec_id;
828 } else {
829 /*
830 * This is not the one we were looking
831 * for; queue it for later processing
832 * (queueing sorts on record id)
833 */
834 insert_lrp_to_lb(lbp, lrp);
835 lrp = NULL;
836 }
837 }
838 }
839 return (lrp);
840 }
841
842 /*
843 * Free the log record provided.
844 * This is complex because the associated XDR streams also need to be freed
845 * since allocation could have occured during the DECODE phase. The record
846 * header, args and results need to be XDR_FREEd. The xdr funtions will
847 * be provided if a free needs to be done.
848 *
849 * Note that caller tells us if the record being freed was processed.
850 * If so, then the buffer header should be updated. Updating the buffer
851 * header keeps track of where the nfslogd daemon left off in its processing
852 * if it is unable to complete the entire file.
853 */
854 void
nfslog_free_logrecord(struct nfslog_lr * lrp,bool_t processing_complete)855 nfslog_free_logrecord(struct nfslog_lr *lrp, bool_t processing_complete)
856 {
857 caddr_t buffer;
858 nfslog_request_record *reqrec;
859
860 if (processing_complete) {
861 nfslog_ins_last_rec_processed(lrp);
862 }
863
864 reqrec = &lrp->log_record;
865
866 buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);
867
868 xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_FREE);
869
870 (void) xdr_nfslog_request_record(&lrp->xdrs, reqrec);
871
872 if (lrp->xdrargs != NULL && reqrec->re_rpc_arg)
873 (*lrp->xdrargs)(&lrp->xdrs, reqrec->re_rpc_arg);
874
875 if (reqrec->re_rpc_arg)
876 free(reqrec->re_rpc_arg);
877
878 if (lrp->xdrres != NULL && reqrec->re_rpc_res)
879 (*lrp->xdrres)(&lrp->xdrs, reqrec->re_rpc_res);
880
881 if (reqrec->re_rpc_res)
882 free(reqrec->re_rpc_res);
883
884 free_lrp(lrp);
885 }
886
887 static void
free_lrp(struct nfslog_lr * lrp)888 free_lrp(struct nfslog_lr *lrp)
889 {
890 if (lrp->buffer != NULL)
891 free(lrp->buffer);
892 free(lrp);
893 }
894
/*
 * Utility function used elsewhere.
 *
 * Append a hex dump of the 'len' bytes at 'buf' to 'outbuf', starting at
 * index *outbufoffsetp, in the form:  space, '"', hex digits, '"'.
 *
 *  - Up to sizeof (int) bytes are printed as one run of two-digit bytes.
 *  - Longer input is printed as any bytes preceding the first int-aligned
 *    address, then " %08x" for every whole int (host byte order), then
 *    the remaining bytes.
 *
 * On return *outbufoffsetp is the index of the terminating NUL, for every
 * input length.  A negative 'len' is treated as 0.
 *
 * Output stops being added once the offset has reached 'maxoffset'.
 * NOTE(review): the limit is tested before each piece is written, not
 * enforced on it, so a single piece (up to 9 characters plus the NUL) can
 * still land beyond 'maxoffset' -- callers presumably leave that much
 * slack; confirm against the callers.
 */
void
nfslog_opaque_print_buf(void *buf, int len, char *outbuf, int *outbufoffsetp,
	int maxoffset)
{
	const unsigned char *bytep = buf;
	unsigned int word;
	int pos = *outbufoffsetp;
	int consumed = 0;	/* input bytes accounted for so far */
	int lead = 0;		/* misalignment of 'buf', then loop index */

	/* a negative length would turn into a huge unsigned bound below */
	if (len < 0)
		len = 0;

	pos += sprintf(&outbuf[pos], " \"");

	if ((size_t)len <= sizeof (int)) {
		for (; (consumed < len) && (pos < maxoffset);
		    consumed++, bytep++)
			pos += sprintf(&outbuf[pos], "%02x", *bytep);
	} else {
		/* More than 4 bytes, print with spaces in integer offsets */
		lead = (int)((uintptr_t)buf % sizeof (int));
		if (lead > 0) {
			consumed = (int)sizeof (int) - lead;
			for (; (lead < (int)sizeof (int)) &&
			    (pos < maxoffset); lead++, bytep++)
				pos += sprintf(&outbuf[pos], "%02x", *bytep);
		}
		for (; ((size_t)consumed + sizeof (int) <= (size_t)len) &&
		    (pos < maxoffset);
		    consumed += (int)sizeof (int), bytep += sizeof (int)) {
			/* copy the word out instead of aliasing the bytes */
			(void) memcpy(&word, bytep, sizeof (word));
			pos += sprintf(&outbuf[pos], " %08x", word);
		}
		if (consumed < len) {
			/* Last element not int */
			if (consumed > lead)	/* not first element */
				pos += sprintf(&outbuf[pos], " ");
			for (; (consumed < len) && (pos < maxoffset);
			    consumed++, bytep++)
				pos += sprintf(&outbuf[pos], "%02x", *bytep);
		}
	}

	/* both paths close the quote and report the new offset */
	if (pos < maxoffset)
		pos += sprintf(&outbuf[pos], "\"");
	*outbufoffsetp = pos;
}
945