1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 /*
30 * nfs log - read buffer file and return structs in usable form
31 */
32
33 #include <ctype.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stddef.h>
37 #include <string.h>
38 #include <fcntl.h>
39 #include <unistd.h>
40 #include <signal.h>
41 #include <sys/types.h>
42 #include <sys/param.h>
43 #include <sys/stat.h>
44 #include <sys/utsname.h>
45 #include <sys/mman.h>
46 #include <strings.h>
47 #include <errno.h>
48 #include <syslog.h>
49 #include <time.h>
50 #include <limits.h>
51 #include <libintl.h>
52 #include <values.h>
53 #include <search.h>
54 #include <pwd.h>
55 #include <netdb.h>
56 #include <rpc/rpc.h>
57 #include <netconfig.h>
58 #include <netdir.h>
59 #include <nfs/nfs_sec.h>
60 #include <nfs/export.h>
61 #include <rpc/auth.h>
62 #include <rpc/svc.h>
63 #include <rpc/xdr.h>
64 #include <rpc/clnt.h>
65 #include <nfs/nfs.h>
66 #include <nfs/nfs_log.h>
67 #include "nfslogd.h"
68
/*
 * Upper bound on the number of out-of-sequence log records that
 * nfslog_get_logrecord() keeps queued on a buffer; once reached, the
 * record at the head of the queue is handed back instead of reading on.
 */
#define	MAX_LRS_READ_AHEAD 2048
/*
 * Once more than this many processed records are being tracked for a
 * buffer, nfslog_ins_last_rec_processed() attempts to advance the
 * offset saved in the buffer header.
 */
#define	MAX_RECS_TO_DELAY 32768

/* Helpers private to this file */
static int nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp, int *error);
static void nfslog_free_buf(struct nfslog_buf *lbp, int close_quick);
static struct nfslog_lr *nfslog_read_buffer(struct nfslog_buf *lbp);
static void free_lrp(struct nfslog_lr *lrp);
static struct nfslog_lr *remove_lrp_from_lb(struct nfslog_buf *lbp,
    struct nfslog_lr *lrp);
static void insert_lrp_to_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp);
static void nfslog_rewrite_bufheader(struct nfslog_buf *lbp);
81
82 /*
83 * Treat the provided path name as an NFS log buffer file.
84 * Allocate a data structure for its handling and initialize it.
85 * *error contains the previous error condition encountered for
86 * this object. This value can be used to avoid printing the last
87 * error endlessly.
88 * It will set *error appropriately after processing.
89 */
90 struct nfslog_buf *
nfslog_open_buf(char * bufpath,int * error)91 nfslog_open_buf(char *bufpath, int *error)
92 {
93 struct nfslog_buf *lbp = NULL;
94
95 if (bufpath == NULL) {
96 *error = EINVAL;
97 return (NULL);
98 }
99
100 if ((lbp = malloc(sizeof (struct nfslog_buf))) == NULL) {
101 *error = ENOMEM;
102 return (NULL);
103 }
104 bzero(lbp, sizeof (struct nfslog_buf));
105
106 if (nfslog_init_buf(bufpath, lbp, error)) {
107 free(lbp);
108 return (NULL);
109 }
110 return (lbp);
111 }
112
/*
 * Dispose of a handle obtained from nfslog_open_buf(): let
 * nfslog_free_buf() release what the buffer holds (it honors
 * 'close_quick'), then free the handle structure itself.
 */
void
nfslog_close_buf(struct nfslog_buf *lbp, int close_quick)
{
	nfslog_free_buf(lbp, close_quick);
	free(lbp);
}
122
123 /*
124 * Set up the log buffer struct; simple things are opening and locking
125 * the buffer file and then on to mmap()ing it for later use by the
126 * XDR decode path. Make sure to read the buffer header before
127 * returning so that we will be at the first true log record.
128 *
129 * *error contains the last error encountered on this object. It can
130 * be used to avoid reporting the same error endlessly. It is reset
131 * to the current error code on return.
132 */
133 static int
nfslog_init_buf(char * bufpath,struct nfslog_buf * lbp,int * error)134 nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp, int *error)
135 {
136 struct stat sb;
137 int preverror = *error;
138
139 lbp->next = lbp;
140 lbp->prev = lbp;
141 /*
142 * set these values so that the free routine will know what to do
143 */
144 lbp->mmap_addr = (intptr_t)MAP_FAILED;
145 lbp->last_rec_id = MAXINT - 1;
146 lbp->bh.bh_length = 0;
147 lbp->bh_lrp = NULL;
148 lbp->num_lrps = 0;
149 lbp->lrps = NULL;
150 lbp->last_record_offset = 0;
151 lbp->prp = NULL;
152 lbp->num_pr_queued = 0;
153
154 lbp->bufpath = strdup(bufpath);
155 if (lbp->bufpath == NULL) {
156 *error = ENOMEM;
157 if (preverror != *error) {
158 syslog(LOG_ERR, gettext("Cannot strdup '%s': %s"),
159 bufpath, strerror(*error));
160 }
161 nfslog_free_buf(lbp, FALSE);
162 return (*error);
163 }
164
165 if ((lbp->fd = open(bufpath, O_RDWR)) < 0) {
166 *error = errno;
167 if (preverror != *error) {
168 syslog(LOG_ERR, gettext("Cannot open '%s': %s"),
169 bufpath, strerror(*error));
170 }
171 nfslog_free_buf(lbp, FALSE);
172 return (*error);
173 }
174
175 /*
176 * Lock the entire buffer file to prevent conflicting access.
177 * We get a write lock because we want only 1 process to be
178 * generating records from it.
179 */
180 lbp->fl.l_type = F_WRLCK;
181 lbp->fl.l_whence = SEEK_SET; /* beginning of file */
182 lbp->fl.l_start = (offset_t)0;
183 lbp->fl.l_len = 0; /* entire file */
184 lbp->fl.l_sysid = 0;
185 lbp->fl.l_pid = 0;
186 if (fcntl(lbp->fd, F_SETLKW, &lbp->fl) == -1) {
187 *error = errno;
188 if (preverror != *error) {
189 syslog(LOG_ERR, gettext("Cannot lock (%s): %s"),
190 bufpath, strerror(*error));
191 }
192 nfslog_free_buf(lbp, FALSE);
193 return (*error);
194 }
195
196 if (fstat(lbp->fd, &sb)) {
197 *error = errno;
198 if (preverror != *error) {
199 syslog(LOG_ERR, gettext("Cannot stat (%s): %s"),
200 bufpath, strerror(*error));
201 }
202 nfslog_free_buf(lbp, FALSE);
203 return (*error);
204 }
205 lbp->filesize = sb.st_size;
206
207 lbp->mmap_addr = (intptr_t)mmap(0, lbp->filesize, PROT_READ|PROT_WRITE,
208 MAP_SHARED|MAP_NORESERVE, lbp->fd, 0);
209
210 /* This is part of the duality of the use of either mmap()|read() */
211 if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
212 lbp->next_rec = 0;
213 } else {
214 lbp->next_rec = lbp->mmap_addr;
215 }
216
217 /* Read the header */
218 if ((lbp->bh_lrp = nfslog_read_buffer(lbp)) == NULL) {
219 *error = EIO;
220 if (preverror != *error) {
221 syslog(LOG_ERR, gettext(
222 "error in reading file '%s': %s"),
223 bufpath, strerror(EIO));
224 }
225 nfslog_free_buf(lbp, FALSE);
226 return (*error);
227 }
228
229 if (!xdr_nfslog_buffer_header(&lbp->bh_lrp->xdrs, &lbp->bh)) {
230 *error = EIO;
231 if (preverror != *error) {
232 syslog(LOG_ERR, gettext(
233 "error in reading file '%s': %s"),
234 bufpath, strerror(*error));
235 }
236 nfslog_free_buf(lbp, FALSE);
237 return (*error);
238 }
239
240 /*
241 * Set the pointer to the next record based on the buffer header.
242 * 'lbp->bh.bh_offset' contains the offset of where to begin
243 * processing relative to the buffer header.
244 */
245 lbp->next_rec += lbp->bh.bh_offset;
246
247 /*
248 * If we are going to be using read() for file data, then we may
249 * have to adjust the current file pointer to take into account
250 * a starting point other than the beginning of the file.
251 * If mmap is being used, this is taken care of as a side effect of
252 * setting up the value of next_rec.
253 */
254 if (lbp->mmap_addr == (intptr_t)MAP_FAILED && lbp->next_rec != 0) {
255 (void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
256 /* This is a special case of setting the last_record_offset */
257 lbp->last_record_offset = lbp->next_rec;
258 } else {
259 lbp->last_record_offset = lbp->next_rec - lbp->mmap_addr;
260 }
261
262 return (*error = 0);
263 }
264
265 /*
266 * Free the nfslog buffer and its associated allocations
267 */
268 static void
nfslog_free_buf(struct nfslog_buf * lbp,int close_quick)269 nfslog_free_buf(struct nfslog_buf *lbp, int close_quick)
270 {
271 XDR xdrs;
272 int error;
273 caddr_t buffer;
274 struct nfslog_lr *lrp, *lrp_next;
275 struct processed_records *prp, *tprp;
276
277 /* work to free the offset records and rewrite header */
278 if (lbp->prp) {
279 if (lbp->last_record_offset == lbp->prp->start_offset) {
280
281 /* adjust the offset for the entire buffer */
282 lbp->last_record_offset =
283 lbp->prp->start_offset + lbp->prp->len;
284
285 nfslog_rewrite_bufheader(lbp);
286 }
287 if (close_quick)
288 return;
289 prp = lbp->prp;
290 do {
291 tprp = prp->next;
292 free(prp);
293 prp = tprp;
294 } while (lbp->prp != prp);
295 }
296
297 if (close_quick)
298 return;
299
300 /* Take care of the queue log records first */
301 if (lbp->lrps != NULL) {
302 lrp = lbp->lrps;
303 do {
304 lrp_next = lrp->next;
305 nfslog_free_logrecord(lrp, FALSE);
306 lrp = lrp_next;
307 } while (lrp != lbp->lrps);
308 lbp->lrps = NULL;
309 }
310
311 /* The buffer header was decoded and needs to be freed */
312 if (lbp->bh.bh_length != 0) {
313 buffer = (lbp->bh_lrp->buffer != NULL ?
314 lbp->bh_lrp->buffer : (caddr_t)lbp->mmap_addr);
315 xdrmem_create(&xdrs, buffer, lbp->bh_lrp->recsize, XDR_FREE);
316 (void) xdr_nfslog_buffer_header(&xdrs, &lbp->bh);
317 lbp->bh.bh_length = 0;
318 }
319
320 /* get rid of the bufheader lrp */
321 if (lbp->bh_lrp != NULL) {
322 free_lrp(lbp->bh_lrp);
323 lbp->bh_lrp = NULL;
324 }
325
326 /* Clean up for mmap() usage */
327 if (lbp->mmap_addr != (intptr_t)MAP_FAILED) {
328 if (munmap((void *)lbp->mmap_addr, lbp->filesize)) {
329 error = errno;
330 syslog(LOG_ERR, gettext("munmap failed: %s: %s"),
331 (lbp->bufpath != NULL ? lbp->bufpath : ""),
332 strerror(error));
333 }
334 lbp->mmap_addr = (intptr_t)MAP_FAILED;
335 }
336
337 /* Finally close the buffer file */
338 if (lbp->fd >= 0) {
339 lbp->fl.l_type = F_UNLCK;
340 if (fcntl(lbp->fd, F_SETLK, &lbp->fl) == -1) {
341 error = errno;
342 syslog(LOG_ERR,
343 gettext("Cannot unlock file %s: %s"),
344 (lbp->bufpath != NULL ? lbp->bufpath : ""),
345 strerror(error));
346 }
347 (void) close(lbp->fd);
348 lbp->fd = -1;
349 }
350 if (lbp->bufpath != NULL)
351 free(lbp->bufpath);
352 }
353
354 /*
355 * We are reading a record from the log buffer file. Since we are reading
356 * an XDR stream, we first have to read the first integer to determine
357 * how much to read in whole for this record. Our preference is to use
358 * mmap() but if failed initially we will be using read(). Need to be
359 * careful about proper initialization of the log record both from a field
360 * perspective and for XDR decoding.
361 */
362 static struct nfslog_lr *
nfslog_read_buffer(struct nfslog_buf * lbp)363 nfslog_read_buffer(struct nfslog_buf *lbp)
364 {
365 XDR xdrs;
366 unsigned int record_size;
367 struct nfslog_lr *lrp;
368 char *sizebuf, tbuf[16];
369 caddr_t buffer;
370 offset_t next_rec;
371
372 lrp = (struct nfslog_lr *)malloc(sizeof (*lrp));
373 bzero(lrp, sizeof (*lrp));
374
375 /* Check to see if mmap worked */
376 if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
377 /*
378 * EOF or other failure; we don't try to recover, just return
379 */
380 if (read(lbp->fd, tbuf, BYTES_PER_XDR_UNIT) <= 0) {
381 free_lrp(lrp);
382 return (NULL);
383 }
384 sizebuf = tbuf;
385 } else {
386 /* EOF check for the mmap() case */
387 if (lbp->filesize <= lbp->next_rec - lbp->mmap_addr) {
388 free_lrp(lrp);
389 return (NULL);
390 }
391 sizebuf = (char *)(uintptr_t)lbp->next_rec;
392 }
393
394 /* We have to XDR the first int so we know how much is in this record */
395 xdrmem_create(&xdrs, sizebuf, sizeof (unsigned int), XDR_DECODE);
396
397 if (!xdr_u_int(&xdrs, &record_size)) {
398 free_lrp(lrp);
399 return (NULL);
400 }
401
402 lrp->recsize = record_size;
403 next_rec = lbp->next_rec + lrp->recsize;
404
405 if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
406 /*
407 * Read() case - shouldn't be used very much.
408 * Note: The 'buffer' field is used later on
409 * to determine which method is being used mmap()|read()
410 */
411 if (lbp->filesize < next_rec) {
412 /* partial record from buffer */
413 syslog(LOG_ERR, gettext(
414 "Last partial record in work buffer %s "
415 "discarded\n"), lbp->bufpath);
416 free_lrp(lrp);
417 return (NULL);
418 }
419
420 if ((lrp->buffer = malloc(lrp->recsize)) == NULL) {
421 free_lrp(lrp);
422 return (NULL);
423 }
424 bcopy(sizebuf, lrp->buffer, BYTES_PER_XDR_UNIT);
425 if (read(lbp->fd, &lrp->buffer[BYTES_PER_XDR_UNIT],
426 lrp->recsize - BYTES_PER_XDR_UNIT) <= 0) {
427 free_lrp(lrp);
428 return (NULL);
429 }
430 } else if (lbp->filesize < next_rec - lbp->mmap_addr) {
431 /* partial record from buffer */
432 syslog(LOG_ERR, gettext(
433 "Last partial record in work buffer %s "
434 "discarded\n"), lbp->bufpath);
435 free_lrp(lrp);
436 return (NULL);
437 }
438
439
440 /* other initializations */
441 lrp->next = lrp->prev = lrp;
442 /* Keep track of the offset at which this record was read */
443 if (lbp->mmap_addr == (intptr_t)MAP_FAILED)
444 lrp->f_offset = lbp->next_rec;
445 else
446 lrp->f_offset = lbp->next_rec - lbp->mmap_addr;
447 /* This is the true address of the record */
448 lrp->record = lbp->next_rec;
449 lrp->xdrargs = lrp->xdrres = NULL;
450 lrp->lbp = lbp;
451
452 /* Here is the logic for mmap() vs. read() */
453 buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);
454
455 /* Setup for the 'real' XDR decode of the entire record */
456 xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_DECODE);
457
458 /* calculate the offset for the next record */
459 lbp->next_rec = next_rec;
460
461 return (lrp);
462 }
463
464 /*
465 * Simple removal of the log record from the log buffer queue.
466 * Make sure to manage the count of records queued.
467 */
468 static struct nfslog_lr *
remove_lrp_from_lb(struct nfslog_buf * lbp,struct nfslog_lr * lrp)469 remove_lrp_from_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
470 {
471 if (lbp->lrps == lrp) {
472 if (lbp->lrps == lbp->lrps->next) {
473 lbp->lrps = NULL;
474 } else {
475 lbp->lrps = lrp->next;
476 remque(lrp);
477 }
478 } else {
479 remque(lrp);
480 }
481 lbp->num_lrps--;
482 return (lrp);
483 }
484
485 /*
486 * Insert a log record struct on the log buffer struct. The log buffer
487 * has a pointer to the head of a queue of log records that have been
488 * read from the buffer file but have not been processed yet because
489 * the record id did not match the sequence desired for processing.
490 * The insertion must be in the 'correct'/sorted order which adds
491 * to the complexity of this function.
492 */
493 static void
insert_lrp_to_lb(struct nfslog_buf * lbp,struct nfslog_lr * lrp)494 insert_lrp_to_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
495 {
496 int ins_rec_id = lrp->log_record.re_header.rh_rec_id;
497 struct nfslog_lr *curlrp;
498
499 if (lbp->lrps == NULL) {
500 /* that was easy */
501 lbp->lrps = lrp;
502 } else {
503 /*
504 * Does this lrp go before the first on the list?
505 * If so, do the insertion by hand since insque is not
506 * as flexible when queueing an element to the head of
507 * a list.
508 */
509 if (ins_rec_id < lbp->lrps->log_record.re_header.rh_rec_id) {
510 lrp->next = lbp->lrps;
511 lrp->prev = lbp->lrps->prev;
512 lbp->lrps->prev->next = lrp;
513 lbp->lrps->prev = lrp;
514 lbp->lrps = lrp;
515 } else {
516 /*
517 * Search the queue for the correct insertion point.
518 * Be careful about the insque so that the record
519 * ends up in the right place.
520 */
521 curlrp = lbp->lrps;
522 do {
523 if (ins_rec_id <
524 curlrp->next->log_record.re_header.rh_rec_id)
525 break;
526 curlrp = curlrp->next;
527 } while (curlrp != lbp->lrps);
528 if (curlrp == lbp->lrps)
529 insque(lrp, lbp->lrps->prev);
530 else
531 insque(lrp, curlrp);
532 }
533 }
534 /* always keep track of how many we have */
535 lbp->num_lrps++;
536 }
537
538 /*
539 * We are rewriting the buffer header at the start of the log buffer
540 * for the sole purpose of resetting the bh_offset field. This is
541 * supposed to represent the progress that the nfslogd daemon has made
542 * in its processing of the log buffer file.
543 * 'lbp->last_record_offset' contains the absolute offset of the end
544 * of the last element processed. The on-disk buffer offset is relative
545 * to the buffer header, therefore we subtract the length of the buffer
546 * header from the absolute offset.
547 */
548 static void
nfslog_rewrite_bufheader(struct nfslog_buf * lbp)549 nfslog_rewrite_bufheader(struct nfslog_buf *lbp)
550 {
551 XDR xdrs;
552 nfslog_buffer_header bh;
553 /* size big enough for buffer header encode */
554 #define XBUFSIZE 128
555 char buffer[XBUFSIZE];
556 unsigned int wsize;
557
558 /*
559 * if version 1 buffer is large and the current offset cannot be
560 * represented, then don't update the offset in the buffer.
561 */
562 if (lbp->bh.bh_flags & NFSLOG_BH_OFFSET_OVERFLOW) {
563 /* No need to update the header - offset too big */
564 return;
565 }
566 /*
567 * build the buffer header from the original that was saved
568 * on initialization; note that the offset is taken from the
569 * last record processed (the last offset that represents
570 * all records processed without any holes in the processing)
571 */
572 bh = lbp->bh;
573
574 /*
575 * if version 1 buffer is large and the current offset cannot be
576 * represented in 32 bits, then save only the last valid offset
577 * in the buffer and mark the flags to indicate that.
578 */
579 if ((bh.bh_version > 1) ||
580 (lbp->last_record_offset - bh.bh_length < UINT32_MAX)) {
581 bh.bh_offset = lbp->last_record_offset - bh.bh_length;
582 } else {
583 /* don't update the offset in the buffer */
584 bh.bh_flags |= NFSLOG_BH_OFFSET_OVERFLOW;
585 lbp->bh.bh_flags = bh.bh_flags;
586 syslog(LOG_ERR, gettext(
587 "nfslog_rewrite_bufheader: %s: offset does not fit "
588 "in a 32 bit field\n"), lbp->bufpath);
589 }
590
591 xdrmem_create(&xdrs, buffer, XBUFSIZE, XDR_ENCODE);
592
593 if (!xdr_nfslog_buffer_header(&xdrs, &bh)) {
594 syslog(LOG_ERR, gettext(
595 "error in re-writing buffer file %s header\n"),
596 lbp->bufpath);
597 return;
598 }
599
600 wsize = xdr_getpos(&xdrs);
601
602 if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
603 /* go to the beginning of the file */
604 (void) lseek(lbp->fd, 0, SEEK_SET);
605 (void) write(lbp->fd, buffer, wsize);
606 (void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
607 (void) fsync(lbp->fd);
608 } else {
609 bcopy(buffer, (void *)lbp->mmap_addr, wsize);
610 (void) msync((void *)lbp->mmap_addr, wsize, MS_SYNC);
611 }
612 }
613
614 /*
615 * With the provided lrp, we will take and 'insert' the range that the
616 * record covered in the buffer file into a list of processed ranges
617 * for the buffer file. These ranges represent the records processed
618 * but not 'marked' in the buffer header as being processed.
619 * This insertion process is being done for two reasons. The first is that
620 * we do not want to pay the performance penalty of re-writing the buffer header
621 * for each record that we process. The second reason is that the records
622 * may be processed out of order because of the unique ids. This will occur
623 * if the kernel has written the records to the buffer file out of order.
624 * The read routine will 'sort' them as the records are read.
625 *
626 * We do not want to re-write the buffer header such that a record is
627 * represented and being processed when it has not been. In the case
628 * that the nfslogd daemon restarts processing and the buffer header
629 * has been re-written improperly, some records could be skipped.
630 * We will be taking the conservative approach and only writing buffer
631 * header offsets when the entire offset range has been processed.
632 */
633 static void
nfslog_ins_last_rec_processed(struct nfslog_lr * lrp)634 nfslog_ins_last_rec_processed(struct nfslog_lr *lrp)
635 {
636 struct processed_records *prp, *tp;
637
638 /* init the data struct as if it were the only one */
639 prp = malloc(sizeof (*prp));
640 prp->next = prp->prev = prp;
641 prp->start_offset = lrp->f_offset;
642 prp->len = lrp->recsize;
643 prp->num_recs = 1;
644
645 /* always add since we know we are going to insert */
646 lrp->lbp->num_pr_queued++;
647
648 /* Is this the first one? If so, take the easy way out */
649 if (lrp->lbp->prp == NULL) {
650 lrp->lbp->prp = prp;
651 } else {
652 /* sort on insertion... */
653 tp = lrp->lbp->prp;
654 do {
655 if (prp->start_offset < tp->start_offset)
656 break;
657 tp = tp->next;
658 } while (tp != lrp->lbp->prp);
659 /* insert where appropriate (before the one we found */
660 insque(prp, tp->prev);
661 /*
662 * special case where the insertion was done at the
663 * head of the list
664 */
665 if (tp == lrp->lbp->prp && prp->start_offset < tp->start_offset)
666 lrp->lbp->prp = prp;
667
668 /*
669 * now that the entry is in place, we need to see if it can
670 * be combined with the previous or following entries.
671 * combination is done by adding to the length.
672 */
673 if (prp->start_offset ==
674 (prp->prev->start_offset + prp->prev->len)) {
675 tp = prp->prev;
676 remque(prp);
677 tp->len += prp->len;
678 tp->num_recs += prp->num_recs;
679 free(prp);
680 prp = tp;
681 }
682 if (prp->next->start_offset ==
683 (prp->start_offset + prp->len)) {
684 prp->len += prp->next->len;
685 prp->num_recs += prp->next->num_recs;
686 tp = prp->next;
687 remque(tp);
688 free(tp);
689 }
690 }
691
692 if (lrp->lbp->num_pr_queued > MAX_RECS_TO_DELAY) {
693 prp = lrp->lbp->prp;
694 if (lrp->lbp->last_record_offset ==
695 prp->start_offset) {
696
697 /* adjust the offset for the entire buffer */
698 lrp->lbp->last_record_offset =
699 prp->start_offset + prp->len;
700
701 nfslog_rewrite_bufheader(lrp->lbp);
702
703 tp = prp->next;
704 if (tp != prp)
705 remque(prp);
706 else
707 tp = NULL;
708 lrp->lbp->prp = tp;
709 lrp->lbp->num_pr_queued -= prp->num_recs;
710 free(prp);
711 }
712 }
713 }
714
715 /*
716 * nfslog_get_logrecord is responsible for retrieving the next log record
717 * from the buffer file. This would normally be very straightforward but there
718 * is the added complexity of attempting to order the requests coming out of
719 * the buffer file. The fundamental problems is that the kernel nfs logging
720 * functionality does not guarantee that the records were written to the file
721 * in the order that the NFS server processed them. This can cause a problem
722 * in the fh -> pathname mapping in the case were a lookup for a file comes
723 * later in the buffer file than other operations on the lookup's target.
724 * The fh mapping database will not have an entry and will therefore not
725 * be able to map the fh to a name.
726 *
727 * So to solve this problem, the kernel nfs logging code tags each record
728 * with a monotonically increasing id and is guaranteed to be allocated
729 * in the order that the requests were processed. Realize however that
730 * this processing guarantee is essentially for one thread on one client.
731 * This id mechanism does not order all requests since it is only the
732 * single client/single thread case that is most concerning to us here.
733 *
734 * This function will do the 'sorting' of the requests as they are
735 * read from the buffer file. The sorting needs to take into account
736 * that some ids may be missing (operations not logged but ids allocated)
737 * and that the id field will eventually wrap over MAXINT.
738 *
739 * Complexity to solve the fh -> pathname mapping issue.
740 */
741 struct nfslog_lr *
nfslog_get_logrecord(struct nfslog_buf * lbp)742 nfslog_get_logrecord(struct nfslog_buf *lbp)
743 {
744 /* figure out what the next should be if the world were perfect */
745 unsigned int next_rec_id = lbp->last_rec_id + 1;
746 struct nfslog_lr *lrp = NULL;
747
748 /*
749 * First we check the queued records on the log buffer struct
750 * to see if the one we want is there. The records are sorted
751 * on the record id during the insertions to the queue so that
752 * this check is easy.
753 */
754 if (lbp->lrps != NULL) {
755 /* Does the first record match ? */
756 if (lbp->lrps->log_record.re_header.rh_rec_id == next_rec_id) {
757 lrp = remove_lrp_from_lb(lbp, lbp->lrps);
758 lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
759 } else {
760 /*
761 * Here we are checking for wrap of the record id
762 * since it is an unsigned in. The idea is that
763 * if there is a huge span between what we expect
764 * and what is queued then we need to flush/empty
765 * the queued records first.
766 */
767 if (next_rec_id <
768 lbp->lrps->log_record.re_header.rh_rec_id &&
769 ((lbp->lrps->log_record.re_header.rh_rec_id -
770 next_rec_id) > (MAXINT / 2))) {
771
772 lrp = remove_lrp_from_lb(lbp, lbp->lrps);
773 lbp->last_rec_id =
774 lrp->log_record.re_header.rh_rec_id;
775 }
776 }
777 }
778 /*
779 * So the first queued record didn't match (or there were no queued
780 * records to look at). Now we go to the buffer file looking for
781 * the expected log record based on its id. We loop looking for
782 * a matching records and save/queue the records that don't match.
783 * Note that we will queue a maximum number to handle the case
784 * of a missing record id or a queue that is very confused. We don't
785 * want to consume too much memory.
786 */
787 while (lrp == NULL) {
788 /* Have we queued too many for this buffer? */
789 if (lbp->num_lrps >= MAX_LRS_READ_AHEAD) {
790 lrp = remove_lrp_from_lb(lbp, lbp->lrps);
791 lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
792 break;
793 }
794 /*
795 * Get a record from the buffer file. If none are available,
796 * this is probably and EOF condition (could be a read error
797 * as well but that is masked. :-(). No records in the
798 * file means that we need to pull any queued records
799 * so that we don't miss any in the processing.
800 */
801 if ((lrp = nfslog_read_buffer(lbp)) == NULL) {
802 if (lbp->lrps != NULL) {
803 lrp = remove_lrp_from_lb(lbp, lbp->lrps);
804 lbp->last_rec_id =
805 lrp->log_record.re_header.rh_rec_id;
806 } else {
807 return (NULL); /* it was really and EOF */
808 }
809 } else {
810 /*
811 * Just read a record from the buffer file and now we
812 * need to XDR the record header so that we can take
813 * a look at the record id.
814 */
815 if (!xdr_nfslog_request_record(&lrp->xdrs,
816 &lrp->log_record)) {
817 /* Free and return EOF/NULL on error */
818 nfslog_free_logrecord(lrp, FALSE);
819 return (NULL);
820 }
821 /*
822 * If the new record is less than or matches the
823 * expected record id, then we return this record
824 */
825 if (lrp->log_record.re_header.rh_rec_id <=
826 next_rec_id) {
827
828 lbp->last_rec_id =
829 lrp->log_record.re_header.rh_rec_id;
830 } else {
831 /*
832 * This is not the one we were looking
833 * for; queue it for later processing
834 * (queueing sorts on record id)
835 */
836 insert_lrp_to_lb(lbp, lrp);
837 lrp = NULL;
838 }
839 }
840 }
841 return (lrp);
842 }
843
844 /*
845 * Free the log record provided.
846 * This is complex because the associated XDR streams also need to be freed
847 * since allocation could have occured during the DECODE phase. The record
848 * header, args and results need to be XDR_FREEd. The xdr funtions will
849 * be provided if a free needs to be done.
850 *
851 * Note that caller tells us if the record being freed was processed.
852 * If so, then the buffer header should be updated. Updating the buffer
853 * header keeps track of where the nfslogd daemon left off in its processing
854 * if it is unable to complete the entire file.
855 */
856 void
nfslog_free_logrecord(struct nfslog_lr * lrp,bool_t processing_complete)857 nfslog_free_logrecord(struct nfslog_lr *lrp, bool_t processing_complete)
858 {
859 caddr_t buffer;
860 nfslog_request_record *reqrec;
861
862 if (processing_complete) {
863 nfslog_ins_last_rec_processed(lrp);
864 }
865
866 reqrec = &lrp->log_record;
867
868 buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);
869
870 xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_FREE);
871
872 (void) xdr_nfslog_request_record(&lrp->xdrs, reqrec);
873
874 if (lrp->xdrargs != NULL && reqrec->re_rpc_arg)
875 (*lrp->xdrargs)(&lrp->xdrs, reqrec->re_rpc_arg);
876
877 if (reqrec->re_rpc_arg)
878 free(reqrec->re_rpc_arg);
879
880 if (lrp->xdrres != NULL && reqrec->re_rpc_res)
881 (*lrp->xdrres)(&lrp->xdrs, reqrec->re_rpc_res);
882
883 if (reqrec->re_rpc_res)
884 free(reqrec->re_rpc_res);
885
886 free_lrp(lrp);
887 }
888
889 static void
free_lrp(struct nfslog_lr * lrp)890 free_lrp(struct nfslog_lr *lrp)
891 {
892 if (lrp->buffer != NULL)
893 free(lrp->buffer);
894 free(lrp);
895 }
896
/*
 * Utility function used elsewhere: append a quoted hex dump of the
 * 'len' bytes at 'buf' to 'outbuf'.
 *
 * buf           - data to dump
 * len           - number of bytes at 'buf'; a negative value is treated
 *                 as 0
 * outbuf        - output string being built
 * outbufoffsetp - in: offset in 'outbuf' at which to start writing;
 *                 out: offset just past the last character written
 * maxoffset     - once the offset reaches this value nothing more is
 *                 appended.  The limit is checked before each piece is
 *                 written, so one piece (at most 9 characters and the
 *                 terminating NUL) may extend beyond it, and the
 *                 opening ` "' is written unconditionally.
 *
 * The dump starts with ` "' and ends with `"'.  Data no larger than an
 * int is printed as a plain run of two-digit hex bytes.  Larger data is
 * printed as the bytes leading up to the next int-aligned address (if
 * any), then each whole int as " %08x" in host byte order, then any
 * remaining bytes, separated by a space if an int was printed.
 */
void
nfslog_opaque_print_buf(void *buf, int len, char *outbuf, int *outbufoffsetp,
    int maxoffset)
{
	const int wordsz = (int)sizeof (unsigned int);
	unsigned char *bytep = (unsigned char *)buf;
	int off = *outbufoffsetp;
	int done = 0;		/* input bytes consumed so far */

	if (len < 0)
		len = 0;

	off += sprintf(&outbuf[off], " \"");

	if (len <= wordsz) {
		for (; (done < len) && (off < maxoffset); done++, bytep++)
			off += sprintf(&outbuf[off], "%02x", *bytep);
	} else {
		int lead = (int)((uintptr_t)buf % sizeof (unsigned int));
		int nwords = 0;

		/* bytes in front of the first int-aligned address */
		if (lead > 0) {
			lead = wordsz - lead;
			for (; (done < lead) && (off < maxoffset);
			    done++, bytep++)
				off += sprintf(&outbuf[off], "%02x", *bytep);
		}

		/* whole ints, copied out rather than read through a cast */
		for (; (done + wordsz <= len) && (off < maxoffset);
		    done += wordsz, bytep += wordsz) {
			unsigned int word;

			(void) memcpy(&word, bytep, sizeof (word));
			off += sprintf(&outbuf[off], " %08x", word);
			nwords++;
		}

		/* last element not an int */
		if ((done < len) && (off < maxoffset)) {
			if (nwords > 0)
				off += sprintf(&outbuf[off], " ");
			for (; (done < len) && (off < maxoffset);
			    done++, bytep++)
				off += sprintf(&outbuf[off], "%02x", *bytep);
		}
	}

	if (off < maxoffset)
		off += sprintf(&outbuf[off], "\"");
	*outbufoffsetp = off;
}
947