xref: /titanic_41/usr/src/uts/common/inet/ipf/ip_frag.c (revision bf56214c0556fa6864189c826d39dbe156bb22a0)
1 /*
2  * Copyright (C) 1993-2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #pragma ident	"%Z%%M%	%I%	%E% SMI"
11 
12 #if defined(KERNEL) || defined(_KERNEL)
13 # undef KERNEL
14 # undef _KERNEL
15 # define        KERNEL	1
16 # define        _KERNEL	1
17 #endif
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
21 #include <sys/time.h>
22 #include <sys/file.h>
23 #ifdef __hpux
24 # include <sys/timeout.h>
25 #endif
26 #if !defined(_KERNEL)
27 # include <stdio.h>
28 # include <string.h>
29 # include <stdlib.h>
30 # define _KERNEL
31 # ifdef __OpenBSD__
32 struct file;
33 # endif
34 # include <sys/uio.h>
35 # undef _KERNEL
36 #endif
37 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
38 # include <sys/filio.h>
39 # include <sys/fcntl.h>
40 #else
41 # include <sys/ioctl.h>
42 #endif
43 #if !defined(linux)
44 # include <sys/protosw.h>
45 #endif
46 #include <sys/socket.h>
47 #if defined(_KERNEL)
48 # include <sys/systm.h>
49 # if !defined(__SVR4) && !defined(__svr4__)
50 #  include <sys/mbuf.h>
51 # endif
52 #endif
53 #if !defined(__SVR4) && !defined(__svr4__)
54 # if defined(_KERNEL) && !defined(__sgi) && !defined(AIX)
55 #  include <sys/kernel.h>
56 # endif
57 #else
58 # include <sys/byteorder.h>
59 # ifdef _KERNEL
60 #  include <sys/dditypes.h>
61 # endif
62 # include <sys/stream.h>
63 # include <sys/kmem.h>
64 #endif
65 #include <net/if.h>
66 #ifdef sun
67 # include <net/af.h>
68 #endif
69 #include <net/route.h>
70 #include <netinet/in.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/ip.h>
73 #if !defined(linux)
74 # include <netinet/ip_var.h>
75 #endif
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78 #include <netinet/ip_icmp.h>
79 #include "netinet/ip_compat.h"
80 #include <netinet/tcpip.h>
81 #include "netinet/ip_fil.h"
82 #include "netinet/ip_nat.h"
83 #include "netinet/ip_frag.h"
84 #include "netinet/ip_state.h"
85 #include "netinet/ip_auth.h"
86 #include "netinet/ipf_stack.h"
87 #if (__FreeBSD_version >= 300000)
88 # include <sys/malloc.h>
89 # if defined(_KERNEL)
90 #  ifndef IPFILTER_LKM
91 #   include <sys/libkern.h>
92 #   include <sys/systm.h>
93 #  endif
94 extern struct callout_handle fr_slowtimer_ch;
95 # endif
96 #endif
97 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
98 # include <sys/callout.h>
99 extern struct callout fr_slowtimer_ch;
100 #endif
101 #if defined(__OpenBSD__)
102 # include <sys/timeout.h>
103 extern struct timeout fr_slowtimer_ch;
104 #endif
105 /* END OF INCLUDES */
106 
/* Source-control identification strings; left out when lint is defined. */
107 #if !defined(lint)
108 static const char sccsid[] = "@(#)ip_frag.c	1.11 3/24/96 (C) 1993-2000 Darren Reed";
109 static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.77.2.5 2005/08/11 14:33:10 darrenr Exp $";
110 #endif
111 
/* Prototypes for the file-local (static) helper functions. */
112 static ipfr_t *ipfr_newfrag __P((fr_info_t *, u_32_t, ipfr_t **));
113 static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **));
114 static void fr_fragdelete __P((ipfr_t *, ipfr_t ***, ipf_stack_t *));
115 
116 /* ------------------------------------------------------------------------ */
117 /* Function:    fr_fraginit                                                 */
118 /* Returns:     int - 0 == success, -1 == error                             */
119 /* Parameters:  Nil                                                         */
120 /*                                                                          */
121 /* Initialise the hash tables for the fragment cache lookups.               */
122 /* ------------------------------------------------------------------------ */
123 int fr_fraginit(ifs)
124 ipf_stack_t *ifs;
125 {
126 	ifs->ifs_ipfr_tail = &ifs->ifs_ipfr_list;
127 	ifs->ifs_ipfr_nattail = &ifs->ifs_ipfr_natlist;
128 	ifs->ifs_ipfr_ipidtail = &ifs->ifs_ipfr_ipidlist;
129 	/* the IP frag related variables are set in ipftuneable_setdefs() to
130 	 * their default values
131 	 */
132 
133 	KMALLOCS(ifs->ifs_ipfr_heads, ipfr_t **,
134 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
135 	if (ifs->ifs_ipfr_heads == NULL)
136 		return -1;
137 	bzero((char *)ifs->ifs_ipfr_heads,
138 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
139 
140 	KMALLOCS(ifs->ifs_ipfr_nattab, ipfr_t **,
141 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
142 	if (ifs->ifs_ipfr_nattab == NULL)
143 		return -1;
144 	bzero((char *)ifs->ifs_ipfr_nattab,
145 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
146 
147 	KMALLOCS(ifs->ifs_ipfr_ipidtab, ipfr_t **,
148 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
149 	if (ifs->ifs_ipfr_ipidtab == NULL)
150 		return -1;
151 	bzero((char *)ifs->ifs_ipfr_ipidtab,
152 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
153 
154 	RWLOCK_INIT(&ifs->ifs_ipf_frag, "ipf fragment rwlock");
155 
156 	/* Initialise frblock with "block in all" */
157 	bzero((char *)&ifs->ifs_frblock, sizeof(ifs->ifs_frblock));
158 	ifs->ifs_frblock.fr_flags = FR_BLOCK|FR_INQUE;	/* block in */
159 	ifs->ifs_frblock.fr_ref = 1;
160 
161 	ifs->ifs_fr_frag_init = 1;
162 
163 	return 0;
164 }
165 
166 
167 /* ------------------------------------------------------------------------ */
168 /* Function:    fr_fragunload                                               */
169 /* Returns:     Nil                                                         */
170 /* Parameters:  Nil                                                         */
171 /*                                                                          */
172 /* Free all memory allocated whilst running and from initialisation.        */
173 /* ------------------------------------------------------------------------ */
174 void fr_fragunload(ifs)
175 ipf_stack_t *ifs;
176 {
177 	if (ifs->ifs_fr_frag_init == 1) {
178 		fr_fragclear(ifs);
179 
180 		RW_DESTROY(&ifs->ifs_ipf_frag);
181 		ifs->ifs_fr_frag_init = 0;
182 	}
183 
184 	if (ifs->ifs_ipfr_heads != NULL) {
185 		KFREES(ifs->ifs_ipfr_heads,
186 		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
187 	}
188 	ifs->ifs_ipfr_heads = NULL;
189 
190 	if (ifs->ifs_ipfr_nattab != NULL) {
191 		KFREES(ifs->ifs_ipfr_nattab,
192 		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
193 	}
194 	ifs->ifs_ipfr_nattab = NULL;
195 
196 	if (ifs->ifs_ipfr_ipidtab != NULL) {
197 		KFREES(ifs->ifs_ipfr_ipidtab,
198 		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
199 	}
200 	ifs->ifs_ipfr_ipidtab = NULL;
201 }
202 
203 
204 /* ------------------------------------------------------------------------ */
205 /* Function:    fr_fragstats                                                */
206 /* Returns:     ipfrstat_t* - pointer to struct with current frag stats     */
207 /* Parameters:  Nil                                                         */
208 /*                                                                          */
209 /* Updates ipfr_stats with current information and returns a pointer to it  */
210 /* ------------------------------------------------------------------------ */
211 ipfrstat_t *fr_fragstats(ifs)
212 ipf_stack_t *ifs;
213 {
214 	ifs->ifs_ipfr_stats.ifs_table = ifs->ifs_ipfr_heads;
215 	ifs->ifs_ipfr_stats.ifs_nattab = ifs->ifs_ipfr_nattab;
216 	ifs->ifs_ipfr_stats.ifs_inuse = ifs->ifs_ipfr_inuse;
217 	return &ifs->ifs_ipfr_stats;
218 }
219 
220 
221 /* ------------------------------------------------------------------------ */
222 /* Function:    ipfr_newfrag                                                */
223 /* Returns:     ipfr_t * - pointer to fragment cache state info or NULL     */
224 /* Parameters:  fin(I)   - pointer to packet information                    */
225 /*              table(I) - pointer to frag table to add to                  */
226 /*                                                                          */
227 /* Add a new entry to the fragment cache, registering it as having come     */
228 /* through this box, with the result of the filter operation.               */
229 /* ------------------------------------------------------------------------ */
230 static ipfr_t *ipfr_newfrag(fin, pass, table)
231 fr_info_t *fin;
232 u_32_t pass;
233 ipfr_t *table[];
234 {
235 	ipfr_t *fra, frag;
236 	u_int idx, off;
237 	ipf_stack_t *ifs = fin->fin_ifs;
238 
239 	if (ifs->ifs_ipfr_inuse >= IPFT_SIZE)
240 		return NULL;
241 
242 	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
243 		return NULL;
244 
245 	if (pass & FR_FRSTRICT)
246 		if (fin->fin_off != 0)
247 			return NULL;
248 
249 	frag.ipfr_p = fin->fin_p;
250 	idx = fin->fin_p;
251 	frag.ipfr_id = fin->fin_id;
252 	idx += fin->fin_id;
253 	frag.ipfr_source = fin->fin_fi.fi_src;
254 	idx += frag.ipfr_src.s_addr;
255 	frag.ipfr_dest = fin->fin_fi.fi_dst;
256 	idx += frag.ipfr_dst.s_addr;
257 	frag.ipfr_ifp = fin->fin_ifp;
258 	idx *= 127;
259 	idx %= IPFT_SIZE;
260 
261 	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
262 	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
263 	frag.ipfr_auth = fin->fin_fi.fi_auth;
264 
265 	/*
266 	 * first, make sure it isn't already there...
267 	 */
268 	for (fra = table[idx]; (fra != NULL); fra = fra->ipfr_hnext)
269 		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp,
270 			  IPFR_CMPSZ)) {
271 			ifs->ifs_ipfr_stats.ifs_exists++;
272 			return NULL;
273 		}
274 
275 	/*
276 	 * allocate some memory, if possible, if not, just record that we
277 	 * failed to do so.
278 	 */
279 	KMALLOC(fra, ipfr_t *);
280 	if (fra == NULL) {
281 		ifs->ifs_ipfr_stats.ifs_nomem++;
282 		return NULL;
283 	}
284 
285 	fra->ipfr_rule = fin->fin_fr;
286 	if (fra->ipfr_rule != NULL) {
287 
288 		frentry_t *fr;
289 
290 		fr = fin->fin_fr;
291 		MUTEX_ENTER(&fr->fr_lock);
292 		fr->fr_ref++;
293 		MUTEX_EXIT(&fr->fr_lock);
294 	}
295 
296 	/*
297 	 * Insert the fragment into the fragment table, copy the struct used
298 	 * in the search using bcopy rather than reassign each field.
299 	 * Set the ttl to the default.
300 	 */
301 	if ((fra->ipfr_hnext = table[idx]) != NULL)
302 		table[idx]->ipfr_hprev = &fra->ipfr_hnext;
303 	fra->ipfr_hprev = table + idx;
304 	fra->ipfr_data = NULL;
305 	table[idx] = fra;
306 	bcopy((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ);
307 	fra->ipfr_ttl = ifs->ifs_fr_ticks + ifs->ifs_fr_ipfrttl;
308 
309 	/*
310 	 * Compute the offset of the expected start of the next packet.
311 	 */
312 	off = fin->fin_off;
313 	if (off == 0) {
314 		fra->ipfr_seen0 = 1;
315 		fra->ipfr_firstend = fin->fin_flen;
316 	} else {
317 		fra->ipfr_seen0 = 0;
318 		fra->ipfr_firstend = 0;
319 	}
320 	fra->ipfr_off = off + fin->fin_dlen;
321 	fra->ipfr_pass = pass;
322 	fra->ipfr_ref = 1;
323 	ifs->ifs_ipfr_stats.ifs_new++;
324 	ifs->ifs_ipfr_inuse++;
325 	return fra;
326 }
327 
328 
329 /* ------------------------------------------------------------------------ */
330 /* Function:    fr_newfrag                                                  */
331 /* Returns:     int - 0 == success, -1 == error                             */
332 /* Parameters:  fin(I)  - pointer to packet information                     */
333 /*                                                                          */
334 /* Add a new entry to the fragment cache table based on the current packet  */
335 /* ------------------------------------------------------------------------ */
336 int fr_newfrag(fin, pass)
337 u_32_t pass;
338 fr_info_t *fin;
339 {
340 	ipfr_t	*fra;
341 	ipf_stack_t *ifs = fin->fin_ifs;
342 
343 	if (ifs->ifs_fr_frag_lock != 0)
344 		return -1;
345 
346 	WRITE_ENTER(&ifs->ifs_ipf_frag);
347 	fra = ipfr_newfrag(fin, pass, ifs->ifs_ipfr_heads);
348 	if (fra != NULL) {
349 		*ifs->ifs_ipfr_tail = fra;
350 		fra->ipfr_prev = ifs->ifs_ipfr_tail;
351 		ifs->ifs_ipfr_tail = &fra->ipfr_next;
352 		if (ifs->ifs_ipfr_list == NULL)
353 			ifs->ifs_ipfr_list = fra;
354 		fra->ipfr_next = NULL;
355 	}
356 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
357 	return fra ? 0 : -1;
358 }
359 
360 
361 /* ------------------------------------------------------------------------ */
362 /* Function:    fr_nat_newfrag                                              */
363 /* Returns:     int - 0 == success, -1 == error                             */
364 /* Parameters:  fin(I)  - pointer to packet information                     */
365 /*              nat(I)  - pointer to NAT structure                          */
366 /*                                                                          */
367 /* Create a new NAT fragment cache entry based on the current packet and    */
368 /* the NAT structure for this "session".                                    */
369 /* ------------------------------------------------------------------------ */
370 int fr_nat_newfrag(fin, pass, nat)
371 fr_info_t *fin;
372 u_32_t pass;
373 nat_t *nat;
374 {
375 	ipfr_t	*fra;
376 	ipf_stack_t *ifs = fin->fin_ifs;
377 
378 	if ((fin->fin_v != 4) || (ifs->ifs_fr_frag_lock != 0))
379 		return 0;
380 
381 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
382 	fra = ipfr_newfrag(fin, pass, ifs->ifs_ipfr_nattab);
383 	if (fra != NULL) {
384 		fra->ipfr_data = nat;
385 		nat->nat_data = fra;
386 		*ifs->ifs_ipfr_nattail = fra;
387 		fra->ipfr_prev = ifs->ifs_ipfr_nattail;
388 		ifs->ifs_ipfr_nattail = &fra->ipfr_next;
389 		fra->ipfr_next = NULL;
390 	}
391 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
392 	return fra ? 0 : -1;
393 }
394 
395 
396 /* ------------------------------------------------------------------------ */
397 /* Function:    fr_ipid_newfrag                                             */
398 /* Returns:     int - 0 == success, -1 == error                             */
399 /* Parameters:  fin(I)  - pointer to packet information                     */
400 /*              ipid(I) - new IP ID for this fragmented packet              */
401 /*                                                                          */
402 /* Create a new fragment cache entry for this packet and store, as a data   */
403 /* pointer, the new IP ID value.                                            */
404 /* ------------------------------------------------------------------------ */
405 int fr_ipid_newfrag(fin, ipid)
406 fr_info_t *fin;
407 u_32_t ipid;
408 {
409 	ipfr_t	*fra;
410 	ipf_stack_t *ifs = fin->fin_ifs;
411 
412 	if (ifs->ifs_fr_frag_lock)
413 		return 0;
414 
415 	WRITE_ENTER(&ifs->ifs_ipf_ipidfrag);
416 	fra = ipfr_newfrag(fin, 0, ifs->ifs_ipfr_ipidtab);
417 	if (fra != NULL) {
418 		fra->ipfr_data = (void *)(uintptr_t)ipid;
419 		*ifs->ifs_ipfr_ipidtail = fra;
420 		fra->ipfr_prev = ifs->ifs_ipfr_ipidtail;
421 		ifs->ifs_ipfr_ipidtail = &fra->ipfr_next;
422 		fra->ipfr_next = NULL;
423 	}
424 	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
425 	return fra ? 0 : -1;
426 }
427 
428 
429 /* ------------------------------------------------------------------------ */
430 /* Function:    fr_fraglookup                                               */
431 /* Returns:     ipfr_t * - pointer to ipfr_t structure if there's a         */
432 /*                         matching entry in the frag table, else NULL      */
433 /* Parameters:  fin(I)   - pointer to packet information                    */
434 /*              table(I) - pointer to fragment cache table to search        */
435 /*                                                                          */
436 /* Check the fragment cache to see if there is already a record of this     */
437 /* packet with its filter result known.                                     */
438 /* ------------------------------------------------------------------------ */
439 static ipfr_t *fr_fraglookup(fin, table)
440 fr_info_t *fin;
441 ipfr_t *table[];
442 {
443 	ipfr_t *f, frag;
444 	u_int idx;
445 	ipf_stack_t *ifs = fin->fin_ifs;
446 
447 	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
448 		return NULL;
449 
450 	/*
451 	 * For fragments, we record protocol, packet id, TOS and both IP#'s
452 	 * (these should all be the same for all fragments of a packet).
453 	 *
454 	 * build up a hash value to index the table with.
455 	 */
456 	frag.ipfr_p = fin->fin_p;
457 	idx = fin->fin_p;
458 	frag.ipfr_id = fin->fin_id;
459 	idx += fin->fin_id;
460 	frag.ipfr_source = fin->fin_fi.fi_src;
461 	idx += frag.ipfr_src.s_addr;
462 	frag.ipfr_dest = fin->fin_fi.fi_dst;
463 	idx += frag.ipfr_dst.s_addr;
464 	frag.ipfr_ifp = fin->fin_ifp;
465 	idx *= 127;
466 	idx %= IPFT_SIZE;
467 
468 	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
469 	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
470 	frag.ipfr_auth = fin->fin_fi.fi_auth;
471 
472 	/*
473 	 * check the table, careful to only compare the right amount of data
474 	 */
475 	for (f = table[idx]; f; f = f->ipfr_hnext)
476 		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&f->ipfr_ifp,
477 			  IPFR_CMPSZ)) {
478 			u_short	off;
479 
480 			/*
481 			 * We don't want to let short packets match because
482 			 * they could be compromising the security of other
483 			 * rules that want to match on layer 4 fields (and
484 			 * can't because they have been fragmented off.)
485 			 * Why do this check here?  The counter acts as an
486 			 * indicator of this kind of attack, whereas if it was
487 			 * elsewhere, it wouldn't know if other matching
488 			 * packets had been seen.
489 			 */
490 			if (fin->fin_flx & FI_SHORT) {
491 				ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_short);
492 				continue;
493 			}
494 
495 			/*
496 			 * XXX - We really need to be guarding against the
497 			 * retransmission of (src,dst,id,offset-range) here
498 			 * because a fragmented packet is never resent with
499 			 * the same IP ID# (or shouldn't).
500 			 */
501 			off = fin->fin_off; /* same as in ipfr_newfrag() */
502 			if (f->ipfr_seen0) {
503 				if (off == 0) {
504 					ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_retrans0);
505 					continue;
506 				}
507 			} else if (off == 0) {
508 				f->ipfr_seen0 = 1;
509 				f->ipfr_firstend = fin->fin_flen;
510 			}
511 
512 			if (f != table[idx]) {
513 				ipfr_t **fp;
514 
515 				/*
516 				 * Move fragment info. to the top of the list
517 				 * to speed up searches.  First, delink...
518 				 */
519 				fp = f->ipfr_hprev;
520 				(*fp) = f->ipfr_hnext;
521 				if (f->ipfr_hnext != NULL)
522 					f->ipfr_hnext->ipfr_hprev = fp;
523 				/*
524 				 * Then put back at the top of the chain.
525 				 */
526 				f->ipfr_hnext = table[idx];
527 				table[idx]->ipfr_hprev = &f->ipfr_hnext;
528 				f->ipfr_hprev = table + idx;
529 				table[idx] = f;
530 			}
531 
532 			if (fin->fin_v == 6) {
533 				if (f->ipfr_seen0 && (off < f->ipfr_firstend))
534 					fin->fin_flx |= FI_BAD;
535 			}
536 			/*
537 			 * If we've follwed the fragments, and this is the
538 			 * last (in order), shrink expiration time.
539 			 */
540 			if (off == f->ipfr_off) {
541 				if (!(fin->fin_ip->ip_off & IP_MF))
542 					f->ipfr_ttl = ifs->ifs_fr_ticks + 1;
543 				f->ipfr_off = fin->fin_dlen + off;
544 			} else if (f->ipfr_pass & FR_FRSTRICT)
545 				continue;
546 			ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_hits);
547 			return f;
548 		}
549 	return NULL;
550 }
551 
552 
553 /* ------------------------------------------------------------------------ */
554 /* Function:    fr_nat_knownfrag                                            */
555 /* Returns:     nat_t* - pointer to 'parent' NAT structure if frag table    */
556 /*                       match found, else NULL                             */
557 /* Parameters:  fin(I)  - pointer to packet information                     */
558 /*                                                                          */
559 /* Functional interface for NAT lookups of the NAT fragment cache           */
560 /* ------------------------------------------------------------------------ */
561 nat_t *fr_nat_knownfrag(fin)
562 fr_info_t *fin;
563 {
564 	nat_t	*nat;
565 	ipfr_t	*ipf;
566 	ipf_stack_t *ifs = fin->fin_ifs;
567 
568 	if ((fin->fin_v != 4) || (ifs->ifs_fr_frag_lock) || !ifs->ifs_ipfr_natlist)
569 		return NULL;
570 	READ_ENTER(&ifs->ifs_ipf_natfrag);
571 	ipf = fr_fraglookup(fin, ifs->ifs_ipfr_nattab);
572 	if (ipf != NULL) {
573 		nat = ipf->ipfr_data;
574 		/*
575 		 * This is the last fragment for this packet.
576 		 */
577 		if ((ipf->ipfr_ttl == ifs->ifs_fr_ticks + 1) && (nat != NULL)) {
578 			nat->nat_data = NULL;
579 			ipf->ipfr_data = NULL;
580 		}
581 	} else
582 		nat = NULL;
583 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
584 	return nat;
585 }
586 
587 
588 /* ------------------------------------------------------------------------ */
589 /* Function:    fr_ipid_knownfrag                                           */
590 /* Returns:     u_32_t - IPv4 ID for this packet if match found, else       */
591 /*                       return 0xfffffff to indicate no match.             */
592 /* Parameters:  fin(I) - pointer to packet information                      */
593 /*                                                                          */
594 /* Functional interface for IP ID lookups of the IP ID fragment cache       */
595 /* ------------------------------------------------------------------------ */
596 u_32_t fr_ipid_knownfrag(fin)
597 fr_info_t *fin;
598 {
599 	ipfr_t	*ipf;
600 	u_32_t	id;
601 	ipf_stack_t *ifs = fin->fin_ifs;
602 
603 	if ((fin->fin_v != 4) || (ifs->ifs_fr_frag_lock) || !ifs->ifs_ipfr_ipidlist)
604 		return 0xffffffff;
605 
606 	READ_ENTER(&ifs->ifs_ipf_ipidfrag);
607 	ipf = fr_fraglookup(fin, ifs->ifs_ipfr_ipidtab);
608 	if (ipf != NULL)
609 		id = (u_32_t)(uintptr_t)ipf->ipfr_data;
610 	else
611 		id = 0xffffffff;
612 	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
613 	return id;
614 }
615 
616 
617 /* ------------------------------------------------------------------------ */
618 /* Function:    fr_knownfrag                                                */
619 /* Returns:     frentry_t* - pointer to filter rule if a match is found in  */
620 /*                           the frag cache table, else NULL.               */
621 /* Parameters:  fin(I)   - pointer to packet information                    */
622 /*              passp(O) - pointer to where to store rule flags resturned   */
623 /*                                                                          */
624 /* Functional interface for normal lookups of the fragment cache.  If a     */
625 /* match is found, return the rule pointer and flags from the rule, except  */
626 /* that if FR_LOGFIRST is set, reset FR_LOG.                                */
627 /* ------------------------------------------------------------------------ */
628 frentry_t *fr_knownfrag(fin, passp)
629 fr_info_t *fin;
630 u_32_t *passp;
631 {
632 	frentry_t *fr = NULL;
633 	ipfr_t	*fra;
634 	u_32_t pass, oflx;
635 	ipf_stack_t *ifs = fin->fin_ifs;
636 
637 	if ((ifs->ifs_fr_frag_lock) || (ifs->ifs_ipfr_list == NULL))
638 		return NULL;
639 
640 	READ_ENTER(&ifs->ifs_ipf_frag);
641 	oflx = fin->fin_flx;
642 	fra = fr_fraglookup(fin, ifs->ifs_ipfr_heads);
643 	if (fra != NULL) {
644 		fr = fra->ipfr_rule;
645 		fin->fin_fr = fr;
646 		if (fr != NULL) {
647 			pass = fr->fr_flags;
648 			if ((pass & FR_LOGFIRST) != 0)
649 				pass &= ~(FR_LOGFIRST|FR_LOG);
650 			*passp = pass;
651 		}
652 	}
653 	if (!(oflx & FI_BAD) && (fin->fin_flx & FI_BAD)) {
654 		*passp &= ~FR_CMDMASK;
655 		*passp |= FR_BLOCK;
656 		fr = &ifs->ifs_frblock;
657 	}
658 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
659 	return fr;
660 }
661 
662 
663 /* ------------------------------------------------------------------------ */
664 /* Function:    fr_forget                                                   */
665 /* Returns:     Nil                                                         */
666 /* Parameters:  ptr(I) - pointer to data structure                          */
667 /*                                                                          */
668 /* Search through all of the fragment cache entries and wherever a pointer  */
669 /* is found to match ptr, reset it to NULL.                                 */
670 /* ------------------------------------------------------------------------ */
671 void fr_forget(ptr, ifs)
672 void *ptr;
673 ipf_stack_t *ifs;
674 {
675 	ipfr_t	*fr;
676 
677 	WRITE_ENTER(&ifs->ifs_ipf_frag);
678 	for (fr = ifs->ifs_ipfr_list; fr; fr = fr->ipfr_next)
679 		if (fr->ipfr_data == ptr)
680 			fr->ipfr_data = NULL;
681 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
682 }
683 
684 
685 /* ------------------------------------------------------------------------ */
686 /* Function:    fr_forgetnat                                                */
687 /* Returns:     Nil                                                         */
688 /* Parameters:  ptr(I) - pointer to data structure                          */
689 /*                                                                          */
690 /* Search through all of the fragment cache entries for NAT and wherever a  */
691 /* pointer  is found to match ptr, reset it to NULL.                        */
692 /* ------------------------------------------------------------------------ */
693 void fr_forgetnat(ptr, ifs)
694 void *ptr;
695 ipf_stack_t *ifs;
696 {
697 	ipfr_t	*fr;
698 
699 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
700 	for (fr = ifs->ifs_ipfr_natlist; fr; fr = fr->ipfr_next)
701 		if (fr->ipfr_data == ptr)
702 			fr->ipfr_data = NULL;
703 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
704 }
705 
706 
707 /* ------------------------------------------------------------------------ */
708 /* Function:    fr_fragdelete                                               */
709 /* Returns:     Nil                                                         */
710 /* Parameters:  fra(I)   - pointer to fragment structure to delete          */
711 /*              tail(IO) - pointer to the pointer to the tail of the frag   */
712 /*                         list                                             */
713 /*                                                                          */
714 /* Remove a fragment cache table entry from the table & list.  Also free    */
715 /* the filter rule it is associated with it if it is no longer used as a    */
716 /* result of decreasing the reference count.                                */
717 /* ------------------------------------------------------------------------ */
718 static void fr_fragdelete(fra, tail, ifs)
719 ipfr_t *fra, ***tail;
720 ipf_stack_t *ifs;
721 {
722 	frentry_t *fr;
723 
724 	fr = fra->ipfr_rule;
725 	if (fr != NULL)
726 	    (void)fr_derefrule(&fr, ifs);
727 
728 	if (fra->ipfr_next)
729 		fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
730 	*fra->ipfr_prev = fra->ipfr_next;
731 	if (*tail == &fra->ipfr_next)
732 		*tail = fra->ipfr_prev;
733 
734 	if (fra->ipfr_hnext)
735 		fra->ipfr_hnext->ipfr_hprev = fra->ipfr_hprev;
736 	*fra->ipfr_hprev = fra->ipfr_hnext;
737 
738 	if (fra->ipfr_ref <= 0)
739 		KFREE(fra);
740 }
741 
742 
743 /* ------------------------------------------------------------------------ */
744 /* Function:    fr_fragclear                                                */
745 /* Returns:     Nil                                                         */
746 /* Parameters:  Nil                                                         */
747 /*                                                                          */
748 /* Free memory in use by fragment state information kept.  Do the normal    */
749 /* fragment state stuff first and then the NAT-fragment table.              */
750 /* ------------------------------------------------------------------------ */
751 void fr_fragclear(ifs)
752 ipf_stack_t *ifs;
753 {
754 	ipfr_t	*fra;
755 	nat_t	*nat;
756 
757 	WRITE_ENTER(&ifs->ifs_ipf_frag);
758 	while ((fra = ifs->ifs_ipfr_list) != NULL) {
759 		fra->ipfr_ref--;
760 		fr_fragdelete(fra, &ifs->ifs_ipfr_tail, ifs);
761 	}
762 	ifs->ifs_ipfr_tail = &ifs->ifs_ipfr_list;
763 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
764 
765 	WRITE_ENTER(&ifs->ifs_ipf_nat);
766 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
767 	while ((fra = ifs->ifs_ipfr_natlist) != NULL) {
768 		nat = fra->ipfr_data;
769 		if (nat != NULL) {
770 			if (nat->nat_data == fra)
771 				nat->nat_data = NULL;
772 		}
773 		fra->ipfr_ref--;
774 		fr_fragdelete(fra, &ifs->ifs_ipfr_nattail, ifs);
775 	}
776 	ifs->ifs_ipfr_nattail = &ifs->ifs_ipfr_natlist;
777 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
778 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
779 }
780 
781 
782 /* ------------------------------------------------------------------------ */
783 /* Function:    fr_fragexpire                                               */
784 /* Returns:     Nil                                                         */
785 /* Parameters:  Nil                                                         */
786 /*                                                                          */
787 /* Expire entries in the fragment cache table that have been there too long */
788 /* ------------------------------------------------------------------------ */
789 void fr_fragexpire(ifs)
790 ipf_stack_t *ifs;
791 {
792 	ipfr_t	**fp, *fra;
793 	nat_t	*nat;
794 	SPL_INT(s);
795 
796 	if (ifs->ifs_fr_frag_lock)
797 		return;
798 
799 	SPL_NET(s);
800 	WRITE_ENTER(&ifs->ifs_ipf_frag);
801 	/*
802 	 * Go through the entire table, looking for entries to expire,
803 	 * which is indicated by the ttl being less than or equal to
804 	 * ifs_fr_ticks.
805 	 */
806 	for (fp = &ifs->ifs_ipfr_list; ((fra = *fp) != NULL); ) {
807 		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
808 			break;
809 		fra->ipfr_ref--;
810 		fr_fragdelete(fra, &ifs->ifs_ipfr_tail, ifs);
811 		ifs->ifs_ipfr_stats.ifs_expire++;
812 		ifs->ifs_ipfr_inuse--;
813 	}
814 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
815 
816 	WRITE_ENTER(&ifs->ifs_ipf_ipidfrag);
817 	for (fp = &ifs->ifs_ipfr_ipidlist; ((fra = *fp) != NULL); ) {
818 		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
819 			break;
820 		fra->ipfr_ref--;
821 		fr_fragdelete(fra, &ifs->ifs_ipfr_ipidtail, ifs);
822 		ifs->ifs_ipfr_stats.ifs_expire++;
823 		ifs->ifs_ipfr_inuse--;
824 	}
825 	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
826 
827 	/*
828 	 * Same again for the NAT table, except that if the structure also
829 	 * still points to a NAT structure, and the NAT structure points back
830 	 * at the one to be free'd, NULL the reference from the NAT struct.
831 	 * NOTE: We need to grab both mutex's early, and in this order so as
832 	 * to prevent a deadlock if both try to expire at the same time.
833 	 */
834 	WRITE_ENTER(&ifs->ifs_ipf_nat);
835 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
836 	for (fp = &ifs->ifs_ipfr_natlist; ((fra = *fp) != NULL); ) {
837 		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
838 			break;
839 		nat = fra->ipfr_data;
840 		if (nat != NULL) {
841 			if (nat->nat_data == fra)
842 				nat->nat_data = NULL;
843 		}
844 		fra->ipfr_ref--;
845 		fr_fragdelete(fra, &ifs->ifs_ipfr_nattail, ifs);
846 		ifs->ifs_ipfr_stats.ifs_expire++;
847 		ifs->ifs_ipfr_inuse--;
848 	}
849 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
850 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
851 	SPL_X(s);
852 }
853 
854 
855 /* ------------------------------------------------------------------------ */
856 /* Function:    fr_slowtimer                                                */
857 /* Returns:     Nil                                                         */
858 /* Parameters:  Nil                                                         */
859 /*                                                                          */
860 /* Slowly expire held state for fragments.  Timeouts are set * in           */
861 /* expectation of this being called twice per second.                       */
862 /* ------------------------------------------------------------------------ */
863 #if !defined(_KERNEL) || (!SOLARIS && !defined(__hpux) && !defined(__sgi) && \
864 			  !defined(__osf__) && !defined(linux))
865 # if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi))
866 void fr_slowtimer __P((void *arg))
867 # else
868 int fr_slowtimer(void *arg)
869 # endif
870 {
871 	ipf_stack_t *ifs = arg;
872 
873 	READ_ENTER(&ifs->ifs_ipf_global);
874 
875 	fr_fragexpire(ifs);
876 	fr_timeoutstate(ifs);
877 	fr_natexpire(ifs);
878 	fr_authexpire(ifs);
879 	ifs->ifs_fr_ticks++;
880 	if (ifs->ifs_fr_running <= 0)
881 		goto done;
882 # ifdef _KERNEL
883 #  if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
884 	callout_reset(&fr_slowtimer_ch, hz / 2, fr_slowtimer, NULL);
885 #  else
886 #   if defined(__OpenBSD__)
887 	timeout_add(&fr_slowtimer_ch, hz/2);
888 #   else
889 #    if (__FreeBSD_version >= 300000)
890 	fr_slowtimer_ch = timeout(fr_slowtimer, NULL, hz/2);
891 #    else
892 #     ifdef linux
893 	;
894 #     else
895 	timeout(fr_slowtimer, NULL, hz/2);
896 #     endif
897 #    endif /* FreeBSD */
898 #   endif /* OpenBSD */
899 #  endif /* NetBSD */
900 # endif
901 done:
902 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
903 # if (BSD < 199103) || !defined(_KERNEL)
904 	return 0;
905 # endif
906 }
907 #endif /* !SOLARIS && !defined(__hpux) && !defined(__sgi) */
908 
909 /*ARGSUSED*/
910 int fr_nextfrag(token, itp, top, tail, lock, ifs)
911 ipftoken_t *token;
912 ipfgeniter_t *itp;
913 ipfr_t **top, ***tail;
914 ipfrwlock_t *lock;
915 ipf_stack_t *ifs;
916 {
917 	ipfr_t *frag, *next, zero;
918 	int error = 0;
919 
920 	frag = token->ipt_data;
921 	if (frag == (ipfr_t *)-1) {
922 		ipf_freetoken(token, ifs);
923 		return ESRCH;
924 	}
925 
926 	READ_ENTER(lock);
927 	if (frag == NULL)
928 		next = *top;
929 	else
930 		next = frag->ipfr_next;
931 
932 	if (next != NULL) {
933 		ATOMIC_INC(next->ipfr_ref);
934 		token->ipt_data = next;
935 	} else {
936 		bzero(&zero, sizeof(zero));
937 		next = &zero;
938 		token->ipt_data = (void *)-1;
939 	}
940 	RWLOCK_EXIT(lock);
941 
942 	if (frag != NULL) {
943 		fr_fragderef(&frag, lock, ifs);
944 	}
945 
946 	error = COPYOUT(next, itp->igi_data, sizeof(*next));
947 	if (error != 0)
948 		error = EFAULT;
949 
950 	return error;
951 }
952 
953 
954 void fr_fragderef(frp, lock, ifs)
955 ipfr_t **frp;
956 ipfrwlock_t *lock;
957 ipf_stack_t *ifs;
958 {
959 	ipfr_t *fra;
960 
961 	fra = *frp;
962 	*frp = NULL;
963 
964 	WRITE_ENTER(lock);
965 	fra->ipfr_ref--;
966 	if (fra->ipfr_ref <= 0) {
967 		KFREE(fra);
968 		ifs->ifs_ipfr_stats.ifs_expire++;
969 		ifs->ifs_ipfr_inuse--;
970 	}
971 	RWLOCK_EXIT(lock);
972 }
973