xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_frag.c (revision aecc710ab066150d47e5e9e7269e2e0d69107b4e)
1 /*
2  * Copyright (C) 1993-2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #if defined(KERNEL) || defined(_KERNEL)
11 # undef KERNEL
12 # undef _KERNEL
13 # define        KERNEL	1
14 # define        _KERNEL	1
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/time.h>
20 #include <sys/file.h>
21 #ifdef __hpux
22 # include <sys/timeout.h>
23 #endif
24 #if !defined(_KERNEL)
25 # include <stdio.h>
26 # include <string.h>
27 # include <stdlib.h>
28 # define _KERNEL
29 # ifdef __OpenBSD__
30 struct file;
31 # endif
32 # include <sys/uio.h>
33 # undef _KERNEL
34 #endif
35 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
36 # include <sys/filio.h>
37 # include <sys/fcntl.h>
38 #else
39 # include <sys/ioctl.h>
40 #endif
41 #if !defined(linux)
42 # include <sys/protosw.h>
43 #endif
44 #include <sys/socket.h>
45 #if defined(_KERNEL)
46 # include <sys/systm.h>
47 # if !defined(__SVR4) && !defined(__svr4__)
48 #  include <sys/mbuf.h>
49 # endif
50 #endif
51 #if !defined(__SVR4) && !defined(__svr4__)
52 # if defined(_KERNEL) && !defined(__sgi) && !defined(AIX)
53 #  include <sys/kernel.h>
54 # endif
55 #else
56 # include <sys/byteorder.h>
57 # ifdef _KERNEL
58 #  include <sys/dditypes.h>
59 # endif
60 # include <sys/stream.h>
61 # include <sys/kmem.h>
62 #endif
63 #include <net/if.h>
64 #ifdef sun
65 # include <net/af.h>
66 #endif
67 #include <net/route.h>
68 #include <netinet/in.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #if !defined(linux)
72 # include <netinet/ip_var.h>
73 #endif
74 #include <netinet/tcp.h>
75 #include <netinet/udp.h>
76 #include <netinet/ip_icmp.h>
77 #include "netinet/ip_compat.h"
78 #include <netinet/tcpip.h>
79 #include "netinet/ip_fil.h"
80 #include "netinet/ip_nat.h"
81 #include "netinet/ip_frag.h"
82 #include "netinet/ip_state.h"
83 #include "netinet/ip_auth.h"
84 #include "netinet/ipf_stack.h"
85 #if (__FreeBSD_version >= 300000)
86 # include <sys/malloc.h>
87 # if defined(_KERNEL)
88 #  ifndef IPFILTER_LKM
89 #   include <sys/libkern.h>
90 #   include <sys/systm.h>
91 #  endif
92 extern struct callout_handle fr_slowtimer_ch;
93 # endif
94 #endif
95 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
96 # include <sys/callout.h>
97 extern struct callout fr_slowtimer_ch;
98 #endif
99 #if defined(__OpenBSD__)
100 # include <sys/timeout.h>
101 extern struct timeout fr_slowtimer_ch;
102 #endif
103 /* END OF INCLUDES */
104 
105 #if !defined(lint)
106 static const char sccsid[] = "@(#)ip_frag.c	1.11 3/24/96 (C) 1993-2000 Darren Reed";
107 static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.77.2.5 2005/08/11 14:33:10 darrenr Exp $";
108 #endif
109 
/*
 * Forward declarations of the file-local (static) helpers that are defined
 * further down in this file.
 */
110 static INLINE int ipfr_index __P((fr_info_t *, ipfr_t *));
111 static ipfr_t *ipfr_newfrag __P((fr_info_t *, u_32_t, ipfr_t **));
112 static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **));
113 static void fr_fragdelete __P((ipfr_t *, ipfr_t ***, ipf_stack_t *));
114 
115 /* ------------------------------------------------------------------------ */
116 /* Function:    fr_fraginit                                                 */
117 /* Returns:     int - 0 == success, -1 == error                             */
118 /* Parameters:  Nil                                                         */
119 /*                                                                          */
120 /* Initialise the hash tables for the fragment cache lookups.               */
121 /* ------------------------------------------------------------------------ */
122 int fr_fraginit(ifs)
123 ipf_stack_t *ifs;
124 {
125 	ifs->ifs_ipfr_tail = &ifs->ifs_ipfr_list;
126 	ifs->ifs_ipfr_nattail = &ifs->ifs_ipfr_natlist;
127 	ifs->ifs_ipfr_ipidtail = &ifs->ifs_ipfr_ipidlist;
128 	/* the IP frag related variables are set in ipftuneable_setdefs() to
129 	 * their default values
130 	 */
131 
132 	KMALLOCS(ifs->ifs_ipfr_heads, ipfr_t **,
133 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
134 	if (ifs->ifs_ipfr_heads == NULL)
135 		return -1;
136 	bzero((char *)ifs->ifs_ipfr_heads,
137 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
138 
139 	KMALLOCS(ifs->ifs_ipfr_nattab, ipfr_t **,
140 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
141 	if (ifs->ifs_ipfr_nattab == NULL)
142 		return -1;
143 	bzero((char *)ifs->ifs_ipfr_nattab,
144 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
145 
146 	KMALLOCS(ifs->ifs_ipfr_ipidtab, ipfr_t **,
147 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
148 	if (ifs->ifs_ipfr_ipidtab == NULL)
149 		return -1;
150 	bzero((char *)ifs->ifs_ipfr_ipidtab,
151 	    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
152 
153 	RWLOCK_INIT(&ifs->ifs_ipf_frag, "ipf fragment rwlock");
154 
155 	/* Initialise frblock with "block in all" */
156 	bzero((char *)&ifs->ifs_frblock, sizeof(ifs->ifs_frblock));
157 	ifs->ifs_frblock.fr_flags = FR_BLOCK|FR_INQUE;	/* block in */
158 	ifs->ifs_frblock.fr_ref = 1;
159 
160 	ifs->ifs_fr_frag_init = 1;
161 
162 	return 0;
163 }
164 
165 
166 /* ------------------------------------------------------------------------ */
167 /* Function:    fr_fragunload                                               */
168 /* Returns:     Nil                                                         */
169 /* Parameters:  Nil                                                         */
170 /*                                                                          */
171 /* Free all memory allocated whilst running and from initialisation.        */
172 /* ------------------------------------------------------------------------ */
173 void fr_fragunload(ifs)
174 ipf_stack_t *ifs;
175 {
176 	if (ifs->ifs_fr_frag_init == 1) {
177 		fr_fragclear(ifs);
178 
179 		RW_DESTROY(&ifs->ifs_ipf_frag);
180 		ifs->ifs_fr_frag_init = 0;
181 	}
182 
183 	if (ifs->ifs_ipfr_heads != NULL) {
184 		KFREES(ifs->ifs_ipfr_heads,
185 		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
186 	}
187 	ifs->ifs_ipfr_heads = NULL;
188 
189 	if (ifs->ifs_ipfr_nattab != NULL) {
190 		KFREES(ifs->ifs_ipfr_nattab,
191 		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
192 	}
193 	ifs->ifs_ipfr_nattab = NULL;
194 
195 	if (ifs->ifs_ipfr_ipidtab != NULL) {
196 		KFREES(ifs->ifs_ipfr_ipidtab,
197 		    ifs->ifs_ipfr_size * sizeof(ipfr_t *));
198 	}
199 	ifs->ifs_ipfr_ipidtab = NULL;
200 }
201 
202 
203 /* ------------------------------------------------------------------------ */
204 /* Function:    fr_fragstats                                                */
205 /* Returns:     ipfrstat_t* - pointer to struct with current frag stats     */
206 /* Parameters:  Nil                                                         */
207 /*                                                                          */
208 /* Updates ipfr_stats with current information and returns a pointer to it  */
209 /* ------------------------------------------------------------------------ */
210 ipfrstat_t *fr_fragstats(ifs)
211 ipf_stack_t *ifs;
212 {
213 	ifs->ifs_ipfr_stats.ifs_table = ifs->ifs_ipfr_heads;
214 	ifs->ifs_ipfr_stats.ifs_nattab = ifs->ifs_ipfr_nattab;
215 	ifs->ifs_ipfr_stats.ifs_inuse = ifs->ifs_ipfr_inuse;
216 	return &ifs->ifs_ipfr_stats;
217 }
218 
219 
220 /* ------------------------------------------------------------------------ */
221 /* Function:    ipfr_index                                                  */
222 /* Returns:     int     - index in fragment table for given packet          */
223 /* Parameters:  fin(I)  - pointer to packet information                     */
224 /*              frag(O) - pointer to ipfr_t structure to fill               */
225 /*                                                                          */
226 /* Compute the index in the fragment table while filling the per packet     */
227 /* part of the fragment state.                                              */
228 /* ------------------------------------------------------------------------ */
229 static INLINE int ipfr_index(fin, frag)
230 fr_info_t *fin;
231 ipfr_t *frag;
232 {
233 	u_int idx;
234 
235 	/*
236 	 * For fragments, we record protocol, packet id, TOS and both IP#'s
237 	 * (these should all be the same for all fragments of a packet).
238 	 *
239 	 * build up a hash value to index the table with.
240 	 */
241 
242 #ifdef	USE_INET6
243 	if (fin->fin_v == 6) {
244 		ip6_t *ip6 = (ip6_t *)fin->fin_ip;
245 
246 		frag->ipfr_p = fin->fin_fi.fi_p;
247 		frag->ipfr_id = fin->fin_id;
248 		frag->ipfr_tos = ip6->ip6_flow & IPV6_FLOWINFO_MASK;
249 		frag->ipfr_src.in6 = ip6->ip6_src;
250 		frag->ipfr_dst.in6 = ip6->ip6_dst;
251 	} else
252 #endif
253 	{
254 		ip_t *ip = fin->fin_ip;
255 
256 		frag->ipfr_p = ip->ip_p;
257 		frag->ipfr_id = ip->ip_id;
258 		frag->ipfr_tos = ip->ip_tos;
259 		frag->ipfr_src.in4.s_addr = ip->ip_src.s_addr;
260 		frag->ipfr_src.i6[1] = 0;
261 		frag->ipfr_src.i6[2] = 0;
262 		frag->ipfr_src.i6[3] = 0;
263 		frag->ipfr_dst.in4.s_addr = ip->ip_dst.s_addr;
264 		frag->ipfr_dst.i6[1] = 0;
265 		frag->ipfr_dst.i6[2] = 0;
266 		frag->ipfr_dst.i6[3] = 0;
267 	}
268 	frag->ipfr_ifp = fin->fin_ifp;
269 	frag->ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
270 	frag->ipfr_secmsk = fin->fin_fi.fi_secmsk;
271 	frag->ipfr_auth = fin->fin_fi.fi_auth;
272 
273 	idx = frag->ipfr_p;
274 	idx += frag->ipfr_id;
275 	idx += frag->ipfr_src.i6[0];
276 	idx += frag->ipfr_src.i6[1];
277 	idx += frag->ipfr_src.i6[2];
278 	idx += frag->ipfr_src.i6[3];
279 	idx += frag->ipfr_dst.i6[0];
280 	idx += frag->ipfr_dst.i6[1];
281 	idx += frag->ipfr_dst.i6[2];
282 	idx += frag->ipfr_dst.i6[3];
283 	idx *= 127;
284 	idx %= IPFT_SIZE;
285 
286 	return idx;
287 }
288 
289 
290 /* ------------------------------------------------------------------------ */
291 /* Function:    ipfr_newfrag                                                */
292 /* Returns:     ipfr_t * - pointer to fragment cache state info or NULL     */
293 /* Parameters:  fin(I)   - pointer to packet information                    */
294 /*              table(I) - pointer to frag table to add to                  */
295 /*                                                                          */
296 /* Add a new entry to the fragment cache, registering it as having come     */
297 /* through this box, with the result of the filter operation.               */
298 /* ------------------------------------------------------------------------ */
299 static ipfr_t *ipfr_newfrag(fin, pass, table)
300 fr_info_t *fin;
301 u_32_t pass;
302 ipfr_t *table[];
303 {
304 	ipfr_t *fra, frag;
305 	u_int idx, off;
306 	ipf_stack_t *ifs = fin->fin_ifs;
307 
308 	if (ifs->ifs_ipfr_inuse >= ifs->ifs_ipfr_size)
309 		return NULL;
310 
311 	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
312 		return NULL;
313 
314 	if (pass & FR_FRSTRICT)
315 		if (fin->fin_off != 0)
316 			return NULL;
317 
318 	idx = ipfr_index(fin, &frag);
319 
320 	/*
321 	 * first, make sure it isn't already there...
322 	 */
323 	for (fra = table[idx]; (fra != NULL); fra = fra->ipfr_hnext)
324 		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp,
325 			  IPFR_CMPSZ)) {
326 			ifs->ifs_ipfr_stats.ifs_exists++;
327 			return NULL;
328 		}
329 
330 	/*
331 	 * allocate some memory, if possible, if not, just record that we
332 	 * failed to do so.
333 	 */
334 	KMALLOC(fra, ipfr_t *);
335 	if (fra == NULL) {
336 		ifs->ifs_ipfr_stats.ifs_nomem++;
337 		return NULL;
338 	}
339 
340 	fra->ipfr_rule = fin->fin_fr;
341 	if (fra->ipfr_rule != NULL) {
342 
343 		frentry_t *fr;
344 
345 		fr = fin->fin_fr;
346 		MUTEX_ENTER(&fr->fr_lock);
347 		fr->fr_ref++;
348 		MUTEX_EXIT(&fr->fr_lock);
349 	}
350 
351 	/*
352 	 * Insert the fragment into the fragment table, copy the struct used
353 	 * in the search using bcopy rather than reassign each field.
354 	 * Set the ttl to the default.
355 	 */
356 	if ((fra->ipfr_hnext = table[idx]) != NULL)
357 		table[idx]->ipfr_hprev = &fra->ipfr_hnext;
358 	fra->ipfr_hprev = table + idx;
359 	fra->ipfr_data = NULL;
360 	table[idx] = fra;
361 	bcopy((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ);
362 	fra->ipfr_ttl = ifs->ifs_fr_ticks + ifs->ifs_fr_ipfrttl;
363 
364 	/*
365 	 * Compute the offset of the expected start of the next packet.
366 	 */
367 	off = fin->fin_off >> 3;
368 	if (off == 0) {
369 		fra->ipfr_seen0 = 1;
370 	} else {
371 		fra->ipfr_seen0 = 0;
372 	}
373 	fra->ipfr_off = off + fin->fin_dlen;
374 	fra->ipfr_pass = pass;
375 	fra->ipfr_ref = 1;
376 	ifs->ifs_ipfr_stats.ifs_new++;
377 	ifs->ifs_ipfr_inuse++;
378 	return fra;
379 }
380 
381 
382 /* ------------------------------------------------------------------------ */
383 /* Function:    fr_newfrag                                                  */
384 /* Returns:     int - 0 == success, -1 == error                             */
385 /* Parameters:  fin(I)  - pointer to packet information                     */
386 /*                                                                          */
387 /* Add a new entry to the fragment cache table based on the current packet  */
388 /* ------------------------------------------------------------------------ */
389 int fr_newfrag(fin, pass)
390 u_32_t pass;
391 fr_info_t *fin;
392 {
393 	ipfr_t	*fra;
394 	ipf_stack_t *ifs = fin->fin_ifs;
395 
396 	if (ifs->ifs_fr_frag_lock != 0)
397 		return -1;
398 
399 	WRITE_ENTER(&ifs->ifs_ipf_frag);
400 	fra = ipfr_newfrag(fin, pass, ifs->ifs_ipfr_heads);
401 	if (fra != NULL) {
402 		*ifs->ifs_ipfr_tail = fra;
403 		fra->ipfr_prev = ifs->ifs_ipfr_tail;
404 		ifs->ifs_ipfr_tail = &fra->ipfr_next;
405 		if (ifs->ifs_ipfr_list == NULL)
406 			ifs->ifs_ipfr_list = fra;
407 		fra->ipfr_next = NULL;
408 	}
409 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
410 	return fra ? 0 : -1;
411 }
412 
413 
414 /* ------------------------------------------------------------------------ */
415 /* Function:    fr_nat_newfrag                                              */
416 /* Returns:     int - 0 == success, -1 == error                             */
417 /* Parameters:  fin(I)  - pointer to packet information                     */
418 /*              nat(I)  - pointer to NAT structure                          */
419 /*                                                                          */
420 /* Create a new NAT fragment cache entry based on the current packet and    */
421 /* the NAT structure for this "session".                                    */
422 /* ------------------------------------------------------------------------ */
423 int fr_nat_newfrag(fin, pass, nat)
424 fr_info_t *fin;
425 u_32_t pass;
426 nat_t *nat;
427 {
428 	ipfr_t	*fra;
429 	ipf_stack_t *ifs = fin->fin_ifs;
430 
431 	if (ifs->ifs_fr_frag_lock != 0)
432 		return 0;
433 
434 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
435 	fra = ipfr_newfrag(fin, pass, ifs->ifs_ipfr_nattab);
436 	if (fra != NULL) {
437 		fra->ipfr_data = nat;
438 		nat->nat_data = fra;
439 		*ifs->ifs_ipfr_nattail = fra;
440 		fra->ipfr_prev = ifs->ifs_ipfr_nattail;
441 		ifs->ifs_ipfr_nattail = &fra->ipfr_next;
442 		fra->ipfr_next = NULL;
443 	}
444 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
445 	return fra ? 0 : -1;
446 }
447 
448 
449 /* ------------------------------------------------------------------------ */
450 /* Function:    fr_ipid_newfrag                                             */
451 /* Returns:     int - 0 == success, -1 == error                             */
452 /* Parameters:  fin(I)  - pointer to packet information                     */
453 /*              ipid(I) - new IP ID for this fragmented packet              */
454 /*                                                                          */
455 /* Create a new fragment cache entry for this packet and store, as a data   */
456 /* pointer, the new IP ID value.                                            */
457 /* ------------------------------------------------------------------------ */
458 int fr_ipid_newfrag(fin, ipid)
459 fr_info_t *fin;
460 u_32_t ipid;
461 {
462 	ipfr_t	*fra;
463 	ipf_stack_t *ifs = fin->fin_ifs;
464 
465 	if (ifs->ifs_fr_frag_lock)
466 		return 0;
467 
468 	WRITE_ENTER(&ifs->ifs_ipf_ipidfrag);
469 	fra = ipfr_newfrag(fin, 0, ifs->ifs_ipfr_ipidtab);
470 	if (fra != NULL) {
471 		fra->ipfr_data = (void *)(uintptr_t)ipid;
472 		*ifs->ifs_ipfr_ipidtail = fra;
473 		fra->ipfr_prev = ifs->ifs_ipfr_ipidtail;
474 		ifs->ifs_ipfr_ipidtail = &fra->ipfr_next;
475 		fra->ipfr_next = NULL;
476 	}
477 	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
478 	return fra ? 0 : -1;
479 }
480 
481 
482 /* ------------------------------------------------------------------------ */
483 /* Function:    fr_fraglookup                                               */
484 /* Returns:     ipfr_t * - pointer to ipfr_t structure if there's a         */
485 /*                         matching entry in the frag table, else NULL      */
486 /* Parameters:  fin(I)   - pointer to packet information                    */
487 /*              table(I) - pointer to fragment cache table to search        */
488 /*                                                                          */
489 /* Check the fragment cache to see if there is already a record of this     */
490 /* packet with its filter result known.                                     */
491 /* ------------------------------------------------------------------------ */
492 static ipfr_t *fr_fraglookup(fin, table)
493 fr_info_t *fin;
494 ipfr_t *table[];
495 {
496 	ipfr_t *f, frag;
497 	u_int idx;
498 	ipf_stack_t *ifs = fin->fin_ifs;
499 
500 	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
501 		return NULL;
502 
503 	/*
504 	 * For fragments, we record protocol, packet id, TOS and both IP#'s
505 	 * (these should all be the same for all fragments of a packet).
506 	 *
507 	 * build up a hash value to index the table with.
508 	 */
509 	idx = ipfr_index(fin, &frag);
510 
511 	/*
512 	 * check the table, careful to only compare the right amount of data
513 	 */
514 	for (f = table[idx]; f; f = f->ipfr_hnext) {
515 		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&f->ipfr_ifp,
516 			  IPFR_CMPSZ)) {
517 			u_short	off;
518 
519 			/*
520 			 * We don't want to let short packets match because
521 			 * they could be compromising the security of other
522 			 * rules that want to match on layer 4 fields (and
523 			 * can't because they have been fragmented off.)
524 			 * Why do this check here?  The counter acts as an
525 			 * indicator of this kind of attack, whereas if it was
526 			 * elsewhere, it wouldn't know if other matching
527 			 * packets had been seen.
528 			 */
529 			if (fin->fin_flx & FI_SHORT) {
530 				ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_short);
531 				continue;
532 			}
533 
534 			/*
535 			 * XXX - We really need to be guarding against the
536 			 * retransmission of (src,dst,id,offset-range) here
537 			 * because a fragmented packet is never resent with
538 			 * the same IP ID# (or shouldn't).
539 			 */
540 			off = fin->fin_off >> 3;
541 			if (f->ipfr_seen0) {
542 				if (off == 0) {
543 					ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_retrans0);
544 					continue;
545 				}
546 			} else if (off == 0) {
547 				f->ipfr_seen0 = 1;
548 			}
549 
550 			/*
551 			 * If we've follwed the fragments, and this is the
552 			 * last (in order), shrink expiration time.
553 			 */
554 			if (off == f->ipfr_off) {
555 				if (!(fin->fin_flx & FI_MOREFRAG))
556 					f->ipfr_ttl = ifs->ifs_fr_ticks + 1;
557 				f->ipfr_off = fin->fin_dlen + off;
558 			} else if (f->ipfr_pass & FR_FRSTRICT) {
559 				continue;
560 			}
561 			ATOMIC_INCL(ifs->ifs_ipfr_stats.ifs_hits);
562 			return f;
563 		}
564 	}
565 
566 	return NULL;
567 }
568 
569 
570 /* ------------------------------------------------------------------------ */
571 /* Function:    fr_nat_knownfrag                                            */
572 /* Returns:     nat_t* - pointer to 'parent' NAT structure if frag table    */
573 /*                       match found, else NULL                             */
574 /* Parameters:  fin(I)  - pointer to packet information                     */
575 /*                                                                          */
576 /* Functional interface for NAT lookups of the NAT fragment cache           */
577 /* ------------------------------------------------------------------------ */
578 nat_t *fr_nat_knownfrag(fin)
579 fr_info_t *fin;
580 {
581 	nat_t	*nat;
582 	ipfr_t	*ipf;
583 	ipf_stack_t *ifs = fin->fin_ifs;
584 
585 	if (ifs->ifs_fr_frag_lock || !ifs->ifs_ipfr_natlist)
586 		return NULL;
587 	READ_ENTER(&ifs->ifs_ipf_natfrag);
588 	ipf = fr_fraglookup(fin, ifs->ifs_ipfr_nattab);
589 	if (ipf != NULL) {
590 		nat = ipf->ipfr_data;
591 		/*
592 		 * This is the last fragment for this packet.
593 		 */
594 		if ((ipf->ipfr_ttl == ifs->ifs_fr_ticks + 1) && (nat != NULL)) {
595 			nat->nat_data = NULL;
596 			ipf->ipfr_data = NULL;
597 		}
598 	} else
599 		nat = NULL;
600 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
601 	return nat;
602 }
603 
604 
605 /* ------------------------------------------------------------------------ */
606 /* Function:    fr_ipid_knownfrag                                           */
607 /* Returns:     u_32_t - IPv4 ID for this packet if match found, else       */
608 /*                       return 0xfffffff to indicate no match.             */
609 /* Parameters:  fin(I) - pointer to packet information                      */
610 /*                                                                          */
611 /* Functional interface for IP ID lookups of the IP ID fragment cache       */
612 /* ------------------------------------------------------------------------ */
613 u_32_t fr_ipid_knownfrag(fin)
614 fr_info_t *fin;
615 {
616 	ipfr_t	*ipf;
617 	u_32_t	id;
618 	ipf_stack_t *ifs = fin->fin_ifs;
619 
620 	if (ifs->ifs_fr_frag_lock || !ifs->ifs_ipfr_ipidlist)
621 		return 0xffffffff;
622 
623 	READ_ENTER(&ifs->ifs_ipf_ipidfrag);
624 	ipf = fr_fraglookup(fin, ifs->ifs_ipfr_ipidtab);
625 	if (ipf != NULL)
626 		id = (u_32_t)(uintptr_t)ipf->ipfr_data;
627 	else
628 		id = 0xffffffff;
629 	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
630 	return id;
631 }
632 
633 
634 /* ------------------------------------------------------------------------ */
635 /* Function:    fr_knownfrag                                                */
636 /* Returns:     frentry_t* - pointer to filter rule if a match is found in  */
637 /*                           the frag cache table, else NULL.               */
638 /* Parameters:  fin(I)   - pointer to packet information                    */
639 /*              passp(O) - pointer to where to store rule flags resturned   */
640 /*                                                                          */
641 /* Functional interface for normal lookups of the fragment cache.  If a     */
642 /* match is found, return the rule pointer and flags from the rule, except  */
643 /* that if FR_LOGFIRST is set, reset FR_LOG.                                */
644 /* ------------------------------------------------------------------------ */
645 frentry_t *fr_knownfrag(fin, passp)
646 fr_info_t *fin;
647 u_32_t *passp;
648 {
649 	frentry_t *fr = NULL;
650 	ipfr_t	*fra;
651 	u_32_t pass, oflx;
652 	ipf_stack_t *ifs = fin->fin_ifs;
653 
654 	if (ifs->ifs_fr_frag_lock || (ifs->ifs_ipfr_list == NULL))
655 		return NULL;
656 
657 	READ_ENTER(&ifs->ifs_ipf_frag);
658 	oflx = fin->fin_flx;
659 	fra = fr_fraglookup(fin, ifs->ifs_ipfr_heads);
660 	if (fra != NULL) {
661 		fr = fra->ipfr_rule;
662 		fin->fin_fr = fr;
663 		if (fr != NULL) {
664 			pass = fr->fr_flags;
665 			if ((pass & FR_LOGFIRST) != 0)
666 				pass &= ~(FR_LOGFIRST|FR_LOG);
667 			*passp = pass;
668 		}
669 	}
670 	if (!(oflx & FI_BAD) && (fin->fin_flx & FI_BAD)) {
671 		*passp &= ~FR_CMDMASK;
672 		*passp |= FR_BLOCK;
673 		fr = &ifs->ifs_frblock;
674 	}
675 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
676 	return fr;
677 }
678 
679 
680 /* ------------------------------------------------------------------------ */
681 /* Function:    fr_forget                                                   */
682 /* Returns:     Nil                                                         */
683 /* Parameters:  ptr(I) - pointer to data structure                          */
684 /*                                                                          */
685 /* Search through all of the fragment cache entries and wherever a pointer  */
686 /* is found to match ptr, reset it to NULL.                                 */
687 /* ------------------------------------------------------------------------ */
688 void fr_forget(ptr, ifs)
689 void *ptr;
690 ipf_stack_t *ifs;
691 {
692 	ipfr_t	*fr;
693 
694 	WRITE_ENTER(&ifs->ifs_ipf_frag);
695 	for (fr = ifs->ifs_ipfr_list; fr; fr = fr->ipfr_next)
696 		if (fr->ipfr_data == ptr)
697 			fr->ipfr_data = NULL;
698 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
699 }
700 
701 
702 /* ------------------------------------------------------------------------ */
703 /* Function:    fr_forgetnat                                                */
704 /* Returns:     Nil                                                         */
705 /* Parameters:  ptr(I) - pointer to data structure                          */
706 /*                                                                          */
707 /* Search through all of the fragment cache entries for NAT and wherever a  */
708 /* pointer  is found to match ptr, reset it to NULL.                        */
709 /* ------------------------------------------------------------------------ */
710 void fr_forgetnat(ptr, ifs)
711 void *ptr;
712 ipf_stack_t *ifs;
713 {
714 	ipfr_t	*fr;
715 
716 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
717 	for (fr = ifs->ifs_ipfr_natlist; fr; fr = fr->ipfr_next)
718 		if (fr->ipfr_data == ptr)
719 			fr->ipfr_data = NULL;
720 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
721 }
722 
723 
724 /* ------------------------------------------------------------------------ */
725 /* Function:    fr_fragdelete                                               */
726 /* Returns:     Nil                                                         */
727 /* Parameters:  fra(I)   - pointer to fragment structure to delete          */
728 /*              tail(IO) - pointer to the pointer to the tail of the frag   */
729 /*                         list                                             */
730 /*                                                                          */
731 /* Remove a fragment cache table entry from the table & list.  Also free    */
732 /* the filter rule it is associated with it if it is no longer used as a    */
733 /* result of decreasing the reference count.                                */
734 /* ------------------------------------------------------------------------ */
735 static void fr_fragdelete(fra, tail, ifs)
736 ipfr_t *fra, ***tail;
737 ipf_stack_t *ifs;
738 {
739 	frentry_t *fr;
740 
741 	fr = fra->ipfr_rule;
742 	if (fr != NULL)
743 	    (void)fr_derefrule(&fr, ifs);
744 
745 	if (fra->ipfr_next)
746 		fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
747 	*fra->ipfr_prev = fra->ipfr_next;
748 	if (*tail == &fra->ipfr_next)
749 		*tail = fra->ipfr_prev;
750 
751 	if (fra->ipfr_hnext)
752 		fra->ipfr_hnext->ipfr_hprev = fra->ipfr_hprev;
753 	*fra->ipfr_hprev = fra->ipfr_hnext;
754 
755 	if (fra->ipfr_ref <= 0)
756 		KFREE(fra);
757 }
758 
759 
760 /* ------------------------------------------------------------------------ */
761 /* Function:    fr_fragclear                                                */
762 /* Returns:     Nil                                                         */
763 /* Parameters:  Nil                                                         */
764 /*                                                                          */
765 /* Free memory in use by fragment state information kept.  Do the normal    */
766 /* fragment state stuff first and then the NAT-fragment table.              */
767 /* ------------------------------------------------------------------------ */
768 void fr_fragclear(ifs)
769 ipf_stack_t *ifs;
770 {
771 	ipfr_t	*fra;
772 	nat_t	*nat;
773 
774 	WRITE_ENTER(&ifs->ifs_ipf_frag);
775 	while ((fra = ifs->ifs_ipfr_list) != NULL) {
776 		fra->ipfr_ref--;
777 		fr_fragdelete(fra, &ifs->ifs_ipfr_tail, ifs);
778 	}
779 	ifs->ifs_ipfr_tail = &ifs->ifs_ipfr_list;
780 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
781 
782 	WRITE_ENTER(&ifs->ifs_ipf_nat);
783 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
784 	while ((fra = ifs->ifs_ipfr_natlist) != NULL) {
785 		nat = fra->ipfr_data;
786 		if (nat != NULL) {
787 			if (nat->nat_data == fra)
788 				nat->nat_data = NULL;
789 		}
790 		fra->ipfr_ref--;
791 		fr_fragdelete(fra, &ifs->ifs_ipfr_nattail, ifs);
792 	}
793 	ifs->ifs_ipfr_nattail = &ifs->ifs_ipfr_natlist;
794 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
795 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
796 }
797 
798 
799 /* ------------------------------------------------------------------------ */
800 /* Function:    fr_fragexpire                                               */
801 /* Returns:     Nil                                                         */
802 /* Parameters:  Nil                                                         */
803 /*                                                                          */
804 /* Expire entries in the fragment cache table that have been there too long */
805 /* ------------------------------------------------------------------------ */
806 void fr_fragexpire(ifs)
807 ipf_stack_t *ifs;
808 {
809 	ipfr_t	**fp, *fra;
810 	nat_t	*nat;
811 	SPL_INT(s);
812 
813 	if (ifs->ifs_fr_frag_lock)
814 		return;
815 
816 	SPL_NET(s);
817 	WRITE_ENTER(&ifs->ifs_ipf_frag);
818 	/*
819 	 * Go through the entire table, looking for entries to expire,
820 	 * which is indicated by the ttl being less than or equal to
821 	 * ifs_fr_ticks.
822 	 */
823 	for (fp = &ifs->ifs_ipfr_list; ((fra = *fp) != NULL); ) {
824 		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
825 			break;
826 		fra->ipfr_ref--;
827 		fr_fragdelete(fra, &ifs->ifs_ipfr_tail, ifs);
828 		ifs->ifs_ipfr_stats.ifs_expire++;
829 		ifs->ifs_ipfr_inuse--;
830 	}
831 	RWLOCK_EXIT(&ifs->ifs_ipf_frag);
832 
833 	WRITE_ENTER(&ifs->ifs_ipf_ipidfrag);
834 	for (fp = &ifs->ifs_ipfr_ipidlist; ((fra = *fp) != NULL); ) {
835 		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
836 			break;
837 		fra->ipfr_ref--;
838 		fr_fragdelete(fra, &ifs->ifs_ipfr_ipidtail, ifs);
839 		ifs->ifs_ipfr_stats.ifs_expire++;
840 		ifs->ifs_ipfr_inuse--;
841 	}
842 	RWLOCK_EXIT(&ifs->ifs_ipf_ipidfrag);
843 
844 	/*
845 	 * Same again for the NAT table, except that if the structure also
846 	 * still points to a NAT structure, and the NAT structure points back
847 	 * at the one to be free'd, NULL the reference from the NAT struct.
848 	 * NOTE: We need to grab both mutex's early, and in this order so as
849 	 * to prevent a deadlock if both try to expire at the same time.
850 	 */
851 	WRITE_ENTER(&ifs->ifs_ipf_nat);
852 	WRITE_ENTER(&ifs->ifs_ipf_natfrag);
853 	for (fp = &ifs->ifs_ipfr_natlist; ((fra = *fp) != NULL); ) {
854 		if (fra->ipfr_ttl > ifs->ifs_fr_ticks)
855 			break;
856 		nat = fra->ipfr_data;
857 		if (nat != NULL) {
858 			if (nat->nat_data == fra)
859 				nat->nat_data = NULL;
860 		}
861 		fra->ipfr_ref--;
862 		fr_fragdelete(fra, &ifs->ifs_ipfr_nattail, ifs);
863 		ifs->ifs_ipfr_stats.ifs_expire++;
864 		ifs->ifs_ipfr_inuse--;
865 	}
866 	RWLOCK_EXIT(&ifs->ifs_ipf_natfrag);
867 	RWLOCK_EXIT(&ifs->ifs_ipf_nat);
868 	SPL_X(s);
869 }
870 
871 
872 /* ------------------------------------------------------------------------ */
873 /* Function:    fr_slowtimer                                                */
874 /* Returns:     Nil                                                         */
875 /* Parameters:  Nil                                                         */
876 /*                                                                          */
877 /* Slowly expire held state for fragments.  Timeouts are set * in           */
878 /* expectation of this being called twice per second.                       */
879 /* ------------------------------------------------------------------------ */
880 #if !defined(_KERNEL) || (!defined(SOLARIS) && !defined(__hpux) && \
881 	!defined(__sgi) && !defined(__osf__) && !defined(linux))
882 # if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi))
883 void fr_slowtimer __P((void *arg))
884 # else
885 int fr_slowtimer(void *arg)
886 # endif
887 {
888 	ipf_stack_t *ifs = arg;
889 
890 	READ_ENTER(&ifs->ifs_ipf_global);
891 
892 	fr_fragexpire(ifs);
893 	fr_timeoutstate(ifs);
894 	fr_natexpire(ifs);
895 	fr_authexpire(ifs);
896 	ifs->ifs_fr_ticks++;
897 	if (ifs->ifs_fr_running <= 0)
898 		goto done;
899 # ifdef _KERNEL
900 #  if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
901 	callout_reset(&fr_slowtimer_ch, hz / 2, fr_slowtimer, NULL);
902 #  else
903 #   if defined(__OpenBSD__)
904 	timeout_add(&fr_slowtimer_ch, hz/2);
905 #   else
906 #    if (__FreeBSD_version >= 300000)
907 	fr_slowtimer_ch = timeout(fr_slowtimer, NULL, hz/2);
908 #    else
909 #     ifdef linux
910 	;
911 #     else
912 	timeout(fr_slowtimer, NULL, hz/2);
913 #     endif
914 #    endif /* FreeBSD */
915 #   endif /* OpenBSD */
916 #  endif /* NetBSD */
917 # endif
918 done:
919 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
920 # if (BSD < 199103) || !defined(_KERNEL)
921 	return 0;
922 # endif
923 }
924 #endif /* !SOLARIS && !defined(__hpux) && !defined(__sgi) */
925 
926 /*ARGSUSED*/
927 int fr_nextfrag(token, itp, top, tail, lock, ifs)
928 ipftoken_t *token;
929 ipfgeniter_t *itp;
930 ipfr_t **top, ***tail;
931 ipfrwlock_t *lock;
932 ipf_stack_t *ifs;
933 {
934 	ipfr_t *frag, *next, zero;
935 	int error = 0;
936 
937 	READ_ENTER(lock);
938 
939 	/*
940 	 * Retrieve "previous" entry from token and find the next entry.
941 	 */
942 	frag = token->ipt_data;
943 	if (frag == NULL)
944 		next = *top;
945 	else
946 		next = frag->ipfr_next;
947 
948 	/*
949 	 * If we found an entry, add reference to it and update token.
950 	 * Otherwise, zero out data to be returned and NULL out token.
951 	 */
952 	if (next != NULL) {
953 		ATOMIC_INC(next->ipfr_ref);
954 		token->ipt_data = next;
955 	} else {
956 		bzero(&zero, sizeof(zero));
957 		next = &zero;
958 		token->ipt_data = NULL;
959 	}
960 
961 	/*
962 	 * Now that we have ref, it's save to give up lock.
963 	 */
964 	RWLOCK_EXIT(lock);
965 
966 	/*
967 	 * Copy out data and clean up references and token as needed.
968 	 */
969 	error = COPYOUT(next, itp->igi_data, sizeof(*next));
970 	if (error != 0)
971 		error = EFAULT;
972 	if (token->ipt_data == NULL) {
973 		ipf_freetoken(token, ifs);
974 	} else {
975 		if (frag != NULL)
976 			fr_fragderef(&frag, lock, ifs);
977 		if (next->ipfr_next == NULL)
978 			ipf_freetoken(token, ifs);
979 	}
980 	return error;
981 }
982 
983 
984 void fr_fragderef(frp, lock, ifs)
985 ipfr_t **frp;
986 ipfrwlock_t *lock;
987 ipf_stack_t *ifs;
988 {
989 	ipfr_t *fra;
990 
991 	fra = *frp;
992 	*frp = NULL;
993 
994 	WRITE_ENTER(lock);
995 	fra->ipfr_ref--;
996 	if (fra->ipfr_ref <= 0) {
997 		KFREE(fra);
998 		ifs->ifs_ipfr_stats.ifs_expire++;
999 		ifs->ifs_ipfr_inuse--;
1000 	}
1001 	RWLOCK_EXIT(lock);
1002 }
1003