xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_frag.c (revision 60405de4d8688d96dd05157c28db3ade5c9bc234)
1 /*
2  * Copyright (C) 1993-2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
10 #pragma ident	"%Z%%M%	%I%	%E% SMI"
11 
12 #if defined(KERNEL) || defined(_KERNEL)
13 # undef KERNEL
14 # undef _KERNEL
15 # define        KERNEL	1
16 # define        _KERNEL	1
17 #endif
18 #include <sys/errno.h>
19 #include <sys/types.h>
20 #include <sys/param.h>
21 #include <sys/time.h>
22 #include <sys/file.h>
23 #ifdef __hpux
24 # include <sys/timeout.h>
25 #endif
26 #if !defined(_KERNEL)
27 # include <stdio.h>
28 # include <string.h>
29 # include <stdlib.h>
30 # define _KERNEL
31 # ifdef __OpenBSD__
32 struct file;
33 # endif
34 # include <sys/uio.h>
35 # undef _KERNEL
36 #endif
37 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
38 # include <sys/filio.h>
39 # include <sys/fcntl.h>
40 #else
41 # include <sys/ioctl.h>
42 #endif
43 #if !defined(linux)
44 # include <sys/protosw.h>
45 #endif
46 #include <sys/socket.h>
47 #if defined(_KERNEL)
48 # include <sys/systm.h>
49 # if !defined(__SVR4) && !defined(__svr4__)
50 #  include <sys/mbuf.h>
51 # endif
52 #endif
53 #if !defined(__SVR4) && !defined(__svr4__)
54 # if defined(_KERNEL) && !defined(__sgi) && !defined(AIX)
55 #  include <sys/kernel.h>
56 # endif
57 #else
58 # include <sys/byteorder.h>
59 # ifdef _KERNEL
60 #  include <sys/dditypes.h>
61 # endif
62 # include <sys/stream.h>
63 # include <sys/kmem.h>
64 #endif
65 #include <net/if.h>
66 #ifdef sun
67 # include <net/af.h>
68 #endif
69 #include <net/route.h>
70 #include <netinet/in.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/ip.h>
73 #if !defined(linux)
74 # include <netinet/ip_var.h>
75 #endif
76 #include <netinet/tcp.h>
77 #include <netinet/udp.h>
78 #include <netinet/ip_icmp.h>
79 #include "netinet/ip_compat.h"
80 #include <netinet/tcpip.h>
81 #include "netinet/ip_fil.h"
82 #include "netinet/ip_nat.h"
83 #include "netinet/ip_frag.h"
84 #include "netinet/ip_state.h"
85 #include "netinet/ip_auth.h"
86 #include "netinet/ip_proxy.h"
87 #if (__FreeBSD_version >= 300000)
88 # include <sys/malloc.h>
89 # if defined(_KERNEL)
90 #  ifndef IPFILTER_LKM
91 #   include <sys/libkern.h>
92 #   include <sys/systm.h>
93 #  endif
94 extern struct callout_handle fr_slowtimer_ch;
95 # endif
96 #endif
97 #if defined(__NetBSD__) && (__NetBSD_Version__ >= 104230000)
98 # include <sys/callout.h>
99 extern struct callout fr_slowtimer_ch;
100 #endif
101 #if defined(__OpenBSD__)
102 # include <sys/timeout.h>
103 extern struct timeout fr_slowtimer_ch;
104 #endif
105 /* END OF INCLUDES */
106 
107 #if !defined(lint)
108 static const char sccsid[] = "@(#)ip_frag.c	1.11 3/24/96 (C) 1993-2000 Darren Reed";
109 static const char rcsid[] = "@(#)$Id: ip_frag.c,v 2.77.2.5 2005/08/11 14:33:10 darrenr Exp $";
110 #endif
111 
112 
113 static ipfr_t   *ipfr_list = NULL;
114 static ipfr_t   **ipfr_tail = &ipfr_list;
115 static ipfr_t	**ipfr_heads;
116 
117 static ipfr_t   *ipfr_natlist = NULL;
118 static ipfr_t   **ipfr_nattail = &ipfr_natlist;
119 static ipfr_t	**ipfr_nattab;
120 
121 static ipfr_t   *ipfr_ipidlist = NULL;
122 static ipfr_t   **ipfr_ipidtail = &ipfr_ipidlist;
123 static ipfr_t	**ipfr_ipidtab;
124 
125 static ipfrstat_t ipfr_stats;
126 static int	ipfr_inuse = 0;
127 int		ipfr_size = IPFT_SIZE;
128 
129 int	fr_ipfrttl = 120;	/* 60 seconds */
130 int	fr_frag_lock = 0;
131 int	fr_frag_init = 0;
132 u_long	fr_ticks = 0;
133 
134 
135 static ipfr_t *ipfr_newfrag __P((fr_info_t *, u_32_t, ipfr_t **));
136 static ipfr_t *fr_fraglookup __P((fr_info_t *, ipfr_t **));
137 static void fr_fragdelete __P((ipfr_t *, ipfr_t ***));
138 
139 static frentry_t frblock;
140 
141 /* ------------------------------------------------------------------------ */
142 /* Function:    fr_fraginit                                                 */
143 /* Returns:     int - 0 == success, -1 == error                             */
144 /* Parameters:  Nil                                                         */
145 /*                                                                          */
146 /* Initialise the hash tables for the fragment cache lookups.               */
147 /* ------------------------------------------------------------------------ */
148 int fr_fraginit()
149 {
150 	KMALLOCS(ipfr_heads, ipfr_t **, ipfr_size * sizeof(ipfr_t *));
151 	if (ipfr_heads == NULL)
152 		return -1;
153 	bzero((char *)ipfr_heads, ipfr_size * sizeof(ipfr_t *));
154 
155 	KMALLOCS(ipfr_nattab, ipfr_t **, ipfr_size * sizeof(ipfr_t *));
156 	if (ipfr_nattab == NULL)
157 		return -1;
158 	bzero((char *)ipfr_nattab, ipfr_size * sizeof(ipfr_t *));
159 
160 	KMALLOCS(ipfr_ipidtab, ipfr_t **, ipfr_size * sizeof(ipfr_t *));
161 	if (ipfr_ipidtab == NULL)
162 		return -1;
163 	bzero((char *)ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *));
164 
165 	RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock");
166 
167 	/* Initialise frblock with "block in all" */
168 	bzero((char *)&frblock, sizeof(frblock));
169 	frblock.fr_flags = FR_BLOCK|FR_INQUE;	/* block in */
170 	frblock.fr_ref = 1;
171 
172 	fr_frag_init = 1;
173 
174 	return 0;
175 }
176 
177 
178 /* ------------------------------------------------------------------------ */
179 /* Function:    fr_fragunload                                               */
180 /* Returns:     Nil                                                         */
181 /* Parameters:  Nil                                                         */
182 /*                                                                          */
183 /* Free all memory allocated whilst running and from initialisation.        */
184 /* ------------------------------------------------------------------------ */
185 void fr_fragunload()
186 {
187 	if (fr_frag_init == 1) {
188 		fr_fragclear();
189 
190 		RW_DESTROY(&ipf_frag);
191 		fr_frag_init = 0;
192 	}
193 
194 	if (ipfr_heads != NULL)
195 		KFREES(ipfr_heads, ipfr_size * sizeof(ipfr_t *));
196 	ipfr_heads = NULL;
197 
198 	if (ipfr_nattab != NULL)
199 		KFREES(ipfr_nattab, ipfr_size * sizeof(ipfr_t *));
200 	ipfr_nattab = NULL;
201 
202 	if (ipfr_ipidtab != NULL)
203 		KFREES(ipfr_ipidtab, ipfr_size * sizeof(ipfr_t *));
204 	ipfr_ipidtab = NULL;
205 }
206 
207 
208 /* ------------------------------------------------------------------------ */
209 /* Function:    fr_fragstats                                                */
210 /* Returns:     ipfrstat_t* - pointer to struct with current frag stats     */
211 /* Parameters:  Nil                                                         */
212 /*                                                                          */
213 /* Updates ipfr_stats with current information and returns a pointer to it  */
214 /* ------------------------------------------------------------------------ */
215 ipfrstat_t *fr_fragstats()
216 {
217 	ipfr_stats.ifs_table = ipfr_heads;
218 	ipfr_stats.ifs_nattab = ipfr_nattab;
219 	ipfr_stats.ifs_inuse = ipfr_inuse;
220 	return &ipfr_stats;
221 }
222 
223 
224 /* ------------------------------------------------------------------------ */
225 /* Function:    ipfr_newfrag                                                */
226 /* Returns:     ipfr_t * - pointer to fragment cache state info or NULL     */
227 /* Parameters:  fin(I)   - pointer to packet information                    */
228 /*              table(I) - pointer to frag table to add to                  */
229 /*                                                                          */
230 /* Add a new entry to the fragment cache, registering it as having come     */
231 /* through this box, with the result of the filter operation.               */
232 /* ------------------------------------------------------------------------ */
233 static ipfr_t *ipfr_newfrag(fin, pass, table)
234 fr_info_t *fin;
235 u_32_t pass;
236 ipfr_t *table[];
237 {
238 	ipfr_t *fra, frag;
239 	u_int idx, off;
240 
241 	if (ipfr_inuse >= IPFT_SIZE)
242 		return NULL;
243 
244 	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
245 		return NULL;
246 
247 	if (pass & FR_FRSTRICT)
248 		if (fin->fin_off != 0)
249 			return NULL;
250 
251 	frag.ipfr_p = fin->fin_p;
252 	idx = fin->fin_p;
253 	frag.ipfr_id = fin->fin_id;
254 	idx += fin->fin_id;
255 	frag.ipfr_source = fin->fin_fi.fi_src;
256 	idx += frag.ipfr_src.s_addr;
257 	frag.ipfr_dest = fin->fin_fi.fi_dst;
258 	idx += frag.ipfr_dst.s_addr;
259 	frag.ipfr_ifp = fin->fin_ifp;
260 	idx *= 127;
261 	idx %= IPFT_SIZE;
262 
263 	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
264 	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
265 	frag.ipfr_auth = fin->fin_fi.fi_auth;
266 
267 	/*
268 	 * first, make sure it isn't already there...
269 	 */
270 	for (fra = table[idx]; (fra != NULL); fra = fra->ipfr_hnext)
271 		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp,
272 			  IPFR_CMPSZ)) {
273 			ipfr_stats.ifs_exists++;
274 			return NULL;
275 		}
276 
277 	/*
278 	 * allocate some memory, if possible, if not, just record that we
279 	 * failed to do so.
280 	 */
281 	KMALLOC(fra, ipfr_t *);
282 	if (fra == NULL) {
283 		ipfr_stats.ifs_nomem++;
284 		return NULL;
285 	}
286 
287 	fra->ipfr_rule = fin->fin_fr;
288 	if (fra->ipfr_rule != NULL) {
289 
290 		frentry_t *fr;
291 
292 		fr = fin->fin_fr;
293 		MUTEX_ENTER(&fr->fr_lock);
294 		fr->fr_ref++;
295 		MUTEX_EXIT(&fr->fr_lock);
296 	}
297 
298 	/*
299 	 * Insert the fragment into the fragment table, copy the struct used
300 	 * in the search using bcopy rather than reassign each field.
301 	 * Set the ttl to the default.
302 	 */
303 	if ((fra->ipfr_hnext = table[idx]) != NULL)
304 		table[idx]->ipfr_hprev = &fra->ipfr_hnext;
305 	fra->ipfr_hprev = table + idx;
306 	fra->ipfr_data = NULL;
307 	table[idx] = fra;
308 	bcopy((char *)&frag.ipfr_ifp, (char *)&fra->ipfr_ifp, IPFR_CMPSZ);
309 	fra->ipfr_ttl = fr_ticks + fr_ipfrttl;
310 
311 	/*
312 	 * Compute the offset of the expected start of the next packet.
313 	 */
314 	off = fin->fin_off;
315 	if (off == 0) {
316 		fra->ipfr_seen0 = 1;
317 		fra->ipfr_firstend = fin->fin_flen;
318 	} else {
319 		fra->ipfr_seen0 = 0;
320 		fra->ipfr_firstend = 0;
321 	}
322 	fra->ipfr_off = off + fin->fin_dlen;
323 	fra->ipfr_pass = pass;
324 	ipfr_stats.ifs_new++;
325 	ipfr_inuse++;
326 	return fra;
327 }
328 
329 
330 /* ------------------------------------------------------------------------ */
331 /* Function:    fr_newfrag                                                  */
332 /* Returns:     int - 0 == success, -1 == error                             */
333 /* Parameters:  fin(I)  - pointer to packet information                     */
334 /*                                                                          */
335 /* Add a new entry to the fragment cache table based on the current packet  */
336 /* ------------------------------------------------------------------------ */
337 int fr_newfrag(fin, pass)
338 u_32_t pass;
339 fr_info_t *fin;
340 {
341 	ipfr_t	*fra;
342 
343 	if (fr_frag_lock != 0)
344 		return -1;
345 
346 	WRITE_ENTER(&ipf_frag);
347 	fra = ipfr_newfrag(fin, pass, ipfr_heads);
348 	if (fra != NULL) {
349 		*ipfr_tail = fra;
350 		fra->ipfr_prev = ipfr_tail;
351 		ipfr_tail = &fra->ipfr_next;
352 		if (ipfr_list == NULL)
353 			ipfr_list = fra;
354 		fra->ipfr_next = NULL;
355 	}
356 	RWLOCK_EXIT(&ipf_frag);
357 	return fra ? 0 : -1;
358 }
359 
360 
361 /* ------------------------------------------------------------------------ */
362 /* Function:    fr_nat_newfrag                                              */
363 /* Returns:     int - 0 == success, -1 == error                             */
364 /* Parameters:  fin(I)  - pointer to packet information                     */
365 /*              nat(I)  - pointer to NAT structure                          */
366 /*                                                                          */
367 /* Create a new NAT fragment cache entry based on the current packet and    */
368 /* the NAT structure for this "session".                                    */
369 /* ------------------------------------------------------------------------ */
370 int fr_nat_newfrag(fin, pass, nat)
371 fr_info_t *fin;
372 u_32_t pass;
373 nat_t *nat;
374 {
375 	ipfr_t	*fra;
376 
377 	if ((fin->fin_v != 4) || (fr_frag_lock != 0))
378 		return 0;
379 
380 	WRITE_ENTER(&ipf_natfrag);
381 	fra = ipfr_newfrag(fin, pass, ipfr_nattab);
382 	if (fra != NULL) {
383 		fra->ipfr_data = nat;
384 		nat->nat_data = fra;
385 		*ipfr_nattail = fra;
386 		fra->ipfr_prev = ipfr_nattail;
387 		ipfr_nattail = &fra->ipfr_next;
388 		fra->ipfr_next = NULL;
389 	}
390 	RWLOCK_EXIT(&ipf_natfrag);
391 	return fra ? 0 : -1;
392 }
393 
394 
395 /* ------------------------------------------------------------------------ */
396 /* Function:    fr_ipid_newfrag                                             */
397 /* Returns:     int - 0 == success, -1 == error                             */
398 /* Parameters:  fin(I)  - pointer to packet information                     */
399 /*              ipid(I) - new IP ID for this fragmented packet              */
400 /*                                                                          */
401 /* Create a new fragment cache entry for this packet and store, as a data   */
402 /* pointer, the new IP ID value.                                            */
403 /* ------------------------------------------------------------------------ */
404 int fr_ipid_newfrag(fin, ipid)
405 fr_info_t *fin;
406 u_32_t ipid;
407 {
408 	ipfr_t	*fra;
409 
410 	if (fr_frag_lock)
411 		return 0;
412 
413 	WRITE_ENTER(&ipf_ipidfrag);
414 	fra = ipfr_newfrag(fin, 0, ipfr_ipidtab);
415 	if (fra != NULL) {
416 		fra->ipfr_data = (void *)(uintptr_t)ipid;
417 		*ipfr_ipidtail = fra;
418 		fra->ipfr_prev = ipfr_ipidtail;
419 		ipfr_ipidtail = &fra->ipfr_next;
420 		fra->ipfr_next = NULL;
421 	}
422 	RWLOCK_EXIT(&ipf_ipidfrag);
423 	return fra ? 0 : -1;
424 }
425 
426 
427 /* ------------------------------------------------------------------------ */
428 /* Function:    fr_fraglookup                                               */
429 /* Returns:     ipfr_t * - pointer to ipfr_t structure if there's a         */
430 /*                         matching entry in the frag table, else NULL      */
431 /* Parameters:  fin(I)   - pointer to packet information                    */
432 /*              table(I) - pointer to fragment cache table to search        */
433 /*                                                                          */
434 /* Check the fragment cache to see if there is already a record of this     */
435 /* packet with its filter result known.                                     */
436 /* ------------------------------------------------------------------------ */
437 static ipfr_t *fr_fraglookup(fin, table)
438 fr_info_t *fin;
439 ipfr_t *table[];
440 {
441 	ipfr_t *f, frag;
442 	u_int idx;
443 
444 	if ((fin->fin_flx & (FI_FRAG|FI_BAD)) != FI_FRAG)
445 		return NULL;
446 
447 	/*
448 	 * For fragments, we record protocol, packet id, TOS and both IP#'s
449 	 * (these should all be the same for all fragments of a packet).
450 	 *
451 	 * build up a hash value to index the table with.
452 	 */
453 	frag.ipfr_p = fin->fin_p;
454 	idx = fin->fin_p;
455 	frag.ipfr_id = fin->fin_id;
456 	idx += fin->fin_id;
457 	frag.ipfr_source = fin->fin_fi.fi_src;
458 	idx += frag.ipfr_src.s_addr;
459 	frag.ipfr_dest = fin->fin_fi.fi_dst;
460 	idx += frag.ipfr_dst.s_addr;
461 	frag.ipfr_ifp = fin->fin_ifp;
462 	idx *= 127;
463 	idx %= IPFT_SIZE;
464 
465 	frag.ipfr_optmsk = fin->fin_fi.fi_optmsk & IPF_OPTCOPY;
466 	frag.ipfr_secmsk = fin->fin_fi.fi_secmsk;
467 	frag.ipfr_auth = fin->fin_fi.fi_auth;
468 
469 	/*
470 	 * check the table, careful to only compare the right amount of data
471 	 */
472 	for (f = table[idx]; f; f = f->ipfr_hnext)
473 		if (!bcmp((char *)&frag.ipfr_ifp, (char *)&f->ipfr_ifp,
474 			  IPFR_CMPSZ)) {
475 			u_short	off;
476 
477 			/*
478 			 * We don't want to let short packets match because
479 			 * they could be compromising the security of other
480 			 * rules that want to match on layer 4 fields (and
481 			 * can't because they have been fragmented off.)
482 			 * Why do this check here?  The counter acts as an
483 			 * indicator of this kind of attack, whereas if it was
484 			 * elsewhere, it wouldn't know if other matching
485 			 * packets had been seen.
486 			 */
487 			if (fin->fin_flx & FI_SHORT) {
488 				ATOMIC_INCL(ipfr_stats.ifs_short);
489 				continue;
490 			}
491 
492 			/*
493 			 * XXX - We really need to be guarding against the
494 			 * retransmission of (src,dst,id,offset-range) here
495 			 * because a fragmented packet is never resent with
496 			 * the same IP ID# (or shouldn't).
497 			 */
498 			off = fin->fin_off; /* same as in ipfr_newfrag() */
499 			if (f->ipfr_seen0) {
500 				if (off == 0) {
501 					ATOMIC_INCL(ipfr_stats.ifs_retrans0);
502 					continue;
503 				}
504 			} else if (off == 0) {
505 				f->ipfr_seen0 = 1;
506 				f->ipfr_firstend = fin->fin_flen;
507 			}
508 
509 			if (f != table[idx]) {
510 				ipfr_t **fp;
511 
512 				/*
513 				 * Move fragment info. to the top of the list
514 				 * to speed up searches.  First, delink...
515 				 */
516 				fp = f->ipfr_hprev;
517 				(*fp) = f->ipfr_hnext;
518 				if (f->ipfr_hnext != NULL)
519 					f->ipfr_hnext->ipfr_hprev = fp;
520 				/*
521 				 * Then put back at the top of the chain.
522 				 */
523 				f->ipfr_hnext = table[idx];
524 				table[idx]->ipfr_hprev = &f->ipfr_hnext;
525 				f->ipfr_hprev = table + idx;
526 				table[idx] = f;
527 			}
528 
529 			if (fin->fin_v == 6) {
530 				if (f->ipfr_seen0 && (off < f->ipfr_firstend))
531 					fin->fin_flx |= FI_BAD;
532 			}
533 			/*
534 			 * If we've follwed the fragments, and this is the
535 			 * last (in order), shrink expiration time.
536 			 */
537 			if (off == f->ipfr_off) {
538 				if (!(fin->fin_ip->ip_off & IP_MF))
539 					f->ipfr_ttl = fr_ticks + 1;
540 				f->ipfr_off = fin->fin_dlen + off;
541 			} else if (f->ipfr_pass & FR_FRSTRICT)
542 				continue;
543 			ATOMIC_INCL(ipfr_stats.ifs_hits);
544 			return f;
545 		}
546 	return NULL;
547 }
548 
549 
550 /* ------------------------------------------------------------------------ */
551 /* Function:    fr_nat_knownfrag                                            */
552 /* Returns:     nat_t* - pointer to 'parent' NAT structure if frag table    */
553 /*                       match found, else NULL                             */
554 /* Parameters:  fin(I)  - pointer to packet information                     */
555 /*                                                                          */
556 /* Functional interface for NAT lookups of the NAT fragment cache           */
557 /* ------------------------------------------------------------------------ */
558 nat_t *fr_nat_knownfrag(fin)
559 fr_info_t *fin;
560 {
561 	nat_t	*nat;
562 	ipfr_t	*ipf;
563 
564 	if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_natlist)
565 		return NULL;
566 	READ_ENTER(&ipf_natfrag);
567 	ipf = fr_fraglookup(fin, ipfr_nattab);
568 	if (ipf != NULL) {
569 		nat = ipf->ipfr_data;
570 		/*
571 		 * This is the last fragment for this packet.
572 		 */
573 		if ((ipf->ipfr_ttl == fr_ticks + 1) && (nat != NULL)) {
574 			nat->nat_data = NULL;
575 			ipf->ipfr_data = NULL;
576 		}
577 	} else
578 		nat = NULL;
579 	RWLOCK_EXIT(&ipf_natfrag);
580 	return nat;
581 }
582 
583 
584 /* ------------------------------------------------------------------------ */
585 /* Function:    fr_ipid_knownfrag                                           */
586 /* Returns:     u_32_t - IPv4 ID for this packet if match found, else       */
587 /*                       return 0xfffffff to indicate no match.             */
588 /* Parameters:  fin(I) - pointer to packet information                      */
589 /*                                                                          */
590 /* Functional interface for IP ID lookups of the IP ID fragment cache       */
591 /* ------------------------------------------------------------------------ */
592 u_32_t fr_ipid_knownfrag(fin)
593 fr_info_t *fin;
594 {
595 	ipfr_t	*ipf;
596 	u_32_t	id;
597 
598 	if ((fin->fin_v != 4) || (fr_frag_lock) || !ipfr_ipidlist)
599 		return 0xffffffff;
600 
601 	READ_ENTER(&ipf_ipidfrag);
602 	ipf = fr_fraglookup(fin, ipfr_ipidtab);
603 	if (ipf != NULL)
604 		id = (u_32_t)(uintptr_t)ipf->ipfr_data;
605 	else
606 		id = 0xffffffff;
607 	RWLOCK_EXIT(&ipf_ipidfrag);
608 	return id;
609 }
610 
611 
612 /* ------------------------------------------------------------------------ */
613 /* Function:    fr_knownfrag                                                */
614 /* Returns:     frentry_t* - pointer to filter rule if a match is found in  */
615 /*                           the frag cache table, else NULL.               */
616 /* Parameters:  fin(I)   - pointer to packet information                    */
617 /*              passp(O) - pointer to where to store rule flags resturned   */
618 /*                                                                          */
619 /* Functional interface for normal lookups of the fragment cache.  If a     */
620 /* match is found, return the rule pointer and flags from the rule, except  */
621 /* that if FR_LOGFIRST is set, reset FR_LOG.                                */
622 /* ------------------------------------------------------------------------ */
623 frentry_t *fr_knownfrag(fin, passp)
624 fr_info_t *fin;
625 u_32_t *passp;
626 {
627 	frentry_t *fr = NULL;
628 	ipfr_t	*fra;
629 	u_32_t pass, oflx;
630 
631 	if ((fr_frag_lock) || (ipfr_list == NULL))
632 		return NULL;
633 
634 	READ_ENTER(&ipf_frag);
635 	oflx = fin->fin_flx;
636 	fra = fr_fraglookup(fin, ipfr_heads);
637 	if (fra != NULL) {
638 		fr = fra->ipfr_rule;
639 		fin->fin_fr = fr;
640 		if (fr != NULL) {
641 			pass = fr->fr_flags;
642 			if ((pass & FR_LOGFIRST) != 0)
643 				pass &= ~(FR_LOGFIRST|FR_LOG);
644 			*passp = pass;
645 		}
646 	}
647 	if (!(oflx & FI_BAD) && (fin->fin_flx & FI_BAD)) {
648 		*passp &= ~FR_CMDMASK;
649 		*passp |= FR_BLOCK;
650 		fr = &frblock;
651 	}
652 	RWLOCK_EXIT(&ipf_frag);
653 	return fr;
654 }
655 
656 
657 /* ------------------------------------------------------------------------ */
658 /* Function:    fr_forget                                                   */
659 /* Returns:     Nil                                                         */
660 /* Parameters:  ptr(I) - pointer to data structure                          */
661 /*                                                                          */
662 /* Search through all of the fragment cache entries and wherever a pointer  */
663 /* is found to match ptr, reset it to NULL.                                 */
664 /* ------------------------------------------------------------------------ */
665 void fr_forget(ptr)
666 void *ptr;
667 {
668 	ipfr_t	*fr;
669 
670 	WRITE_ENTER(&ipf_frag);
671 	for (fr = ipfr_list; fr; fr = fr->ipfr_next)
672 		if (fr->ipfr_data == ptr)
673 			fr->ipfr_data = NULL;
674 	RWLOCK_EXIT(&ipf_frag);
675 }
676 
677 
678 /* ------------------------------------------------------------------------ */
679 /* Function:    fr_forgetnat                                                */
680 /* Returns:     Nil                                                         */
681 /* Parameters:  ptr(I) - pointer to data structure                          */
682 /*                                                                          */
683 /* Search through all of the fragment cache entries for NAT and wherever a  */
684 /* pointer  is found to match ptr, reset it to NULL.                        */
685 /* ------------------------------------------------------------------------ */
686 void fr_forgetnat(ptr)
687 void *ptr;
688 {
689 	ipfr_t	*fr;
690 
691 	WRITE_ENTER(&ipf_natfrag);
692 	for (fr = ipfr_natlist; fr; fr = fr->ipfr_next)
693 		if (fr->ipfr_data == ptr)
694 			fr->ipfr_data = NULL;
695 	RWLOCK_EXIT(&ipf_natfrag);
696 }
697 
698 
699 /* ------------------------------------------------------------------------ */
700 /* Function:    fr_fragdelete                                               */
701 /* Returns:     Nil                                                         */
702 /* Parameters:  fra(I)   - pointer to fragment structure to delete          */
703 /*              tail(IO) - pointer to the pointer to the tail of the frag   */
704 /*                         list                                             */
705 /*                                                                          */
706 /* Remove a fragment cache table entry from the table & list.  Also free    */
707 /* the filter rule it is associated with it if it is no longer used as a    */
708 /* result of decreasing the reference count.                                */
709 /* ------------------------------------------------------------------------ */
710 static void fr_fragdelete(fra, tail)
711 ipfr_t *fra, ***tail;
712 {
713 	frentry_t *fr;
714 
715 	fr = fra->ipfr_rule;
716 	if (fr != NULL)
717 		(void)fr_derefrule(&fr);
718 
719 	if (fra->ipfr_next)
720 		fra->ipfr_next->ipfr_prev = fra->ipfr_prev;
721 	*fra->ipfr_prev = fra->ipfr_next;
722 	if (*tail == &fra->ipfr_next)
723 		*tail = fra->ipfr_prev;
724 
725 	if (fra->ipfr_hnext)
726 		fra->ipfr_hnext->ipfr_hprev = fra->ipfr_hprev;
727 	*fra->ipfr_hprev = fra->ipfr_hnext;
728 	KFREE(fra);
729 }
730 
731 
732 /* ------------------------------------------------------------------------ */
733 /* Function:    fr_fragclear                                                */
734 /* Returns:     Nil                                                         */
735 /* Parameters:  Nil                                                         */
736 /*                                                                          */
737 /* Free memory in use by fragment state information kept.  Do the normal    */
738 /* fragment state stuff first and then the NAT-fragment table.              */
739 /* ------------------------------------------------------------------------ */
740 void fr_fragclear()
741 {
742 	ipfr_t	*fra;
743 	nat_t	*nat;
744 
745 	WRITE_ENTER(&ipf_frag);
746 	while ((fra = ipfr_list) != NULL)
747 		fr_fragdelete(fra, &ipfr_tail);
748 	ipfr_tail = &ipfr_list;
749 	RWLOCK_EXIT(&ipf_frag);
750 
751 	WRITE_ENTER(&ipf_nat);
752 	WRITE_ENTER(&ipf_natfrag);
753 	while ((fra = ipfr_natlist) != NULL) {
754 		nat = fra->ipfr_data;
755 		if (nat != NULL) {
756 			if (nat->nat_data == fra)
757 				nat->nat_data = NULL;
758 		}
759 		fr_fragdelete(fra, &ipfr_nattail);
760 	}
761 	ipfr_nattail = &ipfr_natlist;
762 	RWLOCK_EXIT(&ipf_natfrag);
763 	RWLOCK_EXIT(&ipf_nat);
764 }
765 
766 
767 /* ------------------------------------------------------------------------ */
768 /* Function:    fr_fragexpire                                               */
769 /* Returns:     Nil                                                         */
770 /* Parameters:  Nil                                                         */
771 /*                                                                          */
772 /* Expire entries in the fragment cache table that have been there too long */
773 /* ------------------------------------------------------------------------ */
774 void fr_fragexpire()
775 {
776 	ipfr_t	**fp, *fra;
777 	nat_t	*nat;
778 	SPL_INT(s);
779 
780 	if (fr_frag_lock)
781 		return;
782 
783 	SPL_NET(s);
784 	WRITE_ENTER(&ipf_frag);
785 	/*
786 	 * Go through the entire table, looking for entries to expire,
787 	 * which is indicated by the ttl being less than or equal to fr_ticks.
788 	 */
789 	for (fp = &ipfr_list; ((fra = *fp) != NULL); ) {
790 		if (fra->ipfr_ttl > fr_ticks)
791 			break;
792 		fr_fragdelete(fra, &ipfr_tail);
793 		ipfr_stats.ifs_expire++;
794 		ipfr_inuse--;
795 	}
796 	RWLOCK_EXIT(&ipf_frag);
797 
798 	WRITE_ENTER(&ipf_ipidfrag);
799 	for (fp = &ipfr_ipidlist; ((fra = *fp) != NULL); ) {
800 		if (fra->ipfr_ttl > fr_ticks)
801 			break;
802 		fr_fragdelete(fra, &ipfr_ipidtail);
803 		ipfr_stats.ifs_expire++;
804 		ipfr_inuse--;
805 	}
806 	RWLOCK_EXIT(&ipf_ipidfrag);
807 
808 	/*
809 	 * Same again for the NAT table, except that if the structure also
810 	 * still points to a NAT structure, and the NAT structure points back
811 	 * at the one to be free'd, NULL the reference from the NAT struct.
812 	 * NOTE: We need to grab both mutex's early, and in this order so as
813 	 * to prevent a deadlock if both try to expire at the same time.
814 	 */
815 	WRITE_ENTER(&ipf_nat);
816 	WRITE_ENTER(&ipf_natfrag);
817 	for (fp = &ipfr_natlist; ((fra = *fp) != NULL); ) {
818 		if (fra->ipfr_ttl > fr_ticks)
819 			break;
820 		nat = fra->ipfr_data;
821 		if (nat != NULL) {
822 			if (nat->nat_data == fra)
823 				nat->nat_data = NULL;
824 		}
825 		fr_fragdelete(fra, &ipfr_nattail);
826 		ipfr_stats.ifs_expire++;
827 		ipfr_inuse--;
828 	}
829 	RWLOCK_EXIT(&ipf_natfrag);
830 	RWLOCK_EXIT(&ipf_nat);
831 	SPL_X(s);
832 }
833 
834 
835 /* ------------------------------------------------------------------------ */
836 /* Function:    fr_slowtimer                                                */
837 /* Returns:     Nil                                                         */
838 /* Parameters:  Nil                                                         */
839 /*                                                                          */
840 /* Slowly expire held state for fragments.  Timeouts are set * in           */
841 /* expectation of this being called twice per second.                       */
842 /* ------------------------------------------------------------------------ */
843 #if !defined(_KERNEL) || (!SOLARIS && !defined(__hpux) && !defined(__sgi) && \
844 			  !defined(__osf__) && !defined(linux))
845 # if defined(_KERNEL) && ((BSD >= 199103) || defined(__sgi))
846 void fr_slowtimer __P((void *ptr))
847 # else
848 int fr_slowtimer()
849 # endif
850 {
851 	READ_ENTER(&ipf_global);
852 
853 	fr_fragexpire();
854 	fr_timeoutstate();
855 	fr_natexpire();
856 	fr_authexpire();
857 	fr_ticks++;
858 	if (fr_running <= 0)
859 		goto done;
860 # ifdef _KERNEL
861 #  if defined(__NetBSD__) && (__NetBSD_Version__ >= 104240000)
862 	callout_reset(&fr_slowtimer_ch, hz / 2, fr_slowtimer, NULL);
863 #  else
864 #   if defined(__OpenBSD__)
865 	timeout_add(&fr_slowtimer_ch, hz/2);
866 #   else
867 #    if (__FreeBSD_version >= 300000)
868 	fr_slowtimer_ch = timeout(fr_slowtimer, NULL, hz/2);
869 #    else
870 #     ifdef linux
871 	;
872 #     else
873 	timeout(fr_slowtimer, NULL, hz/2);
874 #     endif
875 #    endif /* FreeBSD */
876 #   endif /* OpenBSD */
877 #  endif /* NetBSD */
878 # endif
879 done:
880 	RWLOCK_EXIT(&ipf_global);
881 # if (BSD < 199103) || !defined(_KERNEL)
882 	return 0;
883 # endif
884 }
885 #endif /* !SOLARIS && !defined(__hpux) && !defined(__sgi) */
886