xref: /freebsd/sys/netinet/ip_reass.c (revision f305e6eaf8d98d9c2ca7ca97791e84521f25b801)
1 /*-
2  * Copyright (c) 2015 Gleb Smirnoff <glebius@FreeBSD.org>
3  * Copyright (c) 2015 Adrian Chadd <adrian@FreeBSD.org>
4  * Copyright (c) 1982, 1986, 1988, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 4. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 #include "opt_rss.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/eventhandler.h>
42 #include <sys/hash.h>
43 #include <sys/mbuf.h>
44 #include <sys/malloc.h>
45 #include <sys/lock.h>
46 #include <sys/mutex.h>
47 #include <sys/sysctl.h>
48 
49 #include <net/rss_config.h>
50 #include <net/vnet.h>
51 
52 #include <netinet/in.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip_var.h>
55 #include <netinet/in_rss.h>
56 #ifdef MAC
57 #include <security/mac/mac_framework.h>
58 #endif
59 
60 SYSCTL_DECL(_net_inet_ip);
61 
62 /*
63  * Reassembly headers are stored in hash buckets.
64  */
65 #define	IPREASS_NHASH_LOG2	6
66 #define	IPREASS_NHASH		(1 << IPREASS_NHASH_LOG2)
67 #define	IPREASS_HMASK		(IPREASS_NHASH - 1)
68 
69 struct ipqbucket {
70 	TAILQ_HEAD(ipqhead, ipq) head;
71 	struct mtx		 lock;
72 };
73 
74 static VNET_DEFINE(struct ipqbucket, ipq[IPREASS_NHASH]);
75 #define	V_ipq		VNET(ipq)
76 static VNET_DEFINE(uint32_t, ipq_hashseed);
77 #define V_ipq_hashseed   VNET(ipq_hashseed)
78 
79 #define	IPQ_LOCK(i)	mtx_lock(&V_ipq[i].lock)
80 #define	IPQ_TRYLOCK(i)	mtx_trylock(&V_ipq[i].lock)
81 #define	IPQ_UNLOCK(i)	mtx_unlock(&V_ipq[i].lock)
82 #define	IPQ_LOCK_ASSERT(i)	mtx_assert(&V_ipq[i].lock, MA_OWNED)
83 
84 void		ipreass_init(void);
85 void		ipreass_drain(void);
86 void		ipreass_slowtimo(void);
87 #ifdef VIMAGE
88 void		ipreass_destroy(void);
89 #endif
90 static int	sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS);
91 static void	ipreass_zone_change(void *);
92 static void	ipreass_drain_tomax(void);
93 static void	ipq_free(struct ipqhead *, struct ipq *);
94 static struct ipq *ipq_reuse(int);
95 
96 static inline void
97 ipq_timeout(struct ipqhead *head, struct ipq *fp)
98 {
99 
100 	IPSTAT_ADD(ips_fragtimeout, fp->ipq_nfrags);
101 	ipq_free(head, fp);
102 }
103 
104 static inline void
105 ipq_drop(struct ipqhead *head, struct ipq *fp)
106 {
107 
108 	IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
109 	ipq_free(head, fp);
110 }
111 
112 static VNET_DEFINE(uma_zone_t, ipq_zone);
113 #define	V_ipq_zone	VNET(ipq_zone)
114 SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_VNET |
115     CTLTYPE_INT | CTLFLAG_RW, NULL, 0, sysctl_maxfragpackets, "I",
116     "Maximum number of IPv4 fragment reassembly queue entries");
117 SYSCTL_UMA_CUR(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_VNET,
118     &VNET_NAME(ipq_zone),
119     "Current number of IPv4 fragment reassembly queue entries");
120 
121 static VNET_DEFINE(int, noreass);
122 #define	V_noreass	VNET(noreass)
123 
124 static VNET_DEFINE(int, maxfragsperpacket);
125 #define	V_maxfragsperpacket	VNET(maxfragsperpacket)
126 SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_VNET | CTLFLAG_RW,
127     &VNET_NAME(maxfragsperpacket), 0,
128     "Maximum number of IPv4 fragments allowed per packet");
129 
130 /*
131  * Take an incoming datagram fragment and try to reassemble it into a
132  * whole datagram.  If the argument is the first fragment or one in
133  * between, the function returns NULL and stores the mbuf in the
134  * fragment chain.  If the argument is the last fragment, the packet
135  * is reassembled and a pointer to the new mbuf is returned for
136  * further processing.  Only m_tags attached to the first
137  * packet/fragment are preserved.
138  * The IP header is *NOT* adjusted out of iplen.
139  */
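/*
 * M_IP_FRAG (an alias for M_PROTO9) marks queued fragments whose IP
 * header had IP_MF set, i.e. any fragment other than the last one.
 */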
140 #define	M_IP_FRAG	M_PROTO9
141 struct mbuf *
142 ip_reass(struct mbuf *m)
143 {
144 	struct ip *ip;
145 	struct mbuf *p, *q, *nq, *t;
146 	struct ipq *fp;
147 	struct ipqhead *head;
148 	int i, hlen, next;
149 	u_int8_t ecn, ecn0;
150 	uint32_t hash;
151 #ifdef	RSS
152 	uint32_t rss_hash, rss_type;
153 #endif
154 
155 	/*
156 	 * If reassembly is disabled or maxfragsperpacket is 0,
157 	 * never accept fragments.
158 	 */
159 	if (V_noreass == 1 || V_maxfragsperpacket == 0) {
160 		IPSTAT_INC(ips_fragments);
161 		IPSTAT_INC(ips_fragdropped);
162 		m_freem(m);
163 		return (NULL);
164 	}
165 
166 	ip = mtod(m, struct ip *);
167 	hlen = ip->ip_hl << 2;
168 
169 	/*
170 	 * Adjust ip_len to not reflect the header and convert
171 	 * the fragment offset of this packet to bytes.
172 	 */
173 	ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
174 	if (ip->ip_off & htons(IP_MF)) {
175 		/*
176 		 * Make sure that fragments have a data length
177 		 * that's a non-zero multiple of 8 bytes.
178 		 */
179 		if (ip->ip_len == htons(0) || (ntohs(ip->ip_len) & 0x7) != 0) {
180 			IPSTAT_INC(ips_toosmall); /* XXX */
181 			IPSTAT_INC(ips_fragdropped);
182 			m_freem(m);
183 			return (NULL);
184 		}
185 		m->m_flags |= M_IP_FRAG;
186 	} else
187 		m->m_flags &= ~M_IP_FRAG;
188 	ip->ip_off = htons(ntohs(ip->ip_off) << 3);
189 
190 	/*
191 	 * Count this fragment and stash a pointer to its IP header in
192 	 * the packet header for use while it sits on the reassembly queue.
193 	 */
194 	IPSTAT_INC(ips_fragments);
195 	m->m_pkthdr.PH_loc.ptr = ip;
196 
197 	/*
198 	 * Advance past the IP header; the reassembly code below expects
199 	 * mbuf lengths to cover the payload only.
200 	 */
201 	m->m_data += hlen;
202 	m->m_len -= hlen;
203 
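	/*
	 * Pick the reassembly bucket: hash the source address and IP ID,
	 * salted with the per-VNET seed, and mask into IPREASS_NHASH buckets.
	 */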
204 	hash = ip->ip_src.s_addr ^ ip->ip_id;
205 	hash = jenkins_hash32(&hash, 1, V_ipq_hashseed) & IPREASS_HMASK;
206 	head = &V_ipq[hash].head;
207 	IPQ_LOCK(hash);
208 
209 	/*
210 	 * Look for queue of fragments
211 	 * of this datagram.
212 	 */
213 	TAILQ_FOREACH(fp, head, ipq_list)
214 		if (ip->ip_id == fp->ipq_id &&
215 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
216 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
217 #ifdef MAC
218 		    mac_ipq_match(m, fp) &&
219 #endif
220 		    ip->ip_p == fp->ipq_p)
221 			break;
222 	/*
223 	 * If first fragment to arrive, create a reassembly queue.
224 	 */
225 	if (fp == NULL) {
226 		fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
227 		if (fp == NULL)
228 			fp = ipq_reuse(hash);
229 #ifdef MAC
230 		if (mac_ipq_init(fp, M_NOWAIT) != 0) {
231 			uma_zfree(V_ipq_zone, fp);
232 			fp = NULL;
233 			goto dropfrag;
234 		}
235 		mac_ipq_create(m, fp);
236 #endif
237 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
238 		fp->ipq_nfrags = 1;
239 		fp->ipq_ttl = IPFRAGTTL;
240 		fp->ipq_p = ip->ip_p;
241 		fp->ipq_id = ip->ip_id;
242 		fp->ipq_src = ip->ip_src;
243 		fp->ipq_dst = ip->ip_dst;
244 		fp->ipq_frags = m;
245 		m->m_nextpkt = NULL;
246 		goto done;
247 	} else {
248 		fp->ipq_nfrags++;
249 #ifdef MAC
250 		mac_ipq_update(m, fp);
251 #endif
252 	}
253 
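/*
 * Each queued fragment carries a pointer to its IP header, stashed in
 * m_pkthdr.PH_loc above; GETIP() retrieves it.
 */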
254 #define GETIP(m)	((struct ip*)((m)->m_pkthdr.PH_loc.ptr))
255 
256 	/*
257 	 * Handle ECN by comparing this segment with the first one;
258 	 * if CE is set, do not lose CE.
259 	 * Drop if CE and not-ECT are mixed for the same packet.
260 	 */
261 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
262 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
263 	if (ecn == IPTOS_ECN_CE) {
264 		if (ecn0 == IPTOS_ECN_NOTECT)
265 			goto dropfrag;
266 		if (ecn0 != IPTOS_ECN_CE)
267 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
268 	}
269 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
270 		goto dropfrag;
271 
272 	/*
273 	 * Find a segment which begins after this one does.
274 	 */
275 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
276 		if (ntohs(GETIP(q)->ip_off) > ntohs(ip->ip_off))
277 			break;
278 
279 	/*
280 	 * If there is a preceding segment, it may provide some of
281 	 * our data already.  If so, drop the data from the incoming
282 	 * segment.  If it provides all of our data, drop us, otherwise
283 	 * stick new segment in the proper place.
284 	 *
285 	 * If some of the data is dropped from the preceding
286 	 * segment, then its checksum is invalidated.
287 	 */
288 	if (p) {
289 		i = ntohs(GETIP(p)->ip_off) + ntohs(GETIP(p)->ip_len) -
290 		    ntohs(ip->ip_off);
291 		if (i > 0) {
292 			if (i >= ntohs(ip->ip_len))
293 				goto dropfrag;
294 			m_adj(m, i);
295 			m->m_pkthdr.csum_flags = 0;
296 			ip->ip_off = htons(ntohs(ip->ip_off) + i);
297 			ip->ip_len = htons(ntohs(ip->ip_len) - i);
298 		}
299 		m->m_nextpkt = p->m_nextpkt;
300 		p->m_nextpkt = m;
301 	} else {
302 		m->m_nextpkt = fp->ipq_frags;
303 		fp->ipq_frags = m;
304 	}
305 
306 	/*
307 	 * While we overlap succeeding segments, trim them or,
308 	 * if they are completely covered, dequeue them.
309 	 */
310 	for (; q != NULL && ntohs(ip->ip_off) + ntohs(ip->ip_len) >
311 	    ntohs(GETIP(q)->ip_off); q = nq) {
312 		i = (ntohs(ip->ip_off) + ntohs(ip->ip_len)) -
313 		    ntohs(GETIP(q)->ip_off);
314 		if (i < ntohs(GETIP(q)->ip_len)) {
315 			GETIP(q)->ip_len = htons(ntohs(GETIP(q)->ip_len) - i);
316 			GETIP(q)->ip_off = htons(ntohs(GETIP(q)->ip_off) + i);
317 			m_adj(q, i);
318 			q->m_pkthdr.csum_flags = 0;
319 			break;
320 		}
321 		nq = q->m_nextpkt;
322 		m->m_nextpkt = nq;
323 		IPSTAT_INC(ips_fragdropped);
324 		fp->ipq_nfrags--;
325 		m_freem(q);
326 	}
327 
328 	/*
329 	 * Check for complete reassembly and enforce the per-packet
330 	 * fragment limit.
331 	 *
332 	 * Frag limiting is performed here so that the nth frag has
333 	 * a chance to complete the packet before we drop the packet.
334 	 * As a result, n+1 frags are actually allowed per packet, but
335 	 * only n will ever be stored. (n = maxfragsperpacket.)
336 	 *
337 	 */
338 	next = 0;
339 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
340 		if (ntohs(GETIP(q)->ip_off) != next) {
341 			if (fp->ipq_nfrags > V_maxfragsperpacket)
342 				ipq_drop(head, fp);
343 			goto done;
344 		}
345 		next += ntohs(GETIP(q)->ip_len);
346 	}
347 	/* Make sure the last fragment didn't have the IP_MF flag set. */
348 	if (p->m_flags & M_IP_FRAG) {
349 		if (fp->ipq_nfrags > V_maxfragsperpacket)
350 			ipq_drop(head, fp);
351 		goto done;
352 	}
353 
354 	/*
355 	 * Reassembly is complete.  Make sure the packet is a sane size.
356 	 */
357 	q = fp->ipq_frags;
358 	ip = GETIP(q);
359 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
360 		IPSTAT_INC(ips_toolong);
361 		ipq_drop(head, fp);
362 		goto done;
363 	}
364 
365 	/*
366 	 * Concatenate fragments.
367 	 */
368 	m = q;
369 	t = m->m_next;
370 	m->m_next = NULL;
371 	m_cat(m, t);
372 	nq = q->m_nextpkt;
373 	q->m_nextpkt = NULL;
374 	for (q = nq; q != NULL; q = nq) {
375 		nq = q->m_nextpkt;
376 		q->m_nextpkt = NULL;
377 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
378 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
379 		m_cat(m, q);
380 	}
381 	/*
382 	 * In order to do checksumming faster we do 'end-around carry' here
383 	 * (and not in for{} loop), though it implies we are not going to
384 	 * reassemble more than 64k fragments.
385 	 */
386 	while (m->m_pkthdr.csum_data & 0xffff0000)
387 		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
388 		    (m->m_pkthdr.csum_data >> 16);
389 #ifdef MAC
390 	mac_ipq_reassemble(fp, m);
391 	mac_ipq_destroy(fp);
392 #endif
393 
394 	/*
395 	 * Create header for new ip packet by modifying header of first
396 	 * packet;  dequeue and discard fragment reassembly header.
397 	 * Make header visible.
398 	 */
399 	ip->ip_len = htons((ip->ip_hl << 2) + next);
400 	ip->ip_src = fp->ipq_src;
401 	ip->ip_dst = fp->ipq_dst;
402 	TAILQ_REMOVE(head, fp, ipq_list);
403 	uma_zfree(V_ipq_zone, fp);
404 	m->m_len += (ip->ip_hl << 2);
405 	m->m_data -= (ip->ip_hl << 2);
406 	/* some debugging cruft by sklower, below, will go away soon */
407 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
408 		m_fixhdr(m);
409 	IPSTAT_INC(ips_reassembled);
410 	IPQ_UNLOCK(hash);
411 
412 #ifdef	RSS
413 	/*
414 	 * Query the RSS layer for the flowid / flowtype for the
415 	 * mbuf payload.
416 	 *
417 	 * For now, just assume we have to calculate a new one.
418 	 * Later on we should check to see if the assigned flowid matches
419 	 * what RSS wants for the given IP protocol and if so, just keep it.
420 	 *
421 	 * We then queue into the relevant netisr so it can be dispatched
422 	 * to the correct CPU.
423 	 *
424 	 * Note - this may return 1, which means the flowid in the mbuf
425 	 * is correct for the configured RSS hash types and can be used.
426 	 */
427 	if (rss_mbuf_software_hash_v4(m, 0, &rss_hash, &rss_type) == 0) {
428 		m->m_pkthdr.flowid = rss_hash;
429 		M_HASHTYPE_SET(m, rss_type);
430 	}
431 
432 	/*
433 	 * Queue/dispatch for reprocessing.
434 	 *
435 	 * Note: this is much slower than just handling the frame in the
436 	 * current receive context.  It's likely worth investigating
437 	 * why this is.
438 	 */
439 	netisr_dispatch(NETISR_IP_DIRECT, m);
440 	return (NULL);
441 #endif
442 
443 	/* Handle in-line */
444 	return (m);
445 
446 dropfrag:
447 	IPSTAT_INC(ips_fragdropped);
448 	if (fp != NULL)
449 		fp->ipq_nfrags--;
450 	m_freem(m);
451 done:
452 	IPQ_UNLOCK(hash);
453 	return (NULL);
454 
455 #undef GETIP
456 }
457 
458 /*
459  * Initialize IP reassembly structures.
460  */
461 void
462 ipreass_init(void)
463 {
464 
465 	for (int i = 0; i < IPREASS_NHASH; i++) {
466 		TAILQ_INIT(&V_ipq[i].head);
467 		mtx_init(&V_ipq[i].lock, "IP reassembly", NULL,
468 		    MTX_DEF | MTX_DUPOK);
469 	}
470 	V_ipq_hashseed = arc4random();
471 	V_maxfragsperpacket = 16;
472 	V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
473 	    NULL, UMA_ALIGN_PTR, 0);
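	/* Limit the zone to a fraction (1/32) of the mbuf cluster pool. */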
474 	uma_zone_set_max(V_ipq_zone, nmbclusters / 32);
475 
476 	if (IS_DEFAULT_VNET(curvnet))
477 		EVENTHANDLER_REGISTER(nmbclusters_change, ipreass_zone_change,
478 		    NULL, EVENTHANDLER_PRI_ANY);
479 }
480 
481 /*
482  * If a timer expires on a reassembly queue, discard it.
483  */
484 void
485 ipreass_slowtimo(void)
486 {
487 	struct ipq *fp, *tmp;
488 
489 	for (int i = 0; i < IPREASS_NHASH; i++) {
490 		IPQ_LOCK(i);
491 		TAILQ_FOREACH_SAFE(fp, &V_ipq[i].head, ipq_list, tmp)
492 			if (--fp->ipq_ttl == 0)
493 				ipq_timeout(&V_ipq[i].head, fp);
494 		IPQ_UNLOCK(i);
495 	}
496 }
497 
498 /*
499  * Drain off all datagram fragments.
500  */
501 void
502 ipreass_drain(void)
503 {
504 
505 	for (int i = 0; i < IPREASS_NHASH; i++) {
506 		IPQ_LOCK(i);
507 		while (!TAILQ_EMPTY(&V_ipq[i].head))
508 			ipq_drop(&V_ipq[i].head, TAILQ_FIRST(&V_ipq[i].head));
509 		IPQ_UNLOCK(i);
510 	}
511 }
512 
513 #ifdef VIMAGE
514 /*
515  * Destroy IP reassembly structures.
516  */
517 void
518 ipreass_destroy(void)
519 {
520 
521 	ipreass_drain();
522 	uma_zdestroy(V_ipq_zone);
523 	for (int i = 0; i < IPREASS_NHASH; i++)
524 		mtx_destroy(&V_ipq[i].lock);
525 }
526 #endif
527 
528 /*
529  * After the limit on reassembly queue entries has been lowered, drain
530  * excess entries until we are back under it.  The UMA zone max has
531  * slightly different semantics than the sysctl, for historical reasons.
532  */
533 static void
534 ipreass_drain_tomax(void)
535 {
536 	int target;
537 
538 	/*
539 	 * If we are over the maximum number of fragment queues,
540 	 * drain off enough to get down to the new limit,
541 	 * stripping off the last element of each queue.  On every
542 	 * pass we remove the oldest element from each bucket.
543 	 */
544 	target = uma_zone_get_max(V_ipq_zone);
545 	while (uma_zone_get_cur(V_ipq_zone) > target) {
546 		struct ipq *fp;
547 
548 		for (int i = 0; i < IPREASS_NHASH; i++) {
549 			IPQ_LOCK(i);
550 			fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
551 			if (fp != NULL)
552 				ipq_timeout(&V_ipq[i].head, fp);
553 			IPQ_UNLOCK(i);
554 		}
555 	}
556 }
557 
558 static void
559 ipreass_zone_change(void *tag)
560 {
561 
562 	uma_zone_set_max(V_ipq_zone, nmbclusters / 32);
563 	ipreass_drain_tomax();
564 }
565 
566 /*
567  * Change the limit on the UMA zone, or disable fragment reassembly
568  * entirely.  Since 0 and -1 are special values here, we need our own
569  * handler instead of sysctl_handle_uma_zone_max().
570  */
571 static int
572 sysctl_maxfragpackets(SYSCTL_HANDLER_ARGS)
573 {
574 	int error, max;
575 
576 	if (V_noreass == 0) {
577 		max = uma_zone_get_max(V_ipq_zone);
578 		if (max == 0)
579 			max = -1;
580 	} else
581 		max = 0;
582 	error = sysctl_handle_int(oidp, &max, 0, req);
583 	if (error || !req->newptr)
584 		return (error);
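	/*
	 * A positive value sets the zone limit, 0 disables reassembly
	 * and -1 removes the limit entirely.
	 */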
585 	if (max > 0) {
586 		/*
587 		 * XXXRW: Might be a good idea to sanity check the argument
588 		 * and place an extreme upper bound.
589 		 */
590 		max = uma_zone_set_max(V_ipq_zone, max);
591 		ipreass_drain_tomax();
592 		V_noreass = 0;
593 	} else if (max == 0) {
594 		V_noreass = 1;
595 		ipreass_drain();
596 	} else if (max == -1) {
597 		V_noreass = 0;
598 		uma_zone_set_max(V_ipq_zone, 0);
599 	} else
600 		return (EINVAL);
601 	return (0);
602 }
603 
604 /*
605  * Look for an old fragment queue header that can be reused.  Try the
606  * currently locked hash bucket first, then search the other buckets.
607  */
608 static struct ipq *
609 ipq_reuse(int start)
610 {
611 	struct ipq *fp;
612 	int i;
613 
614 	IPQ_LOCK_ASSERT(start);
615 
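	/*
	 * Walk the buckets starting with our own; other buckets are taken
	 * with trylock only, so we never block while holding a bucket lock.
	 */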
616 	for (i = start;; i++) {
617 		if (i == IPREASS_NHASH)
618 			i = 0;
619 		if (i != start && IPQ_TRYLOCK(i) == 0)
620 			continue;
621 		fp = TAILQ_LAST(&V_ipq[i].head, ipqhead);
622 		if (fp) {
623 			struct mbuf *m;
624 
625 			IPSTAT_ADD(ips_fragtimeout, fp->ipq_nfrags);
626 			while (fp->ipq_frags) {
627 				m = fp->ipq_frags;
628 				fp->ipq_frags = m->m_nextpkt;
629 				m_freem(m);
630 			}
631 			TAILQ_REMOVE(&V_ipq[i].head, fp, ipq_list);
632 			if (i != start)
633 				IPQ_UNLOCK(i);
634 			IPQ_LOCK_ASSERT(start);
635 			return (fp);
636 		}
637 		if (i != start)
638 			IPQ_UNLOCK(i);
639 	}
640 }
641 
642 /*
643  * Free a fragment reassembly header and all associated datagrams.
644  */
645 static void
646 ipq_free(struct ipqhead *fhp, struct ipq *fp)
647 {
648 	struct mbuf *q;
649 
650 	while (fp->ipq_frags) {
651 		q = fp->ipq_frags;
652 		fp->ipq_frags = q->m_nextpkt;
653 		m_freem(q);
654 	}
655 	TAILQ_REMOVE(fhp, fp, ipq_list);
656 	uma_zfree(V_ipq_zone, fp);
657 }
658