1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 * 31 * $KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $ 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_rss.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/hash.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/domain.h> 45 #include <sys/eventhandler.h> 46 #include <sys/kernel.h> 47 #include <sys/protosw.h> 48 #include <sys/socket.h> 49 #include <sys/errno.h> 50 #include <sys/time.h> 51 #include <sys/kernel.h> 52 #include <sys/syslog.h> 53 54 #include <machine/atomic.h> 55 56 #include <net/if.h> 57 #include <net/if_var.h> 58 #include <net/netisr.h> 59 #include <net/route.h> 60 #include <net/vnet.h> 61 62 #include <netinet/in.h> 63 #include <netinet/in_var.h> 64 #include <netinet/ip6.h> 65 #include <netinet6/ip6_var.h> 66 #include <netinet/icmp6.h> 67 #include <netinet/in_systm.h> /* for ECN definitions */ 68 #include <netinet/ip.h> /* for ECN definitions */ 69 70 #include <security/mac/mac_framework.h> 71 72 /* 73 * Reassembly headers are stored in hash buckets. 
 */
/*
 * Number of hash buckets is a power of two so that a hash value can be
 * reduced to a bucket index by masking with IP6REASS_HMASK.
 */
#define	IP6REASS_NHASH_LOG2	10
#define	IP6REASS_NHASH		(1 << IP6REASS_NHASH_LOG2)
#define	IP6REASS_HMASK		(IP6REASS_NHASH - 1)

static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *,
    uint32_t bucket __unused);
static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused);
static void frag6_insque_head(struct ip6q *, struct ip6q *,
    uint32_t bucket);
static void frag6_remque(struct ip6q *, uint32_t bucket);
static void frag6_freef(struct ip6q *, uint32_t bucket);

/*
 * One hash bucket of reassembly queues.
 */
struct ip6qbucket {
	struct ip6q	ip6q;	/* list head; links to itself when empty */
	struct mtx	lock;	/* taken via the IP6Q_*LOCK() macros */
	int		count;	/* queues linked in; see frag6_insque_head/remque */
};

/* Number of reassembly queues (fragmented packets) in this VNET. */
VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
/* Number of queued fragments; a plain global, not per-VNET. */
volatile u_int frag6_nfrags = 0;
VNET_DEFINE_STATIC(struct ip6qbucket, ip6q[IP6REASS_NHASH]);
/* Seed passed to the bucket hash; set from arc4random() in frag6_init(). */
VNET_DEFINE_STATIC(uint32_t, ip6q_hashseed);

#define	V_frag6_nfragpackets		VNET(frag6_nfragpackets)
#define	V_ip6q				VNET(ip6q)
#define	V_ip6q_hashseed			VNET(ip6q_hashseed)

/* Per-bucket locking helpers; (i) is a bucket index into V_ip6q[]. */
#define	IP6Q_LOCK(i)		mtx_lock(&V_ip6q[(i)].lock)
#define	IP6Q_TRYLOCK(i)		mtx_trylock(&V_ip6q[(i)].lock)
#define	IP6Q_LOCK_ASSERT(i)	mtx_assert(&V_ip6q[(i)].lock, MA_OWNED)
#define	IP6Q_UNLOCK(i)		mtx_unlock(&V_ip6q[(i)].lock)
#define	IP6Q_HEAD(i)		(&V_ip6q[(i)].ip6q)

static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");

/*
 * By default, limit the number of IP6 fragments across all reassembly
 * queues to 1/32 of the total number of mbuf clusters.
 *
 * Limit the total number of reassembly queues per VNET to the
 * IP6 fragment limit, but ensure the limit will not allow any bucket
 * to grow above 100 items. (The bucket limit is
 * IP6_MAXFRAGPACKETS / (IP6REASS_NHASH / 2), so the 50 is the correct
 * multiplier to reach a 100-item limit.)
 * The 100-item limit was chosen as brief testing seems to show that
 * this produces "reasonable" performance on some subset of systems
 * under DoS attack.
122 */ 123 #define IP6_MAXFRAGS (nmbclusters / 32) 124 #define IP6_MAXFRAGPACKETS (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50)) 125 126 /* 127 * Initialise reassembly queue and fragment identifier. 128 */ 129 void 130 frag6_set_bucketsize() 131 { 132 int i; 133 134 if ((i = V_ip6_maxfragpackets) > 0) 135 V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1); 136 } 137 138 static void 139 frag6_change(void *tag) 140 { 141 VNET_ITERATOR_DECL(vnet_iter); 142 143 ip6_maxfrags = IP6_MAXFRAGS; 144 VNET_LIST_RLOCK_NOSLEEP(); 145 VNET_FOREACH(vnet_iter) { 146 CURVNET_SET(vnet_iter); 147 V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS; 148 frag6_set_bucketsize(); 149 CURVNET_RESTORE(); 150 } 151 VNET_LIST_RUNLOCK_NOSLEEP(); 152 } 153 154 void 155 frag6_init(void) 156 { 157 struct ip6q *q6; 158 int i; 159 160 V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS; 161 frag6_set_bucketsize(); 162 for (i = 0; i < IP6REASS_NHASH; i++) { 163 q6 = IP6Q_HEAD(i); 164 q6->ip6q_next = q6->ip6q_prev = q6; 165 mtx_init(&V_ip6q[i].lock, "ip6qlock", NULL, MTX_DEF); 166 V_ip6q[i].count = 0; 167 } 168 V_ip6q_hashseed = arc4random(); 169 V_ip6_maxfragsperpacket = 64; 170 if (!IS_DEFAULT_VNET(curvnet)) 171 return; 172 173 ip6_maxfrags = IP6_MAXFRAGS; 174 EVENTHANDLER_REGISTER(nmbclusters_change, 175 frag6_change, NULL, EVENTHANDLER_PRI_ANY); 176 } 177 178 /* 179 * In RFC2460, fragment and reassembly rule do not agree with each other, 180 * in terms of next header field handling in fragment header. 181 * While the sender will use the same value for all of the fragmented packets, 182 * receiver is suggested not to check the consistency. 183 * 184 * fragment rule (p20): 185 * (2) A Fragment header containing: 186 * The Next Header value that identifies the first header of 187 * the Fragmentable Part of the original packet. 
188 * -> next header field is same for all fragments 189 * 190 * reassembly rule (p21): 191 * The Next Header field of the last header of the Unfragmentable 192 * Part is obtained from the Next Header field of the first 193 * fragment's Fragment header. 194 * -> should grab it from the first fragment only 195 * 196 * The following note also contradicts with fragment rule - no one is going to 197 * send different fragment with different next header field. 198 * 199 * additional note (p22): 200 * The Next Header values in the Fragment headers of different 201 * fragments of the same original packet may differ. Only the value 202 * from the Offset zero fragment packet is used for reassembly. 203 * -> should grab it from the first fragment only 204 * 205 * There is no explicit reason given in the RFC. Historical reason maybe? 206 */ 207 /* 208 * Fragment input 209 */ 210 int 211 frag6_input(struct mbuf **mp, int *offp, int proto) 212 { 213 struct mbuf *m = *mp, *t; 214 struct ip6_hdr *ip6; 215 struct ip6_frag *ip6f; 216 struct ip6q *head, *q6; 217 struct ip6asfrag *af6, *ip6af, *af6dwn; 218 struct in6_ifaddr *ia; 219 int offset = *offp, nxt, i, next; 220 int first_frag = 0; 221 int fragoff, frgpartlen; /* must be larger than u_int16_t */ 222 uint32_t hashkey[(sizeof(struct in6_addr) * 2 + 223 sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)]; 224 uint32_t hash, *hashkeyp; 225 struct ifnet *dstifp; 226 u_int8_t ecn, ecn0; 227 #ifdef RSS 228 struct m_tag *mtag; 229 struct ip6_direct_ctx *ip6dc; 230 #endif 231 232 #if 0 233 char ip6buf[INET6_ADDRSTRLEN]; 234 #endif 235 236 ip6 = mtod(m, struct ip6_hdr *); 237 #ifndef PULLDOWN_TEST 238 IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE); 239 ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); 240 #else 241 IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); 242 if (ip6f == NULL) 243 return (IPPROTO_DONE); 244 #endif 245 246 dstifp = NULL; 247 /* find the destination interface of the packet. 
*/ 248 ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); 249 if (ia != NULL) { 250 dstifp = ia->ia_ifp; 251 ifa_free(&ia->ia_ifa); 252 } 253 /* jumbo payload can't contain a fragment header */ 254 if (ip6->ip6_plen == 0) { 255 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); 256 in6_ifstat_inc(dstifp, ifs6_reass_fail); 257 return IPPROTO_DONE; 258 } 259 260 /* 261 * check whether fragment packet's fragment length is 262 * multiple of 8 octets. 263 * sizeof(struct ip6_frag) == 8 264 * sizeof(struct ip6_hdr) = 40 265 */ 266 if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && 267 (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { 268 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 269 offsetof(struct ip6_hdr, ip6_plen)); 270 in6_ifstat_inc(dstifp, ifs6_reass_fail); 271 return IPPROTO_DONE; 272 } 273 274 IP6STAT_INC(ip6s_fragments); 275 in6_ifstat_inc(dstifp, ifs6_reass_reqd); 276 277 /* offset now points to data portion */ 278 offset += sizeof(struct ip6_frag); 279 280 /* 281 * Handle "atomic" fragments (offset and m bit set to 0) upfront, 282 * unrelated to any reassembly (see RFC 6946 and section 4.5 of RFC 283 * 8200). Just skip the fragment header. 284 */ 285 if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) { 286 IP6STAT_INC(ip6s_atomicfrags); 287 in6_ifstat_inc(dstifp, ifs6_reass_ok); 288 *offp = offset; 289 m->m_flags |= M_FRAGMENTED; 290 return (ip6f->ip6f_nxt); 291 } 292 293 /* Get fragment length and discard 0-byte fragments. 
*/ 294 frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset; 295 if (frgpartlen == 0) { 296 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 297 offsetof(struct ip6_hdr, ip6_plen)); 298 in6_ifstat_inc(dstifp, ifs6_reass_fail); 299 IP6STAT_INC(ip6s_fragdropped); 300 return IPPROTO_DONE; 301 } 302 303 hashkeyp = hashkey; 304 memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr)); 305 hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp); 306 memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr)); 307 hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp); 308 *hashkeyp = ip6f->ip6f_ident; 309 hash = jenkins_hash32(hashkey, nitems(hashkey), V_ip6q_hashseed); 310 hash &= IP6REASS_HMASK; 311 head = IP6Q_HEAD(hash); 312 IP6Q_LOCK(hash); 313 314 /* 315 * Enforce upper bound on number of fragments. 316 * If maxfrag is 0, never accept fragments. 317 * If maxfrag is -1, accept all fragments without limitation. 318 */ 319 if (ip6_maxfrags < 0) 320 ; 321 else if (atomic_load_int(&frag6_nfrags) >= (u_int)ip6_maxfrags) 322 goto dropfrag; 323 324 for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next) 325 if (ip6f->ip6f_ident == q6->ip6q_ident && 326 IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) && 327 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst) 328 #ifdef MAC 329 && mac_ip6q_match(m, q6) 330 #endif 331 ) 332 break; 333 334 if (q6 == head) { 335 /* 336 * the first fragment to arrive, create a reassembly queue. 337 */ 338 first_frag = 1; 339 340 /* 341 * Enforce upper bound on number of fragmented packets 342 * for which we attempt reassembly; 343 * If maxfragpackets is 0, never accept fragments. 344 * If maxfragpackets is -1, accept all fragments without 345 * limitation. 
346 */ 347 if (V_ip6_maxfragpackets < 0) 348 ; 349 else if (V_ip6q[hash].count >= V_ip6_maxfragbucketsize || 350 atomic_load_int(&V_frag6_nfragpackets) >= 351 (u_int)V_ip6_maxfragpackets) 352 goto dropfrag; 353 atomic_add_int(&V_frag6_nfragpackets, 1); 354 q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE, 355 M_NOWAIT); 356 if (q6 == NULL) 357 goto dropfrag; 358 bzero(q6, sizeof(*q6)); 359 #ifdef MAC 360 if (mac_ip6q_init(q6, M_NOWAIT) != 0) { 361 free(q6, M_FTABLE); 362 goto dropfrag; 363 } 364 mac_ip6q_create(m, q6); 365 #endif 366 frag6_insque_head(q6, head, hash); 367 368 /* ip6q_nxt will be filled afterwards, from 1st fragment */ 369 q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; 370 #ifdef notyet 371 q6->ip6q_nxtp = (u_char *)nxtp; 372 #endif 373 q6->ip6q_ident = ip6f->ip6f_ident; 374 q6->ip6q_ttl = IPV6_FRAGTTL; 375 q6->ip6q_src = ip6->ip6_src; 376 q6->ip6q_dst = ip6->ip6_dst; 377 q6->ip6q_ecn = 378 (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; 379 q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ 380 381 q6->ip6q_nfrag = 0; 382 } 383 384 /* 385 * If it's the 1st fragment, record the length of the 386 * unfragmentable part and the next header of the fragment header. 387 */ 388 fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); 389 if (fragoff == 0) { 390 q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - 391 sizeof(struct ip6_frag); 392 q6->ip6q_nxt = ip6f->ip6f_nxt; 393 } 394 395 /* 396 * Check that the reassembled packet would not exceed 65535 bytes 397 * in size. 398 * If it would exceed, discard the fragment and return an ICMP error. 399 */ 400 if (q6->ip6q_unfrglen >= 0) { 401 /* The 1st fragment has already arrived. 
*/ 402 if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { 403 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 404 offset - sizeof(struct ip6_frag) + 405 offsetof(struct ip6_frag, ip6f_offlg)); 406 IP6Q_UNLOCK(hash); 407 return (IPPROTO_DONE); 408 } 409 } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { 410 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 411 offset - sizeof(struct ip6_frag) + 412 offsetof(struct ip6_frag, ip6f_offlg)); 413 IP6Q_UNLOCK(hash); 414 return (IPPROTO_DONE); 415 } 416 /* 417 * If it's the first fragment, do the above check for each 418 * fragment already stored in the reassembly queue. 419 */ 420 if (fragoff == 0) { 421 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 422 af6 = af6dwn) { 423 af6dwn = af6->ip6af_down; 424 425 if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen > 426 IPV6_MAXPACKET) { 427 struct mbuf *merr = IP6_REASS_MBUF(af6); 428 struct ip6_hdr *ip6err; 429 int erroff = af6->ip6af_offset; 430 431 /* dequeue the fragment. */ 432 frag6_deq(af6, hash); 433 free(af6, M_FTABLE); 434 435 /* adjust pointer. */ 436 ip6err = mtod(merr, struct ip6_hdr *); 437 438 /* 439 * Restore source and destination addresses 440 * in the erroneous IPv6 header. 
441 */ 442 ip6err->ip6_src = q6->ip6q_src; 443 ip6err->ip6_dst = q6->ip6q_dst; 444 445 icmp6_error(merr, ICMP6_PARAM_PROB, 446 ICMP6_PARAMPROB_HEADER, 447 erroff - sizeof(struct ip6_frag) + 448 offsetof(struct ip6_frag, ip6f_offlg)); 449 } 450 } 451 } 452 453 ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE, 454 M_NOWAIT); 455 if (ip6af == NULL) 456 goto dropfrag; 457 bzero(ip6af, sizeof(*ip6af)); 458 ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; 459 ip6af->ip6af_off = fragoff; 460 ip6af->ip6af_frglen = frgpartlen; 461 ip6af->ip6af_offset = offset; 462 IP6_REASS_MBUF(ip6af) = m; 463 464 if (first_frag) { 465 af6 = (struct ip6asfrag *)q6; 466 goto insert; 467 } 468 469 /* 470 * Handle ECN by comparing this segment with the first one; 471 * if CE is set, do not lose CE. 472 * drop if CE and not-ECT are mixed for the same packet. 473 */ 474 ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; 475 ecn0 = q6->ip6q_ecn; 476 if (ecn == IPTOS_ECN_CE) { 477 if (ecn0 == IPTOS_ECN_NOTECT) { 478 free(ip6af, M_FTABLE); 479 goto dropfrag; 480 } 481 if (ecn0 != IPTOS_ECN_CE) 482 q6->ip6q_ecn = IPTOS_ECN_CE; 483 } 484 if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) { 485 free(ip6af, M_FTABLE); 486 goto dropfrag; 487 } 488 489 /* 490 * Find a segment which begins after this one does. 491 */ 492 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 493 af6 = af6->ip6af_down) 494 if (af6->ip6af_off > ip6af->ip6af_off) 495 break; 496 497 #if 0 498 /* 499 * If there is a preceding segment, it may provide some of 500 * our data already. If so, drop the data from the incoming 501 * segment. If it provides all of our data, drop us. 
502 */ 503 if (af6->ip6af_up != (struct ip6asfrag *)q6) { 504 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen 505 - ip6af->ip6af_off; 506 if (i > 0) { 507 if (i >= ip6af->ip6af_frglen) 508 goto dropfrag; 509 m_adj(IP6_REASS_MBUF(ip6af), i); 510 ip6af->ip6af_off += i; 511 ip6af->ip6af_frglen -= i; 512 } 513 } 514 515 /* 516 * While we overlap succeeding segments trim them or, 517 * if they are completely covered, dequeue them. 518 */ 519 while (af6 != (struct ip6asfrag *)q6 && 520 ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) { 521 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; 522 if (i < af6->ip6af_frglen) { 523 af6->ip6af_frglen -= i; 524 af6->ip6af_off += i; 525 m_adj(IP6_REASS_MBUF(af6), i); 526 break; 527 } 528 af6 = af6->ip6af_down; 529 m_freem(IP6_REASS_MBUF(af6->ip6af_up)); 530 frag6_deq(af6->ip6af_up, hash); 531 } 532 #else 533 /* 534 * If the incoming framgent overlaps some existing fragments in 535 * the reassembly queue, drop it, since it is dangerous to override 536 * existing fragments from a security point of view. 537 * We don't know which fragment is the bad guy - here we trust 538 * fragment that came in earlier, with no real reason. 539 * 540 * Note: due to changes after disabling this part, mbuf passed to 541 * m_adj() below now does not meet the requirement. 
542 */ 543 if (af6->ip6af_up != (struct ip6asfrag *)q6) { 544 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen 545 - ip6af->ip6af_off; 546 if (i > 0) { 547 #if 0 /* suppress the noisy log */ 548 log(LOG_ERR, "%d bytes of a fragment from %s " 549 "overlaps the previous fragment\n", 550 i, ip6_sprintf(ip6buf, &q6->ip6q_src)); 551 #endif 552 free(ip6af, M_FTABLE); 553 goto dropfrag; 554 } 555 } 556 if (af6 != (struct ip6asfrag *)q6) { 557 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; 558 if (i > 0) { 559 #if 0 /* suppress the noisy log */ 560 log(LOG_ERR, "%d bytes of a fragment from %s " 561 "overlaps the succeeding fragment", 562 i, ip6_sprintf(ip6buf, &q6->ip6q_src)); 563 #endif 564 free(ip6af, M_FTABLE); 565 goto dropfrag; 566 } 567 } 568 #endif 569 570 insert: 571 #ifdef MAC 572 if (!first_frag) 573 mac_ip6q_update(m, q6); 574 #endif 575 576 /* 577 * Stick new segment in its place; 578 * check for complete reassembly. 579 * If not complete, check fragment limit. 580 * Move to front of packet queue, as we are 581 * the most recently active fragmented packet. 
582 */ 583 frag6_enq(ip6af, af6->ip6af_up, hash); 584 atomic_add_int(&frag6_nfrags, 1); 585 q6->ip6q_nfrag++; 586 #if 0 /* xxx */ 587 if (q6 != head->ip6q_next) { 588 frag6_remque(q6, hash); 589 frag6_insque_head(q6, head, hash); 590 } 591 #endif 592 next = 0; 593 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 594 af6 = af6->ip6af_down) { 595 if (af6->ip6af_off != next) { 596 if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) { 597 IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag); 598 frag6_freef(q6, hash); 599 } 600 IP6Q_UNLOCK(hash); 601 return IPPROTO_DONE; 602 } 603 next += af6->ip6af_frglen; 604 } 605 if (af6->ip6af_up->ip6af_mff) { 606 if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) { 607 IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag); 608 frag6_freef(q6, hash); 609 } 610 IP6Q_UNLOCK(hash); 611 return IPPROTO_DONE; 612 } 613 614 /* 615 * Reassembly is complete; concatenate fragments. 616 */ 617 ip6af = q6->ip6q_down; 618 t = m = IP6_REASS_MBUF(ip6af); 619 af6 = ip6af->ip6af_down; 620 frag6_deq(ip6af, hash); 621 while (af6 != (struct ip6asfrag *)q6) { 622 m->m_pkthdr.csum_flags &= 623 IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags; 624 m->m_pkthdr.csum_data += 625 IP6_REASS_MBUF(af6)->m_pkthdr.csum_data; 626 627 af6dwn = af6->ip6af_down; 628 frag6_deq(af6, hash); 629 while (t->m_next) 630 t = t->m_next; 631 m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset); 632 m_demote_pkthdr(IP6_REASS_MBUF(af6)); 633 m_cat(t, IP6_REASS_MBUF(af6)); 634 free(af6, M_FTABLE); 635 af6 = af6dwn; 636 } 637 638 while (m->m_pkthdr.csum_data & 0xffff0000) 639 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + 640 (m->m_pkthdr.csum_data >> 16); 641 642 /* adjust offset to point where the original next header starts */ 643 offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); 644 free(ip6af, M_FTABLE); 645 ip6 = mtod(m, struct ip6_hdr *); 646 ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr)); 647 if (q6->ip6q_ecn == IPTOS_ECN_CE) 648 ip6->ip6_flow |= 
htonl(IPTOS_ECN_CE << 20); 649 nxt = q6->ip6q_nxt; 650 #ifdef notyet 651 *q6->ip6q_nxtp = (u_char)(nxt & 0xff); 652 #endif 653 654 if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) { 655 frag6_remque(q6, hash); 656 atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); 657 #ifdef MAC 658 mac_ip6q_destroy(q6); 659 #endif 660 free(q6, M_FTABLE); 661 atomic_subtract_int(&V_frag6_nfragpackets, 1); 662 663 goto dropfrag; 664 } 665 666 /* 667 * Store NXT to the original. 668 */ 669 m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t), 670 (caddr_t)&nxt); 671 672 frag6_remque(q6, hash); 673 atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); 674 #ifdef MAC 675 mac_ip6q_reassemble(q6, m); 676 mac_ip6q_destroy(q6); 677 #endif 678 free(q6, M_FTABLE); 679 atomic_subtract_int(&V_frag6_nfragpackets, 1); 680 681 if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */ 682 int plen = 0; 683 for (t = m; t; t = t->m_next) 684 plen += t->m_len; 685 m->m_pkthdr.len = plen; 686 } 687 688 #ifdef RSS 689 mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc), 690 M_NOWAIT); 691 if (mtag == NULL) 692 goto dropfrag; 693 694 ip6dc = (struct ip6_direct_ctx *)(mtag + 1); 695 ip6dc->ip6dc_nxt = nxt; 696 ip6dc->ip6dc_off = offset; 697 698 m_tag_prepend(m, mtag); 699 #endif 700 701 IP6Q_UNLOCK(hash); 702 IP6STAT_INC(ip6s_reassembled); 703 in6_ifstat_inc(dstifp, ifs6_reass_ok); 704 705 #ifdef RSS 706 /* 707 * Queue/dispatch for reprocessing. 708 */ 709 netisr_dispatch(NETISR_IPV6_DIRECT, m); 710 return IPPROTO_DONE; 711 #endif 712 713 /* 714 * Tell launch routine the next header 715 */ 716 717 *mp = m; 718 *offp = offset; 719 720 return nxt; 721 722 dropfrag: 723 IP6Q_UNLOCK(hash); 724 in6_ifstat_inc(dstifp, ifs6_reass_fail); 725 IP6STAT_INC(ip6s_fragdropped); 726 m_freem(m); 727 return IPPROTO_DONE; 728 } 729 730 /* 731 * Free a fragment reassembly header and all 732 * associated datagrams. 
733 */ 734 static void 735 frag6_freef(struct ip6q *q6, uint32_t bucket) 736 { 737 struct ip6asfrag *af6, *down6; 738 739 IP6Q_LOCK_ASSERT(bucket); 740 741 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 742 af6 = down6) { 743 struct mbuf *m = IP6_REASS_MBUF(af6); 744 745 down6 = af6->ip6af_down; 746 frag6_deq(af6, bucket); 747 748 /* 749 * Return ICMP time exceeded error for the 1st fragment. 750 * Just free other fragments. 751 */ 752 if (af6->ip6af_off == 0) { 753 struct ip6_hdr *ip6; 754 755 /* adjust pointer */ 756 ip6 = mtod(m, struct ip6_hdr *); 757 758 /* restore source and destination addresses */ 759 ip6->ip6_src = q6->ip6q_src; 760 ip6->ip6_dst = q6->ip6q_dst; 761 762 icmp6_error(m, ICMP6_TIME_EXCEEDED, 763 ICMP6_TIME_EXCEED_REASSEMBLY, 0); 764 } else 765 m_freem(m); 766 free(af6, M_FTABLE); 767 } 768 frag6_remque(q6, bucket); 769 atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); 770 #ifdef MAC 771 mac_ip6q_destroy(q6); 772 #endif 773 free(q6, M_FTABLE); 774 atomic_subtract_int(&V_frag6_nfragpackets, 1); 775 } 776 777 /* 778 * Put an ip fragment on a reassembly chain. 779 * Like insque, but pointers in middle of structure. 780 */ 781 static void 782 frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6, 783 uint32_t bucket __unused) 784 { 785 786 IP6Q_LOCK_ASSERT(bucket); 787 788 af6->ip6af_up = up6; 789 af6->ip6af_down = up6->ip6af_down; 790 up6->ip6af_down->ip6af_up = af6; 791 up6->ip6af_down = af6; 792 } 793 794 /* 795 * To frag6_enq as remque is to insque. 
796 */ 797 static void 798 frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused) 799 { 800 801 IP6Q_LOCK_ASSERT(bucket); 802 803 af6->ip6af_up->ip6af_down = af6->ip6af_down; 804 af6->ip6af_down->ip6af_up = af6->ip6af_up; 805 } 806 807 static void 808 frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket) 809 { 810 811 IP6Q_LOCK_ASSERT(bucket); 812 KASSERT(IP6Q_HEAD(bucket) == old, 813 ("%s: attempt to insert at head of wrong bucket" 814 " (bucket=%u, old=%p)", __func__, bucket, old)); 815 816 new->ip6q_prev = old; 817 new->ip6q_next = old->ip6q_next; 818 old->ip6q_next->ip6q_prev= new; 819 old->ip6q_next = new; 820 V_ip6q[bucket].count++; 821 } 822 823 static void 824 frag6_remque(struct ip6q *p6, uint32_t bucket) 825 { 826 827 IP6Q_LOCK_ASSERT(bucket); 828 829 p6->ip6q_prev->ip6q_next = p6->ip6q_next; 830 p6->ip6q_next->ip6q_prev = p6->ip6q_prev; 831 V_ip6q[bucket].count--; 832 } 833 834 /* 835 * IPv6 reassembling timer processing; 836 * if a timer expires on a reassembly 837 * queue, discard it. 838 */ 839 void 840 frag6_slowtimo(void) 841 { 842 VNET_ITERATOR_DECL(vnet_iter); 843 struct ip6q *head, *q6; 844 int i; 845 846 VNET_LIST_RLOCK_NOSLEEP(); 847 VNET_FOREACH(vnet_iter) { 848 CURVNET_SET(vnet_iter); 849 for (i = 0; i < IP6REASS_NHASH; i++) { 850 IP6Q_LOCK(i); 851 head = IP6Q_HEAD(i); 852 q6 = head->ip6q_next; 853 if (q6 == NULL) { 854 /* 855 * XXXJTL: This should never happen. This 856 * should turn into an assertion. 857 */ 858 IP6Q_UNLOCK(i); 859 continue; 860 } 861 while (q6 != head) { 862 --q6->ip6q_ttl; 863 q6 = q6->ip6q_next; 864 if (q6->ip6q_prev->ip6q_ttl == 0) { 865 IP6STAT_ADD(ip6s_fragtimeout, 866 q6->ip6q_prev->ip6q_nfrag); 867 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 868 frag6_freef(q6->ip6q_prev, i); 869 } 870 } 871 /* 872 * If we are over the maximum number of fragments 873 * (due to the limit being lowered), drain off 874 * enough to get down to the new limit. 
875 * Note that we drain all reassembly queues if 876 * maxfragpackets is 0 (fragmentation is disabled), 877 * and don't enforce a limit when maxfragpackets 878 * is negative. 879 */ 880 while ((V_ip6_maxfragpackets == 0 || 881 (V_ip6_maxfragpackets > 0 && 882 V_ip6q[i].count > V_ip6_maxfragbucketsize)) && 883 head->ip6q_prev != head) { 884 IP6STAT_ADD(ip6s_fragoverflow, 885 q6->ip6q_prev->ip6q_nfrag); 886 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 887 frag6_freef(head->ip6q_prev, i); 888 } 889 IP6Q_UNLOCK(i); 890 } 891 /* 892 * If we are still over the maximum number of fragmented 893 * packets, drain off enough to get down to the new limit. 894 */ 895 i = 0; 896 while (V_ip6_maxfragpackets >= 0 && 897 atomic_load_int(&V_frag6_nfragpackets) > 898 (u_int)V_ip6_maxfragpackets) { 899 IP6Q_LOCK(i); 900 head = IP6Q_HEAD(i); 901 if (head->ip6q_prev != head) { 902 IP6STAT_ADD(ip6s_fragoverflow, 903 q6->ip6q_prev->ip6q_nfrag); 904 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 905 frag6_freef(head->ip6q_prev, i); 906 } 907 IP6Q_UNLOCK(i); 908 i = (i + 1) % IP6REASS_NHASH; 909 } 910 CURVNET_RESTORE(); 911 } 912 VNET_LIST_RUNLOCK_NOSLEEP(); 913 } 914 915 /* 916 * Drain off all datagram fragments. 917 */ 918 void 919 frag6_drain(void) 920 { 921 VNET_ITERATOR_DECL(vnet_iter); 922 struct ip6q *head; 923 int i; 924 925 VNET_LIST_RLOCK_NOSLEEP(); 926 VNET_FOREACH(vnet_iter) { 927 CURVNET_SET(vnet_iter); 928 for (i = 0; i < IP6REASS_NHASH; i++) { 929 if (IP6Q_TRYLOCK(i) == 0) 930 continue; 931 head = IP6Q_HEAD(i); 932 while (head->ip6q_next != head) { 933 IP6STAT_INC(ip6s_fragdropped); 934 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 935 frag6_freef(head->ip6q_next, i); 936 } 937 IP6Q_UNLOCK(i); 938 } 939 CURVNET_RESTORE(); 940 } 941 VNET_LIST_RUNLOCK_NOSLEEP(); 942 } 943 944 int 945 ip6_deletefraghdr(struct mbuf *m, int offset, int wait) 946 { 947 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 948 struct mbuf *t; 949 950 /* Delete frag6 header. 
*/ 951 if (m->m_len >= offset + sizeof(struct ip6_frag)) { 952 /* This is the only possible case with !PULLDOWN_TEST. */ 953 bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag), 954 offset); 955 m->m_data += sizeof(struct ip6_frag); 956 m->m_len -= sizeof(struct ip6_frag); 957 } else { 958 /* This comes with no copy if the boundary is on cluster. */ 959 if ((t = m_split(m, offset, wait)) == NULL) 960 return (ENOMEM); 961 m_adj(t, sizeof(struct ip6_frag)); 962 m_cat(m, t); 963 } 964 965 m->m_flags |= M_FRAGMENTED; 966 return (0); 967 } 968