1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * $KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $ 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_rss.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/domain.h> 42 #include <sys/eventhandler.h> 43 #include <sys/hash.h> 44 #include <sys/kernel.h> 45 #include <sys/malloc.h> 46 #include <sys/mbuf.h> 47 #include <sys/protosw.h> 48 #include <sys/socket.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <net/netisr.h> 55 #include <net/route.h> 56 #include <net/vnet.h> 57 58 #include <netinet/in.h> 59 #include <netinet/in_var.h> 60 #include <netinet/ip6.h> 61 #include <netinet6/ip6_var.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/in_systm.h> /* for ECN definitions */ 64 #include <netinet/ip.h> /* for ECN definitions */ 65 66 #ifdef MAC 67 #include <security/mac/mac_framework.h> 68 #endif 69 70 /* 71 * Reassembly headers are stored in hash buckets. 72 */ 73 #define IP6REASS_NHASH_LOG2 10 74 #define IP6REASS_NHASH (1 << IP6REASS_NHASH_LOG2) 75 #define IP6REASS_HMASK (IP6REASS_NHASH - 1) 76 77 static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *, 78 uint32_t bucket __unused); 79 static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused); 80 static void frag6_insque_head(struct ip6q *, struct ip6q *, 81 uint32_t bucket); 82 static void frag6_remque(struct ip6q *, uint32_t bucket); 83 static void frag6_freef(struct ip6q *, uint32_t bucket); 84 85 struct ip6qbucket { 86 struct ip6q ip6q; 87 struct mtx lock; 88 int count; 89 }; 90 91 static MALLOC_DEFINE(M_FRAG6, "frag6", "IPv6 fragment reassembly header"); 92 93 /* System wide (global) maximum and count of packets in reassembly queues. */ 94 static int ip6_maxfrags; 95 static volatile u_int frag6_nfrags = 0; 96 97 /* Maximum and current packets in per-VNET reassembly queue. */ 98 VNET_DEFINE_STATIC(int, ip6_maxfragpackets); 99 VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets); 100 #define V_ip6_maxfragpackets VNET(ip6_maxfragpackets) 101 #define V_frag6_nfragpackets VNET(frag6_nfragpackets) 102 103 /* Maximum per-VNET reassembly queues per bucket and fragments per packet. */ 104 VNET_DEFINE_STATIC(int, ip6_maxfragbucketsize); 105 VNET_DEFINE_STATIC(int, ip6_maxfragsperpacket); 106 #define V_ip6_maxfragbucketsize VNET(ip6_maxfragbucketsize) 107 #define V_ip6_maxfragsperpacket VNET(ip6_maxfragsperpacket) 108 109 /* Per-VNET reassembly queue buckets. */ 110 VNET_DEFINE_STATIC(struct ip6qbucket, ip6qb[IP6REASS_NHASH]); 111 VNET_DEFINE_STATIC(uint32_t, ip6qb_hashseed); 112 #define V_ip6qb VNET(ip6qb) 113 #define V_ip6qb_hashseed VNET(ip6qb_hashseed) 114 115 #define IP6QB_LOCK(_b) mtx_lock(&V_ip6qb[(_b)].lock) 116 #define IP6QB_TRYLOCK(_b) mtx_trylock(&V_ip6qb[(_b)].lock) 117 #define IP6QB_LOCK_ASSERT(_b) mtx_assert(&V_ip6qb[(_b)].lock, MA_OWNED) 118 #define IP6QB_UNLOCK(_b) mtx_unlock(&V_ip6qb[(_b)].lock) 119 #define IP6QB_HEAD(_b) (&V_ip6qb[(_b)].ip6q) 120 121 /* 122 * By default, limit the number of IP6 fragments across all reassembly 123 * queues to 1/32 of the total number of mbuf clusters. 124 * 125 * Limit the total number of reassembly queues per VNET to the 126 * IP6 fragment limit, but ensure the limit will not allow any bucket 127 * to grow above 100 items. (The bucket limit is 128 * IP_MAXFRAGPACKETS / (IPREASS_NHASH / 2), so the 50 is the correct 129 * multiplier to reach a 100-item limit.) 130 * The 100-item limit was chosen as brief testing seems to show that 131 * this produces "reasonable" performance on some subset of systems 132 * under DoS attack. 133 */ 134 #define IP6_MAXFRAGS (nmbclusters / 32) 135 #define IP6_MAXFRAGPACKETS (imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50)) 136 137 138 /* 139 * Sysctls and helper function. 140 */ 141 SYSCTL_DECL(_net_inet6_ip6); 142 143 static void 144 frag6_set_bucketsize(void) 145 { 146 int i; 147 148 if ((i = V_ip6_maxfragpackets) > 0) 149 V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1); 150 } 151 152 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags, 153 CTLFLAG_RW, &ip6_maxfrags, 0, 154 "Maximum allowed number of outstanding IPv6 packet fragments. " 155 "A value of 0 means no fragmented packets will be accepted, while a " 156 "a value of -1 means no limit"); 157 158 static int 159 sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS) 160 { 161 int error, val; 162 163 val = V_ip6_maxfragpackets; 164 error = sysctl_handle_int(oidp, &val, 0, req); 165 if (error != 0 || !req->newptr) 166 return (error); 167 V_ip6_maxfragpackets = val; 168 frag6_set_bucketsize(); 169 return (0); 170 } 171 SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets, 172 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0, 173 sysctl_ip6_maxfragpackets, "I", 174 "Default maximum number of outstanding fragmented IPv6 packets. " 175 "A value of 0 means no fragmented packets will be accepted, while a " 176 "a value of -1 means no limit"); 177 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket, 178 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0, 179 "Maximum allowed number of fragments per packet"); 180 SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize, 181 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0, 182 "Maximum number of reassembly queues per hash bucket"); 183 184 185 /* 186 * Remove the IPv6 fragmentation header from the mbuf. 187 */ 188 int 189 ip6_deletefraghdr(struct mbuf *m, int offset, int wait) 190 { 191 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 192 struct mbuf *t; 193 194 /* Delete frag6 header. */ 195 if (m->m_len >= offset + sizeof(struct ip6_frag)) { 196 /* This is the only possible case with !PULLDOWN_TEST. */ 197 bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag), 198 offset); 199 m->m_data += sizeof(struct ip6_frag); 200 m->m_len -= sizeof(struct ip6_frag); 201 } else { 202 /* This comes with no copy if the boundary is on cluster. */ 203 if ((t = m_split(m, offset, wait)) == NULL) 204 return (ENOMEM); 205 m_adj(t, sizeof(struct ip6_frag)); 206 m_cat(m, t); 207 } 208 209 m->m_flags |= M_FRAGMENTED; 210 return (0); 211 } 212 213 /* 214 * Free a fragment reassembly header and all 215 * associated datagrams. 216 */ 217 static void 218 frag6_freef(struct ip6q *q6, uint32_t bucket) 219 { 220 struct ip6asfrag *af6, *down6; 221 222 IP6QB_LOCK_ASSERT(bucket); 223 224 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 225 af6 = down6) { 226 struct mbuf *m = IP6_REASS_MBUF(af6); 227 228 down6 = af6->ip6af_down; 229 frag6_deq(af6, bucket); 230 231 /* 232 * Return ICMP time exceeded error for the 1st fragment. 233 * Just free other fragments. 234 */ 235 if (af6->ip6af_off == 0) { 236 struct ip6_hdr *ip6; 237 238 /* adjust pointer */ 239 ip6 = mtod(m, struct ip6_hdr *); 240 241 /* restore source and destination addresses */ 242 ip6->ip6_src = q6->ip6q_src; 243 ip6->ip6_dst = q6->ip6q_dst; 244 245 icmp6_error(m, ICMP6_TIME_EXCEEDED, 246 ICMP6_TIME_EXCEED_REASSEMBLY, 0); 247 } else 248 m_freem(m); 249 free(af6, M_FRAG6); 250 } 251 frag6_remque(q6, bucket); 252 atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); 253 #ifdef MAC 254 mac_ip6q_destroy(q6); 255 #endif 256 free(q6, M_FRAG6); 257 atomic_subtract_int(&V_frag6_nfragpackets, 1); 258 } 259 260 /* 261 * In RFC2460, fragment and reassembly rule do not agree with each other, 262 * in terms of next header field handling in fragment header. 263 * While the sender will use the same value for all of the fragmented packets, 264 * receiver is suggested not to check the consistency. 265 * 266 * fragment rule (p20): 267 * (2) A Fragment header containing: 268 * The Next Header value that identifies the first header of 269 * the Fragmentable Part of the original packet. 270 * -> next header field is same for all fragments 271 * 272 * reassembly rule (p21): 273 * The Next Header field of the last header of the Unfragmentable 274 * Part is obtained from the Next Header field of the first 275 * fragment's Fragment header. 276 * -> should grab it from the first fragment only 277 * 278 * The following note also contradicts with fragment rule - no one is going to 279 * send different fragment with different next header field. 280 * 281 * additional note (p22): 282 * The Next Header values in the Fragment headers of different 283 * fragments of the same original packet may differ. Only the value 284 * from the Offset zero fragment packet is used for reassembly. 285 * -> should grab it from the first fragment only 286 * 287 * There is no explicit reason given in the RFC. Historical reason maybe? 288 */ 289 /* 290 * Fragment input 291 */ 292 int 293 frag6_input(struct mbuf **mp, int *offp, int proto) 294 { 295 struct mbuf *m = *mp, *t; 296 struct ip6_hdr *ip6; 297 struct ip6_frag *ip6f; 298 struct ip6q *head, *q6; 299 struct ip6asfrag *af6, *ip6af, *af6dwn; 300 struct in6_ifaddr *ia; 301 int offset = *offp, nxt, i, next; 302 int first_frag = 0; 303 int fragoff, frgpartlen; /* must be larger than u_int16_t */ 304 uint32_t hashkey[(sizeof(struct in6_addr) * 2 + 305 sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)]; 306 uint32_t bucket, *hashkeyp; 307 struct ifnet *dstifp; 308 u_int8_t ecn, ecn0; 309 #ifdef RSS 310 struct m_tag *mtag; 311 struct ip6_direct_ctx *ip6dc; 312 #endif 313 314 ip6 = mtod(m, struct ip6_hdr *); 315 #ifndef PULLDOWN_TEST 316 IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE); 317 ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset); 318 #else 319 IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f)); 320 if (ip6f == NULL) 321 return (IPPROTO_DONE); 322 #endif 323 324 dstifp = NULL; 325 /* find the destination interface of the packet. */ 326 ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); 327 if (ia != NULL) { 328 dstifp = ia->ia_ifp; 329 ifa_free(&ia->ia_ifa); 330 } 331 /* jumbo payload can't contain a fragment header */ 332 if (ip6->ip6_plen == 0) { 333 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset); 334 in6_ifstat_inc(dstifp, ifs6_reass_fail); 335 return IPPROTO_DONE; 336 } 337 338 /* 339 * check whether fragment packet's fragment length is 340 * multiple of 8 octets. 341 * sizeof(struct ip6_frag) == 8 342 * sizeof(struct ip6_hdr) = 40 343 */ 344 if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) && 345 (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) { 346 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 347 offsetof(struct ip6_hdr, ip6_plen)); 348 in6_ifstat_inc(dstifp, ifs6_reass_fail); 349 return IPPROTO_DONE; 350 } 351 352 IP6STAT_INC(ip6s_fragments); 353 in6_ifstat_inc(dstifp, ifs6_reass_reqd); 354 355 /* offset now points to data portion */ 356 offset += sizeof(struct ip6_frag); 357 358 /* 359 * Handle "atomic" fragments (offset and m bit set to 0) upfront, 360 * unrelated to any reassembly (see RFC 6946 and section 4.5 of RFC 361 * 8200). Just skip the fragment header. 362 */ 363 if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) { 364 IP6STAT_INC(ip6s_atomicfrags); 365 in6_ifstat_inc(dstifp, ifs6_reass_ok); 366 *offp = offset; 367 m->m_flags |= M_FRAGMENTED; 368 return (ip6f->ip6f_nxt); 369 } 370 371 /* Get fragment length and discard 0-byte fragments. */ 372 frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset; 373 if (frgpartlen == 0) { 374 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 375 offsetof(struct ip6_hdr, ip6_plen)); 376 in6_ifstat_inc(dstifp, ifs6_reass_fail); 377 IP6STAT_INC(ip6s_fragdropped); 378 return IPPROTO_DONE; 379 } 380 381 hashkeyp = hashkey; 382 memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr)); 383 hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp); 384 memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr)); 385 hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp); 386 *hashkeyp = ip6f->ip6f_ident; 387 bucket = jenkins_hash32(hashkey, nitems(hashkey), V_ip6qb_hashseed); 388 bucket &= IP6REASS_HMASK; 389 head = IP6QB_HEAD(bucket); 390 IP6QB_LOCK(bucket); 391 392 /* 393 * Enforce upper bound on number of fragments. 394 * If maxfrag is 0, never accept fragments. 395 * If maxfrag is -1, accept all fragments without limitation. 396 */ 397 if (ip6_maxfrags < 0) 398 ; 399 else if (atomic_load_int(&frag6_nfrags) >= (u_int)ip6_maxfrags) 400 goto dropfrag; 401 402 for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next) 403 if (ip6f->ip6f_ident == q6->ip6q_ident && 404 IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) && 405 IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst) 406 #ifdef MAC 407 && mac_ip6q_match(m, q6) 408 #endif 409 ) 410 break; 411 412 if (q6 == head) { 413 /* 414 * the first fragment to arrive, create a reassembly queue. 415 */ 416 first_frag = 1; 417 418 /* 419 * Enforce upper bound on number of fragmented packets 420 * for which we attempt reassembly; 421 * If maxfragpackets is 0, never accept fragments. 422 * If maxfragpackets is -1, accept all fragments without 423 * limitation. 424 */ 425 if (V_ip6_maxfragpackets < 0) 426 ; 427 else if (V_ip6qb[bucket].count >= V_ip6_maxfragbucketsize || 428 atomic_load_int(&V_frag6_nfragpackets) >= 429 (u_int)V_ip6_maxfragpackets) 430 goto dropfrag; 431 atomic_add_int(&V_frag6_nfragpackets, 1); 432 q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FRAG6, 433 M_NOWAIT | M_ZERO); 434 if (q6 == NULL) 435 goto dropfrag; 436 #ifdef MAC 437 if (mac_ip6q_init(q6, M_NOWAIT) != 0) { 438 free(q6, M_FRAG6); 439 goto dropfrag; 440 } 441 mac_ip6q_create(m, q6); 442 #endif 443 frag6_insque_head(q6, head, bucket); 444 445 /* ip6q_nxt will be filled afterwards, from 1st fragment */ 446 q6->ip6q_down = q6->ip6q_up = (struct ip6asfrag *)q6; 447 #ifdef notyet 448 q6->ip6q_nxtp = (u_char *)nxtp; 449 #endif 450 q6->ip6q_ident = ip6f->ip6f_ident; 451 q6->ip6q_ttl = IPV6_FRAGTTL; 452 q6->ip6q_src = ip6->ip6_src; 453 q6->ip6q_dst = ip6->ip6_dst; 454 q6->ip6q_ecn = 455 (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; 456 q6->ip6q_unfrglen = -1; /* The 1st fragment has not arrived. */ 457 458 q6->ip6q_nfrag = 0; 459 } 460 461 /* 462 * If it's the 1st fragment, record the length of the 463 * unfragmentable part and the next header of the fragment header. 464 */ 465 fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK); 466 if (fragoff == 0) { 467 q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) - 468 sizeof(struct ip6_frag); 469 q6->ip6q_nxt = ip6f->ip6f_nxt; 470 } 471 472 /* 473 * Check that the reassembled packet would not exceed 65535 bytes 474 * in size. 475 * If it would exceed, discard the fragment and return an ICMP error. 476 */ 477 if (q6->ip6q_unfrglen >= 0) { 478 /* The 1st fragment has already arrived. */ 479 if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) { 480 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 481 offset - sizeof(struct ip6_frag) + 482 offsetof(struct ip6_frag, ip6f_offlg)); 483 IP6QB_UNLOCK(bucket); 484 return (IPPROTO_DONE); 485 } 486 } else if (fragoff + frgpartlen > IPV6_MAXPACKET) { 487 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, 488 offset - sizeof(struct ip6_frag) + 489 offsetof(struct ip6_frag, ip6f_offlg)); 490 IP6QB_UNLOCK(bucket); 491 return (IPPROTO_DONE); 492 } 493 /* 494 * If it's the first fragment, do the above check for each 495 * fragment already stored in the reassembly queue. 496 */ 497 if (fragoff == 0) { 498 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 499 af6 = af6dwn) { 500 af6dwn = af6->ip6af_down; 501 502 if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen > 503 IPV6_MAXPACKET) { 504 struct mbuf *merr = IP6_REASS_MBUF(af6); 505 struct ip6_hdr *ip6err; 506 int erroff = af6->ip6af_offset; 507 508 /* dequeue the fragment. */ 509 frag6_deq(af6, bucket); 510 free(af6, M_FRAG6); 511 512 /* adjust pointer. */ 513 ip6err = mtod(merr, struct ip6_hdr *); 514 515 /* 516 * Restore source and destination addresses 517 * in the erroneous IPv6 header. 518 */ 519 ip6err->ip6_src = q6->ip6q_src; 520 ip6err->ip6_dst = q6->ip6q_dst; 521 522 icmp6_error(merr, ICMP6_PARAM_PROB, 523 ICMP6_PARAMPROB_HEADER, 524 erroff - sizeof(struct ip6_frag) + 525 offsetof(struct ip6_frag, ip6f_offlg)); 526 } 527 } 528 } 529 530 ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FRAG6, 531 M_NOWAIT | M_ZERO); 532 if (ip6af == NULL) 533 goto dropfrag; 534 ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG; 535 ip6af->ip6af_off = fragoff; 536 ip6af->ip6af_frglen = frgpartlen; 537 ip6af->ip6af_offset = offset; 538 IP6_REASS_MBUF(ip6af) = m; 539 540 if (first_frag) { 541 af6 = (struct ip6asfrag *)q6; 542 goto insert; 543 } 544 545 /* 546 * Handle ECN by comparing this segment with the first one; 547 * if CE is set, do not lose CE. 548 * drop if CE and not-ECT are mixed for the same packet. 549 */ 550 ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK; 551 ecn0 = q6->ip6q_ecn; 552 if (ecn == IPTOS_ECN_CE) { 553 if (ecn0 == IPTOS_ECN_NOTECT) { 554 free(ip6af, M_FRAG6); 555 goto dropfrag; 556 } 557 if (ecn0 != IPTOS_ECN_CE) 558 q6->ip6q_ecn = IPTOS_ECN_CE; 559 } 560 if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) { 561 free(ip6af, M_FRAG6); 562 goto dropfrag; 563 } 564 565 /* 566 * Find a segment which begins after this one does. 567 */ 568 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 569 af6 = af6->ip6af_down) 570 if (af6->ip6af_off > ip6af->ip6af_off) 571 break; 572 573 /* 574 * If the incoming framgent overlaps some existing fragments in 575 * the reassembly queue, drop it, since it is dangerous to override 576 * existing fragments from a security point of view. 577 * We don't know which fragment is the bad guy - here we trust 578 * fragment that came in earlier, with no real reason. 579 * 580 * Note: due to changes after disabling this part, mbuf passed to 581 * m_adj() below now does not meet the requirement. 582 */ 583 if (af6->ip6af_up != (struct ip6asfrag *)q6) { 584 i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen 585 - ip6af->ip6af_off; 586 if (i > 0) { 587 free(ip6af, M_FRAG6); 588 goto dropfrag; 589 } 590 } 591 if (af6 != (struct ip6asfrag *)q6) { 592 i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off; 593 if (i > 0) { 594 free(ip6af, M_FRAG6); 595 goto dropfrag; 596 } 597 } 598 599 insert: 600 #ifdef MAC 601 if (!first_frag) 602 mac_ip6q_update(m, q6); 603 #endif 604 605 /* 606 * Stick new segment in its place; 607 * check for complete reassembly. 608 * If not complete, check fragment limit. 609 * Move to front of packet queue, as we are 610 * the most recently active fragmented packet. 611 */ 612 frag6_enq(ip6af, af6->ip6af_up, bucket); 613 atomic_add_int(&frag6_nfrags, 1); 614 q6->ip6q_nfrag++; 615 next = 0; 616 for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6; 617 af6 = af6->ip6af_down) { 618 if (af6->ip6af_off != next) { 619 if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) { 620 IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag); 621 frag6_freef(q6, bucket); 622 } 623 IP6QB_UNLOCK(bucket); 624 return IPPROTO_DONE; 625 } 626 next += af6->ip6af_frglen; 627 } 628 if (af6->ip6af_up->ip6af_mff) { 629 if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) { 630 IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag); 631 frag6_freef(q6, bucket); 632 } 633 IP6QB_UNLOCK(bucket); 634 return IPPROTO_DONE; 635 } 636 637 /* 638 * Reassembly is complete; concatenate fragments. 639 */ 640 ip6af = q6->ip6q_down; 641 t = m = IP6_REASS_MBUF(ip6af); 642 af6 = ip6af->ip6af_down; 643 frag6_deq(ip6af, bucket); 644 while (af6 != (struct ip6asfrag *)q6) { 645 m->m_pkthdr.csum_flags &= 646 IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags; 647 m->m_pkthdr.csum_data += 648 IP6_REASS_MBUF(af6)->m_pkthdr.csum_data; 649 650 af6dwn = af6->ip6af_down; 651 frag6_deq(af6, bucket); 652 while (t->m_next) 653 t = t->m_next; 654 m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset); 655 m_demote_pkthdr(IP6_REASS_MBUF(af6)); 656 m_cat(t, IP6_REASS_MBUF(af6)); 657 free(af6, M_FRAG6); 658 af6 = af6dwn; 659 } 660 661 while (m->m_pkthdr.csum_data & 0xffff0000) 662 m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) + 663 (m->m_pkthdr.csum_data >> 16); 664 665 /* adjust offset to point where the original next header starts */ 666 offset = ip6af->ip6af_offset - sizeof(struct ip6_frag); 667 free(ip6af, M_FRAG6); 668 ip6 = mtod(m, struct ip6_hdr *); 669 ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr)); 670 if (q6->ip6q_ecn == IPTOS_ECN_CE) 671 ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20); 672 nxt = q6->ip6q_nxt; 673 674 if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) { 675 frag6_remque(q6, bucket); 676 atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); 677 #ifdef MAC 678 mac_ip6q_destroy(q6); 679 #endif 680 free(q6, M_FRAG6); 681 atomic_subtract_int(&V_frag6_nfragpackets, 1); 682 683 goto dropfrag; 684 } 685 686 /* 687 * Store NXT to the original. 688 */ 689 m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t), 690 (caddr_t)&nxt); 691 692 frag6_remque(q6, bucket); 693 atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag); 694 #ifdef MAC 695 mac_ip6q_reassemble(q6, m); 696 mac_ip6q_destroy(q6); 697 #endif 698 free(q6, M_FRAG6); 699 atomic_subtract_int(&V_frag6_nfragpackets, 1); 700 701 if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */ 702 int plen = 0; 703 for (t = m; t; t = t->m_next) 704 plen += t->m_len; 705 m->m_pkthdr.len = plen; 706 } 707 708 #ifdef RSS 709 mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc), 710 M_NOWAIT); 711 if (mtag == NULL) 712 goto dropfrag; 713 714 ip6dc = (struct ip6_direct_ctx *)(mtag + 1); 715 ip6dc->ip6dc_nxt = nxt; 716 ip6dc->ip6dc_off = offset; 717 718 m_tag_prepend(m, mtag); 719 #endif 720 721 IP6QB_UNLOCK(bucket); 722 IP6STAT_INC(ip6s_reassembled); 723 in6_ifstat_inc(dstifp, ifs6_reass_ok); 724 725 #ifdef RSS 726 /* 727 * Queue/dispatch for reprocessing. 728 */ 729 netisr_dispatch(NETISR_IPV6_DIRECT, m); 730 return IPPROTO_DONE; 731 #endif 732 733 /* 734 * Tell launch routine the next header 735 */ 736 737 *mp = m; 738 *offp = offset; 739 740 return nxt; 741 742 dropfrag: 743 IP6QB_UNLOCK(bucket); 744 in6_ifstat_inc(dstifp, ifs6_reass_fail); 745 IP6STAT_INC(ip6s_fragdropped); 746 m_freem(m); 747 return IPPROTO_DONE; 748 } 749 750 /* 751 * IPv6 reassembling timer processing; 752 * if a timer expires on a reassembly 753 * queue, discard it. 754 */ 755 void 756 frag6_slowtimo(void) 757 { 758 VNET_ITERATOR_DECL(vnet_iter); 759 struct ip6q *head, *q6; 760 uint32_t bucket; 761 762 VNET_LIST_RLOCK_NOSLEEP(); 763 VNET_FOREACH(vnet_iter) { 764 CURVNET_SET(vnet_iter); 765 for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) { 766 IP6QB_LOCK(bucket); 767 head = IP6QB_HEAD(bucket); 768 q6 = head->ip6q_next; 769 if (q6 == NULL) { 770 /* 771 * XXXJTL: This should never happen. This 772 * should turn into an assertion. 773 */ 774 IP6QB_UNLOCK(bucket); 775 continue; 776 } 777 while (q6 != head) { 778 --q6->ip6q_ttl; 779 q6 = q6->ip6q_next; 780 if (q6->ip6q_prev->ip6q_ttl == 0) { 781 IP6STAT_ADD(ip6s_fragtimeout, 782 q6->ip6q_prev->ip6q_nfrag); 783 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 784 frag6_freef(q6->ip6q_prev, bucket); 785 } 786 } 787 /* 788 * If we are over the maximum number of fragments 789 * (due to the limit being lowered), drain off 790 * enough to get down to the new limit. 791 * Note that we drain all reassembly queues if 792 * maxfragpackets is 0 (fragmentation is disabled), 793 * and don't enforce a limit when maxfragpackets 794 * is negative. 795 */ 796 while ((V_ip6_maxfragpackets == 0 || 797 (V_ip6_maxfragpackets > 0 && 798 V_ip6qb[bucket].count > V_ip6_maxfragbucketsize)) && 799 head->ip6q_prev != head) { 800 IP6STAT_ADD(ip6s_fragoverflow, 801 q6->ip6q_prev->ip6q_nfrag); 802 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 803 frag6_freef(head->ip6q_prev, bucket); 804 } 805 IP6QB_UNLOCK(bucket); 806 } 807 /* 808 * If we are still over the maximum number of fragmented 809 * packets, drain off enough to get down to the new limit. 810 */ 811 bucket = 0; 812 while (V_ip6_maxfragpackets >= 0 && 813 atomic_load_int(&V_frag6_nfragpackets) > 814 (u_int)V_ip6_maxfragpackets) { 815 IP6QB_LOCK(bucket); 816 head = IP6QB_HEAD(bucket); 817 if (head->ip6q_prev != head) { 818 IP6STAT_ADD(ip6s_fragoverflow, 819 q6->ip6q_prev->ip6q_nfrag); 820 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 821 frag6_freef(head->ip6q_prev, bucket); 822 } 823 IP6QB_UNLOCK(bucket); 824 bucket = (bucket + 1) % IP6REASS_NHASH; 825 } 826 CURVNET_RESTORE(); 827 } 828 VNET_LIST_RUNLOCK_NOSLEEP(); 829 } 830 831 static void 832 frag6_change(void *tag) 833 { 834 VNET_ITERATOR_DECL(vnet_iter); 835 836 ip6_maxfrags = IP6_MAXFRAGS; 837 VNET_LIST_RLOCK_NOSLEEP(); 838 VNET_FOREACH(vnet_iter) { 839 CURVNET_SET(vnet_iter); 840 V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS; 841 frag6_set_bucketsize(); 842 CURVNET_RESTORE(); 843 } 844 VNET_LIST_RUNLOCK_NOSLEEP(); 845 } 846 847 /* 848 * Initialise reassembly queue and fragment identifier. 849 */ 850 void 851 frag6_init(void) 852 { 853 struct ip6q *q6; 854 uint32_t bucket; 855 856 V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS; 857 frag6_set_bucketsize(); 858 for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) { 859 q6 = IP6QB_HEAD(bucket); 860 q6->ip6q_next = q6->ip6q_prev = q6; 861 mtx_init(&V_ip6qb[bucket].lock, "ip6qlock", NULL, MTX_DEF); 862 V_ip6qb[bucket].count = 0; 863 } 864 V_ip6qb_hashseed = arc4random(); 865 V_ip6_maxfragsperpacket = 64; 866 if (!IS_DEFAULT_VNET(curvnet)) 867 return; 868 869 ip6_maxfrags = IP6_MAXFRAGS; 870 EVENTHANDLER_REGISTER(nmbclusters_change, 871 frag6_change, NULL, EVENTHANDLER_PRI_ANY); 872 } 873 874 /* 875 * Drain off all datagram fragments. 876 */ 877 void 878 frag6_drain(void) 879 { 880 VNET_ITERATOR_DECL(vnet_iter); 881 struct ip6q *head; 882 uint32_t bucket; 883 884 VNET_LIST_RLOCK_NOSLEEP(); 885 VNET_FOREACH(vnet_iter) { 886 CURVNET_SET(vnet_iter); 887 for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) { 888 if (IP6QB_TRYLOCK(bucket) == 0) 889 continue; 890 head = IP6QB_HEAD(bucket); 891 while (head->ip6q_next != head) { 892 IP6STAT_INC(ip6s_fragdropped); 893 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */ 894 frag6_freef(head->ip6q_next, bucket); 895 } 896 IP6QB_UNLOCK(bucket); 897 } 898 CURVNET_RESTORE(); 899 } 900 VNET_LIST_RUNLOCK_NOSLEEP(); 901 } 902 903 /* 904 * Put an ip fragment on a reassembly chain. 905 * Like insque, but pointers in middle of structure. 906 */ 907 static void 908 frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6, 909 uint32_t bucket __unused) 910 { 911 912 IP6QB_LOCK_ASSERT(bucket); 913 914 af6->ip6af_up = up6; 915 af6->ip6af_down = up6->ip6af_down; 916 up6->ip6af_down->ip6af_up = af6; 917 up6->ip6af_down = af6; 918 } 919 920 /* 921 * To frag6_enq as remque is to insque. 922 */ 923 static void 924 frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused) 925 { 926 927 IP6QB_LOCK_ASSERT(bucket); 928 929 af6->ip6af_up->ip6af_down = af6->ip6af_down; 930 af6->ip6af_down->ip6af_up = af6->ip6af_up; 931 } 932 933 static void 934 frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket) 935 { 936 937 IP6QB_LOCK_ASSERT(bucket); 938 KASSERT(IP6QB_HEAD(bucket) == old, 939 ("%s: attempt to insert at head of wrong bucket" 940 " (bucket=%u, old=%p)", __func__, bucket, old)); 941 942 new->ip6q_prev = old; 943 new->ip6q_next = old->ip6q_next; 944 old->ip6q_next->ip6q_prev= new; 945 old->ip6q_next = new; 946 V_ip6qb[bucket].count++; 947 } 948 949 static void 950 frag6_remque(struct ip6q *p6, uint32_t bucket) 951 { 952 953 IP6QB_LOCK_ASSERT(bucket); 954 955 p6->ip6q_prev->ip6q_next = p6->ip6q_next; 956 p6->ip6q_next->ip6q_prev = p6->ip6q_prev; 957 V_ip6qb[bucket].count--; 958 } 959