/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_rss.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet/in_systm.h>	/* For ECN definitions. */
#include <netinet/ip.h>		/* For ECN definitions. */

#ifdef MAC
#include <security/mac/mac_framework.h>
#endif

/* Reassembly headers are stored in hash buckets. */
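/*
 * Illustrative numbers (not from the original source): IP6REASS_NHASH_LOG2
 * of 10 yields 1024 buckets, so IP6REASS_HMASK is 0x3ff, the mask applied
 * to the Jenkins hash of (src, dst, ident) when frag6_input() selects a
 * bucket below.
 */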
#define	IP6REASS_NHASH_LOG2	10
#define	IP6REASS_NHASH		(1 << IP6REASS_NHASH_LOG2)
#define	IP6REASS_HMASK		(IP6REASS_NHASH - 1)

static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *,
    uint32_t bucket __unused);
static void frag6_deq(struct ip6asfrag *, uint32_t bucket __unused);
static void frag6_insque_head(struct ip6q *, struct ip6q *,
    uint32_t bucket);
static void frag6_remque(struct ip6q *, uint32_t bucket);
static void frag6_freef(struct ip6q *, uint32_t bucket);

struct ip6qbucket {
	struct ip6q	ip6q;
	struct mtx	lock;
	int		count;
};

struct ip6asfrag {
	struct ip6asfrag *ip6af_down;
	struct ip6asfrag *ip6af_up;
	struct mbuf	*ip6af_m;
	int		ip6af_offset;	/* offset in ip6af_m to next header */
	int		ip6af_frglen;	/* fragmentable part length */
	int		ip6af_off;	/* fragment offset */
	u_int16_t	ip6af_mff;	/* more fragment bit in frag off */
};

#define	IP6_REASS_MBUF(ip6af)	(*(struct mbuf **)&((ip6af)->ip6af_m))

static MALLOC_DEFINE(M_FRAG6, "frag6", "IPv6 fragment reassembly header");

/* System wide (global) maximum and count of packets in reassembly queues. */
static int ip6_maxfrags;
static volatile u_int frag6_nfrags = 0;

/* Maximum and current packets in per-VNET reassembly queue. */
VNET_DEFINE_STATIC(int, ip6_maxfragpackets);
VNET_DEFINE_STATIC(volatile u_int, frag6_nfragpackets);
#define	V_ip6_maxfragpackets	VNET(ip6_maxfragpackets)
#define	V_frag6_nfragpackets	VNET(frag6_nfragpackets)

/* Maximum per-VNET reassembly queues per bucket and fragments per packet. */
VNET_DEFINE_STATIC(int, ip6_maxfragbucketsize);
VNET_DEFINE_STATIC(int, ip6_maxfragsperpacket);
#define	V_ip6_maxfragbucketsize	VNET(ip6_maxfragbucketsize)
#define	V_ip6_maxfragsperpacket	VNET(ip6_maxfragsperpacket)

/* Per-VNET reassembly queue buckets. */
VNET_DEFINE_STATIC(struct ip6qbucket, ip6qb[IP6REASS_NHASH]);
VNET_DEFINE_STATIC(uint32_t, ip6qb_hashseed);
#define	V_ip6qb			VNET(ip6qb)
#define	V_ip6qb_hashseed	VNET(ip6qb_hashseed)

#define	IP6QB_LOCK(_b)		mtx_lock(&V_ip6qb[(_b)].lock)
#define	IP6QB_TRYLOCK(_b)	mtx_trylock(&V_ip6qb[(_b)].lock)
#define	IP6QB_LOCK_ASSERT(_b)	mtx_assert(&V_ip6qb[(_b)].lock, MA_OWNED)
#define	IP6QB_UNLOCK(_b)	mtx_unlock(&V_ip6qb[(_b)].lock)
#define	IP6QB_HEAD(_b)		(&V_ip6qb[(_b)].ip6q)

/*
 * By default, limit the number of IP6 fragments across all reassembly
 * queues to 1/32 of the total number of mbuf clusters.
 *
 * Limit the total number of reassembly queues per VNET to the
 * IP6 fragment limit, but ensure the limit will not allow any bucket
 * to grow above 100 items.  (The bucket limit is
 * IP6_MAXFRAGPACKETS / (IP6REASS_NHASH / 2), so 50 is the correct
 * multiplier to reach a 100-item limit.)
 * The 100-item limit was chosen as brief testing seems to show that
 * this produces "reasonable" performance on some subset of systems
 * under DoS attack.
 */
#define	IP6_MAXFRAGS		(nmbclusters / 32)
#define	IP6_MAXFRAGPACKETS	(imin(IP6_MAXFRAGS, IP6REASS_NHASH * 50))
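/*
 * Worked example (illustrative): when the IP6REASS_NHASH * 50 term wins
 * above, IP6_MAXFRAGPACKETS is 1024 * 50 = 51200 queues, and
 * frag6_set_bucketsize() below derives a per-bucket limit of
 * 51200 / (1024 / 2) = 100 entries.
 */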
/*
 * Sysctls and helper function.
 */
SYSCTL_DECL(_net_inet6_ip6);

static void
frag6_set_bucketsize(void)
{
	int i;

	if ((i = V_ip6_maxfragpackets) > 0)
		V_ip6_maxfragbucketsize = imax(i / (IP6REASS_NHASH / 2), 1);
}

SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
	CTLFLAG_RW, &ip6_maxfrags, 0,
	"Maximum allowed number of outstanding IPv6 packet fragments. "
	"A value of 0 means no fragmented packets will be accepted, while "
	"a value of -1 means no limit");

static int
sysctl_ip6_maxfragpackets(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = V_ip6_maxfragpackets;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || !req->newptr)
		return (error);
	V_ip6_maxfragpackets = val;
	frag6_set_bucketsize();
	return (0);
}
SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, NULL, 0,
	sysctl_ip6_maxfragpackets, "I",
	"Default maximum number of outstanding fragmented IPv6 packets. "
	"A value of 0 means no fragmented packets will be accepted, while "
	"a value of -1 means no limit");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGSPERPACKET, maxfragsperpacket,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragsperpacket), 0,
	"Maximum allowed number of fragments per packet");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGBUCKETSIZE, maxfragbucketsize,
	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragbucketsize), 0,
	"Maximum number of reassembly queues per hash bucket");

/*
 * Remove the IPv6 fragmentation header from the mbuf.
 */
int
ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
{
	struct ip6_hdr *ip6;
	struct mbuf *t;

	/* Delete frag6 header. */
	if (m->m_len >= offset + sizeof(struct ip6_frag)) {

		/* This is the only possible case with !PULLDOWN_TEST. */
		ip6 = mtod(m, struct ip6_hdr *);
		bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
		    offset);
		m->m_data += sizeof(struct ip6_frag);
		m->m_len -= sizeof(struct ip6_frag);
	} else {

		/* This comes with no copy if the boundary is on cluster. */
		if ((t = m_split(m, offset, wait)) == NULL)
			return (ENOMEM);
		m_adj(t, sizeof(struct ip6_frag));
		m_cat(m, t);
	}

	m->m_flags |= M_FRAGMENTED;
	return (0);
}
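/*
 * Sketch of the in-place path above (illustrative): the first 'offset'
 * bytes (IPv6 header plus any extension headers) are copied 8 bytes
 * forward, overwriting the fragment header, and m_data/m_len then skip
 * the stale leading 8 bytes:
 *
 *	before: |v6+ext (offset)|frag (8)|payload|
 *	after:  |stale (8)|v6+ext (offset)|payload|    (m_data += 8)
 */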
/*
 * Free a fragment reassembly header and all associated datagrams.
 */
static void
frag6_freef(struct ip6q *q6, uint32_t bucket)
{
	struct ip6_hdr *ip6;
	struct ip6asfrag *af6, *down6;
	struct mbuf *m;

	IP6QB_LOCK_ASSERT(bucket);

	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
	     af6 = down6) {

		m = IP6_REASS_MBUF(af6);
		down6 = af6->ip6af_down;
		frag6_deq(af6, bucket);

		/*
		 * Return ICMP time exceeded error for the 1st fragment.
		 * Just free other fragments.
		 */
		if (af6->ip6af_off == 0 && m->m_pkthdr.rcvif != NULL) {

			/* Adjust pointer. */
			ip6 = mtod(m, struct ip6_hdr *);

			/* Restore source and destination addresses. */
			ip6->ip6_src = q6->ip6q_src;
			ip6->ip6_dst = q6->ip6q_dst;

			icmp6_error(m, ICMP6_TIME_EXCEEDED,
			    ICMP6_TIME_EXCEED_REASSEMBLY, 0);
		} else
			m_freem(m);

		free(af6, M_FRAG6);
	}
	frag6_remque(q6, bucket);
	atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
	mac_ip6q_destroy(q6);
#endif
	free(q6, M_FRAG6);
	atomic_subtract_int(&V_frag6_nfragpackets, 1);
}

/*
 * Drain off all datagram fragments belonging to
 * the given network interface.
 */
static void
frag6_cleanup(void *arg __unused, struct ifnet *ifp)
{
	struct ip6q *q6, *q6n, *head;
	struct ip6asfrag *af6;
	struct mbuf *m;
	int i;

	KASSERT(ifp != NULL, ("%s: ifp is NULL", __func__));

	CURVNET_SET_QUIET(ifp->if_vnet);
	for (i = 0; i < IP6REASS_NHASH; i++) {
		IP6QB_LOCK(i);
		head = IP6QB_HEAD(i);
		/* Scan fragment list. */
		for (q6 = head->ip6q_next; q6 != head; q6 = q6n) {
			q6n = q6->ip6q_next;

			for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
			     af6 = af6->ip6af_down) {
				m = IP6_REASS_MBUF(af6);

				/* Clear no longer valid rcvif pointer. */
				if (m->m_pkthdr.rcvif == ifp)
					m->m_pkthdr.rcvif = NULL;
			}
		}
		IP6QB_UNLOCK(i);
	}
	CURVNET_RESTORE();
}
EVENTHANDLER_DEFINE(ifnet_departure_event, frag6_cleanup, NULL, 0);

/*
 * As in RFC 2460, the fragment and reassembly rules in RFC 8200 do not
 * agree with each other on the handling of the next header field in the
 * fragment header.  While the sender must use the same value for all
 * fragments of a packet, the receiver is advised not to check them for
 * consistency.
 *
 * Fragment rules (p. 18, p. 19):
 *	(2)  A Fragment header containing:
 *	The Next Header value that identifies the first header
 *	after the Per-Fragment headers of the original packet.
 *		-> The next header field is the same for all fragments.
 *
 * Reassembly rule (p. 20):
 *	The Next Header field of the last header of the Per-Fragment
 *	headers is obtained from the Next Header field of the first
 *	fragment's Fragment header.
 *		-> Grab it from the first fragment only.
 *
 * The following note also contradicts the fragment rule; no one is going
 * to send fragments of one packet with differing next header fields.
 *
 * Additional note (p. 22) [not an error]:
 *	The Next Header values in the Fragment headers of different
 *	fragments of the same original packet may differ.  Only the value
 *	from the Offset zero fragment packet is used for reassembly.
 *		-> Grab it from the first fragment only.
 *
 * There is no explicit reason given in the RFC.  Historical reason maybe?
 */
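/*
 * Calling-convention sketch for frag6_input() below (illustrative summary,
 * not normative): it is invoked from the IPv6 extension-header loop with
 * *offp at the fragment header.  It returns IPPROTO_DONE when the datagram
 * has been consumed (queued awaiting more fragments, dropped, or
 * redispatched via netisr under RSS); otherwise it returns the next-header
 * value with *mp and *offp updated to describe the reassembled packet.
 */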
/*
 * Fragment input.
 */
int
frag6_input(struct mbuf **mp, int *offp, int proto)
{
	struct ifnet *dstifp;
	struct ifnet *srcifp;
	struct in6_ifaddr *ia6;
	struct ip6_hdr *ip6;
	struct ip6_frag *ip6f;
	struct ip6q *head, *q6;
	struct ip6asfrag *af6, *af6dwn, *ip6af;
	struct mbuf *m, *t;
	uint32_t hashkey[(sizeof(struct in6_addr) * 2 +
	    sizeof(ip6f->ip6f_ident)) / sizeof(uint32_t)];
	uint32_t bucket, *hashkeyp;
	int fragoff, frgpartlen;	/* Must be larger than uint16_t. */
	int nxt, offset, plen;
	uint8_t ecn, ecn0;
	bool only_frag;
#ifdef RSS
	struct ip6_direct_ctx *ip6dc;
	struct m_tag *mtag;
#endif

	m = *mp;
	offset = *offp;

	ip6 = mtod(m, struct ip6_hdr *);
#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
	ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
#else
	IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
	if (ip6f == NULL)
		return (IPPROTO_DONE);
#endif

	/*
	 * Store receive network interface pointer for later.
	 */
	srcifp = m->m_pkthdr.rcvif;

	dstifp = NULL;
	/* Find the destination interface of the packet. */
	ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
	if (ia6 != NULL) {
		dstifp = ia6->ia_ifp;
		ifa_free(&ia6->ia_ifa);
	}

	/* Jumbo payload cannot contain a fragment header. */
	if (ip6->ip6_plen == 0) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
		in6_ifstat_inc(dstifp, ifs6_reass_fail);
		return (IPPROTO_DONE);
	}

	/*
	 * Check that the fragment's payload length is a multiple
	 * of 8 octets (unless it is the last one).
	 * sizeof(struct ip6_frag) == 8
	 * sizeof(struct ip6_hdr) == 40
	 */
	if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
	    (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    offsetof(struct ip6_hdr, ip6_plen));
		in6_ifstat_inc(dstifp, ifs6_reass_fail);
		return (IPPROTO_DONE);
	}

	IP6STAT_INC(ip6s_fragments);
	in6_ifstat_inc(dstifp, ifs6_reass_reqd);

	/* Offset now points to data portion. */
	offset += sizeof(struct ip6_frag);

	/*
	 * Handle "atomic" fragments (offset and m bit set to 0) upfront,
	 * unrelated to any reassembly.  Still need to remove the frag hdr.
	 * See RFC 6946 and section 4.5 of RFC 8200.
	 */
	if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
		IP6STAT_INC(ip6s_atomicfrags);
		/* XXX-BZ handle correctly. */
		in6_ifstat_inc(dstifp, ifs6_reass_ok);
		*offp = offset;
		m->m_flags |= M_FRAGMENTED;
		return (ip6f->ip6f_nxt);
	}

	/* Get fragment length and discard 0-byte fragments. */
	frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
	if (frgpartlen == 0) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    offsetof(struct ip6_hdr, ip6_plen));
		in6_ifstat_inc(dstifp, ifs6_reass_fail);
		IP6STAT_INC(ip6s_fragdropped);
		return (IPPROTO_DONE);
	}

	/* Generate a hash value for fragment bucket selection. */
	hashkeyp = hashkey;
	memcpy(hashkeyp, &ip6->ip6_src, sizeof(struct in6_addr));
	hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
	memcpy(hashkeyp, &ip6->ip6_dst, sizeof(struct in6_addr));
	hashkeyp += sizeof(struct in6_addr) / sizeof(*hashkeyp);
	*hashkeyp = ip6f->ip6f_ident;
	bucket = jenkins_hash32(hashkey, nitems(hashkey), V_ip6qb_hashseed);
	bucket &= IP6REASS_HMASK;
	head = IP6QB_HEAD(bucket);
	IP6QB_LOCK(bucket);
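	/*
	 * Layout sketch of the key hashed above (illustrative): hashkey[]
	 * holds nine 32-bit words, hashed with the per-VNET seed:
	 *
	 *	hashkey[0..3]	IPv6 source address      (16 bytes)
	 *	hashkey[4..7]	IPv6 destination address (16 bytes)
	 *	hashkey[8]	fragment identification  (4 bytes)
	 */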
	/*
	 * Enforce upper bound on number of fragments for the entire system.
	 * If maxfrag is 0, never accept fragments.
	 * If maxfrag is -1, accept all fragments without limitation.
	 */
	if (ip6_maxfrags < 0)
		;
	else if (atomic_load_int(&frag6_nfrags) >= (u_int)ip6_maxfrags)
		goto dropfrag;

	for (q6 = head->ip6q_next; q6 != head; q6 = q6->ip6q_next)
		if (ip6f->ip6f_ident == q6->ip6q_ident &&
		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
#ifdef MAC
		    && mac_ip6q_match(m, q6)
#endif
		    )
			break;

	only_frag = false;
	if (q6 == head) {

		/* A first fragment to arrive creates a reassembly queue. */
		only_frag = true;

		/*
		 * Enforce upper bound on number of fragmented packets
		 * for which we attempt reassembly:
		 * If maxfragpackets is 0, never accept fragments.
		 * If maxfragpackets is -1, accept all fragments without
		 * limitation.
		 */
		if (V_ip6_maxfragpackets < 0)
			;
		else if (V_ip6qb[bucket].count >= V_ip6_maxfragbucketsize ||
		    atomic_load_int(&V_frag6_nfragpackets) >=
		    (u_int)V_ip6_maxfragpackets)
			goto dropfrag;
		atomic_add_int(&V_frag6_nfragpackets, 1);

		/* Allocate IPv6 fragment packet queue entry. */
		q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FRAG6,
		    M_NOWAIT | M_ZERO);
		if (q6 == NULL)
			goto dropfrag;
#ifdef MAC
		if (mac_ip6q_init(q6, M_NOWAIT) != 0) {
			free(q6, M_FRAG6);
			goto dropfrag;
		}
		mac_ip6q_create(m, q6);
#endif
		frag6_insque_head(q6, head, bucket);

		/* ip6q_nxt will be filled afterwards, from 1st fragment. */
		q6->ip6q_down	= q6->ip6q_up = (struct ip6asfrag *)q6;
#ifdef notyet
		q6->ip6q_nxtp	= (u_char *)nxtp;
#endif
		q6->ip6q_ident	= ip6f->ip6f_ident;
		q6->ip6q_ttl	= IPV6_FRAGTTL;
		q6->ip6q_src	= ip6->ip6_src;
		q6->ip6q_dst	= ip6->ip6_dst;
		q6->ip6q_ecn	=
		    (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */

		q6->ip6q_nfrag = 0;
	}

	/*
	 * If it is the 1st fragment, record the length of the
	 * unfragmentable part and the next header of the fragment header.
	 */
	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
	if (fragoff == 0) {
		q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
		    sizeof(struct ip6_frag);
		q6->ip6q_nxt = ip6f->ip6f_nxt;
	}

	/*
	 * Check that the reassembled packet would not exceed 65535 bytes
	 * in size.
	 * If it would exceed, discard the fragment and return an ICMP error.
	 */
	if (q6->ip6q_unfrglen >= 0) {
		/* The 1st fragment has already arrived. */
		if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
			    offset - sizeof(struct ip6_frag) +
			    offsetof(struct ip6_frag, ip6f_offlg));
			IP6QB_UNLOCK(bucket);
			return (IPPROTO_DONE);
		}
	} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
		    offset - sizeof(struct ip6_frag) +
		    offsetof(struct ip6_frag, ip6f_offlg));
		IP6QB_UNLOCK(bucket);
		return (IPPROTO_DONE);
	}
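	/*
	 * Size-check sketch (illustrative): ip6q_unfrglen counts the
	 * extension-header bytes between the IPv6 header and the fragment
	 * header.  Adding the fragment's byte offset and length gives the
	 * reassembled payload size, which must stay within IPV6_MAXPACKET
	 * (65535).  Until the first fragment arrives (ip6q_unfrglen == -1),
	 * only fragoff + frgpartlen can be checked.
	 */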
	/*
	 * If it is the first fragment, do the above check for each
	 * fragment already stored in the reassembly queue.
	 */
	if (fragoff == 0) {
		for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
		     af6 = af6dwn) {
			af6dwn = af6->ip6af_down;

			if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
			    IPV6_MAXPACKET) {
				struct ip6_hdr *ip6err;
				struct mbuf *merr;
				int erroff;

				merr = IP6_REASS_MBUF(af6);
				erroff = af6->ip6af_offset;

				/* Dequeue the fragment. */
				frag6_deq(af6, bucket);
				free(af6, M_FRAG6);

				/* Set a valid receive interface pointer. */
				merr->m_pkthdr.rcvif = srcifp;

				/* Adjust pointer. */
				ip6err = mtod(merr, struct ip6_hdr *);

				/*
				 * Restore source and destination addresses
				 * in the erroneous IPv6 header.
				 */
				ip6err->ip6_src = q6->ip6q_src;
				ip6err->ip6_dst = q6->ip6q_dst;

				icmp6_error(merr, ICMP6_PARAM_PROB,
				    ICMP6_PARAMPROB_HEADER,
				    erroff - sizeof(struct ip6_frag) +
				    offsetof(struct ip6_frag, ip6f_offlg));
			}
		}
	}

	/* Allocate an IPv6 fragment queue entry for this fragmented part. */
	ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FRAG6,
	    M_NOWAIT | M_ZERO);
	if (ip6af == NULL)
		goto dropfrag;
	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
	ip6af->ip6af_off = fragoff;
	ip6af->ip6af_frglen = frgpartlen;
	ip6af->ip6af_offset = offset;
	IP6_REASS_MBUF(ip6af) = m;

	if (only_frag) {
		af6 = (struct ip6asfrag *)q6;
		goto insert;
	}

	/* Do duplicate, condition, and boundary checks. */
	/*
	 * Handle ECN by comparing this segment with the first one;
	 * if CE is set, do not lose CE.
	 * Drop if CE and not-ECT are mixed for the same packet.
	 */
	ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
	ecn0 = q6->ip6q_ecn;
	if (ecn == IPTOS_ECN_CE) {
		if (ecn0 == IPTOS_ECN_NOTECT) {
			free(ip6af, M_FRAG6);
			goto dropfrag;
		}
		if (ecn0 != IPTOS_ECN_CE)
			q6->ip6q_ecn = IPTOS_ECN_CE;
	}
	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
		free(ip6af, M_FRAG6);
		goto dropfrag;
	}

	/* Find a fragmented part which begins after this one does. */
	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
	     af6 = af6->ip6af_down)
		if (af6->ip6af_off > ip6af->ip6af_off)
			break;

	/*
	 * If the incoming fragment overlaps some existing fragments in
	 * the reassembly queue, drop both the new fragment and the
	 * entire reassembly queue.  However, if the new fragment
	 * is an exact duplicate of an existing fragment, only silently
	 * drop the existing fragment and leave the fragmentation queue
	 * unchanged, as allowed by the RFC.  (RFC 8200, 4.5)
	 */
	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
		if (af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen -
		    ip6af->ip6af_off > 0) {
			free(ip6af, M_FRAG6);
			goto dropfrag;
		}
	}
	if (af6 != (struct ip6asfrag *)q6) {
		if (ip6af->ip6af_off + ip6af->ip6af_frglen -
		    af6->ip6af_off > 0) {
			free(ip6af, M_FRAG6);
			goto dropfrag;
		}
	}
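	/*
	 * ECN combination summary for the checks above (illustrative sketch,
	 * derived from the code rather than stated in it); "first" is the
	 * value recorded from the first-arrived fragment (ip6q_ecn):
	 *
	 *	new \ first	not-ECT		ECT(0)/ECT(1)	CE
	 *	not-ECT		keep		drop new	drop new
	 *	ECT(0)/ECT(1)	keep		keep		keep
	 *	CE		drop new	keep, mark CE	keep
	 */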
insert:
#ifdef MAC
	if (!only_frag)
		mac_ip6q_update(m, q6);
#endif

	/*
	 * Stick new segment in its place; check for complete reassembly.
	 * If not complete, check fragment limit.  Move to front of packet
	 * queue, as we are the most recently active fragmented packet.
	 */
	frag6_enq(ip6af, af6->ip6af_up, bucket);
	atomic_add_int(&frag6_nfrags, 1);
	q6->ip6q_nfrag++;
	plen = 0;
	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
	     af6 = af6->ip6af_down) {
		if (af6->ip6af_off != plen) {
			if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
				IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag);
				frag6_freef(q6, bucket);
			}
			IP6QB_UNLOCK(bucket);
			return (IPPROTO_DONE);
		}
		plen += af6->ip6af_frglen;
	}
	if (af6->ip6af_up->ip6af_mff) {
		if (q6->ip6q_nfrag > V_ip6_maxfragsperpacket) {
			IP6STAT_ADD(ip6s_fragdropped, q6->ip6q_nfrag);
			frag6_freef(q6, bucket);
		}
		IP6QB_UNLOCK(bucket);
		return (IPPROTO_DONE);
	}

	/* Reassembly is complete; concatenate fragments. */
	ip6af = q6->ip6q_down;
	t = m = IP6_REASS_MBUF(ip6af);
	af6 = ip6af->ip6af_down;
	frag6_deq(ip6af, bucket);
	while (af6 != (struct ip6asfrag *)q6) {
		m->m_pkthdr.csum_flags &=
		    IP6_REASS_MBUF(af6)->m_pkthdr.csum_flags;
		m->m_pkthdr.csum_data +=
		    IP6_REASS_MBUF(af6)->m_pkthdr.csum_data;

		af6dwn = af6->ip6af_down;
		frag6_deq(af6, bucket);
		while (t->m_next)
			t = t->m_next;
		m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
		m_demote_pkthdr(IP6_REASS_MBUF(af6));
		m_cat(t, IP6_REASS_MBUF(af6));
		free(af6, M_FRAG6);
		af6 = af6dwn;
	}

	while (m->m_pkthdr.csum_data & 0xffff0000)
		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
		    (m->m_pkthdr.csum_data >> 16);
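	/*
	 * Folding sketch (illustrative): the summed per-fragment hardware
	 * checksums may carry into the upper 16 bits, which the loop above
	 * folds back one's-complement style, e.g.
	 * 0x1fffe -> 0xfffe + 0x1 = 0xffff.
	 */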
	/* Adjust offset to point where the original next header starts. */
	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
	free(ip6af, M_FRAG6);
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_plen = htons((u_short)plen + offset - sizeof(struct ip6_hdr));
	if (q6->ip6q_ecn == IPTOS_ECN_CE)
		ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
	nxt = q6->ip6q_nxt;

	if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
		frag6_remque(q6, bucket);
		atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
		mac_ip6q_destroy(q6);
#endif
		free(q6, M_FRAG6);
		atomic_subtract_int(&V_frag6_nfragpackets, 1);

		goto dropfrag;
	}

	/* Set nxt(-hdr field value) to the original value. */
	m_copyback(m, ip6_get_prevhdr(m, offset), sizeof(uint8_t),
	    (caddr_t)&nxt);

	frag6_remque(q6, bucket);
	atomic_subtract_int(&frag6_nfrags, q6->ip6q_nfrag);
#ifdef MAC
	mac_ip6q_reassemble(q6, m);
	mac_ip6q_destroy(q6);
#endif
	free(q6, M_FRAG6);
	atomic_subtract_int(&V_frag6_nfragpackets, 1);

	if (m->m_flags & M_PKTHDR) {	/* Isn't it always true? */

		plen = 0;
		for (t = m; t; t = t->m_next)
			plen += t->m_len;
		m->m_pkthdr.len = plen;
		/* Set a valid receive interface pointer. */
		m->m_pkthdr.rcvif = srcifp;
	}

#ifdef RSS
	mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc),
	    M_NOWAIT);
	if (mtag == NULL)
		goto dropfrag;

	ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
	ip6dc->ip6dc_nxt = nxt;
	ip6dc->ip6dc_off = offset;

	m_tag_prepend(m, mtag);
#endif

	IP6QB_UNLOCK(bucket);
	IP6STAT_INC(ip6s_reassembled);
	in6_ifstat_inc(dstifp, ifs6_reass_ok);

#ifdef RSS
	/* Queue/dispatch for reprocessing. */
	netisr_dispatch(NETISR_IPV6_DIRECT, m);
	return (IPPROTO_DONE);
#endif

	/* Tell launch routine the next header. */
	*mp = m;
	*offp = offset;

	return (nxt);

dropfrag:
	IP6QB_UNLOCK(bucket);
	in6_ifstat_inc(dstifp, ifs6_reass_fail);
	IP6STAT_INC(ip6s_fragdropped);
	m_freem(m);
	return (IPPROTO_DONE);
}

/*
 * IPv6 reassembling timer processing;
 * if a timer expires on a reassembly queue, discard it.
 */
void
frag6_slowtimo(void)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ip6q *head, *q6;
	uint32_t bucket;

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
			IP6QB_LOCK(bucket);
			head = IP6QB_HEAD(bucket);
			q6 = head->ip6q_next;
			if (q6 == NULL) {
				/*
				 * XXXJTL: This should never happen.  This
				 * should turn into an assertion.
				 */
				IP6QB_UNLOCK(bucket);
				continue;
			}
			while (q6 != head) {
				--q6->ip6q_ttl;
				q6 = q6->ip6q_next;
				if (q6->ip6q_prev->ip6q_ttl == 0) {
					IP6STAT_ADD(ip6s_fragtimeout,
					    q6->ip6q_prev->ip6q_nfrag);
					/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
					frag6_freef(q6->ip6q_prev, bucket);
				}
			}
			/*
			 * If we are over the maximum number of fragments
			 * (due to the limit being lowered), drain off
			 * enough to get down to the new limit.
			 * Note that we drain all reassembly queues if
			 * maxfragpackets is 0 (fragmentation is disabled),
			 * and do not enforce a limit when maxfragpackets
			 * is negative.
			 */
			while ((V_ip6_maxfragpackets == 0 ||
			    (V_ip6_maxfragpackets > 0 &&
			    V_ip6qb[bucket].count > V_ip6_maxfragbucketsize)) &&
			    head->ip6q_prev != head) {
				IP6STAT_ADD(ip6s_fragoverflow,
				    q6->ip6q_prev->ip6q_nfrag);
				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
				frag6_freef(head->ip6q_prev, bucket);
			}
			IP6QB_UNLOCK(bucket);
		}
		/*
		 * If we are still over the maximum number of fragmented
		 * packets, drain off enough to get down to the new limit.
		 */
		bucket = 0;
		while (V_ip6_maxfragpackets >= 0 &&
		    atomic_load_int(&V_frag6_nfragpackets) >
		    (u_int)V_ip6_maxfragpackets) {
			IP6QB_LOCK(bucket);
			head = IP6QB_HEAD(bucket);
			if (head->ip6q_prev != head) {
				IP6STAT_ADD(ip6s_fragoverflow,
				    q6->ip6q_prev->ip6q_nfrag);
				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
				frag6_freef(head->ip6q_prev, bucket);
			}
			IP6QB_UNLOCK(bucket);
			bucket = (bucket + 1) % IP6REASS_NHASH;
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/*
 * Eventhandler to adjust limits in case nmbclusters changes.
 */
static void
frag6_change(void *tag)
{
	VNET_ITERATOR_DECL(vnet_iter);

	ip6_maxfrags = IP6_MAXFRAGS;
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
		frag6_set_bucketsize();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}
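/*
 * Timing note (illustrative): frag6_slowtimo() above is driven by the
 * protocol slow timeout, so a queue created with ip6q_ttl = IPV6_FRAGTTL
 * is discarded after that many slow-timeout ticks unless reassembly
 * completes first.
 */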
/*
 * Initialise reassembly queue and fragment identifier.
 */
void
frag6_init(void)
{
	struct ip6q *q6;
	uint32_t bucket;

	V_ip6_maxfragpackets = IP6_MAXFRAGPACKETS;
	frag6_set_bucketsize();
	for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
		q6 = IP6QB_HEAD(bucket);
		q6->ip6q_next = q6->ip6q_prev = q6;
		mtx_init(&V_ip6qb[bucket].lock, "ip6qlock", NULL, MTX_DEF);
		V_ip6qb[bucket].count = 0;
	}
	V_ip6qb_hashseed = arc4random();
	V_ip6_maxfragsperpacket = 64;
	if (!IS_DEFAULT_VNET(curvnet))
		return;

	ip6_maxfrags = IP6_MAXFRAGS;
	EVENTHANDLER_REGISTER(nmbclusters_change,
	    frag6_change, NULL, EVENTHANDLER_PRI_ANY);
}

/*
 * Drain off all datagram fragments.
 */
void
frag6_drain(void)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ip6q *head;
	uint32_t bucket;

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (bucket = 0; bucket < IP6REASS_NHASH; bucket++) {
			if (IP6QB_TRYLOCK(bucket) == 0)
				continue;
			head = IP6QB_HEAD(bucket);
			while (head->ip6q_next != head) {
				IP6STAT_INC(ip6s_fragdropped);
				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
				frag6_freef(head->ip6q_next, bucket);
			}
			IP6QB_UNLOCK(bucket);
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/*
 * Put an IPv6 fragment on a reassembly chain.
 * Like insque, but pointers in middle of structure.
 */
static void
frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6,
    uint32_t bucket __unused)
{

	IP6QB_LOCK_ASSERT(bucket);

	af6->ip6af_up = up6;
	af6->ip6af_down = up6->ip6af_down;
	up6->ip6af_down->ip6af_up = af6;
	up6->ip6af_down = af6;
}

/*
 * To frag6_enq as remque is to insque.
 */
static void
frag6_deq(struct ip6asfrag *af6, uint32_t bucket __unused)
{

	IP6QB_LOCK_ASSERT(bucket);

	af6->ip6af_up->ip6af_down = af6->ip6af_down;
	af6->ip6af_down->ip6af_up = af6->ip6af_up;
}

static void
frag6_insque_head(struct ip6q *new, struct ip6q *old, uint32_t bucket)
{

	IP6QB_LOCK_ASSERT(bucket);
	KASSERT(IP6QB_HEAD(bucket) == old,
	    ("%s: attempt to insert at head of wrong bucket"
	    " (bucket=%u, old=%p)", __func__, bucket, old));

	new->ip6q_prev = old;
	new->ip6q_next = old->ip6q_next;
	old->ip6q_next->ip6q_prev = new;
	old->ip6q_next = new;
	V_ip6qb[bucket].count++;
}

static void
frag6_remque(struct ip6q *p6, uint32_t bucket)
{

	IP6QB_LOCK_ASSERT(bucket);

	p6->ip6q_prev->ip6q_next = p6->ip6q_next;
	p6->ip6q_next->ip6q_prev = p6->ip6q_prev;
	V_ip6qb[bucket].count--;
}