1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifndef _INET_IP_IMPL_H 27 #define _INET_IP_IMPL_H 28 29 /* 30 * IP implementation private declarations. These interfaces are 31 * used to build the IP module and are not meant to be accessed 32 * by any modules except IP itself. They are undocumented and are 33 * subject to change without notice. 34 */ 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 #ifdef _KERNEL 41 42 #include <sys/sdt.h> 43 #include <sys/dld.h> 44 45 #define IP_MOD_ID 5701 46 47 #define INET_NAME "ip" 48 49 #ifdef _BIG_ENDIAN 50 #define IP_HDR_CSUM_TTL_ADJUST 256 51 #define IP_TCP_CSUM_COMP IPPROTO_TCP 52 #define IP_UDP_CSUM_COMP IPPROTO_UDP 53 #else 54 #define IP_HDR_CSUM_TTL_ADJUST 1 55 #define IP_TCP_CSUM_COMP (IPPROTO_TCP << 8) 56 #define IP_UDP_CSUM_COMP (IPPROTO_UDP << 8) 57 #endif 58 59 #define TCP_CHECKSUM_OFFSET 16 60 #define TCP_CHECKSUM_SIZE 2 61 62 #define UDP_CHECKSUM_OFFSET 6 63 #define UDP_CHECKSUM_SIZE 2 64 65 #define IPH_TCPH_CHECKSUMP(ipha, hlen) \ 66 ((uint16_t *)(((uchar_t *)(ipha)) + ((hlen) + TCP_CHECKSUM_OFFSET))) 67 68 #define IPH_UDPH_CHECKSUMP(ipha, hlen) \ 69 ((uint16_t *)(((uchar_t *)(ipha)) + ((hlen) + UDP_CHECKSUM_OFFSET))) 70 71 #define ILL_HCKSUM_CAPABLE(ill) \ 72 (((ill)->ill_capabilities & ILL_CAPAB_HCKSUM) != 0) 73 /* 74 * Macro that performs software checksum calculation on the IP header. 75 */ 76 #define IP_HDR_CKSUM(ipha, sum, v_hlen_tos_len, ttl_protocol) { \ 77 (sum) += (ttl_protocol) + (ipha)->ipha_ident + \ 78 ((v_hlen_tos_len) >> 16) + \ 79 ((v_hlen_tos_len) & 0xFFFF) + \ 80 (ipha)->ipha_fragment_offset_and_flags; \ 81 (sum) = (((sum) & 0xFFFF) + ((sum) >> 16)); \ 82 (sum) = ~((sum) + ((sum) >> 16)); \ 83 (ipha)->ipha_hdr_checksum = (uint16_t)(sum); \ 84 } 85 86 #define IS_IP_HDR_HWCKSUM(ipsec, mp, ill) \ 87 ((!ipsec) && (DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) && \ 88 ILL_HCKSUM_CAPABLE(ill) && dohwcksum) 89 90 /* 91 * This macro acts as a wrapper around IP_CKSUM_XMIT_FAST, and it performs 92 * several checks on the IRE and ILL (among other things) in order to see 93 * whether or not hardware checksum offload is allowed for the outgoing 94 * packet. It assumes that the caller has held a reference to the IRE. 95 */ 96 #define IP_CKSUM_XMIT(ill, ire, mp, ihp, up, proto, start, end, \ 97 max_frag, ipsec_len, pseudo) { \ 98 uint32_t _hck_flags; \ 99 /* \ 100 * We offload checksum calculation to hardware when IPsec isn't \ 101 * present and if fragmentation isn't required. We also check \ 102 * if M_DATA fastpath is safe to be used on the corresponding \ 103 * IRE; this check is performed without grabbing ire_lock but \ 104 * instead by holding a reference to it. This is sufficient \ 105 * for IRE_CACHE; for IRE_BROADCAST on non-Ethernet links, the \ 106 * DL_NOTE_FASTPATH_FLUSH indication could come up from the \ 107 * driver and trigger the IRE (hence fp_mp) deletion. This is \ 108 * why only IRE_CACHE type is eligible for offload. \ 109 * \ 110 * The presense of IP options also forces the network stack to \ 111 * calculate the checksum in software. This is because: \ 112 * \ 113 * Wrap around: certain partial-checksum NICs (eri, ce) limit \ 114 * the size of "start offset" width to 6-bit. This effectively \ 115 * sets the largest value of the offset to 64-bytes, starting \ 116 * from the MAC header. When the cumulative MAC and IP headers \ 117 * exceed such limit, the offset will wrap around. This causes \ 118 * the checksum to be calculated at the wrong place. \ 119 * \ 120 * IPv4 source routing: none of the full-checksum capable NICs \ 121 * is capable of correctly handling the IPv4 source-routing \ 122 * option for purposes of calculating the pseudo-header; the \ 123 * actual destination is different from the destination in the \ 124 * header which is that of the next-hop. (This case may not be \ 125 * true for NICs which can parse IPv6 extension headers, but \ 126 * we choose to simplify the implementation by not offloading \ 127 * checksum when they are present.) \ 128 * \ 129 */ \ 130 if ((ill) != NULL && ILL_HCKSUM_CAPABLE(ill) && \ 131 !((ire)->ire_flags & RTF_MULTIRT) && \ 132 (!((ire)->ire_type & IRE_BROADCAST) || \ 133 (ill)->ill_type == IFT_ETHER) && \ 134 (ipsec_len) == 0 && \ 135 (((ire)->ire_ipversion == IPV4_VERSION && \ 136 (start) == IP_SIMPLE_HDR_LENGTH && \ 137 ((ire)->ire_nce != NULL && \ 138 (ire)->ire_nce->nce_fp_mp != NULL && \ 139 MBLKHEAD(mp) >= MBLKL((ire)->ire_nce->nce_fp_mp))) || \ 140 ((ire)->ire_ipversion == IPV6_VERSION && \ 141 (start) == IPV6_HDR_LEN && \ 142 (ire)->ire_nce->nce_fp_mp != NULL && \ 143 MBLKHEAD(mp) >= MBLKL((ire)->ire_nce->nce_fp_mp))) && \ 144 (max_frag) >= (uint_t)((end) + (ipsec_len)) && \ 145 dohwcksum) { \ 146 _hck_flags = (ill)->ill_hcksum_capab->ill_hcksum_txflags; \ 147 } else { \ 148 _hck_flags = 0; \ 149 } \ 150 IP_CKSUM_XMIT_FAST((ire)->ire_ipversion, _hck_flags, mp, ihp, \ 151 up, proto, start, end, pseudo); \ 152 } 153 154 /* 155 * Based on the device capabilities, this macro either marks an outgoing 156 * packet with hardware checksum offload information or calculate the 157 * checksum in software. If the latter is performed, the checksum field 158 * of the dblk is cleared; otherwise it will be non-zero and contain the 159 * necessary flag(s) for the driver. 160 */ 161 #define IP_CKSUM_XMIT_FAST(ipver, hck_flags, mp, ihp, up, proto, start, \ 162 end, pseudo) { \ 163 uint32_t _sum; \ 164 /* \ 165 * Underlying interface supports hardware checksum offload for \ 166 * the payload; leave the payload checksum for the hardware to \ 167 * calculate. N.B: We only need to set up checksum info on the \ 168 * first mblk. \ 169 */ \ 170 DB_CKSUMFLAGS(mp) = 0; \ 171 if (((ipver) == IPV4_VERSION && \ 172 ((hck_flags) & HCKSUM_INET_FULL_V4)) || \ 173 ((ipver) == IPV6_VERSION && \ 174 ((hck_flags) & HCKSUM_INET_FULL_V6))) { \ 175 /* \ 176 * Hardware calculates pseudo-header, header and the \ 177 * payload checksums, so clear the checksum field in \ 178 * the protocol header. \ 179 */ \ 180 *(up) = 0; \ 181 DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM; \ 182 } else if ((hck_flags) & HCKSUM_INET_PARTIAL) { \ 183 /* \ 184 * Partial checksum offload has been enabled. Fill \ 185 * the checksum field in the protocl header with the \ 186 * pseudo-header checksum value. \ 187 */ \ 188 _sum = ((proto) == IPPROTO_UDP) ? \ 189 IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP; \ 190 _sum += *(up) + (pseudo); \ 191 _sum = (_sum & 0xFFFF) + (_sum >> 16); \ 192 *(up) = (_sum & 0xFFFF) + (_sum >> 16); \ 193 /* \ 194 * Offsets are relative to beginning of IP header. \ 195 */ \ 196 DB_CKSUMSTART(mp) = (start); \ 197 DB_CKSUMSTUFF(mp) = ((proto) == IPPROTO_UDP) ? \ 198 (start) + UDP_CHECKSUM_OFFSET : \ 199 (start) + TCP_CHECKSUM_OFFSET; \ 200 DB_CKSUMEND(mp) = (end); \ 201 DB_CKSUMFLAGS(mp) |= HCK_PARTIALCKSUM; \ 202 } else { \ 203 /* \ 204 * Software checksumming. \ 205 */ \ 206 _sum = ((proto) == IPPROTO_UDP) ? \ 207 IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP; \ 208 _sum += (pseudo); \ 209 _sum = IP_CSUM(mp, start, _sum); \ 210 *(up) = (uint16_t)(((proto) == IPPROTO_UDP) ? \ 211 (_sum ? _sum : ~_sum) : _sum); \ 212 } \ 213 /* \ 214 * Hardware supports IP header checksum offload; clear the \ 215 * contents of IP header checksum field as expected by NIC. \ 216 * Do this only if we offloaded either full or partial sum. \ 217 */ \ 218 if ((ipver) == IPV4_VERSION && DB_CKSUMFLAGS(mp) != 0 && \ 219 ((hck_flags) & HCKSUM_IPHDRCKSUM)) { \ 220 DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM; \ 221 ((ipha_t *)(ihp))->ipha_hdr_checksum = 0; \ 222 } \ 223 } 224 225 /* 226 * Macro to inspect the checksum of a fully-reassembled incoming datagram. 227 */ 228 #define IP_CKSUM_RECV_REASS(hck_flags, off, pseudo, sum, err) { \ 229 (err) = B_FALSE; \ 230 if ((hck_flags) & HCK_FULLCKSUM) { \ 231 /* \ 232 * The sum of all fragment checksums should \ 233 * result in -0 (0xFFFF) or otherwise invalid. \ 234 */ \ 235 if ((sum) != 0xFFFF) \ 236 (err) = B_TRUE; \ 237 } else if ((hck_flags) & HCK_PARTIALCKSUM) { \ 238 (sum) += (pseudo); \ 239 (sum) = ((sum) & 0xFFFF) + ((sum) >> 16); \ 240 (sum) = ((sum) & 0xFFFF) + ((sum) >> 16); \ 241 if (~(sum) & 0xFFFF) \ 242 (err) = B_TRUE; \ 243 } else if (((sum) = IP_CSUM(mp, off, pseudo)) != 0) { \ 244 (err) = B_TRUE; \ 245 } \ 246 } 247 248 /* 249 * This macro inspects an incoming packet to see if the checksum value 250 * contained in it is valid; if the hardware has provided the information, 251 * the value is verified, otherwise it performs software checksumming. 252 * The checksum value is returned to caller. 253 */ 254 #define IP_CKSUM_RECV(hck_flags, sum, cksum_start, ulph_off, mp, mp1, err) { \ 255 int32_t _len; \ 256 \ 257 (err) = B_FALSE; \ 258 if ((hck_flags) & HCK_FULLCKSUM) { \ 259 /* \ 260 * Full checksum has been computed by the hardware \ 261 * and has been attached. If the driver wants us to \ 262 * verify the correctness of the attached value, in \ 263 * order to protect against faulty hardware, compare \ 264 * it against -0 (0xFFFF) to see if it's valid. \ 265 */ \ 266 (sum) = DB_CKSUM16(mp); \ 267 if (!((hck_flags) & HCK_FULLCKSUM_OK) && (sum) != 0xFFFF) \ 268 (err) = B_TRUE; \ 269 } else if (((hck_flags) & HCK_PARTIALCKSUM) && \ 270 ((mp1) == NULL || (mp1)->b_cont == NULL) && \ 271 (ulph_off) >= DB_CKSUMSTART(mp) && \ 272 ((_len = (ulph_off) - DB_CKSUMSTART(mp)) & 1) == 0) { \ 273 uint32_t _adj; \ 274 /* \ 275 * Partial checksum has been calculated by hardware \ 276 * and attached to the packet; in addition, any \ 277 * prepended extraneous data is even byte aligned, \ 278 * and there are at most two mblks associated with \ 279 * the packet. If any such data exists, we adjust \ 280 * the checksum; also take care any postpended data. \ 281 */ \ 282 IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, _len, _adj); \ 283 /* \ 284 * One's complement subtract extraneous checksum \ 285 */ \ 286 (sum) += DB_CKSUM16(mp); \ 287 if (_adj >= (sum)) \ 288 (sum) = ~(_adj - (sum)) & 0xFFFF; \ 289 else \ 290 (sum) -= _adj; \ 291 (sum) = ((sum) & 0xFFFF) + ((int)(sum) >> 16); \ 292 (sum) = ((sum) & 0xFFFF) + ((int)(sum) >> 16); \ 293 if (~(sum) & 0xFFFF) \ 294 (err) = B_TRUE; \ 295 } else if (((sum) = IP_CSUM(mp, ulph_off, sum)) != 0) { \ 296 (err) = B_TRUE; \ 297 } \ 298 } 299 300 /* 301 * Macro to adjust a given checksum value depending on any prepended 302 * or postpended data on the packet. It expects the start offset to 303 * begin at an even boundary and that the packet consists of at most 304 * two mblks. 305 */ 306 #define IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj) { \ 307 /* \ 308 * Prepended extraneous data; adjust checksum. \ 309 */ \ 310 if ((len) > 0) \ 311 (adj) = IP_BCSUM_PARTIAL(cksum_start, len, 0); \ 312 else \ 313 (adj) = 0; \ 314 /* \ 315 * len is now the total length of mblk(s) \ 316 */ \ 317 (len) = MBLKL(mp); \ 318 if ((mp1) == NULL) \ 319 (mp1) = (mp); \ 320 else \ 321 (len) += MBLKL(mp1); \ 322 /* \ 323 * Postpended extraneous data; adjust checksum. \ 324 */ \ 325 if (((len) = (DB_CKSUMEND(mp) - len)) > 0) { \ 326 uint32_t _pad; \ 327 \ 328 _pad = IP_BCSUM_PARTIAL((mp1)->b_wptr, len, 0); \ 329 /* \ 330 * If the postpended extraneous data was odd \ 331 * byte aligned, swap resulting checksum bytes. \ 332 */ \ 333 if ((uintptr_t)(mp1)->b_wptr & 1) \ 334 (adj) += ((_pad << 8) & 0xFFFF) | (_pad >> 8); \ 335 else \ 336 (adj) += _pad; \ 337 (adj) = ((adj) & 0xFFFF) + ((int)(adj) >> 16); \ 338 } \ 339 } 340 341 #define ILL_MDT_CAPABLE(ill) \ 342 (((ill)->ill_capabilities & ILL_CAPAB_MDT) != 0) 343 344 /* 345 * ioctl identifier and structure for Multidata Transmit update 346 * private M_CTL communication from IP to ULP. 347 */ 348 #define MDT_IOC_INFO_UPDATE (('M' << 8) + 1020) 349 350 typedef struct ip_mdt_info_s { 351 uint_t mdt_info_id; /* MDT_IOC_INFO_UPDATE */ 352 ill_mdt_capab_t mdt_capab; /* ILL MDT capabilities */ 353 } ip_mdt_info_t; 354 355 /* 356 * Macro that determines whether or not a given ILL is allowed for MDT. 357 */ 358 #define ILL_MDT_USABLE(ill) \ 359 (ILL_MDT_CAPABLE(ill) && \ 360 ill->ill_mdt_capab != NULL && \ 361 ill->ill_mdt_capab->ill_mdt_version == MDT_VERSION_2 && \ 362 ill->ill_mdt_capab->ill_mdt_on != 0) 363 364 #define ILL_LSO_CAPABLE(ill) \ 365 (((ill)->ill_capabilities & ILL_CAPAB_DLD_LSO) != 0) 366 367 /* 368 * ioctl identifier and structure for Large Segment Offload 369 * private M_CTL communication from IP to ULP. 370 */ 371 #define LSO_IOC_INFO_UPDATE (('L' << 24) + ('S' << 16) + ('O' << 8)) 372 373 typedef struct ip_lso_info_s { 374 uint_t lso_info_id; /* LSO_IOC_INFO_UPDATE */ 375 ill_lso_capab_t lso_capab; /* ILL LSO capabilities */ 376 } ip_lso_info_t; 377 378 /* 379 * Macro that determines whether or not a given ILL is allowed for LSO. 380 */ 381 #define ILL_LSO_USABLE(ill) \ 382 (ILL_LSO_CAPABLE(ill) && \ 383 ill->ill_lso_capab != NULL && \ 384 ill->ill_lso_capab->ill_lso_on != 0) 385 386 #define ILL_LSO_TCP_USABLE(ill) \ 387 (ILL_LSO_USABLE(ill) && \ 388 ill->ill_lso_capab->ill_lso_flags & DLD_LSO_TX_BASIC_TCP_IPV4) 389 390 /* 391 * Macro that determines whether or not a given CONN may be considered 392 * for fast path prior to proceeding further with LSO or Multidata. 393 */ 394 #define CONN_IS_LSO_MD_FASTPATH(connp) \ 395 ((connp)->conn_dontroute == 0 && /* SO_DONTROUTE */ \ 396 !((connp)->conn_nexthop_set) && /* IP_NEXTHOP */ \ 397 (connp)->conn_outgoing_ill == NULL) /* IP{V6}_BOUND_IF */ 398 399 /* Definitions for fragmenting IP packets using MDT. */ 400 401 /* 402 * Smaller and private version of pdescinfo_t used specifically for IP, 403 * which allows for only a single payload span per packet. 404 */ 405 typedef struct ip_pdescinfo_s PDESCINFO_STRUCT(2) ip_pdescinfo_t; 406 407 /* 408 * Macro version of ip_can_frag_mdt() which avoids the function call if we 409 * only examine a single message block. 410 */ 411 #define IP_CAN_FRAG_MDT(mp, hdr_len, len) \ 412 (((mp)->b_cont == NULL) ? \ 413 (MBLKL(mp) >= ((hdr_len) + ip_wput_frag_mdt_min)) : \ 414 ip_can_frag_mdt((mp), (hdr_len), (len))) 415 416 /* 417 * Macro that determines whether or not a given IPC requires 418 * outbound IPSEC processing. 419 */ 420 #define CONN_IPSEC_OUT_ENCAPSULATED(connp) \ 421 ((connp)->conn_out_enforce_policy || \ 422 ((connp)->conn_latch != NULL && \ 423 (connp)->conn_latch->ipl_out_policy != NULL)) 424 425 /* 426 * These are used by the synchronous streams code in tcp and udp. 427 * When we set the flags for a wakeup from a synchronous stream we 428 * always set RSLEEP in sd_wakeq, even if we have a read thread waiting 429 * to do the io. This is in case the read thread gets interrupted 430 * before completing the io. The RSLEEP flag in sd_wakeq is used to 431 * indicate that there is data available at the synchronous barrier. 432 * The assumption is that subsequent functions calls through rwnext() 433 * will reset sd_wakeq appropriately. 434 */ 435 #define STR_WAKEUP_CLEAR(stp) { \ 436 mutex_enter(&stp->sd_lock); \ 437 stp->sd_wakeq &= ~RSLEEP; \ 438 mutex_exit(&stp->sd_lock); \ 439 } 440 441 #define STR_WAKEUP_SET(stp) { \ 442 mutex_enter(&stp->sd_lock); \ 443 if (stp->sd_flag & RSLEEP) { \ 444 stp->sd_flag &= ~RSLEEP; \ 445 cv_broadcast(&_RD(stp->sd_wrq)->q_wait); \ 446 } \ 447 stp->sd_wakeq |= RSLEEP; \ 448 mutex_exit(&stp->sd_lock); \ 449 } 450 451 /* 452 * Combined wakeup and sendsig to avoid dropping and reacquiring the 453 * sd_lock. The list of messages waiting at the synchronous barrier is 454 * supplied in order to determine whether a wakeup needs to occur. We 455 * only send a wakeup to the application when necessary, i.e. during 456 * the first enqueue when the received messages list will be NULL. 457 */ 458 #define STR_WAKEUP_SENDSIG(stp, rcv_list) { \ 459 int _events; \ 460 mutex_enter(&stp->sd_lock); \ 461 if (rcv_list == NULL) { \ 462 if (stp->sd_flag & RSLEEP) { \ 463 stp->sd_flag &= ~RSLEEP; \ 464 cv_broadcast(&_RD(stp->sd_wrq)->q_wait); \ 465 } \ 466 stp->sd_wakeq |= RSLEEP; \ 467 } \ 468 if ((_events = stp->sd_sigflags & (S_INPUT | S_RDNORM)) != 0) \ 469 strsendsig(stp->sd_siglist, _events, 0, 0); \ 470 if (stp->sd_rput_opt & SR_POLLIN) { \ 471 stp->sd_rput_opt &= ~SR_POLLIN; \ 472 mutex_exit(&stp->sd_lock); \ 473 pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM); \ 474 } else { \ 475 mutex_exit(&stp->sd_lock); \ 476 } \ 477 } 478 479 #define CONN_UDP_SYNCSTR(connp) \ 480 (IPCL_IS_UDP(connp) && (connp)->conn_udp->udp_direct_sockfs) 481 482 /* 483 * Macro that checks whether or not a particular UDP conn is 484 * flow-controlling on the read-side. If udp module is directly 485 * above ip, check to see if the drain queue is full; note here 486 * that we check this without any lock protection because this 487 * is a coarse granularity inbound flow-control. If the module 488 * above ip is not udp, then use canputnext to determine the 489 * flow-control. 490 * 491 * Note that these checks are done after the conn is found in 492 * the UDP fanout table. 493 * FIXME? Might be faster to check both udp_drain_qfull and canputnext. 494 */ 495 #define CONN_UDP_FLOWCTLD(connp) \ 496 (CONN_UDP_SYNCSTR(connp) ? \ 497 (connp)->conn_udp->udp_drain_qfull : \ 498 !canputnext((connp)->conn_rq)) 499 500 /* Macro that follows definitions of flags for mac_tx() (see mac_client.h) */ 501 #define IP_DROP_ON_NO_DESC 0x01 /* Equivalent to MAC_DROP_ON_NO_DESC */ 502 503 #define ILL_DIRECT_CAPABLE(ill) \ 504 (((ill)->ill_capabilities & ILL_CAPAB_DLD_DIRECT) != 0) 505 506 #define ILL_SEND_TX(ill, ire, hint, mp, flag, connp) { \ 507 if (ILL_DIRECT_CAPABLE(ill) && DB_TYPE(mp) == M_DATA) { \ 508 ill_dld_direct_t *idd; \ 509 uintptr_t cookie; \ 510 conn_t *udp_connp = (conn_t *)connp; \ 511 \ 512 idd = &(ill)->ill_dld_capab->idc_direct; \ 513 /* \ 514 * Send the packet directly to DLD, where it \ 515 * may be queued depending on the availability \ 516 * of transmit resources at the media layer. \ 517 * Ignore the returned value for the time being \ 518 * In future, we may want to take this into \ 519 * account and flow control the TCP. \ 520 */ \ 521 cookie = idd->idd_tx_df(idd->idd_tx_dh, mp, \ 522 (uintptr_t)(hint), flag); \ 523 \ 524 /* \ 525 * non-NULL cookie indicates flow control situation \ 526 * and the cookie itself identifies this specific \ 527 * Tx ring that is blocked. This cookie is used to \ 528 * block the UDP conn that is sending packets over \ 529 * this specific Tx ring. \ 530 */ \ 531 if ((cookie != NULL) && (udp_connp != NULL) && \ 532 (udp_connp->conn_ulp == IPPROTO_UDP)) { \ 533 idl_tx_list_t *idl_txl; \ 534 ip_stack_t *ipst; \ 535 \ 536 /* \ 537 * Flow controlled. \ 538 */ \ 539 DTRACE_PROBE2(ill__send__tx__cookie, \ 540 uintptr_t, cookie, conn_t *, udp_connp); \ 541 ipst = udp_connp->conn_netstack->netstack_ip; \ 542 idl_txl = \ 543 &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];\ 544 mutex_enter(&idl_txl->txl_lock); \ 545 if (udp_connp->conn_direct_blocked || \ 546 (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, \ 547 cookie) == 0)) { \ 548 DTRACE_PROBE1(ill__tx__not__blocked, \ 549 boolean, \ 550 udp_connp->conn_direct_blocked); \ 551 } else if (idl_txl->txl_cookie != NULL && \ 552 idl_txl->txl_cookie != cookie) { \ 553 udp_t *udp = udp_connp->conn_udp; \ 554 udp_stack_t *us = udp->udp_us; \ 555 \ 556 DTRACE_PROBE2(ill__send__tx__collision, \ 557 uintptr_t, cookie, \ 558 uintptr_t, idl_txl->txl_cookie); \ 559 UDP_STAT(us, udp_cookie_coll); \ 560 } else { \ 561 udp_connp->conn_direct_blocked = B_TRUE;\ 562 idl_txl->txl_cookie = cookie; \ 563 conn_drain_insert(udp_connp, idl_txl); \ 564 DTRACE_PROBE1(ill__send__tx__insert, \ 565 conn_t *, udp_connp); \ 566 } \ 567 mutex_exit(&idl_txl->txl_lock); \ 568 } \ 569 } else { \ 570 putnext((ire)->ire_stq, mp); \ 571 } \ 572 } 573 574 #define MBLK_RX_FANOUT_SLOWPATH(mp, ipha) \ 575 (DB_TYPE(mp) != M_DATA || DB_REF(mp) != 1 || !OK_32PTR(ipha) || \ 576 (((uchar_t *)ipha + IP_SIMPLE_HDR_LENGTH) >= (mp)->b_wptr)) 577 578 /* 579 * In non-global zone exclusive IP stacks, data structures such as IRE 580 * entries pretend that they're in the global zone. The following 581 * macro evaluates to the real zoneid instead of a pretend 582 * GLOBAL_ZONEID. 583 */ 584 #define IP_REAL_ZONEID(zoneid, ipst) \ 585 (((zoneid) == GLOBAL_ZONEID) ? \ 586 netstackid_to_zoneid((ipst)->ips_netstack->netstack_stackid) : \ 587 (zoneid)) 588 589 extern int ip_wput_frag_mdt_min; 590 extern boolean_t ip_can_frag_mdt(mblk_t *, ssize_t, ssize_t); 591 extern mblk_t *ip_prepend_zoneid(mblk_t *, zoneid_t, ip_stack_t *); 592 extern void ill_flow_enable(void *, ip_mac_tx_cookie_t); 593 extern zoneid_t ip_get_zoneid_v4(ipaddr_t, mblk_t *, ip_stack_t *, zoneid_t); 594 extern zoneid_t ip_get_zoneid_v6(in6_addr_t *, mblk_t *, const ill_t *, 595 ip_stack_t *, zoneid_t); 596 597 /* 598 * flag passed in by IP based protocols to get a private ip stream with 599 * no conn_t. Note this flag has the same value as SO_FALLBACK 600 */ 601 #define IP_HELPER_STR SO_FALLBACK 602 603 #define IP_MOD_MINPSZ 1 604 #define IP_MOD_MAXPSZ INFPSZ 605 #define IP_MOD_HIWAT 65536 606 #define IP_MOD_LOWAT 1024 607 608 #define DEV_IP "/devices/pseudo/ip@0:ip" 609 #define DEV_IP6 "/devices/pseudo/ip6@0:ip6" 610 611 extern struct kmem_cache *ip_helper_stream_cache; 612 613 #endif /* _KERNEL */ 614 615 #ifdef __cplusplus 616 } 617 #endif 618 619 #endif /* _INET_IP_IMPL_H */ 620