1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * IEEE 802.3ad Link Aggregation - Send code. 30 * 31 * Implements the Distributor function. 32 */ 33 34 #include <sys/conf.h> 35 #include <sys/modctl.h> 36 #include <sys/sunddi.h> 37 #include <sys/vlan.h> 38 #include <sys/strsun.h> 39 #include <sys/strsubr.h> 40 41 #include <inet/common.h> 42 #include <inet/led.h> 43 #include <inet/ip.h> 44 #include <inet/ip6.h> 45 #include <inet/tcp.h> 46 #include <netinet/udp.h> 47 #include <inet/ipsec_impl.h> 48 #include <inet/sadb.h> 49 #include <inet/ipsecesp.h> 50 #include <inet/ipsecah.h> 51 52 #include <sys/aggr.h> 53 #include <sys/aggr_impl.h> 54 55 #define HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3]) 56 #define HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5]) 57 58 static uint16_t aggr_send_ip6_hdr_len(mblk_t *, ip6_t *); 59 60 static uint_t 61 aggr_send_port(aggr_grp_t *grp, mblk_t *mp) 62 { 63 struct ether_header *ehp; 64 uint16_t sap; 65 uint_t skip_len; 66 uint8_t proto; 67 uint32_t policy = grp->lg_tx_policy; 68 uint32_t hash = 0; 69 70 ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 71 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 72 73 /* compute MAC hash */ 74 75 ehp = (struct ether_header *)mp->b_rptr; 76 77 if (policy & AGGR_POLICY_L2) { 78 uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 79 uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 80 hash = HASH_MAC(mac_src) ^ HASH_MAC(mac_dst); 81 policy &= ~AGGR_POLICY_L2; 82 } 83 84 if (policy == 0) 85 goto done; 86 87 /* skip ethernet header */ 88 89 if (ntohs(ehp->ether_type) == ETHERTYPE_VLAN) { 90 struct ether_vlan_header *evhp; 91 92 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 93 evhp = (struct ether_vlan_header *)mp->b_rptr; 94 sap = ntohs(evhp->ether_type); 95 skip_len = sizeof (struct ether_vlan_header); 96 } else { 97 sap = ntohs(ehp->ether_type); 98 skip_len = sizeof (struct ether_header); 99 } 100 101 /* if ethernet header is in its own mblk, skip it */ 102 if (MBLKL(mp) <= skip_len) { 103 skip_len -= MBLKL(mp); 104 mp = mp->b_cont; 105 } 106 107 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 108 109 /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 110 111 switch (sap) { 112 case ETHERTYPE_IP: { 113 ipha_t *iphp; 114 115 ASSERT(MBLKL(mp) >= skip_len + sizeof (ipha_t)); 116 iphp = (ipha_t *)(mp->b_rptr + skip_len); 117 proto = iphp->ipha_protocol; 118 skip_len += IPH_HDR_LENGTH(iphp); 119 120 if (policy & AGGR_POLICY_L3) { 121 uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src); 122 uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst); 123 124 hash ^= (HASH_4BYTES(ip_src) ^ HASH_4BYTES(ip_dst)); 125 policy &= ~AGGR_POLICY_L3; 126 } 127 break; 128 } 129 case ETHERTYPE_IPV6: { 130 ip6_t *ip6hp; 131 132 /* 133 * if ipv6 packet has options, the proto will not be one of the 134 * ones handled by the ULP processor below, and will return 0 135 * as the index 136 */ 137 ASSERT(MBLKL(mp) >= skip_len + sizeof (ip6_t)); 138 ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 139 proto = ip6hp->ip6_nxt; 140 skip_len += aggr_send_ip6_hdr_len(mp, ip6hp); 141 142 if (policy & AGGR_POLICY_L3) { 143 uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); 144 uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); 145 146 hash ^= (HASH_4BYTES(ip_src) ^ HASH_4BYTES(ip_dst)); 147 policy &= ~AGGR_POLICY_L3; 148 } 149 break; 150 } 151 default: 152 goto done; 153 } 154 155 if (!(policy & AGGR_POLICY_L4)) 156 goto done; 157 158 /* if ip header is in its own mblk, skip it */ 159 if (MBLKL(mp) <= skip_len) { 160 skip_len -= MBLKL(mp); 161 mp = mp->b_cont; 162 } 163 164 /* parse ULP header */ 165 again: 166 switch (proto) { 167 case IPPROTO_TCP: 168 case IPPROTO_UDP: 169 case IPPROTO_ESP: 170 case IPPROTO_SCTP: 171 /* 172 * These Internet Protocols are intentionally designed 173 * for hashing from the git-go. Port numbers are in the first 174 * word for transports, SPI is first for ESP. 175 */ 176 hash ^= HASH_4BYTES((mp->b_rptr + skip_len)); 177 break; 178 179 case IPPROTO_AH: { 180 ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 181 182 uint_t ah_length = AH_TOTAL_LEN(ah); 183 proto = ah->ah_nexthdr; 184 skip_len += ah_length; 185 186 /* if ip header is in its own mblk, skip it */ 187 if (MBLKL(mp) <= skip_len) { 188 skip_len -= MBLKL(mp); 189 mp = mp->b_cont; 190 } 191 192 goto again; 193 } 194 } 195 196 done: 197 return (hash % grp->lg_ntx_ports); 198 } 199 200 /* 201 * Update the TX load balancing policy of the specified group. 202 */ 203 void 204 aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) 205 { 206 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 207 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 208 209 grp->lg_tx_policy = policy; 210 } 211 212 /* 213 * Send function invoked by the MAC service module. 214 */ 215 mblk_t * 216 aggr_m_tx(void *arg, mblk_t *mp) 217 { 218 aggr_grp_t *grp = arg; 219 aggr_port_t *port; 220 mblk_t *nextp; 221 const mac_txinfo_t *mtp; 222 223 for (;;) { 224 rw_enter(&grp->lg_lock, RW_READER); 225 if (grp->lg_ntx_ports == 0) { 226 /* 227 * We could have returned from aggr_m_start() before 228 * the ports were actually attached. Drop the chain. 229 */ 230 rw_exit(&grp->lg_lock); 231 freemsgchain(mp); 232 return (NULL); 233 } 234 nextp = mp->b_next; 235 mp->b_next = NULL; 236 237 port = grp->lg_tx_ports[aggr_send_port(grp, mp)]; 238 ASSERT(port->lp_state == AGGR_PORT_STATE_ATTACHED); 239 240 rw_exit(&grp->lg_lock); 241 242 /* 243 * We store the transmit info pointer locally in case it 244 * changes between loading mt_fn and mt_arg. 245 */ 246 mtp = port->lp_txinfo; 247 if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) { 248 mp->b_next = nextp; 249 break; 250 } 251 252 if ((mp = nextp) == NULL) 253 break; 254 } 255 return (mp); 256 } 257 258 /* 259 * Enable sending on the specified port. 260 */ 261 void 262 aggr_send_port_enable(aggr_port_t *port) 263 { 264 aggr_grp_t *grp = port->lp_grp; 265 266 if (port->lp_tx_enabled || (port->lp_state != 267 AGGR_PORT_STATE_ATTACHED)) { 268 /* already enabled or port not yet attached */ 269 return; 270 } 271 272 /* 273 * Add to group's array of tx ports. 274 */ 275 if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) { 276 /* current array too small */ 277 aggr_port_t **new_ports; 278 uint_t new_size; 279 280 new_size = grp->lg_ntx_ports+1; 281 new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *), 282 KM_SLEEP); 283 284 if (grp->lg_tx_ports_size > 0) { 285 ASSERT(grp->lg_tx_ports != NULL); 286 bcopy(grp->lg_tx_ports, new_ports, 287 grp->lg_ntx_ports * sizeof (aggr_port_t *)); 288 kmem_free(grp->lg_tx_ports, 289 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 290 } 291 292 grp->lg_tx_ports = new_ports; 293 grp->lg_tx_ports_size = new_size; 294 } 295 296 grp->lg_tx_ports[grp->lg_ntx_ports++] = port; 297 port->lp_tx_idx = grp->lg_ntx_ports-1; 298 299 port->lp_tx_enabled = B_TRUE; 300 } 301 302 /* 303 * Disable sending from the specified port. 304 */ 305 void 306 aggr_send_port_disable(aggr_port_t *port) 307 { 308 uint_t idx, ntx; 309 aggr_grp_t *grp = port->lp_grp; 310 311 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 312 313 if (!port->lp_tx_enabled) { 314 /* not yet enabled */ 315 return; 316 } 317 318 idx = port->lp_tx_idx; 319 ntx = grp->lg_ntx_ports; 320 ASSERT(idx < ntx); 321 322 /* remove from array of attached ports */ 323 if (idx == (ntx - 1)) { 324 grp->lg_tx_ports[idx] = NULL; 325 } else { 326 /* not the last entry, replace with last one */ 327 aggr_port_t *victim; 328 329 victim = grp->lg_tx_ports[ntx - 1]; 330 grp->lg_tx_ports[ntx - 1] = NULL; 331 victim->lp_tx_idx = idx; 332 grp->lg_tx_ports[idx] = victim; 333 } 334 335 port->lp_tx_idx = 0; 336 grp->lg_ntx_ports--; 337 338 port->lp_tx_enabled = B_FALSE; 339 } 340 341 static uint16_t 342 aggr_send_ip6_hdr_len(mblk_t *mp, ip6_t *ip6h) 343 { 344 uint16_t length; 345 uint_t ehdrlen; 346 uint8_t *nexthdrp; 347 uint8_t *whereptr; 348 uint8_t *endptr; 349 ip6_dest_t *desthdr; 350 ip6_rthdr_t *rthdr; 351 ip6_frag_t *fraghdr; 352 353 length = IPV6_HDR_LEN; 354 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 355 endptr = mp->b_wptr; 356 357 nexthdrp = &ip6h->ip6_nxt; 358 while (whereptr < endptr) { 359 switch (*nexthdrp) { 360 case IPPROTO_HOPOPTS: 361 case IPPROTO_DSTOPTS: 362 /* Assumes the headers are identical for hbh and dst */ 363 desthdr = (ip6_dest_t *)whereptr; 364 ehdrlen = 8 * (desthdr->ip6d_len + 1); 365 nexthdrp = &desthdr->ip6d_nxt; 366 break; 367 case IPPROTO_ROUTING: 368 rthdr = (ip6_rthdr_t *)whereptr; 369 ehdrlen = 8 * (rthdr->ip6r_len + 1); 370 nexthdrp = &rthdr->ip6r_nxt; 371 break; 372 case IPPROTO_FRAGMENT: 373 fraghdr = (ip6_frag_t *)whereptr; 374 ehdrlen = sizeof (ip6_frag_t); 375 nexthdrp = &fraghdr->ip6f_nxt; 376 break; 377 case IPPROTO_NONE: 378 /* No next header means we're finished */ 379 default: 380 return (length); 381 } 382 length += ehdrlen; 383 whereptr += ehdrlen; 384 } 385 386 return (length); 387 } 388