1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * IEEE 802.3ad Link Aggregation - Send code. 30 * 31 * Implements the Distributor function. 32 */ 33 34 #include <sys/conf.h> 35 #include <sys/modctl.h> 36 #include <sys/sunddi.h> 37 #include <sys/vlan.h> 38 #include <sys/strsun.h> 39 #include <sys/strsubr.h> 40 41 #include <inet/common.h> 42 #include <inet/led.h> 43 #include <inet/ip.h> 44 #include <inet/ip6.h> 45 #include <inet/tcp.h> 46 #include <netinet/udp.h> 47 #include <inet/ipsec_impl.h> 48 #include <inet/sadb.h> 49 #include <inet/ipsecesp.h> 50 #include <inet/ipsecah.h> 51 52 #include <sys/aggr.h> 53 #include <sys/aggr_impl.h> 54 55 #define HASH32(x) (((x) >> 24) ^ ((x) >> 16) ^ ((x) >> 8) ^ (x)) 56 #define HASH_MAC(x) (x[0] ^ x[1] ^ x[2] ^ x[3] ^ x[4] ^ x[5]) 57 58 static uint16_t aggr_send_ip6_hdr_len(mblk_t *, ip6_t *); 59 60 static uint_t 61 aggr_send_port(aggr_grp_t *grp, mblk_t *mp) 62 { 63 struct ether_header *ehp; 64 uint16_t sap; 65 uint_t skip_len; 66 uint8_t proto; 67 uint32_t policy = grp->lg_tx_policy; 68 uint32_t hash = 0; 69 70 ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 71 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 72 73 /* compute MAC hash */ 74 75 ehp = (struct ether_header *)mp->b_rptr; 76 77 if (policy & AGGR_POLICY_L2) { 78 uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 79 uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 80 hash = HASH_MAC(mac_src) ^ HASH_MAC(mac_dst); 81 policy &= ~AGGR_POLICY_L2; 82 } 83 84 if (policy == 0) 85 goto done; 86 87 /* skip ethernet header */ 88 89 if (ntohs(ehp->ether_type) == ETHERTYPE_VLAN) { 90 struct ether_vlan_header *evhp; 91 92 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 93 evhp = (struct ether_vlan_header *)mp->b_rptr; 94 sap = ntohs(evhp->ether_type); 95 skip_len = sizeof (struct ether_vlan_header); 96 } else { 97 sap = ntohs(ehp->ether_type); 98 skip_len = sizeof (struct ether_header); 99 } 100 101 /* if ethernet header is in its own mblk, skip it */ 102 if (MBLKL(mp) <= skip_len) { 103 skip_len -= MBLKL(mp); 104 mp = mp->b_cont; 105 } 106 107 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 108 109 /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 110 111 switch (sap) { 112 case ETHERTYPE_IP: { 113 ipha_t *iphp; 114 115 ASSERT(MBLKL(mp) >= skip_len + sizeof (ipha_t)); 116 iphp = (ipha_t *)(mp->b_rptr + skip_len); 117 proto = iphp->ipha_protocol; 118 skip_len += IPH_HDR_LENGTH(iphp); 119 120 if (policy & AGGR_POLICY_L3) { 121 uint32_t ip_src = iphp->ipha_src; 122 uint32_t ip_dst = iphp->ipha_dst; 123 hash ^= (HASH32(htonl(ip_src)) ^ HASH32(htonl(ip_dst))); 124 policy &= ~AGGR_POLICY_L3; 125 } 126 break; 127 } 128 case ETHERTYPE_IPV6: { 129 ip6_t *ip6hp; 130 131 /* 132 * if ipv6 packet has options, the proto will not be one of the 133 * ones handled by the ULP processor below, and will return 0 134 * as the index 135 */ 136 ASSERT(MBLKL(mp) >= skip_len + sizeof (ip6_t)); 137 ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 138 proto = ip6hp->ip6_nxt; 139 skip_len += aggr_send_ip6_hdr_len(mp, ip6hp); 140 141 if (policy & AGGR_POLICY_L3) { 142 uint32_t ip_src = ip6hp->ip6_src.s6_addr32[3]; 143 uint32_t ip_dst = ip6hp->ip6_dst.s6_addr32[3]; 144 hash ^= (HASH32(htonl(ip_src)) ^ HASH32(htonl(ip_dst))); 145 policy &= ~AGGR_POLICY_L3; 146 } 147 break; 148 } 149 default: 150 goto done; 151 } 152 153 if (!(policy & AGGR_POLICY_L4)) 154 goto done; 155 156 /* if ip header is in its own mblk, skip it */ 157 if (MBLKL(mp) <= skip_len) { 158 skip_len -= MBLKL(mp); 159 mp = mp->b_cont; 160 } 161 162 /* parse ULP header */ 163 again: 164 switch (proto) { 165 case IPPROTO_TCP: 166 case IPPROTO_UDP: 167 case IPPROTO_ESP: 168 case IPPROTO_SCTP: 169 /* 170 * These Internet Protocols are intentionally designed 171 * for hashing from the git-go. Port numbers are in the first 172 * word for transports, SPI is first for ESP. 173 */ 174 hash ^= HASH32(*(uint32_t *)(mp->b_rptr + skip_len)); 175 break; 176 177 case IPPROTO_AH: { 178 ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 179 180 uint_t ah_length = AH_TOTAL_LEN(ah); 181 proto = ah->ah_nexthdr; 182 skip_len += ah_length; 183 184 /* if ip header is in its own mblk, skip it */ 185 if (MBLKL(mp) <= skip_len) { 186 skip_len -= MBLKL(mp); 187 mp = mp->b_cont; 188 } 189 190 goto again; 191 } 192 } 193 194 done: 195 return (hash % grp->lg_ntx_ports); 196 } 197 198 /* 199 * Update the TX load balancing policy of the specified group. 200 */ 201 void 202 aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) 203 { 204 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 205 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 206 207 grp->lg_tx_policy = policy; 208 } 209 210 /* 211 * Send function invoked by the MAC service module. 212 */ 213 mblk_t * 214 aggr_m_tx(void *arg, mblk_t *mp) 215 { 216 aggr_grp_t *grp = arg; 217 aggr_port_t *port; 218 mblk_t *nextp; 219 const mac_txinfo_t *mtp; 220 221 for (;;) { 222 rw_enter(&grp->lg_lock, RW_READER); 223 if (grp->lg_ntx_ports == 0) { 224 /* 225 * We could have returned from aggr_m_start() before 226 * the ports were actually attached. Drop the chain. 227 */ 228 rw_exit(&grp->lg_lock); 229 freemsgchain(mp); 230 return (NULL); 231 } 232 nextp = mp->b_next; 233 mp->b_next = NULL; 234 235 port = grp->lg_tx_ports[aggr_send_port(grp, mp)]; 236 ASSERT(port->lp_state == AGGR_PORT_STATE_ATTACHED); 237 238 rw_exit(&grp->lg_lock); 239 240 /* 241 * We store the transmit info pointer locally in case it 242 * changes between loading mt_fn and mt_arg. 243 */ 244 mtp = port->lp_txinfo; 245 if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) { 246 mp->b_next = nextp; 247 break; 248 } 249 250 if ((mp = nextp) == NULL) 251 break; 252 } 253 return (mp); 254 } 255 256 /* 257 * Enable sending on the specified port. 258 */ 259 void 260 aggr_send_port_enable(aggr_port_t *port) 261 { 262 aggr_grp_t *grp = port->lp_grp; 263 264 if (port->lp_tx_enabled || (port->lp_state != 265 AGGR_PORT_STATE_ATTACHED)) { 266 /* already enabled or port not yet attached */ 267 return; 268 } 269 270 /* 271 * Add to group's array of tx ports. 272 */ 273 if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) { 274 /* current array too small */ 275 aggr_port_t **new_ports; 276 uint_t new_size; 277 278 new_size = grp->lg_ntx_ports+1; 279 new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *), 280 KM_SLEEP); 281 282 if (grp->lg_tx_ports_size > 0) { 283 ASSERT(grp->lg_tx_ports != NULL); 284 bcopy(grp->lg_tx_ports, new_ports, 285 grp->lg_ntx_ports * sizeof (aggr_port_t *)); 286 kmem_free(grp->lg_tx_ports, 287 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 288 } 289 290 grp->lg_tx_ports = new_ports; 291 grp->lg_tx_ports_size = new_size; 292 } 293 294 grp->lg_tx_ports[grp->lg_ntx_ports++] = port; 295 port->lp_tx_idx = grp->lg_ntx_ports-1; 296 297 port->lp_tx_enabled = B_TRUE; 298 } 299 300 /* 301 * Disable sending from the specified port. 302 */ 303 void 304 aggr_send_port_disable(aggr_port_t *port) 305 { 306 uint_t idx, ntx; 307 aggr_grp_t *grp = port->lp_grp; 308 309 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 310 311 if (!port->lp_tx_enabled) { 312 /* not yet enabled */ 313 return; 314 } 315 316 idx = port->lp_tx_idx; 317 ntx = grp->lg_ntx_ports; 318 ASSERT(idx < ntx); 319 320 /* remove from array of attached ports */ 321 if (idx == (ntx - 1)) { 322 grp->lg_tx_ports[idx] = NULL; 323 } else { 324 /* not the last entry, replace with last one */ 325 aggr_port_t *victim; 326 327 victim = grp->lg_tx_ports[ntx - 1]; 328 grp->lg_tx_ports[ntx - 1] = NULL; 329 victim->lp_tx_idx = idx; 330 grp->lg_tx_ports[idx] = victim; 331 } 332 333 port->lp_tx_idx = 0; 334 grp->lg_ntx_ports--; 335 336 port->lp_tx_enabled = B_FALSE; 337 } 338 339 static uint16_t 340 aggr_send_ip6_hdr_len(mblk_t *mp, ip6_t *ip6h) 341 { 342 uint16_t length; 343 uint_t ehdrlen; 344 uint8_t *nexthdrp; 345 uint8_t *whereptr; 346 uint8_t *endptr; 347 ip6_dest_t *desthdr; 348 ip6_rthdr_t *rthdr; 349 ip6_frag_t *fraghdr; 350 351 length = IPV6_HDR_LEN; 352 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 353 endptr = mp->b_wptr; 354 355 nexthdrp = &ip6h->ip6_nxt; 356 while (whereptr < endptr) { 357 switch (*nexthdrp) { 358 case IPPROTO_HOPOPTS: 359 case IPPROTO_DSTOPTS: 360 /* Assumes the headers are identical for hbh and dst */ 361 desthdr = (ip6_dest_t *)whereptr; 362 ehdrlen = 8 * (desthdr->ip6d_len + 1); 363 nexthdrp = &desthdr->ip6d_nxt; 364 break; 365 case IPPROTO_ROUTING: 366 rthdr = (ip6_rthdr_t *)whereptr; 367 ehdrlen = 8 * (rthdr->ip6r_len + 1); 368 nexthdrp = &rthdr->ip6r_nxt; 369 break; 370 case IPPROTO_FRAGMENT: 371 fraghdr = (ip6_frag_t *)whereptr; 372 ehdrlen = sizeof (ip6_frag_t); 373 nexthdrp = &fraghdr->ip6f_nxt; 374 break; 375 case IPPROTO_NONE: 376 /* No next header means we're finished */ 377 default: 378 return (length); 379 } 380 length += ehdrlen; 381 whereptr += ehdrlen; 382 } 383 384 return (length); 385 } 386