1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * IEEE 802.3ad Link Aggregation - Send code. 31 * 32 * Implements the Distributor function. 33 */ 34 35 #include <sys/conf.h> 36 #include <sys/modctl.h> 37 #include <sys/sunddi.h> 38 #include <sys/vlan.h> 39 #include <sys/strsun.h> 40 #include <sys/strsubr.h> 41 42 #include <inet/common.h> 43 #include <inet/led.h> 44 #include <inet/ip.h> 45 #include <inet/ip6.h> 46 #include <inet/tcp.h> 47 #include <netinet/udp.h> 48 #include <inet/ipsecesp.h> 49 #include <inet/ipsecah.h> 50 51 #include <sys/aggr.h> 52 #include <sys/aggr_impl.h> 53 54 #define HASH32(x) (((x) >> 24) ^ ((x) >> 16) ^ ((x) >> 8) ^ (x)) 55 #define HASH_MAC(x) (x[0] ^ x[1] ^ x[2] ^ x[3] ^ x[4] ^ x[5]) 56 57 static uint16_t aggr_send_ip6_hdr_len(mblk_t *, ip6_t *); 58 59 static uint_t 60 aggr_send_port(aggr_grp_t *grp, mblk_t *mp) 61 { 62 struct ether_header *ehp; 63 uint16_t sap; 64 uint_t skip_len; 65 uint8_t proto; 66 uint32_t policy = grp->lg_tx_policy; 67 uint32_t hash = 0; 68 69 ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 70 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 71 72 /* compute MAC hash */ 73 74 ehp = (struct ether_header *)mp->b_rptr; 75 76 if (policy & AGGR_POLICY_L2) { 77 uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 78 uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 79 hash = HASH_MAC(mac_src) ^ HASH_MAC(mac_dst); 80 policy &= ~AGGR_POLICY_L2; 81 } 82 83 if (policy == 0) 84 goto done; 85 86 /* skip ethernet header */ 87 88 if (ntohs(ehp->ether_type) == VLAN_TPID) { 89 struct ether_vlan_header *evhp; 90 91 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 92 evhp = (struct ether_vlan_header *)mp->b_rptr; 93 sap = ntohs(evhp->ether_type); 94 skip_len = sizeof (struct ether_vlan_header); 95 } else { 96 sap = ntohs(ehp->ether_type); 97 skip_len = sizeof (struct ether_header); 98 } 99 100 /* if ethernet header is in its own mblk, skip it */ 101 if (MBLKL(mp) <= skip_len) { 102 skip_len -= MBLKL(mp); 103 mp = mp->b_cont; 104 } 105 106 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 107 108 /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 109 110 switch (sap) { 111 case ETHERTYPE_IP: { 112 ipha_t *iphp; 113 114 ASSERT(MBLKL(mp) >= skip_len + sizeof (ipha_t)); 115 iphp = (ipha_t *)(mp->b_rptr + skip_len); 116 proto = iphp->ipha_protocol; 117 skip_len += IPH_HDR_LENGTH(iphp); 118 119 if (policy & AGGR_POLICY_L3) { 120 uint32_t ip_src = iphp->ipha_src; 121 uint32_t ip_dst = iphp->ipha_dst; 122 hash ^= (HASH32(htonl(ip_src)) ^ HASH32(htonl(ip_dst))); 123 policy &= ~AGGR_POLICY_L3; 124 } 125 break; 126 } 127 case ETHERTYPE_IPV6: { 128 ip6_t *ip6hp; 129 130 /* 131 * if ipv6 packet has options, the proto will not be one of the 132 * ones handled by the ULP processor below, and will return 0 133 * as the index 134 */ 135 ASSERT(MBLKL(mp) >= skip_len + sizeof (ip6_t)); 136 ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 137 proto = ip6hp->ip6_nxt; 138 skip_len += aggr_send_ip6_hdr_len(mp, ip6hp); 139 140 if (policy & AGGR_POLICY_L3) { 141 uint32_t ip_src = ip6hp->ip6_src.s6_addr32[3]; 142 uint32_t ip_dst = ip6hp->ip6_dst.s6_addr32[3]; 143 hash ^= (HASH32(htonl(ip_src)) ^ HASH32(htonl(ip_dst))); 144 policy &= ~AGGR_POLICY_L3; 145 } 146 break; 147 } 148 default: 149 goto done; 150 } 151 152 if (!(policy & AGGR_POLICY_L4)) 153 goto done; 154 155 /* if ip header is in its own mblk, skip it */ 156 if (MBLKL(mp) <= skip_len) { 157 skip_len -= MBLKL(mp); 158 mp = mp->b_cont; 159 } 160 161 /* parse ULP header */ 162 again: 163 switch (proto) { 164 case IPPROTO_TCP: 165 case IPPROTO_UDP: 166 case IPPROTO_ESP: 167 case IPPROTO_SCTP: 168 /* 169 * These Internet Protocols are intentionally designed 170 * for hashing from the git-go. Port numbers are in the first 171 * word for transports, SPI is first for ESP. 172 */ 173 hash ^= HASH32(*(uint32_t *)(mp->b_rptr + skip_len)); 174 break; 175 176 case IPPROTO_AH: { 177 ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 178 179 uint_t ah_length = AH_TOTAL_LEN(ah); 180 proto = ah->ah_nexthdr; 181 skip_len += ah_length; 182 183 /* if ip header is in its own mblk, skip it */ 184 if (MBLKL(mp) <= skip_len) { 185 skip_len -= MBLKL(mp); 186 mp = mp->b_cont; 187 } 188 189 goto again; 190 } 191 } 192 193 done: 194 return (hash % grp->lg_ntx_ports); 195 } 196 197 /* 198 * Update the TX load balancing policy of the specified group. 199 */ 200 void 201 aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) 202 { 203 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 204 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 205 206 grp->lg_tx_policy = policy; 207 } 208 209 /* 210 * Send function invoked by the MAC service module. 211 */ 212 mblk_t * 213 aggr_m_tx(void *arg, mblk_t *mp) 214 { 215 aggr_grp_t *grp = arg; 216 aggr_port_t *port; 217 mblk_t *nextp; 218 219 rw_enter(&grp->lg_lock, RW_READER); 220 221 if (grp->lg_ntx_ports == 0) { 222 /* 223 * We could have returned from aggr_m_start() before 224 * the ports were actually attached. Drop the chain. 225 */ 226 rw_exit(&grp->lg_lock); 227 228 freemsgchain(mp); 229 return (NULL); 230 } 231 232 for (;;) { 233 nextp = mp->b_next; 234 mp->b_next = NULL; 235 236 port = grp->lg_tx_ports[aggr_send_port(grp, mp)]; 237 ASSERT(port->lp_state == AGGR_PORT_STATE_ATTACHED); 238 239 rw_exit(&grp->lg_lock); 240 241 if ((mp = port->lp_tx(port->lp_tx_arg, mp)) != NULL) { 242 mp->b_next = nextp; 243 goto done; 244 } 245 246 if ((mp = nextp) == NULL) 247 goto done; 248 249 rw_enter(&grp->lg_lock, RW_READER); 250 } 251 252 done: 253 return (mp); 254 } 255 256 /* 257 * Enable sending on the specified port. 258 */ 259 void 260 aggr_send_port_enable(aggr_port_t *port) 261 { 262 aggr_grp_t *grp = port->lp_grp; 263 264 if (port->lp_tx_enabled || (port->lp_state != 265 AGGR_PORT_STATE_ATTACHED)) { 266 /* already enabled or port not yet attached */ 267 return; 268 } 269 270 /* 271 * Add to group's array of tx ports. 272 */ 273 if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) { 274 /* current array too small */ 275 aggr_port_t **new_ports; 276 uint_t new_size; 277 278 new_size = grp->lg_ntx_ports+1; 279 new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *), 280 KM_SLEEP); 281 282 if (grp->lg_tx_ports_size > 0) { 283 ASSERT(grp->lg_tx_ports != NULL); 284 bcopy(grp->lg_tx_ports, new_ports, 285 grp->lg_ntx_ports * sizeof (aggr_port_t *)); 286 kmem_free(grp->lg_tx_ports, 287 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 288 } 289 290 grp->lg_tx_ports = new_ports; 291 grp->lg_tx_ports_size = new_size; 292 } 293 294 grp->lg_tx_ports[grp->lg_ntx_ports++] = port; 295 port->lp_tx_idx = grp->lg_ntx_ports-1; 296 297 port->lp_tx_enabled = B_TRUE; 298 } 299 300 /* 301 * Disable sending from the specified port. 302 */ 303 void 304 aggr_send_port_disable(aggr_port_t *port) 305 { 306 uint_t idx, ntx; 307 aggr_grp_t *grp = port->lp_grp; 308 309 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 310 311 if (!port->lp_tx_enabled) { 312 /* not yet enabled */ 313 return; 314 } 315 316 idx = port->lp_tx_idx; 317 ntx = grp->lg_ntx_ports; 318 ASSERT(idx < ntx); 319 320 /* remove from array of attached ports */ 321 if (idx == (ntx - 1)) { 322 grp->lg_tx_ports[idx] = NULL; 323 } else { 324 /* not the last entry, replace with last one */ 325 aggr_port_t *victim; 326 327 victim = grp->lg_tx_ports[ntx - 1]; 328 grp->lg_tx_ports[ntx - 1] = NULL; 329 victim->lp_tx_idx = idx; 330 grp->lg_tx_ports[idx] = victim; 331 } 332 333 port->lp_tx_idx = 0; 334 grp->lg_ntx_ports--; 335 336 port->lp_tx_enabled = B_FALSE; 337 } 338 339 static uint16_t 340 aggr_send_ip6_hdr_len(mblk_t *mp, ip6_t *ip6h) 341 { 342 uint16_t length; 343 uint_t ehdrlen; 344 uint8_t *nexthdrp; 345 uint8_t *whereptr; 346 uint8_t *endptr; 347 ip6_dest_t *desthdr; 348 ip6_rthdr_t *rthdr; 349 ip6_frag_t *fraghdr; 350 351 length = IPV6_HDR_LEN; 352 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 353 endptr = mp->b_wptr; 354 355 nexthdrp = &ip6h->ip6_nxt; 356 while (whereptr < endptr) { 357 switch (*nexthdrp) { 358 case IPPROTO_HOPOPTS: 359 case IPPROTO_DSTOPTS: 360 /* Assumes the headers are identical for hbh and dst */ 361 desthdr = (ip6_dest_t *)whereptr; 362 ehdrlen = 8 * (desthdr->ip6d_len + 1); 363 nexthdrp = &desthdr->ip6d_nxt; 364 break; 365 case IPPROTO_ROUTING: 366 rthdr = (ip6_rthdr_t *)whereptr; 367 ehdrlen = 8 * (rthdr->ip6r_len + 1); 368 nexthdrp = &rthdr->ip6r_nxt; 369 break; 370 case IPPROTO_FRAGMENT: 371 fraghdr = (ip6_frag_t *)whereptr; 372 ehdrlen = sizeof (ip6_frag_t); 373 nexthdrp = &fraghdr->ip6f_nxt; 374 break; 375 case IPPROTO_NONE: 376 /* No next header means we're finished */ 377 default: 378 return (length); 379 } 380 length += ehdrlen; 381 whereptr += ehdrlen; 382 } 383 384 return (length); 385 } 386