1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * IEEE 802.3ad Link Aggregation - Send code. 30 * 31 * Implements the Distributor function. 32 */ 33 34 #include <sys/conf.h> 35 #include <sys/modctl.h> 36 #include <sys/sunddi.h> 37 #include <sys/vlan.h> 38 #include <sys/strsun.h> 39 #include <sys/strsubr.h> 40 41 #include <inet/common.h> 42 #include <inet/led.h> 43 #include <inet/ip.h> 44 #include <inet/ip6.h> 45 #include <inet/tcp.h> 46 #include <netinet/udp.h> 47 #include <inet/ipsecesp.h> 48 #include <inet/ipsecah.h> 49 50 #include <sys/aggr.h> 51 #include <sys/aggr_impl.h> 52 53 #define HASH32(x) (((x) >> 24) ^ ((x) >> 16) ^ ((x) >> 8) ^ (x)) 54 #define HASH_MAC(x) (x[0] ^ x[1] ^ x[2] ^ x[3] ^ x[4] ^ x[5]) 55 56 static uint16_t aggr_send_ip6_hdr_len(mblk_t *, ip6_t *); 57 58 static uint_t 59 aggr_send_port(aggr_grp_t *grp, mblk_t *mp) 60 { 61 struct ether_header *ehp; 62 uint16_t sap; 63 uint_t skip_len; 64 uint8_t proto; 65 uint32_t policy = grp->lg_tx_policy; 66 uint32_t hash = 0; 67 68 ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 69 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 70 71 /* compute MAC hash */ 72 73 ehp = (struct ether_header *)mp->b_rptr; 74 75 if (policy & AGGR_POLICY_L2) { 76 uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 77 uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 78 hash = HASH_MAC(mac_src) ^ HASH_MAC(mac_dst); 79 policy &= ~AGGR_POLICY_L2; 80 } 81 82 if (policy == 0) 83 goto done; 84 85 /* skip ethernet header */ 86 87 if (ntohs(ehp->ether_type) == ETHERTYPE_VLAN) { 88 struct ether_vlan_header *evhp; 89 90 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 91 evhp = (struct ether_vlan_header *)mp->b_rptr; 92 sap = ntohs(evhp->ether_type); 93 skip_len = sizeof (struct ether_vlan_header); 94 } else { 95 sap = ntohs(ehp->ether_type); 96 skip_len = sizeof (struct ether_header); 97 } 98 99 /* if ethernet header is in its own mblk, skip it */ 100 if (MBLKL(mp) <= skip_len) { 101 skip_len -= MBLKL(mp); 102 mp = mp->b_cont; 103 } 104 105 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 106 107 /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 108 109 switch (sap) { 110 case ETHERTYPE_IP: { 111 ipha_t *iphp; 112 113 ASSERT(MBLKL(mp) >= skip_len + sizeof (ipha_t)); 114 iphp = (ipha_t *)(mp->b_rptr + skip_len); 115 proto = iphp->ipha_protocol; 116 skip_len += IPH_HDR_LENGTH(iphp); 117 118 if (policy & AGGR_POLICY_L3) { 119 uint32_t ip_src = iphp->ipha_src; 120 uint32_t ip_dst = iphp->ipha_dst; 121 hash ^= (HASH32(htonl(ip_src)) ^ HASH32(htonl(ip_dst))); 122 policy &= ~AGGR_POLICY_L3; 123 } 124 break; 125 } 126 case ETHERTYPE_IPV6: { 127 ip6_t *ip6hp; 128 129 /* 130 * if ipv6 packet has options, the proto will not be one of the 131 * ones handled by the ULP processor below, and will return 0 132 * as the index 133 */ 134 ASSERT(MBLKL(mp) >= skip_len + sizeof (ip6_t)); 135 ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 136 proto = ip6hp->ip6_nxt; 137 skip_len += aggr_send_ip6_hdr_len(mp, ip6hp); 138 139 if (policy & AGGR_POLICY_L3) { 140 uint32_t ip_src = ip6hp->ip6_src.s6_addr32[3]; 141 uint32_t ip_dst = ip6hp->ip6_dst.s6_addr32[3]; 142 hash ^= (HASH32(htonl(ip_src)) ^ HASH32(htonl(ip_dst))); 143 policy &= ~AGGR_POLICY_L3; 144 } 145 break; 146 } 147 default: 148 goto done; 149 } 150 151 if (!(policy & AGGR_POLICY_L4)) 152 goto done; 153 154 /* if ip header is in its own mblk, skip it */ 155 if (MBLKL(mp) <= skip_len) { 156 skip_len -= MBLKL(mp); 157 mp = mp->b_cont; 158 } 159 160 /* parse ULP header */ 161 again: 162 switch (proto) { 163 case IPPROTO_TCP: 164 case IPPROTO_UDP: 165 case IPPROTO_ESP: 166 case IPPROTO_SCTP: 167 /* 168 * These Internet Protocols are intentionally designed 169 * for hashing from the git-go. Port numbers are in the first 170 * word for transports, SPI is first for ESP. 171 */ 172 hash ^= HASH32(*(uint32_t *)(mp->b_rptr + skip_len)); 173 break; 174 175 case IPPROTO_AH: { 176 ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 177 178 uint_t ah_length = AH_TOTAL_LEN(ah); 179 proto = ah->ah_nexthdr; 180 skip_len += ah_length; 181 182 /* if ip header is in its own mblk, skip it */ 183 if (MBLKL(mp) <= skip_len) { 184 skip_len -= MBLKL(mp); 185 mp = mp->b_cont; 186 } 187 188 goto again; 189 } 190 } 191 192 done: 193 return (hash % grp->lg_ntx_ports); 194 } 195 196 /* 197 * Update the TX load balancing policy of the specified group. 198 */ 199 void 200 aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) 201 { 202 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 203 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 204 205 grp->lg_tx_policy = policy; 206 } 207 208 /* 209 * Send function invoked by the MAC service module. 210 */ 211 mblk_t * 212 aggr_m_tx(void *arg, mblk_t *mp) 213 { 214 aggr_grp_t *grp = arg; 215 aggr_port_t *port; 216 mblk_t *nextp; 217 const mac_txinfo_t *mtp; 218 219 for (;;) { 220 rw_enter(&grp->lg_lock, RW_READER); 221 if (grp->lg_ntx_ports == 0) { 222 /* 223 * We could have returned from aggr_m_start() before 224 * the ports were actually attached. Drop the chain. 225 */ 226 rw_exit(&grp->lg_lock); 227 freemsgchain(mp); 228 return (NULL); 229 } 230 nextp = mp->b_next; 231 mp->b_next = NULL; 232 233 port = grp->lg_tx_ports[aggr_send_port(grp, mp)]; 234 ASSERT(port->lp_state == AGGR_PORT_STATE_ATTACHED); 235 236 rw_exit(&grp->lg_lock); 237 238 /* 239 * We store the transmit info pointer locally in case it 240 * changes between loading mt_fn and mt_arg. 241 */ 242 mtp = port->lp_txinfo; 243 if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) { 244 mp->b_next = nextp; 245 break; 246 } 247 248 if ((mp = nextp) == NULL) 249 break; 250 } 251 return (mp); 252 } 253 254 /* 255 * Enable sending on the specified port. 256 */ 257 void 258 aggr_send_port_enable(aggr_port_t *port) 259 { 260 aggr_grp_t *grp = port->lp_grp; 261 262 if (port->lp_tx_enabled || (port->lp_state != 263 AGGR_PORT_STATE_ATTACHED)) { 264 /* already enabled or port not yet attached */ 265 return; 266 } 267 268 /* 269 * Add to group's array of tx ports. 270 */ 271 if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) { 272 /* current array too small */ 273 aggr_port_t **new_ports; 274 uint_t new_size; 275 276 new_size = grp->lg_ntx_ports+1; 277 new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *), 278 KM_SLEEP); 279 280 if (grp->lg_tx_ports_size > 0) { 281 ASSERT(grp->lg_tx_ports != NULL); 282 bcopy(grp->lg_tx_ports, new_ports, 283 grp->lg_ntx_ports * sizeof (aggr_port_t *)); 284 kmem_free(grp->lg_tx_ports, 285 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 286 } 287 288 grp->lg_tx_ports = new_ports; 289 grp->lg_tx_ports_size = new_size; 290 } 291 292 grp->lg_tx_ports[grp->lg_ntx_ports++] = port; 293 port->lp_tx_idx = grp->lg_ntx_ports-1; 294 295 port->lp_tx_enabled = B_TRUE; 296 } 297 298 /* 299 * Disable sending from the specified port. 300 */ 301 void 302 aggr_send_port_disable(aggr_port_t *port) 303 { 304 uint_t idx, ntx; 305 aggr_grp_t *grp = port->lp_grp; 306 307 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 308 309 if (!port->lp_tx_enabled) { 310 /* not yet enabled */ 311 return; 312 } 313 314 idx = port->lp_tx_idx; 315 ntx = grp->lg_ntx_ports; 316 ASSERT(idx < ntx); 317 318 /* remove from array of attached ports */ 319 if (idx == (ntx - 1)) { 320 grp->lg_tx_ports[idx] = NULL; 321 } else { 322 /* not the last entry, replace with last one */ 323 aggr_port_t *victim; 324 325 victim = grp->lg_tx_ports[ntx - 1]; 326 grp->lg_tx_ports[ntx - 1] = NULL; 327 victim->lp_tx_idx = idx; 328 grp->lg_tx_ports[idx] = victim; 329 } 330 331 port->lp_tx_idx = 0; 332 grp->lg_ntx_ports--; 333 334 port->lp_tx_enabled = B_FALSE; 335 } 336 337 static uint16_t 338 aggr_send_ip6_hdr_len(mblk_t *mp, ip6_t *ip6h) 339 { 340 uint16_t length; 341 uint_t ehdrlen; 342 uint8_t *nexthdrp; 343 uint8_t *whereptr; 344 uint8_t *endptr; 345 ip6_dest_t *desthdr; 346 ip6_rthdr_t *rthdr; 347 ip6_frag_t *fraghdr; 348 349 length = IPV6_HDR_LEN; 350 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 351 endptr = mp->b_wptr; 352 353 nexthdrp = &ip6h->ip6_nxt; 354 while (whereptr < endptr) { 355 switch (*nexthdrp) { 356 case IPPROTO_HOPOPTS: 357 case IPPROTO_DSTOPTS: 358 /* Assumes the headers are identical for hbh and dst */ 359 desthdr = (ip6_dest_t *)whereptr; 360 ehdrlen = 8 * (desthdr->ip6d_len + 1); 361 nexthdrp = &desthdr->ip6d_nxt; 362 break; 363 case IPPROTO_ROUTING: 364 rthdr = (ip6_rthdr_t *)whereptr; 365 ehdrlen = 8 * (rthdr->ip6r_len + 1); 366 nexthdrp = &rthdr->ip6r_nxt; 367 break; 368 case IPPROTO_FRAGMENT: 369 fraghdr = (ip6_frag_t *)whereptr; 370 ehdrlen = sizeof (ip6_frag_t); 371 nexthdrp = &fraghdr->ip6f_nxt; 372 break; 373 case IPPROTO_NONE: 374 /* No next header means we're finished */ 375 default: 376 return (length); 377 } 378 length += ehdrlen; 379 whereptr += ehdrlen; 380 } 381 382 return (length); 383 } 384