1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * IEEE 802.3ad Link Aggregation - Send code. 31 * 32 * Implements the Distributor function. 33 */ 34 35 #include <sys/conf.h> 36 #include <sys/modctl.h> 37 #include <sys/sunddi.h> 38 #include <sys/vlan.h> 39 #include <sys/strsun.h> 40 #include <sys/strsubr.h> 41 42 #include <inet/common.h> 43 #include <inet/led.h> 44 #include <inet/ip.h> 45 #include <inet/ip6.h> 46 #include <inet/tcp.h> 47 #include <netinet/udp.h> 48 #include <inet/ipsecesp.h> 49 #include <inet/ipsecah.h> 50 51 #include <sys/aggr.h> 52 #include <sys/aggr_impl.h> 53 54 #define HASH32(x) (((x) >> 24) ^ ((x) >> 16) ^ ((x) >> 8) ^ (x)) 55 #define HASH_MAC(x) (x[0] ^ x[1] ^ x[2] ^ x[3] ^ x[4] ^ x[5]) 56 57 static uint16_t aggr_send_ip6_hdr_len(mblk_t *, ip6_t *); 58 59 static uint_t 60 aggr_send_port(aggr_grp_t *grp, mblk_t *mp) 61 { 62 struct ether_header *ehp; 63 uint16_t sap; 64 uint_t skip_len; 65 uint8_t proto; 66 uint32_t policy = grp->lg_tx_policy; 67 uint32_t hash = 0; 68 69 ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 70 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 71 72 /* compute MAC hash */ 73 74 ehp = (struct ether_header *)mp->b_rptr; 75 76 if (policy & AGGR_POLICY_L2) { 77 uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 78 uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 79 hash = HASH_MAC(mac_src) ^ HASH_MAC(mac_dst); 80 policy &= ~AGGR_POLICY_L2; 81 } 82 83 if (policy == 0) 84 goto done; 85 86 /* skip ethernet header */ 87 88 if (ntohs(ehp->ether_type) == VLAN_TPID) { 89 struct ether_vlan_header *evhp; 90 91 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 92 evhp = (struct ether_vlan_header *)mp->b_rptr; 93 sap = ntohs(evhp->ether_type); 94 skip_len = sizeof (struct ether_vlan_header); 95 } else { 96 sap = ntohs(ehp->ether_type); 97 skip_len = sizeof (struct ether_header); 98 } 99 100 /* if ethernet header is in its own mblk, skip it */ 101 if (MBLKL(mp) <= skip_len) { 102 skip_len -= MBLKL(mp); 103 mp = mp->b_cont; 104 } 105 106 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 107 108 /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 109 110 switch (sap) { 111 case ETHERTYPE_IP: { 112 ipha_t *iphp; 113 114 ASSERT(MBLKL(mp) >= skip_len + sizeof (ipha_t)); 115 iphp = (ipha_t *)(mp->b_rptr + skip_len); 116 proto = iphp->ipha_protocol; 117 skip_len += IPH_HDR_LENGTH(iphp); 118 119 if (policy & AGGR_POLICY_L3) { 120 uint32_t ip_src = iphp->ipha_src; 121 uint32_t ip_dst = iphp->ipha_dst; 122 hash ^= (HASH32(htonl(ip_src)) ^ HASH32(htonl(ip_dst))); 123 policy &= ~AGGR_POLICY_L3; 124 } 125 break; 126 } 127 case ETHERTYPE_IPV6: { 128 ip6_t *ip6hp; 129 130 /* 131 * if ipv6 packet has options, the proto will not be one of the 132 * ones handled by the ULP processor below, and will return 0 133 * as the index 134 */ 135 ASSERT(MBLKL(mp) >= skip_len + sizeof (ip6_t)); 136 ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 137 proto = ip6hp->ip6_nxt; 138 skip_len += aggr_send_ip6_hdr_len(mp, ip6hp); 139 140 if (policy & AGGR_POLICY_L3) { 141 uint32_t ip_src = ip6hp->ip6_src.s6_addr32[3]; 142 uint32_t ip_dst = ip6hp->ip6_dst.s6_addr32[3]; 143 hash ^= (HASH32(htonl(ip_src)) ^ HASH32(htonl(ip_dst))); 144 policy &= ~AGGR_POLICY_L3; 145 } 146 break; 147 } 148 default: 149 goto done; 150 } 151 152 if (!(policy & AGGR_POLICY_L4)) 153 goto done; 154 155 /* if ip header is in its own mblk, skip it */ 156 if (MBLKL(mp) <= skip_len) { 157 skip_len -= MBLKL(mp); 158 mp = mp->b_cont; 159 } 160 161 /* parse ULP header */ 162 again: 163 switch (proto) { 164 case IPPROTO_TCP: 165 case IPPROTO_UDP: 166 case IPPROTO_ESP: 167 case IPPROTO_SCTP: 168 /* 169 * These Internet Protocols are intentionally designed 170 * for hashing from the git-go. Port numbers are in the first 171 * word for transports, SPI is first for ESP. 172 */ 173 hash ^= HASH32(*(uint32_t *)(mp->b_rptr + skip_len)); 174 break; 175 176 case IPPROTO_AH: { 177 ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 178 179 uint_t ah_length = AH_TOTAL_LEN(ah); 180 proto = ah->ah_nexthdr; 181 skip_len += ah_length; 182 183 /* if ip header is in its own mblk, skip it */ 184 if (MBLKL(mp) <= skip_len) { 185 skip_len -= MBLKL(mp); 186 mp = mp->b_cont; 187 } 188 189 goto again; 190 } 191 } 192 193 done: 194 return (hash % grp->lg_ntx_ports); 195 } 196 197 /* 198 * Update the TX load balancing policy of the specified group. 199 */ 200 void 201 aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) 202 { 203 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 204 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 205 206 grp->lg_tx_policy = policy; 207 } 208 209 /* 210 * Send function invoked by the MAC service module. 211 */ 212 mblk_t * 213 aggr_m_tx(void *arg, mblk_t *mp) 214 { 215 aggr_grp_t *grp = arg; 216 aggr_port_t *port; 217 mblk_t *nextp; 218 const mac_txinfo_t *mtp; 219 220 rw_enter(&grp->lg_lock, RW_READER); 221 222 if (grp->lg_ntx_ports == 0) { 223 /* 224 * We could have returned from aggr_m_start() before 225 * the ports were actually attached. Drop the chain. 226 */ 227 rw_exit(&grp->lg_lock); 228 229 freemsgchain(mp); 230 return (NULL); 231 } 232 233 for (;;) { 234 nextp = mp->b_next; 235 mp->b_next = NULL; 236 237 port = grp->lg_tx_ports[aggr_send_port(grp, mp)]; 238 ASSERT(port->lp_state == AGGR_PORT_STATE_ATTACHED); 239 240 rw_exit(&grp->lg_lock); 241 242 /* 243 * We store the transmit info pointer locally in case it 244 * changes between loading mt_fn and mt_arg. 245 */ 246 mtp = port->lp_txinfo; 247 if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) { 248 mp->b_next = nextp; 249 goto done; 250 } 251 252 if ((mp = nextp) == NULL) 253 goto done; 254 255 rw_enter(&grp->lg_lock, RW_READER); 256 } 257 258 done: 259 return (mp); 260 } 261 262 /* 263 * Enable sending on the specified port. 264 */ 265 void 266 aggr_send_port_enable(aggr_port_t *port) 267 { 268 aggr_grp_t *grp = port->lp_grp; 269 270 if (port->lp_tx_enabled || (port->lp_state != 271 AGGR_PORT_STATE_ATTACHED)) { 272 /* already enabled or port not yet attached */ 273 return; 274 } 275 276 /* 277 * Add to group's array of tx ports. 278 */ 279 if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) { 280 /* current array too small */ 281 aggr_port_t **new_ports; 282 uint_t new_size; 283 284 new_size = grp->lg_ntx_ports+1; 285 new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *), 286 KM_SLEEP); 287 288 if (grp->lg_tx_ports_size > 0) { 289 ASSERT(grp->lg_tx_ports != NULL); 290 bcopy(grp->lg_tx_ports, new_ports, 291 grp->lg_ntx_ports * sizeof (aggr_port_t *)); 292 kmem_free(grp->lg_tx_ports, 293 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 294 } 295 296 grp->lg_tx_ports = new_ports; 297 grp->lg_tx_ports_size = new_size; 298 } 299 300 grp->lg_tx_ports[grp->lg_ntx_ports++] = port; 301 port->lp_tx_idx = grp->lg_ntx_ports-1; 302 303 port->lp_tx_enabled = B_TRUE; 304 } 305 306 /* 307 * Disable sending from the specified port. 308 */ 309 void 310 aggr_send_port_disable(aggr_port_t *port) 311 { 312 uint_t idx, ntx; 313 aggr_grp_t *grp = port->lp_grp; 314 315 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 316 317 if (!port->lp_tx_enabled) { 318 /* not yet enabled */ 319 return; 320 } 321 322 idx = port->lp_tx_idx; 323 ntx = grp->lg_ntx_ports; 324 ASSERT(idx < ntx); 325 326 /* remove from array of attached ports */ 327 if (idx == (ntx - 1)) { 328 grp->lg_tx_ports[idx] = NULL; 329 } else { 330 /* not the last entry, replace with last one */ 331 aggr_port_t *victim; 332 333 victim = grp->lg_tx_ports[ntx - 1]; 334 grp->lg_tx_ports[ntx - 1] = NULL; 335 victim->lp_tx_idx = idx; 336 grp->lg_tx_ports[idx] = victim; 337 } 338 339 port->lp_tx_idx = 0; 340 grp->lg_ntx_ports--; 341 342 port->lp_tx_enabled = B_FALSE; 343 } 344 345 static uint16_t 346 aggr_send_ip6_hdr_len(mblk_t *mp, ip6_t *ip6h) 347 { 348 uint16_t length; 349 uint_t ehdrlen; 350 uint8_t *nexthdrp; 351 uint8_t *whereptr; 352 uint8_t *endptr; 353 ip6_dest_t *desthdr; 354 ip6_rthdr_t *rthdr; 355 ip6_frag_t *fraghdr; 356 357 length = IPV6_HDR_LEN; 358 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 359 endptr = mp->b_wptr; 360 361 nexthdrp = &ip6h->ip6_nxt; 362 while (whereptr < endptr) { 363 switch (*nexthdrp) { 364 case IPPROTO_HOPOPTS: 365 case IPPROTO_DSTOPTS: 366 /* Assumes the headers are identical for hbh and dst */ 367 desthdr = (ip6_dest_t *)whereptr; 368 ehdrlen = 8 * (desthdr->ip6d_len + 1); 369 nexthdrp = &desthdr->ip6d_nxt; 370 break; 371 case IPPROTO_ROUTING: 372 rthdr = (ip6_rthdr_t *)whereptr; 373 ehdrlen = 8 * (rthdr->ip6r_len + 1); 374 nexthdrp = &rthdr->ip6r_nxt; 375 break; 376 case IPPROTO_FRAGMENT: 377 fraghdr = (ip6_frag_t *)whereptr; 378 ehdrlen = sizeof (ip6_frag_t); 379 nexthdrp = &fraghdr->ip6f_nxt; 380 break; 381 case IPPROTO_NONE: 382 /* No next header means we're finished */ 383 default: 384 return (length); 385 } 386 length += ehdrlen; 387 whereptr += ehdrlen; 388 } 389 390 return (length); 391 } 392