1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * IEEE 802.3ad Link Aggregation - Send code. 28 * 29 * Implements the Distributor function. 30 */ 31 32 #include <sys/conf.h> 33 #include <sys/modctl.h> 34 #include <sys/sunddi.h> 35 #include <sys/vlan.h> 36 #include <sys/strsun.h> 37 #include <sys/strsubr.h> 38 39 #include <inet/common.h> 40 #include <inet/led.h> 41 #include <inet/ip.h> 42 #include <inet/ip6.h> 43 #include <inet/tcp.h> 44 #include <netinet/udp.h> 45 #include <inet/ipsec_impl.h> 46 #include <inet/sadb.h> 47 #include <inet/ipsecesp.h> 48 #include <inet/ipsecah.h> 49 50 #include <sys/aggr.h> 51 #include <sys/aggr_impl.h> 52 53 #define HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3]) 54 #define HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5]) 55 56 static uint16_t aggr_send_ip6_hdr_len(mblk_t *, ip6_t *); 57 58 static uint_t 59 aggr_send_port(aggr_grp_t *grp, mblk_t *mp) 60 { 61 struct ether_header *ehp; 62 uint16_t sap; 63 uint_t skip_len; 64 uint8_t proto; 65 uint32_t policy = grp->lg_tx_policy; 66 uint32_t hash = 0; 67 68 ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 69 ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 70 71 /* compute MAC hash */ 72 73 ehp = (struct ether_header *)mp->b_rptr; 74 75 if (policy & AGGR_POLICY_L2) { 76 uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 77 uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 78 hash = HASH_MAC(mac_src) ^ HASH_MAC(mac_dst); 79 policy &= ~AGGR_POLICY_L2; 80 } 81 82 if (policy == 0) 83 goto done; 84 85 /* skip ethernet header */ 86 87 if (ntohs(ehp->ether_type) == ETHERTYPE_VLAN) { 88 struct ether_vlan_header *evhp; 89 mblk_t *newmp = NULL; 90 91 skip_len = sizeof (struct ether_vlan_header); 92 if (MBLKL(mp) < skip_len) { 93 /* the vlan tag is the payload, pull up first */ 94 newmp = msgpullup(mp, -1); 95 if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) { 96 goto done; 97 } 98 evhp = (struct ether_vlan_header *)newmp->b_rptr; 99 } else { 100 evhp = (struct ether_vlan_header *)mp->b_rptr; 101 } 102 103 sap = ntohs(evhp->ether_type); 104 freemsg(newmp); 105 } else { 106 sap = ntohs(ehp->ether_type); 107 skip_len = sizeof (struct ether_header); 108 } 109 110 /* if ethernet header is in its own mblk, skip it */ 111 if (MBLKL(mp) <= skip_len) { 112 skip_len -= MBLKL(mp); 113 mp = mp->b_cont; 114 } 115 116 sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 117 118 /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 119 120 switch (sap) { 121 case ETHERTYPE_IP: { 122 ipha_t *iphp; 123 124 if (MBLKL(mp) < (skip_len + sizeof (ipha_t))) 125 goto done; 126 127 iphp = (ipha_t *)(mp->b_rptr + skip_len); 128 proto = iphp->ipha_protocol; 129 skip_len += IPH_HDR_LENGTH(iphp); 130 131 if (policy & AGGR_POLICY_L3) { 132 uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src); 133 uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst); 134 135 hash ^= (HASH_4BYTES(ip_src) ^ HASH_4BYTES(ip_dst)); 136 policy &= ~AGGR_POLICY_L3; 137 } 138 break; 139 } 140 case ETHERTYPE_IPV6: { 141 ip6_t *ip6hp; 142 143 /* 144 * if ipv6 packet has options, the proto will not be one of the 145 * ones handled by the ULP processor below, and will return 0 146 * as the index 147 */ 148 if (MBLKL(mp) < (skip_len + sizeof (ip6_t))) 149 goto done; 150 151 ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 152 proto = ip6hp->ip6_nxt; 153 skip_len += aggr_send_ip6_hdr_len(mp, ip6hp); 154 155 if (policy & AGGR_POLICY_L3) { 156 uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); 157 uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); 158 159 hash ^= (HASH_4BYTES(ip_src) ^ HASH_4BYTES(ip_dst)); 160 policy &= ~AGGR_POLICY_L3; 161 } 162 break; 163 } 164 default: 165 goto done; 166 } 167 168 if (!(policy & AGGR_POLICY_L4)) 169 goto done; 170 171 /* if ip header is in its own mblk, skip it */ 172 if (MBLKL(mp) <= skip_len) { 173 skip_len -= MBLKL(mp); 174 mp = mp->b_cont; 175 } 176 177 /* parse ULP header */ 178 again: 179 switch (proto) { 180 case IPPROTO_TCP: 181 case IPPROTO_UDP: 182 case IPPROTO_ESP: 183 case IPPROTO_SCTP: 184 /* 185 * These Internet Protocols are intentionally designed 186 * for hashing from the git-go. Port numbers are in the first 187 * word for transports, SPI is first for ESP. 188 */ 189 hash ^= HASH_4BYTES((mp->b_rptr + skip_len)); 190 break; 191 192 case IPPROTO_AH: { 193 ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 194 195 uint_t ah_length = AH_TOTAL_LEN(ah); 196 proto = ah->ah_nexthdr; 197 skip_len += ah_length; 198 199 /* if ip header is in its own mblk, skip it */ 200 if (MBLKL(mp) <= skip_len) { 201 skip_len -= MBLKL(mp); 202 mp = mp->b_cont; 203 } 204 205 goto again; 206 } 207 } 208 209 done: 210 return (hash % grp->lg_ntx_ports); 211 } 212 213 /* 214 * Update the TX load balancing policy of the specified group. 215 */ 216 void 217 aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) 218 { 219 ASSERT(AGGR_LACP_LOCK_HELD(grp)); 220 ASSERT(RW_WRITE_HELD(&grp->lg_lock)); 221 222 grp->lg_tx_policy = policy; 223 } 224 225 /* 226 * Send function invoked by the MAC service module. 227 */ 228 mblk_t * 229 aggr_m_tx(void *arg, mblk_t *mp) 230 { 231 aggr_grp_t *grp = arg; 232 aggr_port_t *port; 233 mblk_t *nextp; 234 const mac_txinfo_t *mtp; 235 236 for (;;) { 237 rw_enter(&grp->lg_lock, RW_READER); 238 if (grp->lg_ntx_ports == 0) { 239 /* 240 * We could have returned from aggr_m_start() before 241 * the ports were actually attached. Drop the chain. 242 */ 243 rw_exit(&grp->lg_lock); 244 freemsgchain(mp); 245 return (NULL); 246 } 247 nextp = mp->b_next; 248 mp->b_next = NULL; 249 250 port = grp->lg_tx_ports[aggr_send_port(grp, mp)]; 251 ASSERT(port->lp_state == AGGR_PORT_STATE_ATTACHED); 252 253 rw_exit(&grp->lg_lock); 254 255 /* 256 * We store the transmit info pointer locally in case it 257 * changes between loading mt_fn and mt_arg. 258 */ 259 mtp = port->lp_txinfo; 260 if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) { 261 mp->b_next = nextp; 262 break; 263 } 264 265 if ((mp = nextp) == NULL) 266 break; 267 } 268 return (mp); 269 } 270 271 /* 272 * Enable sending on the specified port. 273 */ 274 void 275 aggr_send_port_enable(aggr_port_t *port) 276 { 277 aggr_grp_t *grp = port->lp_grp; 278 279 if (port->lp_tx_enabled || (port->lp_state != 280 AGGR_PORT_STATE_ATTACHED)) { 281 /* already enabled or port not yet attached */ 282 return; 283 } 284 285 /* 286 * Add to group's array of tx ports. 287 */ 288 if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) { 289 /* current array too small */ 290 aggr_port_t **new_ports; 291 uint_t new_size; 292 293 new_size = grp->lg_ntx_ports+1; 294 new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *), 295 KM_SLEEP); 296 297 if (grp->lg_tx_ports_size > 0) { 298 ASSERT(grp->lg_tx_ports != NULL); 299 bcopy(grp->lg_tx_ports, new_ports, 300 grp->lg_ntx_ports * sizeof (aggr_port_t *)); 301 kmem_free(grp->lg_tx_ports, 302 grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 303 } 304 305 grp->lg_tx_ports = new_ports; 306 grp->lg_tx_ports_size = new_size; 307 } 308 309 grp->lg_tx_ports[grp->lg_ntx_ports++] = port; 310 port->lp_tx_idx = grp->lg_ntx_ports-1; 311 312 port->lp_tx_enabled = B_TRUE; 313 } 314 315 /* 316 * Disable sending from the specified port. 317 */ 318 void 319 aggr_send_port_disable(aggr_port_t *port) 320 { 321 uint_t idx, ntx; 322 aggr_grp_t *grp = port->lp_grp; 323 324 ASSERT(RW_WRITE_HELD(&port->lp_lock)); 325 326 if (!port->lp_tx_enabled) { 327 /* not yet enabled */ 328 return; 329 } 330 331 idx = port->lp_tx_idx; 332 ntx = grp->lg_ntx_ports; 333 ASSERT(idx < ntx); 334 335 /* remove from array of attached ports */ 336 if (idx == (ntx - 1)) { 337 grp->lg_tx_ports[idx] = NULL; 338 } else { 339 /* not the last entry, replace with last one */ 340 aggr_port_t *victim; 341 342 victim = grp->lg_tx_ports[ntx - 1]; 343 grp->lg_tx_ports[ntx - 1] = NULL; 344 victim->lp_tx_idx = idx; 345 grp->lg_tx_ports[idx] = victim; 346 } 347 348 port->lp_tx_idx = 0; 349 grp->lg_ntx_ports--; 350 351 port->lp_tx_enabled = B_FALSE; 352 } 353 354 static uint16_t 355 aggr_send_ip6_hdr_len(mblk_t *mp, ip6_t *ip6h) 356 { 357 uint16_t length; 358 uint_t ehdrlen; 359 uint8_t *nexthdrp; 360 uint8_t *whereptr; 361 uint8_t *endptr; 362 ip6_dest_t *desthdr; 363 ip6_rthdr_t *rthdr; 364 ip6_frag_t *fraghdr; 365 366 length = IPV6_HDR_LEN; 367 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 368 endptr = mp->b_wptr; 369 370 nexthdrp = &ip6h->ip6_nxt; 371 while (whereptr < endptr) { 372 switch (*nexthdrp) { 373 case IPPROTO_HOPOPTS: 374 case IPPROTO_DSTOPTS: 375 /* Assumes the headers are identical for hbh and dst */ 376 desthdr = (ip6_dest_t *)whereptr; 377 ehdrlen = 8 * (desthdr->ip6d_len + 1); 378 nexthdrp = &desthdr->ip6d_nxt; 379 break; 380 case IPPROTO_ROUTING: 381 rthdr = (ip6_rthdr_t *)whereptr; 382 ehdrlen = 8 * (rthdr->ip6r_len + 1); 383 nexthdrp = &rthdr->ip6r_nxt; 384 break; 385 case IPPROTO_FRAGMENT: 386 fraghdr = (ip6_frag_t *)whereptr; 387 ehdrlen = sizeof (ip6_frag_t); 388 nexthdrp = &fraghdr->ip6f_nxt; 389 break; 390 case IPPROTO_NONE: 391 /* No next header means we're finished */ 392 default: 393 return (length); 394 } 395 length += ehdrlen; 396 whereptr += ehdrlen; 397 } 398 399 return (length); 400 } 401