17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5c0192a57Sericheng * Common Development and Distribution License (the "License"). 6c0192a57Sericheng * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 220dc2366fSVenugopal Iyer * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* 277c478bd9Sstevel@tonic-gate * IEEE 802.3ad Link Aggregation - Send code. 287c478bd9Sstevel@tonic-gate * 297c478bd9Sstevel@tonic-gate * Implements the Distributor function. 307c478bd9Sstevel@tonic-gate */ 317c478bd9Sstevel@tonic-gate 327c478bd9Sstevel@tonic-gate #include <sys/conf.h> 337c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 347c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 350dc2366fSVenugopal Iyer #include <sys/callb.h> 367c478bd9Sstevel@tonic-gate #include <sys/vlan.h> 377c478bd9Sstevel@tonic-gate #include <sys/strsun.h> 387c478bd9Sstevel@tonic-gate #include <sys/strsubr.h> 39ae6aa22aSVenugopal Iyer #include <sys/dlpi.h> 407c478bd9Sstevel@tonic-gate 417c478bd9Sstevel@tonic-gate #include <inet/common.h> 427c478bd9Sstevel@tonic-gate #include <inet/led.h> 437c478bd9Sstevel@tonic-gate #include <inet/ip.h> 447c478bd9Sstevel@tonic-gate #include <inet/ip6.h> 457c478bd9Sstevel@tonic-gate #include <inet/tcp.h> 467c478bd9Sstevel@tonic-gate #include <netinet/udp.h> 477c478bd9Sstevel@tonic-gate 487c478bd9Sstevel@tonic-gate #include <sys/aggr.h> 497c478bd9Sstevel@tonic-gate #include <sys/aggr_impl.h> 507c478bd9Sstevel@tonic-gate 517c478bd9Sstevel@tonic-gate /* 527c478bd9Sstevel@tonic-gate * Update the TX load balancing policy of the specified group. 537c478bd9Sstevel@tonic-gate */ 547c478bd9Sstevel@tonic-gate void 557c478bd9Sstevel@tonic-gate aggr_send_update_policy(aggr_grp_t *grp, uint32_t policy) 567c478bd9Sstevel@tonic-gate { 57ae6aa22aSVenugopal Iyer uint8_t mac_policy = 0; 58ae6aa22aSVenugopal Iyer 59da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 607c478bd9Sstevel@tonic-gate 61ae6aa22aSVenugopal Iyer if ((policy & AGGR_POLICY_L2) != 0) 62ae6aa22aSVenugopal Iyer mac_policy |= MAC_PKT_HASH_L2; 63ae6aa22aSVenugopal Iyer if ((policy & AGGR_POLICY_L3) != 0) 64ae6aa22aSVenugopal Iyer mac_policy |= MAC_PKT_HASH_L3; 65ae6aa22aSVenugopal Iyer if ((policy & AGGR_POLICY_L4) != 0) 66ae6aa22aSVenugopal Iyer mac_policy |= MAC_PKT_HASH_L4; 67ae6aa22aSVenugopal Iyer 687c478bd9Sstevel@tonic-gate grp->lg_tx_policy = policy; 69ae6aa22aSVenugopal Iyer grp->lg_mac_tx_policy = mac_policy; 707c478bd9Sstevel@tonic-gate } 717c478bd9Sstevel@tonic-gate 720dc2366fSVenugopal Iyer #define HASH_HINT(hint) \ 730dc2366fSVenugopal Iyer ((hint) ^ ((hint) >> 24) ^ ((hint) >> 16) ^ ((hint) >> 8)) 740dc2366fSVenugopal Iyer 757c478bd9Sstevel@tonic-gate /* 760dc2366fSVenugopal Iyer * Function invoked by mac layer to find a specific TX ring on a port 770dc2366fSVenugopal Iyer * to send data. 787c478bd9Sstevel@tonic-gate */ 797c478bd9Sstevel@tonic-gate mblk_t * 800dc2366fSVenugopal Iyer aggr_find_tx_ring(void *arg, mblk_t *mp, uintptr_t hint, mac_ring_handle_t *rh) 817c478bd9Sstevel@tonic-gate { 827c478bd9Sstevel@tonic-gate aggr_grp_t *grp = arg; 837c478bd9Sstevel@tonic-gate aggr_port_t *port; 84da14cebeSEric Cheng uint64_t hash; 857c478bd9Sstevel@tonic-gate 86da14cebeSEric Cheng rw_enter(&grp->lg_tx_lock, RW_READER); 877c478bd9Sstevel@tonic-gate if (grp->lg_ntx_ports == 0) { 887c478bd9Sstevel@tonic-gate /* 897c478bd9Sstevel@tonic-gate * We could have returned from aggr_m_start() before 907c478bd9Sstevel@tonic-gate * the ports were actually attached. Drop the chain. 917c478bd9Sstevel@tonic-gate */ 92da14cebeSEric Cheng rw_exit(&grp->lg_tx_lock); 937c478bd9Sstevel@tonic-gate freemsgchain(mp); 947c478bd9Sstevel@tonic-gate return (NULL); 957c478bd9Sstevel@tonic-gate } 960dc2366fSVenugopal Iyer hash = mac_pkt_hash(DL_ETHER, mp, grp->lg_mac_tx_policy, B_TRUE); 97da14cebeSEric Cheng port = grp->lg_tx_ports[hash % grp->lg_ntx_ports]; 987c478bd9Sstevel@tonic-gate 9998b1442aSmeem /* 1000dc2366fSVenugopal Iyer * Use hash as the hint so to direct traffic to 1010dc2366fSVenugopal Iyer * different TX rings. Note below bit operation 1020dc2366fSVenugopal Iyer * is needed in case hint is 0 to get the most 1030dc2366fSVenugopal Iyer * benefit from HASH_HINT() algorithm. 10498b1442aSmeem */ 1050dc2366fSVenugopal Iyer if (port->lp_tx_ring_cnt > 1) { 1060dc2366fSVenugopal Iyer if (hint == 0) { 107da14cebeSEric Cheng hash = (hash << 24 | hash << 16 | hash); 108da14cebeSEric Cheng hash = (hash << 32 | hash); 1090dc2366fSVenugopal Iyer } else { 1100dc2366fSVenugopal Iyer hash = hint; 1117c478bd9Sstevel@tonic-gate } 1120dc2366fSVenugopal Iyer hash = HASH_HINT(hash); 1130dc2366fSVenugopal Iyer *rh = port->lp_pseudo_tx_rings[hash % port->lp_tx_ring_cnt]; 1140dc2366fSVenugopal Iyer } else { 1150dc2366fSVenugopal Iyer *rh = port->lp_pseudo_tx_rings[0]; 116da14cebeSEric Cheng } 1170dc2366fSVenugopal Iyer rw_exit(&grp->lg_tx_lock); 1187c478bd9Sstevel@tonic-gate 1197c478bd9Sstevel@tonic-gate return (mp); 1207c478bd9Sstevel@tonic-gate } 1217c478bd9Sstevel@tonic-gate 1227c478bd9Sstevel@tonic-gate /* 1230dc2366fSVenugopal Iyer * aggr_tx_notify_thread: 1240dc2366fSVenugopal Iyer * 1250dc2366fSVenugopal Iyer * aggr_tx_ring_update() callback function wakes up this thread when 1260dc2366fSVenugopal Iyer * it gets called. This thread will call mac_tx_ring_update() to 1270dc2366fSVenugopal Iyer * notify upper mac of flow control getting relieved. Note that 1280dc2366fSVenugopal Iyer * aggr_tx_ring_update() cannot call mac_tx_ring_update() directly 1290dc2366fSVenugopal Iyer * because aggr_tx_ring_update() is called from lower mac with 1300dc2366fSVenugopal Iyer * mi_rw_lock held. 1310dc2366fSVenugopal Iyer */ 1320dc2366fSVenugopal Iyer void 1330dc2366fSVenugopal Iyer aggr_tx_notify_thread(void *arg) 1340dc2366fSVenugopal Iyer { 1350dc2366fSVenugopal Iyer callb_cpr_t cprinfo; 1360dc2366fSVenugopal Iyer aggr_grp_t *grp = (aggr_grp_t *)arg; 1370dc2366fSVenugopal Iyer mac_ring_handle_t pseudo_mrh; 1380dc2366fSVenugopal Iyer 1390dc2366fSVenugopal Iyer CALLB_CPR_INIT(&cprinfo, &grp->lg_tx_flowctl_lock, callb_generic_cpr, 1400dc2366fSVenugopal Iyer "aggr_tx_notify_thread"); 1410dc2366fSVenugopal Iyer 1420dc2366fSVenugopal Iyer mutex_enter(&grp->lg_tx_flowctl_lock); 1430dc2366fSVenugopal Iyer while (!grp->lg_tx_notify_done) { 1440dc2366fSVenugopal Iyer if ((grp->lg_tx_blocked_cnt) == 0) { 1450dc2366fSVenugopal Iyer CALLB_CPR_SAFE_BEGIN(&cprinfo); 1460dc2366fSVenugopal Iyer cv_wait(&grp->lg_tx_flowctl_cv, 1470dc2366fSVenugopal Iyer &grp->lg_tx_flowctl_lock); 1480dc2366fSVenugopal Iyer CALLB_CPR_SAFE_END(&cprinfo, &grp->lg_tx_flowctl_lock); 1490dc2366fSVenugopal Iyer continue; 1500dc2366fSVenugopal Iyer } 1510dc2366fSVenugopal Iyer while (grp->lg_tx_blocked_cnt != 0) { 1520dc2366fSVenugopal Iyer grp->lg_tx_blocked_cnt--; 1530dc2366fSVenugopal Iyer pseudo_mrh = 1540dc2366fSVenugopal Iyer grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt]; 1550dc2366fSVenugopal Iyer mutex_exit(&grp->lg_tx_flowctl_lock); 1560dc2366fSVenugopal Iyer mac_tx_ring_update(grp->lg_mh, pseudo_mrh); 1570dc2366fSVenugopal Iyer mutex_enter(&grp->lg_tx_flowctl_lock); 1580dc2366fSVenugopal Iyer } 1590dc2366fSVenugopal Iyer } 1600dc2366fSVenugopal Iyer /* 1610dc2366fSVenugopal Iyer * The grp is being destroyed, exit the thread. 1620dc2366fSVenugopal Iyer */ 1630dc2366fSVenugopal Iyer grp->lg_tx_notify_thread = NULL; 1640dc2366fSVenugopal Iyer CALLB_CPR_EXIT(&cprinfo); 1650dc2366fSVenugopal Iyer thread_exit(); 1660dc2366fSVenugopal Iyer } 1670dc2366fSVenugopal Iyer 1680dc2366fSVenugopal Iyer /* 1690dc2366fSVenugopal Iyer * Callback function registered with lower mac to receive wakeups from 1700dc2366fSVenugopal Iyer * drivers when flow control is relieved (i.e. Tx descriptors are 1710dc2366fSVenugopal Iyer * available). 1720dc2366fSVenugopal Iyer */ 1730dc2366fSVenugopal Iyer void 1740dc2366fSVenugopal Iyer aggr_tx_ring_update(void *arg1, uintptr_t arg2) 1750dc2366fSVenugopal Iyer { 1760dc2366fSVenugopal Iyer aggr_port_t *port = (aggr_port_t *)arg1; 1770dc2366fSVenugopal Iyer mac_ring_handle_t mrh = (mac_ring_handle_t)arg2; 1780dc2366fSVenugopal Iyer mac_ring_handle_t pseudo_mrh; 1790dc2366fSVenugopal Iyer aggr_grp_t *grp = port->lp_grp; 1800dc2366fSVenugopal Iyer int i = 0; 1810dc2366fSVenugopal Iyer 1820dc2366fSVenugopal Iyer if (mrh == NULL) { 1830dc2366fSVenugopal Iyer /* 1840dc2366fSVenugopal Iyer * If the underlying NIC does not expose TX rings, 1850dc2366fSVenugopal Iyer * still as pseudo TX ring is presented to the 1860dc2366fSVenugopal Iyer * aggr mac. 1870dc2366fSVenugopal Iyer */ 1880dc2366fSVenugopal Iyer pseudo_mrh = port->lp_pseudo_tx_rings[0]; 1890dc2366fSVenugopal Iyer } else { 1900dc2366fSVenugopal Iyer for (i = 0; i < port->lp_tx_ring_cnt; i++) { 1910dc2366fSVenugopal Iyer if (port->lp_tx_rings[i] == mrh) 1920dc2366fSVenugopal Iyer break; 1930dc2366fSVenugopal Iyer } 1940dc2366fSVenugopal Iyer ASSERT(i < port->lp_tx_ring_cnt); 1950dc2366fSVenugopal Iyer pseudo_mrh = port->lp_pseudo_tx_rings[i]; 1960dc2366fSVenugopal Iyer } 1970dc2366fSVenugopal Iyer mutex_enter(&grp->lg_tx_flowctl_lock); 1980dc2366fSVenugopal Iyer /* 1990dc2366fSVenugopal Iyer * It could be possible that some (broken?) device driver 2000dc2366fSVenugopal Iyer * could send more than one wakeup on the same ring. In 2010dc2366fSVenugopal Iyer * such a case, multiple instances of the same pseudo TX 2020dc2366fSVenugopal Iyer * ring should not be saved in lg_tx_blocked_rings[] 2030dc2366fSVenugopal Iyer * array. So first check if woken up ring (pseudo_mrh) is 2040dc2366fSVenugopal Iyer * already in the lg_tx_blocked_rings[] array. 2050dc2366fSVenugopal Iyer */ 2060dc2366fSVenugopal Iyer for (i = 0; i < grp->lg_tx_blocked_cnt; i++) { 2070dc2366fSVenugopal Iyer if (grp->lg_tx_blocked_rings[i] == pseudo_mrh) { 2080dc2366fSVenugopal Iyer mutex_exit(&grp->lg_tx_flowctl_lock); 2090dc2366fSVenugopal Iyer return; 2100dc2366fSVenugopal Iyer } 2110dc2366fSVenugopal Iyer } 2120dc2366fSVenugopal Iyer /* A distinct mac_ring_handle. Save and increment count */ 2130dc2366fSVenugopal Iyer grp->lg_tx_blocked_rings[grp->lg_tx_blocked_cnt] = pseudo_mrh; 2140dc2366fSVenugopal Iyer grp->lg_tx_blocked_cnt++; 2150dc2366fSVenugopal Iyer cv_signal(&grp->lg_tx_flowctl_cv); 2160dc2366fSVenugopal Iyer mutex_exit(&grp->lg_tx_flowctl_lock); 2170dc2366fSVenugopal Iyer } 2180dc2366fSVenugopal Iyer 2190dc2366fSVenugopal Iyer /* 2200dc2366fSVenugopal Iyer * Send function invoked by the MAC service module. 2210dc2366fSVenugopal Iyer */ 2220dc2366fSVenugopal Iyer mblk_t * 2230dc2366fSVenugopal Iyer aggr_ring_tx(void *arg, mblk_t *mp) 2240dc2366fSVenugopal Iyer { 2250dc2366fSVenugopal Iyer aggr_pseudo_tx_ring_t *pseudo_ring = (aggr_pseudo_tx_ring_t *)arg; 2260dc2366fSVenugopal Iyer aggr_port_t *port = pseudo_ring->atr_port; 2270dc2366fSVenugopal Iyer 2280dc2366fSVenugopal Iyer return (mac_hwring_send_priv(port->lp_mch, pseudo_ring->atr_hw_rh, mp)); 2290dc2366fSVenugopal Iyer } 2300dc2366fSVenugopal Iyer 2310dc2366fSVenugopal Iyer /* 2327c478bd9Sstevel@tonic-gate * Enable sending on the specified port. 2337c478bd9Sstevel@tonic-gate */ 2347c478bd9Sstevel@tonic-gate void 2357c478bd9Sstevel@tonic-gate aggr_send_port_enable(aggr_port_t *port) 2367c478bd9Sstevel@tonic-gate { 2377c478bd9Sstevel@tonic-gate aggr_grp_t *grp = port->lp_grp; 2387c478bd9Sstevel@tonic-gate 239da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 240da14cebeSEric Cheng 2417c478bd9Sstevel@tonic-gate if (port->lp_tx_enabled || (port->lp_state != 2427c478bd9Sstevel@tonic-gate AGGR_PORT_STATE_ATTACHED)) { 2437c478bd9Sstevel@tonic-gate /* already enabled or port not yet attached */ 2447c478bd9Sstevel@tonic-gate return; 2457c478bd9Sstevel@tonic-gate } 2467c478bd9Sstevel@tonic-gate 2477c478bd9Sstevel@tonic-gate /* 2487c478bd9Sstevel@tonic-gate * Add to group's array of tx ports. 2497c478bd9Sstevel@tonic-gate */ 250da14cebeSEric Cheng rw_enter(&grp->lg_tx_lock, RW_WRITER); 2517c478bd9Sstevel@tonic-gate if (grp->lg_tx_ports_size < grp->lg_ntx_ports+1) { 2527c478bd9Sstevel@tonic-gate /* current array too small */ 2537c478bd9Sstevel@tonic-gate aggr_port_t **new_ports; 2547c478bd9Sstevel@tonic-gate uint_t new_size; 2557c478bd9Sstevel@tonic-gate 2567c478bd9Sstevel@tonic-gate new_size = grp->lg_ntx_ports+1; 2577c478bd9Sstevel@tonic-gate new_ports = kmem_zalloc(new_size * sizeof (aggr_port_t *), 2587c478bd9Sstevel@tonic-gate KM_SLEEP); 2597c478bd9Sstevel@tonic-gate 2607c478bd9Sstevel@tonic-gate if (grp->lg_tx_ports_size > 0) { 2617c478bd9Sstevel@tonic-gate ASSERT(grp->lg_tx_ports != NULL); 2627c478bd9Sstevel@tonic-gate bcopy(grp->lg_tx_ports, new_ports, 2637c478bd9Sstevel@tonic-gate grp->lg_ntx_ports * sizeof (aggr_port_t *)); 2647c478bd9Sstevel@tonic-gate kmem_free(grp->lg_tx_ports, 2657c478bd9Sstevel@tonic-gate grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 2667c478bd9Sstevel@tonic-gate } 2677c478bd9Sstevel@tonic-gate 2687c478bd9Sstevel@tonic-gate grp->lg_tx_ports = new_ports; 2697c478bd9Sstevel@tonic-gate grp->lg_tx_ports_size = new_size; 2707c478bd9Sstevel@tonic-gate } 2717c478bd9Sstevel@tonic-gate 2727c478bd9Sstevel@tonic-gate grp->lg_tx_ports[grp->lg_ntx_ports++] = port; 2737c478bd9Sstevel@tonic-gate port->lp_tx_idx = grp->lg_ntx_ports-1; 274da14cebeSEric Cheng rw_exit(&grp->lg_tx_lock); 2757c478bd9Sstevel@tonic-gate 2767c478bd9Sstevel@tonic-gate port->lp_tx_enabled = B_TRUE; 277*09b7f21aSRobert Mustacchi 278*09b7f21aSRobert Mustacchi aggr_grp_update_default(grp); 2797c478bd9Sstevel@tonic-gate } 2807c478bd9Sstevel@tonic-gate 2817c478bd9Sstevel@tonic-gate /* 2827c478bd9Sstevel@tonic-gate * Disable sending from the specified port. 2837c478bd9Sstevel@tonic-gate */ 2847c478bd9Sstevel@tonic-gate void 2857c478bd9Sstevel@tonic-gate aggr_send_port_disable(aggr_port_t *port) 2867c478bd9Sstevel@tonic-gate { 2877c478bd9Sstevel@tonic-gate uint_t idx, ntx; 2887c478bd9Sstevel@tonic-gate aggr_grp_t *grp = port->lp_grp; 2897c478bd9Sstevel@tonic-gate 290da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 291da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(port->lp_mh)); 2927c478bd9Sstevel@tonic-gate 2937c478bd9Sstevel@tonic-gate if (!port->lp_tx_enabled) { 2947c478bd9Sstevel@tonic-gate /* not yet enabled */ 2957c478bd9Sstevel@tonic-gate return; 2967c478bd9Sstevel@tonic-gate } 2977c478bd9Sstevel@tonic-gate 298da14cebeSEric Cheng rw_enter(&grp->lg_tx_lock, RW_WRITER); 2997c478bd9Sstevel@tonic-gate idx = port->lp_tx_idx; 3007c478bd9Sstevel@tonic-gate ntx = grp->lg_ntx_ports; 3017c478bd9Sstevel@tonic-gate ASSERT(idx < ntx); 3027c478bd9Sstevel@tonic-gate 3037c478bd9Sstevel@tonic-gate /* remove from array of attached ports */ 3047c478bd9Sstevel@tonic-gate if (idx == (ntx - 1)) { 3057c478bd9Sstevel@tonic-gate grp->lg_tx_ports[idx] = NULL; 3067c478bd9Sstevel@tonic-gate } else { 3077c478bd9Sstevel@tonic-gate /* not the last entry, replace with last one */ 3087c478bd9Sstevel@tonic-gate aggr_port_t *victim; 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate victim = grp->lg_tx_ports[ntx - 1]; 3117c478bd9Sstevel@tonic-gate grp->lg_tx_ports[ntx - 1] = NULL; 3127c478bd9Sstevel@tonic-gate victim->lp_tx_idx = idx; 3137c478bd9Sstevel@tonic-gate grp->lg_tx_ports[idx] = victim; 3147c478bd9Sstevel@tonic-gate } 3157c478bd9Sstevel@tonic-gate 3167c478bd9Sstevel@tonic-gate port->lp_tx_idx = 0; 3177c478bd9Sstevel@tonic-gate grp->lg_ntx_ports--; 318da14cebeSEric Cheng rw_exit(&grp->lg_tx_lock); 3197c478bd9Sstevel@tonic-gate 3207c478bd9Sstevel@tonic-gate port->lp_tx_enabled = B_FALSE; 321*09b7f21aSRobert Mustacchi 322*09b7f21aSRobert Mustacchi aggr_grp_update_default(grp); 3237c478bd9Sstevel@tonic-gate } 324