1da14cebeSEric Cheng /* 2da14cebeSEric Cheng * CDDL HEADER START 3da14cebeSEric Cheng * 4da14cebeSEric Cheng * The contents of this file are subject to the terms of the 5da14cebeSEric Cheng * Common Development and Distribution License (the "License"). 6da14cebeSEric Cheng * You may not use this file except in compliance with the License. 7da14cebeSEric Cheng * 8da14cebeSEric Cheng * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9da14cebeSEric Cheng * or http://www.opensolaris.org/os/licensing. 10da14cebeSEric Cheng * See the License for the specific language governing permissions 11da14cebeSEric Cheng * and limitations under the License. 12da14cebeSEric Cheng * 13da14cebeSEric Cheng * When distributing Covered Code, include this CDDL HEADER in each 14da14cebeSEric Cheng * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15da14cebeSEric Cheng * If applicable, add the following below this CDDL HEADER, with the 16da14cebeSEric Cheng * fields enclosed by brackets "[]" replaced with your own identifying 17da14cebeSEric Cheng * information: Portions Copyright [yyyy] [name of copyright owner] 18da14cebeSEric Cheng * 19da14cebeSEric Cheng * CDDL HEADER END 20da14cebeSEric Cheng */ 21da14cebeSEric Cheng 22da14cebeSEric Cheng /* 239820c710SBaban Kenkre * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24da14cebeSEric Cheng * Use is subject to license terms. 25da14cebeSEric Cheng */ 26da14cebeSEric Cheng 27da14cebeSEric Cheng #include <sys/strsun.h> 28da14cebeSEric Cheng #include <sys/sdt.h> 29da14cebeSEric Cheng #include <sys/mac.h> 30da14cebeSEric Cheng #include <sys/mac_impl.h> 31da14cebeSEric Cheng #include <sys/mac_client_impl.h> 32*0dc2366fSVenugopal Iyer #include <sys/mac_stat.h> 33da14cebeSEric Cheng #include <sys/dls.h> 34da14cebeSEric Cheng #include <sys/dls_impl.h> 35da14cebeSEric Cheng #include <sys/mac_soft_ring.h> 36da14cebeSEric Cheng #include <sys/ethernet.h> 37*0dc2366fSVenugopal Iyer #include <sys/cpupart.h> 38*0dc2366fSVenugopal Iyer #include <sys/pool.h> 39*0dc2366fSVenugopal Iyer #include <sys/pool_pset.h> 40da14cebeSEric Cheng #include <sys/vlan.h> 41da14cebeSEric Cheng #include <inet/ip.h> 42da14cebeSEric Cheng #include <inet/ip6.h> 43da14cebeSEric Cheng #include <netinet/tcp.h> 44da14cebeSEric Cheng #include <netinet/udp.h> 45da14cebeSEric Cheng #include <netinet/sctp.h> 46da14cebeSEric Cheng 47*0dc2366fSVenugopal Iyer typedef struct flow_stats_s { 48*0dc2366fSVenugopal Iyer uint64_t fs_obytes; 49*0dc2366fSVenugopal Iyer uint64_t fs_opackets; 50*0dc2366fSVenugopal Iyer uint64_t fs_oerrors; 51*0dc2366fSVenugopal Iyer uint64_t fs_ibytes; 52*0dc2366fSVenugopal Iyer uint64_t fs_ipackets; 53*0dc2366fSVenugopal Iyer uint64_t fs_ierrors; 54*0dc2366fSVenugopal Iyer } flow_stats_t; 55*0dc2366fSVenugopal Iyer 56*0dc2366fSVenugopal Iyer 57da14cebeSEric Cheng /* global flow table, will be a per exclusive-zone table later */ 58da14cebeSEric Cheng static mod_hash_t *flow_hash; 59da14cebeSEric Cheng static krwlock_t flow_tab_lock; 60da14cebeSEric Cheng 61da14cebeSEric Cheng static kmem_cache_t *flow_cache; 62da14cebeSEric Cheng static kmem_cache_t *flow_tab_cache; 63da14cebeSEric Cheng static flow_ops_t flow_l2_ops; 64da14cebeSEric Cheng 65da14cebeSEric Cheng typedef struct { 66da14cebeSEric Cheng const char *fs_name; 67da14cebeSEric Cheng uint_t fs_offset; 68da14cebeSEric Cheng } flow_stats_info_t; 69da14cebeSEric Cheng 70da14cebeSEric Cheng #define FS_OFF(f) (offsetof(flow_stats_t, f)) 71da14cebeSEric Cheng static flow_stats_info_t flow_stats_list[] = { 72*0dc2366fSVenugopal Iyer {"rbytes", FS_OFF(fs_ibytes)}, 73da14cebeSEric Cheng {"ipackets", FS_OFF(fs_ipackets)}, 74da14cebeSEric Cheng {"ierrors", FS_OFF(fs_ierrors)}, 75da14cebeSEric Cheng {"obytes", FS_OFF(fs_obytes)}, 76da14cebeSEric Cheng {"opackets", FS_OFF(fs_opackets)}, 77da14cebeSEric Cheng {"oerrors", FS_OFF(fs_oerrors)} 78da14cebeSEric Cheng }; 79da14cebeSEric Cheng #define FS_SIZE (sizeof (flow_stats_list) / sizeof (flow_stats_info_t)) 80da14cebeSEric Cheng 81da14cebeSEric Cheng /* 82da14cebeSEric Cheng * Checks whether a flow mask is legal. 83da14cebeSEric Cheng */ 84da14cebeSEric Cheng static flow_tab_info_t *mac_flow_tab_info_get(flow_mask_t); 85da14cebeSEric Cheng 86da14cebeSEric Cheng static void 87da14cebeSEric Cheng flow_stat_init(kstat_named_t *knp) 88da14cebeSEric Cheng { 89da14cebeSEric Cheng int i; 90da14cebeSEric Cheng 91da14cebeSEric Cheng for (i = 0; i < FS_SIZE; i++, knp++) { 92da14cebeSEric Cheng kstat_named_init(knp, flow_stats_list[i].fs_name, 93da14cebeSEric Cheng KSTAT_DATA_UINT64); 94da14cebeSEric Cheng } 95da14cebeSEric Cheng } 96da14cebeSEric Cheng 97da14cebeSEric Cheng static int 98da14cebeSEric Cheng flow_stat_update(kstat_t *ksp, int rw) 99da14cebeSEric Cheng { 100da14cebeSEric Cheng flow_entry_t *fep = ksp->ks_private; 101da14cebeSEric Cheng kstat_named_t *knp = ksp->ks_data; 102da14cebeSEric Cheng uint64_t *statp; 103da14cebeSEric Cheng int i; 104*0dc2366fSVenugopal Iyer mac_rx_stats_t *mac_rx_stat; 105*0dc2366fSVenugopal Iyer mac_tx_stats_t *mac_tx_stat; 106*0dc2366fSVenugopal Iyer flow_stats_t flow_stats; 107*0dc2366fSVenugopal Iyer mac_soft_ring_set_t *mac_srs; 108da14cebeSEric Cheng 109da14cebeSEric Cheng if (rw != KSTAT_READ) 110da14cebeSEric Cheng return (EACCES); 111da14cebeSEric Cheng 112*0dc2366fSVenugopal Iyer bzero(&flow_stats, sizeof (flow_stats_t)); 113*0dc2366fSVenugopal Iyer 114*0dc2366fSVenugopal Iyer for (i = 0; i < fep->fe_rx_srs_cnt; i++) { 115*0dc2366fSVenugopal Iyer mac_srs = (mac_soft_ring_set_t *)fep->fe_rx_srs[i]; 116*0dc2366fSVenugopal Iyer if (mac_srs == NULL) /* Multicast flow */ 117*0dc2366fSVenugopal Iyer break; 118*0dc2366fSVenugopal Iyer mac_rx_stat = &mac_srs->srs_rx.sr_stat; 119*0dc2366fSVenugopal Iyer 120*0dc2366fSVenugopal Iyer flow_stats.fs_ibytes += mac_rx_stat->mrs_intrbytes + 121*0dc2366fSVenugopal Iyer mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes; 122*0dc2366fSVenugopal Iyer 123*0dc2366fSVenugopal Iyer flow_stats.fs_ipackets += mac_rx_stat->mrs_intrcnt + 124*0dc2366fSVenugopal Iyer mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; 125*0dc2366fSVenugopal Iyer 126*0dc2366fSVenugopal Iyer flow_stats.fs_ierrors += mac_rx_stat->mrs_ierrors; 127*0dc2366fSVenugopal Iyer } 128*0dc2366fSVenugopal Iyer 129*0dc2366fSVenugopal Iyer mac_srs = (mac_soft_ring_set_t *)fep->fe_tx_srs; 130*0dc2366fSVenugopal Iyer if (mac_srs == NULL) /* Multicast flow */ 131*0dc2366fSVenugopal Iyer goto done; 132*0dc2366fSVenugopal Iyer mac_tx_stat = &mac_srs->srs_tx.st_stat; 133*0dc2366fSVenugopal Iyer 134*0dc2366fSVenugopal Iyer flow_stats.fs_obytes = mac_tx_stat->mts_obytes; 135*0dc2366fSVenugopal Iyer flow_stats.fs_opackets = mac_tx_stat->mts_opackets; 136*0dc2366fSVenugopal Iyer flow_stats.fs_oerrors = mac_tx_stat->mts_oerrors; 137*0dc2366fSVenugopal Iyer 138*0dc2366fSVenugopal Iyer done: 139da14cebeSEric Cheng for (i = 0; i < FS_SIZE; i++, knp++) { 140da14cebeSEric Cheng statp = (uint64_t *) 141*0dc2366fSVenugopal Iyer ((uchar_t *)&flow_stats + flow_stats_list[i].fs_offset); 142da14cebeSEric Cheng knp->value.ui64 = *statp; 143da14cebeSEric Cheng } 144da14cebeSEric Cheng return (0); 145da14cebeSEric Cheng } 146da14cebeSEric Cheng 147da14cebeSEric Cheng static void 148da14cebeSEric Cheng flow_stat_create(flow_entry_t *fep) 149da14cebeSEric Cheng { 150da14cebeSEric Cheng kstat_t *ksp; 151da14cebeSEric Cheng kstat_named_t *knp; 152da14cebeSEric Cheng uint_t nstats = FS_SIZE; 153da14cebeSEric Cheng 1542b24ab6bSSebastien Roy /* 1552b24ab6bSSebastien Roy * Fow now, flow entries are only manipulated and visible from the 1562b24ab6bSSebastien Roy * global zone. 1572b24ab6bSSebastien Roy */ 1582b24ab6bSSebastien Roy ksp = kstat_create_zone("unix", 0, (char *)fep->fe_flow_name, "flow", 1592b24ab6bSSebastien Roy KSTAT_TYPE_NAMED, nstats, 0, GLOBAL_ZONEID); 160da14cebeSEric Cheng if (ksp == NULL) 161da14cebeSEric Cheng return; 162da14cebeSEric Cheng 163da14cebeSEric Cheng ksp->ks_update = flow_stat_update; 164da14cebeSEric Cheng ksp->ks_private = fep; 165da14cebeSEric Cheng fep->fe_ksp = ksp; 166da14cebeSEric Cheng 167da14cebeSEric Cheng knp = (kstat_named_t *)ksp->ks_data; 168da14cebeSEric Cheng flow_stat_init(knp); 169da14cebeSEric Cheng kstat_install(ksp); 170da14cebeSEric Cheng } 171da14cebeSEric Cheng 172da14cebeSEric Cheng void 173da14cebeSEric Cheng flow_stat_destroy(flow_entry_t *fep) 174da14cebeSEric Cheng { 175da14cebeSEric Cheng if (fep->fe_ksp != NULL) { 176da14cebeSEric Cheng kstat_delete(fep->fe_ksp); 177da14cebeSEric Cheng fep->fe_ksp = NULL; 178da14cebeSEric Cheng } 179da14cebeSEric Cheng } 180da14cebeSEric Cheng 181da14cebeSEric Cheng /* 182da14cebeSEric Cheng * Initialize the flow table 183da14cebeSEric Cheng */ 184da14cebeSEric Cheng void 185da14cebeSEric Cheng mac_flow_init() 186da14cebeSEric Cheng { 187da14cebeSEric Cheng flow_cache = kmem_cache_create("flow_entry_cache", 188da14cebeSEric Cheng sizeof (flow_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 189da14cebeSEric Cheng flow_tab_cache = kmem_cache_create("flow_tab_cache", 190da14cebeSEric Cheng sizeof (flow_tab_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 191da14cebeSEric Cheng flow_hash = mod_hash_create_extended("flow_hash", 192da14cebeSEric Cheng 100, mod_hash_null_keydtor, mod_hash_null_valdtor, 193da14cebeSEric Cheng mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 194da14cebeSEric Cheng rw_init(&flow_tab_lock, NULL, RW_DEFAULT, NULL); 195da14cebeSEric Cheng } 196da14cebeSEric Cheng 197da14cebeSEric Cheng /* 198da14cebeSEric Cheng * Cleanup and release the flow table 199da14cebeSEric Cheng */ 200da14cebeSEric Cheng void 201da14cebeSEric Cheng mac_flow_fini() 202da14cebeSEric Cheng { 203da14cebeSEric Cheng kmem_cache_destroy(flow_cache); 204da14cebeSEric Cheng kmem_cache_destroy(flow_tab_cache); 205da14cebeSEric Cheng mod_hash_destroy_hash(flow_hash); 206da14cebeSEric Cheng rw_destroy(&flow_tab_lock); 207da14cebeSEric Cheng } 208da14cebeSEric Cheng 209da14cebeSEric Cheng /* 210da14cebeSEric Cheng * mac_create_flow(): create a flow_entry_t. 211da14cebeSEric Cheng */ 212da14cebeSEric Cheng int 213da14cebeSEric Cheng mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name, 214da14cebeSEric Cheng void *client_cookie, uint_t type, flow_entry_t **flentp) 215da14cebeSEric Cheng { 216da14cebeSEric Cheng flow_entry_t *flent = *flentp; 217da14cebeSEric Cheng int err = 0; 218da14cebeSEric Cheng 219da14cebeSEric Cheng if (mrp != NULL) { 220*0dc2366fSVenugopal Iyer err = mac_validate_props(NULL, mrp); 221da14cebeSEric Cheng if (err != 0) 222da14cebeSEric Cheng return (err); 223da14cebeSEric Cheng } 224da14cebeSEric Cheng 225da14cebeSEric Cheng if (flent == NULL) { 226da14cebeSEric Cheng flent = kmem_cache_alloc(flow_cache, KM_SLEEP); 227da14cebeSEric Cheng bzero(flent, sizeof (*flent)); 228da14cebeSEric Cheng mutex_init(&flent->fe_lock, NULL, MUTEX_DEFAULT, NULL); 229da14cebeSEric Cheng cv_init(&flent->fe_cv, NULL, CV_DEFAULT, NULL); 230da14cebeSEric Cheng 231da14cebeSEric Cheng /* Initialize the receiver function to a safe routine */ 232da14cebeSEric Cheng flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop; 233da14cebeSEric Cheng flent->fe_index = -1; 234da14cebeSEric Cheng } 235da000602SGirish Moodalbail (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 236da14cebeSEric Cheng 237da14cebeSEric Cheng /* This is an initial flow, will be configured later */ 238da14cebeSEric Cheng if (fd == NULL) { 239da14cebeSEric Cheng *flentp = flent; 240da14cebeSEric Cheng return (0); 241da14cebeSEric Cheng } 242da14cebeSEric Cheng 243da14cebeSEric Cheng flent->fe_client_cookie = client_cookie; 244da14cebeSEric Cheng flent->fe_type = type; 245da14cebeSEric Cheng 246da14cebeSEric Cheng /* Save flow desc */ 247da14cebeSEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 248da14cebeSEric Cheng 249da14cebeSEric Cheng if (mrp != NULL) { 250da14cebeSEric Cheng /* 251da14cebeSEric Cheng * We have already set fe_resource_props for a Link. 252da14cebeSEric Cheng */ 253da14cebeSEric Cheng if (type & FLOW_USER) { 254da14cebeSEric Cheng bcopy(mrp, &flent->fe_resource_props, 255da14cebeSEric Cheng sizeof (mac_resource_props_t)); 256da14cebeSEric Cheng } 257da14cebeSEric Cheng /* 258da14cebeSEric Cheng * The effective resource list should reflect the priority 259da14cebeSEric Cheng * that we set implicitly. 260da14cebeSEric Cheng */ 261da14cebeSEric Cheng if (!(mrp->mrp_mask & MRP_PRIORITY)) 262da14cebeSEric Cheng mrp->mrp_mask |= MRP_PRIORITY; 263da14cebeSEric Cheng if (type & FLOW_USER) 264da14cebeSEric Cheng mrp->mrp_priority = MPL_SUBFLOW_DEFAULT; 265da14cebeSEric Cheng else 266da14cebeSEric Cheng mrp->mrp_priority = MPL_LINK_DEFAULT; 267*0dc2366fSVenugopal Iyer bzero(mrp->mrp_pool, MAXPATHLEN); 268*0dc2366fSVenugopal Iyer bzero(&mrp->mrp_cpus, sizeof (mac_cpus_t)); 269da14cebeSEric Cheng bcopy(mrp, &flent->fe_effective_props, 270da14cebeSEric Cheng sizeof (mac_resource_props_t)); 271da14cebeSEric Cheng } 272da14cebeSEric Cheng flow_stat_create(flent); 273da14cebeSEric Cheng 274da14cebeSEric Cheng *flentp = flent; 275da14cebeSEric Cheng return (0); 276da14cebeSEric Cheng } 277da14cebeSEric Cheng 278da14cebeSEric Cheng /* 279da14cebeSEric Cheng * Validate flow entry and add it to a flow table. 280da14cebeSEric Cheng */ 281da14cebeSEric Cheng int 282da14cebeSEric Cheng mac_flow_add(flow_tab_t *ft, flow_entry_t *flent) 283da14cebeSEric Cheng { 284da14cebeSEric Cheng flow_entry_t **headp, **p; 285da14cebeSEric Cheng flow_ops_t *ops = &ft->ft_ops; 286da14cebeSEric Cheng flow_mask_t mask; 287da14cebeSEric Cheng uint32_t index; 288da14cebeSEric Cheng int err; 289da14cebeSEric Cheng 290da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 291da14cebeSEric Cheng 292da14cebeSEric Cheng /* 293da14cebeSEric Cheng * Check for invalid bits in mask. 294da14cebeSEric Cheng */ 295da14cebeSEric Cheng mask = flent->fe_flow_desc.fd_mask; 296da14cebeSEric Cheng if ((mask & ft->ft_mask) == 0 || (mask & ~ft->ft_mask) != 0) 297da14cebeSEric Cheng return (EOPNOTSUPP); 298da14cebeSEric Cheng 299da14cebeSEric Cheng /* 300da14cebeSEric Cheng * Validate flent. 301da14cebeSEric Cheng */ 302da14cebeSEric Cheng if ((err = ops->fo_accept_fe(ft, flent)) != 0) { 303da14cebeSEric Cheng DTRACE_PROBE3(accept_failed, flow_tab_t *, ft, 304da14cebeSEric Cheng flow_entry_t *, flent, int, err); 305da14cebeSEric Cheng return (err); 306da14cebeSEric Cheng } 307da14cebeSEric Cheng 308da14cebeSEric Cheng /* 309da14cebeSEric Cheng * Flent is valid. now calculate hash and insert it 310da14cebeSEric Cheng * into hash table. 311da14cebeSEric Cheng */ 312da14cebeSEric Cheng index = ops->fo_hash_fe(ft, flent); 313da14cebeSEric Cheng 314da14cebeSEric Cheng /* 315da14cebeSEric Cheng * We do not need a lock up until now because we were 316da14cebeSEric Cheng * not accessing the flow table. 317da14cebeSEric Cheng */ 318da14cebeSEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 319da14cebeSEric Cheng headp = &ft->ft_table[index]; 320da14cebeSEric Cheng 321da14cebeSEric Cheng /* 322da14cebeSEric Cheng * Check for duplicate flow. 323da14cebeSEric Cheng */ 324da14cebeSEric Cheng for (p = headp; *p != NULL; p = &(*p)->fe_next) { 325da14cebeSEric Cheng if ((*p)->fe_flow_desc.fd_mask != 326da14cebeSEric Cheng flent->fe_flow_desc.fd_mask) 327da14cebeSEric Cheng continue; 328da14cebeSEric Cheng 329da14cebeSEric Cheng if (ft->ft_ops.fo_match_fe(ft, *p, flent)) { 330da14cebeSEric Cheng rw_exit(&ft->ft_lock); 331da14cebeSEric Cheng DTRACE_PROBE3(dup_flow, flow_tab_t *, ft, 332da14cebeSEric Cheng flow_entry_t *, flent, int, err); 333da14cebeSEric Cheng return (EALREADY); 334da14cebeSEric Cheng } 335da14cebeSEric Cheng } 336da14cebeSEric Cheng 337da14cebeSEric Cheng /* 338da14cebeSEric Cheng * Insert flow to hash list. 339da14cebeSEric Cheng */ 340da14cebeSEric Cheng err = ops->fo_insert_fe(ft, headp, flent); 341da14cebeSEric Cheng if (err != 0) { 342da14cebeSEric Cheng rw_exit(&ft->ft_lock); 343da14cebeSEric Cheng DTRACE_PROBE3(insert_failed, flow_tab_t *, ft, 344da14cebeSEric Cheng flow_entry_t *, flent, int, err); 345da14cebeSEric Cheng return (err); 346da14cebeSEric Cheng } 347da14cebeSEric Cheng 348da14cebeSEric Cheng /* 349da14cebeSEric Cheng * Save the hash index so it can be used by mac_flow_remove(). 350da14cebeSEric Cheng */ 351da14cebeSEric Cheng flent->fe_index = (int)index; 352da14cebeSEric Cheng 353da14cebeSEric Cheng /* 354da14cebeSEric Cheng * Save the flow tab back reference. 355da14cebeSEric Cheng */ 356da14cebeSEric Cheng flent->fe_flow_tab = ft; 357da14cebeSEric Cheng FLOW_MARK(flent, FE_FLOW_TAB); 358da14cebeSEric Cheng ft->ft_flow_count++; 359da14cebeSEric Cheng rw_exit(&ft->ft_lock); 360da14cebeSEric Cheng return (0); 361da14cebeSEric Cheng } 362da14cebeSEric Cheng 363da14cebeSEric Cheng /* 364da14cebeSEric Cheng * Remove a flow from a mac client's subflow table 365da14cebeSEric Cheng */ 366da14cebeSEric Cheng void 367da14cebeSEric Cheng mac_flow_rem_subflow(flow_entry_t *flent) 368da14cebeSEric Cheng { 369da14cebeSEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 370da14cebeSEric Cheng mac_client_impl_t *mcip = ft->ft_mcip; 3715d460eafSCathy Zhou mac_handle_t mh = (mac_handle_t)ft->ft_mip; 372da14cebeSEric Cheng 3735d460eafSCathy Zhou ASSERT(MAC_PERIM_HELD(mh)); 374da14cebeSEric Cheng 375da14cebeSEric Cheng mac_flow_remove(ft, flent, B_FALSE); 376da14cebeSEric Cheng if (flent->fe_mcip == NULL) { 377da14cebeSEric Cheng /* 378da14cebeSEric Cheng * The interface is not yet plumbed and mac_client_flow_add 379da14cebeSEric Cheng * was not done. 380da14cebeSEric Cheng */ 381da14cebeSEric Cheng if (FLOW_TAB_EMPTY(ft)) { 382da14cebeSEric Cheng mac_flow_tab_destroy(ft); 383da14cebeSEric Cheng mcip->mci_subflow_tab = NULL; 384da14cebeSEric Cheng } 3855d460eafSCathy Zhou } else { 386da14cebeSEric Cheng mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 387da14cebeSEric Cheng mac_link_flow_clean((mac_client_handle_t)mcip, flent); 388da14cebeSEric Cheng } 3895d460eafSCathy Zhou mac_fastpath_enable(mh); 3905d460eafSCathy Zhou } 391da14cebeSEric Cheng 392da14cebeSEric Cheng /* 393da14cebeSEric Cheng * Add a flow to a mac client's subflow table and instantiate the flow 394da14cebeSEric Cheng * in the mac by creating the associated SRSs etc. 395da14cebeSEric Cheng */ 396da14cebeSEric Cheng int 397da14cebeSEric Cheng mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent, 398da14cebeSEric Cheng boolean_t instantiate_flow) 399da14cebeSEric Cheng { 400da14cebeSEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 4015d460eafSCathy Zhou mac_handle_t mh = (mac_handle_t)mcip->mci_mip; 402da14cebeSEric Cheng flow_tab_info_t *ftinfo; 403da14cebeSEric Cheng flow_mask_t mask; 404da14cebeSEric Cheng flow_tab_t *ft; 405da14cebeSEric Cheng int err; 406da14cebeSEric Cheng boolean_t ft_created = B_FALSE; 407da14cebeSEric Cheng 4085d460eafSCathy Zhou ASSERT(MAC_PERIM_HELD(mh)); 4095d460eafSCathy Zhou 4105d460eafSCathy Zhou if ((err = mac_fastpath_disable(mh)) != 0) 4115d460eafSCathy Zhou return (err); 412da14cebeSEric Cheng 413da14cebeSEric Cheng /* 414da14cebeSEric Cheng * If the subflow table exists already just add the new subflow 415da14cebeSEric Cheng * to the existing table, else we create a new subflow table below. 416da14cebeSEric Cheng */ 417da14cebeSEric Cheng ft = mcip->mci_subflow_tab; 418da14cebeSEric Cheng if (ft == NULL) { 419da14cebeSEric Cheng mask = flent->fe_flow_desc.fd_mask; 420da14cebeSEric Cheng /* 421da14cebeSEric Cheng * Try to create a new table and then add the subflow to the 422da14cebeSEric Cheng * newly created subflow table 423da14cebeSEric Cheng */ 4245d460eafSCathy Zhou if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) { 4255d460eafSCathy Zhou mac_fastpath_enable(mh); 426da14cebeSEric Cheng return (EOPNOTSUPP); 4275d460eafSCathy Zhou } 428da14cebeSEric Cheng 429da14cebeSEric Cheng mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size, 430da14cebeSEric Cheng mcip->mci_mip, &ft); 431da14cebeSEric Cheng ft_created = B_TRUE; 432da14cebeSEric Cheng } 433da14cebeSEric Cheng 434da14cebeSEric Cheng err = mac_flow_add(ft, flent); 435da14cebeSEric Cheng if (err != 0) { 436da14cebeSEric Cheng if (ft_created) 437da14cebeSEric Cheng mac_flow_tab_destroy(ft); 4385d460eafSCathy Zhou mac_fastpath_enable(mh); 439da14cebeSEric Cheng return (err); 440da14cebeSEric Cheng } 441da14cebeSEric Cheng 442da14cebeSEric Cheng if (instantiate_flow) { 443da14cebeSEric Cheng /* Now activate the flow by creating its SRSs */ 444da14cebeSEric Cheng ASSERT(MCIP_DATAPATH_SETUP(mcip)); 445da14cebeSEric Cheng err = mac_link_flow_init((mac_client_handle_t)mcip, flent); 446da14cebeSEric Cheng if (err != 0) { 447da14cebeSEric Cheng mac_flow_remove(ft, flent, B_FALSE); 448da14cebeSEric Cheng if (ft_created) 449da14cebeSEric Cheng mac_flow_tab_destroy(ft); 4505d460eafSCathy Zhou mac_fastpath_enable(mh); 451da14cebeSEric Cheng return (err); 452da14cebeSEric Cheng } 453da14cebeSEric Cheng } else { 454da14cebeSEric Cheng FLOW_MARK(flent, FE_UF_NO_DATAPATH); 455da14cebeSEric Cheng } 456da14cebeSEric Cheng if (ft_created) { 457da14cebeSEric Cheng ASSERT(mcip->mci_subflow_tab == NULL); 458da14cebeSEric Cheng ft->ft_mcip = mcip; 459da14cebeSEric Cheng mcip->mci_subflow_tab = ft; 460da14cebeSEric Cheng if (instantiate_flow) 461da14cebeSEric Cheng mac_client_update_classifier(mcip, B_TRUE); 462da14cebeSEric Cheng } 463da14cebeSEric Cheng return (0); 464da14cebeSEric Cheng } 465da14cebeSEric Cheng 466da14cebeSEric Cheng /* 467da14cebeSEric Cheng * Remove flow entry from flow table. 468da14cebeSEric Cheng */ 469da14cebeSEric Cheng void 470da14cebeSEric Cheng mac_flow_remove(flow_tab_t *ft, flow_entry_t *flent, boolean_t temp) 471da14cebeSEric Cheng { 472da14cebeSEric Cheng flow_entry_t **fp; 473da14cebeSEric Cheng 474da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 475da14cebeSEric Cheng if (!(flent->fe_flags & FE_FLOW_TAB)) 476da14cebeSEric Cheng return; 477da14cebeSEric Cheng 478da14cebeSEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 479da14cebeSEric Cheng /* 480da14cebeSEric Cheng * If this is a permanent removal from the flow table, mark it 481da14cebeSEric Cheng * CONDEMNED to prevent future references. If this is a temporary 482da14cebeSEric Cheng * removal from the table, say to update the flow descriptor then 483da14cebeSEric Cheng * we don't mark it CONDEMNED 484da14cebeSEric Cheng */ 485da14cebeSEric Cheng if (!temp) 486da14cebeSEric Cheng FLOW_MARK(flent, FE_CONDEMNED); 487da14cebeSEric Cheng /* 488da14cebeSEric Cheng * Locate the specified flent. 489da14cebeSEric Cheng */ 490da14cebeSEric Cheng fp = &ft->ft_table[flent->fe_index]; 491da14cebeSEric Cheng while (*fp != flent) 492da14cebeSEric Cheng fp = &(*fp)->fe_next; 493da14cebeSEric Cheng 494da14cebeSEric Cheng /* 495da14cebeSEric Cheng * The flent must exist. Otherwise it's a bug. 496da14cebeSEric Cheng */ 497da14cebeSEric Cheng ASSERT(fp != NULL); 498da14cebeSEric Cheng *fp = flent->fe_next; 499da14cebeSEric Cheng flent->fe_next = NULL; 500da14cebeSEric Cheng 501da14cebeSEric Cheng /* 502da14cebeSEric Cheng * Reset fe_index to -1 so any attempt to call mac_flow_remove() 503da14cebeSEric Cheng * on a flent that is supposed to be in the table (FE_FLOW_TAB) 504da14cebeSEric Cheng * will panic. 505da14cebeSEric Cheng */ 506da14cebeSEric Cheng flent->fe_index = -1; 507da14cebeSEric Cheng FLOW_UNMARK(flent, FE_FLOW_TAB); 508da14cebeSEric Cheng ft->ft_flow_count--; 509da14cebeSEric Cheng rw_exit(&ft->ft_lock); 510da14cebeSEric Cheng } 511da14cebeSEric Cheng 512da14cebeSEric Cheng /* 513da14cebeSEric Cheng * This is the flow lookup routine used by the mac sw classifier engine. 514da14cebeSEric Cheng */ 515da14cebeSEric Cheng int 516da14cebeSEric Cheng mac_flow_lookup(flow_tab_t *ft, mblk_t *mp, uint_t flags, flow_entry_t **flentp) 517da14cebeSEric Cheng { 518da14cebeSEric Cheng flow_state_t s; 519da14cebeSEric Cheng flow_entry_t *flent; 520da14cebeSEric Cheng flow_ops_t *ops = &ft->ft_ops; 521da14cebeSEric Cheng boolean_t retried = B_FALSE; 522da14cebeSEric Cheng int i, err; 523da14cebeSEric Cheng 524da14cebeSEric Cheng s.fs_flags = flags; 525da14cebeSEric Cheng retry: 526ae6aa22aSVenugopal Iyer s.fs_mp = mp; 527da14cebeSEric Cheng 528da14cebeSEric Cheng /* 529da14cebeSEric Cheng * Walk the list of predeclared accept functions. 530da14cebeSEric Cheng * Each of these would accumulate enough state to allow the next 531da14cebeSEric Cheng * accept routine to make progress. 532da14cebeSEric Cheng */ 533da14cebeSEric Cheng for (i = 0; i < FLOW_MAX_ACCEPT && ops->fo_accept[i] != NULL; i++) { 534da14cebeSEric Cheng if ((err = (ops->fo_accept[i])(ft, &s)) != 0) { 535ae6aa22aSVenugopal Iyer mblk_t *last; 536ae6aa22aSVenugopal Iyer 537da14cebeSEric Cheng /* 538da14cebeSEric Cheng * ENOBUFS indicates that the mp could be too short 539da14cebeSEric Cheng * and may need a pullup. 540da14cebeSEric Cheng */ 541da14cebeSEric Cheng if (err != ENOBUFS || retried) 542da14cebeSEric Cheng return (err); 543da14cebeSEric Cheng 544da14cebeSEric Cheng /* 545ae6aa22aSVenugopal Iyer * The pullup is done on the last processed mblk, not 546ae6aa22aSVenugopal Iyer * the starting one. pullup is not done if the mblk 547ae6aa22aSVenugopal Iyer * has references or if b_cont is NULL. 548da14cebeSEric Cheng */ 549ae6aa22aSVenugopal Iyer last = s.fs_mp; 550ae6aa22aSVenugopal Iyer if (DB_REF(last) > 1 || last->b_cont == NULL || 551ae6aa22aSVenugopal Iyer pullupmsg(last, -1) == 0) 552da14cebeSEric Cheng return (EINVAL); 553da14cebeSEric Cheng 554da14cebeSEric Cheng retried = B_TRUE; 555da14cebeSEric Cheng DTRACE_PROBE2(need_pullup, flow_tab_t *, ft, 556da14cebeSEric Cheng flow_state_t *, &s); 557da14cebeSEric Cheng goto retry; 558da14cebeSEric Cheng } 559da14cebeSEric Cheng } 560da14cebeSEric Cheng 561da14cebeSEric Cheng /* 562da14cebeSEric Cheng * The packet is considered sane. We may now attempt to 563da14cebeSEric Cheng * find the corresponding flent. 564da14cebeSEric Cheng */ 565da14cebeSEric Cheng rw_enter(&ft->ft_lock, RW_READER); 566da14cebeSEric Cheng flent = ft->ft_table[ops->fo_hash(ft, &s)]; 567da14cebeSEric Cheng for (; flent != NULL; flent = flent->fe_next) { 568da14cebeSEric Cheng if (flent->fe_match(ft, flent, &s)) { 569da14cebeSEric Cheng FLOW_TRY_REFHOLD(flent, err); 570da14cebeSEric Cheng if (err != 0) 571da14cebeSEric Cheng continue; 572da14cebeSEric Cheng *flentp = flent; 573da14cebeSEric Cheng rw_exit(&ft->ft_lock); 574da14cebeSEric Cheng return (0); 575da14cebeSEric Cheng } 576da14cebeSEric Cheng } 577da14cebeSEric Cheng rw_exit(&ft->ft_lock); 578da14cebeSEric Cheng return (ENOENT); 579da14cebeSEric Cheng } 580da14cebeSEric Cheng 581da14cebeSEric Cheng /* 582da14cebeSEric Cheng * Walk flow table. 583da14cebeSEric Cheng * The caller is assumed to have proper perimeter protection. 584da14cebeSEric Cheng */ 585da14cebeSEric Cheng int 586da14cebeSEric Cheng mac_flow_walk_nolock(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *), 587da14cebeSEric Cheng void *arg) 588da14cebeSEric Cheng { 589da14cebeSEric Cheng int err, i, cnt = 0; 590da14cebeSEric Cheng flow_entry_t *flent; 591da14cebeSEric Cheng 592da14cebeSEric Cheng if (ft == NULL) 593da14cebeSEric Cheng return (0); 594da14cebeSEric Cheng 595da14cebeSEric Cheng for (i = 0; i < ft->ft_size; i++) { 596da14cebeSEric Cheng for (flent = ft->ft_table[i]; flent != NULL; 597da14cebeSEric Cheng flent = flent->fe_next) { 598da14cebeSEric Cheng cnt++; 599da14cebeSEric Cheng err = (*fn)(flent, arg); 600da14cebeSEric Cheng if (err != 0) 601da14cebeSEric Cheng return (err); 602da14cebeSEric Cheng } 603da14cebeSEric Cheng } 604da14cebeSEric Cheng VERIFY(cnt == ft->ft_flow_count); 605da14cebeSEric Cheng return (0); 606da14cebeSEric Cheng } 607da14cebeSEric Cheng 608da14cebeSEric Cheng /* 609da14cebeSEric Cheng * Same as the above except a mutex is used for protection here. 610da14cebeSEric Cheng */ 611da14cebeSEric Cheng int 612da14cebeSEric Cheng mac_flow_walk(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *), 613da14cebeSEric Cheng void *arg) 614da14cebeSEric Cheng { 615da14cebeSEric Cheng int err; 616da14cebeSEric Cheng 617da14cebeSEric Cheng if (ft == NULL) 618da14cebeSEric Cheng return (0); 619da14cebeSEric Cheng 620da14cebeSEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 621da14cebeSEric Cheng err = mac_flow_walk_nolock(ft, fn, arg); 622da14cebeSEric Cheng rw_exit(&ft->ft_lock); 623da14cebeSEric Cheng return (err); 624da14cebeSEric Cheng } 625da14cebeSEric Cheng 626da14cebeSEric Cheng static boolean_t mac_flow_clean(flow_entry_t *); 627da14cebeSEric Cheng 628da14cebeSEric Cheng /* 629da14cebeSEric Cheng * Destroy a flow entry. Called when the last reference on a flow is released. 630da14cebeSEric Cheng */ 631da14cebeSEric Cheng void 632da14cebeSEric Cheng mac_flow_destroy(flow_entry_t *flent) 633da14cebeSEric Cheng { 634da14cebeSEric Cheng ASSERT(flent->fe_refcnt == 0); 635da14cebeSEric Cheng 636da14cebeSEric Cheng if ((flent->fe_type & FLOW_USER) != 0) { 637da14cebeSEric Cheng ASSERT(mac_flow_clean(flent)); 638da14cebeSEric Cheng } else { 639da14cebeSEric Cheng mac_flow_cleanup(flent); 640da14cebeSEric Cheng } 641*0dc2366fSVenugopal Iyer mac_misc_stat_delete(flent); 642da14cebeSEric Cheng mutex_destroy(&flent->fe_lock); 643da14cebeSEric Cheng cv_destroy(&flent->fe_cv); 644da14cebeSEric Cheng flow_stat_destroy(flent); 645da14cebeSEric Cheng kmem_cache_free(flow_cache, flent); 646da14cebeSEric Cheng } 647da14cebeSEric Cheng 648da14cebeSEric Cheng /* 649da14cebeSEric Cheng * XXX eric 650da14cebeSEric Cheng * The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() and 651da14cebeSEric Cheng * mac_link_flow_modify() should really be moved/reworked into the 652da14cebeSEric Cheng * two functions below. This would consolidate all the mac property 653da14cebeSEric Cheng * checking in one place. I'm leaving this alone for now since it's 654da14cebeSEric Cheng * out of scope of the new flows work. 655da14cebeSEric Cheng */ 656da14cebeSEric Cheng /* ARGSUSED */ 657da14cebeSEric Cheng uint32_t 658da14cebeSEric Cheng mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp) 659da14cebeSEric Cheng { 660da14cebeSEric Cheng uint32_t changed_mask = 0; 661da14cebeSEric Cheng mac_resource_props_t *fmrp = &flent->fe_effective_props; 662da14cebeSEric Cheng int i; 663da14cebeSEric Cheng 664da14cebeSEric Cheng if ((mrp->mrp_mask & MRP_MAXBW) != 0 && 665*0dc2366fSVenugopal Iyer (!(fmrp->mrp_mask & MRP_MAXBW) || 666*0dc2366fSVenugopal Iyer (fmrp->mrp_maxbw != mrp->mrp_maxbw))) { 667da14cebeSEric Cheng changed_mask |= MRP_MAXBW; 668da14cebeSEric Cheng if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) { 669da14cebeSEric Cheng fmrp->mrp_mask &= ~MRP_MAXBW; 670*0dc2366fSVenugopal Iyer fmrp->mrp_maxbw = 0; 671da14cebeSEric Cheng } else { 672da14cebeSEric Cheng fmrp->mrp_mask |= MRP_MAXBW; 673*0dc2366fSVenugopal Iyer fmrp->mrp_maxbw = mrp->mrp_maxbw; 674da14cebeSEric Cheng } 675da14cebeSEric Cheng } 676da14cebeSEric Cheng 677da14cebeSEric Cheng if ((mrp->mrp_mask & MRP_PRIORITY) != 0) { 678da14cebeSEric Cheng if (fmrp->mrp_priority != mrp->mrp_priority) 679da14cebeSEric Cheng changed_mask |= MRP_PRIORITY; 680da14cebeSEric Cheng if (mrp->mrp_priority == MPL_RESET) { 681da14cebeSEric Cheng fmrp->mrp_priority = MPL_SUBFLOW_DEFAULT; 682da14cebeSEric Cheng fmrp->mrp_mask &= ~MRP_PRIORITY; 683da14cebeSEric Cheng } else { 684da14cebeSEric Cheng fmrp->mrp_priority = mrp->mrp_priority; 685da14cebeSEric Cheng fmrp->mrp_mask |= MRP_PRIORITY; 686da14cebeSEric Cheng } 687da14cebeSEric Cheng } 688da14cebeSEric Cheng 689da14cebeSEric Cheng /* modify fanout */ 690da14cebeSEric Cheng if ((mrp->mrp_mask & MRP_CPUS) != 0) { 691da14cebeSEric Cheng if ((fmrp->mrp_ncpus == mrp->mrp_ncpus) && 692da14cebeSEric Cheng (fmrp->mrp_fanout_mode == mrp->mrp_fanout_mode)) { 693da14cebeSEric Cheng for (i = 0; i < mrp->mrp_ncpus; i++) { 694da14cebeSEric Cheng if (mrp->mrp_cpu[i] != fmrp->mrp_cpu[i]) 695da14cebeSEric Cheng break; 696da14cebeSEric Cheng } 697da14cebeSEric Cheng if (i == mrp->mrp_ncpus) { 698da14cebeSEric Cheng /* 699da14cebeSEric Cheng * The new set of cpus passed is exactly 700da14cebeSEric Cheng * the same as the existing set. 701da14cebeSEric Cheng */ 702da14cebeSEric Cheng return (changed_mask); 703da14cebeSEric Cheng } 704da14cebeSEric Cheng } 705da14cebeSEric Cheng changed_mask |= MRP_CPUS; 706da14cebeSEric Cheng MAC_COPY_CPUS(mrp, fmrp); 707da14cebeSEric Cheng } 708*0dc2366fSVenugopal Iyer 709*0dc2366fSVenugopal Iyer /* 710*0dc2366fSVenugopal Iyer * Modify the rings property. 711*0dc2366fSVenugopal Iyer */ 712*0dc2366fSVenugopal Iyer if (mrp->mrp_mask & MRP_RX_RINGS || mrp->mrp_mask & MRP_TX_RINGS) 713*0dc2366fSVenugopal Iyer mac_set_rings_effective(flent->fe_mcip); 714*0dc2366fSVenugopal Iyer 715*0dc2366fSVenugopal Iyer if ((mrp->mrp_mask & MRP_POOL) != 0) { 716*0dc2366fSVenugopal Iyer if (strcmp(fmrp->mrp_pool, mrp->mrp_pool) != 0) 717*0dc2366fSVenugopal Iyer changed_mask |= MRP_POOL; 718*0dc2366fSVenugopal Iyer if (strlen(mrp->mrp_pool) == 0) 719*0dc2366fSVenugopal Iyer fmrp->mrp_mask &= ~MRP_POOL; 720*0dc2366fSVenugopal Iyer else 721*0dc2366fSVenugopal Iyer fmrp->mrp_mask |= MRP_POOL; 722*0dc2366fSVenugopal Iyer (void) strncpy(fmrp->mrp_pool, mrp->mrp_pool, MAXPATHLEN); 723*0dc2366fSVenugopal Iyer } 724da14cebeSEric Cheng return (changed_mask); 725da14cebeSEric Cheng } 726da14cebeSEric Cheng 727da14cebeSEric Cheng void 728da14cebeSEric Cheng mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp) 729da14cebeSEric Cheng { 730da14cebeSEric Cheng uint32_t changed_mask; 731da14cebeSEric Cheng mac_client_impl_t *mcip = flent->fe_mcip; 732da14cebeSEric Cheng mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip); 733*0dc2366fSVenugopal Iyer mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); 734*0dc2366fSVenugopal Iyer cpupart_t *cpupart = NULL; 735*0dc2366fSVenugopal Iyer boolean_t use_default = B_FALSE; 736da14cebeSEric Cheng 737da14cebeSEric Cheng ASSERT(flent != NULL); 738da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 739da14cebeSEric Cheng 740da14cebeSEric Cheng rw_enter(&ft->ft_lock, RW_WRITER); 741da14cebeSEric Cheng 742da14cebeSEric Cheng /* Update the cached values inside the subflow entry */ 743da14cebeSEric Cheng changed_mask = mac_flow_modify_props(flent, mrp); 744da14cebeSEric Cheng rw_exit(&ft->ft_lock); 745da14cebeSEric Cheng /* 746da14cebeSEric Cheng * Push the changed parameters to the scheduling code in the 747da14cebeSEric Cheng * SRS's, to take effect right away. 748da14cebeSEric Cheng */ 749da14cebeSEric Cheng if (changed_mask & MRP_MAXBW) { 750da14cebeSEric Cheng mac_srs_update_bwlimit(flent, mrp); 751da14cebeSEric Cheng /* 752da14cebeSEric Cheng * If bandwidth is changed, we may have to change 753da14cebeSEric Cheng * the number of soft ring to be used for fanout. 754da14cebeSEric Cheng * Call mac_flow_update_fanout() if MAC_BIND_CPU 755da14cebeSEric Cheng * is not set and there is no user supplied cpu 756da14cebeSEric Cheng * info. This applies only to link at this time. 757da14cebeSEric Cheng */ 758da14cebeSEric Cheng if (!(flent->fe_type & FLOW_USER) && 759da14cebeSEric Cheng !(changed_mask & MRP_CPUS) && 760da14cebeSEric Cheng !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) { 761da14cebeSEric Cheng mac_fanout_setup(mcip, flent, mcip_mrp, 762*0dc2366fSVenugopal Iyer mac_rx_deliver, mcip, NULL, NULL); 763da14cebeSEric Cheng } 764da14cebeSEric Cheng } 765da14cebeSEric Cheng if (mrp->mrp_mask & MRP_PRIORITY) 766da14cebeSEric Cheng mac_flow_update_priority(mcip, flent); 767da14cebeSEric Cheng 768da14cebeSEric Cheng if (changed_mask & MRP_CPUS) 769*0dc2366fSVenugopal Iyer mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL, 770*0dc2366fSVenugopal Iyer NULL); 771*0dc2366fSVenugopal Iyer 772*0dc2366fSVenugopal Iyer if (mrp->mrp_mask & MRP_POOL) { 773*0dc2366fSVenugopal Iyer pool_lock(); 774*0dc2366fSVenugopal Iyer cpupart = mac_pset_find(mrp, &use_default); 775*0dc2366fSVenugopal Iyer mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL, 776*0dc2366fSVenugopal Iyer cpupart); 777*0dc2366fSVenugopal Iyer mac_set_pool_effective(use_default, cpupart, mrp, emrp); 778*0dc2366fSVenugopal Iyer pool_unlock(); 779*0dc2366fSVenugopal Iyer } 780da14cebeSEric Cheng } 781da14cebeSEric Cheng 782da14cebeSEric Cheng /* 783da14cebeSEric Cheng * This function waits for a certain condition to be met and is generally 784da14cebeSEric Cheng * used before a destructive or quiescing operation. 785da14cebeSEric Cheng */ 786da14cebeSEric Cheng void 787da14cebeSEric Cheng mac_flow_wait(flow_entry_t *flent, mac_flow_state_t event) 788da14cebeSEric Cheng { 789da14cebeSEric Cheng mutex_enter(&flent->fe_lock); 790da14cebeSEric Cheng flent->fe_flags |= FE_WAITER; 791da14cebeSEric Cheng 792da14cebeSEric Cheng switch (event) { 793da14cebeSEric Cheng case FLOW_DRIVER_UPCALL: 794da14cebeSEric Cheng /* 795da14cebeSEric Cheng * We want to make sure the driver upcalls have finished before 796da14cebeSEric Cheng * we signal the Rx SRS worker to quit. 797da14cebeSEric Cheng */ 798da14cebeSEric Cheng while (flent->fe_refcnt != 1) 799da14cebeSEric Cheng cv_wait(&flent->fe_cv, &flent->fe_lock); 800da14cebeSEric Cheng break; 801da14cebeSEric Cheng 802da14cebeSEric Cheng case FLOW_USER_REF: 803da14cebeSEric Cheng /* 804da14cebeSEric Cheng * Wait for the fe_user_refcnt to drop to 0. The flow has 805da14cebeSEric Cheng * been removed from the global flow hash. 806da14cebeSEric Cheng */ 807da14cebeSEric Cheng ASSERT(!(flent->fe_flags & FE_G_FLOW_HASH)); 808da14cebeSEric Cheng while (flent->fe_user_refcnt != 0) 809da14cebeSEric Cheng cv_wait(&flent->fe_cv, &flent->fe_lock); 810da14cebeSEric Cheng break; 811da14cebeSEric Cheng 812da14cebeSEric Cheng default: 813da14cebeSEric Cheng ASSERT(0); 814da14cebeSEric Cheng } 815da14cebeSEric Cheng 816da14cebeSEric Cheng flent->fe_flags &= ~FE_WAITER; 817da14cebeSEric Cheng mutex_exit(&flent->fe_lock); 818da14cebeSEric Cheng } 819da14cebeSEric Cheng 820da14cebeSEric Cheng static boolean_t 821da14cebeSEric Cheng mac_flow_clean(flow_entry_t *flent) 822da14cebeSEric Cheng { 823da14cebeSEric Cheng ASSERT(flent->fe_next == NULL); 824da14cebeSEric Cheng ASSERT(flent->fe_tx_srs == NULL); 825da14cebeSEric Cheng ASSERT(flent->fe_rx_srs_cnt == 0 && flent->fe_rx_srs[0] == NULL); 826da14cebeSEric Cheng ASSERT(flent->fe_mbg == NULL); 827da14cebeSEric Cheng 828da14cebeSEric Cheng return (B_TRUE); 829da14cebeSEric Cheng } 830da14cebeSEric Cheng 831da14cebeSEric Cheng void 832da14cebeSEric Cheng mac_flow_cleanup(flow_entry_t *flent) 833da14cebeSEric Cheng { 834da14cebeSEric Cheng if ((flent->fe_type & FLOW_USER) == 0) { 835da14cebeSEric Cheng ASSERT((flent->fe_mbg == NULL && flent->fe_mcip != NULL) || 836da14cebeSEric Cheng (flent->fe_mbg != NULL && flent->fe_mcip == NULL)); 837da14cebeSEric Cheng ASSERT(flent->fe_refcnt == 0); 838da14cebeSEric Cheng } else { 839da14cebeSEric Cheng ASSERT(flent->fe_refcnt == 1); 840da14cebeSEric Cheng } 841da14cebeSEric Cheng 842da14cebeSEric Cheng if (flent->fe_mbg != NULL) { 843da14cebeSEric Cheng ASSERT(flent->fe_tx_srs == NULL); 844da14cebeSEric Cheng /* This is a multicast or broadcast flow entry */ 845da14cebeSEric Cheng mac_bcast_grp_free(flent->fe_mbg); 846da14cebeSEric Cheng flent->fe_mbg = NULL; 847da14cebeSEric Cheng } 848da14cebeSEric Cheng 849da14cebeSEric Cheng if (flent->fe_tx_srs != NULL) { 850da14cebeSEric Cheng ASSERT(flent->fe_mbg == NULL); 851da14cebeSEric Cheng mac_srs_free(flent->fe_tx_srs); 852da14cebeSEric Cheng flent->fe_tx_srs = NULL; 853da14cebeSEric Cheng } 854da14cebeSEric Cheng 855da14cebeSEric Cheng /* 856da14cebeSEric Cheng * In the normal case fe_rx_srs_cnt is 1. However in the error case 857da14cebeSEric Cheng * when mac_unicast_add fails we may not have set up any SRS 858da14cebeSEric Cheng * in which case fe_rx_srs_cnt will be zero. 859da14cebeSEric Cheng */ 860da14cebeSEric Cheng if (flent->fe_rx_srs_cnt != 0) { 861da14cebeSEric Cheng ASSERT(flent->fe_rx_srs_cnt == 1); 862da14cebeSEric Cheng mac_srs_free(flent->fe_rx_srs[0]); 863da14cebeSEric Cheng flent->fe_rx_srs[0] = NULL; 864da14cebeSEric Cheng flent->fe_rx_srs_cnt = 0; 865da14cebeSEric Cheng } 866da14cebeSEric Cheng ASSERT(flent->fe_rx_srs[0] == NULL); 867da14cebeSEric Cheng } 868da14cebeSEric Cheng 869da14cebeSEric Cheng void 870da14cebeSEric Cheng mac_flow_get_desc(flow_entry_t *flent, flow_desc_t *fd) 871da14cebeSEric Cheng { 872da14cebeSEric Cheng /* 873da14cebeSEric Cheng * Grab the fe_lock to see a self-consistent fe_flow_desc. 874da14cebeSEric Cheng * Updates to the fe_flow_desc happen under the fe_lock 875da14cebeSEric Cheng * after removing the flent from the flow table 876da14cebeSEric Cheng */ 877da14cebeSEric Cheng mutex_enter(&flent->fe_lock); 878da14cebeSEric Cheng bcopy(&flent->fe_flow_desc, fd, sizeof (*fd)); 879da14cebeSEric Cheng mutex_exit(&flent->fe_lock); 880da14cebeSEric Cheng } 881da14cebeSEric Cheng 882da14cebeSEric Cheng /* 883da14cebeSEric Cheng * Update a field of a flow entry. The mac perimeter ensures that 884da14cebeSEric Cheng * this is the only thread doing a modify operation on this mac end point. 885da14cebeSEric Cheng * So the flow table can't change or disappear. The ft_lock protects access 886da14cebeSEric Cheng * to the flow entry, and holding the lock ensures that there isn't any thread 887da14cebeSEric Cheng * accessing the flow entry or attempting a flow table lookup. However 888da14cebeSEric Cheng * data threads that are using the flow entry based on the old descriptor 889da14cebeSEric Cheng * will continue to use the flow entry. If strong coherence is required 890da14cebeSEric Cheng * then the flow will have to be quiesced before the descriptor can be 891da14cebeSEric Cheng * changed. 892da14cebeSEric Cheng */ 893da14cebeSEric Cheng void 894da14cebeSEric Cheng mac_flow_set_desc(flow_entry_t *flent, flow_desc_t *fd) 895da14cebeSEric Cheng { 896da14cebeSEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 897da14cebeSEric Cheng flow_desc_t old_desc; 898da14cebeSEric Cheng int err; 899da14cebeSEric Cheng 900da14cebeSEric Cheng if (ft == NULL) { 901da14cebeSEric Cheng /* 902da14cebeSEric Cheng * The flow hasn't yet been inserted into the table, 903da14cebeSEric Cheng * so only the caller knows about this flow, however for 904da14cebeSEric Cheng * uniformity we grab the fe_lock here. 905da14cebeSEric Cheng */ 906da14cebeSEric Cheng mutex_enter(&flent->fe_lock); 907da14cebeSEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 908da14cebeSEric Cheng mutex_exit(&flent->fe_lock); 909da14cebeSEric Cheng } 910da14cebeSEric Cheng 911da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 912da14cebeSEric Cheng 913da14cebeSEric Cheng /* 914da14cebeSEric Cheng * Need to remove the flow entry from the table and reinsert it, 915da14cebeSEric Cheng * into a potentially diference hash line. The hash depends on 916da14cebeSEric Cheng * the new descriptor fields. However access to fe_desc itself 917da14cebeSEric Cheng * is always under the fe_lock. This helps log and stat functions 918da14cebeSEric Cheng * see a self-consistent fe_flow_desc. 919da14cebeSEric Cheng */ 920da14cebeSEric Cheng mac_flow_remove(ft, flent, B_TRUE); 921da14cebeSEric Cheng old_desc = flent->fe_flow_desc; 922da14cebeSEric Cheng 923da14cebeSEric Cheng mutex_enter(&flent->fe_lock); 924da14cebeSEric Cheng bcopy(fd, &flent->fe_flow_desc, sizeof (*fd)); 925da14cebeSEric Cheng mutex_exit(&flent->fe_lock); 926da14cebeSEric Cheng 927da14cebeSEric Cheng if (mac_flow_add(ft, flent) != 0) { 928da14cebeSEric Cheng /* 929da14cebeSEric Cheng * The add failed say due to an invalid flow descriptor. 930da14cebeSEric Cheng * Undo the update 931da14cebeSEric Cheng */ 932da14cebeSEric Cheng flent->fe_flow_desc = old_desc; 933da14cebeSEric Cheng err = mac_flow_add(ft, flent); 934da14cebeSEric Cheng ASSERT(err == 0); 935da14cebeSEric Cheng } 936da14cebeSEric Cheng } 937da14cebeSEric Cheng 938da14cebeSEric Cheng void 939da14cebeSEric Cheng mac_flow_set_name(flow_entry_t *flent, const char *name) 940da14cebeSEric Cheng { 941da14cebeSEric Cheng flow_tab_t *ft = flent->fe_flow_tab; 942da14cebeSEric Cheng 943da14cebeSEric Cheng if (ft == NULL) { 944da14cebeSEric Cheng /* 945da14cebeSEric Cheng * The flow hasn't yet been inserted into the table, 946da14cebeSEric Cheng * so only the caller knows about this flow 947da14cebeSEric Cheng */ 948da000602SGirish Moodalbail (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 949da14cebeSEric Cheng } else { 950da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 951da14cebeSEric Cheng } 952da14cebeSEric Cheng 953da14cebeSEric Cheng mutex_enter(&flent->fe_lock); 954da000602SGirish Moodalbail (void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN); 955da14cebeSEric Cheng mutex_exit(&flent->fe_lock); 956da14cebeSEric Cheng } 957da14cebeSEric Cheng 958da14cebeSEric Cheng /* 959da14cebeSEric Cheng * Return the client-private cookie that was associated with 960da14cebeSEric Cheng * the flow when it was created. 961da14cebeSEric Cheng */ 962da14cebeSEric Cheng void * 963da14cebeSEric Cheng mac_flow_get_client_cookie(flow_entry_t *flent) 964da14cebeSEric Cheng { 965da14cebeSEric Cheng return (flent->fe_client_cookie); 966da14cebeSEric Cheng } 967da14cebeSEric Cheng 968da14cebeSEric Cheng /* 969da14cebeSEric Cheng * Forward declarations. 970da14cebeSEric Cheng */ 971da14cebeSEric Cheng static uint32_t flow_l2_hash(flow_tab_t *, flow_state_t *); 9722b24ab6bSSebastien Roy static uint32_t flow_l2_hash_fe(flow_tab_t *, flow_entry_t *); 973da14cebeSEric Cheng static int flow_l2_accept(flow_tab_t *, flow_state_t *); 974da14cebeSEric Cheng static uint32_t flow_ether_hash(flow_tab_t *, flow_state_t *); 9752b24ab6bSSebastien Roy static uint32_t flow_ether_hash_fe(flow_tab_t *, flow_entry_t *); 976da14cebeSEric Cheng static int flow_ether_accept(flow_tab_t *, flow_state_t *); 977da14cebeSEric Cheng 978da14cebeSEric Cheng /* 979da14cebeSEric Cheng * Create flow table. 980da14cebeSEric Cheng */ 981da14cebeSEric Cheng void 982da14cebeSEric Cheng mac_flow_tab_create(flow_ops_t *ops, flow_mask_t mask, uint_t size, 983da14cebeSEric Cheng mac_impl_t *mip, flow_tab_t **ftp) 984da14cebeSEric Cheng { 985da14cebeSEric Cheng flow_tab_t *ft; 986da14cebeSEric Cheng flow_ops_t *new_ops; 987da14cebeSEric Cheng 988da14cebeSEric Cheng ft = kmem_cache_alloc(flow_tab_cache, KM_SLEEP); 989da14cebeSEric Cheng bzero(ft, sizeof (*ft)); 990da14cebeSEric Cheng 991da14cebeSEric Cheng ft->ft_table = kmem_zalloc(size * sizeof (flow_entry_t *), KM_SLEEP); 992da14cebeSEric Cheng 993da14cebeSEric Cheng /* 994da14cebeSEric Cheng * We make a copy of the ops vector instead of just pointing to it 995da14cebeSEric Cheng * because we might want to customize the ops vector on a per table 996da14cebeSEric Cheng * basis (e.g. for optimization). 997da14cebeSEric Cheng */ 998da14cebeSEric Cheng new_ops = &ft->ft_ops; 999da14cebeSEric Cheng bcopy(ops, new_ops, sizeof (*ops)); 1000da14cebeSEric Cheng ft->ft_mask = mask; 1001da14cebeSEric Cheng ft->ft_size = size; 1002da14cebeSEric Cheng ft->ft_mip = mip; 1003da14cebeSEric Cheng 1004da14cebeSEric Cheng /* 10052b24ab6bSSebastien Roy * Optimizations for DL_ETHER media. 1006da14cebeSEric Cheng */ 1007da14cebeSEric Cheng if (mip->mi_info.mi_nativemedia == DL_ETHER) { 1008da14cebeSEric Cheng if (new_ops->fo_hash == flow_l2_hash) 1009da14cebeSEric Cheng new_ops->fo_hash = flow_ether_hash; 10102b24ab6bSSebastien Roy if (new_ops->fo_hash_fe == flow_l2_hash_fe) 10112b24ab6bSSebastien Roy new_ops->fo_hash_fe = flow_ether_hash_fe; 1012da14cebeSEric Cheng if (new_ops->fo_accept[0] == flow_l2_accept) 1013da14cebeSEric Cheng new_ops->fo_accept[0] = flow_ether_accept; 1014da14cebeSEric Cheng } 1015da14cebeSEric Cheng *ftp = ft; 1016da14cebeSEric Cheng } 1017da14cebeSEric Cheng 1018da14cebeSEric Cheng void 1019da14cebeSEric Cheng mac_flow_l2tab_create(mac_impl_t *mip, flow_tab_t **ftp) 1020da14cebeSEric Cheng { 1021da14cebeSEric Cheng mac_flow_tab_create(&flow_l2_ops, FLOW_LINK_DST | FLOW_LINK_VID, 1022da14cebeSEric Cheng 1024, mip, ftp); 1023da14cebeSEric Cheng } 1024da14cebeSEric Cheng 1025da14cebeSEric Cheng /* 1026da14cebeSEric Cheng * Destroy flow table. 1027da14cebeSEric Cheng */ 1028da14cebeSEric Cheng void 1029da14cebeSEric Cheng mac_flow_tab_destroy(flow_tab_t *ft) 1030da14cebeSEric Cheng { 1031da14cebeSEric Cheng if (ft == NULL) 1032da14cebeSEric Cheng return; 1033da14cebeSEric Cheng 1034da14cebeSEric Cheng ASSERT(ft->ft_flow_count == 0); 1035da14cebeSEric Cheng kmem_free(ft->ft_table, ft->ft_size * sizeof (flow_entry_t *)); 1036da14cebeSEric Cheng bzero(ft, sizeof (*ft)); 1037da14cebeSEric Cheng kmem_cache_free(flow_tab_cache, ft); 1038da14cebeSEric Cheng } 1039da14cebeSEric Cheng 1040da14cebeSEric Cheng /* 1041da14cebeSEric Cheng * Add a new flow entry to the global flow hash table 1042da14cebeSEric Cheng */ 1043da14cebeSEric Cheng int 1044da14cebeSEric Cheng mac_flow_hash_add(flow_entry_t *flent) 1045da14cebeSEric Cheng { 1046da14cebeSEric Cheng int err; 1047da14cebeSEric Cheng 1048da14cebeSEric Cheng rw_enter(&flow_tab_lock, RW_WRITER); 1049da14cebeSEric Cheng err = mod_hash_insert(flow_hash, 1050da14cebeSEric Cheng (mod_hash_key_t)flent->fe_flow_name, (mod_hash_val_t)flent); 1051da14cebeSEric Cheng if (err != 0) { 1052da14cebeSEric Cheng rw_exit(&flow_tab_lock); 1053da14cebeSEric Cheng return (EEXIST); 1054da14cebeSEric Cheng } 1055da14cebeSEric Cheng /* Mark as inserted into the global flow hash table */ 1056da14cebeSEric Cheng FLOW_MARK(flent, FE_G_FLOW_HASH); 1057da14cebeSEric Cheng rw_exit(&flow_tab_lock); 1058da14cebeSEric Cheng return (err); 1059da14cebeSEric Cheng } 1060da14cebeSEric Cheng 1061da14cebeSEric Cheng /* 1062da14cebeSEric Cheng * Remove a flow entry from the global flow hash table 1063da14cebeSEric Cheng */ 1064da14cebeSEric Cheng void 1065da14cebeSEric Cheng mac_flow_hash_remove(flow_entry_t *flent) 1066da14cebeSEric Cheng { 1067da14cebeSEric Cheng mod_hash_val_t val; 1068da14cebeSEric Cheng 1069da14cebeSEric Cheng rw_enter(&flow_tab_lock, RW_WRITER); 1070da14cebeSEric Cheng VERIFY(mod_hash_remove(flow_hash, 1071da14cebeSEric Cheng (mod_hash_key_t)flent->fe_flow_name, &val) == 0); 1072da14cebeSEric Cheng 1073da14cebeSEric Cheng /* Clear the mark that says inserted into the global flow hash table */ 1074da14cebeSEric Cheng FLOW_UNMARK(flent, FE_G_FLOW_HASH); 1075da14cebeSEric Cheng rw_exit(&flow_tab_lock); 1076da14cebeSEric Cheng } 1077da14cebeSEric Cheng 1078da14cebeSEric Cheng /* 1079da14cebeSEric Cheng * Retrieve a flow entry from the global flow hash table. 1080da14cebeSEric Cheng */ 1081da14cebeSEric Cheng int 1082da14cebeSEric Cheng mac_flow_lookup_byname(char *name, flow_entry_t **flentp) 1083da14cebeSEric Cheng { 1084da14cebeSEric Cheng int err; 1085da14cebeSEric Cheng flow_entry_t *flent; 1086da14cebeSEric Cheng 1087da14cebeSEric Cheng rw_enter(&flow_tab_lock, RW_READER); 1088da14cebeSEric Cheng err = mod_hash_find(flow_hash, (mod_hash_key_t)name, 1089da14cebeSEric Cheng (mod_hash_val_t *)&flent); 1090da14cebeSEric Cheng if (err != 0) { 1091da14cebeSEric Cheng rw_exit(&flow_tab_lock); 1092da14cebeSEric Cheng return (ENOENT); 1093da14cebeSEric Cheng } 1094da14cebeSEric Cheng ASSERT(flent != NULL); 1095da14cebeSEric Cheng FLOW_USER_REFHOLD(flent); 1096da14cebeSEric Cheng rw_exit(&flow_tab_lock); 1097da14cebeSEric Cheng 1098da14cebeSEric Cheng *flentp = flent; 1099da14cebeSEric Cheng return (0); 1100da14cebeSEric Cheng } 1101da14cebeSEric Cheng 1102da14cebeSEric Cheng /* 1103da14cebeSEric Cheng * Initialize or release mac client flows by walking the subflow table. 1104da14cebeSEric Cheng * These are typically invoked during plumb/unplumb of links. 1105da14cebeSEric Cheng */ 1106da14cebeSEric Cheng 1107da14cebeSEric Cheng static int 1108da14cebeSEric Cheng mac_link_init_flows_cb(flow_entry_t *flent, void *arg) 1109da14cebeSEric Cheng { 1110da14cebeSEric Cheng mac_client_impl_t *mcip = arg; 1111da14cebeSEric Cheng 1112da14cebeSEric Cheng if (mac_link_flow_init(arg, flent) != 0) { 1113da14cebeSEric Cheng cmn_err(CE_WARN, "Failed to initialize flow '%s' on link '%s'", 1114da14cebeSEric Cheng flent->fe_flow_name, mcip->mci_name); 1115da14cebeSEric Cheng } else { 1116da14cebeSEric Cheng FLOW_UNMARK(flent, FE_UF_NO_DATAPATH); 1117da14cebeSEric Cheng } 1118da14cebeSEric Cheng return (0); 1119da14cebeSEric Cheng } 1120da14cebeSEric Cheng 1121da14cebeSEric Cheng void 1122da14cebeSEric Cheng mac_link_init_flows(mac_client_handle_t mch) 1123da14cebeSEric Cheng { 1124da14cebeSEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 1125da14cebeSEric Cheng 1126da14cebeSEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 1127da14cebeSEric Cheng mac_link_init_flows_cb, mcip); 1128da14cebeSEric Cheng /* 1129da14cebeSEric Cheng * If mac client had subflow(s) configured before plumb, change 1130da14cebeSEric Cheng * function to mac_rx_srs_subflow_process and in case of hardware 1131da14cebeSEric Cheng * classification, disable polling. 1132da14cebeSEric Cheng */ 1133da14cebeSEric Cheng mac_client_update_classifier(mcip, B_TRUE); 1134da14cebeSEric Cheng 1135da14cebeSEric Cheng } 1136da14cebeSEric Cheng 1137da14cebeSEric Cheng boolean_t 1138da14cebeSEric Cheng mac_link_has_flows(mac_client_handle_t mch) 1139da14cebeSEric Cheng { 1140da14cebeSEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 1141da14cebeSEric Cheng 1142da14cebeSEric Cheng if (!FLOW_TAB_EMPTY(mcip->mci_subflow_tab)) 1143da14cebeSEric Cheng return (B_TRUE); 1144da14cebeSEric Cheng 1145da14cebeSEric Cheng return (B_FALSE); 1146da14cebeSEric Cheng } 1147da14cebeSEric Cheng 1148da14cebeSEric Cheng static int 1149da14cebeSEric Cheng mac_link_release_flows_cb(flow_entry_t *flent, void *arg) 1150da14cebeSEric Cheng { 1151da14cebeSEric Cheng FLOW_MARK(flent, FE_UF_NO_DATAPATH); 1152da14cebeSEric Cheng mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 1153da14cebeSEric Cheng mac_link_flow_clean(arg, flent); 1154da14cebeSEric Cheng return (0); 1155da14cebeSEric Cheng } 1156da14cebeSEric Cheng 1157da14cebeSEric Cheng void 1158da14cebeSEric Cheng mac_link_release_flows(mac_client_handle_t mch) 1159da14cebeSEric Cheng { 1160da14cebeSEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 1161da14cebeSEric Cheng 1162da14cebeSEric Cheng /* 1163da14cebeSEric Cheng * Change the mci_flent callback back to mac_rx_srs_process() 1164da14cebeSEric Cheng * because flows are about to be deactivated. 1165da14cebeSEric Cheng */ 1166da14cebeSEric Cheng mac_client_update_classifier(mcip, B_FALSE); 1167da14cebeSEric Cheng (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 1168da14cebeSEric Cheng mac_link_release_flows_cb, mcip); 1169da14cebeSEric Cheng } 1170da14cebeSEric Cheng 1171da14cebeSEric Cheng void 1172da14cebeSEric Cheng mac_rename_flow(flow_entry_t *fep, const char *new_name) 1173da14cebeSEric Cheng { 1174da14cebeSEric Cheng mac_flow_set_name(fep, new_name); 1175da14cebeSEric Cheng if (fep->fe_ksp != NULL) { 1176da14cebeSEric Cheng flow_stat_destroy(fep); 1177da14cebeSEric Cheng flow_stat_create(fep); 1178da14cebeSEric Cheng } 1179da14cebeSEric Cheng } 1180da14cebeSEric Cheng 1181da14cebeSEric Cheng /* 1182da14cebeSEric Cheng * mac_link_flow_init() 1183da14cebeSEric Cheng * Internal flow interface used for allocating SRSs and related 1184da14cebeSEric Cheng * data structures. Not meant to be used by mac clients. 1185da14cebeSEric Cheng */ 1186da14cebeSEric Cheng int 1187da14cebeSEric Cheng mac_link_flow_init(mac_client_handle_t mch, flow_entry_t *sub_flow) 1188da14cebeSEric Cheng { 1189da14cebeSEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 1190da14cebeSEric Cheng mac_impl_t *mip = mcip->mci_mip; 1191da14cebeSEric Cheng int err; 1192da14cebeSEric Cheng 1193da14cebeSEric Cheng ASSERT(mch != NULL); 1194da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 1195da14cebeSEric Cheng 1196da14cebeSEric Cheng if ((err = mac_datapath_setup(mcip, sub_flow, SRST_FLOW)) != 0) 1197da14cebeSEric Cheng return (err); 1198da14cebeSEric Cheng 1199da14cebeSEric Cheng sub_flow->fe_mcip = mcip; 1200da14cebeSEric Cheng 1201da14cebeSEric Cheng return (0); 1202da14cebeSEric Cheng } 1203da14cebeSEric Cheng 1204da14cebeSEric Cheng /* 1205da14cebeSEric Cheng * mac_link_flow_add() 1206da14cebeSEric Cheng * Used by flowadm(1m) or kernel mac clients for creating flows. 1207da14cebeSEric Cheng */ 1208da14cebeSEric Cheng int 1209da14cebeSEric Cheng mac_link_flow_add(datalink_id_t linkid, char *flow_name, 1210da14cebeSEric Cheng flow_desc_t *flow_desc, mac_resource_props_t *mrp) 1211da14cebeSEric Cheng { 1212da14cebeSEric Cheng flow_entry_t *flent = NULL; 1213da14cebeSEric Cheng int err; 1214da14cebeSEric Cheng dls_dl_handle_t dlh; 1215da14cebeSEric Cheng dls_link_t *dlp; 1216da14cebeSEric Cheng boolean_t link_held = B_FALSE; 1217da14cebeSEric Cheng boolean_t hash_added = B_FALSE; 1218da14cebeSEric Cheng mac_perim_handle_t mph; 1219da14cebeSEric Cheng 1220da14cebeSEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 1221da14cebeSEric Cheng if (err == 0) { 1222da14cebeSEric Cheng FLOW_USER_REFRELE(flent); 1223da14cebeSEric Cheng return (EEXIST); 1224da14cebeSEric Cheng } 1225da14cebeSEric Cheng 1226da14cebeSEric Cheng /* 1227da14cebeSEric Cheng * First create a flow entry given the description provided 1228da14cebeSEric Cheng * by the caller. 1229da14cebeSEric Cheng */ 1230da14cebeSEric Cheng err = mac_flow_create(flow_desc, mrp, flow_name, NULL, 1231da14cebeSEric Cheng FLOW_USER | FLOW_OTHER, &flent); 1232da14cebeSEric Cheng 1233da14cebeSEric Cheng if (err != 0) 1234da14cebeSEric Cheng return (err); 1235da14cebeSEric Cheng 1236da14cebeSEric Cheng /* 1237da14cebeSEric Cheng * We've got a local variable referencing this flow now, so we need 1238da14cebeSEric Cheng * to hold it. We'll release this flow before returning. 1239da14cebeSEric Cheng * All failures until we return will undo any action that may internally 1240da14cebeSEric Cheng * held the flow, so the last REFRELE will assure a clean freeing 1241da14cebeSEric Cheng * of resources. 1242da14cebeSEric Cheng */ 1243da14cebeSEric Cheng FLOW_REFHOLD(flent); 1244da14cebeSEric Cheng 1245da14cebeSEric Cheng flent->fe_link_id = linkid; 1246da14cebeSEric Cheng FLOW_MARK(flent, FE_INCIPIENT); 1247da14cebeSEric Cheng 1248da14cebeSEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 1249da14cebeSEric Cheng if (err != 0) { 1250da14cebeSEric Cheng FLOW_FINAL_REFRELE(flent); 1251da14cebeSEric Cheng return (err); 1252da14cebeSEric Cheng } 1253da14cebeSEric Cheng 1254da14cebeSEric Cheng /* 1255da14cebeSEric Cheng * dls will eventually be merged with mac so it's ok 1256da14cebeSEric Cheng * to call dls' internal functions. 1257da14cebeSEric Cheng */ 1258da14cebeSEric Cheng err = dls_devnet_hold_link(linkid, &dlh, &dlp); 1259da14cebeSEric Cheng if (err != 0) 1260da14cebeSEric Cheng goto bail; 1261da14cebeSEric Cheng 1262da14cebeSEric Cheng link_held = B_TRUE; 1263da14cebeSEric Cheng 1264da14cebeSEric Cheng /* 1265da14cebeSEric Cheng * Add the flow to the global flow table, this table will be per 1266da14cebeSEric Cheng * exclusive zone so each zone can have its own flow namespace. 1267da14cebeSEric Cheng * RFE 6625651 will fix this. 1268da14cebeSEric Cheng * 1269da14cebeSEric Cheng */ 1270da14cebeSEric Cheng if ((err = mac_flow_hash_add(flent)) != 0) 1271da14cebeSEric Cheng goto bail; 1272da14cebeSEric Cheng 1273da14cebeSEric Cheng hash_added = B_TRUE; 1274da14cebeSEric Cheng 1275da14cebeSEric Cheng /* 1276da14cebeSEric Cheng * do not allow flows to be configured on an anchor VNIC 1277da14cebeSEric Cheng */ 1278da14cebeSEric Cheng if (mac_capab_get(dlp->dl_mh, MAC_CAPAB_ANCHOR_VNIC, NULL)) { 1279da14cebeSEric Cheng err = ENOTSUP; 1280da14cebeSEric Cheng goto bail; 1281da14cebeSEric Cheng } 1282da14cebeSEric Cheng 1283da14cebeSEric Cheng /* 1284da14cebeSEric Cheng * Add the subflow to the subflow table. Also instantiate the flow 1285ae6aa22aSVenugopal Iyer * in the mac if there is an active user (we check if the MAC client's 1286ae6aa22aSVenugopal Iyer * datapath has been setup). 1287da14cebeSEric Cheng */ 1288ae6aa22aSVenugopal Iyer err = mac_flow_add_subflow(dlp->dl_mch, flent, 1289ae6aa22aSVenugopal Iyer MCIP_DATAPATH_SETUP((mac_client_impl_t *)dlp->dl_mch)); 1290da14cebeSEric Cheng if (err != 0) 1291da14cebeSEric Cheng goto bail; 1292da14cebeSEric Cheng 1293da14cebeSEric Cheng FLOW_UNMARK(flent, FE_INCIPIENT); 1294da14cebeSEric Cheng dls_devnet_rele_link(dlh, dlp); 1295da14cebeSEric Cheng mac_perim_exit(mph); 1296da14cebeSEric Cheng return (0); 1297da14cebeSEric Cheng 1298da14cebeSEric Cheng bail: 1299da14cebeSEric Cheng if (hash_added) 1300da14cebeSEric Cheng mac_flow_hash_remove(flent); 1301da14cebeSEric Cheng 1302da14cebeSEric Cheng if (link_held) 1303da14cebeSEric Cheng dls_devnet_rele_link(dlh, dlp); 1304da14cebeSEric Cheng 1305da14cebeSEric Cheng /* 1306da14cebeSEric Cheng * Wait for any transient global flow hash refs to clear 1307da14cebeSEric Cheng * and then release the creation reference on the flow 1308da14cebeSEric Cheng */ 1309da14cebeSEric Cheng mac_flow_wait(flent, FLOW_USER_REF); 1310da14cebeSEric Cheng FLOW_FINAL_REFRELE(flent); 1311da14cebeSEric Cheng mac_perim_exit(mph); 1312da14cebeSEric Cheng return (err); 1313da14cebeSEric Cheng } 1314da14cebeSEric Cheng 1315da14cebeSEric Cheng /* 1316da14cebeSEric Cheng * mac_link_flow_clean() 1317da14cebeSEric Cheng * Internal flow interface used for freeing SRSs and related 1318da14cebeSEric Cheng * data structures. Not meant to be used by mac clients. 1319da14cebeSEric Cheng */ 1320da14cebeSEric Cheng void 1321da14cebeSEric Cheng mac_link_flow_clean(mac_client_handle_t mch, flow_entry_t *sub_flow) 1322da14cebeSEric Cheng { 1323da14cebeSEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 1324da14cebeSEric Cheng mac_impl_t *mip = mcip->mci_mip; 1325da14cebeSEric Cheng boolean_t last_subflow; 1326da14cebeSEric Cheng 1327da14cebeSEric Cheng ASSERT(mch != NULL); 1328da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 1329da14cebeSEric Cheng 1330da14cebeSEric Cheng /* 1331da14cebeSEric Cheng * This sub flow entry may fail to be fully initialized by 1332da14cebeSEric Cheng * mac_link_flow_init(). If so, simply return. 1333da14cebeSEric Cheng */ 1334da14cebeSEric Cheng if (sub_flow->fe_mcip == NULL) 1335da14cebeSEric Cheng return; 1336da14cebeSEric Cheng 1337da14cebeSEric Cheng last_subflow = FLOW_TAB_EMPTY(mcip->mci_subflow_tab); 1338da14cebeSEric Cheng /* 1339da14cebeSEric Cheng * Tear down the data path 1340da14cebeSEric Cheng */ 1341da14cebeSEric Cheng mac_datapath_teardown(mcip, sub_flow, SRST_FLOW); 1342da14cebeSEric Cheng sub_flow->fe_mcip = NULL; 1343da14cebeSEric Cheng 1344da14cebeSEric Cheng /* 1345da14cebeSEric Cheng * Delete the SRSs associated with this subflow. If this is being 1346da14cebeSEric Cheng * driven by flowadm(1M) then the subflow will be deleted by 1347da14cebeSEric Cheng * dls_rem_flow. However if this is a result of the interface being 1348da14cebeSEric Cheng * unplumbed then the subflow itself won't be deleted. 1349da14cebeSEric Cheng */ 1350da14cebeSEric Cheng mac_flow_cleanup(sub_flow); 1351da14cebeSEric Cheng 1352da14cebeSEric Cheng /* 1353da14cebeSEric Cheng * If all the subflows are gone, renable some of the stuff 1354da14cebeSEric Cheng * we disabled when adding a subflow, polling etc. 1355da14cebeSEric Cheng */ 1356da14cebeSEric Cheng if (last_subflow) { 1357da14cebeSEric Cheng /* 1358da14cebeSEric Cheng * The subflow table itself is not protected by any locks or 1359da14cebeSEric Cheng * refcnts. Hence quiesce the client upfront before clearing 1360da14cebeSEric Cheng * mci_subflow_tab. 1361da14cebeSEric Cheng */ 1362da14cebeSEric Cheng mac_client_quiesce(mcip); 1363da14cebeSEric Cheng mac_client_update_classifier(mcip, B_FALSE); 1364da14cebeSEric Cheng mac_flow_tab_destroy(mcip->mci_subflow_tab); 1365da14cebeSEric Cheng mcip->mci_subflow_tab = NULL; 1366da14cebeSEric Cheng mac_client_restart(mcip); 1367da14cebeSEric Cheng } 1368da14cebeSEric Cheng } 1369da14cebeSEric Cheng 1370da14cebeSEric Cheng /* 1371da14cebeSEric Cheng * mac_link_flow_remove() 1372da14cebeSEric Cheng * Used by flowadm(1m) or kernel mac clients for removing flows. 1373da14cebeSEric Cheng */ 1374da14cebeSEric Cheng int 1375da14cebeSEric Cheng mac_link_flow_remove(char *flow_name) 1376da14cebeSEric Cheng { 1377da14cebeSEric Cheng flow_entry_t *flent; 1378da14cebeSEric Cheng mac_perim_handle_t mph; 1379da14cebeSEric Cheng int err; 1380da14cebeSEric Cheng datalink_id_t linkid; 1381da14cebeSEric Cheng 1382da14cebeSEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 1383da14cebeSEric Cheng if (err != 0) 1384da14cebeSEric Cheng return (err); 1385da14cebeSEric Cheng 1386da14cebeSEric Cheng linkid = flent->fe_link_id; 1387da14cebeSEric Cheng FLOW_USER_REFRELE(flent); 1388da14cebeSEric Cheng 1389da14cebeSEric Cheng /* 1390da14cebeSEric Cheng * The perim must be acquired before acquiring any other references 1391da14cebeSEric Cheng * to maintain the lock and perimeter hierarchy. Please note the 1392da14cebeSEric Cheng * FLOW_REFRELE above. 1393da14cebeSEric Cheng */ 1394da14cebeSEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 1395da14cebeSEric Cheng if (err != 0) 1396da14cebeSEric Cheng return (err); 1397da14cebeSEric Cheng 1398da14cebeSEric Cheng /* 1399da14cebeSEric Cheng * Note the second lookup of the flow, because a concurrent thread 1400da14cebeSEric Cheng * may have removed it already while we were waiting to enter the 1401da14cebeSEric Cheng * link's perimeter. 1402da14cebeSEric Cheng */ 1403da14cebeSEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 1404da14cebeSEric Cheng if (err != 0) { 1405da14cebeSEric Cheng mac_perim_exit(mph); 1406da14cebeSEric Cheng return (err); 1407da14cebeSEric Cheng } 1408da14cebeSEric Cheng FLOW_USER_REFRELE(flent); 1409da14cebeSEric Cheng 1410da14cebeSEric Cheng /* 1411da14cebeSEric Cheng * Remove the flow from the subflow table and deactivate the flow 1412da14cebeSEric Cheng * by quiescing and removings its SRSs 1413da14cebeSEric Cheng */ 1414da14cebeSEric Cheng mac_flow_rem_subflow(flent); 1415da14cebeSEric Cheng 1416da14cebeSEric Cheng /* 1417da14cebeSEric Cheng * Finally, remove the flow from the global table. 1418da14cebeSEric Cheng */ 1419da14cebeSEric Cheng mac_flow_hash_remove(flent); 1420da14cebeSEric Cheng 1421da14cebeSEric Cheng /* 1422da14cebeSEric Cheng * Wait for any transient global flow hash refs to clear 1423da14cebeSEric Cheng * and then release the creation reference on the flow 1424da14cebeSEric Cheng */ 1425da14cebeSEric Cheng mac_flow_wait(flent, FLOW_USER_REF); 1426da14cebeSEric Cheng FLOW_FINAL_REFRELE(flent); 1427da14cebeSEric Cheng 1428da14cebeSEric Cheng mac_perim_exit(mph); 1429da14cebeSEric Cheng 1430da14cebeSEric Cheng return (0); 1431da14cebeSEric Cheng } 1432da14cebeSEric Cheng 1433da14cebeSEric Cheng /* 1434da14cebeSEric Cheng * mac_link_flow_modify() 1435da14cebeSEric Cheng * Modifies the properties of a flow identified by its name. 1436da14cebeSEric Cheng */ 1437da14cebeSEric Cheng int 1438da14cebeSEric Cheng mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp) 1439da14cebeSEric Cheng { 1440da14cebeSEric Cheng flow_entry_t *flent; 1441da14cebeSEric Cheng mac_client_impl_t *mcip; 1442da14cebeSEric Cheng int err = 0; 1443da14cebeSEric Cheng mac_perim_handle_t mph; 1444da14cebeSEric Cheng datalink_id_t linkid; 1445da14cebeSEric Cheng flow_tab_t *flow_tab; 1446da14cebeSEric Cheng 1447*0dc2366fSVenugopal Iyer err = mac_validate_props(NULL, mrp); 1448da14cebeSEric Cheng if (err != 0) 1449da14cebeSEric Cheng return (err); 1450da14cebeSEric Cheng 1451da14cebeSEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 1452da14cebeSEric Cheng if (err != 0) 1453da14cebeSEric Cheng return (err); 1454da14cebeSEric Cheng 1455da14cebeSEric Cheng linkid = flent->fe_link_id; 1456da14cebeSEric Cheng FLOW_USER_REFRELE(flent); 1457da14cebeSEric Cheng 1458da14cebeSEric Cheng /* 1459da14cebeSEric Cheng * The perim must be acquired before acquiring any other references 1460da14cebeSEric Cheng * to maintain the lock and perimeter hierarchy. Please note the 1461da14cebeSEric Cheng * FLOW_REFRELE above. 1462da14cebeSEric Cheng */ 1463da14cebeSEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 1464da14cebeSEric Cheng if (err != 0) 1465da14cebeSEric Cheng return (err); 1466da14cebeSEric Cheng 1467da14cebeSEric Cheng /* 1468da14cebeSEric Cheng * Note the second lookup of the flow, because a concurrent thread 1469da14cebeSEric Cheng * may have removed it already while we were waiting to enter the 1470da14cebeSEric Cheng * link's perimeter. 1471da14cebeSEric Cheng */ 1472da14cebeSEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 1473da14cebeSEric Cheng if (err != 0) { 1474da14cebeSEric Cheng mac_perim_exit(mph); 1475da14cebeSEric Cheng return (err); 1476da14cebeSEric Cheng } 1477da14cebeSEric Cheng FLOW_USER_REFRELE(flent); 1478da14cebeSEric Cheng 1479da14cebeSEric Cheng /* 1480da14cebeSEric Cheng * If this flow is attached to a MAC client, then pass the request 1481da14cebeSEric Cheng * along to the client. 1482da14cebeSEric Cheng * Otherwise, just update the cached values. 1483da14cebeSEric Cheng */ 1484da14cebeSEric Cheng mcip = flent->fe_mcip; 1485da14cebeSEric Cheng mac_update_resources(mrp, &flent->fe_resource_props, B_TRUE); 1486da14cebeSEric Cheng if (mcip != NULL) { 1487da14cebeSEric Cheng if ((flow_tab = mcip->mci_subflow_tab) == NULL) { 1488da14cebeSEric Cheng err = ENOENT; 1489da14cebeSEric Cheng } else { 1490da14cebeSEric Cheng mac_flow_modify(flow_tab, flent, mrp); 1491da14cebeSEric Cheng } 1492da14cebeSEric Cheng } else { 1493da14cebeSEric Cheng (void) mac_flow_modify_props(flent, mrp); 1494da14cebeSEric Cheng } 1495da14cebeSEric Cheng 1496da14cebeSEric Cheng done: 1497da14cebeSEric Cheng mac_perim_exit(mph); 1498da14cebeSEric Cheng return (err); 1499da14cebeSEric Cheng } 1500da14cebeSEric Cheng 1501da14cebeSEric Cheng 1502da14cebeSEric Cheng /* 1503da14cebeSEric Cheng * State structure and misc functions used by mac_link_flow_walk(). 1504da14cebeSEric Cheng */ 1505da14cebeSEric Cheng typedef struct { 1506da14cebeSEric Cheng int (*ws_func)(mac_flowinfo_t *, void *); 1507da14cebeSEric Cheng void *ws_arg; 1508da14cebeSEric Cheng } flow_walk_state_t; 1509da14cebeSEric Cheng 1510da14cebeSEric Cheng static void 1511da14cebeSEric Cheng mac_link_flowinfo_copy(mac_flowinfo_t *finfop, flow_entry_t *flent) 1512da14cebeSEric Cheng { 1513da000602SGirish Moodalbail (void) strlcpy(finfop->fi_flow_name, flent->fe_flow_name, 1514da000602SGirish Moodalbail MAXFLOWNAMELEN); 1515da14cebeSEric Cheng finfop->fi_link_id = flent->fe_link_id; 1516da14cebeSEric Cheng finfop->fi_flow_desc = flent->fe_flow_desc; 1517da14cebeSEric Cheng finfop->fi_resource_props = flent->fe_resource_props; 1518da14cebeSEric Cheng } 1519da14cebeSEric Cheng 1520da14cebeSEric Cheng static int 1521da14cebeSEric Cheng mac_link_flow_walk_cb(flow_entry_t *flent, void *arg) 1522da14cebeSEric Cheng { 1523da14cebeSEric Cheng flow_walk_state_t *statep = arg; 1524*0dc2366fSVenugopal Iyer mac_flowinfo_t *finfo; 1525*0dc2366fSVenugopal Iyer int err; 1526da14cebeSEric Cheng 1527*0dc2366fSVenugopal Iyer finfo = kmem_zalloc(sizeof (*finfo), KM_SLEEP); 1528*0dc2366fSVenugopal Iyer mac_link_flowinfo_copy(finfo, flent); 1529*0dc2366fSVenugopal Iyer err = statep->ws_func(finfo, statep->ws_arg); 1530*0dc2366fSVenugopal Iyer kmem_free(finfo, sizeof (*finfo)); 1531*0dc2366fSVenugopal Iyer return (err); 1532da14cebeSEric Cheng } 1533da14cebeSEric Cheng 1534da14cebeSEric Cheng /* 1535da14cebeSEric Cheng * mac_link_flow_walk() 1536da14cebeSEric Cheng * Invokes callback 'func' for all flows belonging to the specified link. 1537da14cebeSEric Cheng */ 1538da14cebeSEric Cheng int 1539da14cebeSEric Cheng mac_link_flow_walk(datalink_id_t linkid, 1540da14cebeSEric Cheng int (*func)(mac_flowinfo_t *, void *), void *arg) 1541da14cebeSEric Cheng { 1542da14cebeSEric Cheng mac_client_impl_t *mcip; 1543da14cebeSEric Cheng mac_perim_handle_t mph; 1544da14cebeSEric Cheng flow_walk_state_t state; 1545da14cebeSEric Cheng dls_dl_handle_t dlh; 1546da14cebeSEric Cheng dls_link_t *dlp; 1547da14cebeSEric Cheng int err; 1548da14cebeSEric Cheng 1549da14cebeSEric Cheng err = mac_perim_enter_by_linkid(linkid, &mph); 1550da14cebeSEric Cheng if (err != 0) 1551da14cebeSEric Cheng return (err); 1552da14cebeSEric Cheng 1553da14cebeSEric Cheng err = dls_devnet_hold_link(linkid, &dlh, &dlp); 1554da14cebeSEric Cheng if (err != 0) { 1555da14cebeSEric Cheng mac_perim_exit(mph); 1556da14cebeSEric Cheng return (err); 1557da14cebeSEric Cheng } 1558da14cebeSEric Cheng 1559da14cebeSEric Cheng mcip = (mac_client_impl_t *)dlp->dl_mch; 1560da14cebeSEric Cheng state.ws_func = func; 1561da14cebeSEric Cheng state.ws_arg = arg; 1562da14cebeSEric Cheng 1563da14cebeSEric Cheng err = mac_flow_walk_nolock(mcip->mci_subflow_tab, 1564da14cebeSEric Cheng mac_link_flow_walk_cb, &state); 1565da14cebeSEric Cheng 1566da14cebeSEric Cheng dls_devnet_rele_link(dlh, dlp); 1567da14cebeSEric Cheng mac_perim_exit(mph); 1568da14cebeSEric Cheng return (err); 1569da14cebeSEric Cheng } 1570da14cebeSEric Cheng 1571da14cebeSEric Cheng /* 1572da14cebeSEric Cheng * mac_link_flow_info() 1573da14cebeSEric Cheng * Retrieves information about a specific flow. 1574da14cebeSEric Cheng */ 1575da14cebeSEric Cheng int 1576da14cebeSEric Cheng mac_link_flow_info(char *flow_name, mac_flowinfo_t *finfo) 1577da14cebeSEric Cheng { 1578da14cebeSEric Cheng flow_entry_t *flent; 1579da14cebeSEric Cheng int err; 1580da14cebeSEric Cheng 1581da14cebeSEric Cheng err = mac_flow_lookup_byname(flow_name, &flent); 1582da14cebeSEric Cheng if (err != 0) 1583da14cebeSEric Cheng return (err); 1584da14cebeSEric Cheng 1585da14cebeSEric Cheng mac_link_flowinfo_copy(finfo, flent); 1586da14cebeSEric Cheng FLOW_USER_REFRELE(flent); 1587da14cebeSEric Cheng return (0); 1588da14cebeSEric Cheng } 1589da14cebeSEric Cheng 15902b24ab6bSSebastien Roy /* 15912b24ab6bSSebastien Roy * Hash function macro that takes an Ethernet address and VLAN id as input. 15922b24ab6bSSebastien Roy */ 15932b24ab6bSSebastien Roy #define HASH_ETHER_VID(a, v, s) \ 1594da14cebeSEric Cheng ((((uint32_t)(a)[3] + (a)[4] + (a)[5]) ^ (v)) % (s)) 1595da14cebeSEric Cheng 15962b24ab6bSSebastien Roy /* 15972b24ab6bSSebastien Roy * Generic layer-2 address hashing function that takes an address and address 15982b24ab6bSSebastien Roy * length as input. This is the DJB hash function. 15992b24ab6bSSebastien Roy */ 16002b24ab6bSSebastien Roy static uint32_t 16012b24ab6bSSebastien Roy flow_l2_addrhash(uint8_t *addr, size_t addrlen, size_t htsize) 16022b24ab6bSSebastien Roy { 16032b24ab6bSSebastien Roy uint32_t hash = 5381; 16042b24ab6bSSebastien Roy size_t i; 16052b24ab6bSSebastien Roy 16062b24ab6bSSebastien Roy for (i = 0; i < addrlen; i++) 16072b24ab6bSSebastien Roy hash = ((hash << 5) + hash) + addr[i]; 16082b24ab6bSSebastien Roy return (hash % htsize); 16092b24ab6bSSebastien Roy } 16102b24ab6bSSebastien Roy 1611da14cebeSEric Cheng #define PKT_TOO_SMALL(s, end) ((s)->fs_mp->b_wptr < (end)) 1612da14cebeSEric Cheng 1613ae6aa22aSVenugopal Iyer #define CHECK_AND_ADJUST_START_PTR(s, start) { \ 1614ae6aa22aSVenugopal Iyer if ((s)->fs_mp->b_wptr == (start)) { \ 1615ae6aa22aSVenugopal Iyer mblk_t *next = (s)->fs_mp->b_cont; \ 1616ae6aa22aSVenugopal Iyer if (next == NULL) \ 1617ae6aa22aSVenugopal Iyer return (EINVAL); \ 1618ae6aa22aSVenugopal Iyer \ 1619ae6aa22aSVenugopal Iyer (s)->fs_mp = next; \ 1620ae6aa22aSVenugopal Iyer (start) = next->b_rptr; \ 1621ae6aa22aSVenugopal Iyer } \ 1622ae6aa22aSVenugopal Iyer } 1623ae6aa22aSVenugopal Iyer 1624da14cebeSEric Cheng /* ARGSUSED */ 1625da14cebeSEric Cheng static boolean_t 1626da14cebeSEric Cheng flow_l2_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 1627da14cebeSEric Cheng { 1628da14cebeSEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 1629da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 1630da14cebeSEric Cheng 1631da14cebeSEric Cheng return (l2->l2_vid == fd->fd_vid && 1632da14cebeSEric Cheng bcmp(l2->l2_daddr, fd->fd_dst_mac, fd->fd_mac_len) == 0); 1633da14cebeSEric Cheng } 1634da14cebeSEric Cheng 1635da14cebeSEric Cheng /* 1636da14cebeSEric Cheng * Layer 2 hash function. 1637da14cebeSEric Cheng * Must be paired with flow_l2_accept() within a set of flow_ops 1638da14cebeSEric Cheng * because it assumes the dest address is already extracted. 1639da14cebeSEric Cheng */ 1640da14cebeSEric Cheng static uint32_t 1641da14cebeSEric Cheng flow_l2_hash(flow_tab_t *ft, flow_state_t *s) 1642da14cebeSEric Cheng { 16432b24ab6bSSebastien Roy return (flow_l2_addrhash(s->fs_l2info.l2_daddr, 16442b24ab6bSSebastien Roy ft->ft_mip->mi_type->mt_addr_length, ft->ft_size)); 1645da14cebeSEric Cheng } 1646da14cebeSEric Cheng 1647da14cebeSEric Cheng /* 1648da14cebeSEric Cheng * This is the generic layer 2 accept function. 1649da14cebeSEric Cheng * It makes use of mac_header_info() to extract the header length, 1650da14cebeSEric Cheng * sap, vlan ID and destination address. 1651da14cebeSEric Cheng */ 1652da14cebeSEric Cheng static int 1653da14cebeSEric Cheng flow_l2_accept(flow_tab_t *ft, flow_state_t *s) 1654da14cebeSEric Cheng { 1655da14cebeSEric Cheng boolean_t is_ether; 1656da14cebeSEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 1657da14cebeSEric Cheng mac_header_info_t mhi; 1658da14cebeSEric Cheng int err; 1659da14cebeSEric Cheng 1660da14cebeSEric Cheng is_ether = (ft->ft_mip->mi_info.mi_nativemedia == DL_ETHER); 1661da14cebeSEric Cheng if ((err = mac_header_info((mac_handle_t)ft->ft_mip, 1662da14cebeSEric Cheng s->fs_mp, &mhi)) != 0) { 1663da14cebeSEric Cheng if (err == EINVAL) 1664da14cebeSEric Cheng err = ENOBUFS; 1665da14cebeSEric Cheng 1666da14cebeSEric Cheng return (err); 1667da14cebeSEric Cheng } 1668da14cebeSEric Cheng 1669da14cebeSEric Cheng l2->l2_start = s->fs_mp->b_rptr; 1670da14cebeSEric Cheng l2->l2_daddr = (uint8_t *)mhi.mhi_daddr; 1671da14cebeSEric Cheng 1672da14cebeSEric Cheng if (is_ether && mhi.mhi_bindsap == ETHERTYPE_VLAN && 1673da14cebeSEric Cheng ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) { 1674da14cebeSEric Cheng struct ether_vlan_header *evhp = 1675da14cebeSEric Cheng (struct ether_vlan_header *)l2->l2_start; 1676da14cebeSEric Cheng 1677da14cebeSEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp))) 1678da14cebeSEric Cheng return (ENOBUFS); 1679da14cebeSEric Cheng 1680da14cebeSEric Cheng l2->l2_sap = ntohs(evhp->ether_type); 1681da14cebeSEric Cheng l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci)); 1682da14cebeSEric Cheng l2->l2_hdrsize = sizeof (*evhp); 1683da14cebeSEric Cheng } else { 1684da14cebeSEric Cheng l2->l2_sap = mhi.mhi_bindsap; 1685da14cebeSEric Cheng l2->l2_vid = 0; 1686da14cebeSEric Cheng l2->l2_hdrsize = (uint32_t)mhi.mhi_hdrsize; 1687da14cebeSEric Cheng } 1688da14cebeSEric Cheng return (0); 1689da14cebeSEric Cheng } 1690da14cebeSEric Cheng 1691da14cebeSEric Cheng /* 1692da14cebeSEric Cheng * flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/ 1693da14cebeSEric Cheng * accept(). The notable difference is that dest address is now extracted 1694da14cebeSEric Cheng * by hash() rather than by accept(). This saves a few memory references 1695da14cebeSEric Cheng * for flow tables that do not care about mac addresses. 1696da14cebeSEric Cheng */ 1697da14cebeSEric Cheng static uint32_t 1698da14cebeSEric Cheng flow_ether_hash(flow_tab_t *ft, flow_state_t *s) 1699da14cebeSEric Cheng { 1700da14cebeSEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 1701da14cebeSEric Cheng struct ether_vlan_header *evhp; 1702da14cebeSEric Cheng 1703da14cebeSEric Cheng evhp = (struct ether_vlan_header *)l2->l2_start; 1704da14cebeSEric Cheng l2->l2_daddr = evhp->ether_dhost.ether_addr_octet; 17052b24ab6bSSebastien Roy return (HASH_ETHER_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size)); 17062b24ab6bSSebastien Roy } 17072b24ab6bSSebastien Roy 17082b24ab6bSSebastien Roy static uint32_t 17092b24ab6bSSebastien Roy flow_ether_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 17102b24ab6bSSebastien Roy { 17112b24ab6bSSebastien Roy flow_desc_t *fd = &flent->fe_flow_desc; 17122b24ab6bSSebastien Roy 17132b24ab6bSSebastien Roy ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0); 17142b24ab6bSSebastien Roy return (HASH_ETHER_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size)); 1715da14cebeSEric Cheng } 1716da14cebeSEric Cheng 1717da14cebeSEric Cheng /* ARGSUSED */ 1718da14cebeSEric Cheng static int 1719da14cebeSEric Cheng flow_ether_accept(flow_tab_t *ft, flow_state_t *s) 1720da14cebeSEric Cheng { 1721da14cebeSEric Cheng flow_l2info_t *l2 = &s->fs_l2info; 1722da14cebeSEric Cheng struct ether_vlan_header *evhp; 1723da14cebeSEric Cheng uint16_t sap; 1724da14cebeSEric Cheng 1725da14cebeSEric Cheng evhp = (struct ether_vlan_header *)s->fs_mp->b_rptr; 1726da14cebeSEric Cheng l2->l2_start = (uchar_t *)evhp; 1727da14cebeSEric Cheng 1728da14cebeSEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (struct ether_header))) 1729da14cebeSEric Cheng return (ENOBUFS); 1730da14cebeSEric Cheng 1731da14cebeSEric Cheng if ((sap = ntohs(evhp->ether_tpid)) == ETHERTYPE_VLAN && 1732da14cebeSEric Cheng ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) { 1733da14cebeSEric Cheng if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp))) 1734da14cebeSEric Cheng return (ENOBUFS); 1735da14cebeSEric Cheng 1736da14cebeSEric Cheng l2->l2_sap = ntohs(evhp->ether_type); 1737da14cebeSEric Cheng l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci)); 1738da14cebeSEric Cheng l2->l2_hdrsize = sizeof (struct ether_vlan_header); 1739da14cebeSEric Cheng } else { 1740da14cebeSEric Cheng l2->l2_sap = sap; 1741da14cebeSEric Cheng l2->l2_vid = 0; 1742da14cebeSEric Cheng l2->l2_hdrsize = sizeof (struct ether_header); 1743da14cebeSEric Cheng } 1744da14cebeSEric Cheng return (0); 1745da14cebeSEric Cheng } 1746da14cebeSEric Cheng 1747da14cebeSEric Cheng /* 1748da14cebeSEric Cheng * Validates a layer 2 flow entry. 1749da14cebeSEric Cheng */ 1750da14cebeSEric Cheng static int 1751da14cebeSEric Cheng flow_l2_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 1752da14cebeSEric Cheng { 1753da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 1754da14cebeSEric Cheng 1755da14cebeSEric Cheng /* 17562b24ab6bSSebastien Roy * Dest address is mandatory, and 0 length addresses are not yet 17572b24ab6bSSebastien Roy * supported. 1758da14cebeSEric Cheng */ 17592b24ab6bSSebastien Roy if ((fd->fd_mask & FLOW_LINK_DST) == 0 || fd->fd_mac_len == 0) 1760da14cebeSEric Cheng return (EINVAL); 1761da14cebeSEric Cheng 1762da14cebeSEric Cheng if ((fd->fd_mask & FLOW_LINK_VID) != 0) { 1763da14cebeSEric Cheng /* 1764da14cebeSEric Cheng * VLAN flows are only supported over ethernet macs. 1765da14cebeSEric Cheng */ 1766da14cebeSEric Cheng if (ft->ft_mip->mi_info.mi_nativemedia != DL_ETHER) 1767da14cebeSEric Cheng return (EINVAL); 1768da14cebeSEric Cheng 1769da14cebeSEric Cheng if (fd->fd_vid == 0) 1770da14cebeSEric Cheng return (EINVAL); 1771da14cebeSEric Cheng 1772da14cebeSEric Cheng } 1773da14cebeSEric Cheng flent->fe_match = flow_l2_match; 1774da14cebeSEric Cheng return (0); 1775da14cebeSEric Cheng } 1776da14cebeSEric Cheng 1777da14cebeSEric Cheng /* 1778da14cebeSEric Cheng * Calculates hash index of flow entry. 1779da14cebeSEric Cheng */ 1780da14cebeSEric Cheng static uint32_t 1781da14cebeSEric Cheng flow_l2_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 1782da14cebeSEric Cheng { 1783da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 1784da14cebeSEric Cheng 17852b24ab6bSSebastien Roy ASSERT((fd->fd_mask & FLOW_LINK_VID) == 0 && fd->fd_vid == 0); 17862b24ab6bSSebastien Roy return (flow_l2_addrhash(fd->fd_dst_mac, 17872b24ab6bSSebastien Roy ft->ft_mip->mi_type->mt_addr_length, ft->ft_size)); 1788da14cebeSEric Cheng } 1789da14cebeSEric Cheng 1790da14cebeSEric Cheng /* 1791da14cebeSEric Cheng * This is used for duplicate flow checking. 1792da14cebeSEric Cheng */ 1793da14cebeSEric Cheng /* ARGSUSED */ 1794da14cebeSEric Cheng static boolean_t 1795da14cebeSEric Cheng flow_l2_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 1796da14cebeSEric Cheng { 1797da14cebeSEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 1798da14cebeSEric Cheng 1799da14cebeSEric Cheng ASSERT(fd1->fd_mac_len == fd2->fd_mac_len && fd1->fd_mac_len != 0); 1800da14cebeSEric Cheng return (bcmp(&fd1->fd_dst_mac, &fd2->fd_dst_mac, 1801da14cebeSEric Cheng fd1->fd_mac_len) == 0 && fd1->fd_vid == fd2->fd_vid); 1802da14cebeSEric Cheng } 1803da14cebeSEric Cheng 1804da14cebeSEric Cheng /* 1805da14cebeSEric Cheng * Generic flow entry insertion function. 1806da14cebeSEric Cheng * Used by flow tables that do not have ordering requirements. 1807da14cebeSEric Cheng */ 1808da14cebeSEric Cheng /* ARGSUSED */ 1809da14cebeSEric Cheng static int 1810da14cebeSEric Cheng flow_generic_insert_fe(flow_tab_t *ft, flow_entry_t **headp, 1811da14cebeSEric Cheng flow_entry_t *flent) 1812da14cebeSEric Cheng { 1813da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 1814da14cebeSEric Cheng 1815da14cebeSEric Cheng if (*headp != NULL) { 1816da14cebeSEric Cheng ASSERT(flent->fe_next == NULL); 1817da14cebeSEric Cheng flent->fe_next = *headp; 1818da14cebeSEric Cheng } 1819da14cebeSEric Cheng *headp = flent; 1820da14cebeSEric Cheng return (0); 1821da14cebeSEric Cheng } 1822da14cebeSEric Cheng 1823da14cebeSEric Cheng /* 1824da14cebeSEric Cheng * IP version independent DSField matching function. 1825da14cebeSEric Cheng */ 1826da14cebeSEric Cheng /* ARGSUSED */ 1827da14cebeSEric Cheng static boolean_t 1828da14cebeSEric Cheng flow_ip_dsfield_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 1829da14cebeSEric Cheng { 1830da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 1831da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 1832da14cebeSEric Cheng 1833da14cebeSEric Cheng switch (l3info->l3_version) { 1834da14cebeSEric Cheng case IPV4_VERSION: { 1835da14cebeSEric Cheng ipha_t *ipha = (ipha_t *)l3info->l3_start; 1836da14cebeSEric Cheng 1837da14cebeSEric Cheng return ((ipha->ipha_type_of_service & 1838da14cebeSEric Cheng fd->fd_dsfield_mask) == fd->fd_dsfield); 1839da14cebeSEric Cheng } 1840da14cebeSEric Cheng case IPV6_VERSION: { 1841da14cebeSEric Cheng ip6_t *ip6h = (ip6_t *)l3info->l3_start; 1842da14cebeSEric Cheng 1843da14cebeSEric Cheng return ((IPV6_FLOW_TCLASS(ip6h->ip6_vcf) & 1844da14cebeSEric Cheng fd->fd_dsfield_mask) == fd->fd_dsfield); 1845da14cebeSEric Cheng } 1846da14cebeSEric Cheng default: 1847da14cebeSEric Cheng return (B_FALSE); 1848da14cebeSEric Cheng } 1849da14cebeSEric Cheng } 1850da14cebeSEric Cheng 1851da14cebeSEric Cheng /* 1852da14cebeSEric Cheng * IP v4 and v6 address matching. 1853da14cebeSEric Cheng * The netmask only needs to be applied on the packet but not on the 1854da14cebeSEric Cheng * flow_desc since fd_local_addr/fd_remote_addr are premasked subnets. 1855da14cebeSEric Cheng */ 1856da14cebeSEric Cheng 1857da14cebeSEric Cheng /* ARGSUSED */ 1858da14cebeSEric Cheng static boolean_t 1859da14cebeSEric Cheng flow_ip_v4_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 1860da14cebeSEric Cheng { 1861da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 1862da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 1863da14cebeSEric Cheng ipha_t *ipha = (ipha_t *)l3info->l3_start; 1864da14cebeSEric Cheng in_addr_t addr; 1865da14cebeSEric Cheng 1866da14cebeSEric Cheng addr = (l3info->l3_dst_or_src ? ipha->ipha_dst : ipha->ipha_src); 1867da14cebeSEric Cheng if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) { 1868da14cebeSEric Cheng return ((addr & V4_PART_OF_V6(fd->fd_local_netmask)) == 1869da14cebeSEric Cheng V4_PART_OF_V6(fd->fd_local_addr)); 1870da14cebeSEric Cheng } 1871da14cebeSEric Cheng return ((addr & V4_PART_OF_V6(fd->fd_remote_netmask)) == 1872da14cebeSEric Cheng V4_PART_OF_V6(fd->fd_remote_addr)); 1873da14cebeSEric Cheng } 1874da14cebeSEric Cheng 1875da14cebeSEric Cheng /* ARGSUSED */ 1876da14cebeSEric Cheng static boolean_t 1877da14cebeSEric Cheng flow_ip_v6_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 1878da14cebeSEric Cheng { 1879da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 1880da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 1881da14cebeSEric Cheng ip6_t *ip6h = (ip6_t *)l3info->l3_start; 1882da14cebeSEric Cheng in6_addr_t *addrp; 1883da14cebeSEric Cheng 1884da14cebeSEric Cheng addrp = (l3info->l3_dst_or_src ? &ip6h->ip6_dst : &ip6h->ip6_src); 1885da14cebeSEric Cheng if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) { 1886da14cebeSEric Cheng return (V6_MASK_EQ(*addrp, fd->fd_local_netmask, 1887da14cebeSEric Cheng fd->fd_local_addr)); 1888da14cebeSEric Cheng } 1889da14cebeSEric Cheng return (V6_MASK_EQ(*addrp, fd->fd_remote_netmask, fd->fd_remote_addr)); 1890da14cebeSEric Cheng } 1891da14cebeSEric Cheng 1892da14cebeSEric Cheng /* ARGSUSED */ 1893da14cebeSEric Cheng static boolean_t 1894da14cebeSEric Cheng flow_ip_proto_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 1895da14cebeSEric Cheng { 1896da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 1897da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 1898da14cebeSEric Cheng 1899da14cebeSEric Cheng return (l3info->l3_protocol == fd->fd_protocol); 1900da14cebeSEric Cheng } 1901da14cebeSEric Cheng 1902da14cebeSEric Cheng static uint32_t 1903da14cebeSEric Cheng flow_ip_hash(flow_tab_t *ft, flow_state_t *s) 1904da14cebeSEric Cheng { 1905da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 1906da14cebeSEric Cheng flow_mask_t mask = ft->ft_mask; 1907da14cebeSEric Cheng 1908da14cebeSEric Cheng if ((mask & FLOW_IP_LOCAL) != 0) { 1909da14cebeSEric Cheng l3info->l3_dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0); 1910da14cebeSEric Cheng } else if ((mask & FLOW_IP_REMOTE) != 0) { 1911da14cebeSEric Cheng l3info->l3_dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0); 1912da14cebeSEric Cheng } else if ((mask & FLOW_IP_DSFIELD) != 0) { 1913da14cebeSEric Cheng /* 1914da14cebeSEric Cheng * DSField flents are arranged as a single list. 1915da14cebeSEric Cheng */ 1916da14cebeSEric Cheng return (0); 1917da14cebeSEric Cheng } 1918da14cebeSEric Cheng /* 1919da14cebeSEric Cheng * IP addr flents are hashed into two lists, v4 or v6. 1920da14cebeSEric Cheng */ 1921da14cebeSEric Cheng ASSERT(ft->ft_size >= 2); 1922da14cebeSEric Cheng return ((l3info->l3_version == IPV4_VERSION) ? 0 : 1); 1923da14cebeSEric Cheng } 1924da14cebeSEric Cheng 1925da14cebeSEric Cheng static uint32_t 1926da14cebeSEric Cheng flow_ip_proto_hash(flow_tab_t *ft, flow_state_t *s) 1927da14cebeSEric Cheng { 1928da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 1929da14cebeSEric Cheng 1930da14cebeSEric Cheng return (l3info->l3_protocol % ft->ft_size); 1931da14cebeSEric Cheng } 1932da14cebeSEric Cheng 1933da14cebeSEric Cheng /* ARGSUSED */ 1934da14cebeSEric Cheng static int 1935da14cebeSEric Cheng flow_ip_accept(flow_tab_t *ft, flow_state_t *s) 1936da14cebeSEric Cheng { 1937da14cebeSEric Cheng flow_l2info_t *l2info = &s->fs_l2info; 1938da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 1939da14cebeSEric Cheng uint16_t sap = l2info->l2_sap; 1940da14cebeSEric Cheng uchar_t *l3_start; 1941da14cebeSEric Cheng 1942ae6aa22aSVenugopal Iyer l3_start = l2info->l2_start + l2info->l2_hdrsize; 1943ae6aa22aSVenugopal Iyer 1944ae6aa22aSVenugopal Iyer /* 1945ae6aa22aSVenugopal Iyer * Adjust start pointer if we're at the end of an mblk. 1946ae6aa22aSVenugopal Iyer */ 1947ae6aa22aSVenugopal Iyer CHECK_AND_ADJUST_START_PTR(s, l3_start); 1948ae6aa22aSVenugopal Iyer 1949ae6aa22aSVenugopal Iyer l3info->l3_start = l3_start; 1950da14cebeSEric Cheng if (!OK_32PTR(l3_start)) 1951da14cebeSEric Cheng return (EINVAL); 1952da14cebeSEric Cheng 1953da14cebeSEric Cheng switch (sap) { 1954da14cebeSEric Cheng case ETHERTYPE_IP: { 1955da14cebeSEric Cheng ipha_t *ipha = (ipha_t *)l3_start; 1956da14cebeSEric Cheng 1957da14cebeSEric Cheng if (PKT_TOO_SMALL(s, l3_start + IP_SIMPLE_HDR_LENGTH)) 1958da14cebeSEric Cheng return (ENOBUFS); 1959da14cebeSEric Cheng 1960da14cebeSEric Cheng l3info->l3_hdrsize = IPH_HDR_LENGTH(ipha); 1961da14cebeSEric Cheng l3info->l3_protocol = ipha->ipha_protocol; 1962da14cebeSEric Cheng l3info->l3_version = IPV4_VERSION; 1963da14cebeSEric Cheng l3info->l3_fragmented = 1964da14cebeSEric Cheng IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags); 1965da14cebeSEric Cheng break; 1966da14cebeSEric Cheng } 1967da14cebeSEric Cheng case ETHERTYPE_IPV6: { 1968da14cebeSEric Cheng ip6_t *ip6h = (ip6_t *)l3_start; 1969*0dc2366fSVenugopal Iyer ip6_frag_t *frag = NULL; 1970da14cebeSEric Cheng uint16_t ip6_hdrlen; 1971da14cebeSEric Cheng uint8_t nexthdr; 1972da14cebeSEric Cheng 1973*0dc2366fSVenugopal Iyer if (!mac_ip_hdr_length_v6(ip6h, s->fs_mp->b_wptr, &ip6_hdrlen, 1974*0dc2366fSVenugopal Iyer &nexthdr, &frag)) { 1975da14cebeSEric Cheng return (ENOBUFS); 1976da14cebeSEric Cheng } 1977da14cebeSEric Cheng l3info->l3_hdrsize = ip6_hdrlen; 1978da14cebeSEric Cheng l3info->l3_protocol = nexthdr; 1979da14cebeSEric Cheng l3info->l3_version = IPV6_VERSION; 1980*0dc2366fSVenugopal Iyer l3info->l3_fragmented = (frag != NULL); 1981da14cebeSEric Cheng break; 1982da14cebeSEric Cheng } 1983da14cebeSEric Cheng default: 1984da14cebeSEric Cheng return (EINVAL); 1985da14cebeSEric Cheng } 1986da14cebeSEric Cheng return (0); 1987da14cebeSEric Cheng } 1988da14cebeSEric Cheng 1989da14cebeSEric Cheng /* ARGSUSED */ 1990da14cebeSEric Cheng static int 1991da14cebeSEric Cheng flow_ip_proto_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 1992da14cebeSEric Cheng { 1993da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 1994da14cebeSEric Cheng 1995da14cebeSEric Cheng switch (fd->fd_protocol) { 1996da14cebeSEric Cheng case IPPROTO_TCP: 1997da14cebeSEric Cheng case IPPROTO_UDP: 1998da14cebeSEric Cheng case IPPROTO_SCTP: 1999da14cebeSEric Cheng case IPPROTO_ICMP: 2000da14cebeSEric Cheng case IPPROTO_ICMPV6: 2001da14cebeSEric Cheng flent->fe_match = flow_ip_proto_match; 2002da14cebeSEric Cheng return (0); 2003da14cebeSEric Cheng default: 2004da14cebeSEric Cheng return (EINVAL); 2005da14cebeSEric Cheng } 2006da14cebeSEric Cheng } 2007da14cebeSEric Cheng 2008da14cebeSEric Cheng /* ARGSUSED */ 2009da14cebeSEric Cheng static int 2010da14cebeSEric Cheng flow_ip_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 2011da14cebeSEric Cheng { 2012da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 2013da14cebeSEric Cheng flow_mask_t mask; 2014da14cebeSEric Cheng uint8_t version; 2015da14cebeSEric Cheng in6_addr_t *addr, *netmask; 2016da14cebeSEric Cheng 2017da14cebeSEric Cheng /* 2018da14cebeSEric Cheng * DSField does not require a IP version. 2019da14cebeSEric Cheng */ 2020da14cebeSEric Cheng if (fd->fd_mask == FLOW_IP_DSFIELD) { 2021da14cebeSEric Cheng if (fd->fd_dsfield_mask == 0) 2022da14cebeSEric Cheng return (EINVAL); 2023da14cebeSEric Cheng 2024da14cebeSEric Cheng flent->fe_match = flow_ip_dsfield_match; 2025da14cebeSEric Cheng return (0); 2026da14cebeSEric Cheng } 2027da14cebeSEric Cheng 2028da14cebeSEric Cheng /* 2029da14cebeSEric Cheng * IP addresses must come with a version to avoid ambiguity. 2030da14cebeSEric Cheng */ 2031da14cebeSEric Cheng if ((fd->fd_mask & FLOW_IP_VERSION) == 0) 2032da14cebeSEric Cheng return (EINVAL); 2033da14cebeSEric Cheng 2034da14cebeSEric Cheng version = fd->fd_ipversion; 2035da14cebeSEric Cheng if (version != IPV4_VERSION && version != IPV6_VERSION) 2036da14cebeSEric Cheng return (EINVAL); 2037da14cebeSEric Cheng 2038da14cebeSEric Cheng mask = fd->fd_mask & ~FLOW_IP_VERSION; 2039da14cebeSEric Cheng switch (mask) { 2040da14cebeSEric Cheng case FLOW_IP_LOCAL: 2041da14cebeSEric Cheng addr = &fd->fd_local_addr; 2042da14cebeSEric Cheng netmask = &fd->fd_local_netmask; 2043da14cebeSEric Cheng break; 2044da14cebeSEric Cheng case FLOW_IP_REMOTE: 2045da14cebeSEric Cheng addr = &fd->fd_remote_addr; 2046da14cebeSEric Cheng netmask = &fd->fd_remote_netmask; 2047da14cebeSEric Cheng break; 2048da14cebeSEric Cheng default: 2049da14cebeSEric Cheng return (EINVAL); 2050da14cebeSEric Cheng } 2051da14cebeSEric Cheng 2052da14cebeSEric Cheng /* 2053da14cebeSEric Cheng * Apply netmask onto specified address. 2054da14cebeSEric Cheng */ 2055da14cebeSEric Cheng V6_MASK_COPY(*addr, *netmask, *addr); 2056da14cebeSEric Cheng if (version == IPV4_VERSION) { 2057da14cebeSEric Cheng ipaddr_t v4addr = V4_PART_OF_V6((*addr)); 2058da14cebeSEric Cheng ipaddr_t v4mask = V4_PART_OF_V6((*netmask)); 2059da14cebeSEric Cheng 2060da14cebeSEric Cheng if (v4addr == 0 || v4mask == 0) 2061da14cebeSEric Cheng return (EINVAL); 2062da14cebeSEric Cheng flent->fe_match = flow_ip_v4_match; 2063da14cebeSEric Cheng } else { 2064da14cebeSEric Cheng if (IN6_IS_ADDR_UNSPECIFIED(addr) || 2065da14cebeSEric Cheng IN6_IS_ADDR_UNSPECIFIED(netmask)) 2066da14cebeSEric Cheng return (EINVAL); 2067da14cebeSEric Cheng flent->fe_match = flow_ip_v6_match; 2068da14cebeSEric Cheng } 2069da14cebeSEric Cheng return (0); 2070da14cebeSEric Cheng } 2071da14cebeSEric Cheng 2072da14cebeSEric Cheng static uint32_t 2073da14cebeSEric Cheng flow_ip_proto_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 2074da14cebeSEric Cheng { 2075da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 2076da14cebeSEric Cheng 2077da14cebeSEric Cheng return (fd->fd_protocol % ft->ft_size); 2078da14cebeSEric Cheng } 2079da14cebeSEric Cheng 2080da14cebeSEric Cheng static uint32_t 2081da14cebeSEric Cheng flow_ip_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 2082da14cebeSEric Cheng { 2083da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 2084da14cebeSEric Cheng 2085da14cebeSEric Cheng /* 2086da14cebeSEric Cheng * DSField flents are arranged as a single list. 2087da14cebeSEric Cheng */ 2088da14cebeSEric Cheng if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0) 2089da14cebeSEric Cheng return (0); 2090da14cebeSEric Cheng 2091da14cebeSEric Cheng /* 2092da14cebeSEric Cheng * IP addr flents are hashed into two lists, v4 or v6. 2093da14cebeSEric Cheng */ 2094da14cebeSEric Cheng ASSERT(ft->ft_size >= 2); 2095da14cebeSEric Cheng return ((fd->fd_ipversion == IPV4_VERSION) ? 0 : 1); 2096da14cebeSEric Cheng } 2097da14cebeSEric Cheng 2098da14cebeSEric Cheng /* ARGSUSED */ 2099da14cebeSEric Cheng static boolean_t 2100da14cebeSEric Cheng flow_ip_proto_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 2101da14cebeSEric Cheng { 2102da14cebeSEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 2103da14cebeSEric Cheng 2104da14cebeSEric Cheng return (fd1->fd_protocol == fd2->fd_protocol); 2105da14cebeSEric Cheng } 2106da14cebeSEric Cheng 2107da14cebeSEric Cheng /* ARGSUSED */ 2108da14cebeSEric Cheng static boolean_t 2109da14cebeSEric Cheng flow_ip_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 2110da14cebeSEric Cheng { 2111da14cebeSEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 2112da14cebeSEric Cheng in6_addr_t *a1, *m1, *a2, *m2; 2113da14cebeSEric Cheng 2114da14cebeSEric Cheng ASSERT(fd1->fd_mask == fd2->fd_mask); 2115da14cebeSEric Cheng if (fd1->fd_mask == FLOW_IP_DSFIELD) { 2116da14cebeSEric Cheng return (fd1->fd_dsfield == fd2->fd_dsfield && 2117da14cebeSEric Cheng fd1->fd_dsfield_mask == fd2->fd_dsfield_mask); 2118da14cebeSEric Cheng } 2119da14cebeSEric Cheng 2120da14cebeSEric Cheng /* 2121da14cebeSEric Cheng * flow_ip_accept_fe() already validated the version. 2122da14cebeSEric Cheng */ 2123da14cebeSEric Cheng ASSERT((fd1->fd_mask & FLOW_IP_VERSION) != 0); 2124da14cebeSEric Cheng if (fd1->fd_ipversion != fd2->fd_ipversion) 2125da14cebeSEric Cheng return (B_FALSE); 2126da14cebeSEric Cheng 2127da14cebeSEric Cheng switch (fd1->fd_mask & ~FLOW_IP_VERSION) { 2128da14cebeSEric Cheng case FLOW_IP_LOCAL: 2129da14cebeSEric Cheng a1 = &fd1->fd_local_addr; 2130da14cebeSEric Cheng m1 = &fd1->fd_local_netmask; 2131da14cebeSEric Cheng a2 = &fd2->fd_local_addr; 2132da14cebeSEric Cheng m2 = &fd2->fd_local_netmask; 2133da14cebeSEric Cheng break; 2134da14cebeSEric Cheng case FLOW_IP_REMOTE: 2135da14cebeSEric Cheng a1 = &fd1->fd_remote_addr; 2136da14cebeSEric Cheng m1 = &fd1->fd_remote_netmask; 2137da14cebeSEric Cheng a2 = &fd2->fd_remote_addr; 2138da14cebeSEric Cheng m2 = &fd2->fd_remote_netmask; 2139da14cebeSEric Cheng break; 2140da14cebeSEric Cheng default: 2141da14cebeSEric Cheng /* 2142da14cebeSEric Cheng * This is unreachable given the checks in 2143da14cebeSEric Cheng * flow_ip_accept_fe(). 2144da14cebeSEric Cheng */ 2145da14cebeSEric Cheng return (B_FALSE); 2146da14cebeSEric Cheng } 2147da14cebeSEric Cheng 2148da14cebeSEric Cheng if (fd1->fd_ipversion == IPV4_VERSION) { 2149da14cebeSEric Cheng return (V4_PART_OF_V6((*a1)) == V4_PART_OF_V6((*a2)) && 2150da14cebeSEric Cheng V4_PART_OF_V6((*m1)) == V4_PART_OF_V6((*m2))); 2151da14cebeSEric Cheng 2152da14cebeSEric Cheng } else { 2153da14cebeSEric Cheng return (IN6_ARE_ADDR_EQUAL(a1, a2) && 2154da14cebeSEric Cheng IN6_ARE_ADDR_EQUAL(m1, m2)); 2155da14cebeSEric Cheng } 2156da14cebeSEric Cheng } 2157da14cebeSEric Cheng 2158da14cebeSEric Cheng static int 2159da14cebeSEric Cheng flow_ip_mask2plen(in6_addr_t *v6mask) 2160da14cebeSEric Cheng { 2161da14cebeSEric Cheng int bits; 2162da14cebeSEric Cheng int plen = IPV6_ABITS; 2163da14cebeSEric Cheng int i; 2164da14cebeSEric Cheng 2165da14cebeSEric Cheng for (i = 3; i >= 0; i--) { 2166da14cebeSEric Cheng if (v6mask->s6_addr32[i] == 0) { 2167da14cebeSEric Cheng plen -= 32; 2168da14cebeSEric Cheng continue; 2169da14cebeSEric Cheng } 2170da14cebeSEric Cheng bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 2171da14cebeSEric Cheng if (bits == 0) 2172da14cebeSEric Cheng break; 2173da14cebeSEric Cheng plen -= bits; 2174da14cebeSEric Cheng } 2175da14cebeSEric Cheng return (plen); 2176da14cebeSEric Cheng } 2177da14cebeSEric Cheng 2178da14cebeSEric Cheng /* ARGSUSED */ 2179da14cebeSEric Cheng static int 2180da14cebeSEric Cheng flow_ip_insert_fe(flow_tab_t *ft, flow_entry_t **headp, 2181da14cebeSEric Cheng flow_entry_t *flent) 2182da14cebeSEric Cheng { 2183da14cebeSEric Cheng flow_entry_t **p = headp; 2184da14cebeSEric Cheng flow_desc_t *fd0, *fd; 2185da14cebeSEric Cheng in6_addr_t *m0, *m; 2186da14cebeSEric Cheng int plen0, plen; 2187da14cebeSEric Cheng 2188da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip)); 2189da14cebeSEric Cheng 2190da14cebeSEric Cheng /* 2191da14cebeSEric Cheng * No special ordering needed for dsfield. 2192da14cebeSEric Cheng */ 2193da14cebeSEric Cheng fd0 = &flent->fe_flow_desc; 2194da14cebeSEric Cheng if ((fd0->fd_mask & FLOW_IP_DSFIELD) != 0) { 2195da14cebeSEric Cheng if (*p != NULL) { 2196da14cebeSEric Cheng ASSERT(flent->fe_next == NULL); 2197da14cebeSEric Cheng flent->fe_next = *p; 2198da14cebeSEric Cheng } 2199da14cebeSEric Cheng *p = flent; 2200da14cebeSEric Cheng return (0); 2201da14cebeSEric Cheng } 2202da14cebeSEric Cheng 2203da14cebeSEric Cheng /* 2204da14cebeSEric Cheng * IP address flows are arranged in descending prefix length order. 2205da14cebeSEric Cheng */ 2206da14cebeSEric Cheng m0 = ((fd0->fd_mask & FLOW_IP_LOCAL) != 0) ? 2207da14cebeSEric Cheng &fd0->fd_local_netmask : &fd0->fd_remote_netmask; 2208da14cebeSEric Cheng plen0 = flow_ip_mask2plen(m0); 2209da14cebeSEric Cheng ASSERT(plen0 != 0); 2210da14cebeSEric Cheng 2211da14cebeSEric Cheng for (; *p != NULL; p = &(*p)->fe_next) { 2212da14cebeSEric Cheng fd = &(*p)->fe_flow_desc; 2213da14cebeSEric Cheng 2214da14cebeSEric Cheng /* 2215da14cebeSEric Cheng * Normally a dsfield flent shouldn't end up on the same 2216da14cebeSEric Cheng * list as an IP address because flow tables are (for now) 2217da14cebeSEric Cheng * disjoint. If we decide to support both IP and dsfield 2218da14cebeSEric Cheng * in the same table in the future, this check will allow 2219da14cebeSEric Cheng * for that. 2220da14cebeSEric Cheng */ 2221da14cebeSEric Cheng if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0) 2222da14cebeSEric Cheng continue; 2223da14cebeSEric Cheng 2224da14cebeSEric Cheng /* 2225da14cebeSEric Cheng * We also allow for the mixing of local and remote address 2226da14cebeSEric Cheng * flents within one list. 2227da14cebeSEric Cheng */ 2228da14cebeSEric Cheng m = ((fd->fd_mask & FLOW_IP_LOCAL) != 0) ? 2229da14cebeSEric Cheng &fd->fd_local_netmask : &fd->fd_remote_netmask; 2230da14cebeSEric Cheng plen = flow_ip_mask2plen(m); 2231da14cebeSEric Cheng 2232da14cebeSEric Cheng if (plen <= plen0) 2233da14cebeSEric Cheng break; 2234da14cebeSEric Cheng } 2235da14cebeSEric Cheng if (*p != NULL) { 2236da14cebeSEric Cheng ASSERT(flent->fe_next == NULL); 2237da14cebeSEric Cheng flent->fe_next = *p; 2238da14cebeSEric Cheng } 2239da14cebeSEric Cheng *p = flent; 2240da14cebeSEric Cheng return (0); 2241da14cebeSEric Cheng } 2242da14cebeSEric Cheng 2243da14cebeSEric Cheng /* 2244da14cebeSEric Cheng * Transport layer protocol and port matching functions. 2245da14cebeSEric Cheng */ 2246da14cebeSEric Cheng 2247da14cebeSEric Cheng /* ARGSUSED */ 2248da14cebeSEric Cheng static boolean_t 2249da14cebeSEric Cheng flow_transport_lport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 2250da14cebeSEric Cheng { 2251da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 2252da14cebeSEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 2253da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 2254da14cebeSEric Cheng 2255da14cebeSEric Cheng return (fd->fd_protocol == l3info->l3_protocol && 2256da14cebeSEric Cheng fd->fd_local_port == l4info->l4_hash_port); 2257da14cebeSEric Cheng } 2258da14cebeSEric Cheng 2259da14cebeSEric Cheng /* ARGSUSED */ 2260da14cebeSEric Cheng static boolean_t 2261da14cebeSEric Cheng flow_transport_rport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s) 2262da14cebeSEric Cheng { 2263da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 2264da14cebeSEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 2265da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 2266da14cebeSEric Cheng 2267da14cebeSEric Cheng return (fd->fd_protocol == l3info->l3_protocol && 2268da14cebeSEric Cheng fd->fd_remote_port == l4info->l4_hash_port); 2269da14cebeSEric Cheng } 2270da14cebeSEric Cheng 2271da14cebeSEric Cheng /* 2272da14cebeSEric Cheng * Transport hash function. 2273da14cebeSEric Cheng * Since we only support either local or remote port flows, 2274da14cebeSEric Cheng * we only need to extract one of the ports to be used for 2275da14cebeSEric Cheng * matching. 2276da14cebeSEric Cheng */ 2277da14cebeSEric Cheng static uint32_t 2278da14cebeSEric Cheng flow_transport_hash(flow_tab_t *ft, flow_state_t *s) 2279da14cebeSEric Cheng { 2280da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 2281da14cebeSEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 2282da14cebeSEric Cheng uint8_t proto = l3info->l3_protocol; 2283da14cebeSEric Cheng boolean_t dst_or_src; 2284da14cebeSEric Cheng 2285da14cebeSEric Cheng if ((ft->ft_mask & FLOW_ULP_PORT_LOCAL) != 0) { 2286da14cebeSEric Cheng dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0); 2287da14cebeSEric Cheng } else { 2288da14cebeSEric Cheng dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0); 2289da14cebeSEric Cheng } 2290da14cebeSEric Cheng 2291da14cebeSEric Cheng l4info->l4_hash_port = dst_or_src ? l4info->l4_dst_port : 2292da14cebeSEric Cheng l4info->l4_src_port; 2293da14cebeSEric Cheng 2294da14cebeSEric Cheng return ((l4info->l4_hash_port ^ (proto << 4)) % ft->ft_size); 2295da14cebeSEric Cheng } 2296da14cebeSEric Cheng 2297da14cebeSEric Cheng /* 2298da14cebeSEric Cheng * Unlike other accept() functions above, we do not need to get the header 2299da14cebeSEric Cheng * size because this is our highest layer so far. If we want to do support 2300da14cebeSEric Cheng * other higher layer protocols, we would need to save the l4_hdrsize 2301da14cebeSEric Cheng * in the code below. 2302da14cebeSEric Cheng */ 2303da14cebeSEric Cheng 2304da14cebeSEric Cheng /* ARGSUSED */ 2305da14cebeSEric Cheng static int 2306da14cebeSEric Cheng flow_transport_accept(flow_tab_t *ft, flow_state_t *s) 2307da14cebeSEric Cheng { 2308da14cebeSEric Cheng flow_l3info_t *l3info = &s->fs_l3info; 2309da14cebeSEric Cheng flow_l4info_t *l4info = &s->fs_l4info; 2310da14cebeSEric Cheng uint8_t proto = l3info->l3_protocol; 2311da14cebeSEric Cheng uchar_t *l4_start; 2312da14cebeSEric Cheng 2313ae6aa22aSVenugopal Iyer l4_start = l3info->l3_start + l3info->l3_hdrsize; 2314ae6aa22aSVenugopal Iyer 2315ae6aa22aSVenugopal Iyer /* 2316ae6aa22aSVenugopal Iyer * Adjust start pointer if we're at the end of an mblk. 2317ae6aa22aSVenugopal Iyer */ 2318ae6aa22aSVenugopal Iyer CHECK_AND_ADJUST_START_PTR(s, l4_start); 2319ae6aa22aSVenugopal Iyer 2320ae6aa22aSVenugopal Iyer l4info->l4_start = l4_start; 2321da14cebeSEric Cheng if (!OK_32PTR(l4_start)) 2322da14cebeSEric Cheng return (EINVAL); 2323da14cebeSEric Cheng 2324da14cebeSEric Cheng if (l3info->l3_fragmented == B_TRUE) 2325da14cebeSEric Cheng return (EINVAL); 2326da14cebeSEric Cheng 2327da14cebeSEric Cheng switch (proto) { 2328da14cebeSEric Cheng case IPPROTO_TCP: { 2329da14cebeSEric Cheng struct tcphdr *tcph = (struct tcphdr *)l4_start; 2330da14cebeSEric Cheng 2331da14cebeSEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*tcph))) 2332da14cebeSEric Cheng return (ENOBUFS); 2333da14cebeSEric Cheng 2334da14cebeSEric Cheng l4info->l4_src_port = tcph->th_sport; 2335da14cebeSEric Cheng l4info->l4_dst_port = tcph->th_dport; 2336da14cebeSEric Cheng break; 2337da14cebeSEric Cheng } 2338da14cebeSEric Cheng case IPPROTO_UDP: { 2339da14cebeSEric Cheng struct udphdr *udph = (struct udphdr *)l4_start; 2340da14cebeSEric Cheng 2341da14cebeSEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*udph))) 2342da14cebeSEric Cheng return (ENOBUFS); 2343da14cebeSEric Cheng 2344da14cebeSEric Cheng l4info->l4_src_port = udph->uh_sport; 2345da14cebeSEric Cheng l4info->l4_dst_port = udph->uh_dport; 2346da14cebeSEric Cheng break; 2347da14cebeSEric Cheng } 2348da14cebeSEric Cheng case IPPROTO_SCTP: { 2349da14cebeSEric Cheng sctp_hdr_t *sctph = (sctp_hdr_t *)l4_start; 2350da14cebeSEric Cheng 2351da14cebeSEric Cheng if (PKT_TOO_SMALL(s, l4_start + sizeof (*sctph))) 2352da14cebeSEric Cheng return (ENOBUFS); 2353da14cebeSEric Cheng 2354da14cebeSEric Cheng l4info->l4_src_port = sctph->sh_sport; 2355da14cebeSEric Cheng l4info->l4_dst_port = sctph->sh_dport; 2356da14cebeSEric Cheng break; 2357da14cebeSEric Cheng } 2358da14cebeSEric Cheng default: 2359da14cebeSEric Cheng return (EINVAL); 2360da14cebeSEric Cheng } 2361da14cebeSEric Cheng 2362da14cebeSEric Cheng return (0); 2363da14cebeSEric Cheng } 2364da14cebeSEric Cheng 2365da14cebeSEric Cheng /* 2366da14cebeSEric Cheng * Validates transport flow entry. 2367da14cebeSEric Cheng * The protocol field must be present. 2368da14cebeSEric Cheng */ 2369da14cebeSEric Cheng 2370da14cebeSEric Cheng /* ARGSUSED */ 2371da14cebeSEric Cheng static int 2372da14cebeSEric Cheng flow_transport_accept_fe(flow_tab_t *ft, flow_entry_t *flent) 2373da14cebeSEric Cheng { 2374da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 2375da14cebeSEric Cheng flow_mask_t mask = fd->fd_mask; 2376da14cebeSEric Cheng 2377da14cebeSEric Cheng if ((mask & FLOW_IP_PROTOCOL) == 0) 2378da14cebeSEric Cheng return (EINVAL); 2379da14cebeSEric Cheng 2380da14cebeSEric Cheng switch (fd->fd_protocol) { 2381da14cebeSEric Cheng case IPPROTO_TCP: 2382da14cebeSEric Cheng case IPPROTO_UDP: 2383da14cebeSEric Cheng case IPPROTO_SCTP: 2384da14cebeSEric Cheng break; 2385da14cebeSEric Cheng default: 2386da14cebeSEric Cheng return (EINVAL); 2387da14cebeSEric Cheng } 2388da14cebeSEric Cheng 2389da14cebeSEric Cheng switch (mask & ~FLOW_IP_PROTOCOL) { 2390da14cebeSEric Cheng case FLOW_ULP_PORT_LOCAL: 2391da14cebeSEric Cheng if (fd->fd_local_port == 0) 2392da14cebeSEric Cheng return (EINVAL); 2393da14cebeSEric Cheng 2394da14cebeSEric Cheng flent->fe_match = flow_transport_lport_match; 2395da14cebeSEric Cheng break; 2396da14cebeSEric Cheng case FLOW_ULP_PORT_REMOTE: 2397da14cebeSEric Cheng if (fd->fd_remote_port == 0) 2398da14cebeSEric Cheng return (EINVAL); 2399da14cebeSEric Cheng 2400da14cebeSEric Cheng flent->fe_match = flow_transport_rport_match; 2401da14cebeSEric Cheng break; 2402da14cebeSEric Cheng case 0: 2403da14cebeSEric Cheng /* 2404da14cebeSEric Cheng * transport-only flows conflicts with our table type. 2405da14cebeSEric Cheng */ 2406da14cebeSEric Cheng return (EOPNOTSUPP); 2407da14cebeSEric Cheng default: 2408da14cebeSEric Cheng return (EINVAL); 2409da14cebeSEric Cheng } 2410da14cebeSEric Cheng 2411da14cebeSEric Cheng return (0); 2412da14cebeSEric Cheng } 2413da14cebeSEric Cheng 2414da14cebeSEric Cheng static uint32_t 2415da14cebeSEric Cheng flow_transport_hash_fe(flow_tab_t *ft, flow_entry_t *flent) 2416da14cebeSEric Cheng { 2417da14cebeSEric Cheng flow_desc_t *fd = &flent->fe_flow_desc; 2418da14cebeSEric Cheng uint16_t port = 0; 2419da14cebeSEric Cheng 2420da14cebeSEric Cheng port = ((fd->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) ? 2421da14cebeSEric Cheng fd->fd_local_port : fd->fd_remote_port; 2422da14cebeSEric Cheng 2423da14cebeSEric Cheng return ((port ^ (fd->fd_protocol << 4)) % ft->ft_size); 2424da14cebeSEric Cheng } 2425da14cebeSEric Cheng 2426da14cebeSEric Cheng /* ARGSUSED */ 2427da14cebeSEric Cheng static boolean_t 2428da14cebeSEric Cheng flow_transport_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2) 2429da14cebeSEric Cheng { 2430da14cebeSEric Cheng flow_desc_t *fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc; 2431da14cebeSEric Cheng 2432da14cebeSEric Cheng if (fd1->fd_protocol != fd2->fd_protocol) 2433da14cebeSEric Cheng return (B_FALSE); 2434da14cebeSEric Cheng 2435da14cebeSEric Cheng if ((fd1->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) 2436da14cebeSEric Cheng return (fd1->fd_local_port == fd2->fd_local_port); 2437da14cebeSEric Cheng 243825ec3e3dSEric Cheng if ((fd1->fd_mask & FLOW_ULP_PORT_REMOTE) != 0) 2439da14cebeSEric Cheng return (fd1->fd_remote_port == fd2->fd_remote_port); 244025ec3e3dSEric Cheng 244125ec3e3dSEric Cheng return (B_TRUE); 2442da14cebeSEric Cheng } 2443da14cebeSEric Cheng 2444da14cebeSEric Cheng static flow_ops_t flow_l2_ops = { 2445da14cebeSEric Cheng flow_l2_accept_fe, 2446da14cebeSEric Cheng flow_l2_hash_fe, 2447da14cebeSEric Cheng flow_l2_match_fe, 2448da14cebeSEric Cheng flow_generic_insert_fe, 2449da14cebeSEric Cheng flow_l2_hash, 2450da14cebeSEric Cheng {flow_l2_accept} 2451da14cebeSEric Cheng }; 2452da14cebeSEric Cheng 2453da14cebeSEric Cheng static flow_ops_t flow_ip_ops = { 2454da14cebeSEric Cheng flow_ip_accept_fe, 2455da14cebeSEric Cheng flow_ip_hash_fe, 2456da14cebeSEric Cheng flow_ip_match_fe, 2457da14cebeSEric Cheng flow_ip_insert_fe, 2458da14cebeSEric Cheng flow_ip_hash, 2459da14cebeSEric Cheng {flow_l2_accept, flow_ip_accept} 2460da14cebeSEric Cheng }; 2461da14cebeSEric Cheng 2462da14cebeSEric Cheng static flow_ops_t flow_ip_proto_ops = { 2463da14cebeSEric Cheng flow_ip_proto_accept_fe, 2464da14cebeSEric Cheng flow_ip_proto_hash_fe, 2465da14cebeSEric Cheng flow_ip_proto_match_fe, 2466da14cebeSEric Cheng flow_generic_insert_fe, 2467da14cebeSEric Cheng flow_ip_proto_hash, 2468da14cebeSEric Cheng {flow_l2_accept, flow_ip_accept} 2469da14cebeSEric Cheng }; 2470da14cebeSEric Cheng 2471da14cebeSEric Cheng static flow_ops_t flow_transport_ops = { 2472da14cebeSEric Cheng flow_transport_accept_fe, 2473da14cebeSEric Cheng flow_transport_hash_fe, 2474da14cebeSEric Cheng flow_transport_match_fe, 2475da14cebeSEric Cheng flow_generic_insert_fe, 2476da14cebeSEric Cheng flow_transport_hash, 2477da14cebeSEric Cheng {flow_l2_accept, flow_ip_accept, flow_transport_accept} 2478da14cebeSEric Cheng }; 2479da14cebeSEric Cheng 2480da14cebeSEric Cheng static flow_tab_info_t flow_tab_info_list[] = { 2481da14cebeSEric Cheng {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_LOCAL, 2}, 2482da14cebeSEric Cheng {&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_REMOTE, 2}, 2483da14cebeSEric Cheng {&flow_ip_ops, FLOW_IP_DSFIELD, 1}, 2484da14cebeSEric Cheng {&flow_ip_proto_ops, FLOW_IP_PROTOCOL, 256}, 248525ec3e3dSEric Cheng {&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_LOCAL, 1024}, 248625ec3e3dSEric Cheng {&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_REMOTE, 1024} 2487da14cebeSEric Cheng }; 2488da14cebeSEric Cheng 2489da14cebeSEric Cheng #define FLOW_MAX_TAB_INFO \ 2490da14cebeSEric Cheng ((sizeof (flow_tab_info_list)) / sizeof (flow_tab_info_t)) 2491da14cebeSEric Cheng 2492da14cebeSEric Cheng static flow_tab_info_t * 2493da14cebeSEric Cheng mac_flow_tab_info_get(flow_mask_t mask) 2494da14cebeSEric Cheng { 2495da14cebeSEric Cheng int i; 2496da14cebeSEric Cheng 2497da14cebeSEric Cheng for (i = 0; i < FLOW_MAX_TAB_INFO; i++) { 2498da14cebeSEric Cheng if (mask == flow_tab_info_list[i].fti_mask) 2499da14cebeSEric Cheng return (&flow_tab_info_list[i]); 2500da14cebeSEric Cheng } 2501da14cebeSEric Cheng return (NULL); 2502da14cebeSEric Cheng } 2503