1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _MAC_FLOW_IMPL_H 28 #define _MAC_FLOW_IMPL_H 29 30 #ifdef __cplusplus 31 extern "C" { 32 #endif 33 34 #include <sys/param.h> 35 #include <sys/atomic.h> 36 #include <sys/ksynch.h> 37 #include <sys/mac_flow.h> 38 #include <sys/stream.h> 39 #include <sys/sdt.h> 40 #include <net/if.h> 41 42 /* 43 * Macros to increment/decrement the reference count on a flow_entry_t. 44 */ 45 #define FLOW_REFHOLD(flent) { \ 46 DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent)); \ 47 mutex_enter(&(flent)->fe_lock); \ 48 (flent)->fe_refcnt++; \ 49 mutex_exit(&(flent)->fe_lock); \ 50 } 51 52 /* 53 * Data paths must not attempt to use a flow entry if it is marked INCIPIENT 54 * or QUIESCE. In the former case the set up is not yet complete and the 55 * data path could stumble on inconsistent data structures. In the latter 56 * case a control operation is waiting for quiescence so that it can 57 * change callbacks or other structures without the use of locks. 58 */ 59 #define FLOW_TRY_REFHOLD(flent, err) { \ 60 DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent)); \ 61 (err) = 0; \ 62 mutex_enter(&(flent)->fe_lock); \ 63 if ((flent)->fe_flags & (FE_INCIPIENT | FE_QUIESCE | FE_CONDEMNED | \ 64 FE_UF_NO_DATAPATH | FE_MC_NO_DATAPATH)) \ 65 (err) = -1; \ 66 else \ 67 (flent)->fe_refcnt++; \ 68 mutex_exit(&(flent)->fe_lock); \ 69 } 70 71 #define FLOW_REFRELE(flent) { \ 72 DTRACE_PROBE1(flow_refrele, flow_entry_t *, (flent)); \ 73 mutex_enter(&(flent)->fe_lock); \ 74 ASSERT((flent)->fe_refcnt != 0); \ 75 (flent)->fe_refcnt--; \ 76 if ((flent)->fe_flags & FE_WAITER) { \ 77 ASSERT((flent)->fe_refcnt != 0); \ 78 cv_signal(&(flent)->fe_cv); \ 79 mutex_exit(&(flent)->fe_lock); \ 80 } else if ((flent)->fe_refcnt == 0) { \ 81 mac_flow_destroy(flent); \ 82 } else { \ 83 mutex_exit(&(flent)->fe_lock); \ 84 } \ 85 } 86 87 #define FLOW_USER_REFHOLD(flent) { \ 88 mutex_enter(&(flent)->fe_lock); \ 89 (flent)->fe_user_refcnt++; \ 90 mutex_exit(&(flent)->fe_lock); \ 91 } 92 93 #define FLOW_USER_REFRELE(flent) { \ 94 mutex_enter(&(flent)->fe_lock); \ 95 ASSERT((flent)->fe_user_refcnt != 0); \ 96 if (--(flent)->fe_user_refcnt == 0 && \ 97 ((flent)->fe_flags & FE_WAITER)) \ 98 cv_signal(&(flent)->fe_cv); \ 99 mutex_exit(&(flent)->fe_lock); \ 100 } 101 102 #define FLOW_FINAL_REFRELE(flent) { \ 103 ASSERT(flent->fe_refcnt == 1 && flent->fe_user_refcnt == 0); \ 104 FLOW_REFRELE(flent); \ 105 } 106 107 /* 108 * Mark or unmark the flent with a bit flag 109 */ 110 #define FLOW_MARK(flent, flag) { \ 111 mutex_enter(&(flent)->fe_lock); \ 112 (flent)->fe_flags |= flag; \ 113 mutex_exit(&(flent)->fe_lock); \ 114 } 115 116 #define FLOW_UNMARK(flent, flag) { \ 117 mutex_enter(&(flent)->fe_lock); \ 118 (flent)->fe_flags &= ~flag; \ 119 mutex_exit(&(flent)->fe_lock); \ 120 } 121 122 #define FLENT_TO_MIP(flent) \ 123 (flent->fe_mbg != NULL ? mac_bcast_grp_mip(flent->fe_mbg) : \ 124 ((mac_client_impl_t *)flent->fe_mcip)->mci_mip) 125 126 /* Convert a bandwidth expressed in bps to a number of bytes per tick. */ 127 #define FLOW_BYTES_PER_TICK(bps) (((bps) >> 3) / hz) 128 129 /* 130 * Given an underlying range and a priority level, obtain the minimum for the 131 * new range. 132 */ 133 #define FLOW_MIN_PRIORITY(min, max, pri) \ 134 ((min) + ((((max) - (min)) / MRP_PRIORITY_LEVELS) * (pri))) 135 136 /* 137 * Given an underlying range and a minimum level (base), obtain the maximum 138 * for the new range. 139 */ 140 #define FLOW_MAX_PRIORITY(min, max, base) \ 141 ((base) + (((max) - (min)) / MRP_PRIORITY_LEVELS)) 142 143 /* 144 * Given an underlying range and a priority level, get the absolute 145 * priority value. For now there are just 3 values, high, low and 146 * medium so we can just return max, min or min + (max - min) / 2. 147 * If there are more than three we need to change this computation. 148 */ 149 #define FLOW_PRIORITY(min, max, pri) \ 150 (pri) == MPL_HIGH ? (max) : \ 151 (pri) == MPL_LOW ? (min) : \ 152 ((min) + (((max) - (min)) / 2)) 153 154 #define MAC_FLOW_TAB_SIZE 500 155 156 typedef struct flow_entry_s flow_entry_t; 157 typedef struct flow_tab_s flow_tab_t; 158 typedef struct flow_state_s flow_state_t; 159 struct mac_impl_s; 160 struct mac_client_impl_s; 161 162 /* 163 * Classification flags used to lookup the flow. 164 */ 165 #define FLOW_INBOUND 0x01 166 #define FLOW_OUTBOUND 0x02 167 /* Don't compare VID when classifying the packets, see mac_rx_classify() */ 168 #define FLOW_IGNORE_VLAN 0x04 169 170 /* Generic flow client function signature */ 171 typedef void (*flow_fn_t)(void *, void *, mblk_t *, boolean_t); 172 173 /* Flow state */ 174 typedef enum { 175 FLOW_DRIVER_UPCALL, 176 FLOW_USER_REF 177 } mac_flow_state_t; 178 179 /* Matches a flow_entry_t using the extracted flow_state_t info */ 180 typedef boolean_t (*flow_match_fn_t)(flow_tab_t *, flow_entry_t *, 181 flow_state_t *); 182 183 /* fe_flags */ 184 #define FE_QUIESCE 0x01 /* Quiesce the flow */ 185 #define FE_WAITER 0x02 /* Flow has a waiter */ 186 #define FE_FLOW_TAB 0x04 /* Flow is in the flow tab list */ 187 #define FE_G_FLOW_HASH 0x08 /* Flow is in the global flow hash */ 188 #define FE_INCIPIENT 0x10 /* Being setup */ 189 #define FE_CONDEMNED 0x20 /* Being deleted */ 190 #define FE_UF_NO_DATAPATH 0x40 /* No datapath setup for User flow */ 191 #define FE_MC_NO_DATAPATH 0x80 /* No datapath setup for mac client */ 192 193 /* fe_type */ 194 #define FLOW_PRIMARY_MAC 0x01 /* NIC primary MAC address */ 195 #define FLOW_VNIC_MAC 0x02 /* VNIC flow */ 196 #define FLOW_MCAST 0x04 /* Multicast (and broadcast) */ 197 #define FLOW_OTHER 0x08 /* Other flows configured */ 198 #define FLOW_USER 0x10 /* User defined flow */ 199 #define FLOW_VNIC FLOW_VNIC_MAC 200 #define FLOW_NO_STATS 0x20 /* Don't create stats for the flow */ 201 202 /* 203 * Shared Bandwidth control counters between the soft ring set and its 204 * associated soft rings. In case the flow associated with NIC/VNIC 205 * has a group of Rx rings assigned to it, we have the same 206 * number of soft ring sets as we have the Rx ring in the group 207 * and each individual SRS (and its soft rings) decide when to 208 * poll their Rx ring independently. But if there is a B/W limit 209 * associated with the NIC/VNIC, then the B/W control counter is 210 * shared across all the SRS in the group and their associated 211 * soft rings. 212 * 213 * There is a many to 1 mapping between the SRS and 214 * mac_bw_ctl if the flow has a group of Rx rings associated with 215 * it. 216 */ 217 typedef struct mac_bw_ctl_s { 218 kmutex_t mac_bw_lock; 219 uint32_t mac_bw_state; 220 size_t mac_bw_sz; /* ?? Is it needed */ 221 size_t mac_bw_limit; /* Max bytes to process per tick */ 222 size_t mac_bw_used; /* Bytes processed in current tick */ 223 size_t mac_bw_drop_threshold; /* Max queue length */ 224 size_t mac_bw_drop_bytes; 225 size_t mac_bw_polled; 226 size_t mac_bw_intr; 227 clock_t mac_bw_curr_time; 228 } mac_bw_ctl_t; 229 230 struct flow_entry_s { /* Protected by */ 231 struct flow_entry_s *fe_next; /* ft_lock */ 232 233 datalink_id_t fe_link_id; /* WO */ 234 235 /* Properties as specified for this flow */ 236 mac_resource_props_t fe_resource_props; /* SL */ 237 238 /* Properties actually effective at run time for this flow */ 239 mac_resource_props_t fe_effective_props; /* SL */ 240 241 kmutex_t fe_lock; 242 char fe_flow_name[MAXFLOWNAMELEN]; /* fe_lock */ 243 flow_desc_t fe_flow_desc; /* fe_lock */ 244 kcondvar_t fe_cv; /* fe_lock */ 245 /* 246 * Initial flow ref is 1 on creation. A thread that lookups the 247 * flent typically by a mac_flow_lookup() dynamically holds a ref. 248 * If the ref is 1, it means there arent' any upcalls from the driver 249 * or downcalls from the stack using this flent. Structures pointing 250 * to the flent or flent inserted in lists don't count towards this 251 * refcnt. Instead they are tracked using fe_flags. Only a control 252 * thread doing a teardown operation deletes the flent, after waiting 253 * for upcalls to finish synchronously. The fe_refcnt tracks 254 * the number of upcall refs 255 */ 256 uint32_t fe_refcnt; /* fe_lock */ 257 258 /* 259 * This tracks lookups done using the global hash list for user 260 * generated flows. This refcnt only protects the flent itself 261 * from disappearing and helps walkers to read the flent info such 262 * as flow spec. However the flent may be quiesced and the SRS could 263 * be deleted. The fe_user_refcnt tracks the number of global flow 264 * has refs. 265 */ 266 uint32_t fe_user_refcnt; /* fe_lock */ 267 uint_t fe_flags; /* fe_lock */ 268 269 /* 270 * Function/args to invoke for delivering matching packets 271 * Only the function ff_fn may be changed dynamically and atomically. 272 * The ff_arg1 and ff_arg2 are set at creation time and may not 273 * be changed. 274 */ 275 flow_fn_t fe_cb_fn; /* fe_lock */ 276 void *fe_cb_arg1; /* fe_lock */ 277 void *fe_cb_arg2; /* fe_lock */ 278 279 void *fe_client_cookie; /* WO */ 280 void *fe_rx_ring_group; /* SL */ 281 void *fe_rx_srs[MAX_RINGS_PER_GROUP]; /* fe_lock */ 282 int fe_rx_srs_cnt; /* fe_lock */ 283 void *fe_tx_srs; /* WO */ 284 285 /* 286 * This is a unicast flow, and is a mac_client_impl_t 287 */ 288 void *fe_mcip; /* WO */ 289 290 /* 291 * Used by mci_flent_list of mac_client_impl_t to track flows sharing 292 * the same mac_client_impl_t. 293 */ 294 struct flow_entry_s *fe_client_next; 295 296 /* 297 * This is a broadcast or multicast flow and is a mac_bcast_grp_t 298 */ 299 void *fe_mbg; /* WO */ 300 uint_t fe_type; /* WO */ 301 302 /* 303 * BW control info. 304 */ 305 mac_bw_ctl_t fe_tx_bw; 306 mac_bw_ctl_t fe_rx_bw; 307 308 /* 309 * Used by flow table lookup code 310 */ 311 flow_match_fn_t fe_match; 312 313 /* 314 * Used by mac_flow_remove(). 315 */ 316 int fe_index; 317 flow_tab_t *fe_flow_tab; 318 319 kstat_t *fe_ksp; 320 flow_stats_t fe_flowstats; 321 boolean_t fe_desc_logged; 322 uint64_t fe_nic_speed; 323 }; 324 325 /* 326 * Various structures used by the flows framework for keeping track 327 * of packet state information. 328 */ 329 330 /* Layer 2 */ 331 typedef struct flow_l2info_s { 332 uchar_t *l2_start; 333 uint8_t *l2_daddr; 334 uint16_t l2_vid; 335 uint32_t l2_sap; 336 uint_t l2_hdrsize; 337 } flow_l2info_t; 338 339 /* Layer 3 */ 340 typedef struct flow_l3info_s { 341 uchar_t *l3_start; 342 uint8_t l3_protocol; 343 uint8_t l3_version; 344 boolean_t l3_dst_or_src; 345 uint_t l3_hdrsize; 346 boolean_t l3_fragmented; 347 } flow_l3info_t; 348 349 /* Layer 4 */ 350 typedef struct flow_l4info_s { 351 uchar_t *l4_start; 352 uint16_t l4_src_port; 353 uint16_t l4_dst_port; 354 uint16_t l4_hash_port; 355 } flow_l4info_t; 356 357 /* 358 * Combined state structure. 359 * Holds flow direction and an mblk_t pointer. 360 */ 361 struct flow_state_s { 362 uint_t fs_flags; 363 mblk_t *fs_mp; 364 flow_l2info_t fs_l2info; 365 flow_l3info_t fs_l3info; 366 flow_l4info_t fs_l4info; 367 }; 368 369 /* 370 * Flow ops vector. 371 * There are two groups of functions. The ones ending with _fe are 372 * called when a flow is being added. The others (hash, accept) are 373 * called at flow lookup time. 374 */ 375 #define FLOW_MAX_ACCEPT 16 376 typedef struct flow_ops_s { 377 /* 378 * fo_accept_fe(): 379 * Validates the contents of the flow and checks whether 380 * it's compatible with the flow table. sets the fe_match 381 * function of the flow. 382 */ 383 int (*fo_accept_fe)(flow_tab_t *, flow_entry_t *); 384 /* 385 * fo_hash_fe(): 386 * Generates a hash index to the flow table. This function 387 * must use the same algorithm as fo_hash(), which is used 388 * by the flow lookup code path. 389 */ 390 uint32_t (*fo_hash_fe)(flow_tab_t *, flow_entry_t *); 391 /* 392 * fo_match_fe(): 393 * This is used for finding identical flows. 394 */ 395 boolean_t (*fo_match_fe)(flow_tab_t *, flow_entry_t *, 396 flow_entry_t *); 397 /* 398 * fo_insert_fe(): 399 * Used for inserting a flow to a flow chain. 400 * Protocols that have special ordering requirements would 401 * need to implement this. For those that don't, 402 * flow_generic_insert_fe() may be used. 403 */ 404 int (*fo_insert_fe)(flow_tab_t *, flow_entry_t **, 405 flow_entry_t *); 406 407 /* 408 * Calculates the flow hash index based on the accumulated 409 * state in flow_state_t. Must use the same algorithm as 410 * fo_hash_fe(). 411 */ 412 uint32_t (*fo_hash)(flow_tab_t *, flow_state_t *); 413 414 /* 415 * Array of accept fuctions. 416 * Each function in the array will accumulate enough state 417 * (header length, protocol) to allow the next function to 418 * proceed. We support up to FLOW_MAX_ACCEPT functions which 419 * should be sufficient for all practical purposes. 420 */ 421 int (*fo_accept[FLOW_MAX_ACCEPT])(flow_tab_t *, 422 flow_state_t *); 423 } flow_ops_t; 424 425 /* 426 * Generic flow table. 427 */ 428 struct flow_tab_s { 429 krwlock_t ft_lock; 430 /* 431 * Contains a list of functions (described above) 432 * specific to this table type. 433 */ 434 flow_ops_t ft_ops; 435 436 /* 437 * Indicates what types of flows are supported. 438 */ 439 flow_mask_t ft_mask; 440 441 /* 442 * An array of flow_entry_t * of size ft_size. 443 * Each element is the beginning of a hash chain. 444 */ 445 flow_entry_t **ft_table; 446 uint_t ft_size; 447 448 /* 449 * The number of flows inserted into ft_table. 450 */ 451 uint_t ft_flow_count; 452 struct mac_impl_s *ft_mip; 453 struct mac_client_impl_s *ft_mcip; 454 }; 455 456 /* 457 * This is used for describing what type of flow table can be created. 458 * mac_flow.c contains a list of these structures. 459 */ 460 typedef struct flow_tab_info_s { 461 flow_ops_t *fti_ops; 462 flow_mask_t fti_mask; 463 uint_t fti_size; 464 } flow_tab_info_t; 465 466 #define FLOW_TAB_EMPTY(ft) ((ft) == NULL || (ft)->ft_flow_count == 0) 467 468 /* 469 * This is used by mac_tx_send. 470 */ 471 typedef struct mac_tx_stats_s { 472 uint_t ts_opackets; 473 uint_t ts_obytes; 474 uint_t ts_oerrors; 475 } mac_tx_stats_t; 476 477 #define FLOW_STAT_UPDATE(f, s, c) { \ 478 ((flow_entry_t *)(f))->fe_flowstats.fs_##s += ((uint64_t)(c)); \ 479 } 480 481 #define FLOW_TX_STATS_UPDATE(f, s) { \ 482 FLOW_STAT_UPDATE((f), opackets, (s)->ts_opackets); \ 483 FLOW_STAT_UPDATE((f), obytes, (s)->ts_obytes); \ 484 FLOW_STAT_UPDATE((f), oerrors, (s)->ts_oerrors); \ 485 } 486 487 extern void mac_flow_init(); 488 extern void mac_flow_fini(); 489 extern int mac_flow_create(flow_desc_t *, mac_resource_props_t *, 490 char *, void *, uint_t, flow_entry_t **); 491 492 extern int mac_flow_add(flow_tab_t *, flow_entry_t *); 493 extern int mac_flow_add_subflow(mac_client_handle_t, flow_entry_t *, 494 boolean_t); 495 extern int mac_flow_hash_add(flow_entry_t *); 496 extern int mac_flow_lookup_byname(char *, flow_entry_t **); 497 extern int mac_flow_lookup(flow_tab_t *, mblk_t *, uint_t, 498 flow_entry_t **); 499 500 extern int mac_flow_walk(flow_tab_t *, int (*)(flow_entry_t *, void *), 501 void *); 502 503 extern int mac_flow_walk_nolock(flow_tab_t *, 504 int (*)(flow_entry_t *, void *), void *); 505 506 extern void mac_flow_modify(flow_tab_t *, flow_entry_t *, 507 mac_resource_props_t *); 508 509 extern void *mac_flow_get_client_cookie(flow_entry_t *); 510 511 extern uint32_t mac_flow_modify_props(flow_entry_t *, mac_resource_props_t *); 512 513 extern int mac_flow_update(flow_tab_t *, flow_entry_t *, flow_desc_t *); 514 extern void mac_flow_get_desc(flow_entry_t *, flow_desc_t *); 515 extern void mac_flow_set_desc(flow_entry_t *, flow_desc_t *); 516 517 extern void mac_flow_remove(flow_tab_t *, flow_entry_t *, boolean_t); 518 extern void mac_flow_hash_remove(flow_entry_t *); 519 extern void mac_flow_wait(flow_entry_t *, mac_flow_state_t); 520 extern void mac_flow_quiesce(flow_entry_t *); 521 extern void mac_flow_restart(flow_entry_t *); 522 extern void mac_flow_cleanup(flow_entry_t *); 523 extern void mac_flow_destroy(flow_entry_t *); 524 525 extern void mac_flow_tab_create(flow_ops_t *, flow_mask_t, uint_t, 526 struct mac_impl_s *, flow_tab_t **); 527 extern void mac_flow_l2tab_create(struct mac_impl_s *, flow_tab_t **); 528 extern void mac_flow_tab_destroy(flow_tab_t *); 529 extern void mac_flow_drop(void *, void *, mblk_t *); 530 extern void flow_stat_destroy(flow_entry_t *); 531 532 #ifdef __cplusplus 533 } 534 #endif 535 536 #endif /* _MAC_FLOW_IMPL_H */ 537