1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #ifndef _SYS_IB_EOIB_EIB_IMPL_H 27 #define _SYS_IB_EOIB_EIB_IMPL_H 28 29 #ifdef __cplusplus 30 extern "C" { 31 #endif 32 33 #include <sys/ddi.h> 34 #include <sys/mac.h> 35 #include <sys/sunddi.h> 36 #include <sys/varargs.h> 37 #include <sys/vlan.h> 38 #include <sys/ib/ibtl/ibti.h> 39 #include <sys/ib/ibtl/ibvti.h> 40 #include <sys/ib/ib_pkt_hdrs.h> 41 42 #include <sys/ib/clients/eoib/fip.h> 43 #include <sys/ib/clients/eoib/eib.h> 44 45 /* 46 * Driver specific constants 47 */ 48 #define EIB_E_SUCCESS 0 49 #define EIB_E_FAILURE -1 50 #define EIB_MAX_LINE 128 51 #define EIB_MAX_SGL 59 52 #define EIB_MAX_POST_MULTIPLE 4 53 #define EIB_MAX_PAYLOAD_HDR_SZ 160 54 #define EIB_TX_COPY_THRESH 4096 /* greater than mtu */ 55 #define EIB_MAX_VNICS 64 /* do not change this */ 56 #define EIB_LOGIN_TIMEOUT_USEC 8000000 57 #define EIB_RWR_CHUNK_SZ 8 58 #define EIB_IPHDR_ALIGN_ROOM 32 59 #define EIB_IP_HDR_ALIGN 2 60 #define EIB_MAX_RX_PKTS_ONINTR 0x800 61 #define EIB_MAX_LOGIN_ATTEMPTS 3 62 #define EIB_MAX_VHUB_TBL_ATTEMPTS 3 63 #define EIB_MAX_KA_ATTEMPTS 3 64 #define EIB_MAX_ATTEMPTS 10 65 #define EIB_DELAY_HALF_SECOND 500000 66 #define EIB_GRH_SZ (sizeof (ib_grh_t)) 67 68 /* 69 * Debug messages 70 */ 71 #define EIB_MSGS_CRIT 0x01 72 #define EIB_MSGS_ERR 0x02 73 #define EIB_MSGS_WARN 0x04 74 #define EIB_MSGS_DEBUG 0x08 75 #define EIB_MSGS_ARGS 0x10 76 #define EIB_MSGS_PKT 0x20 77 #define EIB_MSGS_VERBOSE 0x40 78 #define EIB_MSGS_DEFAULT (EIB_MSGS_CRIT | EIB_MSGS_ERR | EIB_MSGS_WARN) 79 80 #define EIB_LOGSZ_DEFAULT 0x20000 81 82 #define EIB_DPRINTF_CRIT eib_dprintf_crit 83 #define EIB_DPRINTF_ERR eib_dprintf_err 84 #define EIB_DPRINTF_WARN eib_dprintf_warn 85 #ifdef EIB_DEBUG 86 #define EIB_DPRINTF_DEBUG eib_dprintf_debug 87 #define EIB_DPRINTF_ARGS eib_dprintf_args 88 #define EIB_DPRINTF_PKT eib_dprintf_pkt 89 #define EIB_DPRINTF_VERBOSE eib_dprintf_verbose 90 #else 91 #define EIB_DPRINTF_DEBUG 0 && 92 #define EIB_DPRINTF_ARGS 0 && 93 #define EIB_DPRINTF_PKT 0 && 94 #define EIB_DPRINTF_VERBOSE 0 && 95 #endif 96 97 /* 98 * EoIB threads to provide various services 99 */ 100 #define EIB_EVENTS_HDLR "eib_events_handler" 101 #define EIB_RWQES_REFILLER "eib_rwqes_refiller" 102 #define EIB_VNIC_CREATOR "eib_vnic_creator" 103 #define EIB_TXWQES_MONITOR "eib_txwqe_monitor" 104 #define EIB_LSOBUFS_MONITOR "eib_lsobufs_monitor" 105 106 /* 107 * Macro for finding the least significant bit set in a 64-bit unsigned int 108 */ 109 #define EIB_FIND_LSB_SET(val64) eib_setbit_mod67[((-(val64) & (val64)) % 67)] 110 111 /* 112 * LSO buffers 113 * 114 * Under normal circumstances we should never need to use any buffer 115 * that's larger than MTU. Unfortunately, IB HCA has limitations 116 * on the length of SGL that are much smaller than those for regular 117 * ethernet NICs. Since the network layer doesn't care to limit the 118 * number of mblk fragments in any send mp chain, we end up having to 119 * use these larger buffers occasionally. 120 */ 121 #define EIB_LSO_MAXLEN 65536 122 #define EIB_LSO_BUFSZ 8192 123 #define EIB_LSO_NUM_BUFS 1024 124 #define EIB_LSO_FREE_BUFS_THRESH (EIB_LSO_NUM_BUFS >> 5) 125 126 typedef struct eib_lsobuf_s { 127 struct eib_lsobuf_s *lb_next; 128 uint8_t *lb_buf; 129 int lb_isfree; 130 } eib_lsobuf_t; 131 132 typedef struct eib_lsobkt_s { 133 kmutex_t bk_lock; 134 kcondvar_t bk_cv; 135 uint_t bk_status; 136 uint8_t *bk_mem; 137 eib_lsobuf_t *bk_bufl; 138 eib_lsobuf_t *bk_free_head; 139 ibt_mr_hdl_t bk_mr_hdl; 140 ibt_lkey_t bk_lkey; 141 uint_t bk_nelem; 142 uint_t bk_nfree; 143 } eib_lsobkt_t; 144 145 #define EIB_LBUF_SHORT 0x1 146 #define EIB_LBUF_MONITOR_DIE 0x2 147 148 /* 149 * The admin partition is only used for sending login and logout messages 150 * and receiving login acknowledgements from the gateway. While packets 151 * going out on several vlans at the same time could result in multiple 152 * vnic creations happening at the same time (and therefore multiple login 153 * packets), we serialize the vnic creation via the vnic creator thread, so 154 * we shouldn't need a lot of send wqes or receive wqes. Note also that we 155 * keep the cq size request to slightly less than a 2^n boundary to allow 156 * the alloc cq routine to return the closest 2^n boundary as the real cq 157 * size without wasting too much memory. 158 */ 159 #define EIB_ADMIN_MAX_SWQE 30 160 #define EIB_ADMIN_MAX_RWQE 30 161 #define EIB_ADMIN_CQ_SIZE (EIB_ADMIN_MAX_SWQE + EIB_ADMIN_MAX_RWQE + 1) 162 163 /* 164 * The control qp is per vhub partition, and is used to send and receive 165 * vhub control messages such as vhub table request/response, vhub 166 * update response and vnic alive messages. While the vhub table response 167 * and vhub update messages might take a few rwqes, the vhub table request 168 * is made only once per vnic, and the vnic alive message is periodic 169 * and uses a single swqe as well. Per vnic, we should certainly not need 170 * too many swqes/rwqes. 171 */ 172 #define EIB_CTL_MAX_SWQE 30 173 #define EIB_CTL_MAX_RWQE 30 174 #define EIB_CTL_CQ_SIZE (EIB_CTL_MAX_SWQE + EIB_CTL_MAX_RWQE + 1) 175 176 /* 177 * For the vNIC's data channel, there are three items that are of importance: 178 * the constraints defined below, the hca_max_chan_sz attribute and the value of 179 * (hca_max_cq_sz - 1). The maximum limit on swqe/rwqe is set to the minimum 180 * of these three values. 181 * 182 * While the total number of RWQEs posted to the data channel of any vNIC will 183 * not exceed EIB_DATA_MAX_RWQE, we also do not want to acquire and post all of 184 * it during the data channel initialization, since that is a lot of wqes for 185 * one vnic to consume when we don't even know if the vnic will need it at all. 186 * We post an initial set of EIB_DATA_RWQE_BKT rwqes, and slowly post more and 187 * more sets as we see them being consumed, until we hit the hard limit of 188 * EIB_DATA_MAX_RWQE. 189 */ 190 #define EIB_DATA_MAX_SWQE 4000 191 #define EIB_DATA_MAX_RWQE 4000 192 #define EIB_DATA_RWQE_BKT 512 193 194 /* 195 * vNIC data channel CQ moderation parameters 196 */ 197 #define EIB_TX_COMP_COUNT 10 198 #define EIB_TX_COMP_USEC 300 199 #define EIB_RX_COMP_COUNT 4 200 #define EIB_RX_COMP_USEC 10 201 202 /* 203 * qe_info masks (blk:ndx:type:flags) 204 */ 205 #define EIB_WQEBLK_SHIFT 24 206 #define EIB_WQEBLK_MASK 0xFF 207 #define EIB_WQENDX_SHIFT 16 208 #define EIB_WQENDX_MASK 0xFF 209 #define EIB_WQETYP_SHIFT 8 210 #define EIB_WQETYP_MASK 0xFF 211 #define EIB_WQEFLGS_SHIFT 0 212 #define EIB_WQEFLGS_MASK 0xFF 213 214 /* 215 * Macros to get the bit fields from qe_info 216 */ 217 #define EIB_WQE_BLK(info) (((info) >> EIB_WQEBLK_SHIFT) & EIB_WQEBLK_MASK) 218 #define EIB_WQE_NDX(info) (((info) >> EIB_WQENDX_SHIFT) & EIB_WQENDX_MASK) 219 #define EIB_WQE_TYPE(info) (((info) >> EIB_WQETYP_SHIFT) & EIB_WQETYP_MASK) 220 #define EIB_WQE_FLAGS(info) ((info) & EIB_WQEFLGS_MASK) 221 222 /* 223 * Values for type and flags in qe_info 224 */ 225 #define EIB_WQE_TX 0x1 226 #define EIB_WQE_RX 0x2 227 228 /* 229 * Flags for rx wqes/buffers 230 */ 231 #define EIB_WQE_FLG_POSTED_TO_HCA 0x1 232 #define EIB_WQE_FLG_WITH_NW 0x2 233 234 /* 235 * Flags for tx wqes/buffers 236 */ 237 #define EIB_WQE_FLG_BUFTYPE_LSO 0x4 238 #define EIB_WQE_FLG_BUFTYPE_MAPPED 0x8 239 240 /* 241 * Send/Recv workq entries 242 */ 243 typedef struct eib_wqe_s { 244 struct eib_wqe_pool_s *qe_pool; 245 uint8_t *qe_cpbuf; 246 uint8_t *qe_payload_hdr; 247 uint_t qe_bufsz; 248 uint_t qe_info; 249 int qe_vnic_inst; 250 ibt_ud_dest_hdl_t qe_dest; 251 frtn_t qe_frp; 252 253 mblk_t *qe_mp; 254 ibt_mi_hdl_t qe_iov_hdl; 255 ibt_all_wr_t qe_wr; 256 ibt_wr_ds_t qe_sgl; 257 ibt_wr_ds_t qe_big_sgl[EIB_MAX_SGL]; 258 struct eib_wqe_s *qe_nxt_post; 259 struct eib_chan_s *qe_chan; 260 } eib_wqe_t; 261 262 /* 263 * The wqe in-use/free status in EoIB is managed via a 2-level bitmap 264 * logic. 265 * 266 * Each set of 64 wqes (a "wqe block") is managed by a single 64-bit 267 * integer bitmap. The free status of a set of 64 such wqe blocks (a 268 * "wqe pool") is managed by one 64-bit integer bitmap (if any wqe in 269 * the wqe block is free, the bit in the map is 1, otherwise it is 0). 270 * 271 * The maximum pool size is 4096 wqes, but this can easily be extended 272 * to support more wqes using additional pools of wqes. 273 * 274 * Note that an entire pool of wqes is allocated via a single allocation, 275 * the wqe addresses in a pool are all contiguous. The tx/rx copy buffers 276 * for a wqe pool are also allocated via a single allocation. 277 */ 278 #define EIB_BLKS_PER_POOL 64 279 #define EIB_WQES_PER_BLK 64 /* do not change this */ 280 #define EIB_WQES_PER_POOL (EIB_BLKS_PER_POOL * EIB_WQES_PER_BLK) 281 282 #define EIB_WQE_SZ (sizeof (eib_wqe_t)) 283 #define EIB_WQEBLK_SZ (EIB_WQES_PER_BLK * EIB_WQE_SZ) 284 285 typedef struct eib_wqe_pool_s { 286 struct eib_wqe_pool_s *wp_next; 287 struct eib_s *wp_ss; 288 ib_vaddr_t wp_vaddr; 289 ib_memlen_t wp_memsz; 290 ibt_mr_hdl_t wp_mr; 291 ibt_lkey_t wp_lkey; 292 uint_t wp_nfree_lwm; 293 int wp_type; 294 295 kmutex_t wp_lock; 296 kcondvar_t wp_cv; 297 uint_t wp_status; 298 uint_t wp_nfree; 299 uint64_t wp_free_blks; 300 uint64_t wp_free_wqes[EIB_BLKS_PER_POOL]; 301 struct eib_wqe_s *wp_wqe; 302 } eib_wqe_pool_t; 303 304 /* 305 * Values for wp_type 306 */ 307 #define EIB_WP_TYPE_TX 0x1 308 #define EIB_WP_TYPE_RX 0x2 309 310 /* 311 * Values for wp_status (bit fields) 312 */ 313 #define EIB_TXWQE_SHORT 0x1 /* only for tx wqe pool */ 314 #define EIB_TXWQE_MONITOR_DIE 0x2 /* only for tx wqe pool */ 315 316 #define EIB_RXWQE_SHORT 0x1 /* only for rx wqe pool */ 317 318 /* 319 * The low-water-mark is an indication of when wqe grabs for low-priority 320 * qps should start to get refused (swqe grabs for control messages such 321 * as keepalives and rwqe grabs for posting back to control qps will still 322 * be allowed). The high-water-mark is an indication of when normal 323 * behavior should resume. 324 */ 325 #define EIB_NFREE_SWQES_LWM (EIB_WQES_PER_POOL / 64) /* 1/64 */ 326 #define EIB_NFREE_SWQES_HWM (EIB_WQES_PER_POOL / 32) /* 1/32 */ 327 #define EIB_NFREE_RWQES_LWM (EIB_WQES_PER_POOL / 10) /* 10% */ 328 #define EIB_NFREE_RWQES_HWM (EIB_WQES_PER_POOL / 5) /* 20% */ 329 330 /* 331 * The "rwqes low" is used to determine when we should start using allocb() 332 * to copy and send received mblks in the rx path. It should be a little 333 * above the rwqes low-water-mark, but less than the high-water-mark. 334 */ 335 #define EIB_NFREE_RWQES_LOW \ 336 ((EIB_NFREE_RWQES_LWM + EIB_NFREE_RWQES_HWM) / 2) 337 338 #define EIB_WPRI_HI 1 /* for keepalive posts */ 339 #define EIB_WPRI_LO 2 /* for all other posts */ 340 341 /* 342 * Multicast GID Layout: the multicast gid is specified in big-endian 343 * representation, as a collection of different-sized fields in the 344 * EoIB specification. On Solaris, the multicast gid is represented 345 * as a collection of two 8-byte fields (in ib_gid_t). 346 */ 347 typedef struct eib_mgid_spec_s { 348 uint8_t sp_mgid_prefix[FIP_MGID_PREFIX_LEN]; 349 uint8_t sp_type; 350 uint8_t sp_dmac[ETHERADDRL]; 351 uint8_t sp_rss_hash; 352 uint8_t sp_vhub_id[FIP_VHUBID_LEN]; 353 } eib_mgid_spec_t; 354 355 /* 356 * Values for sp_type in mgid as per EoIB specification 357 */ 358 #define EIB_MGID_VHUB_DATA 0x0 359 #define EIB_MGID_VHUB_UPDATE 0x2 360 #define EIB_MGID_VHUB_TABLE 0x3 361 362 typedef union eib_mgid_s { 363 eib_mgid_spec_t gd_spec; 364 ib_gid_t gd_sol; 365 } eib_mgid_t; 366 367 /* 368 * Gateway properties handed over to us by the EoIB nexus 369 */ 370 typedef struct eib_gw_props_s { 371 kmutex_t pp_gw_lock; 372 373 ib_guid_t pp_gw_system_guid; 374 ib_guid_t pp_gw_guid; 375 ib_sn_prefix_t pp_gw_sn_prefix; 376 377 uint_t pp_gw_adv_period; 378 uint_t pp_gw_ka_period; 379 uint_t pp_vnic_ka_period; 380 381 ib_qpn_t pp_gw_ctrl_qpn; 382 ib_lid_t pp_gw_lid; 383 uint16_t pp_gw_portid; 384 385 uint16_t pp_gw_num_net_vnics; 386 uint8_t pp_gw_flag_available; 387 uint8_t pp_gw_is_host_adm_vnics; 388 uint8_t pp_gw_sl; 389 uint8_t pp_gw_n_rss_qpn; 390 391 uint8_t *pp_gw_system_name; 392 uint8_t *pp_gw_port_name; 393 uint8_t *pp_gw_vendor_id; 394 395 clock_t pp_gw_ka_ticks; /* 2.5 x gw_ka_period */ 396 clock_t pp_vnic_ka_ticks; /* vnic_ka_period */ 397 } eib_gw_props_t; 398 399 /* 400 * Port-specific properties 401 */ 402 typedef struct eib_props_s { 403 uint64_t ep_ifspeed; 404 ib_guid_t ep_hca_guid; 405 uint8_t ep_port_num; 406 ib_gid_t ep_sgid; 407 ib_lid_t ep_blid; 408 uint16_t ep_mtu; 409 ibt_srate_t ep_srate; 410 } eib_props_t; 411 412 /* 413 * Capabilities derived from HCA attributes 414 */ 415 typedef struct eib_caps_s { 416 uint_t cp_lso_maxlen; 417 uint32_t cp_cksum_flags; 418 int cp_resv_lkey_capab; 419 ibt_lkey_t cp_resv_lkey; 420 421 uint_t cp_max_swqe; 422 uint_t cp_max_rwqe; 423 uint_t cp_max_sgl; 424 uint_t cp_hiwm_sgl; 425 } eib_caps_t; 426 427 /* 428 * List of multicast groups the vnic joined 429 */ 430 typedef struct eib_mcg_s { 431 struct eib_mcg_s *mg_next; 432 ib_gid_t mg_rgid; 433 ib_gid_t mg_mgid; 434 uint8_t mg_join_state; 435 uint8_t mg_mac[ETHERADDRL]; 436 ibt_mcg_info_t *mg_mcginfo; 437 } eib_mcg_t; 438 439 /* 440 * Admin/control/data channel information 441 */ 442 typedef struct eib_chan_s { 443 ibt_channel_hdl_t ch_chan; 444 ib_qpn_t ch_qpn; 445 446 ibt_wc_t *ch_wc; 447 ibt_cq_hdl_t ch_cq_hdl; 448 uint_t ch_cq_sz; 449 450 ibt_wc_t *ch_rcv_wc; 451 ibt_cq_hdl_t ch_rcv_cq_hdl; 452 uint_t ch_rcv_cq_sz; 453 454 int ch_vnic_inst; 455 uint_t ch_max_swqes; 456 uint_t ch_max_rwqes; 457 uint_t ch_lwm_rwqes; 458 uint_t ch_rwqe_bktsz; 459 uint_t ch_ip_hdr_align; 460 boolean_t ch_alloc_mp; 461 boolean_t ch_tear_down; 462 463 kmutex_t ch_pkey_lock; 464 ib_pkey_t ch_pkey; 465 uint16_t ch_pkey_ix; 466 467 kmutex_t ch_cep_lock; 468 kcondvar_t ch_cep_cv; 469 ibt_cep_state_t ch_cep_state; 470 471 kmutex_t ch_tx_lock; 472 kcondvar_t ch_tx_cv; 473 uint_t ch_tx_posted; 474 boolean_t ch_tx_busy; 475 struct eib_wqe_s *ch_tx; 476 struct eib_wqe_s *ch_tx_tail; 477 478 kmutex_t ch_rx_lock; 479 kcondvar_t ch_rx_cv; 480 uint_t ch_rx_posted; 481 boolean_t ch_rx_refilling; 482 483 kmutex_t ch_vhub_lock; 484 struct eib_mcg_s *ch_vhub_table; 485 struct eib_mcg_s *ch_vhub_update; 486 struct eib_mcg_s *ch_vhub_data; 487 488 struct eib_chan_s *ch_rxpost_next; 489 } eib_chan_t; 490 491 /* 492 * States for vNIC state machine during login 493 */ 494 #define EIB_LOGIN_INIT 0 495 #define EIB_LOGIN_ACK_WAIT 1 496 #define EIB_LOGIN_ACK_RCVD 2 497 #define EIB_LOGIN_NACK_RCVD 3 498 #define EIB_LOGIN_TBL_WAIT 4 499 #define EIB_LOGIN_TBL_INPROG 5 500 #define EIB_LOGIN_TBL_DONE 6 501 #define EIB_LOGIN_TBL_FAILED 7 502 #define EIB_LOGIN_DONE 8 503 #define EIB_LOGIN_TIMED_OUT 9 504 #define EIB_LOGOUT_DONE 10 505 506 typedef struct eib_login_data_s { 507 ib_guid_t ld_gw_guid; 508 ib_lid_t ld_gw_lid; 509 uint_t ld_syndrome; 510 uint16_t ld_gw_port_id; 511 ib_qpn_t ld_gw_data_qpn; 512 ib_qpn_t ld_gw_ctl_qpn; 513 uint16_t ld_vnic_id; /* includes set msbit */ 514 uint16_t ld_vhub_mtu; 515 uint16_t ld_vhub_pkey; 516 uint16_t ld_assigned_vlan; 517 uint8_t ld_gw_sl; 518 uint8_t ld_n_rss_mcgid; 519 uint8_t ld_n_mac_mcgid; 520 uint8_t ld_vnic_name[FIP_VNIC_NAME_LEN]; 521 uint8_t ld_assigned_mac[ETHERADDRL]; 522 uint8_t ld_gw_mgid_prefix[FIP_MGID_PREFIX_LEN]; 523 uint8_t ld_vlan_in_packets; 524 uint32_t ld_vhub_id; 525 } eib_login_data_t; 526 527 #define EIB_UNICAST_MAC(mac) (((mac)[0] & 0x01) == 0) 528 529 /* 530 * Map to translate between DMAC and {qpn, lid, sl} 531 */ 532 typedef struct eib_vhub_map_s { 533 struct eib_vhub_map_s *mp_next; 534 uint32_t mp_tusn; 535 ib_qpn_t mp_qpn; 536 ib_lid_t mp_lid; 537 uint8_t mp_mac[ETHERADDRL]; 538 uint8_t mp_sl; 539 uint8_t mp_v_rss_type; 540 } eib_vhub_map_t; 541 542 /* 543 * Per-vNIC vHUB Table 544 */ 545 #define EIB_TB_NBUCKETS 13 546 typedef struct eib_vhub_table_s { 547 kmutex_t tb_lock; 548 struct eib_vhub_map_s *tb_gateway; 549 struct eib_vhub_map_s *tb_unicast_miss; 550 struct eib_vhub_map_s *tb_vhub_multicast; 551 struct eib_vhub_map_s *tb_vnic_entry[EIB_TB_NBUCKETS]; 552 struct eib_vhub_map_s *tb_mcast_entry[EIB_TB_NBUCKETS]; 553 554 uint32_t tb_tusn; 555 uint8_t tb_eport_state; 556 557 uint16_t tb_entries_seen; 558 uint16_t tb_entries_in_table; 559 uint32_t tb_checksum; 560 } eib_vhub_table_t; 561 562 typedef struct eib_vhub_update_s { 563 kmutex_t up_lock; 564 eib_vhub_map_t *up_vnic_entry; 565 uint32_t up_tusn; 566 uint8_t up_eport_state; 567 } eib_vhub_update_t; 568 569 typedef struct eib_ether_hdr_s { 570 int eh_tagless; 571 uint16_t eh_ether_type; 572 uint16_t eh_vlan; 573 uint8_t eh_dmac[ETHERADDRL]; 574 uint8_t eh_smac[ETHERADDRL]; 575 } eib_ether_hdr_t; 576 577 /* 578 * vNIC Information 579 */ 580 typedef struct eib_vnic_s { 581 struct eib_s *vn_ss; 582 eib_chan_t *vn_ctl_chan; 583 eib_chan_t *vn_data_chan; 584 int vn_instance; 585 uint16_t vn_vlan; 586 uint16_t vn_id; 587 uint8_t vn_macaddr[ETHERADDRL]; 588 struct eib_login_data_s vn_login_data; 589 590 kmutex_t vn_lock; 591 kcondvar_t vn_cv; 592 uint_t vn_state; 593 struct eib_vhub_table_s *vn_vhub_table; 594 struct eib_vhub_update_s *vn_vhub_update; 595 596 ddi_softint_handle_t vn_ctl_si_hdl; 597 ddi_softint_handle_t vn_data_tx_si_hdl; 598 ddi_softint_handle_t vn_data_rx_si_hdl; 599 } eib_vnic_t; 600 601 602 /* 603 * Base NIC's mac state flags. The lock protects the starting/stopping 604 * bits. Access to the rest of the mac state is protected by these 605 * two bits. 606 */ 607 #define EIB_NIC_STARTING 0x01 608 #define EIB_NIC_STOPPING 0x02 609 #define EIB_NIC_STARTED 0x80 610 #define EIB_NIC_RESTARTING (EIB_NIC_STARTING | EIB_NIC_STOPPING) 611 612 typedef struct eib_node_state_s { 613 kmutex_t ns_lock; 614 kcondvar_t ns_cv; 615 uint_t ns_nic_state; 616 link_state_t ns_link_state; 617 } eib_node_state_t; 618 619 /* 620 * MIB-II statistics to report to the mac layer 621 */ 622 typedef struct eib_stats_s { 623 uint64_t st_obytes; /* bytes sent out */ 624 uint64_t st_opkts; /* pkts sent out */ 625 uint64_t st_brdcstxmit; /* broadcast pkts transmitted */ 626 uint64_t st_multixmit; /* multicast pkts transmitted */ 627 uint64_t st_oerrors; /* transmit errors */ 628 uint64_t st_noxmitbuf; /* transmit pkts discarded */ 629 630 uint64_t st_rbytes; /* bytes received */ 631 uint64_t st_ipkts; /* pkts received */ 632 uint64_t st_brdcstrcv; /* broadcast pkts received */ 633 uint64_t st_multircv; /* multicast pkts received */ 634 uint64_t st_ierrors; /* receive errors */ 635 uint64_t st_norcvbuf; /* receive pkts discarded */ 636 } eib_stats_t; 637 638 #define EIB_UPDATE_COUNTER(addr, val) (atomic_add_64((addr), (val))) 639 #define EIB_INCR_COUNTER(addr) (atomic_inc_64((addr))) 640 #define EIB_DECR_COUNTER(addr) (atomic_dec_64((addr))) 641 642 /* 643 * Cache of address vectors with dlid as the key. Currently we use 644 * eib state structure's ei_lock to protect the individual address 645 * vector's fields. This is a lock granularity that's slightly 646 * bigger than ideal, but it should do for now. 647 */ 648 #define EIB_AV_NBUCKETS 17 649 typedef struct eib_avect_s { 650 struct eib_avect_s *av_next; 651 ibt_adds_vect_t av_vect; 652 uint_t av_ref; 653 } eib_avect_t; 654 655 /* 656 * vNIC creation and deletion are serialized by a non-zero value 657 * to the ei_vnic_state member (i.e. only one vnic may be created 658 * or deleted at a time). The code makes sure to access/update 659 * the ei_active_vnics member only after a successful setting of 660 * ei_vnic_state. 661 */ 662 #define EIB_VN_BEING_CREATED 0x01 663 #define EIB_VN_BEING_DELETED 0x02 664 #define EIB_VN_BEING_MODIFIED (EIB_VN_BEING_CREATED | EIB_VN_BEING_DELETED) 665 666 /* 667 * All possible EoIB event work items that need to be handled 668 */ 669 #define EIB_EV_NONE 0 670 #define EIB_EV_PORT_DOWN 1 671 #define EIB_EV_PORT_UP 2 672 #define EIB_EV_PKEY_CHANGE 3 673 #define EIB_EV_SGID_CHANGE 4 674 #define EIB_EV_CLNT_REREG 5 675 #define EIB_EV_GW_EPORT_DOWN 6 676 #define EIB_EV_GW_DOWN 7 677 #define EIB_EV_GW_UP 8 678 #define EIB_EV_GW_INFO_UPDATE 9 679 #define EIB_EV_MCG_DELETED 10 680 #define EIB_EV_MCG_CREATED 11 681 #define EIB_EV_SHUTDOWN 12 682 683 typedef struct eib_event_s { 684 struct eib_event_s *ev_next; 685 uint_t ev_code; 686 void *ev_arg; 687 } eib_event_t; 688 689 /* 690 * Work element for new vnic creation 691 */ 692 typedef struct eib_vnic_req_s { 693 struct eib_vnic_req_s *vr_next; 694 uint_t vr_req; 695 uint8_t vr_mac[ETHERADDRL]; 696 uint16_t vr_vlan; 697 } eib_vnic_req_t; 698 699 /* 700 * Values for vr_req 701 */ 702 #define EIB_CR_REQ_NEW_VNIC 1 703 #define EIB_CR_REQ_FLUSH 2 704 #define EIB_CR_REQ_DIE 3 705 706 /* 707 * Work element for vnics kept alive by the keepalive manager thread 708 * and bitfield values for ei_ka_vnics_event. 709 */ 710 typedef struct eib_ka_vnics_s { 711 struct eib_ka_vnics_s *ka_next; 712 struct eib_vnic_s *ka_vnic; 713 } eib_ka_vnics_t; 714 715 #define EIB_KA_VNICS_DIE 0x1 716 #define EIB_KA_VNICS_TIMED_OUT 0x2 717 718 /* 719 * EoIB per-instance state 720 */ 721 typedef struct eib_s { 722 ibt_clnt_hdl_t ei_ibt_hdl; 723 ibt_hca_hdl_t ei_hca_hdl; 724 ibt_pd_hdl_t ei_pd_hdl; 725 mac_handle_t ei_mac_hdl; 726 727 ddi_softint_handle_t ei_admin_si_hdl; 728 ddi_callback_id_t ei_login_ack_cb; 729 ddi_callback_id_t ei_gw_alive_cb; 730 ddi_callback_id_t ei_gw_info_cb; 731 732 ibt_hca_attr_t *ei_hca_attrs; 733 dev_info_t *ei_dip; 734 uint_t ei_instance; 735 736 struct eib_gw_props_s *ei_gw_props; 737 struct eib_props_s *ei_props; 738 struct eib_caps_s *ei_caps; 739 struct eib_stats_s *ei_stats; 740 741 struct eib_node_state_s *ei_node_state; 742 struct eib_chan_s *ei_admin_chan; 743 744 struct eib_wqe_pool_s *ei_tx; 745 struct eib_wqe_pool_s *ei_rx; 746 struct eib_lsobkt_s *ei_lso; 747 748 kmutex_t ei_vnic_lock; 749 kcondvar_t ei_vnic_cv; 750 uint_t ei_vnic_state; 751 uint64_t ei_active_vnics; 752 uint64_t ei_zombie_vnics; 753 uint64_t ei_rejoin_vnics; 754 struct eib_vnic_s *ei_vnic[EIB_MAX_VNICS]; 755 struct eib_vnic_s *ei_vnic_pending; 756 int64_t ei_gw_last_heartbeat; 757 boolean_t ei_gw_unreachable; 758 uint8_t ei_gw_eport_state; 759 760 kmutex_t ei_av_lock; 761 struct eib_avect_s *ei_av[EIB_AV_NBUCKETS]; 762 763 kmutex_t ei_ev_lock; 764 kcondvar_t ei_ev_cv; 765 struct eib_event_s *ei_event; 766 767 kmutex_t ei_rxpost_lock; 768 kcondvar_t ei_rxpost_cv; 769 uint_t ei_rxpost_die; 770 struct eib_chan_s *ei_rxpost; 771 772 kmutex_t ei_vnic_req_lock; 773 kcondvar_t ei_vnic_req_cv; 774 struct eib_vnic_req_s *ei_vnic_req; 775 struct eib_vnic_req_s *ei_failed_vnic_req; 776 struct eib_vnic_req_s *ei_pending_vnic_req; 777 778 kmutex_t ei_ka_vnics_lock; 779 kcondvar_t ei_ka_vnics_cv; 780 uint_t ei_ka_vnics_event; 781 struct eib_ka_vnics_s *ei_ka_vnics; 782 783 kt_did_t ei_txwqe_monitor; 784 kt_did_t ei_lsobufs_monitor; 785 kt_did_t ei_rwqes_refiller; 786 kt_did_t ei_vnic_creator; 787 kt_did_t ei_events_handler; 788 kt_did_t ei_keepalives_manager; 789 } eib_t; 790 791 /* 792 * Private read-only datalink properties 793 */ 794 #define EIB_DLPROP_GW_EPORT_STATE "_eib_eport_state" 795 #define EIB_DLPROP_HCA_GUID "_eib_hca_guid" 796 #define EIB_DLPROP_PORT_GUID "_eib_port_guid" 797 798 /* 799 * FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE 800 */ 801 802 /* 803 * FIP protocol related 804 */ 805 extern int eib_fip_login(eib_t *, eib_vnic_t *, int *); 806 extern int eib_fip_heartbeat(eib_t *, eib_vnic_t *, int *); 807 extern int eib_fip_vhub_table(eib_t *, eib_vnic_t *, int *); 808 extern int eib_fip_logout(eib_t *, eib_vnic_t *, int *); 809 extern int eib_fip_parse_login_ack(eib_t *, uint8_t *, eib_login_data_t *); 810 extern int eib_fip_parse_ctl_pkt(uint8_t *, eib_vnic_t *); 811 812 /* 813 * Service threads and other handlers 814 */ 815 extern void eib_events_handler(eib_t *); 816 extern void eib_svc_enqueue_event(eib_t *, eib_event_t *); 817 extern void eib_refill_rwqes(eib_t *); 818 extern void eib_vnic_creator(eib_t *); 819 extern void eib_monitor_tx_wqes(eib_t *); 820 extern void eib_monitor_lso_bufs(eib_t *); 821 extern void eib_manage_keepalives(eib_t *); 822 extern void eib_stop_events_handler(eib_t *); 823 extern void eib_stop_refill_rwqes(eib_t *); 824 extern void eib_stop_vnic_creator(eib_t *); 825 extern void eib_stop_monitor_tx_wqes(eib_t *); 826 extern int eib_stop_monitor_lso_bufs(eib_t *, boolean_t); 827 extern void eib_stop_manage_keepalives(eib_t *); 828 extern void eib_flush_vnic_reqs(eib_t *); 829 extern void eib_gw_info_cb(dev_info_t *, ddi_eventcookie_t, void *, void *); 830 extern void eib_gw_alive_cb(dev_info_t *, ddi_eventcookie_t, void *, void *); 831 extern void eib_login_ack_cb(dev_info_t *, ddi_eventcookie_t, void *, void *); 832 833 /* 834 * Admin QP related 835 */ 836 extern int eib_adm_setup_qp(eib_t *, int *); 837 extern uint_t eib_adm_comp_handler(caddr_t, caddr_t); 838 extern void eib_rb_adm_setup_qp(eib_t *); 839 840 /* 841 * Control QP related 842 */ 843 extern int eib_ctl_create_qp(eib_t *, eib_vnic_t *, int *); 844 extern uint_t eib_ctl_comp_handler(caddr_t, caddr_t); 845 extern void eib_rb_ctl_create_qp(eib_t *, eib_vnic_t *); 846 847 /* 848 * Data QP related 849 */ 850 extern int eib_data_create_qp(eib_t *, eib_vnic_t *, int *); 851 extern uint_t eib_data_rx_comp_handler(caddr_t, caddr_t); 852 extern uint_t eib_data_tx_comp_handler(caddr_t, caddr_t); 853 extern void eib_data_rx_recycle(caddr_t); 854 extern void eib_data_post_tx(eib_vnic_t *, eib_wqe_t *); 855 extern void eib_data_parse_ether_hdr(mblk_t *, eib_ether_hdr_t *); 856 extern int eib_data_lookup_vnic(eib_t *, uint8_t *, uint16_t, eib_vnic_t **, 857 boolean_t *); 858 extern int eib_data_prepare_frame(eib_vnic_t *, eib_wqe_t *, mblk_t *, 859 eib_ether_hdr_t *); 860 extern void eib_rb_data_create_qp(eib_t *, eib_vnic_t *); 861 862 /* 863 * Resource related 864 */ 865 extern int eib_rsrc_setup_bufs(eib_t *, int *); 866 extern int eib_rsrc_grab_swqes(eib_t *, eib_wqe_t **, uint_t, uint_t *, int); 867 extern int eib_rsrc_grab_rwqes(eib_t *, eib_wqe_t **, uint_t, uint_t *, int); 868 extern int eib_rsrc_grab_lsobufs(eib_t *, uint_t, ibt_wr_ds_t *, uint32_t *); 869 extern eib_wqe_t *eib_rsrc_grab_swqe(eib_t *, int); 870 extern eib_wqe_t *eib_rsrc_grab_rwqe(eib_t *, int); 871 extern void eib_rsrc_return_swqe(eib_t *, eib_wqe_t *, eib_chan_t *); 872 extern void eib_rsrc_return_rwqe(eib_t *, eib_wqe_t *, eib_chan_t *); 873 extern void eib_rsrc_return_lsobufs(eib_t *, ibt_wr_ds_t *, uint32_t); 874 extern void eib_rsrc_decr_posted_swqe(eib_t *, eib_chan_t *); 875 extern void eib_rsrc_decr_posted_rwqe(eib_t *, eib_chan_t *); 876 extern void eib_rsrc_txwqes_needed(eib_t *); 877 extern void eib_rsrc_lsobufs_needed(eib_t *); 878 extern boolean_t eib_rsrc_rxpool_low(eib_wqe_t *); 879 extern void eib_rb_rsrc_setup_bufs(eib_t *, boolean_t); 880 881 /* 882 * IBT related 883 */ 884 extern int eib_ibt_hca_init(eib_t *); 885 extern void eib_ibt_link_mod(eib_t *); 886 extern int eib_ibt_modify_chan_pkey(eib_t *, eib_chan_t *, ib_pkey_t); 887 extern eib_avect_t *eib_ibt_hold_avect(eib_t *, ib_lid_t, uint8_t); 888 extern void eib_ibt_release_avect(eib_t *, eib_avect_t *); 889 extern void eib_ibt_free_avects(eib_t *); 890 extern void eib_ibt_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t, 891 ibt_async_event_t *); 892 extern void eib_ibt_record_capab(eib_t *, ibt_hca_attr_t *, eib_caps_t *); 893 extern void eib_rb_ibt_hca_init(eib_t *, uint_t); 894 895 /* 896 * Chan related 897 */ 898 extern eib_chan_t *eib_chan_init(void); 899 extern void eib_chan_fini(eib_chan_t *); 900 extern int eib_chan_post_rx(eib_t *, eib_chan_t *, uint_t *); 901 extern int eib_chan_post_recv(eib_t *, eib_chan_t *, eib_wqe_t *); 902 903 /* 904 * Mac layer related 905 */ 906 extern void eib_mac_set_nic_state(eib_t *, uint_t); 907 extern void eib_mac_clr_nic_state(eib_t *, uint_t); 908 extern void eib_mac_upd_nic_state(eib_t *, uint_t, uint_t); 909 extern uint_t eib_mac_get_nic_state(eib_t *); 910 extern void eib_mac_link_state(eib_t *, link_state_t, boolean_t); 911 extern void eib_mac_link_down(eib_t *, boolean_t); 912 extern void eib_mac_link_up(eib_t *, boolean_t); 913 extern int eib_mac_start(eib_t *); 914 extern void eib_mac_stop(eib_t *); 915 extern int eib_mac_multicast(eib_t *, boolean_t, uint8_t *); 916 extern int eib_mac_promisc(eib_t *, boolean_t); 917 extern int eib_mac_tx(eib_t *, mblk_t *); 918 extern int eib_mac_hca_portstate(eib_t *, ib_lid_t *, int *); 919 920 /* 921 * VNIC related 922 */ 923 extern int eib_vnic_create(eib_t *, uint8_t *, uint16_t, eib_vnic_t **, int *); 924 extern void eib_vnic_delete(eib_t *, eib_vnic_t *); 925 extern int eib_vnic_wait_for_login_ack(eib_t *, eib_vnic_t *, int *); 926 extern void eib_vnic_login_ack(eib_t *, eib_login_data_t *); 927 extern int eib_vnic_wait_for_table(eib_t *, eib_vnic_t *, int *); 928 extern void eib_vnic_vhub_table_done(eib_vnic_t *, uint_t); 929 extern int eib_vnic_join_data_mcg(eib_t *, eib_vnic_t *, uint8_t *, 930 boolean_t, int *); 931 extern int eib_vnic_setup_dest(eib_vnic_t *, eib_wqe_t *, uint8_t *, uint16_t); 932 extern void eib_vnic_leave_data_mcg(eib_t *, eib_vnic_t *, uint8_t *); 933 extern void eib_vnic_init_tables(eib_t *, eib_vnic_t *); 934 extern void eib_vnic_fini_tables(eib_t *, eib_vnic_t *, boolean_t); 935 extern eib_chan_t *eib_vnic_get_data_chan(eib_t *, int); 936 extern void eib_vnic_need_new(eib_t *, uint8_t *, uint16_t); 937 extern void eib_vnic_enqueue_req(eib_t *, eib_vnic_req_t *); 938 extern void eib_vnic_resurrect_zombies(eib_t *, uint8_t *); 939 extern void eib_vnic_restart(eib_t *, int, uint8_t *); 940 extern void eib_vnic_rejoin_mcgs(eib_t *); 941 extern void eib_rb_vnic_create(eib_t *, eib_vnic_t *, uint_t); 942 943 /* 944 * Logging and other stuff 945 */ 946 extern void eib_debug_init(void); 947 extern void eib_debug_fini(void); 948 extern void eib_dprintf_crit(int, const char *fmt, ...); 949 extern void eib_dprintf_err(int, const char *fmt, ...); 950 extern void eib_dprintf_warn(int, const char *fmt, ...); 951 #ifdef EIB_DEBUG 952 extern void eib_dprintf_debug(int, const char *fmt, ...); 953 extern void eib_dprintf_args(int, const char *fmt, ...); 954 extern void eib_dprintf_pkt(int, uint8_t *, uint_t); 955 extern void eib_dprintf_verbose(int, const char *fmt, ...); 956 #endif 957 extern int eib_get_props(eib_t *); 958 extern void eib_update_props(eib_t *, eib_gw_info_t *); 959 extern void eib_rb_get_props(eib_t *); 960 961 /* 962 * EoIB specific global variables 963 */ 964 extern ib_gid_t eib_reserved_gid; 965 extern uint8_t eib_zero_mac[]; 966 extern uint8_t eib_broadcast_mac[]; 967 extern int eib_setbit_mod67[]; 968 extern char *eib_pvt_props[]; 969 970 /* 971 * HW/FW workarounds 972 */ 973 extern int eib_wa_no_desc_list_len; 974 extern int eib_wa_no_cksum_offload; 975 extern int eib_wa_no_lso; 976 extern int eib_wa_no_mcast_entries; 977 extern int eib_wa_no_av_discover; 978 extern int eib_wa_no_good_vp_flag; 979 extern int eib_wa_no_good_vhub_cksum; 980 981 /* 982 * Miscellaneous externs 983 */ 984 extern void freemsgchain(mblk_t *); 985 extern pri_t minclsyspri; 986 987 #ifdef __cplusplus 988 } 989 #endif 990 991 #endif /* _SYS_IB_EOIB_EIB_IMPL_H */ 992