1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _SYS_IB_CLIENTS_IBD_H 28 #define _SYS_IB_CLIENTS_IBD_H 29 30 #ifdef __cplusplus 31 extern "C" { 32 #endif 33 34 /* 35 * IETF defined IPoIB encapsulation header, with 2b of ethertype 36 * followed by 2 reserved bytes. This is at the start of the 37 * datagram sent to and received over the wire by the driver. 38 */ 39 typedef struct ipoib_header { 40 ushort_t ipoib_type; 41 ushort_t ipoib_mbz; 42 } ipoib_hdr_t; 43 44 #define IPOIB_HDRSIZE sizeof (struct ipoib_header) 45 46 /* 47 * IETF defined IPoIB link address; IBA QPN, followed by GID, 48 * which has a prefix and suffix, as reported via ARP. 49 */ 50 typedef struct ipoib_mac { 51 uint32_t ipoib_qpn; 52 uint32_t ipoib_gidpref[2]; 53 uint32_t ipoib_gidsuff[2]; 54 } ipoib_mac_t; 55 56 #define IPOIB_ADDRL sizeof (struct ipoib_mac) 57 58 /* 59 * Pseudo header prepended to datagram in DLIOCRAW transmit path 60 * and when GLD hands the datagram to the gldm_send entry point. 61 */ 62 typedef struct ipoib_ptxhdr { 63 ipoib_mac_t ipoib_dest; 64 ipoib_hdr_t ipoib_rhdr; 65 } ipoib_ptxhdr_t; 66 67 #define IPOIBDLSAP(p, offset) ((ipoib_ptxhdr_t *)((caddr_t)(p)+offset)) 68 69 /* 70 * The pseudo-GRH structure that sits before the data in the 71 * receive buffer, and is overlaid on top of the real GRH. 72 * The driver sets the ipoib_vertcflow to 0 if the pseudo-GRH 73 * does not hold valid information. If it is indicated valid, 74 * the driver must additionally provide the sender's qpn in 75 * network byte order in ipoib_sqpn, and not touch the 76 * remaining parts which were DMA'ed in by the IBA hardware. 77 */ 78 typedef struct ipoib_pgrh { 79 uint32_t ipoib_vertcflow; 80 uint32_t ipoib_sqpn; 81 uint32_t ipoib_sgid_pref[2]; 82 uint32_t ipoib_sgid_suff[2]; 83 uint32_t ipoib_dgid_pref[2]; 84 uint32_t ipoib_dgid_suff[2]; 85 } ipoib_pgrh_t; 86 87 /* 88 * The GRH is also dma'ed into recv buffers, thus space needs 89 * to be allocated for them. 90 */ 91 #define IPOIB_GRH_SIZE sizeof (ipoib_pgrh_t) 92 93 #if defined(_KERNEL) && !defined(_BOOT) 94 95 #include <sys/ib/ibtl/ibti.h> 96 #include <sys/ib/ib_pkt_hdrs.h> 97 #include <sys/list.h> 98 #include <sys/mac_provider.h> 99 #include <sys/mac_ib.h> 100 #include <sys/modhash.h> 101 102 /* 103 * Structure to encapsulate various types of async requests. 104 */ 105 typedef struct ibd_acache_rq { 106 struct list_node rq_list; /* list of pending work */ 107 int rq_op; /* what operation */ 108 ipoib_mac_t rq_mac; 109 ib_gid_t rq_gid; 110 void *rq_ptr; 111 } ibd_req_t; 112 113 typedef struct ibd_mcache { 114 struct list_node mc_list; /* full/non list */ 115 uint8_t mc_jstate; 116 boolean_t mc_fullreap; 117 ibt_mcg_info_t mc_info; 118 ibd_req_t mc_req; /* to queue LEAVE req */ 119 } ibd_mce_t; 120 121 typedef struct ibd_acache_s { 122 struct list_node ac_list; /* free/active list */ 123 ibt_ud_dest_hdl_t ac_dest; 124 ipoib_mac_t ac_mac; 125 uint32_t ac_ref; 126 ibd_mce_t *ac_mce; /* for MCG AHs */ 127 } ibd_ace_t; 128 129 #define IBD_MAX_SQSEG 59 130 #define IBD_MAX_RQSEG 1 131 132 typedef enum { 133 IBD_WQE_SEND, 134 IBD_WQE_RECV 135 } ibd_wqe_type_t; 136 137 typedef enum { 138 IBD_WQE_TXBUF = 1, 139 IBD_WQE_LSOBUF = 2, 140 IBD_WQE_MAPPED = 3 141 } ibd_wqe_buftype_t; 142 143 /* 144 * Pre-registered copybuf used for send and receive 145 */ 146 typedef struct ibd_copybuf_s { 147 ibt_wr_ds_t ic_sgl; 148 uint8_t *ic_bufaddr; 149 } ibd_copybuf_t; 150 151 typedef struct ibd_wqe_s { 152 struct ibd_wqe_s *w_next; 153 ibd_wqe_type_t w_type; 154 ibd_copybuf_t w_copybuf; 155 mblk_t *im_mblk; 156 } ibd_wqe_t; 157 158 /* 159 * Send WQE 160 */ 161 typedef struct ibd_swqe_s { 162 ibd_wqe_t w_ibd_swqe; 163 ibd_wqe_buftype_t w_buftype; 164 ibt_send_wr_t w_swr; 165 ibd_ace_t *w_ahandle; 166 ibt_mi_hdl_t w_mi_hdl; 167 ibt_wr_ds_t w_sgl[IBD_MAX_SQSEG]; 168 } ibd_swqe_t; 169 170 #define swqe_next w_ibd_swqe.w_next 171 #define swqe_type w_ibd_swqe.w_type 172 #define swqe_copybuf w_ibd_swqe.w_copybuf 173 #define swqe_im_mblk w_ibd_swqe.im_mblk 174 #define SWQE_TO_WQE(swqe) (ibd_wqe_t *)&((swqe)->w_ibd_swqe) 175 #define WQE_TO_SWQE(wqe) (ibd_swqe_t *)wqe 176 177 /* 178 * Receive WQE 179 */ 180 typedef struct ibd_rwqe_s { 181 ibd_wqe_t w_ibd_rwqe; 182 struct ibd_state_s *w_state; 183 ibt_recv_wr_t w_rwr; 184 boolean_t w_freeing_wqe; 185 frtn_t w_freemsg_cb; 186 } ibd_rwqe_t; 187 188 #define rwqe_next w_ibd_rwqe.w_next 189 #define rwqe_type w_ibd_rwqe.w_type 190 #define rwqe_copybuf w_ibd_rwqe.w_copybuf 191 #define rwqe_im_mblk w_ibd_rwqe.im_mblk 192 #define RWQE_TO_WQE(rwqe) (ibd_wqe_t *)&((rwqe)->w_ibd_rwqe) 193 #define WQE_TO_RWQE(wqe) (ibd_rwqe_t *)wqe 194 195 typedef struct ibd_list_s { 196 kmutex_t dl_mutex; 197 ibd_wqe_t *dl_head; 198 union { 199 boolean_t pending_sends; 200 uint32_t bufs_outstanding; 201 } ustat; 202 uint32_t dl_cnt; 203 } ibd_list_t; 204 205 #define dl_pending_sends ustat.pending_sends 206 #define dl_bufs_outstanding ustat.bufs_outstanding 207 208 /* 209 * LSO buffers 210 * 211 * Under normal circumstances we should never need to use any buffer 212 * that's larger than MTU. Unfortunately, IB HCA has limitations 213 * on the length of SGL that are much smaller than those for regular 214 * ethernet NICs. Since the network layer doesn't care to limit the 215 * number of mblk fragments in any send mp chain, we end up having to 216 * use these larger-than-MTU sized (larger than id_tx_buf_sz actually) 217 * buffers occasionally. 218 */ 219 typedef struct ibd_lsobuf_s { 220 struct ibd_lsobuf_s *lb_next; 221 uint8_t *lb_buf; 222 int lb_isfree; 223 } ibd_lsobuf_t; 224 225 typedef struct ibd_lsobkt_s { 226 uint8_t *bkt_mem; 227 ibd_lsobuf_t *bkt_bufl; 228 ibd_lsobuf_t *bkt_free_head; 229 ibt_mr_hdl_t bkt_mr_hdl; 230 ibt_mr_desc_t bkt_mr_desc; 231 uint_t bkt_nelem; 232 uint_t bkt_nfree; 233 } ibd_lsobkt_t; 234 235 /* 236 * Posting to a single software rx post queue is contentious, 237 * so break it out to (multiple) an array of queues. 238 * 239 * Try to ensure rx_queue structs fall in different cache lines using a filler. 240 * Note: the RX_QUEUE_CACHE_LINE needs to change if the struct changes. 241 */ 242 #define RX_QUEUE_CACHE_LINE \ 243 (64 - ((sizeof (kmutex_t) + 2 * sizeof (ibd_wqe_t *) + \ 244 2 * sizeof (uint32_t)))) 245 typedef struct ibd_rx_queue_s { 246 kmutex_t rx_post_lock; 247 ibd_wqe_t *rx_head; 248 ibd_wqe_t *rx_tail; 249 uint32_t rx_stat; 250 uint32_t rx_cnt; 251 uint8_t rx_cache_filler[RX_QUEUE_CACHE_LINE]; 252 } ibd_rx_queue_t; 253 254 /* 255 * This structure maintains information per port per HCA 256 * (per network interface). 257 */ 258 typedef struct ibd_state_s { 259 dev_info_t *id_dip; 260 ibt_clnt_hdl_t id_ibt_hdl; 261 ibt_hca_hdl_t id_hca_hdl; 262 ibt_pd_hdl_t id_pd_hdl; 263 kmem_cache_t *id_req_kmc; 264 265 ibd_list_t id_tx_rel_list; 266 267 uint32_t id_max_sqseg; 268 uint32_t id_max_sqseg_hiwm; 269 ibd_list_t id_tx_list; 270 ddi_softintr_t id_tx; 271 uint32_t id_tx_sends; 272 273 kmutex_t id_txpost_lock; 274 ibd_swqe_t *id_tx_head; 275 ibd_swqe_t *id_tx_tail; 276 int id_tx_busy; 277 278 uint_t id_tx_buf_sz; 279 uint8_t *id_tx_bufs; 280 ibd_swqe_t *id_tx_wqes; 281 ibt_mr_hdl_t id_tx_mr_hdl; 282 ibt_mr_desc_t id_tx_mr_desc; 283 284 kmutex_t id_lso_lock; 285 ibd_lsobkt_t *id_lso; 286 287 kmutex_t id_scq_poll_lock; 288 int id_scq_poll_busy; 289 290 ibt_cq_hdl_t id_scq_hdl; 291 ibt_wc_t *id_txwcs; 292 uint32_t id_txwcs_size; 293 294 kmutex_t id_rx_post_lock; 295 int id_rx_post_busy; 296 int id_rx_nqueues; 297 ibd_rx_queue_t *id_rx_queues; 298 ibd_wqe_t *id_rx_post_head; 299 300 ibd_rwqe_t *id_rx_wqes; 301 uint8_t *id_rx_bufs; 302 ibt_mr_hdl_t id_rx_mr_hdl; 303 ibt_mr_desc_t id_rx_mr_desc; 304 uint_t id_rx_buf_sz; 305 uint32_t id_num_rwqe; 306 ibd_list_t id_rx_list; 307 ddi_softintr_t id_rx; 308 uint32_t id_rx_bufs_outstanding_limit; 309 uint32_t id_rx_allocb; 310 uint32_t id_rx_allocb_failed; 311 ibd_list_t id_rx_free_list; 312 313 kmutex_t id_rcq_poll_lock; 314 int id_rcq_poll_busy; 315 uint32_t id_rxwcs_size; 316 ibt_wc_t *id_rxwcs; 317 ibt_cq_hdl_t id_rcq_hdl; 318 319 ibt_channel_hdl_t id_chnl_hdl; 320 ib_pkey_t id_pkey; 321 uint16_t id_pkix; 322 uint8_t id_port; 323 ibt_mcg_info_t *id_mcinfo; 324 325 mac_handle_t id_mh; 326 mac_resource_handle_t id_rh; 327 ib_gid_t id_sgid; 328 ib_qpn_t id_qpnum; 329 ipoib_mac_t id_macaddr; 330 ib_gid_t id_mgid; 331 ipoib_mac_t id_bcaddr; 332 333 int id_mtu; 334 uchar_t id_scope; 335 336 kmutex_t id_acache_req_lock; 337 kcondvar_t id_acache_req_cv; 338 struct list id_req_list; 339 kt_did_t id_async_thrid; 340 341 kmutex_t id_ac_mutex; 342 ibd_ace_t *id_ac_hot_ace; 343 struct list id_ah_active; 344 struct list id_ah_free; 345 ipoib_mac_t id_ah_addr; 346 ibd_req_t id_ah_req; 347 char id_ah_op; 348 uint64_t id_ah_error; 349 ibd_ace_t *id_ac_list; 350 mod_hash_t *id_ah_active_hash; 351 352 kmutex_t id_mc_mutex; 353 struct list id_mc_full; 354 struct list id_mc_non; 355 356 kmutex_t id_trap_lock; 357 kcondvar_t id_trap_cv; 358 boolean_t id_trap_stop; 359 uint32_t id_trap_inprog; 360 361 char id_prom_op; 362 363 kmutex_t id_sched_lock; 364 int id_sched_needed; 365 int id_sched_cnt; 366 int id_sched_lso_cnt; 367 368 kmutex_t id_link_mutex; 369 link_state_t id_link_state; 370 uint64_t id_link_speed; 371 372 uint64_t id_num_intrs; 373 uint64_t id_tx_short; 374 uint32_t id_num_swqe; 375 376 uint64_t id_xmt_bytes; 377 uint64_t id_rcv_bytes; 378 uint64_t id_multi_xmt; 379 uint64_t id_brd_xmt; 380 uint64_t id_multi_rcv; 381 uint64_t id_brd_rcv; 382 uint64_t id_xmt_pkt; 383 uint64_t id_rcv_pkt; 384 385 uint32_t id_hwcksum_capab; 386 boolean_t id_lso_policy; 387 boolean_t id_lso_capable; 388 uint_t id_lso_maxlen; 389 int id_hca_res_lkey_capab; 390 ibt_lkey_t id_res_lkey; 391 392 boolean_t id_bgroup_created; 393 kmutex_t id_macst_lock; 394 kcondvar_t id_macst_cv; 395 uint32_t id_mac_state; 396 } ibd_state_t; 397 398 #endif /* _KERNEL && !_BOOT */ 399 400 #ifdef __cplusplus 401 } 402 #endif 403 404 #endif /* _SYS_IB_CLIENTS_IBD_H */ 405