1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _SYS_IB_CLIENTS_IBD_H 28 #define _SYS_IB_CLIENTS_IBD_H 29 30 #ifdef __cplusplus 31 extern "C" { 32 #endif 33 34 /* 35 * IETF defined IPoIB encapsulation header, with 2b of ethertype 36 * followed by 2 reserved bytes. This is at the start of the 37 * datagram sent to and received over the wire by the driver. 38 */ 39 typedef struct ipoib_header { 40 ushort_t ipoib_type; 41 ushort_t ipoib_mbz; 42 } ipoib_hdr_t; 43 44 #define IPOIB_HDRSIZE sizeof (struct ipoib_header) 45 46 /* 47 * IETF defined IPoIB link address; IBA QPN, followed by GID, 48 * which has a prefix and suffix, as reported via ARP. 49 */ 50 typedef struct ipoib_mac { 51 uint32_t ipoib_qpn; 52 uint32_t ipoib_gidpref[2]; 53 uint32_t ipoib_gidsuff[2]; 54 } ipoib_mac_t; 55 56 #define IPOIB_ADDRL sizeof (struct ipoib_mac) 57 58 /* 59 * Pseudo header prepended to datagram in DLIOCRAW transmit path 60 * and when GLD hands the datagram to the gldm_send entry point. 61 */ 62 typedef struct ipoib_ptxhdr { 63 ipoib_mac_t ipoib_dest; 64 ipoib_hdr_t ipoib_rhdr; 65 } ipoib_ptxhdr_t; 66 67 #define IPOIBDLSAP(p, offset) ((ipoib_ptxhdr_t *)((caddr_t)(p)+offset)) 68 69 /* 70 * The pseudo-GRH structure that sits before the data in the 71 * receive buffer, and is overlaid on top of the real GRH. 72 * The driver sets the ipoib_vertcflow to 0 if the pseudo-GRH 73 * does not hold valid information. If it is indicated valid, 74 * the driver must additionally provide the sender's qpn in 75 * network byte order in ipoib_sqpn, and not touch the 76 * remaining parts which were DMA'ed in by the IBA hardware. 77 */ 78 typedef struct ipoib_pgrh { 79 uint32_t ipoib_vertcflow; 80 uint32_t ipoib_sqpn; 81 uint32_t ipoib_sgid_pref[2]; 82 uint32_t ipoib_sgid_suff[2]; 83 uint32_t ipoib_dgid_pref[2]; 84 uint32_t ipoib_dgid_suff[2]; 85 } ipoib_pgrh_t; 86 87 /* 88 * The GRH is also dma'ed into recv buffers, thus space needs 89 * to be allocated for them. 90 */ 91 #define IPOIB_GRH_SIZE sizeof (ipoib_pgrh_t) 92 93 #if defined(_KERNEL) && !defined(_BOOT) 94 95 #include <sys/ib/ibtl/ibti.h> 96 #include <sys/ib/ib_pkt_hdrs.h> 97 #include <sys/list.h> 98 #include <sys/mac_provider.h> 99 #include <sys/mac_ib.h> 100 #include <sys/modhash.h> 101 102 /* 103 * Structure to encapsulate various types of async requests. 104 */ 105 typedef struct ibd_acache_rq { 106 struct list_node rq_list; /* list of pending work */ 107 int rq_op; /* what operation */ 108 ipoib_mac_t rq_mac; 109 ib_gid_t rq_gid; 110 void *rq_ptr; 111 } ibd_req_t; 112 113 typedef struct ibd_mcache { 114 struct list_node mc_list; /* full/non list */ 115 uint8_t mc_jstate; 116 boolean_t mc_fullreap; 117 ibt_mcg_info_t mc_info; 118 ibd_req_t mc_req; /* to queue LEAVE req */ 119 } ibd_mce_t; 120 121 typedef struct ibd_acache_s { 122 struct list_node ac_list; /* free/active list */ 123 ibt_ud_dest_hdl_t ac_dest; 124 ipoib_mac_t ac_mac; 125 uint32_t ac_ref; 126 ibd_mce_t *ac_mce; /* for MCG AHs */ 127 } ibd_ace_t; 128 129 #define IBD_MAX_SQSEG 59 130 #define IBD_MAX_RQSEG 1 131 132 typedef enum { 133 IBD_WQE_SEND, 134 IBD_WQE_RECV 135 } ibd_wqe_type_t; 136 137 typedef enum { 138 IBD_WQE_TXBUF = 1, 139 IBD_WQE_LSOBUF = 2, 140 IBD_WQE_MAPPED = 3 141 } ibd_wqe_buftype_t; 142 143 /* 144 * Pre-registered copybuf used for send and receive 145 */ 146 typedef struct ibd_copybuf_s { 147 ibt_mr_hdl_t ic_mr_hdl; 148 ibt_wr_ds_t ic_sgl; 149 ibt_mr_desc_t ic_mr_desc; 150 uint8_t *ic_bufaddr; 151 } ibd_copybuf_t; 152 153 typedef struct ibd_wqe_s { 154 struct ibd_wqe_s *w_next; 155 struct ibd_wqe_s *w_prev; 156 ibd_wqe_type_t w_type; 157 ibd_copybuf_t w_copybuf; 158 mblk_t *im_mblk; 159 } ibd_wqe_t; 160 161 /* 162 * Send WQE 163 */ 164 typedef struct ibd_swqe_s { 165 ibd_wqe_t w_ibd_swqe; 166 ibd_wqe_buftype_t w_buftype; 167 ibt_send_wr_t w_swr; 168 ibd_ace_t *w_ahandle; 169 ibt_mi_hdl_t w_mi_hdl; 170 ibt_wr_ds_t w_sgl[IBD_MAX_SQSEG]; 171 } ibd_swqe_t; 172 173 #define swqe_next w_ibd_swqe.w_next 174 #define swqe_prev w_ibd_swqe.w_prev 175 #define swqe_type w_ibd_swqe.w_type 176 #define swqe_copybuf w_ibd_swqe.w_copybuf 177 #define swqe_im_mblk w_ibd_swqe.im_mblk 178 #define SWQE_TO_WQE(swqe) (ibd_wqe_t *)&((swqe)->w_ibd_swqe) 179 #define WQE_TO_SWQE(wqe) (ibd_swqe_t *)wqe 180 181 /* 182 * Receive WQE 183 */ 184 typedef struct ibd_rwqe_s { 185 ibd_wqe_t w_ibd_rwqe; 186 struct ibd_state_s *w_state; 187 ibt_recv_wr_t w_rwr; 188 boolean_t w_freeing_wqe; 189 frtn_t w_freemsg_cb; 190 ibd_wqe_t *w_post_link; 191 } ibd_rwqe_t; 192 193 #define rwqe_next w_ibd_rwqe.w_next 194 #define rwqe_prev w_ibd_rwqe.w_prev 195 #define rwqe_type w_ibd_rwqe.w_type 196 #define rwqe_copybuf w_ibd_rwqe.w_copybuf 197 #define rwqe_im_mblk w_ibd_rwqe.im_mblk 198 #define RWQE_TO_WQE(rwqe) (ibd_wqe_t *)&((rwqe)->w_ibd_rwqe) 199 #define WQE_TO_RWQE(wqe) (ibd_rwqe_t *)wqe 200 201 typedef struct ibd_list_s { 202 ibd_wqe_t *dl_head; 203 ibd_wqe_t *dl_tail; 204 union { 205 boolean_t pending_sends; 206 uint32_t bufs_outstanding; 207 } ustat; 208 uint32_t dl_cnt; 209 kmutex_t dl_mutex; 210 } ibd_list_t; 211 212 #define dl_pending_sends ustat.pending_sends 213 #define dl_bufs_outstanding ustat.bufs_outstanding 214 215 /* 216 * LSO buffers 217 * 218 * Under normal circumstances we should never need to use any buffer 219 * that's larger than MTU. Unfortunately, IB HCA has limitations 220 * on the length of SGL that are much smaller than those for regular 221 * ethernet NICs. Since the network layer doesn't care to limit the 222 * number of mblk fragments in any send mp chain, we end up having to 223 * use these larger-than-MTU sized (larger than id_tx_buf_sz actually) 224 * buffers occasionally. 225 */ 226 typedef struct ibd_lsobuf_s { 227 struct ibd_lsobuf_s *lb_next; 228 uint8_t *lb_buf; 229 int lb_isfree; 230 } ibd_lsobuf_t; 231 232 typedef struct ibd_lsobkt_s { 233 uint8_t *bkt_mem; 234 ibd_lsobuf_t *bkt_bufl; 235 ibd_lsobuf_t *bkt_free_head; 236 ibt_mr_hdl_t bkt_mr_hdl; 237 ibt_mr_desc_t bkt_mr_desc; 238 uint_t bkt_nelem; 239 uint_t bkt_nfree; 240 } ibd_lsobkt_t; 241 242 /* 243 * This structure maintains information per port per HCA 244 * (per network interface). 245 */ 246 typedef struct ibd_state_s { 247 dev_info_t *id_dip; 248 ibt_clnt_hdl_t id_ibt_hdl; 249 ibt_hca_hdl_t id_hca_hdl; 250 ibt_pd_hdl_t id_pd_hdl; 251 kmem_cache_t *id_req_kmc; 252 253 uint32_t id_max_sqseg; 254 ibd_list_t id_tx_list; 255 ddi_softintr_t id_tx; 256 uint32_t id_tx_sends; 257 258 uint8_t *id_tx_bufs; 259 ibt_mr_hdl_t id_tx_mr_hdl; 260 ibt_mr_desc_t id_tx_mr_desc; 261 uint_t id_tx_buf_sz; 262 263 kmutex_t id_lso_lock; 264 ibd_lsobkt_t *id_lso; 265 266 kmutex_t id_cq_poll_lock; 267 int id_cq_poll_busy; 268 269 ibt_cq_hdl_t id_scq_hdl; 270 ibt_wc_t *id_txwcs; 271 uint32_t id_txwcs_size; 272 273 kmutex_t id_txpost_lock; 274 ibd_swqe_t *id_tx_head; 275 ibd_wqe_t **id_tx_tailp; 276 int id_tx_busy; 277 278 kmutex_t id_rxpost_lock; 279 ibd_rwqe_t *id_rx_head; 280 ibd_wqe_t **id_rx_tailp; 281 int id_rx_busy; 282 283 kmutex_t id_rx_lock; 284 mblk_t *id_rx_mp; 285 mblk_t *id_rx_mp_tail; 286 uint32_t id_rx_mp_len; 287 288 uint32_t id_num_rwqe; 289 ibd_list_t id_rx_list; 290 ddi_softintr_t id_rx; 291 ibt_cq_hdl_t id_rcq_hdl; 292 ibt_wc_t *id_rxwcs; 293 uint32_t id_rxwcs_size; 294 295 ibt_channel_hdl_t id_chnl_hdl; 296 ib_pkey_t id_pkey; 297 uint16_t id_pkix; 298 uint8_t id_port; 299 ibt_mcg_info_t *id_mcinfo; 300 301 mac_handle_t id_mh; 302 mac_resource_handle_t id_rh; 303 ib_gid_t id_sgid; 304 ib_qpn_t id_qpnum; 305 ipoib_mac_t id_macaddr; 306 ib_gid_t id_mgid; 307 ipoib_mac_t id_bcaddr; 308 309 int id_mtu; 310 uchar_t id_scope; 311 312 kmutex_t id_acache_req_lock; 313 kcondvar_t id_acache_req_cv; 314 struct list id_req_list; 315 kt_did_t id_async_thrid; 316 317 kmutex_t id_ac_mutex; 318 struct list id_ah_active; 319 struct list id_ah_free; 320 ipoib_mac_t id_ah_addr; 321 ibd_req_t id_ah_req; 322 char id_ah_op; 323 uint64_t id_ah_error; 324 ibd_ace_t *id_ac_list; 325 mod_hash_t *id_ah_active_hash; 326 327 kmutex_t id_mc_mutex; 328 struct list id_mc_full; 329 struct list id_mc_non; 330 331 kmutex_t id_trap_lock; 332 kcondvar_t id_trap_cv; 333 boolean_t id_trap_stop; 334 uint32_t id_trap_inprog; 335 336 char id_prom_op; 337 338 kmutex_t id_sched_lock; 339 int id_sched_needed; 340 341 kmutex_t id_link_mutex; 342 link_state_t id_link_state; 343 uint64_t id_link_speed; 344 345 uint64_t id_num_intrs; 346 uint64_t id_tx_short; 347 uint32_t id_num_swqe; 348 349 uint64_t id_xmt_bytes; 350 uint64_t id_rcv_bytes; 351 uint64_t id_multi_xmt; 352 uint64_t id_brd_xmt; 353 uint64_t id_multi_rcv; 354 uint64_t id_brd_rcv; 355 uint64_t id_xmt_pkt; 356 uint64_t id_rcv_pkt; 357 358 uint32_t id_hwcksum_capab; 359 boolean_t id_lso_policy; 360 boolean_t id_lso_capable; 361 uint_t id_lso_maxlen; 362 int id_hca_res_lkey_capab; 363 ibt_lkey_t id_res_lkey; 364 365 boolean_t id_bgroup_created; 366 kmutex_t id_macst_lock; 367 kcondvar_t id_macst_cv; 368 uint32_t id_mac_state; 369 } ibd_state_t; 370 371 #endif /* _KERNEL && !_BOOT */ 372 373 #ifdef __cplusplus 374 } 375 #endif 376 377 #endif /* _SYS_IB_CLIENTS_IBD_H */ 378