/*
 * Copyright (c) 2017-2018 Cavium, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */

#ifndef __ECORE_RDMA_H__
#define __ECORE_RDMA_H__

#include "ecore_status.h"
#include "ecore.h"
#include "ecore_hsi_common.h"
#include "ecore_proto_if.h"
#include "ecore_roce_api.h"
#include "ecore_dev_api.h"

/* Constants */

/* HW/FW RoCE limitations (internal; for external limits see ecore_rdma_api.h) */
#define ECORE_RDMA_MAX_FMR		(RDMA_MAX_TIDS)	/* 2^17 - 1 */
#define ECORE_RDMA_MAX_P_KEY		(1)
#define ECORE_RDMA_MAX_WQE		(0x7FFF)	/* 2^15 - 1 */
#define ECORE_RDMA_MAX_SRQ_WQE_ELEM	(0x7FFF)	/* 2^15 - 1 */
#define ECORE_RDMA_PAGE_SIZE_CAPS	(0xFFFFF000)	/* TODO: > 4k?! */
#define ECORE_RDMA_ACK_DELAY		(15)		/* 131 milliseconds */
#define ECORE_RDMA_MAX_MR_SIZE		(0x10000000000ULL) /* 2^40 */
#define ECORE_RDMA_MAX_CQS		(RDMA_MAX_CQS)	/* 64k */
#define ECORE_RDMA_MAX_MRS		(RDMA_MAX_TIDS)	/* 2^17 - 1 */
/* Add 1 for the header element */
#define ECORE_RDMA_MAX_SRQ_ELEM_PER_WQE	(RDMA_MAX_SGE_PER_RQ_WQE + 1)
#define ECORE_RDMA_MAX_SGE_PER_SRQ_WQE	(RDMA_MAX_SGE_PER_RQ_WQE)
#define ECORE_RDMA_SRQ_WQE_ELEM_SIZE	(16)
#define ECORE_RDMA_MAX_SRQS		(32 * 1024)	/* 32k */

/* Configurable */
/* The maximum CQE count is derived from the u16/u32 index size: the index
 * space is halved, decremented by 1 to handle wrap properly, and then
 * decremented by 1 again. The latter decrement comes from a requirement to
 * create a chain that is bigger than what the user requested by one:
 * the CQE size is 32 bytes, but for performance purposes the FW writes in
 * chunks of 64 bytes. Allocating an extra entry and telling the FW we have
 * one less prevents overwriting the first entry in case of a wrap, i.e.
 * when the FW writes the last entry and the application hasn't yet read
 * the first one.
 */
#define ECORE_RDMA_MAX_CQE_32_BIT	(0x7FFFFFFF - 1)
#define ECORE_RDMA_MAX_CQE_16_BIT	(0x7FFF - 1)
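
/* Worked example of the derivation above: a u16 index spans 2^16 = 0x10000
 * entries; halving gives 0x8000; subtracting 1 for wrap handling gives
 * 0x7FFF; subtracting 1 again, to hide the extra entry allocated beyond the
 * user's request, gives 0x7FFF - 1 = ECORE_RDMA_MAX_CQE_16_BIT. The u32
 * case is identical: 2^32 / 2 - 1 - 1 = 0x7FFFFFFF - 1.
 */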

enum ecore_rdma_toggle_bit {
	ECORE_RDMA_TOGGLE_BIT_CLEAR = 0,
	ECORE_RDMA_TOGGLE_BIT_SET = 1
};

/* @@@TBD Currently we support only affiliated events
 * enum ecore_rdma_unaffiliated_event_code {
 *	ECORE_RDMA_PORT_ACTIVE,			// Link Up
 *	ECORE_RDMA_PORT_CHANGED,		// SGID table has changed
 *	ECORE_RDMA_LOCAL_CATASTROPHIC_ERR,	// Fatal device error
 *	ECORE_RDMA_PORT_ERR,			// Link down
 * };
 */

#define QEDR_MAX_BMAP_NAME	(10)
struct ecore_bmap {
	u32		max_count;
	unsigned long	*bitmap;
	char		name[QEDR_MAX_BMAP_NAME];
};

/* Functions for enabling/disabling EDPM in RDMA PFs, according to the
 * existence of QPs during a DCBx update, or according to the bar size.
 */
void ecore_roce_dpm_dcbx(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);
void ecore_rdma_dpm_bar(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);

#ifdef CONFIG_ECORE_IWARP

#define ECORE_IWARP_PREALLOC_CNT	(256)

#define ECORE_IWARP_LL2_SYN_TX_SIZE	(128)
#define ECORE_IWARP_LL2_SYN_RX_SIZE	(256)

#define ECORE_IWARP_LL2_OOO_DEF_TX_SIZE	(256)
#define ECORE_IWARP_LL2_OOO_DEF_RX_SIZE	(4096)
#define ECORE_IWARP_LL2_OOO_MAX_RX_SIZE	(16384)

#define ECORE_IWARP_MAX_SYN_PKT_SIZE	(128)
#define ECORE_IWARP_HANDLE_INVAL	(0xff)

struct ecore_iwarp_ll2_buff {
	struct ecore_iwarp_ll2_buff	*piggy_buf;
	void				*data;
	dma_addr_t			data_phys_addr;
	u32				buff_size;
};

struct ecore_iwarp_ll2_mpa_buf {
	osal_list_entry_t		list_entry;
	struct ecore_iwarp_ll2_buff	*ll2_buf;
	struct unaligned_opaque_data	data;
	u16				tcp_payload_len;
	u8				placement_offset;
};

/* In some cases an FPDU will arrive with only one byte of the header; in
 * that case fpdu_length is partial (it contains only the higher byte) and
 * incomplete_bytes contains the invalid value below. See the illustrative
 * helper after struct ecore_iwarp_fpdu.
 */
#define ECORE_IWARP_INVALID_INCOMPLETE_BYTES 0xffff

struct ecore_iwarp_fpdu {
	struct ecore_iwarp_ll2_buff	*mpa_buf;
	dma_addr_t			pkt_hdr;
	u8				pkt_hdr_size;
	dma_addr_t			mpa_frag;
	void				*mpa_frag_virt;
	u16				mpa_frag_len;
	u16				fpdu_length;
	u16				incomplete_bytes;
};
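
/* Illustrative helper, not part of the original driver interface: per the
 * comment above, a partially received FPDU header can be recognized by
 * incomplete_bytes still holding the invalid marker.
 */
static inline bool
ecore_iwarp_fpdu_hdr_partial(struct ecore_iwarp_fpdu *fpdu)
{
	return (fpdu->incomplete_bytes == ECORE_IWARP_INVALID_INCOMPLETE_BYTES);
}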

struct ecore_iwarp_info {
	osal_list_t			listen_list;	/* ecore_iwarp_listener */
	osal_list_t			ep_list;	/* ecore_iwarp_ep */
	osal_list_t			ep_free_list;	/* pre-allocated EPs */
	osal_list_t			mpa_buf_list;	/* list of mpa_bufs */
	osal_list_t			mpa_buf_pending_list;
	osal_spinlock_t			iw_lock;
	osal_spinlock_t			qp_lock;	/* for teardown races */
	struct iwarp_rxmit_stats_drv	stats;
	u32				rcv_wnd_scale;
	u16				max_mtu;
	u16				num_ooo_rx_bufs;
	u8				mac_addr[ETH_ALEN];
	u8				crc_needed;
	u8				tcp_flags;
	u8				ll2_syn_handle;
	u8				ll2_ooo_handle;
	u8				ll2_mpa_handle;
	u8				peer2peer;
	u8				_pad;
	enum mpa_negotiation_mode	mpa_rev;
	enum mpa_rtr_type		rtr_type;
	struct ecore_iwarp_fpdu		*partial_fpdus;
	struct ecore_iwarp_ll2_mpa_buf	*mpa_bufs;
	u8				*mpa_intermediate_buf;
	u16				max_num_partial_fpdus;

	/* MPA statistics */
	u64				unalign_rx_comp;
};
#endif

#define IS_ECORE_DCQCN(p_hwfn)	\
	(!!(p_hwfn->pf_params.rdma_pf_params.enable_dcqcn))

struct ecore_roce_info {
	struct roce_events_stats	event_stats;

	u8				dcqcn_enabled;
	u8				dcqcn_reaction_point;
};

struct ecore_rdma_info {
	osal_spinlock_t			lock;

	struct ecore_bmap		cq_map;
	struct ecore_bmap		pd_map;
	struct ecore_bmap		tid_map;
	struct ecore_bmap		srq_map;
	struct ecore_bmap		cid_map;
	struct ecore_bmap		tcp_cid_map;
	struct ecore_bmap		real_cid_map;
	struct ecore_bmap		dpi_map;
	struct ecore_bmap		toggle_bits;
	struct ecore_rdma_events	events;
	struct ecore_rdma_device	*dev;
	struct ecore_rdma_port		*port;
	u32				last_tid;
	u8				num_cnqs;
	struct rdma_sent_stats		rdma_sent_pstats;
	struct rdma_rcv_stats		rdma_rcv_tstats;
	u32				num_qps;
	u32				num_mrs;
	u32				num_srqs;
	u16				queue_zone_base;
	u16				max_queue_zones;
	enum protocol_type		proto;
	struct ecore_roce_info		roce;
#ifdef CONFIG_ECORE_IWARP
	struct ecore_iwarp_info		iwarp;
#endif
};

#ifdef CONFIG_ECORE_IWARP
enum ecore_iwarp_qp_state {
	ECORE_IWARP_QP_STATE_IDLE,
	ECORE_IWARP_QP_STATE_RTS,
	ECORE_IWARP_QP_STATE_TERMINATE,
	ECORE_IWARP_QP_STATE_CLOSING,
	ECORE_IWARP_QP_STATE_ERROR,
};
#endif

struct ecore_rdma_qp {
	struct regpair		qp_handle;
	struct regpair		qp_handle_async;
	u32			qpid;	/* iWARP: may differ from icid */
	u16			icid;
	enum ecore_roce_qp_state cur_state;
#ifdef CONFIG_ECORE_IWARP
	enum ecore_iwarp_qp_state iwarp_state;
#endif
	bool			use_srq;
	bool			signal_all;
	bool			fmr_and_reserved_lkey;

	bool			incoming_rdma_read_en;
	bool			incoming_rdma_write_en;
	bool			incoming_atomic_en;
	bool			e2e_flow_control_en;

	u16			pd;	/* Protection domain */
	u16			pkey;	/* Primary P_key index */
	u32			dest_qp;
	u16			mtu;
	u16			srq_id;
	u8			traffic_class_tos; /* IPv6/GRH traffic class; IPv4 TOS */
	u8			hop_limit_ttl;	/* IPv6/GRH hop limit; IPv4 TTL */
	u16			dpi;
	u32			flow_label;	/* ignored in IPv4 */
	u16			vlan_id;
	u32			ack_timeout;
	u8			retry_cnt;
	u8			rnr_retry_cnt;
	u8			min_rnr_nak_timer;
	bool			sqd_async;
	union ecore_gid		sgid;		/* GRH SGID; IPv4/6 source IP */
	union ecore_gid		dgid;		/* GRH DGID; IPv4/6 destination IP */
	enum roce_mode		roce_mode;
	u16			udp_src_port;	/* RoCEv2 only */
	u8			stats_queue;

	/* requester */
	u8			max_rd_atomic_req;
	u32			sq_psn;
	u16			sq_cq_id;	/* The CQ to be associated with the send queue */
	u16			sq_num_pages;
	dma_addr_t		sq_pbl_ptr;
	void			*orq;
	dma_addr_t		orq_phys_addr;
	u8			orq_num_pages;
	bool			req_offloaded;

	/* responder */
	u8			max_rd_atomic_resp;
	u32			rq_psn;
	u16			rq_cq_id;	/* The CQ to be associated with the receive queue */
	u16			rq_num_pages;
	dma_addr_t		rq_pbl_ptr;
	void			*irq;
	dma_addr_t		irq_phys_addr;
	u8			irq_num_pages;
	bool			resp_offloaded;
	u32			cq_prod;

	u8			remote_mac_addr[6];
	u8			local_mac_addr[6];

	void			*shared_queue;
	dma_addr_t		shared_queue_phys_addr;
#ifdef CONFIG_ECORE_IWARP
	struct ecore_iwarp_ep	*ep;
#endif
};

#ifdef CONFIG_ECORE_IWARP

enum ecore_iwarp_ep_state {
	ECORE_IWARP_EP_INIT,
	ECORE_IWARP_EP_MPA_REQ_RCVD,
	ECORE_IWARP_EP_ESTABLISHED,
	ECORE_IWARP_EP_CLOSED
};

union async_output {
	struct iwarp_eqe_data_mpa_async_completion mpa_response;
	struct iwarp_eqe_data_tcp_async_completion mpa_request;
};

#define ECORE_MAX_PRIV_DATA_LEN (512)
struct ecore_iwarp_ep_memory {
	u8			in_pdata[ECORE_MAX_PRIV_DATA_LEN];
	u8			out_pdata[ECORE_MAX_PRIV_DATA_LEN];
	union async_output	async_output;
};

/* An endpoint structure represents a TCP connection. The connection may or
 * may not be associated with a QP (qp == NULL when it is not).
 */
struct ecore_iwarp_ep {
	osal_list_entry_t	list_entry;
	int			sig;
	struct ecore_rdma_qp	*qp;
	enum ecore_iwarp_ep_state state;

	/* This buffer contains all of the memory required for the ep. It is
	 * the only allocation actually made and freed; the rest are pointers
	 * into this buffer. See the allocation sketch after this structure.
	 */
	struct ecore_iwarp_ep_memory *ep_buffer_virt;
	dma_addr_t		ep_buffer_phys;

	struct ecore_iwarp_cm_info cm_info;
	enum tcp_connect_mode	connect_mode;
	enum mpa_rtr_type	rtr_type;
	enum mpa_negotiation_mode mpa_rev;
	u32			tcp_cid;
	u32			cid;
	u8			remote_mac_addr[6];
	u8			local_mac_addr[6];
	u16			mss;
	bool			mpa_reply_processed;

	/* The event_cb function is called for asynchronous events associated
	 * with the ep. It is initialized at different entry points depending
	 * on whether the ep is the active or the passive side of the TCP
	 * connection. The cb_context is passed to the event_cb function.
	 */
	iwarp_event_handler	event_cb;
	void			*cb_context;

	/* For the passive side - SYN packet related data */
	struct ecore_iwarp_ll2_buff *syn;
	u16			syn_ip_payload_length;
	dma_addr_t		syn_phy_addr;
};
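
/* Minimal sketch, not a driver entry point: one way the single ep_buffer
 * allocation described above might be obtained, assuming the usual ecore
 * OSAL DMA helpers; ecore_iwarp_ep_alloc_mem is an invented name.
 */
static inline enum _ecore_status_t
ecore_iwarp_ep_alloc_mem(struct ecore_hwfn *p_hwfn, struct ecore_iwarp_ep *ep)
{
	/* One coherent allocation covers in_pdata, out_pdata and
	 * async_output; nothing inside it is allocated separately.
	 */
	ep->ep_buffer_virt = OSAL_DMA_ALLOC_COHERENT(p_hwfn->p_dev,
						     &ep->ep_buffer_phys,
						     sizeof(*ep->ep_buffer_virt));
	if (ep->ep_buffer_virt == OSAL_NULL)
		return ECORE_NOMEM;

	return ECORE_SUCCESS;
}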

struct ecore_iwarp_listener {
	osal_list_entry_t	list_entry;

	/* The event_cb function is called for connection requests.
	 * The cb_context is passed to the event_cb function.
	 */
	iwarp_event_handler	event_cb;
	void			*cb_context;
	u32			max_backlog;
	u8			ip_version;
	u32			ip_addr[4];
	u16			port;
	u16			vlan;
};

void ecore_iwarp_async_event(struct ecore_hwfn *p_hwfn,
			     u8 fw_event_code,
			     struct regpair *fw_handle,
			     u8 fw_return_code);

#endif /* CONFIG_ECORE_IWARP */

void ecore_roce_async_event(struct ecore_hwfn *p_hwfn,
			    u8 fw_event_code,
			    union rdma_eqe_data *rdma_data);

#endif /*__ECORE_RDMA_H__*/