/*
 * Copyright (c) 2017 Mellanox Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef _MLX5DV_H_
#define _MLX5DV_H_

#include <infiniband/types.h> /* For the __be64 type */
#include <infiniband/endian.h>

#if defined(__SSE3__)
#include <emmintrin.h>
#include <tmmintrin.h>
#endif /* defined(__SSE3__) */

#include <infiniband/verbs.h>

/* Always inline the functions */
#ifdef __GNUC__
#define MLX5DV_ALWAYS_INLINE inline __attribute__((always_inline))
#else
#define MLX5DV_ALWAYS_INLINE inline
#endif

enum {
	MLX5_RCV_DBR = 0,
	MLX5_SND_DBR = 1,
};

enum mlx5dv_context_comp_mask {
	MLX5DV_CONTEXT_MASK_CQE_COMPRESION = 1 << 0,
	MLX5DV_CONTEXT_MASK_RESERVED       = 1 << 1,
};

struct mlx5dv_cqe_comp_caps {
	uint32_t max_num;
	uint32_t supported_format; /* enum mlx5dv_cqe_comp_res_format */
};

/*
 * Direct verbs device-specific attributes
 */
struct mlx5dv_context {
	uint8_t  version;
	uint64_t flags;
	uint64_t comp_mask;
	struct mlx5dv_cqe_comp_caps cqe_comp_caps;
};

enum mlx5dv_context_flags {
	/*
	 * This flag indicates if CQE version 0 or 1 is needed.
	 */
	MLX5DV_CONTEXT_FLAGS_CQE_V1 = (1 << 0),
	MLX5DV_CONTEXT_FLAGS_MPW    = (1 << 1),
};

enum mlx5dv_cq_init_attr_mask {
	MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE = 1 << 0,
	MLX5DV_CQ_INIT_ATTR_MASK_RESERVED       = 1 << 1,
};

struct mlx5dv_cq_init_attr {
	uint64_t comp_mask;           /* Use enum mlx5dv_cq_init_attr_mask */
	uint8_t  cqe_comp_res_format; /* Use enum mlx5dv_cqe_comp_res_format */
};

struct ibv_cq_ex *mlx5dv_create_cq(struct ibv_context *context,
				   struct ibv_cq_init_attr_ex *cq_attr,
				   struct mlx5dv_cq_init_attr *mlx5_cq_attr);
/*
 * Most device capabilities are exported by ibv_query_device(...),
 * but some HW device-specific information that is important for the
 * data path is not provided by it.
 *
 * Return 0 on success.
 */
int mlx5dv_query_device(struct ibv_context *ctx_in,
			struct mlx5dv_context *attrs_out);
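
/*
 * Usage sketch (illustrative only, not part of the API declared in this
 * file): query the mlx5-specific attributes and, when the device reports
 * CQE compression support, create a CQ that requests compressed CQEs.
 * "ctx" is an already-opened ibv_context; the CQ size and the hash result
 * format (the enum is defined further down in this file) are assumptions
 * made for the example, and error handling is omitted.
 *
 *	struct mlx5dv_context dv_attr = {
 *		.comp_mask = MLX5DV_CONTEXT_MASK_CQE_COMPRESION,
 *	};
 *	struct ibv_cq_init_attr_ex cq_attr = { .cqe = 256 };
 *	struct mlx5dv_cq_init_attr dv_cq_attr = {};
 *	struct ibv_cq_ex *cq;
 *
 *	if (!mlx5dv_query_device(ctx, &dv_attr) &&
 *	    (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_CQE_COMPRESION) &&
 *	    (dv_attr.cqe_comp_caps.supported_format & MLX5DV_CQE_RES_FORMAT_HASH)) {
 *		dv_cq_attr.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
 *		dv_cq_attr.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
 *	}
 *	cq = mlx5dv_create_cq(ctx, &cq_attr, &dv_cq_attr);
 */
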
struct mlx5dv_qp {
	uint32_t *dbrec;
	struct {
		void     *buf;
		uint32_t wqe_cnt;
		uint32_t stride;
	} sq;
	struct {
		void     *buf;
		uint32_t wqe_cnt;
		uint32_t stride;
	} rq;
	struct {
		void     *reg;
		uint32_t size;
	} bf;
	uint64_t comp_mask;
};

struct mlx5dv_cq {
	void     *buf;
	uint32_t *dbrec;
	uint32_t cqe_cnt;
	uint32_t cqe_size;
	void     *uar;
	uint32_t cqn;
	uint64_t comp_mask;
};

struct mlx5dv_srq {
	void     *buf;
	uint32_t *dbrec;
	uint32_t stride;
	uint32_t head;
	uint32_t tail;
	uint64_t comp_mask;
};

struct mlx5dv_rwq {
	void     *buf;
	uint32_t *dbrec;
	uint32_t wqe_cnt;
	uint32_t stride;
	uint64_t comp_mask;
};

struct mlx5dv_obj {
	struct {
		struct ibv_qp    *in;
		struct mlx5dv_qp *out;
	} qp;
	struct {
		struct ibv_cq    *in;
		struct mlx5dv_cq *out;
	} cq;
	struct {
		struct ibv_srq    *in;
		struct mlx5dv_srq *out;
	} srq;
	struct {
		struct ibv_wq     *in;
		struct mlx5dv_rwq *out;
	} rwq;
};

enum mlx5dv_obj_type {
	MLX5DV_OBJ_QP  = 1 << 0,
	MLX5DV_OBJ_CQ  = 1 << 1,
	MLX5DV_OBJ_SRQ = 1 << 2,
	MLX5DV_OBJ_RWQ = 1 << 3,
};

/*
 * This function will initialize mlx5dv_xxx structs based on the supplied type.
 * The information for initialization is taken from the ibv_xx structs supplied
 * as part of the input.
 *
 * Requesting information about a CQ marks it as owned by DV for all
 * consumer-index related actions.
 *
 * The initialization type can be a combination of several types together.
 *
 * Return: 0 in case of success.
 */
int mlx5dv_init_obj(struct mlx5dv_obj *obj, uint64_t obj_type);
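
/*
 * Usage sketch (illustrative only): obtain direct access to the raw
 * structures of an existing QP and its CQ.  "qp" and "cq" are assumed to
 * have been created beforehand with the regular verbs API.
 *
 *	struct mlx5dv_qp dv_qp;
 *	struct mlx5dv_cq dv_cq;
 *	struct mlx5dv_obj dv_obj = {
 *		.qp = { .in = qp, .out = &dv_qp },
 *		.cq = { .in = cq, .out = &dv_cq },
 *	};
 *
 *	if (mlx5dv_init_obj(&dv_obj, MLX5DV_OBJ_QP | MLX5DV_OBJ_CQ))
 *		return -1;
 *
 * On success dv_qp.sq.buf, dv_qp.dbrec, dv_cq.buf, dv_cq.dbrec etc. point
 * at the driver-owned resources and may be used to build WQEs and parse
 * CQEs directly.
 */
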
enum {
	MLX5_OPCODE_NOP              = 0x00,
	MLX5_OPCODE_SEND_INVAL       = 0x01,
	MLX5_OPCODE_RDMA_WRITE       = 0x08,
	MLX5_OPCODE_RDMA_WRITE_IMM   = 0x09,
	MLX5_OPCODE_SEND             = 0x0a,
	MLX5_OPCODE_SEND_IMM         = 0x0b,
	MLX5_OPCODE_TSO              = 0x0e,
	MLX5_OPCODE_RDMA_READ        = 0x10,
	MLX5_OPCODE_ATOMIC_CS        = 0x11,
	MLX5_OPCODE_ATOMIC_FA        = 0x12,
	MLX5_OPCODE_ATOMIC_MASKED_CS = 0x14,
	MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15,
	MLX5_OPCODE_FMR              = 0x19,
	MLX5_OPCODE_LOCAL_INVAL      = 0x1b,
	MLX5_OPCODE_CONFIG_CMD       = 0x1f,
	MLX5_OPCODE_UMR              = 0x25,
};

/*
 * CQE related part
 */

enum {
	MLX5_INLINE_SCATTER_32 = 0x4,
	MLX5_INLINE_SCATTER_64 = 0x8,
};

enum {
	MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR        = 0x01,
	MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR         = 0x02,
	MLX5_CQE_SYNDROME_LOCAL_PROT_ERR          = 0x04,
	MLX5_CQE_SYNDROME_WR_FLUSH_ERR            = 0x05,
	MLX5_CQE_SYNDROME_MW_BIND_ERR             = 0x06,
	MLX5_CQE_SYNDROME_BAD_RESP_ERR            = 0x10,
	MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR        = 0x11,
	MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR    = 0x12,
	MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR       = 0x13,
	MLX5_CQE_SYNDROME_REMOTE_OP_ERR           = 0x14,
	MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15,
	MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR       = 0x16,
	MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR      = 0x22,
};

enum {
	MLX5_CQE_L2_OK = 1 << 0,
	MLX5_CQE_L3_OK = 1 << 1,
	MLX5_CQE_L4_OK = 1 << 2,
};

enum {
	MLX5_CQE_L3_HDR_TYPE_NONE = 0x0,
	MLX5_CQE_L3_HDR_TYPE_IPV6 = 0x1,
	MLX5_CQE_L3_HDR_TYPE_IPV4 = 0x2,
};

enum {
	MLX5_CQE_OWNER_MASK    = 1,
	MLX5_CQE_REQ           = 0,
	MLX5_CQE_RESP_WR_IMM   = 1,
	MLX5_CQE_RESP_SEND     = 2,
	MLX5_CQE_RESP_SEND_IMM = 3,
	MLX5_CQE_RESP_SEND_INV = 4,
	MLX5_CQE_RESIZE_CQ     = 5,
	MLX5_CQE_REQ_ERR       = 13,
	MLX5_CQE_RESP_ERR      = 14,
	MLX5_CQE_INVALID       = 15,
};

enum {
	MLX5_CQ_DOORBELL = 0x20
};

enum {
	MLX5_CQ_DB_REQ_NOT_SOL = 1 << 24,
	MLX5_CQ_DB_REQ_NOT     = 0 << 24,
};

struct mlx5_err_cqe {
	uint8_t  rsvd0[32];
	uint32_t srqn;
	uint8_t  rsvd1[18];
	uint8_t  vendor_err_synd;
	uint8_t  syndrome;
	uint32_t s_wqe_opcode_qpn;
	uint16_t wqe_counter;
	uint8_t  signature;
	uint8_t  op_own;
};

struct mlx5_cqe64 {
	uint8_t  rsvd0[17];
	uint8_t  ml_path;
	uint8_t  rsvd20[4];
	uint16_t slid;
	uint32_t flags_rqpn;
	uint8_t  hds_ip_ext;
	uint8_t  l4_hdr_type_etc;
	uint16_t vlan_info;
	uint32_t srqn_uidx;
	uint32_t imm_inval_pkey;
	uint8_t  rsvd40[4];
	uint32_t byte_cnt;
	__be64   timestamp;
	uint32_t sop_drop_qpn;
	uint16_t wqe_counter;
	uint8_t  signature;
	uint8_t  op_own;
};

enum mlx5dv_cqe_comp_res_format {
	MLX5DV_CQE_RES_FORMAT_HASH     = 1 << 0,
	MLX5DV_CQE_RES_FORMAT_CSUM     = 1 << 1,
	MLX5DV_CQE_RES_FORMAT_RESERVED = 1 << 2,
};

static MLX5DV_ALWAYS_INLINE
uint8_t mlx5dv_get_cqe_owner(struct mlx5_cqe64 *cqe)
{
	return cqe->op_own & 0x1;
}

static MLX5DV_ALWAYS_INLINE
void mlx5dv_set_cqe_owner(struct mlx5_cqe64 *cqe, uint8_t val)
{
	cqe->op_own = (val & 0x1) | (cqe->op_own & ~0x1);
}

/* Solicited event */
static MLX5DV_ALWAYS_INLINE
uint8_t mlx5dv_get_cqe_se(struct mlx5_cqe64 *cqe)
{
	return (cqe->op_own >> 1) & 0x1;
}

static MLX5DV_ALWAYS_INLINE
uint8_t mlx5dv_get_cqe_format(struct mlx5_cqe64 *cqe)
{
	return (cqe->op_own >> 2) & 0x3;
}

static MLX5DV_ALWAYS_INLINE
uint8_t mlx5dv_get_cqe_opcode(struct mlx5_cqe64 *cqe)
{
	return cqe->op_own >> 4;
}
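
/*
 * Usage sketch (illustrative only): checking whether the CQE at consumer
 * index "ci" is owned by software, using the accessors above.  "dv_cq" is a
 * struct mlx5dv_cq filled by mlx5dv_init_obj(), "ci" is the consumer index
 * kept by the application, and 64-byte CQEs are assumed.  The read barrier
 * required after the ownership check and before parsing the CQE is omitted.
 *
 *	struct mlx5_cqe64 *cqe = (struct mlx5_cqe64 *)dv_cq.buf +
 *				 (ci & (dv_cq.cqe_cnt - 1));
 *	uint8_t sw_owned = !!(ci & dv_cq.cqe_cnt);
 *
 *	if (mlx5dv_get_cqe_opcode(cqe) != MLX5_CQE_INVALID &&
 *	    mlx5dv_get_cqe_owner(cqe) == sw_owned) {
 *		the CQE is owned by software; after a read barrier its
 *		fields (and, for MLX5_CQE_REQ_ERR/MLX5_CQE_RESP_ERR, the
 *		mlx5_err_cqe layout) can be parsed
 *	}
 */
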
/*
 * WQE related part
 */
enum {
	MLX5_INVALID_LKEY = 0x100,
};

enum {
	MLX5_EXTENDED_UD_AV = 0x80000000,
};

enum {
	MLX5_WQE_CTRL_CQ_UPDATE             = 2 << 2,
	MLX5_WQE_CTRL_SOLICITED             = 1 << 1,
	MLX5_WQE_CTRL_FENCE                 = 4 << 5,
	MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE = 1 << 5,
};

enum {
	MLX5_SEND_WQE_BB    = 64,
	MLX5_SEND_WQE_SHIFT = 6,
};

enum {
	MLX5_INLINE_SEG = 0x80000000,
};

enum {
	MLX5_ETH_WQE_L3_CSUM = (1 << 6),
	MLX5_ETH_WQE_L4_CSUM = (1 << 7),
};

struct mlx5_wqe_srq_next_seg {
	uint8_t  rsvd0[2];
	uint16_t next_wqe_index;
	uint8_t  signature;
	uint8_t  rsvd1[11];
};

struct mlx5_wqe_data_seg {
	uint32_t byte_count;
	uint32_t lkey;
	uint64_t addr;
};

struct mlx5_wqe_ctrl_seg {
	uint32_t opmod_idx_opcode;
	uint32_t qpn_ds;
	uint8_t  signature;
	uint8_t  rsvd[2];
	uint8_t  fm_ce_se;
	uint32_t imm;
};

struct mlx5_wqe_av {
	union {
		struct {
			uint32_t qkey;
			uint32_t reserved;
		} qkey;
		uint64_t dc_key;
	} key;
	uint32_t dqp_dct;
	uint8_t  stat_rate_sl;
	uint8_t  fl_mlid;
	uint16_t rlid;
	uint8_t  reserved0[4];
	uint8_t  rmac[6];
	uint8_t  tclass;
	uint8_t  hop_limit;
	uint32_t grh_gid_fl;
	uint8_t  rgid[16];
};

struct mlx5_wqe_datagram_seg {
	struct mlx5_wqe_av av;
};

struct mlx5_wqe_raddr_seg {
	uint64_t raddr;
	uint32_t rkey;
	uint32_t reserved;
};

struct mlx5_wqe_atomic_seg {
	uint64_t swap_add;
	uint64_t compare;
};

struct mlx5_wqe_inl_data_seg {
	uint32_t byte_count;
};

struct mlx5_wqe_eth_seg {
	uint32_t rsvd0;
	uint8_t  cs_flags;
	uint8_t  rsvd1;
	uint16_t mss;
	uint32_t rsvd2;
	uint16_t inline_hdr_sz;
	uint8_t  inline_hdr_start[2];
	uint8_t  inline_hdr[16];
};

/*
 * Control segment - contains some control information for the current WQE.
 *
 * Output:
 *	seg	  - control segment to be filled
 * Input:
 *	pi	  - WQEBB number of the first block of this WQE.
 *		    This number should wrap at 0xffff, regardless of
 *		    the WQ size.
 *	opcode	  - Opcode of this WQE. Encodes the type of operation
 *		    to be executed on the QP.
 *	opmod	  - Opcode modifier.
 *	qp_num	  - QP/SQ number this WQE is posted to.
 *	fm_ce_se  - FM (fence mode), CE (completion and event mode)
 *		    and SE (solicited event).
 *	ds	  - WQE size in octowords (16-byte units). DS accounts for all
 *		    the segments in the WQE as summarized in WQE construction.
 *	signature - WQE signature.
 *	imm	  - Immediate data/Invalidation key/UMR mkey.
 */
static MLX5DV_ALWAYS_INLINE
void mlx5dv_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi,
			 uint8_t opcode, uint8_t opmod, uint32_t qp_num,
			 uint8_t fm_ce_se, uint8_t ds,
			 uint8_t signature, uint32_t imm)
{
	seg->opmod_idx_opcode = htobe32(((uint32_t)opmod << 24) | ((uint32_t)pi << 8) | opcode);
	seg->qpn_ds           = htobe32((qp_num << 8) | ds);
	seg->fm_ce_se         = fm_ce_se;
	seg->signature        = signature;
	/*
	 * The caller should prepare "imm" in advance based on WR opcode.
	 * For IBV_WR_SEND_WITH_IMM and IBV_WR_RDMA_WRITE_WITH_IMM,
	 * the "imm" should be assigned as is.
	 * For the IBV_WR_SEND_WITH_INV, it should be htobe32(imm).
	 */
	seg->imm              = imm;
}

/* x86 optimized version of mlx5dv_set_ctrl_seg()
 *
 * This is useful when operating on large data sets with
 * parallel calculations.
 *
 * It is not suited for serialized algorithms.
 */
#if defined(__SSE3__)
static MLX5DV_ALWAYS_INLINE
void mlx5dv_x86_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi,
			     uint8_t opcode, uint8_t opmod, uint32_t qp_num,
			     uint8_t fm_ce_se, uint8_t ds,
			     uint8_t signature, uint32_t imm)
{
	__m128i val  = _mm_set_epi32(imm, qp_num, (ds << 16) | pi,
				     (signature << 24) | (opcode << 16) | (opmod << 8) | fm_ce_se);
	__m128i mask = _mm_set_epi8(15, 14, 13, 12,	/* immediate */
				    0,			/* signal/fence_mode */
				    0x80, 0x80,		/* reserved */
				    3,			/* signature */
				    6,			/* data size */
				    8, 9, 10,		/* QP num */
				    2,			/* opcode */
				    4, 5,		/* sw_pi in BE */
				    1			/* opmod */
				    );
	*(__m128i *) seg = _mm_shuffle_epi8(val, mask);
}
#endif /* defined(__SSE3__) */
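
/*
 * Usage sketch (illustrative only): posting a NOP WQE directly on the send
 * queue exposed through struct mlx5dv_qp.  "dv_qp", "qp_num" and the
 * software producer index "sw_pi" are assumptions of the example.  A NOP
 * consists of a control segment only, so ds is 1 (one 16-byte octoword).
 * The memory barriers required before the doorbell record update and before
 * the BlueFlame/UAR write are not shown.
 *
 *	struct mlx5_wqe_ctrl_seg *ctrl = (struct mlx5_wqe_ctrl_seg *)
 *		((char *)dv_qp.sq.buf +
 *		 (sw_pi & (dv_qp.sq.wqe_cnt - 1)) * dv_qp.sq.stride);
 *
 *	mlx5dv_set_ctrl_seg(ctrl, sw_pi, MLX5_OPCODE_NOP, 0, qp_num,
 *			    MLX5_WQE_CTRL_CQ_UPDATE, 1, 0, 0);
 *
 *	dv_qp.dbrec[MLX5_SND_DBR] = htobe32((sw_pi + 1) & 0xffff);
 *	*(uint64_t *)dv_qp.bf.reg = *(uint64_t *)ctrl;
 */
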
/*
 * Datagram Segment - contains address information required in order
 * to form a datagram message.
 *
 * Output:
 *	seg		- datagram segment to be filled.
 * Input:
 *	key		- Q_key/access key.
 *	dqp_dct		- Destination QP number for UD and DCT for DC.
 *	ext		- Address vector extension.
 *	stat_rate_sl	- Maximum static rate control, SL/ethernet priority.
 *	fl_mlid		- Force loopback and source LID for IB.
 *	rlid		- Remote LID
 *	rmac		- Remote MAC
 *	tclass		- GRH tclass/IPv6 tclass/IPv4 ToS
 *	hop_limit	- GRH hop limit/IPv6 hop limit/IPv4 TTL
 *	grh_gid_fi	- GRH, source GID address and IPv6 flow label.
 *	rgid		- Remote GID/IP address.
 */
static MLX5DV_ALWAYS_INLINE
void mlx5dv_set_dgram_seg(struct mlx5_wqe_datagram_seg *seg,
			  uint64_t key, uint32_t dqp_dct,
			  uint8_t ext, uint8_t stat_rate_sl,
			  uint8_t fl_mlid, uint16_t rlid,
			  uint8_t *rmac, uint8_t tclass,
			  uint8_t hop_limit, uint32_t grh_gid_fi,
			  uint8_t *rgid)
{
	/* Always write 64 bits; in the q_key case the reserved part will be 0 */
	seg->av.key.dc_key   = htobe64(key);
	seg->av.dqp_dct      = htobe32(((uint32_t)ext << 31) | dqp_dct);
	seg->av.stat_rate_sl = stat_rate_sl;
	seg->av.fl_mlid      = fl_mlid;
	seg->av.rlid         = htobe16(rlid);
	memcpy(seg->av.rmac, rmac, 6);
	seg->av.tclass       = tclass;
	seg->av.hop_limit    = hop_limit;
	seg->av.grh_gid_fl   = htobe32(grh_gid_fi);
	memcpy(seg->av.rgid, rgid, 16);
}

/*
 * Data Segments - contain pointers and a byte count for the scatter/gather list.
 * They can optionally contain data, which will save a memory read access for
 * gather Work Requests.
 */
static MLX5DV_ALWAYS_INLINE
void mlx5dv_set_data_seg(struct mlx5_wqe_data_seg *seg,
			 uint32_t length, uint32_t lkey,
			 uintptr_t address)
{
	seg->byte_count = htobe32(length);
	seg->lkey       = htobe32(lkey);
	seg->addr       = htobe64(address);
}

/*
 * x86 optimized version of mlx5dv_set_data_seg()
 *
 * This is useful when operating on large data sets with
 * parallel calculations.
 *
 * It is not suited for serialized algorithms.
 */
#if defined(__SSE3__)
static MLX5DV_ALWAYS_INLINE
void mlx5dv_x86_set_data_seg(struct mlx5_wqe_data_seg *seg,
			     uint32_t length, uint32_t lkey,
			     uintptr_t address)
{
	__m128i val  = _mm_set_epi32((uint32_t)address, (uint32_t)(address >> 32), lkey, length);
	__m128i mask = _mm_set_epi8(12, 13, 14, 15,	/* local address low */
				    8, 9, 10, 11,	/* local address high */
				    4, 5, 6, 7,		/* l_key */
				    0, 1, 2, 3		/* byte count */
				    );
	*(__m128i *) seg = _mm_shuffle_epi8(val, mask);
}
#endif /* defined(__SSE3__) */

/*
 * Eth Segment - contains packet headers and information for stateless L2, L3, L4 offloading.
 *
 * Output:
 *	seg		 - Eth segment to be filled.
 * Input:
 *	cs_flags	 - l3cs/l3cs_inner/l4cs/l4cs_inner.
 *	mss		 - Maximum segment size. For TSO WQEs, the number of bytes
 *			   in the TCP payload to be transmitted in each packet. Must
 *			   be 0 on non-TSO WQEs.
 *	inline_hdr_sz	 - Length of the inlined packet headers.
 *	inline_hdr_start - Inlined packet header.
 */
static MLX5DV_ALWAYS_INLINE
void mlx5dv_set_eth_seg(struct mlx5_wqe_eth_seg *seg, uint8_t cs_flags,
			uint16_t mss, uint16_t inline_hdr_sz,
			uint8_t *inline_hdr_start)
{
	seg->cs_flags      = cs_flags;
	seg->mss           = htobe16(mss);
	seg->inline_hdr_sz = htobe16(inline_hdr_sz);
	memcpy(seg->inline_hdr_start, inline_hdr_start, inline_hdr_sz);
}
#endif /* _MLX5DV_H_ */