/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: mthca_qp.c 1355 2004-12-17 15:23:43Z roland $
 */

#include <linux/init.h>

#include <ib_verbs.h>
#include <ib_cache.h>
#include <ib_pack.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"

enum {
	MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE,
	MTHCA_ACK_REQ_FREQ       = 10,
	MTHCA_FLIGHT_LIMIT       = 9,
	MTHCA_UD_HEADER_SIZE     = 72, /* largest UD header possible */
	MTHCA_INLINE_HEADER_SIZE = 4,  /* data segment overhead for inline */
	MTHCA_INLINE_CHUNK_SIZE  = 16  /* inline data segment chunk */
};

enum {
	MTHCA_QP_STATE_RST  = 0,
	MTHCA_QP_STATE_INIT = 1,
	MTHCA_QP_STATE_RTR  = 2,
	MTHCA_QP_STATE_RTS  = 3,
	MTHCA_QP_STATE_SQE  = 4,
	MTHCA_QP_STATE_SQD  = 5,
	MTHCA_QP_STATE_ERR  = 6,
	MTHCA_QP_STATE_DRAINING = 7
};

enum {
	MTHCA_QP_ST_RC  = 0x0,
	MTHCA_QP_ST_UC  = 0x1,
	MTHCA_QP_ST_RD  = 0x2,
	MTHCA_QP_ST_UD  = 0x3,
	MTHCA_QP_ST_MLX = 0x7
};

enum {
	MTHCA_QP_PM_MIGRATED = 0x3,
	MTHCA_QP_PM_ARMED    = 0x0,
	MTHCA_QP_PM_REARM    = 0x1
};

enum {
	/* qp_context flags */
	MTHCA_QP_BIT_DE  = 1 <<  8,
	/* params1 */
	MTHCA_QP_BIT_SRE = 1 << 15,
	MTHCA_QP_BIT_SWE = 1 << 14,
	MTHCA_QP_BIT_SAE = 1 << 13,
	MTHCA_QP_BIT_SIC = 1 <<  4,
	MTHCA_QP_BIT_SSC = 1 <<  3,
	/* params2 */
	MTHCA_QP_BIT_RRE = 1 << 15,
	MTHCA_QP_BIT_RWE = 1 << 14,
	MTHCA_QP_BIT_RAE = 1 << 13,
	MTHCA_QP_BIT_RIC = 1 <<  4,
	MTHCA_QP_BIT_RSC = 1 <<  3
};

struct mthca_qp_path {
	u32 port_pkey;
	u8  rnr_retry;
	u8  g_mylmc;
	u16 rlid;
	u8  ackto;
	u8  mgid_index;
	u8  static_rate;
	u8  hop_limit;
	u32 sl_tclass_flowlabel;
	u8  rgid[16];
} __attribute__((packed));

struct mthca_qp_context {
	u32 flags;
	u32 tavor_sched_queue;		/* Reserved on Arbel */
	u8  mtu_msgmax;
	u8  rq_size_stride;		/* Reserved on Tavor */
	u8  sq_size_stride;		/* Reserved on Tavor */
	u8  rlkey_arbel_sched_queue;	/* Reserved on Tavor */
	u32 usr_page;
	u32 local_qpn;
	u32 remote_qpn;
	u32 reserved1[2];
	struct mthca_qp_path pri_path;
	struct mthca_qp_path alt_path;
	u32 rdd;
	u32 pd;
	u32 wqe_base;
	u32 wqe_lkey;
	u32 params1;
	u32 reserved2;
	u32 next_send_psn;
	u32 cqn_snd;
	u32 snd_wqe_base_l;	/* Next send WQE on Tavor */
	u32 snd_db_index;	/* (debugging only entries) */
	u32 last_acked_psn;
	u32 ssn;
	u32 params2;
	u32 rnr_nextrecvpsn;
	u32 ra_buff_indx;
	u32 cqn_rcv;
	u32 rcv_wqe_base_l;	/* Next recv WQE on Tavor */
	u32 rcv_db_index;	/* (debugging only entries) */
	u32 qkey;
	u32 srqn;
	u32 rmsn;
	u16 rq_wqe_counter;	/* reserved on Tavor */
	u16 sq_wqe_counter;	/* reserved on Tavor */
	u32 reserved3[18];
} __attribute__((packed));

struct mthca_qp_param {
	u32 opt_param_mask;
	u32 reserved1;
	struct mthca_qp_context context;
	u32 reserved2[62];
} __attribute__((packed));

enum {
	MTHCA_QP_OPTPAR_ALT_ADDR_PATH     = 1 << 0,
	MTHCA_QP_OPTPAR_RRE               = 1 << 1,
	MTHCA_QP_OPTPAR_RAE               = 1 << 2,
	MTHCA_QP_OPTPAR_RWE               = 1 << 3,
	MTHCA_QP_OPTPAR_PKEY_INDEX        = 1 << 4,
	MTHCA_QP_OPTPAR_Q_KEY             = 1 << 5,
	MTHCA_QP_OPTPAR_RNR_TIMEOUT       = 1 << 6,
	MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
	MTHCA_QP_OPTPAR_SRA_MAX           = 1 << 8,
	MTHCA_QP_OPTPAR_RRA_MAX           = 1 << 9,
	MTHCA_QP_OPTPAR_PM_STATE          = 1 << 10,
	MTHCA_QP_OPTPAR_PORT_NUM          = 1 << 11,
	MTHCA_QP_OPTPAR_RETRY_COUNT       = 1 << 12,
	MTHCA_QP_OPTPAR_ALT_RNR_RETRY     = 1 << 13,
	MTHCA_QP_OPTPAR_ACK_TIMEOUT       = 1 << 14,
	MTHCA_QP_OPTPAR_RNR_RETRY         = 1 << 15,
	MTHCA_QP_OPTPAR_SCHED_QUEUE       = 1 << 16
};

enum {
	MTHCA_NEXT_DBD       = 1 << 7,
	MTHCA_NEXT_FENCE     = 1 << 6,
	MTHCA_NEXT_CQ_UPDATE = 1 << 3,
	MTHCA_NEXT_EVENT_GEN = 1 << 2,
	MTHCA_NEXT_SOLICIT   = 1 << 1,

	MTHCA_MLX_VL15       = 1 << 17,
	MTHCA_MLX_SLR        = 1 << 16
};

enum {
	MTHCA_INVAL_LKEY = 0x100
};

struct mthca_next_seg {
	u32 nda_op;	/* [31:6] next WQE [4:0] next opcode */
	u32 ee_nds;	/* [31:8] next EE  [7] DBD [6] F [5:0] next WQE size */
	u32 flags;	/* [3] CQ [2] Event [1] Solicit */
	u32 imm;	/* immediate data */
};

struct mthca_tavor_ud_seg {
	u32 reserved1;
	u32 lkey;
	u64 av_addr;
	u32 reserved2[4];
	u32 dqpn;
	u32 qkey;
	u32 reserved3[2];
};

struct mthca_arbel_ud_seg {
	u32 av[8];
	u32 dqpn;
	u32 qkey;
	u32 reserved[2];
};

struct mthca_bind_seg {
	u32 flags;	/* [31] Atomic [30] rem write [29] rem read */
	u32 reserved;
	u32 new_rkey;
	u32 lkey;
	u64 addr;
	u64 length;
};

struct mthca_raddr_seg {
	u64 raddr;
	u32 rkey;
	u32 reserved;
};

struct mthca_atomic_seg {
	u64 swap_add;
	u64 compare;
};

struct mthca_data_seg {
	u32 byte_count;
	u32 lkey;
	u64 addr;
};

struct mthca_mlx_seg {
	u32 nda_op;
	u32 nds;
	u32 flags;	/* [17] VL15 [16] SLR [14:12] static rate
			   [11:8] SL [3] C [2] E */
	u16 rlid;
	u16 vcrc;
};

static const u8 mthca_opcode[] = {
	[IB_WR_SEND]                 = MTHCA_OPCODE_SEND,
	[IB_WR_SEND_WITH_IMM]        = MTHCA_OPCODE_SEND_IMM,
	[IB_WR_RDMA_WRITE]           = MTHCA_OPCODE_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM]  = MTHCA_OPCODE_RDMA_WRITE_IMM,
	[IB_WR_RDMA_READ]            = MTHCA_OPCODE_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP]   = MTHCA_OPCODE_ATOMIC_CS,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
};

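/*
 * The four QPNs starting at qp_table.sqp_start are the special QPs:
 * QP0 for ports 1 and 2, then QP1 for ports 1 and 2.  is_sqp() matches
 * all four and is_qp0() only the first two.
 */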
static int is_sqp(struct mthca_dev *dev, struct mthca_qp *qp)
{
	return qp->qpn >= dev->qp_table.sqp_start &&
		qp->qpn <= dev->qp_table.sqp_start + 3;
}

static int is_qp0(struct mthca_dev *dev, struct mthca_qp *qp)
{
	return qp->qpn >= dev->qp_table.sqp_start &&
		qp->qpn <= dev->qp_table.sqp_start + 1;
}

static void *get_recv_wqe(struct mthca_qp *qp, int n)
{
	if (qp->is_direct)
		return qp->queue.direct.buf + (n << qp->rq.wqe_shift);
	else
		return qp->queue.page_list[(n << qp->rq.wqe_shift) >> PAGE_SHIFT].buf +
			((n << qp->rq.wqe_shift) & (PAGE_SIZE - 1));
}

static void *get_send_wqe(struct mthca_qp *qp, int n)
{
	if (qp->is_direct)
		return qp->queue.direct.buf + qp->send_wqe_offset +
			(n << qp->sq.wqe_shift);
	else
		return qp->queue.page_list[(qp->send_wqe_offset +
					    (n << qp->sq.wqe_shift)) >>
					   PAGE_SHIFT].buf +
			((qp->send_wqe_offset + (n << qp->sq.wqe_shift)) &
			 (PAGE_SIZE - 1));
}

void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
		    enum ib_event_type event_type)
{
	struct mthca_qp *qp;
	struct ib_event event;

	spin_lock(&dev->qp_table.lock);
	qp = mthca_array_get(&dev->qp_table.qp, qpn & (dev->limits.num_qps - 1));
	if (qp)
		atomic_inc(&qp->refcount);
	spin_unlock(&dev->qp_table.lock);

	if (!qp) {
		mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);
		return;
	}

	event.device     = &dev->ib_dev;
	event.event      = event_type;
	event.element.qp = &qp->ibqp;
	if (qp->ibqp.event_handler)
		qp->ibqp.event_handler(&event, qp->ibqp.qp_context);

	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}

static int to_mthca_state(enum ib_qp_state ib_state)
{
	switch (ib_state) {
	case IB_QPS_RESET: return MTHCA_QP_STATE_RST;
	case IB_QPS_INIT:  return MTHCA_QP_STATE_INIT;
	case IB_QPS_RTR:   return MTHCA_QP_STATE_RTR;
	case IB_QPS_RTS:   return MTHCA_QP_STATE_RTS;
	case IB_QPS_SQD:   return MTHCA_QP_STATE_SQD;
	case IB_QPS_SQE:   return MTHCA_QP_STATE_SQE;
	case IB_QPS_ERR:   return MTHCA_QP_STATE_ERR;
	default:           return -1;
	}
}

enum { RC, UC, UD, RD, RDEE, MLX, NUM_TRANS };

static int to_mthca_st(int transport)
{
	switch (transport) {
	case RC:  return MTHCA_QP_ST_RC;
	case UC:  return MTHCA_QP_ST_UC;
	case UD:  return MTHCA_QP_ST_UD;
	case RD:  return MTHCA_QP_ST_RD;
	case MLX: return MTHCA_QP_ST_MLX;
	default:  return -1;
	}
}

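/*
 * state_table is indexed by [current IB state][new IB state].  Each
 * entry gives the firmware transition opcode plus, per transport type,
 * the attribute bits that are required and the bits that are merely
 * allowed for that transition; mthca_modify_qp() checks attr_mask
 * against these before building the QP context.
 */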
static const struct {
	int trans;
	u32 req_param[NUM_TRANS];
	u32 opt_param[NUM_TRANS];
} state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = {
		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
		[IB_QPS_ERR]   = { .trans = MTHCA_TRANS_ANY2ERR },
		[IB_QPS_INIT]  = {
			.trans = MTHCA_TRANS_RST2INIT,
			.req_param = {
				[UD]  = (IB_QP_PKEY_INDEX |
					 IB_QP_PORT       |
					 IB_QP_QKEY),
				[UC]  = (IB_QP_PKEY_INDEX |
					 IB_QP_PORT       |
					 IB_QP_ACCESS_FLAGS),
				[RC]  = (IB_QP_PKEY_INDEX |
					 IB_QP_PORT       |
					 IB_QP_ACCESS_FLAGS),
				[MLX] = (IB_QP_PKEY_INDEX |
					 IB_QP_QKEY),
			},
			/* bug-for-bug compatibility with VAPI: */
			.opt_param = {
				[MLX] = IB_QP_PORT
			}
		},
	},
	[IB_QPS_INIT] = {
		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
		[IB_QPS_ERR]   = { .trans = MTHCA_TRANS_ANY2ERR },
		[IB_QPS_INIT]  = {
			.trans = MTHCA_TRANS_INIT2INIT,
			.opt_param = {
				[UD]  = (IB_QP_PKEY_INDEX |
					 IB_QP_PORT       |
					 IB_QP_QKEY),
				[UC]  = (IB_QP_PKEY_INDEX |
					 IB_QP_PORT       |
					 IB_QP_ACCESS_FLAGS),
				[RC]  = (IB_QP_PKEY_INDEX |
					 IB_QP_PORT       |
					 IB_QP_ACCESS_FLAGS),
				[MLX] = (IB_QP_PKEY_INDEX |
					 IB_QP_QKEY),
			}
		},
		[IB_QPS_RTR] = {
			.trans = MTHCA_TRANS_INIT2RTR,
			.req_param = {
				[UC] = (IB_QP_AV                  |
					IB_QP_PATH_MTU            |
					IB_QP_DEST_QPN            |
					IB_QP_RQ_PSN              |
					IB_QP_MAX_DEST_RD_ATOMIC),
				[RC] = (IB_QP_AV                  |
					IB_QP_PATH_MTU            |
					IB_QP_DEST_QPN            |
					IB_QP_RQ_PSN              |
					IB_QP_MAX_DEST_RD_ATOMIC  |
					IB_QP_MIN_RNR_TIMER),
			},
			.opt_param = {
				[UD]  = (IB_QP_PKEY_INDEX |
					 IB_QP_QKEY),
				[UC]  = (IB_QP_ALT_PATH     |
					 IB_QP_ACCESS_FLAGS |
					 IB_QP_PKEY_INDEX),
				[RC]  = (IB_QP_ALT_PATH     |
					 IB_QP_ACCESS_FLAGS |
					 IB_QP_PKEY_INDEX),
				[MLX] = (IB_QP_PKEY_INDEX |
					 IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_RTR] = {
		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
		[IB_QPS_ERR]   = { .trans = MTHCA_TRANS_ANY2ERR },
		[IB_QPS_RTS]   = {
			.trans = MTHCA_TRANS_RTR2RTS,
			.req_param = {
				[UD]  = IB_QP_SQ_PSN,
				[UC]  = (IB_QP_SQ_PSN |
					 IB_QP_MAX_QP_RD_ATOMIC),
				[RC]  = (IB_QP_TIMEOUT           |
					 IB_QP_RETRY_CNT         |
					 IB_QP_RNR_RETRY         |
					 IB_QP_SQ_PSN            |
					 IB_QP_MAX_QP_RD_ATOMIC),
				[MLX] = IB_QP_SQ_PSN,
			},
			.opt_param = {
				[UD]  = (IB_QP_CUR_STATE |
					 IB_QP_QKEY),
				[UC]  = (IB_QP_CUR_STATE     |
					 IB_QP_ALT_PATH      |
					 IB_QP_ACCESS_FLAGS  |
					 IB_QP_PKEY_INDEX    |
					 IB_QP_PATH_MIG_STATE),
				[RC]  = (IB_QP_CUR_STATE     |
					 IB_QP_ALT_PATH      |
					 IB_QP_ACCESS_FLAGS  |
					 IB_QP_PKEY_INDEX    |
					 IB_QP_MIN_RNR_TIMER |
					 IB_QP_PATH_MIG_STATE),
				[MLX] = (IB_QP_CUR_STATE |
					 IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_RTS] = {
		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
		[IB_QPS_ERR]   = { .trans = MTHCA_TRANS_ANY2ERR },
		[IB_QPS_RTS]   = {
			.trans = MTHCA_TRANS_RTS2RTS,
			.opt_param = {
				[UD]  = (IB_QP_CUR_STATE |
					 IB_QP_QKEY),
				[UC]  = (IB_QP_ACCESS_FLAGS |
					 IB_QP_ALT_PATH     |
					 IB_QP_PATH_MIG_STATE),
				[RC]  = (IB_QP_ACCESS_FLAGS   |
					 IB_QP_ALT_PATH       |
					 IB_QP_PATH_MIG_STATE |
					 IB_QP_MIN_RNR_TIMER),
				[MLX] = (IB_QP_CUR_STATE |
					 IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD] = {
			.trans = MTHCA_TRANS_RTS2SQD,
		},
	},
	[IB_QPS_SQD] = {
		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
		[IB_QPS_ERR]   = { .trans = MTHCA_TRANS_ANY2ERR },
		[IB_QPS_RTS]   = {
			.trans = MTHCA_TRANS_SQD2RTS,
			.opt_param = {
				[UD]  = (IB_QP_CUR_STATE |
					 IB_QP_QKEY),
				[UC]  = (IB_QP_CUR_STATE    |
					 IB_QP_ALT_PATH     |
					 IB_QP_ACCESS_FLAGS |
					 IB_QP_PATH_MIG_STATE),
				[RC]  = (IB_QP_CUR_STATE     |
					 IB_QP_ALT_PATH      |
					 IB_QP_ACCESS_FLAGS  |
					 IB_QP_MIN_RNR_TIMER |
					 IB_QP_PATH_MIG_STATE),
				[MLX] = (IB_QP_CUR_STATE |
					 IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD] = {
			.trans = MTHCA_TRANS_SQD2SQD,
			.opt_param = {
				[UD]  = (IB_QP_PKEY_INDEX |
					 IB_QP_QKEY),
				[UC]  = (IB_QP_AV                 |
					 IB_QP_MAX_QP_RD_ATOMIC   |
					 IB_QP_MAX_DEST_RD_ATOMIC |
					 IB_QP_CUR_STATE          |
					 IB_QP_ALT_PATH           |
					 IB_QP_ACCESS_FLAGS       |
					 IB_QP_PKEY_INDEX         |
					 IB_QP_PATH_MIG_STATE),
				[RC]  = (IB_QP_AV                 |
					 IB_QP_TIMEOUT            |
					 IB_QP_RETRY_CNT          |
					 IB_QP_RNR_RETRY          |
					 IB_QP_MAX_QP_RD_ATOMIC   |
					 IB_QP_MAX_DEST_RD_ATOMIC |
					 IB_QP_CUR_STATE          |
					 IB_QP_ALT_PATH           |
					 IB_QP_ACCESS_FLAGS       |
					 IB_QP_PKEY_INDEX         |
					 IB_QP_MIN_RNR_TIMER      |
					 IB_QP_PATH_MIG_STATE),
				[MLX] = (IB_QP_PKEY_INDEX |
					 IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_SQE] = {
		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
		[IB_QPS_ERR]   = { .trans = MTHCA_TRANS_ANY2ERR },
		[IB_QPS_RTS]   = {
			.trans = MTHCA_TRANS_SQERR2RTS,
			.opt_param = {
				[UD]  = (IB_QP_CUR_STATE |
					 IB_QP_QKEY),
				[UC]  = (IB_QP_CUR_STATE),
				[RC]  = (IB_QP_CUR_STATE |
					 IB_QP_MIN_RNR_TIMER),
				[MLX] = (IB_QP_CUR_STATE |
					 IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_ERR] = {
		[IB_QPS_RESET] = { .trans = MTHCA_TRANS_ANY2RST },
		[IB_QPS_ERR]   = { .trans = MTHCA_TRANS_ANY2ERR }
	}
};

static void store_attrs(struct mthca_sqp *sqp, struct ib_qp_attr *attr,
			int attr_mask)
{
	if (attr_mask & IB_QP_PKEY_INDEX)
		sqp->pkey_index = attr->pkey_index;
	if (attr_mask & IB_QP_QKEY)
		sqp->qkey = attr->qkey;
	if (attr_mask & IB_QP_SQ_PSN)
		sqp->send_psn = attr->sq_psn;
}

static void init_port(struct mthca_dev *dev, int port)
{
	int err;
	u8 status;
	struct mthca_init_ib_param param;

	memset(&param, 0, sizeof param);

	param.enable_1x = 1;
	param.enable_4x = 1;
	param.vl_cap    = dev->limits.vl_cap;
	param.mtu_cap   = dev->limits.mtu_cap;
	param.gid_cap   = dev->limits.gid_table_len;
	param.pkey_cap  = dev->limits.pkey_table_len;

	err = mthca_INIT_IB(dev, &param, port, &status);
	if (err)
		mthca_warn(dev, "INIT_IB failed, return code %d.\n", err);
	if (status)
		mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
}

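/*
 * mthca_modify_qp() validates the requested transition against
 * state_table, fills in a mthca_qp_param mailbox (QP context plus the
 * opt_param_mask of the fields being changed), and issues the
 * MODIFY_QP firmware command for that transition.  For QP0 it also
 * brings the IB link up or down as the QP moves into or out of RTR.
 */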
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	enum ib_qp_state cur_state, new_state;
	struct mthca_mailbox *mailbox;
	struct mthca_qp_param *qp_param;
	struct mthca_qp_context *qp_context;
	u32 req_param, opt_param;
	u8 status;
	int err;

	if (attr_mask & IB_QP_CUR_STATE) {
		if (attr->cur_qp_state != IB_QPS_RTR &&
		    attr->cur_qp_state != IB_QPS_RTS &&
		    attr->cur_qp_state != IB_QPS_SQD &&
		    attr->cur_qp_state != IB_QPS_SQE)
			return -EINVAL;
		else
			cur_state = attr->cur_qp_state;
	} else {
		spin_lock_irq(&qp->sq.lock);
		spin_lock(&qp->rq.lock);
		cur_state = qp->state;
		spin_unlock(&qp->rq.lock);
		spin_unlock_irq(&qp->sq.lock);
	}

	if (attr_mask & IB_QP_STATE) {
		if (attr->qp_state < 0 || attr->qp_state > IB_QPS_ERR)
			return -EINVAL;
		new_state = attr->qp_state;
	} else
		new_state = cur_state;

	if (state_table[cur_state][new_state].trans == MTHCA_TRANS_INVALID) {
		mthca_dbg(dev, "Illegal QP transition "
			  "%d->%d\n", cur_state, new_state);
		return -EINVAL;
	}

	req_param = state_table[cur_state][new_state].req_param[qp->transport];
	opt_param = state_table[cur_state][new_state].opt_param[qp->transport];

	if ((req_param & attr_mask) != req_param) {
		mthca_dbg(dev, "QP transition "
			  "%d->%d missing req attr 0x%08x\n",
			  cur_state, new_state,
			  req_param & ~attr_mask);
		return -EINVAL;
	}

	if (attr_mask & ~(req_param | opt_param | IB_QP_STATE)) {
		mthca_dbg(dev, "QP transition (transport %d) "
			  "%d->%d has extra attr 0x%08x\n",
			  qp->transport,
			  cur_state, new_state,
			  attr_mask & ~(req_param | opt_param |
					IB_QP_STATE));
		return -EINVAL;
	}

	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);
	qp_param = mailbox->buf;
	qp_context = &qp_param->context;
	memset(qp_param, 0, sizeof *qp_param);

	qp_context->flags  = cpu_to_be32((to_mthca_state(new_state) << 28) |
					 (to_mthca_st(qp->transport) << 16));
	qp_context->flags |= cpu_to_be32(MTHCA_QP_BIT_DE);
	if (!(attr_mask & IB_QP_PATH_MIG_STATE))
		qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
	else {
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PM_STATE);
		switch (attr->path_mig_state) {
		case IB_MIG_MIGRATED:
			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_MIGRATED << 11);
			break;
		case IB_MIG_REARM:
			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_REARM << 11);
			break;
		case IB_MIG_ARMED:
			qp_context->flags |= cpu_to_be32(MTHCA_QP_PM_ARMED << 11);
			break;
		}
	}

	/* leave tavor_sched_queue as 0 */

	if (qp->transport == MLX || qp->transport == UD)
		qp_context->mtu_msgmax = (IB_MTU_2048 << 5) | 11;
	else if (attr_mask & IB_QP_PATH_MTU)
		qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31;

	if (mthca_is_memfree(dev)) {
		qp_context->rq_size_stride =
			((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4);
		qp_context->sq_size_stride =
			((ffs(qp->sq.max) - 1) << 3) | (qp->sq.wqe_shift - 4);
	}

	/* leave arbel_sched_queue as 0 */

	if (qp->ibqp.uobject)
		qp_context->usr_page =
			cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
	else
		qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
	qp_context->local_qpn = cpu_to_be32(qp->qpn);
	if (attr_mask & IB_QP_DEST_QPN) {
		qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
	}

	if (qp->transport == MLX)
		qp_context->pri_path.port_pkey |=
			cpu_to_be32(to_msqp(qp)->port << 24);
	else {
		if (attr_mask & IB_QP_PORT) {
			qp_context->pri_path.port_pkey |=
				cpu_to_be32(attr->port_num << 24);
			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PORT_NUM);
		}
	}

	if (attr_mask & IB_QP_PKEY_INDEX) {
		qp_context->pri_path.port_pkey |=
			cpu_to_be32(attr->pkey_index);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PKEY_INDEX);
	}

	if (attr_mask & IB_QP_RNR_RETRY) {
		qp_context->pri_path.rnr_retry = attr->rnr_retry << 5;
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY);
	}

	if (attr_mask & IB_QP_AV) {
		qp_context->pri_path.g_mylmc     = attr->ah_attr.src_path_bits & 0x7f;
		qp_context->pri_path.rlid        = cpu_to_be16(attr->ah_attr.dlid);
		qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate;
		if (attr->ah_attr.ah_flags & IB_AH_GRH) {
			qp_context->pri_path.g_mylmc |= 1 << 7;
			qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
			qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
			qp_context->pri_path.sl_tclass_flowlabel =
				cpu_to_be32((attr->ah_attr.sl << 28)                |
					    (attr->ah_attr.grh.traffic_class << 20) |
					    (attr->ah_attr.grh.flow_label));
			memcpy(qp_context->pri_path.rgid,
			       attr->ah_attr.grh.dgid.raw, 16);
		} else {
			qp_context->pri_path.sl_tclass_flowlabel =
				cpu_to_be32(attr->ah_attr.sl << 28);
		}
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
	}

	if (attr_mask & IB_QP_TIMEOUT) {
		qp_context->pri_path.ackto = attr->timeout;
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
	}

	/* XXX alt_path */

	/* leave rdd as 0 */
	qp_context->pd       = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
	/* leave wqe_base as 0 (we always create an MR based at 0 for WQs) */
	qp_context->wqe_lkey = cpu_to_be32(qp->mr.ibmr.lkey);
	qp_context->params1  = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
					   (MTHCA_FLIGHT_LIMIT << 24) |
					   MTHCA_QP_BIT_SRE           |
					   MTHCA_QP_BIT_SWE           |
					   MTHCA_QP_BIT_SAE);
	if (qp->sq_policy == IB_SIGNAL_ALL_WR)
		qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
	if (attr_mask & IB_QP_RETRY_CNT) {
		qp_context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT);
	}

	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
		qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ?
						       ffs(attr->max_rd_atomic) - 1 : 0,
						       7) << 21);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
	}

	if (attr_mask & IB_QP_SQ_PSN)
		qp_context->next_send_psn = cpu_to_be32(attr->sq_psn);
	qp_context->cqn_snd = cpu_to_be32(to_mcq(ibqp->send_cq)->cqn);

	if (mthca_is_memfree(dev)) {
		qp_context->snd_wqe_base_l = cpu_to_be32(qp->send_wqe_offset);
		qp_context->snd_db_index   = cpu_to_be32(qp->sq.db_index);
	}

	if (attr_mask & IB_QP_ACCESS_FLAGS) {
		/*
		 * Only enable RDMA/atomics if we have responder
		 * resources set to a non-zero value.
		 */
		if (qp->resp_depth) {
			qp_context->params2 |=
				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
					    MTHCA_QP_BIT_RWE : 0);
			qp_context->params2 |=
				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ?
					    MTHCA_QP_BIT_RRE : 0);
			qp_context->params2 |=
				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ?
					    MTHCA_QP_BIT_RAE : 0);
		}

		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
							MTHCA_QP_OPTPAR_RRE |
							MTHCA_QP_OPTPAR_RAE);

		qp->atomic_rd_en = attr->qp_access_flags;
	}

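	/*
	 * The block below sizes the responder-side RDMA/atomic
	 * resources.  rra_max is the smallest power-of-two exponent
	 * that covers max_dest_rd_atomic, capped by rdb_shift, i.e.
	 * roughly ceil(log2(max_dest_rd_atomic)).
	 */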
	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
		u8 rra_max;

		if (qp->resp_depth && !attr->max_dest_rd_atomic) {
			/*
			 * Lowering our responder resources to zero.
			 * Turn off RDMA/atomics as responder.
			 * (RWE/RRE/RAE in params2 already zero)
			 */
			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
								MTHCA_QP_OPTPAR_RRE |
								MTHCA_QP_OPTPAR_RAE);
		}

		if (!qp->resp_depth && attr->max_dest_rd_atomic) {
			/*
			 * Increasing our responder resources from
			 * zero.  Turn on RDMA/atomics as appropriate.
			 */
			qp_context->params2 |=
				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_WRITE ?
					    MTHCA_QP_BIT_RWE : 0);
			qp_context->params2 |=
				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ?
					    MTHCA_QP_BIT_RRE : 0);
			qp_context->params2 |=
				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ?
					    MTHCA_QP_BIT_RAE : 0);

			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
								MTHCA_QP_OPTPAR_RRE |
								MTHCA_QP_OPTPAR_RAE);
		}

		for (rra_max = 0;
		     1 << rra_max < attr->max_dest_rd_atomic &&
			     rra_max < dev->qp_table.rdb_shift;
		     ++rra_max)
			; /* nothing */

		qp_context->params2      |= cpu_to_be32(rra_max << 21);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);

		qp->resp_depth = attr->max_dest_rd_atomic;
	}

	qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);

	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
		qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT);
	}
	if (attr_mask & IB_QP_RQ_PSN)
		qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);

	qp_context->ra_buff_indx =
		cpu_to_be32(dev->qp_table.rdb_base +
			    ((qp->qpn & (dev->limits.num_qps - 1)) * MTHCA_RDB_ENTRY_SIZE <<
			     dev->qp_table.rdb_shift));

	qp_context->cqn_rcv = cpu_to_be32(to_mcq(ibqp->recv_cq)->cqn);

	if (mthca_is_memfree(dev))
		qp_context->rcv_db_index = cpu_to_be32(qp->rq.db_index);

	if (attr_mask & IB_QP_QKEY) {
		qp_context->qkey = cpu_to_be32(attr->qkey);
		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY);
	}

	err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans,
			      qp->qpn, 0, mailbox, 0, &status);
	if (status) {
		mthca_warn(dev, "modify QP %d returned status %02x.\n",
			   state_table[cur_state][new_state].trans, status);
		err = -EINVAL;
	}

	if (!err)
		qp->state = new_state;

	mthca_free_mailbox(dev, mailbox);

	if (is_sqp(dev, qp))
		store_attrs(to_msqp(qp), attr, attr_mask);

	/*
	 * If we are moving QP0 to RTR, bring the IB link up; if we
	 * are moving QP0 to RESET or ERROR, bring the link back down.
	 */
	if (is_qp0(dev, qp)) {
		if (cur_state != IB_QPS_RTR &&
		    new_state == IB_QPS_RTR)
			init_port(dev, to_msqp(qp)->port);

		if (cur_state != IB_QPS_RESET &&
		    cur_state != IB_QPS_ERR &&
		    (new_state == IB_QPS_RESET ||
		     new_state == IB_QPS_ERR))
			mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
	}

	return err;
}

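/*
 * WQE buffers no larger than MTHCA_MAX_DIRECT_QP_SIZE are kept in a
 * single coherent DMA allocation ("direct"); larger queues fall back
 * to a list of individually allocated pages.  Either way the buffer is
 * registered as a single memory region covering the whole queue so the
 * HCA can be given WQE addresses as offsets from zero.
 */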
/*
 * Allocate and register buffer for WQEs.  qp->rq.max, sq.max,
 * rq.max_gs and sq.max_gs must all be assigned.
 * mthca_alloc_wqe_buf will calculate rq.wqe_shift and
 * sq.wqe_shift (as well as send_wqe_offset, is_direct, and
 * queue)
 */
static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
			       struct mthca_pd *pd,
			       struct mthca_qp *qp)
{
	int size;
	int i;
	int npages, shift;
	dma_addr_t t;
	u64 *dma_list = NULL;
	int err = -ENOMEM;

	size = sizeof (struct mthca_next_seg) +
		qp->rq.max_gs * sizeof (struct mthca_data_seg);

	for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
	     qp->rq.wqe_shift++)
		; /* nothing */

	size = sizeof (struct mthca_next_seg) +
		qp->sq.max_gs * sizeof (struct mthca_data_seg);
	switch (qp->transport) {
	case MLX:
		size += 2 * sizeof (struct mthca_data_seg);
		break;
	case UD:
		if (mthca_is_memfree(dev))
			size += sizeof (struct mthca_arbel_ud_seg);
		else
			size += sizeof (struct mthca_tavor_ud_seg);
		break;
	default:
		/* bind seg is as big as atomic + raddr segs */
		size += sizeof (struct mthca_bind_seg);
	}

	for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
	     qp->sq.wqe_shift++)
		; /* nothing */

	qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
				    1 << qp->sq.wqe_shift);

	/*
	 * If this is a userspace QP, we don't actually have to
	 * allocate anything.  All we need is to calculate the WQE
	 * sizes and the send_wqe_offset, so we're done now.
	 */
	if (pd->ibpd.uobject)
		return 0;

	size = PAGE_ALIGN(qp->send_wqe_offset +
			  (qp->sq.max << qp->sq.wqe_shift));

	qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64),
			   GFP_KERNEL);
	if (!qp->wrid)
		goto err_out;

	if (size <= MTHCA_MAX_DIRECT_QP_SIZE) {
		qp->is_direct = 1;
		npages = 1;
		shift = get_order(size) + PAGE_SHIFT;

		if (0)
			mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n",
				  size, shift);

		qp->queue.direct.buf = dma_alloc_coherent(&dev->pdev->dev, size,
							  &t, GFP_KERNEL);
		if (!qp->queue.direct.buf)
			goto err_out;

		pci_unmap_addr_set(&qp->queue.direct, mapping, t);

		memset(qp->queue.direct.buf, 0, size);

		while (t & ((1 << shift) - 1)) {
			--shift;
			npages *= 2;
		}

		dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
		if (!dma_list)
			goto err_out_free;

		for (i = 0; i < npages; ++i)
			dma_list[i] = t + i * (1 << shift);
	} else {
		qp->is_direct = 0;
		npages = size / PAGE_SIZE;
		shift = PAGE_SHIFT;

		if (0)
			mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages);

		dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
		if (!dma_list)
			goto err_out;

		qp->queue.page_list = kmalloc(npages *
					      sizeof *qp->queue.page_list,
					      GFP_KERNEL);
		if (!qp->queue.page_list)
			goto err_out;

		for (i = 0; i < npages; ++i) {
			qp->queue.page_list[i].buf =
				dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
						   &t, GFP_KERNEL);
			if (!qp->queue.page_list[i].buf)
				goto err_out_free;

			memset(qp->queue.page_list[i].buf, 0, PAGE_SIZE);

			pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t);
			dma_list[i] = t;
		}
	}

	err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift,
				  npages, 0, size,
				  MTHCA_MPT_FLAG_LOCAL_READ,
				  &qp->mr);
	if (err)
		goto err_out_free;

	kfree(dma_list);
	return 0;

 err_out_free:
	if (qp->is_direct) {
		dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
				  pci_unmap_addr(&qp->queue.direct, mapping));
	} else
		for (i = 0; i < npages; ++i) {
			if (qp->queue.page_list[i].buf)
				dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
						  qp->queue.page_list[i].buf,
						  pci_unmap_addr(&qp->queue.page_list[i],
								 mapping));

		}

 err_out:
	kfree(qp->wrid);
	kfree(dma_list);
	return err;
}

static void mthca_free_wqe_buf(struct mthca_dev *dev,
			       struct mthca_qp *qp)
{
	int i;
	int size = PAGE_ALIGN(qp->send_wqe_offset +
			      (qp->sq.max << qp->sq.wqe_shift));

	if (qp->is_direct) {
		dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf,
				  pci_unmap_addr(&qp->queue.direct, mapping));
	} else {
		for (i = 0; i < size / PAGE_SIZE; ++i) {
			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
					  qp->queue.page_list[i].buf,
					  pci_unmap_addr(&qp->queue.page_list[i],
							 mapping));
		}
	}

	kfree(qp->wrid);
}

static int mthca_map_memfree(struct mthca_dev *dev,
			     struct mthca_qp *qp)
{
	int ret;

	if (mthca_is_memfree(dev)) {
		ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
		if (ret)
			return ret;

		ret = mthca_table_get(dev, dev->qp_table.eqp_table, qp->qpn);
		if (ret)
			goto err_qpc;

		ret = mthca_table_get(dev, dev->qp_table.rdb_table,
				      qp->qpn << dev->qp_table.rdb_shift);
		if (ret)
			goto err_eqpc;

	}

	return 0;

err_eqpc:
	mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);

err_qpc:
	mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);

	return ret;
}

static void mthca_unmap_memfree(struct mthca_dev *dev,
				struct mthca_qp *qp)
{
	mthca_table_put(dev, dev->qp_table.rdb_table,
			qp->qpn << dev->qp_table.rdb_shift);
	mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
	mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
}

static int mthca_alloc_memfree(struct mthca_dev *dev,
			       struct mthca_qp *qp)
{
	int ret = 0;

	if (mthca_is_memfree(dev)) {
		qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
						 qp->qpn, &qp->rq.db);
		if (qp->rq.db_index < 0)
			return ret;

		qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
						 qp->qpn, &qp->sq.db);
		if (qp->sq.db_index < 0)
			mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
	}

	return ret;
}

static void mthca_free_memfree(struct mthca_dev *dev,
			       struct mthca_qp *qp)
{
	if (mthca_is_memfree(dev)) {
		mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
		mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
	}
}

static void mthca_wq_init(struct mthca_wq *wq)
{
	spin_lock_init(&wq->lock);
	wq->next_ind  = 0;
	wq->last_comp = wq->max - 1;
	wq->head      = 0;
	wq->tail      = 0;
	wq->last      = NULL;
}

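/*
 * mthca_alloc_qp_common() does the initialization shared by regular
 * and special QPs: reference count and work-queue state, mem-free
 * context and doorbell setup, and the WQE buffer.  On mem-free (Arbel)
 * HCAs it also pre-links every receive and send WQE into a ring via
 * nda_op and marks unused scatter entries with the invalid lkey so the
 * hardware ignores them.
 */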
static int mthca_alloc_qp_common(struct mthca_dev *dev,
				 struct mthca_pd *pd,
				 struct mthca_cq *send_cq,
				 struct mthca_cq *recv_cq,
				 enum ib_sig_type send_policy,
				 struct mthca_qp *qp)
{
	int ret;
	int i;

	atomic_set(&qp->refcount, 1);
	qp->state        = IB_QPS_RESET;
	qp->atomic_rd_en = 0;
	qp->resp_depth   = 0;
	qp->sq_policy    = send_policy;
	mthca_wq_init(&qp->sq);
	mthca_wq_init(&qp->rq);

	ret = mthca_map_memfree(dev, qp);
	if (ret)
		return ret;

	ret = mthca_alloc_wqe_buf(dev, pd, qp);
	if (ret) {
		mthca_unmap_memfree(dev, qp);
		return ret;
	}

	/*
	 * If this is a userspace QP, we're done now.  The doorbells
	 * will be allocated and buffers will be initialized in
	 * userspace.
	 */
	if (pd->ibpd.uobject)
		return 0;

	ret = mthca_alloc_memfree(dev, qp);
	if (ret) {
		mthca_free_wqe_buf(dev, qp);
		mthca_unmap_memfree(dev, qp);
		return ret;
	}

	if (mthca_is_memfree(dev)) {
		struct mthca_next_seg *next;
		struct mthca_data_seg *scatter;
		int size = (sizeof (struct mthca_next_seg) +
			    qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;

		for (i = 0; i < qp->rq.max; ++i) {
			next = get_recv_wqe(qp, i);
			next->nda_op = cpu_to_be32(((i + 1) & (qp->rq.max - 1)) <<
						   qp->rq.wqe_shift);
			next->ee_nds = cpu_to_be32(size);

			for (scatter = (void *) (next + 1);
			     (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift);
			     ++scatter)
				scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
		}

		for (i = 0; i < qp->sq.max; ++i) {
			next = get_send_wqe(qp, i);
			next->nda_op = cpu_to_be32((((i + 1) & (qp->sq.max - 1)) <<
						    qp->sq.wqe_shift) +
						   qp->send_wqe_offset);
		}
	}

	return 0;
}

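/*
 * On mem-free HCAs queue sizes must be powers of two, so the requested
 * WR counts are rounded up in mthca_set_qp_size().  sq.max_gs is sized
 * to hold either the requested gather entries or the worst-case inline
 * data (plus the per-segment inline header), whichever needs more data
 * segments.
 */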
static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
			     struct mthca_qp *qp)
{
	/* Sanity check QP size before proceeding */
	if (cap->max_send_wr  > 65536 || cap->max_recv_wr  > 65536 ||
	    cap->max_send_sge > 64    || cap->max_recv_sge > 64)
		return -EINVAL;

	if (mthca_is_memfree(dev)) {
		qp->rq.max = cap->max_recv_wr ?
			roundup_pow_of_two(cap->max_recv_wr) : 0;
		qp->sq.max = cap->max_send_wr ?
			roundup_pow_of_two(cap->max_send_wr) : 0;
	} else {
		qp->rq.max = cap->max_recv_wr;
		qp->sq.max = cap->max_send_wr;
	}

	qp->rq.max_gs = cap->max_recv_sge;
	qp->sq.max_gs = max_t(int, cap->max_send_sge,
			      ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE,
				    MTHCA_INLINE_CHUNK_SIZE) /
			      sizeof (struct mthca_data_seg));

	/*
	 * For MLX transport we need 2 extra S/G entries:
	 * one for the header and one for the checksum at the end
	 */
	if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) ||
	    qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg)
		return -EINVAL;

	return 0;
}

int mthca_alloc_qp(struct mthca_dev *dev,
		   struct mthca_pd *pd,
		   struct mthca_cq *send_cq,
		   struct mthca_cq *recv_cq,
		   enum ib_qp_type type,
		   enum ib_sig_type send_policy,
		   struct ib_qp_cap *cap,
		   struct mthca_qp *qp)
{
	int err;

	err = mthca_set_qp_size(dev, cap, qp);
	if (err)
		return err;

	switch (type) {
	case IB_QPT_RC: qp->transport = RC; break;
	case IB_QPT_UC: qp->transport = UC; break;
	case IB_QPT_UD: qp->transport = UD; break;
	default: return -EINVAL;
	}

	qp->qpn = mthca_alloc(&dev->qp_table.alloc);
	if (qp->qpn == -1)
		return -ENOMEM;

	err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
				    send_policy, qp);
	if (err) {
		mthca_free(&dev->qp_table.alloc, qp->qpn);
		return err;
	}

	spin_lock_irq(&dev->qp_table.lock);
	mthca_array_set(&dev->qp_table.qp,
			qp->qpn & (dev->limits.num_qps - 1), qp);
	spin_unlock_irq(&dev->qp_table.lock);

	return 0;
}

int mthca_alloc_sqp(struct mthca_dev *dev,
		    struct mthca_pd *pd,
		    struct mthca_cq *send_cq,
		    struct mthca_cq *recv_cq,
		    enum ib_sig_type send_policy,
		    struct ib_qp_cap *cap,
		    int qpn,
		    int port,
		    struct mthca_sqp *sqp)
{
	u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
	int err;

	err = mthca_set_qp_size(dev, cap, &sqp->qp);
	if (err)
		return err;

	sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
	sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
					     &sqp->header_dma, GFP_KERNEL);
	if (!sqp->header_buf)
		return -ENOMEM;

	spin_lock_irq(&dev->qp_table.lock);
	if (mthca_array_get(&dev->qp_table.qp, mqpn))
		err = -EBUSY;
	else
		mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
	spin_unlock_irq(&dev->qp_table.lock);

	if (err)
		goto err_out;

	sqp->port         = port;
	sqp->qp.qpn       = mqpn;
	sqp->qp.transport = MLX;

	err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
				    send_policy, &sqp->qp);
	if (err)
		goto err_out_free;

	atomic_inc(&pd->sqp_count);

	return 0;

 err_out_free:
	/*
	 * Lock CQs here, so that CQ polling code can do QP lookup
	 * without taking a lock.
	 */
	spin_lock_irq(&send_cq->lock);
	if (send_cq != recv_cq)
		spin_lock(&recv_cq->lock);

	spin_lock(&dev->qp_table.lock);
	mthca_array_clear(&dev->qp_table.qp, mqpn);
	spin_unlock(&dev->qp_table.lock);

	if (send_cq != recv_cq)
		spin_unlock(&recv_cq->lock);
	spin_unlock_irq(&send_cq->lock);

 err_out:
	dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
			  sqp->header_buf, sqp->header_dma);

	return err;
}

void mthca_free_qp(struct mthca_dev *dev,
		   struct mthca_qp *qp)
{
	u8 status;
	struct mthca_cq *send_cq;
	struct mthca_cq *recv_cq;

	send_cq = to_mcq(qp->ibqp.send_cq);
	recv_cq = to_mcq(qp->ibqp.recv_cq);

	/*
	 * Lock CQs here, so that CQ polling code can do QP lookup
	 * without taking a lock.
	 */
	spin_lock_irq(&send_cq->lock);
	if (send_cq != recv_cq)
		spin_lock(&recv_cq->lock);

	spin_lock(&dev->qp_table.lock);
	mthca_array_clear(&dev->qp_table.qp,
			  qp->qpn & (dev->limits.num_qps - 1));
	spin_unlock(&dev->qp_table.lock);

	if (send_cq != recv_cq)
		spin_unlock(&recv_cq->lock);
	spin_unlock_irq(&send_cq->lock);

	atomic_dec(&qp->refcount);
	wait_event(qp->wait, !atomic_read(&qp->refcount));

	if (qp->state != IB_QPS_RESET)
		mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);

	/*
	 * If this is a userspace QP, the buffers, MR, CQs and so on
	 * will be cleaned up in userspace, so all we have to do is
	 * unref the mem-free tables and free the QPN in our table.
	 */
	if (!qp->ibqp.uobject) {
		mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
		if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
			mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);

		mthca_free_mr(dev, &qp->mr);
		mthca_free_memfree(dev, qp);
		mthca_free_wqe_buf(dev, qp);
	}

	mthca_unmap_memfree(dev, qp);

	if (is_sqp(dev, qp)) {
		atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
		dma_free_coherent(&dev->pdev->dev,
				  to_msqp(qp)->header_buf_size,
				  to_msqp(qp)->header_buf,
				  to_msqp(qp)->header_dma);
	} else
		mthca_free(&dev->qp_table.alloc, qp->qpn);
}

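/*
 * MLX QPs carry the special QPs (SMI/GSI), for which the hardware
 * sends raw IB packets.  The LRH/BTH/DETH headers are therefore built
 * in software with the ib_ud_header helpers into a per-WQE slot of
 * header_buf, and the packed header is handed to the HCA as the first
 * gather entry of the send WQE.
 */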
/* Create UD header for an MLX send and build a data segment for it */
static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
			    int ind, struct ib_send_wr *wr,
			    struct mthca_mlx_seg *mlx,
			    struct mthca_data_seg *data)
{
	int header_size;
	int err;

	ib_ud_header_init(256, /* assume a MAD */
			  sqp->ud_header.grh_present,
			  &sqp->ud_header);

	err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
	if (err)
		return err;
	mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
	mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
				  (sqp->ud_header.lrh.destination_lid == 0xffff ?
				   MTHCA_MLX_SLR : 0) |
				  (sqp->ud_header.lrh.service_level << 8));
	mlx->rlid = sqp->ud_header.lrh.destination_lid;
	mlx->vcrc = 0;

	switch (wr->opcode) {
	case IB_WR_SEND:
		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
		sqp->ud_header.immediate_present = 0;
		break;
	case IB_WR_SEND_WITH_IMM:
		sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
		sqp->ud_header.immediate_present = 1;
		sqp->ud_header.immediate_data = wr->imm_data;
		break;
	default:
		return -EINVAL;
	}

	sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
	if (sqp->ud_header.lrh.destination_lid == 0xffff)
		sqp->ud_header.lrh.source_lid = 0xffff;
	sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
	if (!sqp->qp.ibqp.qp_num)
		ib_get_cached_pkey(&dev->ib_dev, sqp->port,
				   sqp->pkey_index,
				   &sqp->ud_header.bth.pkey);
	else
		ib_get_cached_pkey(&dev->ib_dev, sqp->port,
				   wr->wr.ud.pkey_index,
				   &sqp->ud_header.bth.pkey);
	cpu_to_be16s(&sqp->ud_header.bth.pkey);
	sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
	sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
					       sqp->qkey : wr->wr.ud.remote_qkey);
	sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);

	header_size = ib_ud_header_pack(&sqp->ud_header,
					sqp->header_buf +
					ind * MTHCA_UD_HEADER_SIZE);

	data->byte_count = cpu_to_be32(header_size);
	data->lkey       = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
	data->addr       = cpu_to_be64(sqp->header_dma +
				       ind * MTHCA_UD_HEADER_SIZE);

	return 0;
}

static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
				    struct ib_cq *ib_cq)
{
	unsigned cur;
	struct mthca_cq *cq;

	cur = wq->head - wq->tail;
	if (likely(cur + nreq < wq->max))
		return 0;

	cq = to_mcq(ib_cq);
	spin_lock(&cq->lock);
	cur = wq->head - wq->tail;
	spin_unlock(&cq->lock);

	return cur + nreq >= wq->max;
}

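/*
 * On Tavor, posted WQEs are chained by patching the previous WQE's
 * nda_op/ee_nds fields, and the whole batch is then kicked off with a
 * single write to the MMIO send doorbell carrying the address, opcode
 * and size of the first new WQE.  The wmb() between the two prev_wqe
 * stores keeps the descriptor contents visible before the link that
 * makes them reachable.
 */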
int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			  struct ib_send_wr **bad_wr)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	void *wqe;
	void *prev_wqe;
	unsigned long flags;
	int err = 0;
	int nreq;
	int i;
	int size;
	int size0 = 0;
	u32 f0 = 0;
	int ind;
	u8 op0 = 0;

	spin_lock_irqsave(&qp->sq.lock, flags);

	/* XXX check that state is OK to post send */

	ind = qp->sq.next_ind;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
			mthca_err(dev, "SQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", qp->qpn,
					qp->sq.head, qp->sq.tail,
					qp->sq.max, nreq);
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;

		((struct mthca_next_seg *) wqe)->nda_op = 0;
		((struct mthca_next_seg *) wqe)->ee_nds = 0;
		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_flags & IB_SEND_SIGNALED) ?
			 cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_flags & IB_SEND_SOLICITED) ?
			 cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
			cpu_to_be32(1);
		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		switch (qp->transport) {
		case RC:
			switch (wr->opcode) {
			case IB_WR_ATOMIC_CMP_AND_SWP:
			case IB_WR_ATOMIC_FETCH_AND_ADD:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cpu_to_be64(wr->wr.atomic.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					cpu_to_be32(wr->wr.atomic.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;

				wqe += sizeof (struct mthca_raddr_seg);

				if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cpu_to_be64(wr->wr.atomic.swap);
					((struct mthca_atomic_seg *) wqe)->compare =
						cpu_to_be64(wr->wr.atomic.compare_add);
				} else {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cpu_to_be64(wr->wr.atomic.compare_add);
					((struct mthca_atomic_seg *) wqe)->compare = 0;
				}

				wqe += sizeof (struct mthca_atomic_seg);
				size += sizeof (struct mthca_raddr_seg) / 16 +
					sizeof (struct mthca_atomic_seg);
				break;

			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
			case IB_WR_RDMA_READ:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cpu_to_be64(wr->wr.rdma.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					cpu_to_be32(wr->wr.rdma.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case UC:
			switch (wr->opcode) {
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cpu_to_be64(wr->wr.rdma.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					cpu_to_be32(wr->wr.rdma.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case UD:
			((struct mthca_tavor_ud_seg *) wqe)->lkey =
				cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
			((struct mthca_tavor_ud_seg *) wqe)->av_addr =
				cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
			((struct mthca_tavor_ud_seg *) wqe)->dqpn =
				cpu_to_be32(wr->wr.ud.remote_qpn);
			((struct mthca_tavor_ud_seg *) wqe)->qkey =
				cpu_to_be32(wr->wr.ud.remote_qkey);

			wqe += sizeof (struct mthca_tavor_ud_seg);
			size += sizeof (struct mthca_tavor_ud_seg) / 16;
			break;

		case MLX:
			err = build_mlx_header(dev, to_msqp(qp), ind, wr,
					       wqe - sizeof (struct mthca_next_seg),
					       wqe);
			if (err) {
				*bad_wr = wr;
				goto out;
			}
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
			break;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			mthca_err(dev, "too many gathers\n");
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cpu_to_be32(wr->sg_list[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				cpu_to_be32(wr->sg_list[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				cpu_to_be64(wr->sg_list[i].addr);
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		/* Add one more inline data segment for ICRC */
		if (qp->transport == MLX) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cpu_to_be32((1 << 31) | 4);
			((u32 *) wqe)[1] = 0;
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		qp->wrid[ind + qp->rq.max] = wr->wr_id;

		if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
			mthca_err(dev, "opcode invalid\n");
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		if (prev_wqe) {
			((struct mthca_next_seg *) prev_wqe)->nda_op =
				cpu_to_be32(((ind << qp->sq.wqe_shift) +
					     qp->send_wqe_offset) |
					    mthca_opcode[wr->opcode]);
			wmb();
			((struct mthca_next_seg *) prev_wqe)->ee_nds =
				cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size);
		}

		if (!size0) {
			size0 = size;
			op0   = mthca_opcode[wr->opcode];
		}

		++ind;
		if (unlikely(ind >= qp->sq.max))
			ind -= qp->sq.max;
	}

out:
	if (likely(nreq)) {
		u32 doorbell[2];

		doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
					   qp->send_wqe_offset) | f0 | op0);
		doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);

		wmb();

		mthca_write64(doorbell,
			      dev->kar + MTHCA_SEND_DOORBELL,
			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
	}

	qp->sq.next_ind = ind;
	qp->sq.head    += nreq;

	spin_unlock_irqrestore(&qp->sq.lock, flags);
	return err;
}

int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			     struct ib_recv_wr **bad_wr)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	unsigned long flags;
	int err = 0;
	int nreq;
	int i;
	int size;
	int size0 = 0;
	int ind;
	void *wqe;
	void *prev_wqe;

	spin_lock_irqsave(&qp->rq.lock, flags);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.next_ind;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
			mthca_err(dev, "RQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", qp->qpn,
					qp->rq.head, qp->rq.tail,
					qp->rq.max, nreq);
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);
		prev_wqe = qp->rq.last;
		qp->rq.last = wqe;

		((struct mthca_next_seg *) wqe)->nda_op = 0;
		((struct mthca_next_seg *) wqe)->ee_nds =
			cpu_to_be32(MTHCA_NEXT_DBD);
		((struct mthca_next_seg *) wqe)->flags = 0;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cpu_to_be32(wr->sg_list[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				cpu_to_be32(wr->sg_list[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				cpu_to_be64(wr->sg_list[i].addr);
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		qp->wrid[ind] = wr->wr_id;

		if (likely(prev_wqe)) {
			((struct mthca_next_seg *) prev_wqe)->nda_op =
				cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
			wmb();
			((struct mthca_next_seg *) prev_wqe)->ee_nds =
				cpu_to_be32(MTHCA_NEXT_DBD | size);
		}

		if (!size0)
			size0 = size;

		++ind;
		if (unlikely(ind >= qp->rq.max))
			ind -= qp->rq.max;
	}

out:
	if (likely(nreq)) {
		u32 doorbell[2];

		doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
		doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);

		wmb();

		mthca_write64(doorbell,
			      dev->kar + MTHCA_RECEIVE_DOORBELL,
			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
	}

	qp->rq.next_ind = ind;
	qp->rq.head    += nreq;

	spin_unlock_irqrestore(&qp->rq.lock, flags);
	return err;
}

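/*
 * Mem-free (Arbel) HCAs use a doorbell record in host memory in
 * addition to the MMIO doorbell.  The send path below updates the
 * record with the new sq.head, issues a wmb() so the record is visible
 * before the MMIO write, and then rings the send doorbell with the WQE
 * count and first opcode.
 */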
int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			  struct ib_send_wr **bad_wr)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	void *wqe;
	void *prev_wqe;
	unsigned long flags;
	int err = 0;
	int nreq;
	int i;
	int size;
	int size0 = 0;
	u32 f0 = 0;
	int ind;
	u8 op0 = 0;

	spin_lock_irqsave(&qp->sq.lock, flags);

	/* XXX check that state is OK to post send */

	ind = qp->sq.head & (qp->sq.max - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
			mthca_err(dev, "SQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", qp->qpn,
					qp->sq.head, qp->sq.tail,
					qp->sq.max, nreq);
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_send_wqe(qp, ind);
		prev_wqe = qp->sq.last;
		qp->sq.last = wqe;

		((struct mthca_next_seg *) wqe)->flags =
			((wr->send_flags & IB_SEND_SIGNALED) ?
			 cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
			((wr->send_flags & IB_SEND_SOLICITED) ?
			 cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
			cpu_to_be32(1);
		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
		    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
			((struct mthca_next_seg *) wqe)->imm = wr->imm_data;

		wqe += sizeof (struct mthca_next_seg);
		size = sizeof (struct mthca_next_seg) / 16;

		switch (qp->transport) {
		case RC:
			switch (wr->opcode) {
			case IB_WR_ATOMIC_CMP_AND_SWP:
			case IB_WR_ATOMIC_FETCH_AND_ADD:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cpu_to_be64(wr->wr.atomic.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					cpu_to_be32(wr->wr.atomic.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;

				wqe += sizeof (struct mthca_raddr_seg);

				if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cpu_to_be64(wr->wr.atomic.swap);
					((struct mthca_atomic_seg *) wqe)->compare =
						cpu_to_be64(wr->wr.atomic.compare_add);
				} else {
					((struct mthca_atomic_seg *) wqe)->swap_add =
						cpu_to_be64(wr->wr.atomic.compare_add);
					((struct mthca_atomic_seg *) wqe)->compare = 0;
				}

				wqe += sizeof (struct mthca_atomic_seg);
				size += sizeof (struct mthca_raddr_seg) / 16 +
					sizeof (struct mthca_atomic_seg);
				break;

			case IB_WR_RDMA_READ:
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cpu_to_be64(wr->wr.rdma.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					cpu_to_be32(wr->wr.rdma.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case UC:
			switch (wr->opcode) {
			case IB_WR_RDMA_WRITE:
			case IB_WR_RDMA_WRITE_WITH_IMM:
				((struct mthca_raddr_seg *) wqe)->raddr =
					cpu_to_be64(wr->wr.rdma.remote_addr);
				((struct mthca_raddr_seg *) wqe)->rkey =
					cpu_to_be32(wr->wr.rdma.rkey);
				((struct mthca_raddr_seg *) wqe)->reserved = 0;
				wqe += sizeof (struct mthca_raddr_seg);
				size += sizeof (struct mthca_raddr_seg) / 16;
				break;

			default:
				/* No extra segments required for sends */
				break;
			}

			break;

		case UD:
			memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
			       to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
			((struct mthca_arbel_ud_seg *) wqe)->dqpn =
				cpu_to_be32(wr->wr.ud.remote_qpn);
			((struct mthca_arbel_ud_seg *) wqe)->qkey =
				cpu_to_be32(wr->wr.ud.remote_qkey);

			wqe += sizeof (struct mthca_arbel_ud_seg);
			size += sizeof (struct mthca_arbel_ud_seg) / 16;
			break;

		case MLX:
			err = build_mlx_header(dev, to_msqp(qp), ind, wr,
					       wqe - sizeof (struct mthca_next_seg),
					       wqe);
			if (err) {
				*bad_wr = wr;
				goto out;
			}
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
			break;
		}

		if (wr->num_sge > qp->sq.max_gs) {
			mthca_err(dev, "too many gathers\n");
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cpu_to_be32(wr->sg_list[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				cpu_to_be32(wr->sg_list[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				cpu_to_be64(wr->sg_list[i].addr);
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		/* Add one more inline data segment for ICRC */
		if (qp->transport == MLX) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cpu_to_be32((1 << 31) | 4);
			((u32 *) wqe)[1] = 0;
			wqe += sizeof (struct mthca_data_seg);
			size += sizeof (struct mthca_data_seg) / 16;
		}

		qp->wrid[ind + qp->rq.max] = wr->wr_id;

		if (wr->opcode >= ARRAY_SIZE(mthca_opcode)) {
			mthca_err(dev, "opcode invalid\n");
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		if (likely(prev_wqe)) {
			((struct mthca_next_seg *) prev_wqe)->nda_op =
				cpu_to_be32(((ind << qp->sq.wqe_shift) +
					     qp->send_wqe_offset) |
					    mthca_opcode[wr->opcode]);
			wmb();
			((struct mthca_next_seg *) prev_wqe)->ee_nds =
				cpu_to_be32(MTHCA_NEXT_DBD | size);
		}

		if (!size0) {
			size0 = size;
			op0   = mthca_opcode[wr->opcode];
		}

		++ind;
		if (unlikely(ind >= qp->sq.max))
			ind -= qp->sq.max;
	}

out:
	if (likely(nreq)) {
		u32 doorbell[2];

		doorbell[0] = cpu_to_be32((nreq << 24)                  |
					  ((qp->sq.head & 0xffff) << 8) |
					  f0 | op0);
		doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);

		qp->sq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();
		*qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);

		/*
		 * Make sure doorbell record is written before we
		 * write MMIO send doorbell.
		 */
		wmb();
		mthca_write64(doorbell,
			      dev->kar + MTHCA_SEND_DOORBELL,
			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
	}

	spin_unlock_irqrestore(&qp->sq.lock, flags);
	return err;
}

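/*
 * Because Arbel receive WQEs were pre-linked into a ring at QP
 * creation, posting a receive only needs to fill in the scatter list
 * (terminating any unused entry with the invalid lkey) and bump the
 * doorbell record; no MMIO doorbell write is required.
 */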
int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			     struct ib_recv_wr **bad_wr)
{
	struct mthca_dev *dev = to_mdev(ibqp->device);
	struct mthca_qp *qp = to_mqp(ibqp);
	unsigned long flags;
	int err = 0;
	int nreq;
	int ind;
	int i;
	void *wqe;

	spin_lock_irqsave(&qp->rq.lock, flags);

	/* XXX check that state is OK to post receive */

	ind = qp->rq.head & (qp->rq.max - 1);

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
			mthca_err(dev, "RQ %06x full (%u head, %u tail,"
					" %d max, %d nreq)\n", qp->qpn,
					qp->rq.head, qp->rq.tail,
					qp->rq.max, nreq);
			err = -ENOMEM;
			*bad_wr = wr;
			goto out;
		}

		wqe = get_recv_wqe(qp, ind);

		((struct mthca_next_seg *) wqe)->flags = 0;

		wqe += sizeof (struct mthca_next_seg);

		if (unlikely(wr->num_sge > qp->rq.max_gs)) {
			err = -EINVAL;
			*bad_wr = wr;
			goto out;
		}

		for (i = 0; i < wr->num_sge; ++i) {
			((struct mthca_data_seg *) wqe)->byte_count =
				cpu_to_be32(wr->sg_list[i].length);
			((struct mthca_data_seg *) wqe)->lkey =
				cpu_to_be32(wr->sg_list[i].lkey);
			((struct mthca_data_seg *) wqe)->addr =
				cpu_to_be64(wr->sg_list[i].addr);
			wqe += sizeof (struct mthca_data_seg);
		}

		if (i < qp->rq.max_gs) {
			((struct mthca_data_seg *) wqe)->byte_count = 0;
			((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
			((struct mthca_data_seg *) wqe)->addr = 0;
		}

		qp->wrid[ind] = wr->wr_id;

		++ind;
		if (unlikely(ind >= qp->rq.max))
			ind -= qp->rq.max;
	}
out:
	if (likely(nreq)) {
		qp->rq.head += nreq;

		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();
		*qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
	}

	spin_unlock_irqrestore(&qp->rq.lock, flags);
	return err;
}

int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
		       int index, int *dbd, u32 *new_wqe)
{
	struct mthca_next_seg *next;

	if (is_send)
		next = get_send_wqe(qp, index);
	else
		next = get_recv_wqe(qp, index);

	if (mthca_is_memfree(dev))
		*dbd = 1;
	else
		*dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD));
	if (next->ee_nds & cpu_to_be32(0x3f))
		*new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) |
			(next->ee_nds & cpu_to_be32(0x3f));
	else
		*new_wqe = 0;

	return 0;
}

int __devinit mthca_init_qp_table(struct mthca_dev *dev)
{
	int err;
	u8 status;
	int i;

	spin_lock_init(&dev->qp_table.lock);

	/*
	 * We reserve 2 extra QPs per port for the special QPs.  The
	 * special QP for port 1 has to be even, so round up.
	 */
	dev->qp_table.sqp_start = (dev->limits.reserved_qps + 1) & ~1UL;
	err = mthca_alloc_init(&dev->qp_table.alloc,
			       dev->limits.num_qps,
			       (1 << 24) - 1,
			       dev->qp_table.sqp_start +
			       MTHCA_MAX_PORTS * 2);
	if (err)
		return err;

	err = mthca_array_init(&dev->qp_table.qp,
			       dev->limits.num_qps);
	if (err) {
		mthca_alloc_cleanup(&dev->qp_table.alloc);
		return err;
	}

	for (i = 0; i < 2; ++i) {
		err = mthca_CONF_SPECIAL_QP(dev, i ? IB_QPT_GSI : IB_QPT_SMI,
					    dev->qp_table.sqp_start + i * 2,
					    &status);
		if (err)
			goto err_out;
		if (status) {
			mthca_warn(dev, "CONF_SPECIAL_QP returned "
				   "status %02x, aborting.\n",
				   status);
			err = -EINVAL;
			goto err_out;
		}
	}
	return 0;

 err_out:
	for (i = 0; i < 2; ++i)
		mthca_CONF_SPECIAL_QP(dev, i, 0, &status);

	mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
	mthca_alloc_cleanup(&dev->qp_table.alloc);

	return err;
}

void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev)
{
	int i;
	u8 status;

	for (i = 0; i < 2; ++i)
		mthca_CONF_SPECIAL_QP(dev, i, 0, &status);

	mthca_alloc_cleanup(&dev->qp_table.alloc);
}