1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_srq.c 29 * Tavor Shared Receive Queue Processing Routines 30 * 31 * Implements all the routines necessary for allocating, freeing, querying, 32 * modifying and posting shared receive queues. 33 */ 34 35 #include <sys/sysmacros.h> 36 #include <sys/types.h> 37 #include <sys/conf.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/modctl.h> 41 #include <sys/bitmap.h> 42 43 #include <sys/ib/adapters/tavor/tavor.h> 44 45 static void tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl, 46 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl); 47 48 /* 49 * tavor_srq_alloc() 50 * Context: Can be called only from user or kernel context. 51 */ 52 int 53 tavor_srq_alloc(tavor_state_t *state, tavor_srq_info_t *srqinfo, 54 uint_t sleepflag, tavor_srq_options_t *op) 55 { 56 ibt_srq_hdl_t ibt_srqhdl; 57 tavor_pdhdl_t pd; 58 ibt_srq_sizes_t *sizes; 59 ibt_srq_sizes_t *real_sizes; 60 tavor_srqhdl_t *srqhdl; 61 ibt_srq_flags_t flags; 62 tavor_rsrc_t *srqc, *rsrc; 63 tavor_hw_srqc_t srqc_entry; 64 uint32_t *buf; 65 tavor_srqhdl_t srq; 66 tavor_umap_db_entry_t *umapdb; 67 ibt_mr_attr_t mr_attr; 68 tavor_mr_options_t mr_op; 69 tavor_mrhdl_t mr; 70 uint64_t addr; 71 uint64_t value, srq_desc_off; 72 uint32_t lkey; 73 uint32_t log_srq_size; 74 uint32_t uarpg; 75 uint_t wq_location, dma_xfer_mode, srq_is_umap; 76 int flag, status; 77 char *errormsg; 78 uint_t max_sgl; 79 uint_t wqesz; 80 81 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes)) 82 83 TAVOR_TNF_ENTER(tavor_srq_alloc); 84 85 /* 86 * Check the "options" flag. Currently this flag tells the driver 87 * whether or not the SRQ's work queues should be come from normal 88 * system memory or whether they should be allocated from DDR memory. 89 */ 90 if (op == NULL) { 91 wq_location = TAVOR_QUEUE_LOCATION_NORMAL; 92 } else { 93 wq_location = op->srqo_wq_loc; 94 } 95 96 /* 97 * Extract the necessary info from the tavor_srq_info_t structure 98 */ 99 real_sizes = srqinfo->srqi_real_sizes; 100 sizes = srqinfo->srqi_sizes; 101 pd = srqinfo->srqi_pd; 102 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl; 103 flags = srqinfo->srqi_flags; 104 srqhdl = srqinfo->srqi_srqhdl; 105 106 /* 107 * Determine whether SRQ is being allocated for userland access or 108 * whether it is being allocated for kernel access. If the SRQ is 109 * being allocated for userland access, then lookup the UAR doorbell 110 * page number for the current process. Note: If this is not found 111 * (e.g. if the process has not previously open()'d the Tavor driver), 112 * then an error is returned. 113 */ 114 srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0; 115 if (srq_is_umap) { 116 status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(), 117 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); 118 if (status != DDI_SUCCESS) { 119 /* Set "status" and "errormsg" and goto failure */ 120 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page"); 121 goto srqalloc_fail3; 122 } 123 uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx; 124 } 125 126 /* Increase PD refcnt */ 127 tavor_pd_refcnt_inc(pd); 128 129 /* Allocate an SRQ context entry */ 130 status = tavor_rsrc_alloc(state, TAVOR_SRQC, 1, sleepflag, &srqc); 131 if (status != DDI_SUCCESS) { 132 /* Set "status" and "errormsg" and goto failure */ 133 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ context"); 134 goto srqalloc_fail1; 135 } 136 137 /* Allocate the SRQ Handle entry */ 138 status = tavor_rsrc_alloc(state, TAVOR_SRQHDL, 1, sleepflag, &rsrc); 139 if (status != DDI_SUCCESS) { 140 /* Set "status" and "errormsg" and goto failure */ 141 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ handle"); 142 goto srqalloc_fail2; 143 } 144 145 srq = (tavor_srqhdl_t)rsrc->tr_addr; 146 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq)) 147 148 srq->srq_srqnum = srqc->tr_indx; /* just use index */ 149 150 /* 151 * If this will be a user-mappable SRQ, then allocate an entry for 152 * the "userland resources database". This will later be added to 153 * the database (after all further SRQ operations are successful). 154 * If we fail here, we must undo the reference counts and the 155 * previous resource allocation. 156 */ 157 if (srq_is_umap) { 158 umapdb = tavor_umap_db_alloc(state->ts_instance, 159 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, 160 (uint64_t)(uintptr_t)rsrc); 161 if (umapdb == NULL) { 162 /* Set "status" and "errormsg" and goto failure */ 163 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add"); 164 goto srqalloc_fail3; 165 } 166 } 167 168 /* 169 * Calculate the appropriate size for the SRQ. 170 * Note: All Tavor SRQs must be a power-of-2 in size. Also 171 * they may not be any smaller than TAVOR_SRQ_MIN_SIZE. This step 172 * is to round the requested size up to the next highest power-of-2 173 */ 174 sizes->srq_wr_sz = max(sizes->srq_wr_sz, TAVOR_SRQ_MIN_SIZE); 175 log_srq_size = highbit(sizes->srq_wr_sz); 176 if (ISP2(sizes->srq_wr_sz)) { 177 log_srq_size = log_srq_size - 1; 178 } 179 180 /* 181 * Next we verify that the rounded-up size is valid (i.e. consistent 182 * with the device limits and/or software-configured limits). If not, 183 * then obviously we have a lot of cleanup to do before returning. 184 */ 185 if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) { 186 /* Set "status" and "errormsg" and goto failure */ 187 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size"); 188 goto srqalloc_fail4; 189 } 190 191 /* 192 * Next we verify that the requested number of SGL is valid (i.e. 193 * consistent with the device limits and/or software-configured 194 * limits). If not, then obviously the same cleanup needs to be done. 195 */ 196 max_sgl = state->ts_cfg_profile->cp_srq_max_sgl; 197 if (sizes->srq_sgl_sz > max_sgl) { 198 /* Set "status" and "errormsg" and goto failure */ 199 TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max SRQ SGL"); 200 goto srqalloc_fail4; 201 } 202 203 /* 204 * Determine the SRQ's WQE sizes. This depends on the requested 205 * number of SGLs. Note: This also has the side-effect of 206 * calculating the real number of SGLs (for the calculated WQE size) 207 */ 208 tavor_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz, 209 TAVOR_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz, 210 &srq->srq_wq_sgl); 211 212 /* 213 * Allocate the memory for SRQ work queues. Note: The location from 214 * which we will allocate these work queues has been passed in through 215 * the tavor_qp_options_t structure. Since Tavor work queues are not 216 * allowed to cross a 32-bit (4GB) boundary, the alignment of the work 217 * queue memory is very important. We used to allocate work queues 218 * (the combined receive and send queues) so that they would be aligned 219 * on their combined size. That alignment guaranteed that they would 220 * never cross the 4GB boundary (Tavor work queues are on the order of 221 * MBs at maximum). Now we are able to relax this alignment constraint 222 * by ensuring that the IB address assigned to the queue memory (as a 223 * result of the tavor_mr_register() call) is offset from zero. 224 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to 225 * guarantee the alignment, but when attempting to use IOMMU bypass 226 * mode we found that we were not allowed to specify any alignment that 227 * was more restrictive than the system page size. So we avoided this 228 * constraint by passing two alignment values, one for the memory 229 * allocation itself and the other for the DMA handle (for later bind). 230 * This used to cause more memory than necessary to be allocated (in 231 * order to guarantee the more restrictive alignment contraint). But 232 * be guaranteeing the zero-based IB virtual address for the queue, we 233 * are able to conserve this memory. 234 * 235 * Note: If SRQ is not user-mappable, then it may come from either 236 * kernel system memory or from HCA-attached local DDR memory. 237 * 238 * Note2: We align this queue on a pagesize boundary. This is required 239 * to make sure that all the resulting IB addresses will start at 0, for 240 * a zero-based queue. By making sure we are aligned on at least a 241 * page, any offset we use into our queue will be the same as when we 242 * perform tavor_srq_modify() operations later. 243 */ 244 wqesz = (1 << srq->srq_wq_log_wqesz); 245 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz; 246 srq->srq_wqinfo.qa_alloc_align = PAGESIZE; 247 srq->srq_wqinfo.qa_bind_align = PAGESIZE; 248 if (srq_is_umap) { 249 srq->srq_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND; 250 } else { 251 srq->srq_wqinfo.qa_location = wq_location; 252 } 253 status = tavor_queue_alloc(state, &srq->srq_wqinfo, sleepflag); 254 if (status != DDI_SUCCESS) { 255 /* Set "status" and "errormsg" and goto failure */ 256 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq"); 257 goto srqalloc_fail4; 258 } 259 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned; 260 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 261 262 /* 263 * Register the memory for the SRQ work queues. The memory for the SRQ 264 * must be registered in the Tavor TPT tables. This gives us the LKey 265 * to specify in the SRQ context later. Note: If the work queue is to 266 * be allocated from DDR memory, then only a "bypass" mapping is 267 * appropriate. And if the SRQ memory is user-mappable, then we force 268 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment 269 * restriction, we pass the "mro_bind_override_addr" flag in the call 270 * to tavor_mr_register(). This guarantees that the resulting IB vaddr 271 * will be zero-based (modulo the offset into the first page). If we 272 * fail here, we still have the bunch of resource and reference count 273 * cleanup to do. 274 */ 275 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : 276 IBT_MR_NOSLEEP; 277 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf; 278 mr_attr.mr_len = srq->srq_wqinfo.qa_size; 279 mr_attr.mr_as = NULL; 280 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE; 281 if (srq_is_umap) { 282 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass; 283 } else { 284 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) { 285 mr_op.mro_bind_type = 286 state->ts_cfg_profile->cp_iommu_bypass; 287 dma_xfer_mode = 288 state->ts_cfg_profile->cp_streaming_consistent; 289 if (dma_xfer_mode == DDI_DMA_STREAMING) { 290 mr_attr.mr_flags |= IBT_MR_NONCOHERENT; 291 } 292 } else { 293 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS; 294 } 295 } 296 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl; 297 mr_op.mro_bind_override_addr = 1; 298 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op); 299 if (status != DDI_SUCCESS) { 300 /* Set "status" and "errormsg" and goto failure */ 301 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr"); 302 goto srqalloc_fail5; 303 } 304 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 305 addr = mr->mr_bindinfo.bi_addr; 306 lkey = mr->mr_lkey; 307 308 /* 309 * Calculate the offset between the kernel virtual address space 310 * and the IB virtual address space. This will be used when 311 * posting work requests to properly initialize each WQE. 312 */ 313 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned - 314 (uint64_t)mr->mr_bindinfo.bi_addr; 315 316 /* 317 * Create WQL and Wridlist for use by this SRQ 318 */ 319 srq->srq_wrid_wql = tavor_wrid_wql_create(state); 320 if (srq->srq_wrid_wql == NULL) { 321 /* Set "status" and "errormsg" and goto failure */ 322 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wql create"); 323 goto srqalloc_fail6; 324 } 325 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wrid_wql))) 326 327 srq->srq_wridlist = tavor_wrid_get_list(1 << log_srq_size); 328 if (srq->srq_wridlist == NULL) { 329 /* Set "status" and "errormsg" and goto failure */ 330 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wridlist create"); 331 goto srqalloc_fail7; 332 } 333 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wridlist))) 334 335 srq->srq_wridlist->wl_srq_en = 1; 336 srq->srq_wridlist->wl_free_list_indx = -1; 337 338 /* 339 * Fill in all the return arguments (if necessary). This includes 340 * real queue size and real SGLs. 341 */ 342 if (real_sizes != NULL) { 343 real_sizes->srq_wr_sz = (1 << log_srq_size); 344 real_sizes->srq_sgl_sz = srq->srq_wq_sgl; 345 } 346 347 /* 348 * Fill in the SRQC entry. This is the final step before passing 349 * ownership of the SRQC entry to the Tavor hardware. We use all of 350 * the information collected/calculated above to fill in the 351 * requisite portions of the SRQC. Note: If this SRQ is going to be 352 * used for userland access, then we need to set the UAR page number 353 * appropriately (otherwise it's a "don't care") 354 */ 355 bzero(&srqc_entry, sizeof (tavor_hw_srqc_t)); 356 srqc_entry.wqe_addr_h = (addr >> 32); 357 srqc_entry.next_wqe_addr_l = 0; 358 srqc_entry.ds = (wqesz >> 4); 359 srqc_entry.state = TAVOR_SRQ_STATE_HW_OWNER; 360 srqc_entry.pd = pd->pd_pdnum; 361 srqc_entry.lkey = lkey; 362 srqc_entry.wqe_cnt = 0; 363 if (srq_is_umap) { 364 srqc_entry.uar = uarpg; 365 } else { 366 srqc_entry.uar = 0; 367 } 368 369 /* 370 * Write the SRQC entry to hardware. Lastly, we pass ownership of 371 * the entry to the hardware (using the Tavor SW2HW_SRQ firmware 372 * command). Note: In general, this operation shouldn't fail. But 373 * if it does, we have to undo everything we've done above before 374 * returning error. 375 */ 376 status = tavor_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry, 377 sizeof (tavor_hw_srqc_t), srq->srq_srqnum, 378 sleepflag); 379 if (status != TAVOR_CMD_SUCCESS) { 380 cmn_err(CE_CONT, "Tavor: SW2HW_SRQ command failed: %08x\n", 381 status); 382 TNF_PROBE_1(tavor_srq_alloc_sw2hw_srq_cmd_fail, 383 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 384 /* Set "status" and "errormsg" and goto failure */ 385 TAVOR_TNF_FAIL(IBT_FAILURE, "tavor SW2HW_SRQ command"); 386 goto srqalloc_fail8; 387 } 388 389 /* 390 * Fill in the rest of the Tavor SRQ handle. We can update 391 * the following fields for use in further operations on the SRQ. 392 */ 393 srq->srq_srqcrsrcp = srqc; 394 srq->srq_rsrcp = rsrc; 395 srq->srq_mrhdl = mr; 396 srq->srq_refcnt = 0; 397 srq->srq_is_umap = srq_is_umap; 398 srq->srq_uarpg = (srq->srq_is_umap) ? uarpg : 0; 399 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 400 srq->srq_pdhdl = pd; 401 srq->srq_wq_lastwqeindx = -1; 402 srq->srq_wq_bufsz = (1 << log_srq_size); 403 srq->srq_wq_buf = buf; 404 srq->srq_desc_off = srq_desc_off; 405 srq->srq_hdlrarg = (void *)ibt_srqhdl; 406 srq->srq_state = 0; 407 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size); 408 srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl; 409 410 /* Determine if later ddi_dma_sync will be necessary */ 411 srq->srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo); 412 413 /* 414 * Put SRQ handle in Tavor SRQNum-to-SRQhdl list. Then fill in the 415 * "srqhdl" and return success 416 */ 417 ASSERT(state->ts_srqhdl[srqc->tr_indx] == NULL); 418 state->ts_srqhdl[srqc->tr_indx] = srq; 419 420 /* 421 * If this is a user-mappable SRQ, then we need to insert the 422 * previously allocated entry into the "userland resources database". 423 * This will allow for later lookup during devmap() (i.e. mmap()) 424 * calls. 425 */ 426 if (srq->srq_is_umap) { 427 tavor_umap_db_add(umapdb); 428 } else { 429 mutex_enter(&srq->srq_wrid_wql->wql_lock); 430 tavor_wrid_list_srq_init(srq->srq_wridlist, srq, 0); 431 mutex_exit(&srq->srq_wrid_wql->wql_lock); 432 } 433 434 *srqhdl = srq; 435 436 TAVOR_TNF_EXIT(tavor_srq_alloc); 437 return (status); 438 439 /* 440 * The following is cleanup for all possible failure cases in this routine 441 */ 442 srqalloc_fail8: 443 kmem_free(srq->srq_wridlist->wl_wre, srq->srq_wridlist->wl_size * 444 sizeof (tavor_wrid_entry_t)); 445 kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t)); 446 srqalloc_fail7: 447 tavor_wql_refcnt_dec(srq->srq_wrid_wql); 448 srqalloc_fail6: 449 if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 450 TAVOR_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) { 451 TAVOR_WARNING(state, "failed to deregister SRQ memory"); 452 } 453 srqalloc_fail5: 454 tavor_queue_free(state, &srq->srq_wqinfo); 455 srqalloc_fail4: 456 if (srq_is_umap) { 457 tavor_umap_db_free(umapdb); 458 } 459 srqalloc_fail3: 460 tavor_rsrc_free(state, &rsrc); 461 srqalloc_fail2: 462 tavor_rsrc_free(state, &srqc); 463 srqalloc_fail1: 464 tavor_pd_refcnt_dec(pd); 465 srqalloc_fail: 466 TNF_PROBE_1(tavor_srq_alloc_fail, TAVOR_TNF_ERROR, "", 467 tnf_string, msg, errormsg); 468 TAVOR_TNF_EXIT(tavor_srq_alloc); 469 return (status); 470 } 471 472 473 /* 474 * tavor_srq_free() 475 * Context: Can be called only from user or kernel context. 476 */ 477 /* ARGSUSED */ 478 int 479 tavor_srq_free(tavor_state_t *state, tavor_srqhdl_t *srqhdl, uint_t sleepflag) 480 { 481 tavor_rsrc_t *srqc, *rsrc; 482 tavor_umap_db_entry_t *umapdb; 483 uint64_t value; 484 tavor_srqhdl_t srq; 485 tavor_mrhdl_t mr; 486 tavor_pdhdl_t pd; 487 tavor_hw_srqc_t srqc_entry; 488 uint32_t srqnum; 489 uint32_t size; 490 uint_t maxprot; 491 int status; 492 493 TAVOR_TNF_ENTER(tavor_srq_free); 494 495 /* 496 * Pull all the necessary information from the Tavor Shared Receive 497 * Queue handle. This is necessary here because the resource for the 498 * SRQ handle is going to be freed up as part of this operation. 499 */ 500 srq = *srqhdl; 501 mutex_enter(&srq->srq_lock); 502 srqc = srq->srq_srqcrsrcp; 503 rsrc = srq->srq_rsrcp; 504 pd = srq->srq_pdhdl; 505 mr = srq->srq_mrhdl; 506 srqnum = srq->srq_srqnum; 507 508 /* 509 * If there are work queues still associated with the SRQ, then return 510 * an error. Otherwise, we will be holding the SRQ lock. 511 */ 512 if (srq->srq_refcnt != 0) { 513 mutex_exit(&srq->srq_lock); 514 TNF_PROBE_1(tavor_srq_free_refcnt_fail, TAVOR_TNF_ERROR, "", 515 tnf_int, refcnt, srq->srq_refcnt); 516 TAVOR_TNF_EXIT(tavor_srq_free); 517 return (IBT_SRQ_IN_USE); 518 } 519 520 /* 521 * If this was a user-mappable SRQ, then we need to remove its entry 522 * from the "userland resources database". If it is also currently 523 * mmap()'d out to a user process, then we need to call 524 * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping. 525 * We also need to invalidate the SRQ tracking information for the 526 * user mapping. 527 */ 528 if (srq->srq_is_umap) { 529 status = tavor_umap_db_find(state->ts_instance, srq->srq_srqnum, 530 MLNX_UMAP_SRQMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE, 531 &umapdb); 532 if (status != DDI_SUCCESS) { 533 mutex_exit(&srq->srq_lock); 534 TAVOR_WARNING(state, "failed to find in database"); 535 TAVOR_TNF_EXIT(tavor_srq_free); 536 return (ibc_get_ci_failure(0)); 537 } 538 tavor_umap_db_free(umapdb); 539 if (srq->srq_umap_dhp != NULL) { 540 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 541 status = devmap_devmem_remap(srq->srq_umap_dhp, 542 state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, 543 maxprot, DEVMAP_MAPPING_INVALID, NULL); 544 if (status != DDI_SUCCESS) { 545 mutex_exit(&srq->srq_lock); 546 TAVOR_WARNING(state, "failed in SRQ memory " 547 "devmap_devmem_remap()"); 548 TAVOR_TNF_EXIT(tavor_srq_free); 549 return (ibc_get_ci_failure(0)); 550 } 551 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 552 } 553 } 554 555 /* 556 * Put NULL into the Tavor SRQNum-to-SRQHdl list. This will allow any 557 * in-progress events to detect that the SRQ corresponding to this 558 * number has been freed. 559 */ 560 state->ts_srqhdl[srqc->tr_indx] = NULL; 561 562 mutex_exit(&srq->srq_lock); 563 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq)); 564 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq->srq_wridlist)); 565 566 /* 567 * Reclaim SRQC entry from hardware (using the Tavor HW2SW_SRQ 568 * firmware command). If the ownership transfer fails for any reason, 569 * then it is an indication that something (either in HW or SW) has 570 * gone seriously wrong. 571 */ 572 status = tavor_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry, 573 sizeof (tavor_hw_srqc_t), srqnum, sleepflag); 574 if (status != TAVOR_CMD_SUCCESS) { 575 TAVOR_WARNING(state, "failed to reclaim SRQC ownership"); 576 cmn_err(CE_CONT, "Tavor: HW2SW_SRQ command failed: %08x\n", 577 status); 578 TNF_PROBE_1(tavor_srq_free_hw2sw_srq_cmd_fail, 579 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 580 TAVOR_TNF_EXIT(tavor_srq_free); 581 return (IBT_FAILURE); 582 } 583 584 /* 585 * Deregister the memory for the Shared Receive Queue. If this fails 586 * for any reason, then it is an indication that something (either 587 * in HW or SW) has gone seriously wrong. So we print a warning 588 * message and return. 589 */ 590 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 591 sleepflag); 592 if (status != DDI_SUCCESS) { 593 TAVOR_WARNING(state, "failed to deregister SRQ memory"); 594 TNF_PROBE_0(tavor_srq_free_dereg_mr_fail, TAVOR_TNF_ERROR, ""); 595 TAVOR_TNF_EXIT(tavor_srq_free); 596 return (IBT_FAILURE); 597 } 598 599 /* Calculate the size and free the wridlist container */ 600 if (srq->srq_wridlist != NULL) { 601 size = (srq->srq_wridlist->wl_size * 602 sizeof (tavor_wrid_entry_t)); 603 kmem_free(srq->srq_wridlist->wl_wre, size); 604 kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t)); 605 606 /* 607 * Release reference to WQL; If this is the last reference, 608 * this call also has the side effect of freeing up the 609 * 'srq_wrid_wql' memory. 610 */ 611 tavor_wql_refcnt_dec(srq->srq_wrid_wql); 612 } 613 614 /* Free the memory for the SRQ */ 615 tavor_queue_free(state, &srq->srq_wqinfo); 616 617 /* Free the Tavor SRQ Handle */ 618 tavor_rsrc_free(state, &rsrc); 619 620 /* Free the SRQC entry resource */ 621 tavor_rsrc_free(state, &srqc); 622 623 /* Decrement the reference count on the protection domain (PD) */ 624 tavor_pd_refcnt_dec(pd); 625 626 /* Set the srqhdl pointer to NULL and return success */ 627 *srqhdl = NULL; 628 629 TAVOR_TNF_EXIT(tavor_srq_free); 630 return (DDI_SUCCESS); 631 } 632 633 634 /* 635 * tavor_srq_modify() 636 * Context: Can be called only from user or kernel context. 637 */ 638 int 639 tavor_srq_modify(tavor_state_t *state, tavor_srqhdl_t srq, uint_t size, 640 uint_t *real_size, uint_t sleepflag) 641 { 642 tavor_qalloc_info_t new_srqinfo, old_srqinfo; 643 tavor_rsrc_t *mtt, *mpt, *old_mtt; 644 tavor_bind_info_t bind; 645 tavor_bind_info_t old_bind; 646 tavor_rsrc_pool_info_t *rsrc_pool; 647 tavor_mrhdl_t mr; 648 tavor_hw_mpt_t mpt_entry; 649 tavor_wrid_entry_t *wre_new, *wre_old; 650 uint64_t mtt_ddrbaseaddr, mtt_addr; 651 uint64_t srq_desc_off; 652 uint32_t *buf, srq_old_bufsz; 653 uint32_t wqesz; 654 uint_t max_srq_size; 655 uint_t dma_xfer_mode, mtt_pgsize_bits; 656 uint_t srq_sync, log_srq_size, maxprot; 657 uint_t wq_location; 658 int status; 659 char *errormsg; 660 661 TAVOR_TNF_ENTER(tavor_srq_modify); 662 663 /* 664 * Check the "inddr" flag. This flag tells the driver whether or not 665 * the SRQ's work queues should be come from normal system memory or 666 * whether they should be allocated from DDR memory. 667 */ 668 wq_location = state->ts_cfg_profile->cp_srq_wq_inddr; 669 670 /* 671 * If size requested is larger than device capability, return 672 * Insufficient Resources 673 */ 674 max_srq_size = (1 << state->ts_cfg_profile->cp_log_max_srq_sz); 675 if (size > max_srq_size) { 676 TNF_PROBE_0(tavor_srq_modify_size_larger_than_maxsize, 677 TAVOR_TNF_ERROR, ""); 678 TAVOR_TNF_EXIT(tavor_srq_modify); 679 return (IBT_HCA_WR_EXCEEDED); 680 } 681 682 /* 683 * Calculate the appropriate size for the SRQ. 684 * Note: All Tavor SRQs must be a power-of-2 in size. Also 685 * they may not be any smaller than TAVOR_SRQ_MIN_SIZE. This step 686 * is to round the requested size up to the next highest power-of-2 687 */ 688 size = max(size, TAVOR_SRQ_MIN_SIZE); 689 log_srq_size = highbit(size); 690 if (ISP2(size)) { 691 log_srq_size = log_srq_size - 1; 692 } 693 694 /* 695 * Next we verify that the rounded-up size is valid (i.e. consistent 696 * with the device limits and/or software-configured limits). 697 */ 698 if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) { 699 /* Set "status" and "errormsg" and goto failure */ 700 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size"); 701 goto srqmodify_fail; 702 } 703 704 /* 705 * Allocate the memory for newly resized Shared Receive Queue. 706 * 707 * Note: If SRQ is not user-mappable, then it may come from either 708 * kernel system memory or from HCA-attached local DDR memory. 709 * 710 * Note2: We align this queue on a pagesize boundary. This is required 711 * to make sure that all the resulting IB addresses will start at 0, 712 * for a zero-based queue. By making sure we are aligned on at least a 713 * page, any offset we use into our queue will be the same as it was 714 * when we allocated it at tavor_srq_alloc() time. 715 */ 716 wqesz = (1 << srq->srq_wq_log_wqesz); 717 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz; 718 new_srqinfo.qa_alloc_align = PAGESIZE; 719 new_srqinfo.qa_bind_align = PAGESIZE; 720 if (srq->srq_is_umap) { 721 new_srqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND; 722 } else { 723 new_srqinfo.qa_location = wq_location; 724 } 725 status = tavor_queue_alloc(state, &new_srqinfo, sleepflag); 726 if (status != DDI_SUCCESS) { 727 /* Set "status" and "errormsg" and goto failure */ 728 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq"); 729 goto srqmodify_fail; 730 } 731 buf = (uint32_t *)new_srqinfo.qa_buf_aligned; 732 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 733 734 /* 735 * Allocate the memory for the new WRE list. This will be used later 736 * when we resize the wridlist based on the new SRQ size. 737 */ 738 wre_new = (tavor_wrid_entry_t *)kmem_zalloc((1 << log_srq_size) * 739 sizeof (tavor_wrid_entry_t), sleepflag); 740 if (wre_new == NULL) { 741 /* Set "status" and "errormsg" and goto failure */ 742 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, 743 "failed wre_new alloc"); 744 goto srqmodify_fail; 745 } 746 747 /* 748 * Fill in the "bind" struct. This struct provides the majority 749 * of the information that will be used to distinguish between an 750 * "addr" binding (as is the case here) and a "buf" binding (see 751 * below). The "bind" struct is later passed to tavor_mr_mem_bind() 752 * which does most of the "heavy lifting" for the Tavor memory 753 * registration routines. 754 */ 755 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind)) 756 bzero(&bind, sizeof (tavor_bind_info_t)); 757 bind.bi_type = TAVOR_BINDHDL_VADDR; 758 bind.bi_addr = (uint64_t)(uintptr_t)buf; 759 bind.bi_len = new_srqinfo.qa_size; 760 bind.bi_as = NULL; 761 bind.bi_flags = sleepflag == TAVOR_SLEEP ? IBT_MR_SLEEP : 762 IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 763 if (srq->srq_is_umap) { 764 bind.bi_bypass = state->ts_cfg_profile->cp_iommu_bypass; 765 } else { 766 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) { 767 bind.bi_bypass = 768 state->ts_cfg_profile->cp_iommu_bypass; 769 dma_xfer_mode = 770 state->ts_cfg_profile->cp_streaming_consistent; 771 if (dma_xfer_mode == DDI_DMA_STREAMING) { 772 bind.bi_flags |= IBT_MR_NONCOHERENT; 773 } 774 } else { 775 bind.bi_bypass = TAVOR_BINDMEM_BYPASS; 776 } 777 } 778 status = tavor_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt, 779 &mtt_pgsize_bits); 780 if (status != DDI_SUCCESS) { 781 /* Set "status" and "errormsg" and goto failure */ 782 TAVOR_TNF_FAIL(status, "failed mtt bind"); 783 kmem_free(wre_new, srq->srq_wq_bufsz * 784 sizeof (tavor_wrid_entry_t)); 785 tavor_queue_free(state, &new_srqinfo); 786 goto srqmodify_fail; 787 } 788 789 /* 790 * Calculate the offset between the kernel virtual address space 791 * and the IB virtual address space. This will be used when 792 * posting work requests to properly initialize each WQE. 793 * 794 * Note: bind addr is zero-based (from alloc) so we calculate the 795 * correct new offset here. 796 */ 797 bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1); 798 srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned - 799 (uint64_t)bind.bi_addr; 800 801 /* 802 * Get the base address for the MTT table. This will be necessary 803 * below when we are modifying the MPT entry. 804 */ 805 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT]; 806 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset; 807 808 /* 809 * Fill in the MPT entry. This is the final step before passing 810 * ownership of the MPT entry to the Tavor hardware. We use all of 811 * the information collected/calculated above to fill in the 812 * requisite portions of the MPT. 813 */ 814 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); 815 mpt_entry.reg_win_len = bind.bi_len; 816 mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT); 817 mpt_entry.mttseg_addr_h = mtt_addr >> 32; 818 mpt_entry.mttseg_addr_l = mtt_addr >> 6; 819 820 /* 821 * Now we grab the SRQ lock. Since we will be updating the actual 822 * SRQ location and the producer/consumer indexes, we should hold 823 * the lock. 824 * 825 * We do a TAVOR_NOSLEEP here (and below), though, because we are 826 * holding the "srq_lock" and if we got raised to interrupt level 827 * by priority inversion, we would not want to block in this routine 828 * waiting for success. 829 */ 830 mutex_enter(&srq->srq_lock); 831 832 /* 833 * Copy old entries to new buffer 834 */ 835 srq_old_bufsz = srq->srq_wq_bufsz; 836 bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz); 837 838 /* Determine if later ddi_dma_sync will be necessary */ 839 srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo); 840 841 /* Sync entire "new" SRQ for use by hardware (if necessary) */ 842 if (srq_sync) { 843 (void) ddi_dma_sync(bind.bi_dmahdl, 0, 844 new_srqinfo.qa_size, DDI_DMA_SYNC_FORDEV); 845 } 846 847 /* 848 * Setup MPT information for use in the MODIFY_MPT command 849 */ 850 mr = srq->srq_mrhdl; 851 mutex_enter(&mr->mr_lock); 852 mpt = srq->srq_mrhdl->mr_mptrsrcp; 853 854 /* 855 * MODIFY_MPT 856 * 857 * If this fails for any reason, then it is an indication that 858 * something (either in HW or SW) has gone seriously wrong. So we 859 * print a warning message and return. 860 */ 861 status = tavor_modify_mpt_cmd_post(state, &mpt_entry, mpt->tr_indx, 862 TAVOR_CMD_MODIFY_MPT_RESIZESRQ, sleepflag); 863 if (status != TAVOR_CMD_SUCCESS) { 864 cmn_err(CE_CONT, "Tavor: MODIFY_MPT command failed: %08x\n", 865 status); 866 TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail, 867 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 868 TAVOR_TNF_FAIL(status, "MODIFY_MPT command failed"); 869 (void) tavor_mr_mtt_unbind(state, &srq->srq_mrhdl->mr_bindinfo, 870 srq->srq_mrhdl->mr_mttrsrcp); 871 kmem_free(wre_new, srq->srq_wq_bufsz * 872 sizeof (tavor_wrid_entry_t)); 873 tavor_queue_free(state, &new_srqinfo); 874 mutex_exit(&mr->mr_lock); 875 mutex_exit(&srq->srq_lock); 876 return (ibc_get_ci_failure(0)); 877 } 878 879 /* 880 * Update the Tavor Shared Receive Queue handle with all the new 881 * information. At the same time, save away all the necessary 882 * information for freeing up the old resources 883 */ 884 old_srqinfo = srq->srq_wqinfo; 885 old_mtt = srq->srq_mrhdl->mr_mttrsrcp; 886 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind, 887 sizeof (tavor_bind_info_t)); 888 889 /* Now set the new info */ 890 srq->srq_wqinfo = new_srqinfo; 891 srq->srq_wq_buf = buf; 892 srq->srq_wq_bufsz = (1 << log_srq_size); 893 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (tavor_bind_info_t)); 894 srq->srq_mrhdl->mr_mttrsrcp = mtt; 895 srq->srq_desc_off = srq_desc_off; 896 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size); 897 898 /* Update MR mtt pagesize */ 899 mr->mr_logmttpgsz = mtt_pgsize_bits; 900 mutex_exit(&mr->mr_lock); 901 902 #ifdef __lock_lint 903 mutex_enter(&srq->srq_wrid_wql->wql_lock); 904 #else 905 if (srq->srq_wrid_wql != NULL) { 906 mutex_enter(&srq->srq_wrid_wql->wql_lock); 907 } 908 #endif 909 910 /* 911 * Initialize new wridlist, if needed. 912 * 913 * If a wridlist already is setup on an SRQ (the QP associated with an 914 * SRQ has moved "from_reset") then we must update this wridlist based 915 * on the new SRQ size. We allocate the new size of Work Request ID 916 * Entries, copy over the old entries to the new list, and 917 * re-initialize the srq wridlist in non-umap case 918 */ 919 wre_old = NULL; 920 if (srq->srq_wridlist != NULL) { 921 wre_old = srq->srq_wridlist->wl_wre; 922 923 bcopy(wre_old, wre_new, srq_old_bufsz * 924 sizeof (tavor_wrid_entry_t)); 925 926 /* Setup new sizes in wre */ 927 srq->srq_wridlist->wl_wre = wre_new; 928 srq->srq_wridlist->wl_size = srq->srq_wq_bufsz; 929 930 if (!srq->srq_is_umap) { 931 tavor_wrid_list_srq_init(srq->srq_wridlist, srq, 932 srq_old_bufsz); 933 } 934 } 935 936 #ifdef __lock_lint 937 mutex_exit(&srq->srq_wrid_wql->wql_lock); 938 #else 939 if (srq->srq_wrid_wql != NULL) { 940 mutex_exit(&srq->srq_wrid_wql->wql_lock); 941 } 942 #endif 943 944 /* 945 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out 946 * to a user process, then we need to call devmap_devmem_remap() to 947 * invalidate the mapping to the SRQ memory. We also need to 948 * invalidate the SRQ tracking information for the user mapping. 949 * 950 * Note: On failure, the remap really shouldn't ever happen. So, if it 951 * does, it is an indication that something has gone seriously wrong. 952 * So we print a warning message and return error (knowing, of course, 953 * that the "old" SRQ memory will be leaked) 954 */ 955 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) { 956 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 957 status = devmap_devmem_remap(srq->srq_umap_dhp, 958 state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot, 959 DEVMAP_MAPPING_INVALID, NULL); 960 if (status != DDI_SUCCESS) { 961 mutex_exit(&srq->srq_lock); 962 TAVOR_WARNING(state, "failed in SRQ memory " 963 "devmap_devmem_remap()"); 964 /* We can, however, free the memory for old wre */ 965 if (wre_old != NULL) { 966 kmem_free(wre_old, srq_old_bufsz * 967 sizeof (tavor_wrid_entry_t)); 968 } 969 TAVOR_TNF_EXIT(tavor_srq_modify); 970 return (ibc_get_ci_failure(0)); 971 } 972 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 973 } 974 975 /* 976 * Drop the SRQ lock now. The only thing left to do is to free up 977 * the old resources. 978 */ 979 mutex_exit(&srq->srq_lock); 980 981 /* 982 * Unbind the MTT entries. 983 */ 984 status = tavor_mr_mtt_unbind(state, &old_bind, old_mtt); 985 if (status != DDI_SUCCESS) { 986 TAVOR_WARNING(state, "failed to unbind old SRQ memory"); 987 /* Set "status" and "errormsg" and goto failure */ 988 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), 989 "failed to unbind (old)"); 990 goto srqmodify_fail; 991 } 992 993 /* Free the memory for old wre */ 994 if (wre_old != NULL) { 995 kmem_free(wre_old, srq_old_bufsz * 996 sizeof (tavor_wrid_entry_t)); 997 } 998 999 /* Free the memory for the old SRQ */ 1000 tavor_queue_free(state, &old_srqinfo); 1001 1002 /* 1003 * Fill in the return arguments (if necessary). This includes the 1004 * real new completion queue size. 1005 */ 1006 if (real_size != NULL) { 1007 *real_size = (1 << log_srq_size); 1008 } 1009 1010 TAVOR_TNF_EXIT(tavor_srq_modify); 1011 return (DDI_SUCCESS); 1012 1013 srqmodify_fail: 1014 TNF_PROBE_1(tavor_srq_modify_fail, TAVOR_TNF_ERROR, "", 1015 tnf_string, msg, errormsg); 1016 TAVOR_TNF_EXIT(tavor_srq_modify); 1017 return (status); 1018 } 1019 1020 1021 /* 1022 * tavor_srq_refcnt_inc() 1023 * Context: Can be called from interrupt or base context. 1024 */ 1025 void 1026 tavor_srq_refcnt_inc(tavor_srqhdl_t srq) 1027 { 1028 mutex_enter(&srq->srq_lock); 1029 TNF_PROBE_1_DEBUG(tavor_srq_refcnt_inc, TAVOR_TNF_TRACE, "", 1030 tnf_uint, refcnt, srq->srq_refcnt); 1031 srq->srq_refcnt++; 1032 mutex_exit(&srq->srq_lock); 1033 } 1034 1035 1036 /* 1037 * tavor_srq_refcnt_dec() 1038 * Context: Can be called from interrupt or base context. 1039 */ 1040 void 1041 tavor_srq_refcnt_dec(tavor_srqhdl_t srq) 1042 { 1043 mutex_enter(&srq->srq_lock); 1044 srq->srq_refcnt--; 1045 TNF_PROBE_1_DEBUG(tavor_srq_refcnt_dec, TAVOR_TNF_TRACE, "", 1046 tnf_uint, refcnt, srq->srq_refcnt); 1047 mutex_exit(&srq->srq_lock); 1048 } 1049 1050 1051 /* 1052 * tavor_srqhdl_from_srqnum() 1053 * Context: Can be called from interrupt or base context. 1054 * 1055 * This routine is important because changing the unconstrained 1056 * portion of the SRQ number is critical to the detection of a 1057 * potential race condition in the SRQ handler code (i.e. the case 1058 * where a SRQ is freed and alloc'd again before an event for the 1059 * "old" SRQ can be handled). 1060 * 1061 * While this is not a perfect solution (not sure that one exists) 1062 * it does help to mitigate the chance that this race condition will 1063 * cause us to deliver a "stale" event to the new SRQ owner. Note: 1064 * this solution does not scale well because the number of constrained 1065 * bits increases (and, hence, the number of unconstrained bits 1066 * decreases) as the number of supported SRQ grows. For small and 1067 * intermediate values, it should hopefully provide sufficient 1068 * protection. 1069 */ 1070 tavor_srqhdl_t 1071 tavor_srqhdl_from_srqnum(tavor_state_t *state, uint_t srqnum) 1072 { 1073 uint_t srqindx, srqmask; 1074 1075 /* Calculate the SRQ table index from the srqnum */ 1076 srqmask = (1 << state->ts_cfg_profile->cp_log_num_srq) - 1; 1077 srqindx = srqnum & srqmask; 1078 return (state->ts_srqhdl[srqindx]); 1079 } 1080 1081 1082 /* 1083 * tavor_srq_sgl_to_logwqesz() 1084 * Context: Can be called from interrupt or base context. 1085 */ 1086 static void 1087 tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl, 1088 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl) 1089 { 1090 uint_t max_size, log2, actual_sgl; 1091 1092 TAVOR_TNF_ENTER(tavor_srq_sgl_to_logwqesz); 1093 1094 switch (wq_type) { 1095 case TAVOR_QP_WQ_TYPE_RECVQ: 1096 /* 1097 * Use requested maximum SGL to calculate max descriptor size 1098 * (while guaranteeing that the descriptor size is a 1099 * power-of-2 cachelines). 1100 */ 1101 max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4)); 1102 log2 = highbit(max_size); 1103 if (ISP2(max_size)) { 1104 log2 = log2 - 1; 1105 } 1106 1107 /* Make sure descriptor is at least the minimum size */ 1108 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM); 1109 1110 /* Calculate actual number of SGL (given WQE size) */ 1111 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4; 1112 break; 1113 1114 default: 1115 TAVOR_WARNING(state, "unexpected work queue type"); 1116 TNF_PROBE_0(tavor_srq_sgl_to_logwqesz_inv_wqtype_fail, 1117 TAVOR_TNF_ERROR, ""); 1118 break; 1119 } 1120 1121 /* Fill in the return values */ 1122 *logwqesz = log2; 1123 *max_sgl = min(state->ts_cfg_profile->cp_srq_max_sgl, actual_sgl); 1124 1125 TAVOR_TNF_EXIT(tavor_qp_sgl_to_logwqesz); 1126 } 1127