1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2020, The University of Queensland 14 * Copyright (c) 2018, Joyent, Inc. 15 */ 16 17 /* 18 * Mellanox Connect-X 4/5/6 driver. 19 */ 20 21 #include <sys/modctl.h> 22 #include <sys/conf.h> 23 #include <sys/devops.h> 24 #include <sys/sysmacros.h> 25 #include <sys/atomic.h> 26 #include <sys/cpuvar.h> 27 28 #include <sys/pattr.h> 29 #include <sys/dlpi.h> 30 31 #include <sys/mac_provider.h> 32 33 #include <sys/random.h> 34 35 #include <mlxcx.h> 36 37 boolean_t 38 mlxcx_wq_alloc_dma(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) 39 { 40 ddi_device_acc_attr_t acc; 41 ddi_dma_attr_t attr; 42 boolean_t ret; 43 size_t sz; 44 45 VERIFY0(mlwq->mlwq_state & MLXCX_WQ_ALLOC); 46 47 /* Receive and send queue entries might be different sizes. */ 48 switch (mlwq->mlwq_type) { 49 case MLXCX_WQ_TYPE_SENDQ: 50 mlwq->mlwq_entshift = mlxp->mlx_props.mldp_sq_size_shift; 51 mlwq->mlwq_nents = (1 << mlwq->mlwq_entshift); 52 sz = mlwq->mlwq_nents * sizeof (mlxcx_sendq_ent_t); 53 break; 54 case MLXCX_WQ_TYPE_RECVQ: 55 mlwq->mlwq_entshift = mlxp->mlx_props.mldp_rq_size_shift; 56 mlwq->mlwq_nents = (1 << mlwq->mlwq_entshift); 57 sz = mlwq->mlwq_nents * sizeof (mlxcx_recvq_ent_t); 58 break; 59 default: 60 VERIFY(0); 61 return (B_FALSE); 62 } 63 ASSERT3U(sz & (MLXCX_HW_PAGE_SIZE - 1), ==, 0); 64 65 mlxcx_dma_acc_attr(mlxp, &acc); 66 mlxcx_dma_queue_attr(mlxp, &attr); 67 68 ret = mlxcx_dma_alloc(mlxp, &mlwq->mlwq_dma, &attr, &acc, 69 B_TRUE, sz, B_TRUE); 70 if (!ret) { 71 mlxcx_warn(mlxp, "failed to allocate WQ memory"); 72 return (B_FALSE); 73 } 74 75 /* 76 * Just set the first pointer in the union. Yes, this is a strict 77 * aliasing violation. No, I don't care. 
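 * (The send and receive entry pointers share a union and all alias the
 * same mlwq_dma.mxdb_va allocation; only the members matching mlwq_type
 * are ever dereferenced later on.)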
	 */
	mlwq->mlwq_send_ent = (mlxcx_sendq_ent_t *)mlwq->mlwq_dma.mxdb_va;

	mlxcx_dma_acc_attr(mlxp, &acc);
	mlxcx_dma_qdbell_attr(mlxp, &attr);
	sz = sizeof (mlxcx_workq_doorbell_t);
	ret = mlxcx_dma_alloc(mlxp, &mlwq->mlwq_doorbell_dma, &attr, &acc,
	    B_TRUE, sz, B_TRUE);
	if (!ret) {
		mlxcx_warn(mlxp, "failed to allocate WQ doorbell memory");
		mlxcx_dma_free(&mlwq->mlwq_dma);
		mlwq->mlwq_send_ent = NULL;
		return (B_FALSE);
	}

	mlwq->mlwq_doorbell =
	    (mlxcx_workq_doorbell_t *)mlwq->mlwq_doorbell_dma.mxdb_va;

	mlwq->mlwq_state |= MLXCX_WQ_ALLOC;

	return (B_TRUE);
}

void
mlxcx_wq_rele_dma(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
{
	VERIFY(mlwq->mlwq_state & MLXCX_WQ_ALLOC);
	if (mlwq->mlwq_state & MLXCX_WQ_CREATED)
		VERIFY(mlwq->mlwq_state & MLXCX_WQ_DESTROYED);

	mlxcx_dma_free(&mlwq->mlwq_dma);
	mlwq->mlwq_send_ent = NULL;
	mlxcx_dma_free(&mlwq->mlwq_doorbell_dma);
	mlwq->mlwq_doorbell = NULL;

	mlwq->mlwq_state &= ~MLXCX_WQ_ALLOC;
}

boolean_t
mlxcx_cq_alloc_dma(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
{
	ddi_device_acc_attr_t acc;
	ddi_dma_attr_t attr;
	boolean_t ret;
	size_t sz, i;

	VERIFY0(mlcq->mlcq_state & MLXCX_CQ_ALLOC);

	mlcq->mlcq_entshift = mlxp->mlx_props.mldp_cq_size_shift;
	mlcq->mlcq_nents = (1 << mlcq->mlcq_entshift);
	sz = mlcq->mlcq_nents * sizeof (mlxcx_completionq_ent_t);
	ASSERT3U(sz & (MLXCX_HW_PAGE_SIZE - 1), ==, 0);

	mlxcx_dma_acc_attr(mlxp, &acc);
	mlxcx_dma_queue_attr(mlxp, &attr);

	ret = mlxcx_dma_alloc(mlxp, &mlcq->mlcq_dma, &attr, &acc,
	    B_TRUE, sz, B_TRUE);
	if (!ret) {
		mlxcx_warn(mlxp, "failed to allocate CQ memory");
		return (B_FALSE);
	}

	mlcq->mlcq_ent = (mlxcx_completionq_ent_t *)mlcq->mlcq_dma.mxdb_va;

	for (i = 0; i < mlcq->mlcq_nents; ++i) {
		mlcq->mlcq_ent[i].mlcqe_opcode = MLXCX_CQE_OP_INVALID;
		mlcq->mlcq_ent[i].mlcqe_owner = MLXCX_CQE_OWNER_INIT;
	}

	mlxcx_dma_acc_attr(mlxp, &acc);
	mlxcx_dma_qdbell_attr(mlxp, &attr);
	sz = sizeof (mlxcx_completionq_doorbell_t);
	ret = mlxcx_dma_alloc(mlxp, &mlcq->mlcq_doorbell_dma, &attr, &acc,
	    B_TRUE, sz, B_TRUE);
	if (!ret) {
		mlxcx_warn(mlxp, "failed to allocate CQ doorbell memory");
		mlxcx_dma_free(&mlcq->mlcq_dma);
		mlcq->mlcq_ent = NULL;
		return (B_FALSE);
	}

	mlcq->mlcq_doorbell =
	    (mlxcx_completionq_doorbell_t *)mlcq->mlcq_doorbell_dma.mxdb_va;

	mlcq->mlcq_state |= MLXCX_CQ_ALLOC;

	return (B_TRUE);
}

void
mlxcx_cq_rele_dma(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq)
{
	VERIFY(mlcq->mlcq_state & MLXCX_CQ_ALLOC);
	if (mlcq->mlcq_state & MLXCX_CQ_CREATED)
		VERIFY(mlcq->mlcq_state & MLXCX_CQ_DESTROYED);

	mlxcx_dma_free(&mlcq->mlcq_dma);
	mlcq->mlcq_ent = NULL;
	mlxcx_dma_free(&mlcq->mlcq_doorbell_dma);
	mlcq->mlcq_doorbell = NULL;

	mlcq->mlcq_state &= ~MLXCX_CQ_ALLOC;
}

void
mlxcx_wq_teardown(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq)
{
	mlxcx_completion_queue_t *mlcq;

	/*
	 * If something is holding the lock on a long operation like a
	 * refill, setting this flag asks them to exit early if possible.
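	 * The flag is set with atomic_or_uint() before we take mlwq_mtx
	 * below, so a holder like mlxcx_rq_refill() (which re-checks
	 * MLXCX_WQ_TEARDOWN on every pass of its loop) can notice it and
	 * bail out of the refill.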
191 */ 192 atomic_or_uint(&mlwq->mlwq_state, MLXCX_WQ_TEARDOWN); 193 194 mutex_enter(&mlwq->mlwq_mtx); 195 196 list_remove(&mlxp->mlx_wqs, mlwq); 197 198 if ((mlwq->mlwq_state & MLXCX_WQ_CREATED) && 199 !(mlwq->mlwq_state & MLXCX_WQ_DESTROYED)) { 200 if (mlwq->mlwq_type == MLXCX_WQ_TYPE_RECVQ && 201 mlwq->mlwq_state & MLXCX_WQ_STARTED && 202 !mlxcx_cmd_stop_rq(mlxp, mlwq)) { 203 mlxcx_warn(mlxp, "failed to stop " 204 "recv queue num %x", mlwq->mlwq_num); 205 } 206 if (mlwq->mlwq_type == MLXCX_WQ_TYPE_SENDQ && 207 mlwq->mlwq_state & MLXCX_WQ_STARTED && 208 !mlxcx_cmd_stop_sq(mlxp, mlwq)) { 209 mlxcx_warn(mlxp, "failed to stop " 210 "send queue num %x", mlwq->mlwq_num); 211 } 212 if (mlwq->mlwq_type == MLXCX_WQ_TYPE_RECVQ && 213 !mlxcx_cmd_destroy_rq(mlxp, mlwq)) { 214 mlxcx_warn(mlxp, "failed to destroy " 215 "recv queue num %x", mlwq->mlwq_num); 216 } 217 if (mlwq->mlwq_type == MLXCX_WQ_TYPE_SENDQ && 218 !mlxcx_cmd_destroy_sq(mlxp, mlwq)) { 219 mlxcx_warn(mlxp, "failed to destroy " 220 "send queue num %x", mlwq->mlwq_num); 221 } 222 } 223 if (mlwq->mlwq_state & MLXCX_WQ_ALLOC) { 224 mlxcx_wq_rele_dma(mlxp, mlwq); 225 } 226 mlcq = mlwq->mlwq_cq; 227 228 /* These will be released by mlxcx_teardown_bufs() */ 229 mlwq->mlwq_bufs = NULL; 230 mlwq->mlwq_foreign_bufs = NULL; 231 232 mutex_exit(&mlwq->mlwq_mtx); 233 234 mutex_enter(&mlcq->mlcq_mtx); 235 mutex_enter(&mlwq->mlwq_mtx); 236 ASSERT3P(mlcq->mlcq_wq, ==, mlwq); 237 mlcq->mlcq_wq = NULL; 238 mutex_exit(&mlwq->mlwq_mtx); 239 mutex_exit(&mlcq->mlcq_mtx); 240 241 mutex_destroy(&mlwq->mlwq_mtx); 242 } 243 244 void 245 mlxcx_cq_teardown(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq) 246 { 247 mlxcx_event_queue_t *mleq; 248 mlxcx_buffer_t *b; 249 250 /* 251 * If something is holding the lock on a long operation like polling 252 * which we're going to abort anyway, this flag asks them to exit 253 * early if possible. 254 */ 255 atomic_or_uint(&mlcq->mlcq_state, MLXCX_CQ_TEARDOWN); 256 257 mutex_enter(&mlcq->mlcq_mtx); 258 259 list_remove(&mlxp->mlx_cqs, mlcq); 260 261 if ((mlcq->mlcq_state & MLXCX_CQ_CREATED) && 262 !(mlcq->mlcq_state & MLXCX_CQ_DESTROYED)) { 263 if (!mlxcx_cmd_destroy_cq(mlxp, mlcq)) { 264 mlxcx_warn(mlxp, "failed to destroy " 265 "completion queue num %u", 266 mlcq->mlcq_num); 267 } 268 } 269 if (mlcq->mlcq_state & MLXCX_CQ_ALLOC) { 270 mlxcx_cq_rele_dma(mlxp, mlcq); 271 } 272 /* 273 * If we're on an EQ AVL tree, then we need to grab 274 * the EQ's mutex to take it off. The ISR always takes 275 * EQ mutex before CQ mutex, so we have to let go of 276 * the CQ mutex then come back again. 277 * 278 * The ISR will bail out if tries to touch this CQ now since 279 * we added the CQ_DESTROYED flag above. 280 */ 281 if (mlcq->mlcq_state & MLXCX_CQ_EQAVL) { 282 mleq = mlcq->mlcq_eq; 283 } else { 284 mleq = NULL; 285 } 286 287 /* Return any outstanding buffers to the free pool. */ 288 while ((b = list_remove_head(&mlcq->mlcq_buffers)) != NULL) { 289 mlxcx_buf_return_chain(mlxp, b, B_FALSE); 290 } 291 mutex_enter(&mlcq->mlcq_bufbmtx); 292 while ((b = list_remove_head(&mlcq->mlcq_buffers_b)) != NULL) { 293 mlxcx_buf_return_chain(mlxp, b, B_FALSE); 294 } 295 mutex_exit(&mlcq->mlcq_bufbmtx); 296 297 /* 298 * Since the interrupt handlers take the EQ lock before the CQ one, 299 * we must do the same here. That means letting go of the lock 300 * for a brief window here (we'll double-check the state when we 301 * get back in). 
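 * The double-check is the re-test of MLXCX_CQ_EQAVL once we hold both the
 * EQ and CQ mutexes again, in case the CQ was already taken off the EQ's
 * AVL tree while the CQ mutex was dropped.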
302 */ 303 mutex_exit(&mlcq->mlcq_mtx); 304 305 if (mleq != NULL) { 306 mutex_enter(&mleq->mleq_mtx); 307 mutex_enter(&mlcq->mlcq_mtx); 308 /* 309 * Double-check the state, we let go of the 310 * mutex briefly. 311 */ 312 if (mlcq->mlcq_state & MLXCX_CQ_EQAVL) { 313 avl_remove(&mleq->mleq_cqs, mlcq); 314 mlcq->mlcq_state &= ~MLXCX_CQ_EQAVL; 315 } 316 mutex_exit(&mlcq->mlcq_mtx); 317 mutex_exit(&mleq->mleq_mtx); 318 } 319 320 mutex_enter(&mlcq->mlcq_mtx); 321 ASSERT0(mlcq->mlcq_state & ~(MLXCX_CQ_CREATED | MLXCX_CQ_DESTROYED | 322 MLXCX_CQ_TEARDOWN | MLXCX_CQ_ARMED)); 323 mutex_exit(&mlcq->mlcq_mtx); 324 325 mutex_destroy(&mlcq->mlcq_mtx); 326 mutex_destroy(&mlcq->mlcq_bufbmtx); 327 list_destroy(&mlcq->mlcq_buffers); 328 list_destroy(&mlcq->mlcq_buffers_b); 329 kmem_free(mlcq, sizeof (mlxcx_completion_queue_t)); 330 } 331 332 static boolean_t 333 mlxcx_cq_setup(mlxcx_t *mlxp, mlxcx_event_queue_t *eq, 334 mlxcx_completion_queue_t **cqp) 335 { 336 mlxcx_completion_queue_t *cq; 337 338 cq = kmem_zalloc(sizeof (mlxcx_completion_queue_t), KM_SLEEP); 339 mutex_init(&cq->mlcq_mtx, NULL, MUTEX_DRIVER, 340 DDI_INTR_PRI(mlxp->mlx_intr_pri)); 341 mutex_init(&cq->mlcq_bufbmtx, NULL, MUTEX_DRIVER, 342 DDI_INTR_PRI(mlxp->mlx_intr_pri)); 343 list_create(&cq->mlcq_buffers, sizeof (mlxcx_buffer_t), 344 offsetof(mlxcx_buffer_t, mlb_cq_entry)); 345 list_create(&cq->mlcq_buffers_b, sizeof (mlxcx_buffer_t), 346 offsetof(mlxcx_buffer_t, mlb_cq_entry)); 347 348 cq->mlcq_mlx = mlxp; 349 list_insert_tail(&mlxp->mlx_cqs, cq); 350 351 mutex_enter(&cq->mlcq_mtx); 352 353 if (!mlxcx_cq_alloc_dma(mlxp, cq)) { 354 mutex_exit(&cq->mlcq_mtx); 355 return (B_FALSE); 356 } 357 358 cq->mlcq_bufhwm = cq->mlcq_nents - MLXCX_CQ_HWM_GAP; 359 cq->mlcq_buflwm = cq->mlcq_nents - MLXCX_CQ_LWM_GAP; 360 361 cq->mlcq_uar = &mlxp->mlx_uar; 362 cq->mlcq_eq = eq; 363 364 cq->mlcq_cqemod_period_usec = mlxp->mlx_props.mldp_cqemod_period_usec; 365 cq->mlcq_cqemod_count = mlxp->mlx_props.mldp_cqemod_count; 366 367 if (!mlxcx_cmd_create_cq(mlxp, cq)) { 368 mutex_exit(&cq->mlcq_mtx); 369 return (B_FALSE); 370 } 371 372 mutex_exit(&cq->mlcq_mtx); 373 374 mutex_enter(&eq->mleq_mtx); 375 mutex_enter(&cq->mlcq_mtx); 376 ASSERT0(cq->mlcq_state & MLXCX_CQ_EQAVL); 377 avl_add(&eq->mleq_cqs, cq); 378 cq->mlcq_state |= MLXCX_CQ_EQAVL; 379 mlxcx_arm_cq(mlxp, cq); 380 mutex_exit(&cq->mlcq_mtx); 381 mutex_exit(&eq->mleq_mtx); 382 383 *cqp = cq; 384 return (B_TRUE); 385 } 386 387 static boolean_t 388 mlxcx_rq_setup(mlxcx_t *mlxp, mlxcx_completion_queue_t *cq, 389 mlxcx_work_queue_t *wq) 390 { 391 mutex_init(&wq->mlwq_mtx, NULL, MUTEX_DRIVER, 392 DDI_INTR_PRI(mlxp->mlx_intr_pri)); 393 394 list_insert_tail(&mlxp->mlx_wqs, wq); 395 396 mutex_enter(&wq->mlwq_mtx); 397 398 wq->mlwq_mlx = mlxp; 399 wq->mlwq_type = MLXCX_WQ_TYPE_RECVQ; 400 wq->mlwq_cq = cq; 401 wq->mlwq_pd = &mlxp->mlx_pd; 402 wq->mlwq_uar = &mlxp->mlx_uar; 403 404 wq->mlwq_bufs = mlxcx_mlbs_create(mlxp); 405 406 if (!mlxcx_wq_alloc_dma(mlxp, wq)) { 407 mutex_exit(&wq->mlwq_mtx); 408 return (B_FALSE); 409 } 410 411 if (!mlxcx_cmd_create_rq(mlxp, wq)) { 412 mutex_exit(&wq->mlwq_mtx); 413 return (B_FALSE); 414 } 415 416 mutex_exit(&wq->mlwq_mtx); 417 418 mutex_enter(&cq->mlcq_mtx); 419 mutex_enter(&wq->mlwq_mtx); 420 ASSERT3P(cq->mlcq_wq, ==, NULL); 421 cq->mlcq_wq = wq; 422 mutex_exit(&wq->mlwq_mtx); 423 mutex_exit(&cq->mlcq_mtx); 424 425 return (B_TRUE); 426 } 427 428 static boolean_t 429 mlxcx_sq_setup(mlxcx_t *mlxp, mlxcx_port_t *port, mlxcx_completion_queue_t *cq, 430 mlxcx_tis_t *tis, 
mlxcx_work_queue_t *wq) 431 { 432 mutex_init(&wq->mlwq_mtx, NULL, MUTEX_DRIVER, 433 DDI_INTR_PRI(mlxp->mlx_intr_pri)); 434 435 list_insert_tail(&mlxp->mlx_wqs, wq); 436 437 mutex_enter(&wq->mlwq_mtx); 438 439 wq->mlwq_mlx = mlxp; 440 wq->mlwq_type = MLXCX_WQ_TYPE_SENDQ; 441 wq->mlwq_cq = cq; 442 wq->mlwq_pd = &mlxp->mlx_pd; 443 wq->mlwq_uar = &mlxp->mlx_uar; 444 wq->mlwq_tis = tis; 445 446 wq->mlwq_bufs = mlxcx_mlbs_create(mlxp); 447 wq->mlwq_foreign_bufs = mlxcx_mlbs_create(mlxp); 448 449 VERIFY3U(port->mlp_wqe_min_inline, <=, MLXCX_ETH_INLINE_L2); 450 wq->mlwq_inline_mode = MLXCX_ETH_INLINE_L2; 451 452 if (!mlxcx_wq_alloc_dma(mlxp, wq)) { 453 mutex_exit(&wq->mlwq_mtx); 454 return (B_FALSE); 455 } 456 457 if (!mlxcx_cmd_create_sq(mlxp, wq)) { 458 mutex_exit(&wq->mlwq_mtx); 459 return (B_FALSE); 460 } 461 462 mutex_exit(&wq->mlwq_mtx); 463 464 mutex_enter(&cq->mlcq_mtx); 465 mutex_enter(&wq->mlwq_mtx); 466 ASSERT3P(cq->mlcq_wq, ==, NULL); 467 cq->mlcq_wq = wq; 468 mutex_exit(&wq->mlwq_mtx); 469 mutex_exit(&cq->mlcq_mtx); 470 471 return (B_TRUE); 472 } 473 474 void 475 mlxcx_teardown_rx_group(mlxcx_t *mlxp, mlxcx_ring_group_t *g) 476 { 477 mlxcx_work_queue_t *wq; 478 mlxcx_completion_queue_t *cq; 479 mlxcx_flow_entry_t *fe; 480 mlxcx_flow_group_t *fg; 481 mlxcx_flow_table_t *ft; 482 uint_t i; 483 484 mutex_enter(&g->mlg_port->mlp_mtx); 485 mutex_enter(&g->mlg_mtx); 486 487 if (g->mlg_state & MLXCX_GROUP_FLOWS) { 488 mlxcx_remove_all_umcast_entries(mlxp, g->mlg_port, g); 489 490 if (g->mlg_rx_vlan_ft != NULL) 491 mlxcx_remove_all_vlan_entries(mlxp, g); 492 493 if (g == &mlxp->mlx_rx_groups[0]) { 494 ft = g->mlg_port->mlp_rx_flow; 495 mutex_enter(&ft->mlft_mtx); 496 497 fg = g->mlg_port->mlp_bcast; 498 fe = list_head(&fg->mlfg_entries); 499 if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) { 500 (void) mlxcx_cmd_delete_flow_table_entry( 501 mlxp, fe); 502 } 503 504 fg = g->mlg_port->mlp_promisc; 505 fe = list_head(&fg->mlfg_entries); 506 if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) { 507 (void) mlxcx_cmd_delete_flow_table_entry( 508 mlxp, fe); 509 } 510 511 mutex_exit(&ft->mlft_mtx); 512 } 513 514 if (g->mlg_rx_vlan_ft != NULL) { 515 mutex_enter(&g->mlg_rx_vlan_ft->mlft_mtx); 516 ASSERT(list_is_empty(&g->mlg_rx_vlans)); 517 fg = g->mlg_rx_vlan_def_fg; 518 fe = list_head(&fg->mlfg_entries); 519 if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) { 520 (void) mlxcx_cmd_delete_flow_table_entry( 521 mlxp, fe); 522 } 523 fg = g->mlg_rx_vlan_promisc_fg; 524 fe = list_head(&fg->mlfg_entries); 525 if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) { 526 (void) mlxcx_cmd_delete_flow_table_entry( 527 mlxp, fe); 528 } 529 mlxcx_teardown_flow_table(mlxp, g->mlg_rx_vlan_ft); 530 list_destroy(&g->mlg_rx_vlans); 531 532 g->mlg_rx_vlan_ft = NULL; 533 } 534 535 mutex_enter(&g->mlg_rx_hash_ft->mlft_mtx); 536 mlxcx_teardown_flow_table(mlxp, g->mlg_rx_hash_ft); 537 g->mlg_rx_hash_ft = NULL; 538 539 avl_destroy(&g->mlg_rx_macs); 540 g->mlg_state &= ~MLXCX_GROUP_FLOWS; 541 } 542 543 if (g->mlg_state & MLXCX_GROUP_RUNNING) { 544 for (i = 0; i < g->mlg_nwqs; ++i) { 545 wq = &g->mlg_wqs[i]; 546 mutex_enter(&wq->mlwq_mtx); 547 if (wq->mlwq_state & MLXCX_WQ_STARTED && 548 !mlxcx_cmd_stop_rq(mlxp, wq)) { 549 mlxcx_warn(mlxp, "failed to stop rq %x", 550 wq->mlwq_num); 551 } 552 mutex_exit(&wq->mlwq_mtx); 553 } 554 g->mlg_state &= ~MLXCX_GROUP_RUNNING; 555 } 556 557 if (g->mlg_state & MLXCX_GROUP_TIRTIS) { 558 for (i = 0; i < MLXCX_TIRS_PER_GROUP; ++i) { 559 mlxcx_tir_t *tir = &g->mlg_tir[i]; 560 if (tir->mltir_state & 
MLXCX_TIR_CREATED && 561 !(tir->mltir_state & MLXCX_TIR_DESTROYED)) { 562 if (!mlxcx_cmd_destroy_tir(mlxp, tir)) { 563 mlxcx_warn(mlxp, 564 "failed to destroy tir %u " 565 "for rx ring", tir->mltir_num); 566 } 567 } 568 } 569 g->mlg_state &= ~MLXCX_GROUP_TIRTIS; 570 } 571 572 if (g->mlg_state & MLXCX_GROUP_RQT) { 573 if (g->mlg_rqt->mlrqt_state & MLXCX_RQT_CREATED && 574 !(g->mlg_rqt->mlrqt_state & MLXCX_RQT_DESTROYED)) { 575 if (!mlxcx_cmd_destroy_rqt(mlxp, g->mlg_rqt)) { 576 mlxcx_warn(mlxp, "failed to destroy rqt %u " 577 "for rx ring", g->mlg_rqt->mlrqt_num); 578 } 579 kmem_free(g->mlg_rqt->mlrqt_rq, 580 g->mlg_rqt->mlrqt_rq_size); 581 g->mlg_rqt->mlrqt_rq = NULL; 582 kmem_free(g->mlg_rqt, sizeof (mlxcx_rqtable_t)); 583 g->mlg_rqt = NULL; 584 } 585 g->mlg_state &= ~MLXCX_GROUP_RQT; 586 } 587 588 for (i = 0; i < g->mlg_nwqs; ++i) { 589 wq = &g->mlg_wqs[i]; 590 cq = wq->mlwq_cq; 591 mlxcx_wq_teardown(mlxp, wq); 592 if (cq != NULL) 593 mlxcx_cq_teardown(mlxp, cq); 594 } 595 kmem_free(g->mlg_wqs, g->mlg_wqs_size); 596 g->mlg_wqs = NULL; 597 g->mlg_state &= ~MLXCX_GROUP_WQS; 598 599 mutex_exit(&g->mlg_mtx); 600 mutex_exit(&g->mlg_port->mlp_mtx); 601 602 mutex_destroy(&g->mlg_mtx); 603 604 g->mlg_state &= ~MLXCX_GROUP_INIT; 605 ASSERT3S(g->mlg_state, ==, 0); 606 } 607 608 void 609 mlxcx_teardown_tx_group(mlxcx_t *mlxp, mlxcx_ring_group_t *g) 610 { 611 mlxcx_work_queue_t *wq; 612 mlxcx_completion_queue_t *cq; 613 uint_t i; 614 615 mutex_enter(&g->mlg_mtx); 616 617 if (g->mlg_state & MLXCX_GROUP_WQS) { 618 for (i = 0; i < g->mlg_nwqs; ++i) { 619 wq = &g->mlg_wqs[i]; 620 mutex_enter(&wq->mlwq_mtx); 621 cq = wq->mlwq_cq; 622 if (wq->mlwq_state & MLXCX_WQ_STARTED && 623 !mlxcx_cmd_stop_sq(mlxp, wq)) { 624 mlxcx_warn(mlxp, "failed to stop sq %x", 625 wq->mlwq_num); 626 } 627 mutex_exit(&wq->mlwq_mtx); 628 mlxcx_wq_teardown(mlxp, wq); 629 if (cq != NULL) 630 mlxcx_cq_teardown(mlxp, cq); 631 } 632 g->mlg_state &= ~MLXCX_GROUP_RUNNING; 633 kmem_free(g->mlg_wqs, g->mlg_wqs_size); 634 g->mlg_wqs = NULL; 635 g->mlg_state &= ~MLXCX_GROUP_WQS; 636 } 637 638 if ((g->mlg_state & MLXCX_GROUP_TIRTIS) && 639 g->mlg_tis.mltis_state & MLXCX_TIS_CREATED && 640 !(g->mlg_tis.mltis_state & MLXCX_TIS_DESTROYED)) { 641 if (!mlxcx_cmd_destroy_tis(mlxp, &g->mlg_tis)) { 642 mlxcx_warn(mlxp, "failed to destroy tis %u for tx ring", 643 g->mlg_tis.mltis_num); 644 } 645 } 646 g->mlg_state &= ~MLXCX_GROUP_TIRTIS; 647 648 mutex_exit(&g->mlg_mtx); 649 mutex_destroy(&g->mlg_mtx); 650 g->mlg_state &= ~MLXCX_GROUP_INIT; 651 ASSERT3S(g->mlg_state, ==, 0); 652 } 653 654 void 655 mlxcx_teardown_groups(mlxcx_t *mlxp) 656 { 657 mlxcx_ring_group_t *g; 658 uint_t i; 659 660 for (i = 0; i < mlxp->mlx_rx_ngroups; ++i) { 661 g = &mlxp->mlx_rx_groups[i]; 662 if (!(g->mlg_state & MLXCX_GROUP_INIT)) 663 continue; 664 ASSERT3S(g->mlg_type, ==, MLXCX_GROUP_RX); 665 mlxcx_teardown_rx_group(mlxp, g); 666 } 667 kmem_free(mlxp->mlx_rx_groups, mlxp->mlx_rx_groups_size); 668 mlxp->mlx_rx_groups = NULL; 669 670 for (i = 0; i < mlxp->mlx_tx_ngroups; ++i) { 671 g = &mlxp->mlx_tx_groups[i]; 672 if (!(g->mlg_state & MLXCX_GROUP_INIT)) 673 continue; 674 ASSERT3S(g->mlg_type, ==, MLXCX_GROUP_TX); 675 mlxcx_teardown_tx_group(mlxp, g); 676 } 677 kmem_free(mlxp->mlx_tx_groups, mlxp->mlx_tx_groups_size); 678 mlxp->mlx_tx_groups = NULL; 679 } 680 681 boolean_t 682 mlxcx_rx_group_setup(mlxcx_t *mlxp, mlxcx_ring_group_t *g) 683 { 684 mlxcx_event_queue_t *eq; 685 mlxcx_completion_queue_t *cq; 686 mlxcx_work_queue_t *rq; 687 mlxcx_flow_table_t *ft; 688 
mlxcx_flow_group_t *fg; 689 mlxcx_flow_entry_t *fe; 690 uint_t i, j; 691 692 ASSERT3S(g->mlg_state, ==, 0); 693 694 mutex_init(&g->mlg_mtx, NULL, MUTEX_DRIVER, 695 DDI_INTR_PRI(mlxp->mlx_intr_pri)); 696 mutex_enter(&g->mlg_mtx); 697 g->mlg_mlx = mlxp; 698 g->mlg_type = MLXCX_GROUP_RX; 699 g->mlg_port = &mlxp->mlx_ports[0]; 700 g->mlg_state |= MLXCX_GROUP_INIT; 701 702 g->mlg_nwqs = mlxp->mlx_props.mldp_rx_nrings_per_small_group; 703 i = g - &mlxp->mlx_rx_groups[0]; 704 if (i < mlxp->mlx_props.mldp_rx_ngroups_large) 705 g->mlg_nwqs = mlxp->mlx_props.mldp_rx_nrings_per_large_group; 706 707 g->mlg_wqs_size = g->mlg_nwqs * sizeof (mlxcx_work_queue_t); 708 g->mlg_wqs = kmem_zalloc(g->mlg_wqs_size, KM_SLEEP); 709 g->mlg_state |= MLXCX_GROUP_WQS; 710 711 g->mlg_rqt = kmem_zalloc(sizeof (mlxcx_rqtable_t), KM_SLEEP); 712 g->mlg_rqt->mlrqt_max = 2; 713 while (g->mlg_rqt->mlrqt_max < g->mlg_nwqs) 714 g->mlg_rqt->mlrqt_max <<= 1; 715 g->mlg_rqt->mlrqt_rq_size = g->mlg_rqt->mlrqt_max * 716 sizeof (mlxcx_work_queue_t *); 717 g->mlg_rqt->mlrqt_rq = kmem_zalloc(g->mlg_rqt->mlrqt_rq_size, KM_SLEEP); 718 g->mlg_state |= MLXCX_GROUP_RQT; 719 720 for (i = 0; i < g->mlg_nwqs; ++i) { 721 eq = NULL; 722 while (eq == NULL) { 723 eq = &mlxp->mlx_eqs[mlxp->mlx_next_eq++]; 724 if (mlxp->mlx_next_eq >= mlxp->mlx_intr_count) 725 mlxp->mlx_next_eq = 1; 726 if (eq->mleq_type != MLXCX_EQ_TYPE_ANY && 727 eq->mleq_type != MLXCX_EQ_TYPE_RX) { 728 /* Try the next one */ 729 eq = NULL; 730 } 731 } 732 733 if (!mlxcx_cq_setup(mlxp, eq, &cq)) { 734 g->mlg_nwqs = i; 735 break; 736 } 737 cq->mlcq_stats = &g->mlg_port->mlp_stats; 738 739 rq = &g->mlg_wqs[i]; 740 if (!mlxcx_rq_setup(mlxp, cq, rq)) { 741 g->mlg_nwqs = i; 742 break; 743 } 744 g->mlg_rqt->mlrqt_rq[g->mlg_rqt->mlrqt_used++] = rq; 745 g->mlg_rqt->mlrqt_state |= MLXCX_RQT_DIRTY; 746 rq->mlwq_group = g; 747 } 748 if (g->mlg_nwqs == 0) { 749 mutex_exit(&g->mlg_mtx); 750 return (B_FALSE); 751 } 752 753 if (!mlxcx_cmd_create_rqt(mlxp, g->mlg_rqt)) { 754 mutex_exit(&g->mlg_mtx); 755 return (B_FALSE); 756 } 757 758 for (i = 0; i < MLXCX_TIRS_PER_GROUP; ++i) { 759 mlxcx_tir_t *tir = &g->mlg_tir[i]; 760 tir->mltir_tdom = &mlxp->mlx_tdom; 761 switch (i) { 762 case MLXCX_TIR_ROLE_OTHER: 763 tir->mltir_type = MLXCX_TIR_DIRECT; 764 tir->mltir_rq = &g->mlg_wqs[0]; 765 break; 766 case MLXCX_TIR_ROLE_IPv4: 767 case MLXCX_TIR_ROLE_IPv6: 768 case MLXCX_TIR_ROLE_TCPv4: 769 case MLXCX_TIR_ROLE_TCPv6: 770 case MLXCX_TIR_ROLE_UDPv4: 771 case MLXCX_TIR_ROLE_UDPv6: 772 tir->mltir_type = MLXCX_TIR_INDIRECT; 773 tir->mltir_rqtable = g->mlg_rqt; 774 tir->mltir_hash_fn = MLXCX_TIR_HASH_TOEPLITZ; 775 (void) random_get_pseudo_bytes(tir->mltir_toeplitz_key, 776 sizeof (tir->mltir_toeplitz_key)); 777 break; 778 } 779 switch (i) { 780 case MLXCX_TIR_ROLE_OTHER: 781 break; 782 case MLXCX_TIR_ROLE_IPv4: 783 case MLXCX_TIR_ROLE_TCPv4: 784 case MLXCX_TIR_ROLE_UDPv4: 785 tir->mltir_l3_type = MLXCX_RX_HASH_L3_IPv4; 786 tir->mltir_hash_fields = 787 MLXCX_RX_HASH_SRC_IP | MLXCX_RX_HASH_DST_IP; 788 break; 789 case MLXCX_TIR_ROLE_IPv6: 790 case MLXCX_TIR_ROLE_TCPv6: 791 case MLXCX_TIR_ROLE_UDPv6: 792 tir->mltir_l3_type = MLXCX_RX_HASH_L3_IPv6; 793 tir->mltir_hash_fields = 794 MLXCX_RX_HASH_SRC_IP | MLXCX_RX_HASH_DST_IP; 795 break; 796 } 797 switch (i) { 798 case MLXCX_TIR_ROLE_OTHER: 799 case MLXCX_TIR_ROLE_IPv4: 800 case MLXCX_TIR_ROLE_IPv6: 801 break; 802 case MLXCX_TIR_ROLE_TCPv4: 803 case MLXCX_TIR_ROLE_TCPv6: 804 tir->mltir_l4_type = MLXCX_RX_HASH_L4_TCP; 805 tir->mltir_hash_fields |= 806 
MLXCX_RX_HASH_L4_SPORT | MLXCX_RX_HASH_L4_DPORT; 807 break; 808 case MLXCX_TIR_ROLE_UDPv4: 809 case MLXCX_TIR_ROLE_UDPv6: 810 tir->mltir_l4_type = MLXCX_RX_HASH_L4_UDP; 811 tir->mltir_hash_fields |= 812 MLXCX_RX_HASH_L4_SPORT | MLXCX_RX_HASH_L4_DPORT; 813 break; 814 } 815 816 if (!mlxcx_cmd_create_tir(mlxp, tir)) { 817 mutex_exit(&g->mlg_mtx); 818 return (B_FALSE); 819 } 820 821 g->mlg_state |= MLXCX_GROUP_TIRTIS; 822 } 823 824 /* 825 * Flow table: our RX hashing breakout table for RSS 826 */ 827 828 g->mlg_rx_hash_ft = (ft = kmem_zalloc(sizeof (mlxcx_flow_table_t), 829 KM_SLEEP)); 830 mutex_init(&ft->mlft_mtx, NULL, MUTEX_DRIVER, 831 DDI_INTR_PRI(mlxp->mlx_intr_pri)); 832 avl_create(&g->mlg_rx_macs, mlxcx_grmac_compare, 833 sizeof (mlxcx_group_mac_t), 834 offsetof(mlxcx_group_mac_t, mlgm_group_entry)); 835 g->mlg_state |= MLXCX_GROUP_FLOWS; 836 837 mutex_enter(&ft->mlft_mtx); 838 839 ft->mlft_type = MLXCX_FLOW_TABLE_NIC_RX; 840 ft->mlft_level = 2; 841 ft->mlft_port = g->mlg_port; 842 ft->mlft_entshift = MLXCX_RX_HASH_FT_SIZE_SHIFT; 843 ft->mlft_nents = (1 << ft->mlft_entshift); 844 ASSERT3U(ft->mlft_nents, >=, MLXCX_TIRS_PER_GROUP); 845 ft->mlft_entsize = ft->mlft_nents * sizeof (mlxcx_flow_entry_t); 846 ft->mlft_ent = kmem_zalloc(ft->mlft_entsize, KM_SLEEP); 847 list_create(&ft->mlft_groups, sizeof (mlxcx_flow_group_t), 848 offsetof(mlxcx_flow_group_t, mlfg_entry)); 849 850 for (j = 0; j < ft->mlft_nents; ++j) { 851 ft->mlft_ent[j].mlfe_table = ft; 852 ft->mlft_ent[j].mlfe_index = j; 853 } 854 855 if (!mlxcx_cmd_create_flow_table(mlxp, ft)) { 856 mutex_exit(&ft->mlft_mtx); 857 mutex_exit(&g->mlg_mtx); 858 return (B_FALSE); 859 } 860 861 fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); 862 list_insert_tail(&ft->mlft_groups, fg); 863 fg->mlfg_table = ft; 864 fg->mlfg_size = 1; 865 fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO; 866 if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { 867 mutex_exit(&ft->mlft_mtx); 868 mutex_exit(&g->mlg_mtx); 869 return (B_FALSE); 870 } 871 fe = list_head(&fg->mlfg_entries); 872 fe->mlfe_ip_version = 6; 873 fe->mlfe_ip_proto = IPPROTO_UDP; 874 fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; 875 fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = 876 &g->mlg_tir[MLXCX_TIR_ROLE_UDPv6]; 877 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 878 mutex_exit(&ft->mlft_mtx); 879 mutex_exit(&g->mlg_mtx); 880 return (B_FALSE); 881 } 882 883 fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); 884 list_insert_tail(&ft->mlft_groups, fg); 885 fg->mlfg_table = ft; 886 fg->mlfg_size = 1; 887 fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO; 888 if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { 889 mutex_exit(&ft->mlft_mtx); 890 mutex_exit(&g->mlg_mtx); 891 return (B_FALSE); 892 } 893 fe = list_head(&fg->mlfg_entries); 894 fe->mlfe_ip_version = 4; 895 fe->mlfe_ip_proto = IPPROTO_UDP; 896 fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; 897 fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = 898 &g->mlg_tir[MLXCX_TIR_ROLE_UDPv4]; 899 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 900 mutex_exit(&ft->mlft_mtx); 901 mutex_exit(&g->mlg_mtx); 902 return (B_FALSE); 903 } 904 905 fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); 906 list_insert_tail(&ft->mlft_groups, fg); 907 fg->mlfg_table = ft; 908 fg->mlfg_size = 1; 909 fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO; 910 if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { 911 mutex_exit(&ft->mlft_mtx); 912 mutex_exit(&g->mlg_mtx); 913 return (B_FALSE); 914 } 915 fe = 
list_head(&fg->mlfg_entries); 916 fe->mlfe_ip_version = 6; 917 fe->mlfe_ip_proto = IPPROTO_TCP; 918 fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; 919 fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = 920 &g->mlg_tir[MLXCX_TIR_ROLE_TCPv6]; 921 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 922 mutex_exit(&ft->mlft_mtx); 923 mutex_exit(&g->mlg_mtx); 924 return (B_FALSE); 925 } 926 927 fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); 928 list_insert_tail(&ft->mlft_groups, fg); 929 fg->mlfg_table = ft; 930 fg->mlfg_size = 1; 931 fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER | MLXCX_FLOW_MATCH_IP_PROTO; 932 if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { 933 mutex_exit(&ft->mlft_mtx); 934 mutex_exit(&g->mlg_mtx); 935 return (B_FALSE); 936 } 937 fe = list_head(&fg->mlfg_entries); 938 fe->mlfe_ip_version = 4; 939 fe->mlfe_ip_proto = IPPROTO_TCP; 940 fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; 941 fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = 942 &g->mlg_tir[MLXCX_TIR_ROLE_TCPv4]; 943 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 944 mutex_exit(&ft->mlft_mtx); 945 mutex_exit(&g->mlg_mtx); 946 return (B_FALSE); 947 } 948 949 fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); 950 list_insert_tail(&ft->mlft_groups, fg); 951 fg->mlfg_table = ft; 952 fg->mlfg_size = 1; 953 fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER; 954 if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { 955 mutex_exit(&ft->mlft_mtx); 956 mutex_exit(&g->mlg_mtx); 957 return (B_FALSE); 958 } 959 fe = list_head(&fg->mlfg_entries); 960 fe->mlfe_ip_version = 6; 961 fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; 962 fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = 963 &g->mlg_tir[MLXCX_TIR_ROLE_IPv6]; 964 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 965 mutex_exit(&ft->mlft_mtx); 966 mutex_exit(&g->mlg_mtx); 967 return (B_FALSE); 968 } 969 970 fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); 971 list_insert_tail(&ft->mlft_groups, fg); 972 fg->mlfg_table = ft; 973 fg->mlfg_size = 1; 974 fg->mlfg_mask |= MLXCX_FLOW_MATCH_IP_VER; 975 if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { 976 mutex_exit(&ft->mlft_mtx); 977 mutex_exit(&g->mlg_mtx); 978 return (B_FALSE); 979 } 980 fe = list_head(&fg->mlfg_entries); 981 fe->mlfe_ip_version = 4; 982 fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; 983 fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = 984 &g->mlg_tir[MLXCX_TIR_ROLE_IPv4]; 985 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 986 mutex_exit(&ft->mlft_mtx); 987 mutex_exit(&g->mlg_mtx); 988 return (B_FALSE); 989 } 990 991 fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); 992 list_insert_tail(&ft->mlft_groups, fg); 993 fg->mlfg_table = ft; 994 fg->mlfg_size = 1; 995 if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { 996 mutex_exit(&ft->mlft_mtx); 997 mutex_exit(&g->mlg_mtx); 998 return (B_FALSE); 999 } 1000 fe = list_head(&fg->mlfg_entries); 1001 fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; 1002 fe->mlfe_dest[fe->mlfe_ndest++].mlfed_tir = 1003 &g->mlg_tir[MLXCX_TIR_ROLE_OTHER]; 1004 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 1005 mutex_exit(&ft->mlft_mtx); 1006 mutex_exit(&g->mlg_mtx); 1007 return (B_FALSE); 1008 } 1009 1010 mutex_exit(&ft->mlft_mtx); 1011 1012 /* 1013 * Flow table: the VLAN breakout table for doing VLAN filtering after 1014 * we've matched a MAC address. 
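 * This table sits at level 1: every entry in it (including the default
 * and promisc entries set up below) forwards on to a level-2 RSS hash
 * table -- the group's own mlg_rx_hash_ft, or group 0's in the case of
 * the promisc entry -- which then fans traffic out across the TIRs.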
1015 */ 1016 1017 g->mlg_rx_vlan_ft = (ft = kmem_zalloc(sizeof (mlxcx_flow_table_t), 1018 KM_SLEEP)); 1019 mutex_init(&ft->mlft_mtx, NULL, MUTEX_DRIVER, 1020 DDI_INTR_PRI(mlxp->mlx_intr_pri)); 1021 list_create(&g->mlg_rx_vlans, sizeof (mlxcx_group_vlan_t), 1022 offsetof(mlxcx_group_vlan_t, mlgv_entry)); 1023 1024 mutex_enter(&ft->mlft_mtx); 1025 1026 ft->mlft_type = MLXCX_FLOW_TABLE_NIC_RX; 1027 ft->mlft_level = 1; 1028 ft->mlft_port = g->mlg_port; 1029 ft->mlft_entshift = mlxp->mlx_props.mldp_ftbl_vlan_size_shift; 1030 ft->mlft_nents = (1 << ft->mlft_entshift); 1031 ft->mlft_entsize = ft->mlft_nents * sizeof (mlxcx_flow_entry_t); 1032 ft->mlft_ent = kmem_zalloc(ft->mlft_entsize, KM_SLEEP); 1033 list_create(&ft->mlft_groups, sizeof (mlxcx_flow_group_t), 1034 offsetof(mlxcx_flow_group_t, mlfg_entry)); 1035 1036 for (j = 0; j < ft->mlft_nents; ++j) { 1037 fe = &ft->mlft_ent[j]; 1038 fe->mlfe_table = ft; 1039 fe->mlfe_index = j; 1040 fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD; 1041 fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = g->mlg_rx_hash_ft; 1042 } 1043 1044 if (!mlxcx_cmd_create_flow_table(mlxp, ft)) { 1045 mutex_exit(&ft->mlft_mtx); 1046 mutex_exit(&g->mlg_mtx); 1047 return (B_FALSE); 1048 } 1049 1050 /* First group is all actual matched VLANs */ 1051 fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); 1052 g->mlg_rx_vlan_fg = fg; 1053 list_insert_tail(&ft->mlft_groups, fg); 1054 fg->mlfg_table = ft; 1055 fg->mlfg_size = ft->mlft_nents - 2; 1056 fg->mlfg_mask |= MLXCX_FLOW_MATCH_VLAN; 1057 fg->mlfg_mask |= MLXCX_FLOW_MATCH_VID; 1058 if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { 1059 mutex_exit(&ft->mlft_mtx); 1060 mutex_exit(&g->mlg_mtx); 1061 return (B_FALSE); 1062 } 1063 1064 /* 1065 * Then the "default" entry which we enable when we have no VLAN IDs 1066 * added to the group (we start with this enabled). 1067 */ 1068 fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); 1069 g->mlg_rx_vlan_def_fg = fg; 1070 list_insert_tail(&ft->mlft_groups, fg); 1071 fg->mlfg_table = ft; 1072 fg->mlfg_size = 1; 1073 if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { 1074 mutex_exit(&ft->mlft_mtx); 1075 mutex_exit(&g->mlg_mtx); 1076 return (B_FALSE); 1077 } 1078 fe = list_head(&fg->mlfg_entries); 1079 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 1080 mutex_exit(&ft->mlft_mtx); 1081 mutex_exit(&g->mlg_mtx); 1082 return (B_FALSE); 1083 } 1084 1085 /* 1086 * Finally, the promisc entry which points at the *hash ft* from the 1087 * default group. We only enable this when we have promisc on. 
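 * Note that only the destination is filled in here;
 * mlxcx_cmd_set_flow_table_entry() is deliberately not called for this
 * entry until promiscuous mode is actually turned on.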
1088 */ 1089 fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP); 1090 g->mlg_rx_vlan_promisc_fg = fg; 1091 list_insert_tail(&ft->mlft_groups, fg); 1092 fg->mlfg_table = ft; 1093 fg->mlfg_size = 1; 1094 if (!mlxcx_setup_flow_group(mlxp, ft, fg)) { 1095 mutex_exit(&ft->mlft_mtx); 1096 mutex_exit(&g->mlg_mtx); 1097 return (B_FALSE); 1098 } 1099 fe = list_head(&fg->mlfg_entries); 1100 fe->mlfe_ndest = 1; 1101 fe->mlfe_dest[0].mlfed_flow = mlxp->mlx_rx_groups[0].mlg_rx_hash_ft; 1102 1103 mutex_exit(&ft->mlft_mtx); 1104 1105 mutex_exit(&g->mlg_mtx); 1106 1107 return (B_TRUE); 1108 } 1109 1110 boolean_t 1111 mlxcx_rx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g, 1112 mlxcx_work_queue_t *rq) 1113 { 1114 uint_t j; 1115 mlxcx_buffer_t *b; 1116 mlxcx_completion_queue_t *cq; 1117 1118 mutex_enter(&g->mlg_mtx); 1119 /* 1120 * Sadly, even though MAC has the mgi_start callback, it is not always 1121 * called -- in particular when we are being managed under an aggr, the 1122 * mgi_start callback will only ever be called on the default group. 1123 * 1124 * So instead of asserting about the group state here, we have to 1125 * check it and call group start if needed. 1126 */ 1127 if (!(g->mlg_state & MLXCX_GROUP_RUNNING)) { 1128 mutex_exit(&g->mlg_mtx); 1129 if (!mlxcx_rx_group_start(mlxp, g)) 1130 return (B_FALSE); 1131 mutex_enter(&g->mlg_mtx); 1132 } 1133 ASSERT(g->mlg_state & MLXCX_GROUP_RUNNING); 1134 1135 cq = rq->mlwq_cq; 1136 ASSERT(cq != NULL); 1137 1138 mutex_enter(&cq->mlcq_mtx); 1139 mutex_enter(&rq->mlwq_mtx); 1140 1141 if (rq->mlwq_state & MLXCX_WQ_STARTED) { 1142 mutex_exit(&rq->mlwq_mtx); 1143 mutex_exit(&cq->mlcq_mtx); 1144 mutex_exit(&g->mlg_mtx); 1145 return (B_TRUE); 1146 } 1147 1148 if (!mlxcx_cmd_start_rq(mlxp, rq)) { 1149 mutex_exit(&rq->mlwq_mtx); 1150 mutex_exit(&cq->mlcq_mtx); 1151 mutex_exit(&g->mlg_mtx); 1152 return (B_FALSE); 1153 } 1154 ASSERT(rq->mlwq_state & MLXCX_WQ_STARTED); 1155 1156 ASSERT0(rq->mlwq_state & MLXCX_WQ_BUFFERS); 1157 rq->mlwq_state |= MLXCX_WQ_BUFFERS; 1158 1159 for (j = 0; j < rq->mlwq_nents; ++j) { 1160 if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b)) 1161 break; 1162 mlxcx_buf_return(mlxp, b); 1163 } 1164 for (j = 0; j < rq->mlwq_nents / 2; ++j) { 1165 if (!mlxcx_buf_create(mlxp, rq->mlwq_bufs, &b)) 1166 break; 1167 mlxcx_buf_return(mlxp, b); 1168 } 1169 1170 mlxcx_rq_refill(mlxp, rq); 1171 1172 mutex_exit(&rq->mlwq_mtx); 1173 mutex_exit(&cq->mlcq_mtx); 1174 mutex_exit(&g->mlg_mtx); 1175 1176 return (B_TRUE); 1177 } 1178 1179 boolean_t 1180 mlxcx_rx_group_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g) 1181 { 1182 mlxcx_flow_table_t *ft; 1183 mlxcx_flow_group_t *fg; 1184 mlxcx_flow_entry_t *fe; 1185 1186 mutex_enter(&g->mlg_mtx); 1187 1188 if (g->mlg_state & MLXCX_GROUP_RUNNING) { 1189 mutex_exit(&g->mlg_mtx); 1190 return (B_TRUE); 1191 } 1192 1193 ASSERT0(g->mlg_state & MLXCX_GROUP_RUNNING); 1194 1195 g->mlg_state |= MLXCX_GROUP_RUNNING; 1196 1197 if (g == &mlxp->mlx_rx_groups[0]) { 1198 ft = g->mlg_port->mlp_rx_flow; 1199 mutex_enter(&ft->mlft_mtx); 1200 1201 /* 1202 * Broadcast and promisc entries go directly to group 0's 1203 * RSS hash fanout flow table. They bypass VLAN filtering. 
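 * The broadcast entry is written out to the hardware immediately below;
 * for the promisc entry we only append the destination here and defer the
 * actual flow table write until promiscuous mode is enabled.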
1204 */ 1205 fg = g->mlg_port->mlp_bcast; 1206 fe = list_head(&fg->mlfg_entries); 1207 fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = g->mlg_rx_hash_ft; 1208 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 1209 mutex_exit(&ft->mlft_mtx); 1210 mutex_exit(&g->mlg_mtx); 1211 return (B_FALSE); 1212 } 1213 1214 fg = g->mlg_port->mlp_promisc; 1215 fe = list_head(&fg->mlfg_entries); 1216 fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = g->mlg_rx_hash_ft; 1217 /* 1218 * Don't actually set the promisc entry until promisc is 1219 * enabled. 1220 */ 1221 1222 mutex_exit(&ft->mlft_mtx); 1223 } 1224 1225 mutex_exit(&g->mlg_mtx); 1226 1227 return (B_TRUE); 1228 } 1229 1230 boolean_t 1231 mlxcx_tx_group_setup(mlxcx_t *mlxp, mlxcx_ring_group_t *g) 1232 { 1233 mlxcx_event_queue_t *eq; 1234 mlxcx_completion_queue_t *cq; 1235 mlxcx_work_queue_t *sq; 1236 uint_t i; 1237 1238 ASSERT3S(g->mlg_state, ==, 0); 1239 1240 mutex_init(&g->mlg_mtx, NULL, MUTEX_DRIVER, 1241 DDI_INTR_PRI(mlxp->mlx_intr_pri)); 1242 g->mlg_state |= MLXCX_GROUP_INIT; 1243 mutex_enter(&g->mlg_mtx); 1244 1245 g->mlg_mlx = mlxp; 1246 g->mlg_type = MLXCX_GROUP_TX; 1247 g->mlg_port = &mlxp->mlx_ports[0]; 1248 1249 g->mlg_nwqs = mlxp->mlx_props.mldp_tx_nrings_per_group; 1250 g->mlg_wqs_size = g->mlg_nwqs * sizeof (mlxcx_work_queue_t); 1251 g->mlg_wqs = kmem_zalloc(g->mlg_wqs_size, KM_SLEEP); 1252 g->mlg_state |= MLXCX_GROUP_WQS; 1253 1254 g->mlg_tis.mltis_tdom = &mlxp->mlx_tdom; 1255 1256 if (!mlxcx_cmd_create_tis(mlxp, &g->mlg_tis)) { 1257 mutex_exit(&g->mlg_mtx); 1258 return (B_FALSE); 1259 } 1260 1261 g->mlg_state |= MLXCX_GROUP_TIRTIS; 1262 1263 for (i = 0; i < g->mlg_nwqs; ++i) { 1264 eq = NULL; 1265 while (eq == NULL) { 1266 eq = &mlxp->mlx_eqs[mlxp->mlx_next_eq++]; 1267 if (mlxp->mlx_next_eq >= mlxp->mlx_intr_count) 1268 mlxp->mlx_next_eq = 1; 1269 if (eq->mleq_type != MLXCX_EQ_TYPE_ANY && 1270 eq->mleq_type != MLXCX_EQ_TYPE_TX) { 1271 /* Try the next one */ 1272 eq = NULL; 1273 } 1274 } 1275 1276 if (!mlxcx_cq_setup(mlxp, eq, &cq)) 1277 return (B_FALSE); 1278 cq->mlcq_stats = &g->mlg_port->mlp_stats; 1279 1280 sq = &g->mlg_wqs[i]; 1281 if (!mlxcx_sq_setup(mlxp, g->mlg_port, cq, &g->mlg_tis, sq)) { 1282 mutex_exit(&g->mlg_mtx); 1283 return (B_FALSE); 1284 } 1285 sq->mlwq_group = g; 1286 } 1287 1288 mutex_exit(&g->mlg_mtx); 1289 1290 return (B_TRUE); 1291 } 1292 1293 boolean_t 1294 mlxcx_tx_ring_start(mlxcx_t *mlxp, mlxcx_ring_group_t *g, 1295 mlxcx_work_queue_t *sq) 1296 { 1297 uint_t i; 1298 mlxcx_buffer_t *b; 1299 mlxcx_completion_queue_t *cq; 1300 1301 mutex_enter(&g->mlg_mtx); 1302 1303 cq = sq->mlwq_cq; 1304 ASSERT(cq != NULL); 1305 1306 mutex_enter(&cq->mlcq_mtx); 1307 mutex_enter(&sq->mlwq_mtx); 1308 if (sq->mlwq_state & MLXCX_WQ_STARTED) { 1309 mutex_exit(&sq->mlwq_mtx); 1310 mutex_exit(&cq->mlcq_mtx); 1311 mutex_exit(&g->mlg_mtx); 1312 return (B_TRUE); 1313 } 1314 1315 ASSERT0(sq->mlwq_state & MLXCX_WQ_BUFFERS); 1316 for (i = 0; i < sq->mlwq_nents; ++i) { 1317 if (!mlxcx_buf_create_foreign(mlxp, sq->mlwq_foreign_bufs, &b)) 1318 break; 1319 mlxcx_buf_return(mlxp, b); 1320 } 1321 for (i = 0; i < sq->mlwq_nents / 2; ++i) { 1322 if (!mlxcx_buf_create_foreign(mlxp, sq->mlwq_foreign_bufs, &b)) 1323 break; 1324 mlxcx_buf_return(mlxp, b); 1325 } 1326 for (i = 0; i < sq->mlwq_nents; ++i) { 1327 if (!mlxcx_buf_create(mlxp, sq->mlwq_bufs, &b)) 1328 break; 1329 mlxcx_buf_return(mlxp, b); 1330 } 1331 sq->mlwq_state |= MLXCX_WQ_BUFFERS; 1332 1333 if (!mlxcx_cmd_start_sq(mlxp, sq)) { 1334 mutex_exit(&sq->mlwq_mtx); 1335 mutex_exit(&cq->mlcq_mtx); 
1336 mutex_exit(&g->mlg_mtx); 1337 return (B_FALSE); 1338 } 1339 g->mlg_state |= MLXCX_GROUP_RUNNING; 1340 1341 (void) mlxcx_sq_add_nop(mlxp, sq); 1342 1343 mutex_exit(&sq->mlwq_mtx); 1344 mutex_exit(&cq->mlcq_mtx); 1345 mutex_exit(&g->mlg_mtx); 1346 1347 return (B_TRUE); 1348 } 1349 1350 static boolean_t 1351 mlxcx_sq_ring_dbell(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, uint_t first) 1352 { 1353 uint_t idx; 1354 mlxcx_bf_t *bf; 1355 ddi_fm_error_t err; 1356 uint_t try = 0; 1357 1358 ASSERT3U(mlwq->mlwq_type, ==, MLXCX_WQ_TYPE_SENDQ); 1359 ASSERT(mutex_owned(&mlwq->mlwq_mtx)); 1360 1361 mlwq->mlwq_doorbell->mlwqd_send_counter = to_be16(mlwq->mlwq_pc); 1362 1363 ASSERT(mlwq->mlwq_cq != NULL); 1364 ASSERT(mlwq->mlwq_cq->mlcq_eq != NULL); 1365 idx = mlwq->mlwq_cq->mlcq_eq->mleq_intr_index & MLXCX_BF_PER_UAR_MASK; 1366 bf = &mlwq->mlwq_uar->mlu_bf[idx]; 1367 1368 retry: 1369 MLXCX_DMA_SYNC(mlwq->mlwq_doorbell_dma, DDI_DMA_SYNC_FORDEV); 1370 ddi_fm_dma_err_get(mlwq->mlwq_doorbell_dma.mxdb_dma_handle, &err, 1371 DDI_FME_VERSION); 1372 if (err.fme_status != DDI_FM_OK) { 1373 if (try++ < mlxcx_doorbell_tries) { 1374 ddi_fm_dma_err_clear( 1375 mlwq->mlwq_doorbell_dma.mxdb_dma_handle, 1376 DDI_FME_VERSION); 1377 goto retry; 1378 } else { 1379 goto err; 1380 } 1381 } 1382 1383 mlxcx_put64(mlxp, bf->mbf_even, from_be64( 1384 mlwq->mlwq_bf_ent[first].mlsqbf_qwords[0])); 1385 ddi_fm_acc_err_get(mlxp->mlx_regs_handle, &err, 1386 DDI_FME_VERSION); 1387 if (err.fme_status == DDI_FM_OK) 1388 return (B_TRUE); 1389 if (try++ < mlxcx_doorbell_tries) { 1390 ddi_fm_acc_err_clear(mlxp->mlx_regs_handle, DDI_FME_VERSION); 1391 goto retry; 1392 } 1393 1394 err: 1395 ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_LOST); 1396 return (B_FALSE); 1397 } 1398 1399 boolean_t 1400 mlxcx_sq_add_nop(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) 1401 { 1402 uint_t index, start_pc; 1403 mlxcx_sendq_ent_t *ent0; 1404 ddi_fm_error_t err; 1405 1406 ASSERT(mutex_owned(&mlwq->mlwq_mtx)); 1407 1408 index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1); 1409 ent0 = &mlwq->mlwq_send_ent[index]; 1410 start_pc = mlwq->mlwq_pc; 1411 ++mlwq->mlwq_pc; 1412 1413 bzero(ent0, sizeof (mlxcx_sendq_ent_t)); 1414 ent0->mlsqe_control.mlcs_opcode = MLXCX_WQE_OP_NOP; 1415 ent0->mlsqe_control.mlcs_qp_or_sq = to_be24(mlwq->mlwq_num); 1416 ent0->mlsqe_control.mlcs_wqe_index = to_be16(start_pc); 1417 1418 set_bits8(&ent0->mlsqe_control.mlcs_flags, 1419 MLXCX_SQE_FENCE_MODE, MLXCX_SQE_FENCE_NONE); 1420 set_bits8(&ent0->mlsqe_control.mlcs_flags, 1421 MLXCX_SQE_COMPLETION_MODE, MLXCX_SQE_CQE_ALWAYS); 1422 1423 ent0->mlsqe_control.mlcs_ds = 1; 1424 1425 VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle, 1426 (uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent, 1427 sizeof (mlxcx_sendq_ent_t), DDI_DMA_SYNC_FORDEV)); 1428 ddi_fm_dma_err_get(mlwq->mlwq_dma.mxdb_dma_handle, &err, 1429 DDI_FME_VERSION); 1430 if (err.fme_status != DDI_FM_OK) { 1431 return (B_FALSE); 1432 } 1433 if (!mlxcx_sq_ring_dbell(mlxp, mlwq, index)) { 1434 return (B_FALSE); 1435 } 1436 return (B_TRUE); 1437 } 1438 1439 boolean_t 1440 mlxcx_sq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, 1441 uint8_t *inlinehdrs, size_t inlinelen, uint32_t chkflags, 1442 mlxcx_buffer_t *b0) 1443 { 1444 uint_t index, first, ents = 0; 1445 mlxcx_completion_queue_t *cq; 1446 mlxcx_sendq_ent_t *ent0; 1447 mlxcx_sendq_extra_ent_t *ent; 1448 mlxcx_wqe_data_seg_t *seg; 1449 uint_t ptri, nptr; 1450 const ddi_dma_cookie_t *c; 1451 size_t rem; 1452 mlxcx_buffer_t *b; 1453 ddi_fm_error_t err; 1454 1455 
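	/*
	 * What follows builds a single send WQE: one mlxcx_sendq_ent_t
	 * holding the control segment (opcode, WQE index, ds count), the
	 * eth segment (inline headers, checksum flags) and the first few
	 * data pointer segments, then as many mlxcx_sendq_extra_ent_t
	 * entries as are needed for the remaining DMA cookies of b0 and
	 * its mlb_tx_chain. Unused pointer slots are padded with
	 * MLXCX_NULL_LKEY, the entries are DMA-synced, and the doorbell
	 * is rung once for the first entry via mlxcx_sq_ring_dbell().
	 */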
ASSERT(mutex_owned(&mlwq->mlwq_mtx)); 1456 ASSERT3P(b0->mlb_tx_head, ==, b0); 1457 ASSERT3U(b0->mlb_state, ==, MLXCX_BUFFER_ON_WQ); 1458 cq = mlwq->mlwq_cq; 1459 1460 index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1); 1461 ent0 = &mlwq->mlwq_send_ent[index]; 1462 b0->mlb_wqe_index = mlwq->mlwq_pc; 1463 ++mlwq->mlwq_pc; 1464 ++ents; 1465 1466 first = index; 1467 1468 mutex_enter(&cq->mlcq_bufbmtx); 1469 list_insert_tail(&cq->mlcq_buffers_b, b0); 1470 atomic_inc_64(&cq->mlcq_bufcnt); 1471 mutex_exit(&cq->mlcq_bufbmtx); 1472 1473 bzero(ent0, sizeof (mlxcx_sendq_ent_t)); 1474 ent0->mlsqe_control.mlcs_opcode = MLXCX_WQE_OP_SEND; 1475 ent0->mlsqe_control.mlcs_qp_or_sq = to_be24(mlwq->mlwq_num); 1476 ent0->mlsqe_control.mlcs_wqe_index = to_be16(b0->mlb_wqe_index); 1477 1478 set_bits8(&ent0->mlsqe_control.mlcs_flags, 1479 MLXCX_SQE_FENCE_MODE, MLXCX_SQE_FENCE_WAIT_OTHERS); 1480 set_bits8(&ent0->mlsqe_control.mlcs_flags, 1481 MLXCX_SQE_COMPLETION_MODE, MLXCX_SQE_CQE_ALWAYS); 1482 1483 VERIFY3U(inlinelen, <=, sizeof (ent0->mlsqe_eth.mles_inline_headers)); 1484 set_bits16(&ent0->mlsqe_eth.mles_szflags, 1485 MLXCX_SQE_ETH_INLINE_HDR_SZ, inlinelen); 1486 if (inlinelen > 0) { 1487 bcopy(inlinehdrs, ent0->mlsqe_eth.mles_inline_headers, 1488 inlinelen); 1489 } 1490 1491 ent0->mlsqe_control.mlcs_ds = 1492 offsetof(mlxcx_sendq_ent_t, mlsqe_data) / 16; 1493 1494 if (chkflags & HCK_IPV4_HDRCKSUM) { 1495 ASSERT(mlxp->mlx_caps->mlc_checksum); 1496 set_bit8(&ent0->mlsqe_eth.mles_csflags, 1497 MLXCX_SQE_ETH_CSFLAG_L3_CHECKSUM); 1498 } 1499 if (chkflags & HCK_FULLCKSUM) { 1500 ASSERT(mlxp->mlx_caps->mlc_checksum); 1501 set_bit8(&ent0->mlsqe_eth.mles_csflags, 1502 MLXCX_SQE_ETH_CSFLAG_L4_CHECKSUM); 1503 } 1504 1505 b = b0; 1506 ptri = 0; 1507 nptr = sizeof (ent0->mlsqe_data) / sizeof (mlxcx_wqe_data_seg_t); 1508 seg = ent0->mlsqe_data; 1509 while (b != NULL) { 1510 rem = b->mlb_used; 1511 1512 c = NULL; 1513 while (rem > 0 && 1514 (c = mlxcx_dma_cookie_iter(&b->mlb_dma, c)) != NULL) { 1515 if (ptri >= nptr) { 1516 index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1); 1517 ent = &mlwq->mlwq_send_extra_ent[index]; 1518 ++mlwq->mlwq_pc; 1519 ++ents; 1520 1521 seg = ent->mlsqe_data; 1522 ptri = 0; 1523 nptr = sizeof (ent->mlsqe_data) / 1524 sizeof (mlxcx_wqe_data_seg_t); 1525 } 1526 1527 seg->mlds_lkey = to_be32(mlxp->mlx_rsvd_lkey); 1528 if (c->dmac_size > rem) { 1529 seg->mlds_byte_count = to_be32(rem); 1530 rem = 0; 1531 } else { 1532 seg->mlds_byte_count = to_be32(c->dmac_size); 1533 rem -= c->dmac_size; 1534 } 1535 seg->mlds_address = to_be64(c->dmac_laddress); 1536 ++seg; 1537 ++ptri; 1538 ++ent0->mlsqe_control.mlcs_ds; 1539 1540 ASSERT3U(ent0->mlsqe_control.mlcs_ds, <=, 1541 MLXCX_SQE_MAX_DS); 1542 } 1543 1544 if (b == b0) { 1545 b = list_head(&b0->mlb_tx_chain); 1546 } else { 1547 b = list_next(&b0->mlb_tx_chain, b); 1548 } 1549 } 1550 1551 for (; ptri < nptr; ++ptri, ++seg) { 1552 seg->mlds_lkey = to_be32(MLXCX_NULL_LKEY); 1553 seg->mlds_byte_count = to_be32(0); 1554 seg->mlds_address = to_be64(0); 1555 } 1556 1557 /* 1558 * Make sure the workqueue entry is flushed out before updating 1559 * the doorbell. 
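	 * The sync covers all 'ents' entries just written, starting at
	 * ent0. If the FM DMA check below fails we return before ringing
	 * the doorbell, so the hardware is never told about a WQE we
	 * could not flush.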
1560 */ 1561 VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle, 1562 (uintptr_t)ent0 - (uintptr_t)mlwq->mlwq_send_ent, 1563 ents * sizeof (mlxcx_sendq_ent_t), DDI_DMA_SYNC_FORDEV)); 1564 ddi_fm_dma_err_get(mlwq->mlwq_dma.mxdb_dma_handle, &err, 1565 DDI_FME_VERSION); 1566 if (err.fme_status != DDI_FM_OK) { 1567 return (B_FALSE); 1568 } 1569 if (!mlxcx_sq_ring_dbell(mlxp, mlwq, first)) { 1570 return (B_FALSE); 1571 } 1572 return (B_TRUE); 1573 } 1574 1575 boolean_t 1576 mlxcx_rq_add_buffer(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, 1577 mlxcx_buffer_t *buf) 1578 { 1579 return (mlxcx_rq_add_buffers(mlxp, mlwq, &buf, 1)); 1580 } 1581 1582 boolean_t 1583 mlxcx_rq_add_buffers(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq, 1584 mlxcx_buffer_t **bufs, size_t nbufs) 1585 { 1586 uint_t index; 1587 mlxcx_recvq_ent_t *ent; 1588 mlxcx_completion_queue_t *cq; 1589 mlxcx_wqe_data_seg_t *seg; 1590 uint_t bi, ptri; 1591 const ddi_dma_cookie_t *c; 1592 mlxcx_buffer_t *buf; 1593 ddi_fm_error_t err; 1594 1595 ASSERT(mutex_owned(&mlwq->mlwq_mtx)); 1596 cq = mlwq->mlwq_cq; 1597 ASSERT(mutex_owned(&cq->mlcq_mtx)); 1598 1599 for (bi = 0; bi < nbufs; ++bi) { 1600 buf = bufs[bi]; 1601 bufs[bi] = NULL; 1602 ASSERT3U(buf->mlb_state, ==, MLXCX_BUFFER_ON_WQ); 1603 1604 index = mlwq->mlwq_pc & (mlwq->mlwq_nents - 1); 1605 ent = &mlwq->mlwq_recv_ent[index]; 1606 buf->mlb_wqe_index = mlwq->mlwq_pc; 1607 1608 ++mlwq->mlwq_pc; 1609 1610 mutex_enter(&cq->mlcq_bufbmtx); 1611 list_insert_tail(&cq->mlcq_buffers, buf); 1612 atomic_inc_64(&cq->mlcq_bufcnt); 1613 mutex_exit(&cq->mlcq_bufbmtx); 1614 1615 ASSERT3U(buf->mlb_dma.mxdb_ncookies, <=, MLXCX_RECVQ_MAX_PTRS); 1616 ptri = 0; 1617 c = NULL; 1618 while ((c = mlxcx_dma_cookie_iter(&buf->mlb_dma, c)) != NULL) { 1619 seg = &ent->mlrqe_data[ptri++]; 1620 seg->mlds_lkey = to_be32(mlxp->mlx_rsvd_lkey); 1621 seg->mlds_byte_count = to_be32(c->dmac_size); 1622 seg->mlds_address = to_be64(c->dmac_laddress); 1623 } 1624 /* 1625 * Fill any unused scatter pointers with the special null 1626 * value. 1627 */ 1628 for (; ptri < MLXCX_RECVQ_MAX_PTRS; ++ptri) { 1629 seg = &ent->mlrqe_data[ptri]; 1630 seg->mlds_lkey = to_be32(MLXCX_NULL_LKEY); 1631 seg->mlds_byte_count = to_be32(0); 1632 seg->mlds_address = to_be64(0); 1633 } 1634 1635 /* 1636 * Make sure the workqueue entry is flushed out before updating 1637 * the doorbell. 1638 */ 1639 VERIFY0(ddi_dma_sync(mlwq->mlwq_dma.mxdb_dma_handle, 1640 (uintptr_t)ent - (uintptr_t)mlwq->mlwq_recv_ent, 1641 sizeof (mlxcx_recvq_ent_t), DDI_DMA_SYNC_FORDEV)); 1642 ddi_fm_dma_err_get(mlwq->mlwq_dma.mxdb_dma_handle, &err, 1643 DDI_FME_VERSION); 1644 if (err.fme_status != DDI_FM_OK) { 1645 return (B_FALSE); 1646 } 1647 } 1648 1649 mlwq->mlwq_doorbell->mlwqd_recv_counter = to_be16(mlwq->mlwq_pc); 1650 /* 1651 * Flush the CQ doorbell as well so that HW knows how many 1652 * completions we've consumed. 
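	 * (The WQ doorbell, carrying the mlwqd_recv_counter we advanced
	 * to mlwq_pc above, is synced straight after it.)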
1653 */ 1654 MLXCX_DMA_SYNC(cq->mlcq_doorbell_dma, DDI_DMA_SYNC_FORDEV); 1655 ddi_fm_dma_err_get(cq->mlcq_doorbell_dma.mxdb_dma_handle, &err, 1656 DDI_FME_VERSION); 1657 if (err.fme_status != DDI_FM_OK) { 1658 return (B_FALSE); 1659 } 1660 MLXCX_DMA_SYNC(mlwq->mlwq_doorbell_dma, DDI_DMA_SYNC_FORDEV); 1661 ddi_fm_dma_err_get(mlwq->mlwq_doorbell_dma.mxdb_dma_handle, &err, 1662 DDI_FME_VERSION); 1663 if (err.fme_status != DDI_FM_OK) { 1664 return (B_FALSE); 1665 } 1666 return (B_TRUE); 1667 } 1668 1669 void 1670 mlxcx_rq_refill(mlxcx_t *mlxp, mlxcx_work_queue_t *mlwq) 1671 { 1672 size_t target, current, want, done, n; 1673 mlxcx_completion_queue_t *cq; 1674 mlxcx_buffer_t *b[MLXCX_RQ_REFILL_STEP]; 1675 uint_t i; 1676 1677 ASSERT(mutex_owned(&mlwq->mlwq_mtx)); 1678 cq = mlwq->mlwq_cq; 1679 ASSERT(mutex_owned(&cq->mlcq_mtx)); 1680 1681 ASSERT(mlwq->mlwq_state & MLXCX_WQ_BUFFERS); 1682 1683 target = mlwq->mlwq_nents - MLXCX_RQ_REFILL_STEP; 1684 cq = mlwq->mlwq_cq; 1685 1686 if (cq->mlcq_state & MLXCX_CQ_TEARDOWN) 1687 return; 1688 1689 current = cq->mlcq_bufcnt; 1690 1691 if (current >= target - MLXCX_RQ_REFILL_STEP) 1692 return; 1693 1694 want = target - current; 1695 done = 0; 1696 1697 while (!(mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) && done < want) { 1698 n = mlxcx_buf_take_n(mlxp, mlwq, b, MLXCX_RQ_REFILL_STEP); 1699 if (n == 0) { 1700 mlxcx_warn(mlxp, "!exiting rq refill early, done %u " 1701 "but wanted %u", done, want); 1702 return; 1703 } 1704 if (mlwq->mlwq_state & MLXCX_WQ_TEARDOWN) { 1705 for (i = 0; i < n; ++i) 1706 mlxcx_buf_return(mlxp, b[i]); 1707 return; 1708 } 1709 if (!mlxcx_rq_add_buffers(mlxp, mlwq, b, n)) { 1710 /* 1711 * mlxcx_rq_add_buffers NULLs out the buffers as it 1712 * enqueues them, so any that are non-NULL we have to 1713 * free now. The others now belong to the WQ, even if 1714 * we failed. 
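			 * (Returning them here puts them straight back on
			 * the shard's free list via mlxcx_buf_return(), so
			 * nothing is leaked on this path.)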
1715 */ 1716 for (i = 0; i < n; ++i) { 1717 if (b[i] != NULL) { 1718 mlxcx_buf_return(mlxp, b[i]); 1719 } 1720 } 1721 return; 1722 } 1723 done += n; 1724 } 1725 } 1726 1727 static const char * 1728 mlxcx_cq_err_syndrome_string(mlxcx_cq_error_syndrome_t sy) 1729 { 1730 switch (sy) { 1731 case MLXCX_CQ_ERR_LOCAL_LENGTH: 1732 return ("LOCAL_LENGTH"); 1733 case MLXCX_CQ_ERR_LOCAL_QP_OP: 1734 return ("LOCAL_QP_OP"); 1735 case MLXCX_CQ_ERR_LOCAL_PROTECTION: 1736 return ("LOCAL_PROTECTION"); 1737 case MLXCX_CQ_ERR_WR_FLUSHED: 1738 return ("WR_FLUSHED"); 1739 case MLXCX_CQ_ERR_MEM_WINDOW_BIND: 1740 return ("MEM_WINDOW_BIND"); 1741 case MLXCX_CQ_ERR_BAD_RESPONSE: 1742 return ("BAD_RESPONSE"); 1743 case MLXCX_CQ_ERR_LOCAL_ACCESS: 1744 return ("LOCAL_ACCESS"); 1745 case MLXCX_CQ_ERR_XPORT_RETRY_CTR: 1746 return ("XPORT_RETRY_CTR"); 1747 case MLXCX_CQ_ERR_RNR_RETRY_CTR: 1748 return ("RNR_RETRY_CTR"); 1749 case MLXCX_CQ_ERR_ABORTED: 1750 return ("ABORTED"); 1751 default: 1752 return ("UNKNOWN"); 1753 } 1754 } 1755 1756 static void 1757 mlxcx_fm_cqe_ereport(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, 1758 mlxcx_completionq_error_ent_t *ent) 1759 { 1760 uint64_t ena; 1761 char buf[FM_MAX_CLASS]; 1762 const char *name = mlxcx_cq_err_syndrome_string(ent->mlcqee_syndrome); 1763 1764 if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps)) 1765 return; 1766 1767 (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", 1768 MLXCX_FM_SERVICE_MLXCX, "cqe.err"); 1769 ena = fm_ena_generate(0, FM_ENA_FMT1); 1770 1771 ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP, 1772 FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, 1773 "syndrome", DATA_TYPE_STRING, name, 1774 "syndrome_num", DATA_TYPE_UINT8, ent->mlcqee_syndrome, 1775 "vendor_syndrome", DATA_TYPE_UINT8, 1776 ent->mlcqee_vendor_error_syndrome, 1777 "wqe_counter", DATA_TYPE_UINT16, from_be16(ent->mlcqee_wqe_counter), 1778 "wq_type", DATA_TYPE_STRING, 1779 (mlcq->mlcq_wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ) ? 
"send": "recv", 1780 "cq_num", DATA_TYPE_UINT32, mlcq->mlcq_num, 1781 "wq_num", DATA_TYPE_UINT32, mlcq->mlcq_wq->mlwq_num, 1782 NULL); 1783 ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_DEGRADED); 1784 } 1785 1786 void 1787 mlxcx_tx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, 1788 mlxcx_completionq_ent_t *ent, mlxcx_buffer_t *buf) 1789 { 1790 ASSERT(mutex_owned(&mlcq->mlcq_mtx)); 1791 if (ent->mlcqe_opcode == MLXCX_CQE_OP_REQ_ERR) { 1792 mlxcx_completionq_error_ent_t *eent = 1793 (mlxcx_completionq_error_ent_t *)ent; 1794 mlxcx_fm_cqe_ereport(mlxp, mlcq, eent); 1795 mlxcx_buf_return_chain(mlxp, buf, B_FALSE); 1796 mutex_enter(&mlcq->mlcq_wq->mlwq_mtx); 1797 mlxcx_check_sq(mlxp, mlcq->mlcq_wq); 1798 mutex_exit(&mlcq->mlcq_wq->mlwq_mtx); 1799 return; 1800 } 1801 1802 if (ent->mlcqe_opcode != MLXCX_CQE_OP_REQ) { 1803 mlxcx_warn(mlxp, "!got weird cq opcode: %x", ent->mlcqe_opcode); 1804 mlxcx_buf_return_chain(mlxp, buf, B_FALSE); 1805 return; 1806 } 1807 1808 if (ent->mlcqe_send_wqe_opcode != MLXCX_WQE_OP_SEND) { 1809 mlxcx_warn(mlxp, "!got weird cq wqe opcode: %x", 1810 ent->mlcqe_send_wqe_opcode); 1811 mlxcx_buf_return_chain(mlxp, buf, B_FALSE); 1812 return; 1813 } 1814 1815 if (ent->mlcqe_format != MLXCX_CQE_FORMAT_BASIC) { 1816 mlxcx_warn(mlxp, "!got weird cq format: %x", ent->mlcqe_format); 1817 mlxcx_buf_return_chain(mlxp, buf, B_FALSE); 1818 return; 1819 } 1820 1821 mlxcx_buf_return_chain(mlxp, buf, B_FALSE); 1822 } 1823 1824 mblk_t * 1825 mlxcx_rx_completion(mlxcx_t *mlxp, mlxcx_completion_queue_t *mlcq, 1826 mlxcx_completionq_ent_t *ent, mlxcx_buffer_t *buf) 1827 { 1828 uint32_t chkflags = 0; 1829 ddi_fm_error_t err; 1830 1831 ASSERT(mutex_owned(&mlcq->mlcq_mtx)); 1832 1833 if (ent->mlcqe_opcode == MLXCX_CQE_OP_RESP_ERR) { 1834 mlxcx_completionq_error_ent_t *eent = 1835 (mlxcx_completionq_error_ent_t *)ent; 1836 mlxcx_fm_cqe_ereport(mlxp, mlcq, eent); 1837 mlxcx_buf_return(mlxp, buf); 1838 mutex_enter(&mlcq->mlcq_wq->mlwq_mtx); 1839 mlxcx_check_rq(mlxp, mlcq->mlcq_wq); 1840 mutex_exit(&mlcq->mlcq_wq->mlwq_mtx); 1841 return (NULL); 1842 } 1843 1844 if (ent->mlcqe_opcode != MLXCX_CQE_OP_RESP) { 1845 mlxcx_warn(mlxp, "!got weird cq opcode: %x", ent->mlcqe_opcode); 1846 mlxcx_buf_return(mlxp, buf); 1847 return (NULL); 1848 } 1849 1850 if (ent->mlcqe_format != MLXCX_CQE_FORMAT_BASIC) { 1851 mlxcx_warn(mlxp, "!got weird cq format: %x", ent->mlcqe_format); 1852 mlxcx_buf_return(mlxp, buf); 1853 return (NULL); 1854 } 1855 1856 if (ent->mlcqe_rx_drop_counter > 0) { 1857 atomic_add_64(&mlcq->mlcq_stats->mlps_rx_drops, 1858 ent->mlcqe_rx_drop_counter); 1859 } 1860 1861 MLXCX_DMA_SYNC(buf->mlb_dma, DDI_DMA_SYNC_FORCPU); 1862 ddi_fm_dma_err_get(buf->mlb_dma.mxdb_dma_handle, &err, 1863 DDI_FME_VERSION); 1864 if (err.fme_status != DDI_FM_OK) { 1865 ddi_fm_dma_err_clear(buf->mlb_dma.mxdb_dma_handle, 1866 DDI_FME_VERSION); 1867 mlxcx_buf_return(mlxp, buf); 1868 return (NULL); 1869 } 1870 1871 if (!mlxcx_buf_loan(mlxp, buf)) { 1872 mlxcx_warn(mlxp, "!loan failed, dropping packet"); 1873 mlxcx_buf_return(mlxp, buf); 1874 return (NULL); 1875 } 1876 1877 buf->mlb_mp->b_next = NULL; 1878 buf->mlb_mp->b_cont = NULL; 1879 buf->mlb_mp->b_wptr = buf->mlb_mp->b_rptr + 1880 from_be32(ent->mlcqe_byte_cnt); 1881 1882 if (get_bit8(ent->mlcqe_csflags, MLXCX_CQE_CSFLAGS_L4_OK)) { 1883 chkflags |= HCK_FULLCKSUM_OK; 1884 } 1885 if (get_bit8(ent->mlcqe_csflags, MLXCX_CQE_CSFLAGS_L3_OK)) { 1886 chkflags |= HCK_IPV4_HDRCKSUM_OK; 1887 } 1888 if (chkflags != 0) { 1889 mac_hcksum_set(buf->mlb_mp, 0, 0, 0, 
1890 from_be16(ent->mlcqe_checksum), chkflags); 1891 } 1892 1893 /* 1894 * Don't check if a refill is needed on every single completion, 1895 * since checking involves taking the RQ lock. 1896 */ 1897 if ((buf->mlb_wqe_index & 0x7) == 0) { 1898 mlxcx_work_queue_t *wq = mlcq->mlcq_wq; 1899 ASSERT(wq != NULL); 1900 mutex_enter(&wq->mlwq_mtx); 1901 if (!(wq->mlwq_state & MLXCX_WQ_TEARDOWN)) 1902 mlxcx_rq_refill(mlxp, wq); 1903 mutex_exit(&wq->mlwq_mtx); 1904 } 1905 1906 return (buf->mlb_mp); 1907 } 1908 1909 static void 1910 mlxcx_buf_mp_return(caddr_t arg) 1911 { 1912 mlxcx_buffer_t *b = (mlxcx_buffer_t *)arg; 1913 mlxcx_t *mlxp = b->mlb_mlx; 1914 1915 if (b->mlb_state != MLXCX_BUFFER_ON_LOAN) { 1916 b->mlb_mp = NULL; 1917 return; 1918 } 1919 /* 1920 * The mblk for this buffer_t (in its mlb_mp field) has been used now, 1921 * so NULL it out. 1922 */ 1923 b->mlb_mp = NULL; 1924 mlxcx_buf_return(mlxp, b); 1925 } 1926 1927 boolean_t 1928 mlxcx_buf_create(mlxcx_t *mlxp, mlxcx_buf_shard_t *shard, mlxcx_buffer_t **bp) 1929 { 1930 mlxcx_buffer_t *b; 1931 ddi_device_acc_attr_t acc; 1932 ddi_dma_attr_t attr; 1933 boolean_t ret; 1934 1935 b = kmem_cache_alloc(mlxp->mlx_bufs_cache, KM_SLEEP); 1936 b->mlb_shard = shard; 1937 b->mlb_foreign = B_FALSE; 1938 1939 mlxcx_dma_acc_attr(mlxp, &acc); 1940 mlxcx_dma_buf_attr(mlxp, &attr); 1941 1942 ret = mlxcx_dma_alloc_offset(mlxp, &b->mlb_dma, &attr, &acc, 1943 B_FALSE, mlxp->mlx_ports[0].mlp_mtu, 2, B_TRUE); 1944 if (!ret) { 1945 kmem_cache_free(mlxp->mlx_bufs_cache, b); 1946 return (B_FALSE); 1947 } 1948 1949 b->mlb_frtn.free_func = mlxcx_buf_mp_return; 1950 b->mlb_frtn.free_arg = (caddr_t)b; 1951 b->mlb_mp = desballoc((unsigned char *)b->mlb_dma.mxdb_va, 1952 b->mlb_dma.mxdb_len, 0, &b->mlb_frtn); 1953 1954 *bp = b; 1955 1956 return (B_TRUE); 1957 } 1958 1959 boolean_t 1960 mlxcx_buf_create_foreign(mlxcx_t *mlxp, mlxcx_buf_shard_t *shard, 1961 mlxcx_buffer_t **bp) 1962 { 1963 mlxcx_buffer_t *b; 1964 ddi_dma_attr_t attr; 1965 boolean_t ret; 1966 1967 b = kmem_cache_alloc(mlxp->mlx_bufs_cache, KM_SLEEP); 1968 b->mlb_shard = shard; 1969 b->mlb_foreign = B_TRUE; 1970 1971 mlxcx_dma_buf_attr(mlxp, &attr); 1972 1973 ret = mlxcx_dma_init(mlxp, &b->mlb_dma, &attr, B_TRUE); 1974 if (!ret) { 1975 kmem_cache_free(mlxp->mlx_bufs_cache, b); 1976 return (B_FALSE); 1977 } 1978 1979 *bp = b; 1980 1981 return (B_TRUE); 1982 } 1983 1984 static void 1985 mlxcx_buf_take_foreign(mlxcx_t *mlxp, mlxcx_work_queue_t *wq, 1986 mlxcx_buffer_t **bp) 1987 { 1988 mlxcx_buffer_t *b; 1989 mlxcx_buf_shard_t *s = wq->mlwq_foreign_bufs; 1990 1991 mutex_enter(&s->mlbs_mtx); 1992 while (list_is_empty(&s->mlbs_free)) 1993 cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx); 1994 b = list_remove_head(&s->mlbs_free); 1995 ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE); 1996 ASSERT(b->mlb_foreign); 1997 b->mlb_state = MLXCX_BUFFER_ON_WQ; 1998 list_insert_tail(&s->mlbs_busy, b); 1999 mutex_exit(&s->mlbs_mtx); 2000 2001 *bp = b; 2002 } 2003 2004 boolean_t 2005 mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq, 2006 mblk_t *mpb, size_t off, mlxcx_buffer_t **bp) 2007 { 2008 mlxcx_buffer_t *b, *b0 = NULL; 2009 boolean_t first = B_TRUE; 2010 ddi_fm_error_t err; 2011 mblk_t *mp; 2012 uint8_t *rptr; 2013 size_t sz; 2014 size_t ncookies = 0; 2015 boolean_t ret; 2016 uint_t attempts = 0; 2017 2018 for (mp = mpb; mp != NULL; mp = mp->b_cont) { 2019 rptr = mp->b_rptr; 2020 sz = MBLKL(mp); 2021 2022 if (off > 0) 2023 ASSERT3U(off, <, sz); 2024 rptr += off; 2025 sz -= off; 2026 2027 if (sz < 
boolean_t
mlxcx_buf_bind_or_copy(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
    mblk_t *mpb, size_t off, mlxcx_buffer_t **bp)
{
        mlxcx_buffer_t *b, *b0 = NULL;
        boolean_t first = B_TRUE;
        ddi_fm_error_t err;
        mblk_t *mp;
        uint8_t *rptr;
        size_t sz;
        size_t ncookies = 0;
        boolean_t ret;
        uint_t attempts = 0;

        for (mp = mpb; mp != NULL; mp = mp->b_cont) {
                rptr = mp->b_rptr;
                sz = MBLKL(mp);

                if (off > 0)
                        ASSERT3U(off, <, sz);
                rptr += off;
                sz -= off;

                if (sz < mlxp->mlx_props.mldp_tx_bind_threshold)
                        goto copyb;

                mlxcx_buf_take_foreign(mlxp, wq, &b);
                ret = mlxcx_dma_bind_mblk(mlxp, &b->mlb_dma, mp, off, B_FALSE);

                if (!ret) {
                        mlxcx_buf_return(mlxp, b);

copyb:
                        mlxcx_buf_take(mlxp, wq, &b);
                        ASSERT3U(b->mlb_dma.mxdb_len, >=, sz);
                        bcopy(rptr, b->mlb_dma.mxdb_va, sz);
                        MLXCX_DMA_SYNC(b->mlb_dma, DDI_DMA_SYNC_FORDEV);
                        ddi_fm_dma_err_get(b->mlb_dma.mxdb_dma_handle, &err,
                            DDI_FME_VERSION);
                        if (err.fme_status != DDI_FM_OK) {
                                ddi_fm_dma_err_clear(b->mlb_dma.mxdb_dma_handle,
                                    DDI_FME_VERSION);
                                mlxcx_buf_return(mlxp, b);
                                if (++attempts > MLXCX_BUF_BIND_MAX_ATTEMTPS) {
                                        *bp = NULL;
                                        return (B_FALSE);
                                }
                                goto copyb;
                        }
                }

                /*
                 * We might overestimate here when we've copied data, since
                 * the buffer might be longer than what we copied into it. This
                 * is safe since it's always wrong in the conservative
                 * direction (and we will blow up later when we actually
                 * generate the WQE anyway).
                 *
                 * If the assert below ever blows, we'll have to come and fix
                 * this up so we can transmit these packets.
                 */
                ncookies += b->mlb_dma.mxdb_ncookies;

                if (first)
                        b0 = b;

                if (!first)
                        b->mlb_state = MLXCX_BUFFER_ON_CHAIN;

                b->mlb_tx_mp = mp;
                b->mlb_tx_head = b0;
                b->mlb_used = sz;

                if (!first)
                        list_insert_tail(&b0->mlb_tx_chain, b);
                first = B_FALSE;
                off = 0;
        }

        ASSERT3U(ncookies, <=, MLXCX_SQE_MAX_PTRS);

        *bp = b0;
        return (B_TRUE);
}

void
mlxcx_buf_take(mlxcx_t *mlxp, mlxcx_work_queue_t *wq, mlxcx_buffer_t **bp)
{
        mlxcx_buffer_t *b;
        mlxcx_buf_shard_t *s = wq->mlwq_bufs;

        mutex_enter(&s->mlbs_mtx);
        while (list_is_empty(&s->mlbs_free))
                cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
        b = list_remove_head(&s->mlbs_free);
        ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
        b->mlb_state = MLXCX_BUFFER_ON_WQ;
        list_insert_tail(&s->mlbs_busy, b);
        mutex_exit(&s->mlbs_mtx);

        *bp = b;
}

#define MLXCX_BUF_TAKE_N_TIMEOUT_USEC   5000
#define MLXCX_BUF_TAKE_N_MAX_RETRIES    3

size_t
mlxcx_buf_take_n(mlxcx_t *mlxp, mlxcx_work_queue_t *wq,
    mlxcx_buffer_t **bp, size_t nbufs)
{
        mlxcx_buffer_t *b;
        size_t done = 0, empty = 0;
        clock_t wtime = drv_usectohz(MLXCX_BUF_TAKE_N_TIMEOUT_USEC);
        mlxcx_buf_shard_t *s;

        s = wq->mlwq_bufs;

        mutex_enter(&s->mlbs_mtx);
        while (done < nbufs) {
                while (list_is_empty(&s->mlbs_free)) {
                        (void) cv_reltimedwait(&s->mlbs_free_nonempty,
                            &s->mlbs_mtx, wtime, TR_MILLISEC);
                        if (list_is_empty(&s->mlbs_free) &&
                            empty++ >= MLXCX_BUF_TAKE_N_MAX_RETRIES) {
                                mutex_exit(&s->mlbs_mtx);
                                return (done);
                        }
                }
                b = list_remove_head(&s->mlbs_free);
                ASSERT3U(b->mlb_state, ==, MLXCX_BUFFER_FREE);
                b->mlb_state = MLXCX_BUFFER_ON_WQ;
                list_insert_tail(&s->mlbs_busy, b);
                bp[done++] = b;
        }
        mutex_exit(&s->mlbs_mtx);
        return (done);
}

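/*
 * Loan a buffer up the stack: make sure it has an mblk wrapper (re-doing the
 * desballoc() if the previous mblk was consumed) and move it from ON_WQ to
 * ON_LOAN. The buffer normally comes back to us via mlxcx_buf_mp_return()
 * calling mlxcx_buf_return() once the stack frees the mblk.
 */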
boolean_t
mlxcx_buf_loan(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
        VERIFY3U(b->mlb_state, ==, MLXCX_BUFFER_ON_WQ);
        ASSERT3P(b->mlb_mlx, ==, mlxp);

        if (b->mlb_mp == NULL) {
                b->mlb_mp = desballoc((unsigned char *)b->mlb_dma.mxdb_va,
                    b->mlb_dma.mxdb_len, 0, &b->mlb_frtn);
                if (b->mlb_mp == NULL)
                        return (B_FALSE);
        }

        b->mlb_state = MLXCX_BUFFER_ON_LOAN;
        b->mlb_wqe_index = 0;
        return (B_TRUE);
}

void
mlxcx_buf_return_chain(mlxcx_t *mlxp, mlxcx_buffer_t *b0, boolean_t keepmp)
{
        mlxcx_buffer_t *b;

        if (b0->mlb_tx_head != b0) {
                mlxcx_buf_return(mlxp, b0);
                return;
        }

        while ((b = list_head(&b0->mlb_tx_chain)) != NULL) {
                mlxcx_buf_return(mlxp, b);
        }
        if (keepmp) {
                b0->mlb_tx_mp = NULL;
                b0->mlb_tx_head = NULL;
        }
        mlxcx_buf_return(mlxp, b0);
}

void
mlxcx_buf_return(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
        mlxcx_buffer_state_t oldstate = b->mlb_state;
        mlxcx_buffer_t *txhead = b->mlb_tx_head;
        mlxcx_buf_shard_t *s = b->mlb_shard;
        mblk_t *mp = b->mlb_tx_mp;

        VERIFY3U(oldstate, !=, MLXCX_BUFFER_FREE);
        ASSERT3P(b->mlb_mlx, ==, mlxp);
        b->mlb_state = MLXCX_BUFFER_FREE;
        b->mlb_wqe_index = 0;
        b->mlb_tx_head = NULL;
        b->mlb_tx_mp = NULL;
        b->mlb_used = 0;
        ASSERT(list_is_empty(&b->mlb_tx_chain));

        mutex_enter(&s->mlbs_mtx);
        switch (oldstate) {
        case MLXCX_BUFFER_INIT:
                break;
        case MLXCX_BUFFER_ON_WQ:
                list_remove(&s->mlbs_busy, b);
                break;
        case MLXCX_BUFFER_ON_LOAN:
                ASSERT(!b->mlb_foreign);
                list_remove(&s->mlbs_busy, b);
                break;
        case MLXCX_BUFFER_FREE:
                VERIFY(0);
                break;
        case MLXCX_BUFFER_ON_CHAIN:
                ASSERT(txhead != NULL);
                list_remove(&txhead->mlb_tx_chain, b);
                list_remove(&s->mlbs_busy, b);
                break;
        }

        if (b->mlb_foreign) {
                if (b->mlb_dma.mxdb_flags & MLXCX_DMABUF_BOUND) {
                        mlxcx_dma_unbind(mlxp, &b->mlb_dma);
                }
        }

        list_insert_tail(&s->mlbs_free, b);
        cv_signal(&s->mlbs_free_nonempty);

        mutex_exit(&s->mlbs_mtx);

        /*
         * For TX chain heads, free the mblk_t after we let go of the lock.
         * This might be a borrowed buf that we in turn loaned to MAC, in which
         * case calling freemsg() on it will re-enter this very function -- so
         * we better not be holding the lock!
         */
        if (txhead == b)
                freemsg(mp);
}

void
mlxcx_buf_destroy(mlxcx_t *mlxp, mlxcx_buffer_t *b)
{
        mlxcx_buf_shard_t *s = b->mlb_shard;

        VERIFY(b->mlb_state == MLXCX_BUFFER_FREE ||
            b->mlb_state == MLXCX_BUFFER_INIT);
        ASSERT(mutex_owned(&s->mlbs_mtx));
        if (b->mlb_state == MLXCX_BUFFER_FREE)
                list_remove(&s->mlbs_free, b);

        /*
         * This is going back to the kmem cache, so it needs to be set up in
         * the same way we expect a new buffer to come out (state INIT, other
         * fields NULL'd)
         */
        b->mlb_state = MLXCX_BUFFER_INIT;
        b->mlb_shard = NULL;
        if (b->mlb_mp != NULL) {
                freeb(b->mlb_mp);
                /*
                 * freeb() runs the desballoc free routine
                 * (mlxcx_buf_mp_return), which clears mlb_mp for us since
                 * the buffer is not out on loan.
                 */
                ASSERT(b->mlb_mp == NULL);
        }
        mlxcx_dma_free(&b->mlb_dma);
        ASSERT(list_is_empty(&b->mlb_tx_chain));

        kmem_cache_free(mlxp->mlx_bufs_cache, b);
}