/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2021 Oxide Computer Company
 */

#include "ena.h"

static void
ena_refill_rx(ena_rxq_t *rxq, uint16_t num)
{
	VERIFY3P(rxq, !=, NULL);
	ASSERT(MUTEX_HELD(&rxq->er_lock));
	ASSERT3U(num, <=, rxq->er_sq_num_descs);
	uint16_t tail_mod = rxq->er_sq_tail_idx & (rxq->er_sq_num_descs - 1);

	while (num != 0) {
		enahw_rx_desc_t *desc = &rxq->er_sq_descs[tail_mod];
		ena_rx_ctrl_block_t *rcb = &rxq->er_rcbs[tail_mod];
		uint16_t phase = rxq->er_sq_phase;

		VERIFY3U(tail_mod, <, rxq->er_sq_num_descs);
		VERIFY3P(desc, !=, NULL);
		VERIFY3P(rcb, !=, NULL);
		VERIFY3P(desc, >=, rxq->er_sq_descs);
		VERIFY3P(desc, <=,
		    (rxq->er_sq_descs + rxq->er_sq_num_descs - 1));

		desc->erd_length = rcb->ercb_dma.edb_len;
		desc->erd_req_id = tail_mod;
		VERIFY3P(rcb->ercb_dma.edb_cookie, !=, NULL);
		ena_set_dma_addr_values(rxq->er_ena,
		    rcb->ercb_dma.edb_cookie->dmac_laddress,
		    &desc->erd_buff_addr_lo, &desc->erd_buff_addr_hi);
		ENAHW_RX_DESC_SET_PHASE(desc, phase);
		ENAHW_RX_DESC_SET_FIRST(desc);
		ENAHW_RX_DESC_SET_LAST(desc);
		ENAHW_RX_DESC_SET_COMP_REQ(desc);
		DTRACE_PROBE1(ena__refill__rx, enahw_rx_desc_t *, desc);
		rxq->er_sq_tail_idx++;
		tail_mod = rxq->er_sq_tail_idx & (rxq->er_sq_num_descs - 1);

		if (tail_mod == 0) {
			rxq->er_sq_phase = !rxq->er_sq_phase;
		}

		num--;
	}

	ENA_DMA_SYNC(rxq->er_sq_dma, DDI_DMA_SYNC_FORDEV);
	ena_hw_abs_write32(rxq->er_ena, rxq->er_sq_db_addr,
	    rxq->er_sq_tail_idx);
}

void
ena_free_rx_dma(ena_rxq_t *rxq)
{
	if (rxq->er_rcbs != NULL) {
		for (uint_t i = 0; i < rxq->er_sq_num_descs; i++) {
			ena_rx_ctrl_block_t *rcb = &rxq->er_rcbs[i];
			ena_dma_free(&rcb->ercb_dma);
		}

		kmem_free(rxq->er_rcbs,
		    sizeof (*rxq->er_rcbs) * rxq->er_sq_num_descs);

		rxq->er_rcbs = NULL;
	}

	ena_dma_free(&rxq->er_cq_dma);
	rxq->er_cq_descs = NULL;
	rxq->er_cq_num_descs = 0;

	ena_dma_free(&rxq->er_sq_dma);
	rxq->er_sq_descs = NULL;
	rxq->er_sq_num_descs = 0;

	rxq->er_state &= ~ENA_RXQ_STATE_HOST_ALLOC;
}

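/*
 * Allocate all host-side DMA resources for this Rx queue: the SQ
 * descriptor ring, one data buffer per SQ descriptor, and the CQ
 * descriptor ring. On failure, any partial allocations are released
 * via ena_free_rx_dma().
 */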
static int
ena_alloc_rx_dma(ena_rxq_t *rxq)
{
	ena_t *ena = rxq->er_ena;
	size_t cq_descs_sz;
	size_t sq_descs_sz;
	ena_dma_conf_t conf;
	int err = 0;

	cq_descs_sz = rxq->er_cq_num_descs * sizeof (*rxq->er_cq_descs);
	sq_descs_sz = rxq->er_sq_num_descs * sizeof (*rxq->er_sq_descs);
	conf = (ena_dma_conf_t) {
		.edc_size = sq_descs_sz,
		.edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT,
		.edc_sgl = 1,
		.edc_endian = DDI_NEVERSWAP_ACC,
		.edc_stream = B_FALSE,
	};

	if (!ena_dma_alloc(ena, &rxq->er_sq_dma, &conf, sq_descs_sz)) {
		return (ENOMEM);
	}

	rxq->er_sq_descs = (void *)rxq->er_sq_dma.edb_va;
	rxq->er_rcbs = kmem_zalloc(sizeof (*rxq->er_rcbs) *
	    rxq->er_sq_num_descs, KM_SLEEP);

	for (uint_t i = 0; i < rxq->er_sq_num_descs; i++) {
		ena_rx_ctrl_block_t *rcb = &rxq->er_rcbs[i];
		ena_dma_conf_t buf_conf = {
			.edc_size = ena->ena_rx_buf_sz,
			.edc_align = 1,
			.edc_sgl = ena->ena_rx_sgl_max_sz,
			.edc_endian = DDI_NEVERSWAP_ACC,
			.edc_stream = B_TRUE,
		};

		if (!ena_dma_alloc(ena, &rcb->ercb_dma, &buf_conf,
		    ena->ena_rx_buf_sz)) {
			err = ENOMEM;
			goto error;
		}
	}

	conf = (ena_dma_conf_t) {
		.edc_size = cq_descs_sz,
		.edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT,
		.edc_sgl = 1,
		.edc_endian = DDI_NEVERSWAP_ACC,
		.edc_stream = B_FALSE,
	};

	if (!ena_dma_alloc(ena, &rxq->er_cq_dma, &conf, cq_descs_sz)) {
		err = ENOMEM;
		goto error;
	}

	rxq->er_cq_descs = (void *)rxq->er_cq_dma.edb_va;
	rxq->er_state |= ENA_RXQ_STATE_HOST_ALLOC;
	return (0);

error:
	ena_free_rx_dma(rxq);
	return (err);
}

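/*
 * Allocate and construct an Rx queue: first the host-side DMA
 * resources, then the device-side Completion Queue and Submission
 * Queue. Returns B_TRUE on success, B_FALSE otherwise.
 */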
boolean_t
ena_alloc_rxq(ena_rxq_t *rxq)
{
	int ret = 0;
	ena_t *ena = rxq->er_ena;
	uint16_t cq_hw_idx, sq_hw_idx;
	uint32_t *cq_unmask_addr, *cq_headdb, *cq_numanode;
	uint32_t *sq_db_addr;

	/*
	 * First, allocate the Rx data buffers.
	 */
	if ((ret = ena_alloc_rx_dma(rxq)) != 0) {
		ena_err(ena, "failed to allocate Rx queue %u data buffers: %d",
		    rxq->er_rxqs_idx, ret);
		return (B_FALSE);
	}

	ASSERT(rxq->er_state & ENA_RXQ_STATE_HOST_ALLOC);

	/*
	 * Second, create the Completion Queue.
	 */
	ret = ena_create_cq(ena, rxq->er_cq_num_descs,
	    rxq->er_cq_dma.edb_cookie->dmac_laddress, B_FALSE,
	    rxq->er_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_headdb,
	    &cq_numanode);

	if (ret != 0) {
		ena_err(ena, "failed to create Rx CQ %u: %d", rxq->er_rxqs_idx,
		    ret);
		return (B_FALSE);
	}

	/* The phase must always start on 1. */
	rxq->er_cq_phase = 1;
	rxq->er_cq_head_idx = 0;
	rxq->er_cq_hw_idx = cq_hw_idx;
	rxq->er_cq_unmask_addr = cq_unmask_addr;
	rxq->er_cq_head_db_addr = cq_headdb;
	rxq->er_cq_numa_addr = cq_numanode;
	rxq->er_state |= ENA_RXQ_STATE_CQ_CREATED;

	/*
	 * Third, create the Submission Queue to match with the above
	 * CQ. At this time we force the SQ and CQ to have the same
	 * number of descriptors as we only use a 1:1 completion
	 * policy. However, in the future, we could loosen this and
	 * use an on-demand completion policy and the two could have a
	 * different number of descriptors.
	 */
	ASSERT3U(rxq->er_sq_num_descs, ==, rxq->er_cq_num_descs);
	ret = ena_create_sq(ena, rxq->er_sq_num_descs,
	    rxq->er_sq_dma.edb_cookie->dmac_laddress, B_FALSE, cq_hw_idx,
	    &sq_hw_idx, &sq_db_addr);

	if (ret != 0) {
		ena_err(ena, "failed to create Rx SQ %u: %d", rxq->er_rxqs_idx,
		    ret);
		return (B_FALSE);
	}

	ASSERT3P(sq_db_addr, !=, NULL);
	rxq->er_sq_hw_idx = sq_hw_idx;
	rxq->er_sq_db_addr = sq_db_addr;
	/* The phase must always start on 1. */
	rxq->er_sq_phase = 1;
	rxq->er_sq_tail_idx = 0;
	rxq->er_sq_avail_descs = rxq->er_sq_num_descs;
	rxq->er_mode = ENA_RXQ_MODE_INTR;
	rxq->er_state |= ENA_RXQ_STATE_SQ_CREATED;

	return (B_TRUE);
}

void
ena_cleanup_rxq(ena_rxq_t *rxq)
{
	int ret = 0;
	ena_t *ena = rxq->er_ena;

	if ((rxq->er_state & ENA_RXQ_STATE_SQ_CREATED) != 0) {
		ret = ena_destroy_sq(ena, rxq->er_sq_hw_idx, B_FALSE);

		if (ret != 0) {
			ena_err(ena, "failed to destroy Rx SQ %u: %d",
			    rxq->er_rxqs_idx, ret);
		}

		rxq->er_sq_hw_idx = 0;
		rxq->er_sq_db_addr = NULL;
		rxq->er_sq_tail_idx = 0;
		rxq->er_sq_phase = 0;
		rxq->er_state &= ~ENA_RXQ_STATE_SQ_CREATED;
	}

	if ((rxq->er_state & ENA_RXQ_STATE_CQ_CREATED) != 0) {
		ret = ena_destroy_cq(ena, rxq->er_cq_hw_idx);

		if (ret != 0) {
			ena_err(ena, "failed to destroy Rx CQ %u: %d",
			    rxq->er_rxqs_idx, ret);
		}

		rxq->er_cq_hw_idx = 0;
		rxq->er_cq_head_idx = 0;
		rxq->er_cq_phase = 0;
		rxq->er_cq_head_db_addr = NULL;
		rxq->er_cq_unmask_addr = NULL;
		rxq->er_cq_numa_addr = NULL;
		rxq->er_state &= ~ENA_RXQ_STATE_CQ_CREATED;
	}

	ena_free_rx_dma(rxq);
	ASSERT3S(rxq->er_state, ==, ENA_RXQ_STATE_NONE);
}

void
ena_ring_rx_stop(mac_ring_driver_t rh)
{
	ena_rxq_t *rxq = (ena_rxq_t *)rh;
	uint32_t intr_ctrl;

	intr_ctrl = ena_hw_abs_read32(rxq->er_ena, rxq->er_cq_unmask_addr);
	ENAHW_REG_INTR_MASK(intr_ctrl);
	ena_hw_abs_write32(rxq->er_ena, rxq->er_cq_unmask_addr, intr_ctrl);

	rxq->er_state &= ~ENA_RXQ_STATE_RUNNING;
	rxq->er_state &= ~ENA_RXQ_STATE_READY;
}

int
ena_ring_rx_start(mac_ring_driver_t rh, uint64_t gen_num)
{
	ena_rxq_t *rxq = (ena_rxq_t *)rh;
	ena_t *ena = rxq->er_ena;
	uint32_t intr_ctrl;

	mutex_enter(&rxq->er_lock);
	ena_refill_rx(rxq, rxq->er_sq_num_descs);
	rxq->er_m_gen_num = gen_num;
	rxq->er_intr_limit = ena->ena_rxq_intr_limit;
	mutex_exit(&rxq->er_lock);

	rxq->er_state |= ENA_RXQ_STATE_READY;

	intr_ctrl = ena_hw_abs_read32(ena, rxq->er_cq_unmask_addr);
	ENAHW_REG_INTR_UNMASK(intr_ctrl);
	ena_hw_abs_write32(ena, rxq->er_cq_unmask_addr, intr_ctrl);
	rxq->er_state |= ENA_RXQ_STATE_RUNNING;
	return (0);
}

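/*
 * Pull completed frames off the Rx Completion Queue, copy each into a
 * freshly allocated mblk, and return them as a chain for delivery to
 * mac. When polling (poll_bytes != ENA_INTERRUPT_MODE) we stop once
 * more than poll_bytes worth of data has been collected; in interrupt
 * mode we stop at the per-queue interrupt work limit. The consumed
 * descriptors are replenished via ena_refill_rx() before returning.
 */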
mblk_t *
ena_ring_rx(ena_rxq_t *rxq, int poll_bytes)
{
	ena_t *ena = rxq->er_ena;
	uint16_t head_mod = rxq->er_cq_head_idx & (rxq->er_cq_num_descs - 1);
	uint64_t total_bytes = 0;
	uint64_t num_frames = 0;
	enahw_rx_cdesc_t *cdesc;
	boolean_t polling = B_TRUE;
	mblk_t *head = NULL;
	mblk_t *tail = NULL;

	ASSERT(MUTEX_HELD(&rxq->er_lock));
	ENA_DMA_SYNC(rxq->er_cq_dma, DDI_DMA_SYNC_FORKERNEL);

	if (poll_bytes == ENA_INTERRUPT_MODE) {
		polling = B_FALSE;
	}

	cdesc = &rxq->er_cq_descs[head_mod];
	VERIFY3P(cdesc, >=, rxq->er_cq_descs);
	VERIFY3P(cdesc, <=, (rxq->er_cq_descs + rxq->er_cq_num_descs - 1));

	while (ENAHW_RX_CDESC_PHASE(cdesc) == rxq->er_cq_phase) {
		boolean_t first, last;
		ena_rx_ctrl_block_t *rcb;
		uint16_t req_id;
		mblk_t *mp;
		enahw_io_l3_proto_t l3proto;
		enahw_io_l4_proto_t l4proto;
		boolean_t l4csum_checked;
		uint32_t hflags = 0;

		VERIFY3U(head_mod, <, rxq->er_cq_num_descs);
		/*
		 * Currently, all incoming frames fit in a single Rx
		 * buffer (erd_length > total frame size). In the
		 * future, if we decide to loan buffers which are
		 * smaller, we will need to modify this code to read
		 * one or more descriptors (based on frame size).
		 *
		 * For this reason we do not expect any frame to span
		 * multiple descriptors. Therefore, we drop any frame
		 * that is not delivered in a single descriptor, i.e.,
		 * one where 'first' and 'last' are not both set.
		 */
		first = ENAHW_RX_CDESC_FIRST(cdesc);
		last = ENAHW_RX_CDESC_LAST(cdesc);

		if (!first || !last) {
			mutex_enter(&rxq->er_stat_lock);
			rxq->er_stat.ers_multi_desc.value.ui64++;
			mutex_exit(&rxq->er_stat_lock);
			goto next_desc;
		}

		req_id = cdesc->erc_req_id;
		VERIFY3U(req_id, <, rxq->er_cq_num_descs);
		rcb = &rxq->er_rcbs[req_id];
		rcb->ercb_offset = cdesc->erc_offset;
		rcb->ercb_length = cdesc->erc_length;
		ASSERT3U(rcb->ercb_length, <=, ena->ena_max_frame_total);
		mp = allocb(rcb->ercb_length + ENA_RX_BUF_IPHDR_ALIGNMENT, 0);

		/*
		 * If we can't allocate an mblk, things are looking
		 * grim. Forget about this frame and move on.
		 */
		if (mp == NULL) {
			mutex_enter(&rxq->er_stat_lock);
			rxq->er_stat.ers_allocb_fail.value.ui64++;
			mutex_exit(&rxq->er_stat_lock);
			goto next_desc;
		}

		/*
		 * As we pull frames we need to link them together as
		 * one chain to be delivered up to mac.
		 */
		if (head == NULL) {
			head = mp;
		} else {
			tail->b_next = mp;
		}

		tail = mp;

		/*
		 * We need to make sure the bytes are copied to the
		 * correct offset to achieve 4-byte IP header
		 * alignment.
		 *
		 * If we start using desballoc on the buffers, then we
		 * will need to make sure to apply this offset to the
		 * DMA buffers as well. Though it may be the case the
		 * device does this implicitly and that's what
		 * cdesc->erc_offset is for; we don't know because
		 * it's not documented.
		 */
		mp->b_wptr += ENA_RX_BUF_IPHDR_ALIGNMENT;
		mp->b_rptr += ENA_RX_BUF_IPHDR_ALIGNMENT;
		bcopy(rcb->ercb_dma.edb_va + rcb->ercb_offset, mp->b_wptr,
		    rcb->ercb_length);
		mp->b_wptr += rcb->ercb_length;
		total_bytes += rcb->ercb_length;
		VERIFY3P(mp->b_wptr, >, mp->b_rptr);
		VERIFY3P(mp->b_wptr, <=, mp->b_datap->db_lim);

		l3proto = ENAHW_RX_CDESC_L3_PROTO(cdesc);
		l4proto = ENAHW_RX_CDESC_L4_PROTO(cdesc);

		/*
		 * When it comes to bad TCP/IP checksums we do not
		 * discard the packet at this level. Instead, we let
		 * it percolate up for further processing and tracking
		 * by the upstream TCP/IP stack.
		 */
		if (ena->ena_rx_l3_ipv4_csum &&
		    l3proto == ENAHW_IO_L3_PROTO_IPV4) {
			boolean_t l3_csum_err =
			    ENAHW_RX_CDESC_L3_CSUM_ERR(cdesc);

			if (l3_csum_err) {
				mutex_enter(&rxq->er_stat_lock);
				rxq->er_stat.ers_hck_ipv4_err.value.ui64++;
				mutex_exit(&rxq->er_stat_lock);
			} else {
				hflags |= HCK_IPV4_HDRCKSUM_OK;
			}
		}

		l4csum_checked = ENAHW_RX_CDESC_L4_CSUM_CHECKED(cdesc);

		if (ena->ena_rx_l4_ipv4_csum && l4csum_checked &&
		    l4proto == ENAHW_IO_L4_PROTO_TCP) {
			boolean_t l4_csum_err =
			    ENAHW_RX_CDESC_L4_CSUM_ERR(cdesc);

			if (l4_csum_err) {
				mutex_enter(&rxq->er_stat_lock);
				rxq->er_stat.ers_hck_l4_err.value.ui64++;
				mutex_exit(&rxq->er_stat_lock);
			} else {
				hflags |= HCK_FULLCKSUM_OK;
			}
		}

		if (hflags != 0) {
			mac_hcksum_set(mp, 0, 0, 0, 0, hflags);
		}

next_desc:
		/*
		 * Technically, if we arrived here due to a failure,
		 * then we did not read a new frame. However, we count
		 * it all the same in order to make progress toward
		 * the interrupt work limit. The failure stats will
		 * allow us to differentiate good frames from bad.
		 */
		num_frames++;
		rxq->er_cq_head_idx++;
		head_mod = rxq->er_cq_head_idx & (rxq->er_cq_num_descs - 1);

		if (head_mod == 0) {
			rxq->er_cq_phase = !rxq->er_cq_phase;
		}

		if (polling && (total_bytes > poll_bytes)) {
			break;
		} else if (!polling && (num_frames >= rxq->er_intr_limit)) {
			mutex_enter(&rxq->er_stat_lock);
			rxq->er_stat.ers_intr_limit.value.ui64++;
			mutex_exit(&rxq->er_stat_lock);
			break;
		}

		cdesc = &rxq->er_cq_descs[head_mod];
		VERIFY3P(cdesc, >=, rxq->er_cq_descs);
		VERIFY3P(cdesc, <=,
		    (rxq->er_cq_descs + rxq->er_cq_num_descs - 1));
	}

	mutex_enter(&rxq->er_stat_lock);
	rxq->er_stat.ers_packets.value.ui64 += num_frames;
	rxq->er_stat.ers_bytes.value.ui64 += total_bytes;
	mutex_exit(&rxq->er_stat_lock);

	DTRACE_PROBE4(rx__frames, mblk_t *, head, boolean_t, polling, uint64_t,
	    num_frames, uint64_t, total_bytes);
	ena_refill_rx(rxq, num_frames);
	return (head);
}

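/*
 * Interrupt-mode Rx processing: read whatever frames are ready and
 * deliver them to mac against this ring's generation number.
 */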
void
ena_rx_intr_work(ena_rxq_t *rxq)
{
	mblk_t *mp;

	mutex_enter(&rxq->er_lock);
	mp = ena_ring_rx(rxq, ENA_INTERRUPT_MODE);
	mutex_exit(&rxq->er_lock);

	if (mp == NULL) {
		return;
	}

	mac_rx_ring(rxq->er_ena->ena_mh, rxq->er_mrh, mp, rxq->er_m_gen_num);
}

mblk_t *
ena_ring_rx_poll(void *rh, int poll_bytes)
{
	ena_rxq_t *rxq = rh;
	mblk_t *mp;

	ASSERT3S(poll_bytes, >, 0);

	mutex_enter(&rxq->er_lock);
	mp = ena_ring_rx(rxq, poll_bytes);
	mutex_exit(&rxq->er_lock);

	return (mp);
}