/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2017 Tegile Systems, Inc. All rights reserved.
 * Copyright 2020 RackTop Systems, Inc.
 */

/*
 * -------------------------
 * Interrupt Handling Theory
 * -------------------------
 *
 * There are a couple different sets of interrupts that we need to worry about:
 *
 *   - Interrupts from receive queues
 *   - Interrupts from transmit queues
 *   - 'Other Interrupts', such as the administrative queue
 *
 * 'Other Interrupts' are asynchronous events such as a link status change event
 * being posted to the administrative queue, unrecoverable ECC errors, and more.
 * If we have something being posted to the administrative queue, then we go
 * through and process it, because it's generally enabled as a separate logical
 * interrupt. Note, we may need to do more here eventually. To re-enable the
 * interrupts from the 'Other Interrupts' section, we need to clear the PBA and
 * write ENA to PFINT_ICR0.
 *
 * Interrupts from the transmit and receive queues indicate that our requests
 * have been processed. In the rx case, it means that we have data that we
 * should take a look at and send up the stack. In the tx case, it means that
 * data which we got from MAC has now been sent out on the wire and we can free
 * the associated data. Most of the logic for acting upon the presence of this
 * data can be found in i40e_transceiver.c which handles all of the DMA, rx, and
 * tx operations.
This file is dedicated to handling and dealing with interrupt
 * processing.
 *
 * All devices supported by this driver support three kinds of interrupts:
 *
 *   o Extended Message Signaled Interrupts (MSI-X)
 *   o Message Signaled Interrupts (MSI)
 *   o Legacy PCI interrupts (INTx)
 *
 * Generally speaking the hardware logically handles MSI and INTx the same and
 * restricts us to only using a single interrupt, which isn't the interesting
 * case. With MSI-X available, each physical function of the device provides the
 * opportunity for multiple interrupts which is what we'll focus on.
 *
 * --------------------
 * Interrupt Management
 * --------------------
 *
 * By default, the admin queue, which consists of the asynchronous other
 * interrupts, is always bound to MSI-X vector zero. Next, we spread out all of
 * the other interrupts that we have available to us over the remaining
 * interrupt vectors.
 *
 * This means that there may be multiple queues, both tx and rx, which are
 * mapped to the same interrupt. When the interrupt fires, we'll have to check
 * all of them for servicing, before we go through and indicate that the
 * interrupt is claimed.
 *
 * The hardware provides the means of mapping various queues to MSI-X interrupts
 * by programming the I40E_QINT_RQCTL() and I40E_QINT_TQCTL() registers. These
 * registers can also be used to enable and disable whether or not the queue is
 * a source of interrupts. As part of this, the hardware requires that we
 * maintain a linked list of queues for each interrupt vector. While it may seem
 * like this is only there for the purposes of ITRs, that's not the case. The
 * first queue must be programmed in the I40E_PFINT_LNKLSTN(%vector) register.
 * Each queue defines the next one in either the I40E_QINT_RQCTL or
 * I40E_QINT_TQCTL register.
 *
 * Finally, the individual interrupt vector itself has the ability to be enabled
 * and disabled.
The overall interrupt is controlled through the
 * I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt
 * as a whole.
 *
 * Note that this means that both the individual queue and the interrupt as a
 * whole can be toggled and re-enabled.
 *
 * -------------------
 * Non-MSIX Management
 * -------------------
 *
 * We may have a case where the Operating System is unable to actually allocate
 * any MSI-X interrupts to the system. In such a world, there is only one
 * transmit/receive queue pair and it is bound to the same interrupt with index
 * zero. The hardware doesn't allow us access to additional interrupt vectors in
 * these modes. Note that technically we could support more transmit/receive
 * queues if we wanted.
 *
 * In this world, because the interrupts for the admin queue and traffic are
 * mixed together, we have to consult ICR0 to determine what has occurred. The
 * QINT_TQCTL and QINT_RQCTL registers have a field, 'MSI-X 0 index' which
 * allows us to set a specific bit in ICR0. There are up to seven such bits;
 * however, we only use bits 0 and 1 for the rx and tx queue respectively.
 * These are described by the I40E_INTR_NOTX_{R|T}X_QUEUE and
 * I40E_INTR_NOTX_{R|T}X_MASK macros respectively.
 *
 * Unfortunately, these corresponding queue bits have no corresponding entry in
 * the ICR0_ENA register. So instead, when enabling interrupts on the queues, we
 * end up enabling it on the queue registers rather than on the MSI-X registers.
 * In the MSI-X world, because they can be enabled and disabled, this is
 * different and the queues can always be enabled and disabled, but the
 * interrupts themselves are toggled (ignoring the question of interrupt
 * blanking for polling on rings).
 *
 * Finally, we still have to set up the interrupt linked list, but the list is
 * instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to
 * one of the other MSI-X registers.
 *
 * --------------------
 * Interrupt Moderation
 * --------------------
 *
 * The XL710 hardware has three different interrupt moderation registers per
 * interrupt. Unsurprisingly, we use these for:
 *
 *   o RX interrupts
 *   o TX interrupts
 *   o 'Other interrupts' (link status change, admin queue, etc.)
 *
 * By default, we throttle 'other interrupts' the most, then TX interrupts, and
 * then RX interrupts. The default values for these were based on trying to
 * reason about both the importance and frequency of events. Generally speaking
 * 'other interrupts' are not very frequent and they're not important for the
 * I/O data path in and of itself (though they may indicate issues with the I/O
 * data path).
 *
 * On the flip side, when we're not polling, RX interrupts are very important.
 * The longer we wait for them, the more latency that we inject into the system.
 * However, if we allow interrupts to occur too frequently, we risk a few
 * problems:
 *
 *  1) Abusing system resources. Without proper interrupt blanking and polling,
 *     we can see upwards of 200k-300k interrupts per second on the system.
 *
 *  2) Not enough data coalescing to enable polling. In other words, the more
 *     data that we allow to build up, the more likely we'll be able to enable
 *     polling mode, allowing us to better handle bulk data.
 *
 * In-between the 'other interrupts' and the RX interrupts we have the TX
 * interrupts, which drive the reclamation of TX buffers. This operation is not
 * quite as important as we generally size the ring large enough that we should
 * be able to reclaim a substantial amount of the descriptors that we have used
 * per interrupt.
So 154 * while it's important that this interrupt occur, we don't necessarily need it 155 * firing as frequently as RX; it doesn't, on its own, induce additional latency 156 * into the system. 157 * 158 * Based on all this we currently assign static ITR values for the system. While 159 * we could move to a dynamic system (the hardware supports that), we'd want to 160 * make sure that we're seeing problems from this that we believe would be 161 * generally helped by the added complexity. 162 * 163 * Based on this, the default values that we have allow for the following 164 * interrupt thresholds: 165 * 166 * o 20k interrupts/s for RX 167 * o 5k interrupts/s for TX 168 * o 2k interupts/s for 'Other Interrupts' 169 */ 170 171 #include "i40e_sw.h" 172 173 #define I40E_INTR_NOTX_QUEUE 0 174 #define I40E_INTR_NOTX_INTR 0 175 #define I40E_INTR_NOTX_RX_QUEUE 0 176 #define I40E_INTR_NOTX_RX_MASK (1 << I40E_PFINT_ICR0_QUEUE_0_SHIFT) 177 #define I40E_INTR_NOTX_TX_QUEUE 1 178 #define I40E_INTR_NOTX_TX_MASK (1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT) 179 180 void 181 i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val) 182 { 183 int i; 184 i40e_hw_t *hw = &i40e->i40e_hw_space; 185 186 VERIFY3U(val, <=, I40E_MAX_ITR); 187 VERIFY3U(itr, <, I40E_ITR_INDEX_NONE); 188 189 /* 190 * No matter the interrupt mode, the ITR for other interrupts is always 191 * on interrupt zero and the same is true if we're not using MSI-X. 192 */ 193 if (itr == I40E_ITR_INDEX_OTHER || 194 i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) { 195 I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val); 196 return; 197 } 198 199 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 200 I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i), val); 201 } 202 } 203 204 /* 205 * Re-enable the adminq. Note that the adminq doesn't have a traditional queue 206 * associated with it from an interrupt perspective and just lives on ICR0. 
207 * However when MSI-X interrupts are not being used, then this also enables and 208 * disables those interrupts. 209 */ 210 static void 211 i40e_intr_adminq_enable(i40e_t *i40e) 212 { 213 i40e_hw_t *hw = &i40e->i40e_hw_space; 214 uint32_t reg; 215 216 reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | 217 I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | 218 (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); 219 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg); 220 i40e_flush(hw); 221 } 222 223 static void 224 i40e_intr_adminq_disable(i40e_t *i40e) 225 { 226 i40e_hw_t *hw = &i40e->i40e_hw_space; 227 uint32_t reg; 228 229 reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; 230 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg); 231 } 232 233 /* 234 * The next two functions enable/disable the reception of interrupts 235 * on the given vector. Only vectors 1..N are programmed by these 236 * functions; vector 0 is special and handled by a different register. 237 * We must subtract one from the vector because i40e implicitly adds 238 * one to the vector value. See section 10.2.2.10.13 for more details. 239 */ 240 static void 241 i40e_intr_io_enable(i40e_t *i40e, int vector) 242 { 243 uint32_t reg; 244 i40e_hw_t *hw = &i40e->i40e_hw_space; 245 246 ASSERT3S(vector, >, 0); 247 reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | 248 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | 249 (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); 250 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg); 251 } 252 253 static void 254 i40e_intr_io_disable(i40e_t *i40e, int vector) 255 { 256 uint32_t reg; 257 i40e_hw_t *hw = &i40e->i40e_hw_space; 258 259 ASSERT3S(vector, >, 0); 260 reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; 261 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg); 262 } 263 264 /* 265 * When MSI-X interrupts are being used, then we can enable the actual 266 * interrupts themselves. 
However, when they are not, we instead have to turn 267 * towards the queue's CAUSE_ENA bit and enable that. 268 */ 269 void 270 i40e_intr_io_enable_all(i40e_t *i40e) 271 { 272 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 273 int i; 274 275 for (i = 1; i < i40e->i40e_intr_count; i++) { 276 i40e_intr_io_enable(i40e, i); 277 } 278 } else { 279 uint32_t reg; 280 i40e_hw_t *hw = &i40e->i40e_hw_space; 281 282 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE)); 283 reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; 284 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); 285 286 reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE)); 287 reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK; 288 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); 289 } 290 } 291 292 /* 293 * When MSI-X interrupts are being used, then we can disable the actual 294 * interrupts themselves. However, when they are not, we instead have to turn 295 * towards the queue's CAUSE_ENA bit and disable that. 296 */ 297 void 298 i40e_intr_io_disable_all(i40e_t *i40e) 299 { 300 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 301 int i; 302 303 for (i = 1; i < i40e->i40e_intr_count; i++) { 304 i40e_intr_io_disable(i40e, i); 305 } 306 } else { 307 uint32_t reg; 308 i40e_hw_t *hw = &i40e->i40e_hw_space; 309 310 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE)); 311 reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; 312 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); 313 314 reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE)); 315 reg &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK; 316 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); 317 } 318 } 319 320 /* 321 * As part of disabling the tx and rx queue's we're technically supposed to 322 * remove the linked list entries. The simplest way is to clear the LNKLSTN 323 * register by setting it to I40E_QUEUE_TYPE_EOL (0x7FF). 324 * 325 * Note all of the FM register access checks are performed by the caller. 
326 */ 327 void 328 i40e_intr_io_clear_cause(i40e_t *i40e) 329 { 330 uint32_t i; 331 i40e_hw_t *hw = &i40e->i40e_hw_space; 332 333 if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) { 334 uint32_t reg; 335 reg = I40E_QUEUE_TYPE_EOL; 336 I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg); 337 return; 338 } 339 340 for (i = 0; i < i40e->i40e_intr_count - 1; i++) { 341 uint32_t reg; 342 343 reg = I40E_QUEUE_TYPE_EOL; 344 I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg); 345 } 346 347 i40e_flush(hw); 348 } 349 350 /* 351 * Finalize interrupt handling. Mostly this disables the admin queue. 352 */ 353 void 354 i40e_intr_chip_fini(i40e_t *i40e) 355 { 356 #ifdef DEBUG 357 int i; 358 uint32_t reg; 359 360 i40e_hw_t *hw = &i40e->i40e_hw_space; 361 362 /* 363 * Take a look and verify that all other interrupts have been disabled 364 * and the interrupt linked lists have been zeroed. 365 */ 366 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 367 for (i = 0; i < i40e->i40e_intr_count - 1; i++) { 368 reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i)); 369 VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK); 370 371 reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i)); 372 VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL); 373 } 374 } 375 #endif 376 377 i40e_intr_adminq_disable(i40e); 378 } 379 380 /* 381 * Set the head of the interrupt linked list. The PFINT_LNKLSTN[N] 382 * register actually refers to the 'N + 1' interrupt vector. E.g., 383 * PFINT_LNKLSTN[0] refers to interrupt vector 1. 384 */ 385 static void 386 i40e_set_lnklstn(i40e_t *i40e, uint_t vector, uint_t queue) 387 { 388 uint32_t reg; 389 i40e_hw_t *hw = &i40e->i40e_hw_space; 390 391 reg = (queue << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) | 392 (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); 393 394 I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(vector), reg); 395 DEBUGOUT2("PFINT_LNKLSTN[%u] = 0x%x", vector, reg); 396 } 397 398 /* 399 * Set the QINT_RQCTL[queue] register. The next queue is always the Tx 400 * queue associated with this Rx queue. 
Unlike PFINT_LNKLSTN, the 401 * vector should be the actual vector this queue is on -- i.e., it 402 * should be equal to itrq_rx_intrvec. 403 */ 404 static void 405 i40e_set_rqctl(i40e_t *i40e, uint_t vector, uint_t queue) 406 { 407 uint32_t reg; 408 i40e_hw_t *hw = &i40e->i40e_hw_space; 409 410 ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_rx_intrvec); 411 412 reg = (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | 413 (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | 414 (queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | 415 (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | 416 I40E_QINT_RQCTL_CAUSE_ENA_MASK; 417 418 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg); 419 DEBUGOUT2("QINT_RQCTL[%u] = 0x%x", queue, reg); 420 } 421 422 /* 423 * Like i40e_set_rqctl(), but for QINT_TQCTL[queue]. The next queue is 424 * either the Rx queue of another TRQP, or EOL. 425 */ 426 static void 427 i40e_set_tqctl(i40e_t *i40e, uint_t vector, uint_t queue, uint_t next_queue) 428 { 429 uint32_t reg; 430 i40e_hw_t *hw = &i40e->i40e_hw_space; 431 432 ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_tx_intrvec); 433 434 reg = (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | 435 (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | 436 (next_queue << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | 437 (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) | 438 I40E_QINT_TQCTL_CAUSE_ENA_MASK; 439 440 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(queue), reg); 441 DEBUGOUT2("QINT_TQCTL[%u] = 0x%x", queue, reg); 442 } 443 444 /* 445 * Program the interrupt linked list. Each vector has a linked list of 446 * queues which act as event sources for that vector. When one of 447 * those sources has an event the associated interrupt vector is 448 * fired. This mapping must match the mapping found in 449 * i40e_map_intrs_to_vectors(). 450 * 451 * See section 7.5.3 for more information about the configuration of 452 * the interrupt linked list. 
453 */ 454 static void 455 i40e_intr_init_queue_msix(i40e_t *i40e) 456 { 457 uint_t intr_count; 458 459 /* 460 * The 0th vector is for 'Other Interrupts' only (subject to 461 * change in the future). 462 */ 463 intr_count = i40e->i40e_intr_count - 1; 464 465 for (uint_t vec = 0; vec < intr_count; vec++) { 466 boolean_t head = B_TRUE; 467 468 for (uint_t qidx = vec; qidx < i40e->i40e_num_trqpairs; 469 qidx += intr_count) { 470 uint_t next_qidx = qidx + intr_count; 471 472 next_qidx = (next_qidx > i40e->i40e_num_trqpairs) ? 473 I40E_QUEUE_TYPE_EOL : next_qidx; 474 475 if (head) { 476 i40e_set_lnklstn(i40e, vec, qidx); 477 head = B_FALSE; 478 } 479 480 i40e_set_rqctl(i40e, vec + 1, qidx); 481 i40e_set_tqctl(i40e, vec + 1, qidx, next_qidx); 482 } 483 } 484 } 485 486 /* 487 * Set up a single queue to share the admin queue interrupt in the non-MSI-X 488 * world. Note we do not enable the queue as an interrupt cause at this time. We 489 * don't have any other vector of control here, unlike with the MSI-X interrupt 490 * case. 
491 */ 492 static void 493 i40e_intr_init_queue_shared(i40e_t *i40e) 494 { 495 i40e_hw_t *hw = &i40e->i40e_hw_space; 496 uint32_t reg; 497 498 VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED || 499 i40e->i40e_intr_type == DDI_INTR_TYPE_MSI); 500 501 reg = (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) | 502 (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); 503 I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg); 504 505 reg = (I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | 506 (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | 507 (I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT) | 508 (I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | 509 (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); 510 511 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); 512 513 reg = (I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | 514 (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | 515 (I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT) | 516 (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | 517 (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); 518 519 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); 520 } 521 522 /* 523 * Enable the specified queue as a valid source of interrupts. Note, this should 524 * only be used as part of the GLDv3's interrupt blanking routines. The debug 525 * build assertions are specific to that. 
526 */ 527 void 528 i40e_intr_rx_queue_enable(i40e_trqpair_t *itrq) 529 { 530 uint32_t reg; 531 uint_t queue = itrq->itrq_index; 532 i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space; 533 534 ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock)); 535 ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs); 536 537 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue)); 538 ASSERT0(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK); 539 reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; 540 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg); 541 } 542 543 /* 544 * Disable the specified queue as a valid source of interrupts. Note, this 545 * should only be used as part of the GLDv3's interrupt blanking routines. The 546 * debug build assertions are specific to that. 547 */ 548 void 549 i40e_intr_rx_queue_disable(i40e_trqpair_t *itrq) 550 { 551 uint32_t reg; 552 uint_t queue = itrq->itrq_index; 553 i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space; 554 555 ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock)); 556 ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs); 557 558 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue)); 559 ASSERT3U(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==, 560 I40E_QINT_RQCTL_CAUSE_ENA_MASK); 561 reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; 562 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg); 563 } 564 565 /* 566 * Start up the various chip's interrupt handling. We not only configure the 567 * adminq here, but we also go through and configure all of the actual queues, 568 * the interrupt linked lists, and others. 569 */ 570 void 571 i40e_intr_chip_init(i40e_t *i40e) 572 { 573 i40e_hw_t *hw = &i40e->i40e_hw_space; 574 uint32_t reg; 575 576 /* 577 * Ensure that all non adminq interrupts are disabled at the chip level. 578 */ 579 i40e_intr_io_disable_all(i40e); 580 581 I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0); 582 (void) I40E_READ_REG(hw, I40E_PFINT_ICR0); 583 584 /* 585 * Always enable all of the other-class interrupts to be on their own 586 * ITR. 
This only needs to be set on interrupt zero, which has its own 587 * special setting. 588 */ 589 reg = I40E_ITR_INDEX_OTHER << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT; 590 I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, reg); 591 592 /* 593 * Enable interrupt types we expect to receive. At the moment, this 594 * is limited to the adminq; however, we'll want to review 11.2.2.9.22 595 * for more types here as we add support for detecting them, handling 596 * them, and resetting the device as appropriate. 597 */ 598 reg = I40E_PFINT_ICR0_ENA_ADMINQ_MASK; 599 I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg); 600 601 /* 602 * Always set the interrupt linked list to empty. We'll come back and 603 * change this if MSI-X are actually on the scene. 604 */ 605 I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_TYPE_EOL); 606 607 i40e_intr_adminq_enable(i40e); 608 609 /* 610 * Set up all of the queues and map them to interrupts based on the bit 611 * assignments. 612 */ 613 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 614 i40e_intr_init_queue_msix(i40e); 615 } else { 616 i40e_intr_init_queue_shared(i40e); 617 } 618 619 /* 620 * Finally set all of the default ITRs for the interrupts. Note that the 621 * queues will have been set up above. 622 */ 623 i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr); 624 i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr); 625 i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr); 626 } 627 628 static void 629 i40e_intr_adminq_work(i40e_t *i40e) 630 { 631 struct i40e_hw *hw = &i40e->i40e_hw_space; 632 struct i40e_arq_event_info evt; 633 uint16_t remain = 1; 634 635 bzero(&evt, sizeof (struct i40e_arq_event_info)); 636 evt.buf_len = I40E_ADMINQ_BUFSZ; 637 evt.msg_buf = i40e->i40e_aqbuf; 638 639 while (remain != 0) { 640 enum i40e_status_code ret; 641 uint16_t opcode; 642 643 /* 644 * At the moment, the only error code that seems to be returned 645 * is one saying that there's no work. 
In such a case we leave 646 * this be. 647 */ 648 ret = i40e_clean_arq_element(hw, &evt, &remain); 649 if (ret != I40E_SUCCESS) 650 break; 651 652 opcode = LE_16(evt.desc.opcode); 653 switch (opcode) { 654 case i40e_aqc_opc_get_link_status: 655 mutex_enter(&i40e->i40e_general_lock); 656 i40e_link_check(i40e); 657 mutex_exit(&i40e->i40e_general_lock); 658 break; 659 default: 660 /* 661 * Longer term we'll want to enable other causes here 662 * and get these cleaned up and doing something. 663 */ 664 break; 665 } 666 } 667 } 668 669 static void 670 i40e_intr_rx_work(i40e_t *i40e, i40e_trqpair_t *itrq) 671 { 672 mblk_t *mp = NULL; 673 674 mutex_enter(&itrq->itrq_rx_lock); 675 if (!itrq->itrq_intr_poll) 676 mp = i40e_ring_rx(itrq, I40E_POLL_NULL); 677 mutex_exit(&itrq->itrq_rx_lock); 678 679 if (mp == NULL) 680 return; 681 682 mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp, 683 itrq->itrq_rxgen); 684 } 685 686 /* ARGSUSED */ 687 static void 688 i40e_intr_tx_work(i40e_t *i40e, i40e_trqpair_t *itrq) 689 { 690 i40e_tx_recycle_ring(itrq); 691 } 692 693 /* 694 * At the moment, the only 'other' interrupt on ICR0 that we handle is the 695 * adminq. We should go through and support the other notifications at some 696 * point. 697 */ 698 static void 699 i40e_intr_other_work(i40e_t *i40e) 700 { 701 struct i40e_hw *hw = &i40e->i40e_hw_space; 702 uint32_t reg; 703 704 reg = I40E_READ_REG(hw, I40E_PFINT_ICR0); 705 if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) != 706 DDI_FM_OK) { 707 ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); 708 atomic_or_32(&i40e->i40e_state, I40E_ERROR); 709 return; 710 } 711 712 if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) 713 i40e_intr_adminq_work(i40e); 714 715 /* 716 * Make sure that the adminq interrupt is not masked and then explicitly 717 * enable the adminq and thus the other interrupt. 
718 */ 719 reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA); 720 reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK; 721 I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg); 722 723 i40e_intr_adminq_enable(i40e); 724 } 725 726 /* 727 * The prolog/epilog pair of functions ensure the integrity of the trqpair 728 * across ring stop/start operations. 729 * 730 * A ring stop operation will wait whilst an interrupt is processing a 731 * trqpair, and when a ring is stopped the interrupt handler will skip 732 * the trqpair. 733 */ 734 static boolean_t 735 i40e_intr_trqpair_prolog(i40e_trqpair_t *itrq) 736 { 737 boolean_t enabled; 738 739 mutex_enter(&itrq->itrq_intr_lock); 740 enabled = !itrq->itrq_intr_quiesce; 741 if (enabled) 742 itrq->itrq_intr_busy = B_TRUE; 743 mutex_exit(&itrq->itrq_intr_lock); 744 745 return (enabled); 746 } 747 748 static void 749 i40e_intr_trqpair_epilog(i40e_trqpair_t *itrq) 750 { 751 mutex_enter(&itrq->itrq_intr_lock); 752 itrq->itrq_intr_busy = B_FALSE; 753 if (itrq->itrq_intr_quiesce) 754 cv_signal(&itrq->itrq_intr_cv); 755 mutex_exit(&itrq->itrq_intr_lock); 756 } 757 758 /* 759 * Tell any active interrupt vectors the ring is quiescing, then 760 * wait until any active interrupt thread has finished with this 761 * trqpair. 762 */ 763 void 764 i40e_intr_quiesce(i40e_trqpair_t *itrq) 765 { 766 mutex_enter(&itrq->itrq_intr_lock); 767 itrq->itrq_intr_quiesce = B_TRUE; 768 while (itrq->itrq_intr_busy) 769 cv_wait(&itrq->itrq_intr_cv, &itrq->itrq_intr_lock); 770 mutex_exit(&itrq->itrq_intr_lock); 771 } 772 773 /* 774 * Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of 775 * the MSI-X interrupt sequence. 776 */ 777 uint_t 778 i40e_intr_msix(void *arg1, void *arg2) 779 { 780 i40e_t *i40e = (i40e_t *)arg1; 781 uint_t vector_idx = (uint_t)(uintptr_t)arg2; 782 783 ASSERT3U(vector_idx, <, i40e->i40e_intr_count); 784 785 /* 786 * When using MSI-X interrupts, vector 0 is always reserved for the 787 * adminq at this time. 
Though longer term, we'll want to also bridge 788 * some I/O to them. 789 */ 790 if (vector_idx == 0) { 791 i40e_intr_other_work(i40e); 792 return (DDI_INTR_CLAIMED); 793 } 794 795 ASSERT3U(vector_idx, >, 0); 796 797 /* 798 * We determine the queue indexes via simple arithmetic (as 799 * opposed to keeping explicit state like a bitmap). While 800 * conveinent, it does mean that i40e_map_intrs_to_vectors(), 801 * i40e_intr_init_queue_msix(), and this function must be 802 * modified as a unit. 803 * 804 * We subtract 1 from the vector to offset the addition we 805 * performed during i40e_map_intrs_to_vectors(). 806 */ 807 for (uint_t i = vector_idx - 1; i < i40e->i40e_num_trqpairs; 808 i += (i40e->i40e_intr_count - 1)) { 809 i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i]; 810 811 ASSERT3U(i, <, i40e->i40e_num_trqpairs); 812 ASSERT3P(itrq, !=, NULL); 813 if (!i40e_intr_trqpair_prolog(itrq)) 814 continue; 815 816 i40e_intr_rx_work(i40e, itrq); 817 i40e_intr_tx_work(i40e, itrq); 818 819 i40e_intr_trqpair_epilog(itrq); 820 } 821 822 i40e_intr_io_enable(i40e, vector_idx); 823 return (DDI_INTR_CLAIMED); 824 } 825 826 static uint_t 827 i40e_intr_notx(i40e_t *i40e, boolean_t shared) 828 { 829 i40e_hw_t *hw = &i40e->i40e_hw_space; 830 uint32_t reg; 831 i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[0]; 832 int ret = DDI_INTR_CLAIMED; 833 834 if (shared == B_TRUE) { 835 mutex_enter(&i40e->i40e_general_lock); 836 if (i40e->i40e_state & I40E_SUSPENDED) { 837 mutex_exit(&i40e->i40e_general_lock); 838 return (DDI_INTR_UNCLAIMED); 839 } 840 mutex_exit(&i40e->i40e_general_lock); 841 } 842 843 reg = I40E_READ_REG(hw, I40E_PFINT_ICR0); 844 if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) != 845 DDI_FM_OK) { 846 ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); 847 atomic_or_32(&i40e->i40e_state, I40E_ERROR); 848 return (DDI_INTR_CLAIMED); 849 } 850 851 if (reg == 0) { 852 if (shared == B_TRUE) 853 ret = DDI_INTR_UNCLAIMED; 854 goto done; 855 } 856 857 if (reg & 
I40E_PFINT_ICR0_ADMINQ_MASK) 858 i40e_intr_adminq_work(i40e); 859 860 if (i40e_intr_trqpair_prolog(itrq)) { 861 if (reg & I40E_INTR_NOTX_RX_MASK) 862 i40e_intr_rx_work(i40e, itrq); 863 864 if (reg & I40E_INTR_NOTX_TX_MASK) 865 i40e_intr_tx_work(i40e, itrq); 866 867 i40e_intr_trqpair_epilog(itrq); 868 } 869 870 done: 871 i40e_intr_adminq_enable(i40e); 872 return (ret); 873 874 } 875 876 /* ARGSUSED */ 877 uint_t 878 i40e_intr_msi(void *arg1, void *arg2) 879 { 880 i40e_t *i40e = (i40e_t *)arg1; 881 882 return (i40e_intr_notx(i40e, B_FALSE)); 883 } 884 885 /* ARGSUSED */ 886 uint_t 887 i40e_intr_legacy(void *arg1, void *arg2) 888 { 889 i40e_t *i40e = (i40e_t *)arg1; 890 891 return (i40e_intr_notx(i40e, B_TRUE)); 892 } 893