/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2018 Joyent, Inc.
 * Copyright 2017 Tegile Systems, Inc. All rights reserved.
 */

/*
 * -------------------------
 * Interrupt Handling Theory
 * -------------------------
 *
 * There are a couple different sets of interrupts that we need to worry about:
 *
 *   - Interrupts from receive queues
 *   - Interrupts from transmit queues
 *   - 'Other Interrupts', such as the administrative queue
 *
 * 'Other Interrupts' are asynchronous events such as a link status change event
 * being posted to the administrative queue, unrecoverable ECC errors, and more.
 * If we have something being posted to the administrative queue, then we go
 * through and process it, because it's generally enabled as a separate logical
 * interrupt. Note, we may need to do more here eventually. To re-enable the
 * interrupts from the 'Other Interrupts' section, we need to clear the PBA and
 * write ENA to PFINT_ICR0.
 *
 * Interrupts from the transmit and receive queues indicate that our requests
 * have been processed. In the rx case, it means that we have data that we
 * should take a look at and send up the stack. In the tx case, it means that
 * data which we got from MAC has now been sent out on the wire and we can free
 * the associated data. Most of the logic for acting upon the presence of this
 * data can be found in i40e_transceiver.c which handles all of the DMA, rx, and
 * tx operations. This file is dedicated to handling and dealing with interrupt
 * processing.
 *
 * All devices supported by this driver support three kinds of interrupts:
 *
 *   o Extended Message Signaled Interrupts (MSI-X)
 *   o Message Signaled Interrupts (MSI)
 *   o Legacy PCI interrupts (INTx)
 *
 * Generally speaking the hardware logically handles MSI and INTx the same and
 * restricts us to only using a single interrupt, which isn't the interesting
 * case. With MSI-X available, each physical function of the device provides the
 * opportunity for multiple interrupts which is what we'll focus on.
 *
 * --------------------
 * Interrupt Management
 * --------------------
 *
 * By default, the admin queue, which consists of the asynchronous other
 * interrupts is always bound to MSI-X vector zero. Next, we spread out all of
 * the other interrupts that we have available to us over the remaining
 * interrupt vectors.
 *
 * This means that there may be multiple queues, both tx and rx, which are
 * mapped to the same interrupt. When the interrupt fires, we'll have to check
 * all of them for servicing, before we go through and indicate that the
 * interrupt is claimed.
 *
 * The hardware provides the means of mapping various queues to MSI-X interrupts
 * by programming the I40E_QINT_RQCTL() and I40E_QINT_TQCTL() registers. These
 * registers can also be used to enable and disable whether or not the queue is
 * a source of interrupts. As part of this, the hardware requires that we
 * maintain a linked list of queues for each interrupt vector. While it may seem
 * like this is only there for the purposes of ITRs, that's not the case. The
 * first queue must be programmed in the I40E_PFINT_LNKLSTN(%vector) register.
 * Each queue defines the next one in either the I40E_QINT_RQCTL or
 * I40E_QINT_TQCTL register.
 *
 * Finally, the individual interrupt vector itself has the ability to be enabled
 * and disabled.
The overall interrupt is controlled through the 82 * I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt 83 * as a whole. 84 * 85 * Note that this means that both the individual queue and the interrupt as a 86 * whole can be toggled and re-enabled. 87 * 88 * ------------------- 89 * Non-MSIX Management 90 * ------------------- 91 * 92 * We may have a case where the Operating System is unable to actually allocate 93 * any MSI-X to the system. In such a world, there is only one transmit/receive 94 * queue pair and it is bound to the same interrupt with index zero. The 95 * hardware doesn't allow us access to additional interrupt vectors in these 96 * modes. Note that technically we could support more transmit/receive queues if 97 * we wanted. 98 * 99 * In this world, because the interrupts for the admin queue and traffic are 100 * mixed together, we have to consult ICR0 to determine what has occurred. The 101 * QINT_TQCTL and QINT_RQCTL registers have a field, 'MSI-X 0 index' which 102 * allows us to set a specific bit in ICR0. There are up to seven such bits; 103 * however, we only use the bit 0 and 1 for the rx and tx queue respectively. 104 * These are contained by the I40E_INTR_NOTX_{R|T}X_QUEUE and 105 * I40E_INTR_NOTX_{R|T}X_MASK registers respectively. 106 * 107 * Unfortunately, these corresponding queue bits have no corresponding entry in 108 * the ICR0_ENA register. So instead, when enabling interrupts on the queues, we 109 * end up enabling it on the queue registers rather than on the MSI-X registers. 110 * In the MSI-X world, because they can be enabled and disabled, this is 111 * different and the queues can always be enabled and disabled, but the 112 * interrupts themselves are toggled (ignoring the question of interrupt 113 * blanking for polling on rings). 
114 * 115 * Finally, we still have to set up the interrupt linked list, but the list is 116 * instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to 117 * one of the other MSI-X registers. 118 * 119 * -------------------- 120 * Interrupt Moderation 121 * -------------------- 122 * 123 * The XL710 hardware has three different interrupt moderation registers per 124 * interrupt. Unsurprisingly, we use these for: 125 * 126 * o RX interrupts 127 * o TX interrupts 128 * o 'Other interrupts' (link status change, admin queue, etc.) 129 * 130 * By default, we throttle 'other interrupts' the most, then TX interrupts, and 131 * then RX interrupts. The default values for these were based on trying to 132 * reason about both the importance and frequency of events. Generally speaking 133 * 'other interrupts' are not very frequent and they're not important for the 134 * I/O data path in and of itself (though they may indicate issues with the I/O 135 * data path). 136 * 137 * On the flip side, when we're not polling, RX interrupts are very important. 138 * The longer we wait for them, the more latency that we inject into the system. 139 * However, if we allow interrupts to occur too frequently, we risk a few 140 * problems: 141 * 142 * 1) Abusing system resources. Without proper interrupt blanking and polling, 143 * we can see upwards of 200k-300k interrupts per second on the system. 144 * 145 * 2) Not enough data coalescing to enable polling. In other words, the more 146 * data that we allow to build up, the more likely we'll be able to enable 147 * polling mode and allowing us to better handle bulk data. 148 * 149 * In-between the 'other interrupts' and the TX interrupts we have the 150 * reclamation of TX buffers. This operation is not quite as important as we 151 * generally size the ring large enough that we should be able to reclaim a 152 * substantial amount of the descriptors that we have used per interrupt. 
So 153 * while it's important that this interrupt occur, we don't necessarily need it 154 * firing as frequently as RX; it doesn't, on its own, induce additional latency 155 * into the system. 156 * 157 * Based on all this we currently assign static ITR values for the system. While 158 * we could move to a dynamic system (the hardware supports that), we'd want to 159 * make sure that we're seeing problems from this that we believe would be 160 * generally helped by the added complexity. 161 * 162 * Based on this, the default values that we have allow for the following 163 * interrupt thresholds: 164 * 165 * o 20k interrupts/s for RX 166 * o 5k interrupts/s for TX 167 * o 2k interupts/s for 'Other Interrupts' 168 */ 169 170 #include "i40e_sw.h" 171 172 #define I40E_INTR_NOTX_QUEUE 0 173 #define I40E_INTR_NOTX_INTR 0 174 #define I40E_INTR_NOTX_RX_QUEUE 0 175 #define I40E_INTR_NOTX_RX_MASK (1 << I40E_PFINT_ICR0_QUEUE_0_SHIFT) 176 #define I40E_INTR_NOTX_TX_QUEUE 1 177 #define I40E_INTR_NOTX_TX_MASK (1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT) 178 179 void 180 i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val) 181 { 182 int i; 183 i40e_hw_t *hw = &i40e->i40e_hw_space; 184 185 VERIFY3U(val, <=, I40E_MAX_ITR); 186 VERIFY3U(itr, <, I40E_ITR_INDEX_NONE); 187 188 /* 189 * No matter the interrupt mode, the ITR for other interrupts is always 190 * on interrupt zero and the same is true if we're not using MSI-X. 191 */ 192 if (itr == I40E_ITR_INDEX_OTHER || 193 i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) { 194 I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val); 195 return; 196 } 197 198 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 199 I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i), val); 200 } 201 } 202 203 /* 204 * Re-enable the adminq. Note that the adminq doesn't have a traditional queue 205 * associated with it from an interrupt perspective and just lives on ICR0. 
206 * However when MSI-X interrupts are not being used, then this also enables and 207 * disables those interrupts. 208 */ 209 static void 210 i40e_intr_adminq_enable(i40e_t *i40e) 211 { 212 i40e_hw_t *hw = &i40e->i40e_hw_space; 213 uint32_t reg; 214 215 reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | 216 I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | 217 (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); 218 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg); 219 i40e_flush(hw); 220 } 221 222 static void 223 i40e_intr_adminq_disable(i40e_t *i40e) 224 { 225 i40e_hw_t *hw = &i40e->i40e_hw_space; 226 uint32_t reg; 227 228 reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; 229 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg); 230 } 231 232 /* 233 * The next two functions enable/disable the reception of interrupts 234 * on the given vector. Only vectors 1..N are programmed by these 235 * functions; vector 0 is special and handled by a different register. 236 * We must subtract one from the vector because i40e implicitly adds 237 * one to the vector value. See section 10.2.2.10.13 for more details. 238 */ 239 static void 240 i40e_intr_io_enable(i40e_t *i40e, int vector) 241 { 242 uint32_t reg; 243 i40e_hw_t *hw = &i40e->i40e_hw_space; 244 245 ASSERT3S(vector, >, 0); 246 reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | 247 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | 248 (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); 249 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg); 250 } 251 252 static void 253 i40e_intr_io_disable(i40e_t *i40e, int vector) 254 { 255 uint32_t reg; 256 i40e_hw_t *hw = &i40e->i40e_hw_space; 257 258 ASSERT3S(vector, >, 0); 259 reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; 260 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg); 261 } 262 263 /* 264 * When MSI-X interrupts are being used, then we can enable the actual 265 * interrupts themselves. 
However, when they are not, we instead have to turn 266 * towards the queue's CAUSE_ENA bit and enable that. 267 */ 268 void 269 i40e_intr_io_enable_all(i40e_t *i40e) 270 { 271 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 272 int i; 273 274 for (i = 1; i < i40e->i40e_intr_count; i++) { 275 i40e_intr_io_enable(i40e, i); 276 } 277 } else { 278 uint32_t reg; 279 i40e_hw_t *hw = &i40e->i40e_hw_space; 280 281 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE)); 282 reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; 283 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); 284 285 reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE)); 286 reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK; 287 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); 288 } 289 } 290 291 /* 292 * When MSI-X interrupts are being used, then we can disable the actual 293 * interrupts themselves. However, when they are not, we instead have to turn 294 * towards the queue's CAUSE_ENA bit and disable that. 295 */ 296 void 297 i40e_intr_io_disable_all(i40e_t *i40e) 298 { 299 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 300 int i; 301 302 for (i = 1; i < i40e->i40e_intr_count; i++) { 303 i40e_intr_io_disable(i40e, i); 304 } 305 } else { 306 uint32_t reg; 307 i40e_hw_t *hw = &i40e->i40e_hw_space; 308 309 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE)); 310 reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; 311 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); 312 313 reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE)); 314 reg &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK; 315 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); 316 } 317 } 318 319 /* 320 * As part of disabling the tx and rx queue's we're technically supposed to 321 * remove the linked list entries. The simplest way is to clear the LNKLSTN 322 * register by setting it to I40E_QUEUE_TYPE_EOL (0x7FF). 323 * 324 * Note all of the FM register access checks are performed by the caller. 
325 */ 326 void 327 i40e_intr_io_clear_cause(i40e_t *i40e) 328 { 329 int i; 330 i40e_hw_t *hw = &i40e->i40e_hw_space; 331 332 if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) { 333 uint32_t reg; 334 reg = I40E_QUEUE_TYPE_EOL; 335 I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg); 336 return; 337 } 338 339 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 340 uint32_t reg; 341 #ifdef DEBUG 342 /* 343 * Verify that the interrupt in question is disabled. This is a 344 * prerequisite of modifying the data in question. 345 */ 346 reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i)); 347 VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK); 348 #endif 349 reg = I40E_QUEUE_TYPE_EOL; 350 I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg); 351 } 352 353 i40e_flush(hw); 354 } 355 356 /* 357 * Finalize interrupt handling. Mostly this disables the admin queue. 358 */ 359 void 360 i40e_intr_chip_fini(i40e_t *i40e) 361 { 362 #ifdef DEBUG 363 int i; 364 uint32_t reg; 365 366 i40e_hw_t *hw = &i40e->i40e_hw_space; 367 368 /* 369 * Take a look and verify that all other interrupts have been disabled 370 * and the interrupt linked lists have been zeroed. 371 */ 372 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 373 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 374 reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i)); 375 VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK); 376 377 reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i)); 378 VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL); 379 } 380 } 381 #endif 382 383 i40e_intr_adminq_disable(i40e); 384 } 385 386 /* 387 * Set the head of the interrupt linked list. The PFINT_LNKLSTN[N] 388 * register actually refers to the 'N + 1' interrupt vector. E.g., 389 * PFINT_LNKLSTN[0] refers to interrupt vector 1. 
390 */ 391 static void 392 i40e_set_lnklstn(i40e_t *i40e, uint_t vector, uint_t queue) 393 { 394 uint32_t reg; 395 i40e_hw_t *hw = &i40e->i40e_hw_space; 396 397 reg = (queue << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) | 398 (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); 399 400 I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(vector), reg); 401 DEBUGOUT2("PFINT_LNKLSTN[%u] = 0x%x", vector, reg); 402 } 403 404 /* 405 * Set the QINT_RQCTL[queue] register. The next queue is always the Tx 406 * queue associated with this Rx queue. Unlike PFINT_LNKLSTN, the 407 * vector should be the actual vector this queue is on -- i.e., it 408 * should be equal to itrq_rx_intrvec. 409 */ 410 static void 411 i40e_set_rqctl(i40e_t *i40e, uint_t vector, uint_t queue) 412 { 413 uint32_t reg; 414 i40e_hw_t *hw = &i40e->i40e_hw_space; 415 416 ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_rx_intrvec); 417 418 reg = (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | 419 (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | 420 (queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | 421 (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | 422 I40E_QINT_RQCTL_CAUSE_ENA_MASK; 423 424 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg); 425 DEBUGOUT2("QINT_RQCTL[%u] = 0x%x", queue, reg); 426 } 427 428 /* 429 * Like i40e_set_rqctl(), but for QINT_TQCTL[queue]. The next queue is 430 * either the Rx queue of another TRQP, or EOL. 
431 */ 432 static void 433 i40e_set_tqctl(i40e_t *i40e, uint_t vector, uint_t queue, uint_t next_queue) 434 { 435 uint32_t reg; 436 i40e_hw_t *hw = &i40e->i40e_hw_space; 437 438 ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_tx_intrvec); 439 440 reg = (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | 441 (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | 442 (next_queue << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | 443 (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) | 444 I40E_QINT_TQCTL_CAUSE_ENA_MASK; 445 446 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(queue), reg); 447 DEBUGOUT2("QINT_TQCTL[%u] = 0x%x", queue, reg); 448 } 449 450 /* 451 * Program the interrupt linked list. Each vector has a linked list of 452 * queues which act as event sources for that vector. When one of 453 * those sources has an event the associated interrupt vector is 454 * fired. This mapping must match the mapping found in 455 * i40e_map_intrs_to_vectors(). 456 * 457 * See section 7.5.3 for more information about the configuration of 458 * the interrupt linked list. 459 */ 460 static void 461 i40e_intr_init_queue_msix(i40e_t *i40e) 462 { 463 uint_t intr_count; 464 465 /* 466 * The 0th vector is for 'Other Interrupts' only (subject to 467 * change in the future). 468 */ 469 intr_count = i40e->i40e_intr_count - 1; 470 471 for (uint_t vec = 0; vec < intr_count; vec++) { 472 boolean_t head = B_TRUE; 473 474 for (uint_t qidx = vec; qidx < i40e->i40e_num_trqpairs; 475 qidx += intr_count) { 476 uint_t next_qidx = qidx + intr_count; 477 478 next_qidx = (next_qidx > i40e->i40e_num_trqpairs) ? 479 I40E_QUEUE_TYPE_EOL : next_qidx; 480 481 if (head) { 482 i40e_set_lnklstn(i40e, vec, qidx); 483 head = B_FALSE; 484 } 485 486 i40e_set_rqctl(i40e, vec + 1, qidx); 487 i40e_set_tqctl(i40e, vec + 1, qidx, next_qidx); 488 } 489 } 490 } 491 492 /* 493 * Set up a single queue to share the admin queue interrupt in the non-MSI-X 494 * world. Note we do not enable the queue as an interrupt cause at this time. 
We 495 * don't have any other vector of control here, unlike with the MSI-X interrupt 496 * case. 497 */ 498 static void 499 i40e_intr_init_queue_shared(i40e_t *i40e) 500 { 501 i40e_hw_t *hw = &i40e->i40e_hw_space; 502 uint32_t reg; 503 504 VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED || 505 i40e->i40e_intr_type == DDI_INTR_TYPE_MSI); 506 507 reg = (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) | 508 (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); 509 I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg); 510 511 reg = (I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | 512 (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | 513 (I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT) | 514 (I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | 515 (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); 516 517 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); 518 519 reg = (I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | 520 (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | 521 (I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT) | 522 (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | 523 (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); 524 525 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); 526 } 527 528 /* 529 * Enable the specified queue as a valid source of interrupts. Note, this should 530 * only be used as part of the GLDv3's interrupt blanking routines. The debug 531 * build assertions are specific to that. 
532 */ 533 void 534 i40e_intr_rx_queue_enable(i40e_trqpair_t *itrq) 535 { 536 uint32_t reg; 537 uint_t queue = itrq->itrq_index; 538 i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space; 539 540 ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock)); 541 ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs); 542 543 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue)); 544 ASSERT0(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK); 545 reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; 546 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg); 547 } 548 549 /* 550 * Disable the specified queue as a valid source of interrupts. Note, this 551 * should only be used as part of the GLDv3's interrupt blanking routines. The 552 * debug build assertions are specific to that. 553 */ 554 void 555 i40e_intr_rx_queue_disable(i40e_trqpair_t *itrq) 556 { 557 uint32_t reg; 558 uint_t queue = itrq->itrq_index; 559 i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space; 560 561 ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock)); 562 ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs); 563 564 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue)); 565 ASSERT3U(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==, 566 I40E_QINT_RQCTL_CAUSE_ENA_MASK); 567 reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; 568 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg); 569 } 570 571 /* 572 * Start up the various chip's interrupt handling. We not only configure the 573 * adminq here, but we also go through and configure all of the actual queues, 574 * the interrupt linked lists, and others. 575 */ 576 void 577 i40e_intr_chip_init(i40e_t *i40e) 578 { 579 i40e_hw_t *hw = &i40e->i40e_hw_space; 580 uint32_t reg; 581 582 /* 583 * Ensure that all non adminq interrupts are disabled at the chip level. 584 */ 585 i40e_intr_io_disable_all(i40e); 586 587 I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0); 588 (void) I40E_READ_REG(hw, I40E_PFINT_ICR0); 589 590 /* 591 * Always enable all of the other-class interrupts to be on their own 592 * ITR. 
This only needs to be set on interrupt zero, which has its own 593 * special setting. 594 */ 595 reg = I40E_ITR_INDEX_OTHER << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT; 596 I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, reg); 597 598 /* 599 * Enable interrupt types we expect to receive. At the moment, this 600 * is limited to the adminq; however, we'll want to review 11.2.2.9.22 601 * for more types here as we add support for detecting them, handling 602 * them, and resetting the device as appropriate. 603 */ 604 reg = I40E_PFINT_ICR0_ENA_ADMINQ_MASK; 605 I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg); 606 607 /* 608 * Always set the interrupt linked list to empty. We'll come back and 609 * change this if MSI-X are actually on the scene. 610 */ 611 I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_TYPE_EOL); 612 613 i40e_intr_adminq_enable(i40e); 614 615 /* 616 * Set up all of the queues and map them to interrupts based on the bit 617 * assignments. 618 */ 619 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 620 i40e_intr_init_queue_msix(i40e); 621 } else { 622 i40e_intr_init_queue_shared(i40e); 623 } 624 625 /* 626 * Finally set all of the default ITRs for the interrupts. Note that the 627 * queues will have been set up above. 628 */ 629 i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr); 630 i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr); 631 i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr); 632 } 633 634 static void 635 i40e_intr_adminq_work(i40e_t *i40e) 636 { 637 struct i40e_hw *hw = &i40e->i40e_hw_space; 638 struct i40e_arq_event_info evt; 639 uint16_t remain = 1; 640 641 bzero(&evt, sizeof (struct i40e_arq_event_info)); 642 evt.buf_len = I40E_ADMINQ_BUFSZ; 643 evt.msg_buf = i40e->i40e_aqbuf; 644 645 while (remain != 0) { 646 enum i40e_status_code ret; 647 uint16_t opcode; 648 649 /* 650 * At the moment, the only error code that seems to be returned 651 * is one saying that there's no work. 
In such a case we leave 652 * this be. 653 */ 654 ret = i40e_clean_arq_element(hw, &evt, &remain); 655 if (ret != I40E_SUCCESS) 656 break; 657 658 opcode = LE_16(evt.desc.opcode); 659 switch (opcode) { 660 case i40e_aqc_opc_get_link_status: 661 mutex_enter(&i40e->i40e_general_lock); 662 i40e_link_check(i40e); 663 mutex_exit(&i40e->i40e_general_lock); 664 break; 665 default: 666 /* 667 * Longer term we'll want to enable other causes here 668 * and get these cleaned up and doing something. 669 */ 670 break; 671 } 672 } 673 } 674 675 static void 676 i40e_intr_rx_work(i40e_t *i40e, i40e_trqpair_t *itrq) 677 { 678 mblk_t *mp = NULL; 679 680 mutex_enter(&itrq->itrq_rx_lock); 681 if (!itrq->itrq_intr_poll) 682 mp = i40e_ring_rx(itrq, I40E_POLL_NULL); 683 mutex_exit(&itrq->itrq_rx_lock); 684 685 if (mp == NULL) 686 return; 687 688 mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp, 689 itrq->itrq_rxgen); 690 } 691 692 /* ARGSUSED */ 693 static void 694 i40e_intr_tx_work(i40e_t *i40e, i40e_trqpair_t *itrq) 695 { 696 i40e_tx_recycle_ring(itrq); 697 } 698 699 /* 700 * At the moment, the only 'other' interrupt on ICR0 that we handle is the 701 * adminq. We should go through and support the other notifications at some 702 * point. 703 */ 704 static void 705 i40e_intr_other_work(i40e_t *i40e) 706 { 707 struct i40e_hw *hw = &i40e->i40e_hw_space; 708 uint32_t reg; 709 710 reg = I40E_READ_REG(hw, I40E_PFINT_ICR0); 711 if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) != 712 DDI_FM_OK) { 713 ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); 714 atomic_or_32(&i40e->i40e_state, I40E_ERROR); 715 return; 716 } 717 718 if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) 719 i40e_intr_adminq_work(i40e); 720 721 /* 722 * Make sure that the adminq interrupt is not masked and then explicitly 723 * enable the adminq and thus the other interrupt. 
724 */ 725 reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA); 726 reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK; 727 I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg); 728 729 i40e_intr_adminq_enable(i40e); 730 } 731 732 /* 733 * Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of 734 * the MSI-X interrupt sequence. 735 */ 736 uint_t 737 i40e_intr_msix(void *arg1, void *arg2) 738 { 739 i40e_t *i40e = (i40e_t *)arg1; 740 uint_t vector_idx = (uint_t)(uintptr_t)arg2; 741 742 ASSERT3U(vector_idx, <, i40e->i40e_intr_count); 743 744 /* 745 * When using MSI-X interrupts, vector 0 is always reserved for the 746 * adminq at this time. Though longer term, we'll want to also bridge 747 * some I/O to them. 748 */ 749 if (vector_idx == 0) { 750 i40e_intr_other_work(i40e); 751 return (DDI_INTR_CLAIMED); 752 } 753 754 ASSERT3U(vector_idx, >, 0); 755 756 /* 757 * We determine the queue indexes via simple arithmetic (as 758 * opposed to keeping explicit state like a bitmap). While 759 * conveinent, it does mean that i40e_map_intrs_to_vectors(), 760 * i40e_intr_init_queue_msix(), and this function must be 761 * modified as a unit. 762 * 763 * We subtract 1 from the vector to offset the addition we 764 * performed during i40e_map_intrs_to_vectors(). 
765 */ 766 for (uint_t i = vector_idx - 1; i < i40e->i40e_num_trqpairs; 767 i += (i40e->i40e_intr_count - 1)) { 768 i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i]; 769 770 ASSERT3U(i, <, i40e->i40e_num_trqpairs); 771 ASSERT3P(itrq, !=, NULL); 772 i40e_intr_rx_work(i40e, itrq); 773 i40e_intr_tx_work(i40e, itrq); 774 } 775 776 i40e_intr_io_enable(i40e, vector_idx); 777 return (DDI_INTR_CLAIMED); 778 } 779 780 static uint_t 781 i40e_intr_notx(i40e_t *i40e, boolean_t shared) 782 { 783 i40e_hw_t *hw = &i40e->i40e_hw_space; 784 uint32_t reg; 785 i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[0]; 786 int ret = DDI_INTR_CLAIMED; 787 788 if (shared == B_TRUE) { 789 mutex_enter(&i40e->i40e_general_lock); 790 if (i40e->i40e_state & I40E_SUSPENDED) { 791 mutex_exit(&i40e->i40e_general_lock); 792 return (DDI_INTR_UNCLAIMED); 793 } 794 mutex_exit(&i40e->i40e_general_lock); 795 } 796 797 reg = I40E_READ_REG(hw, I40E_PFINT_ICR0); 798 if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) != 799 DDI_FM_OK) { 800 ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); 801 atomic_or_32(&i40e->i40e_state, I40E_ERROR); 802 return (DDI_INTR_CLAIMED); 803 } 804 805 if (reg == 0) { 806 if (shared == B_TRUE) 807 ret = DDI_INTR_UNCLAIMED; 808 goto done; 809 } 810 811 if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) 812 i40e_intr_adminq_work(i40e); 813 814 if (reg & I40E_INTR_NOTX_RX_MASK) 815 i40e_intr_rx_work(i40e, itrq); 816 817 if (reg & I40E_INTR_NOTX_TX_MASK) 818 i40e_intr_tx_work(i40e, itrq); 819 820 done: 821 i40e_intr_adminq_enable(i40e); 822 return (ret); 823 824 } 825 826 /* ARGSUSED */ 827 uint_t 828 i40e_intr_msi(void *arg1, void *arg2) 829 { 830 i40e_t *i40e = (i40e_t *)arg1; 831 832 return (i40e_intr_notx(i40e, B_FALSE)); 833 } 834 835 /* ARGSUSED */ 836 uint_t 837 i40e_intr_legacy(void *arg1, void *arg2) 838 { 839 i40e_t *i40e = (i40e_t *)arg1; 840 841 return (i40e_intr_notx(i40e, B_TRUE)); 842 } 843