1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2016 Joyent, Inc. 14 */ 15 16 /* 17 * ------------------------- 18 * Interrupt Handling Theory 19 * ------------------------- 20 * 21 * There are a couple different sets of interrupts that we need to worry about: 22 * 23 * - Interrupts from receive queues 24 * - Interrupts from transmit queues 25 * - 'Other Interrupts', such as the administrative queue 26 * 27 * 'Other Interrupts' are asynchronous events such as a link status change event 28 * being posted to the administrative queue, unrecoverable ECC errors, and more. 29 * If we have something being posted to the administrative queue, then we go 30 * through and process it, because it's generally enabled as a separate logical 31 * interrupt. Note, we may need to do more here eventually. To re-enable the 32 * interrupts from the 'Other Interrupts' section, we need to clear the PBA and 33 * write ENA to PFINT_ICR0. 34 * 35 * Interrupts from the transmit and receive queues indicates that our requests 36 * have been processed. In the rx case, it means that we have data that we 37 * should take a look at and send up the stack. In the tx case, it means that 38 * data which we got from MAC has now been sent out on the wire and we can free 39 * the associated data. Most of the logic for acting upon the presence of this 40 * data can be found in i40e_transciever.c which handles all of the DMA, rx, and 41 * tx operations. This file is dedicated to handling and dealing with interrupt 42 * processing. 
 *
 * All devices supported by this driver support three kinds of interrupts:
 *
 * o Extended Message Signaled Interrupts (MSI-X)
 * o Message Signaled Interrupts (MSI)
 * o Legacy PCI interrupts (INTx)
 *
 * Generally speaking the hardware logically handles MSI and INTx the same and
 * restricts us to only using a single interrupt, which isn't the interesting
 * case. With MSI-X available, each physical function of the device provides the
 * opportunity for multiple interrupts which is what we'll focus on.
 *
 * --------------------
 * Interrupt Management
 * --------------------
 *
 * By default, the admin queue, which consists of the asynchronous other
 * interrupts is always bound to MSI-X vector zero. Next, we spread out all of
 * the other interrupts that we have available to us over the remaining
 * interrupt vectors.
 *
 * This means that there may be multiple queues, both tx and rx, which are
 * mapped to the same interrupt. When the interrupt fires, we'll have to check
 * all of them for servicing, before we go through and indicate that the
 * interrupt is claimed.
 *
 * The hardware provides the means of mapping various queues to MSI-X interrupts
 * by programming the I40E_QINT_RQCTL() and I40E_QINT_TQCTL() registers. These
 * registers can also be used to enable and disable whether or not the queue is
 * a source of interrupts. As part of this, the hardware requires that we
 * maintain a linked list of queues for each interrupt vector. While it may seem
 * like this is only there for the purposes of ITRs, that's not the case. The
 * first queue must be programmed in I40E_PFINT_LNKLSTN(%vector) register. Each
 * queue defines the next one in either the I40E_QINT_RQCTL or I40E_QINT_TQCTL
 * register.
78 * 79 * Because we only have a single queue enabled at the moment and we always have 80 * two interrupts, we do something pretty simple and just know that there's one 81 * data queue in the interrupt handler. Longer term, we'll need to think harder 82 * about this, but for the moment it'll have to suffice. 83 * 84 * Finally, the individual interrupt vector itself has the ability to be enabled 85 * and disabled. The overall interrupt is controlled through the 86 * I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt 87 * as a whole. 88 * 89 * Note that this means that both the individual queue and the interrupt as a 90 * whole can be toggled and re-enabled. 91 * 92 * ------------------- 93 * Non-MSIX Management 94 * ------------------- 95 * 96 * We may have a case where the Operating System is unable to actually allocate 97 * any MSI-X to the system. In such a world, there is only one transmit/receive 98 * queue pair and it is bound to the same interrupt with index zero. The 99 * hardware doesn't allow us access to additional interrupt vectors in these 100 * modes. Note that technically we could support more transmit/receive queues if 101 * we wanted. 102 * 103 * In this world, because the interrupts for the admin queue and traffic are 104 * mixed together, we have to consult ICR0 to determine what has occurred. The 105 * QINT_TQCTL and QINT_RQCTL registers have a field, 'MSI-X 0 index' which 106 * allows us to set a specific bit in ICR0. There are up to seven such bits; 107 * however, we only use the bit 0 and 1 for the rx and tx queue respectively. 108 * These are contained by the I40E_INTR_NOTX_{R|T}X_QUEUE and 109 * I40E_INTR_NOTX_{R|T}X_MASK registers respectively. 110 * 111 * Unfortunately, these corresponding queue bits have no corresponding entry in 112 * the ICR0_ENA register. So instead, when enabling interrupts on the queues, we 113 * end up enabling it on the queue registers rather than on the MSI-X registers. 
114 * In the MSI-X world, because they can be enabled and disabled, this is 115 * different and the queues can always be enabled and disabled, but the 116 * interrupts themselves are toggled (ignoring the question of interrupt 117 * blanking for polling on rings). 118 * 119 * Finally, we still have to set up the interrupt linked list, but the list is 120 * instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to 121 * one of the other MSI-X registers. 122 * 123 * -------------------- 124 * Interrupt Moderation 125 * -------------------- 126 * 127 * The XL710 hardware has three different interrupt moderation registers per 128 * interrupt. Unsurprisingly, we use these for: 129 * 130 * o RX interrupts 131 * o TX interrupts 132 * o 'Other interrupts' (link status change, admin queue, etc.) 133 * 134 * By default, we throttle 'other interrupts' the most, then TX interrupts, and 135 * then RX interrupts. The default values for these were based on trying to 136 * reason about both the importance and frequency of events. Generally speaking 137 * 'other interrupts' are not very frequent and they're not important for the 138 * I/O data path in and of itself (though they may indicate issues with the I/O 139 * data path). 140 * 141 * On the flip side, when we're not polling, RX interrupts are very important. 142 * The longer we wait for them, the more latency that we inject into the system. 143 * However, if we allow interrupts to occur too frequently, we risk a few 144 * problems: 145 * 146 * 1) Abusing system resources. Without proper interrupt blanking and polling, 147 * we can see upwards of 200k-300k interrupts per second on the system. 148 * 149 * 2) Not enough data coalescing to enable polling. In other words, the more 150 * data that we allow to build up, the more likely we'll be able to enable 151 * polling mode and allowing us to better handle bulk data. 
 *
 * In-between the 'other interrupts' and the TX interrupts we have the
 * reclamation of TX buffers. This operation is not quite as important as we
 * generally size the ring large enough that we should be able to reclaim a
 * substantial amount of the descriptors that we have used per interrupt. So
 * while it's important that this interrupt occur, we don't necessarily need it
 * firing as frequently as RX; it doesn't, on its own, induce additional latency
 * into the system.
 *
 * Based on all this we currently assign static ITR values for the system. While
 * we could move to a dynamic system (the hardware supports that), we'd want to
 * make sure that we're seeing problems from this that we believe would be
 * generally helped by the added complexity.
 *
 * Based on this, the default values that we have allow for the following
 * interrupt thresholds:
 *
 * o 20k interrupts/s for RX
 * o 5k interrupts/s for TX
 * o 2k interrupts/s for 'Other Interrupts'
 */

#include "i40e_sw.h"

/*
 * Queue and 'MSI-X 0 index' assignments used in the shared (non-MSI-X)
 * interrupt mode: the rx queue reports through ICR0 queue bit 0 and the tx
 * queue through ICR0 queue bit 1.
 */
#define	I40E_INTR_NOTX_QUEUE	0
#define	I40E_INTR_NOTX_INTR	0
#define	I40E_INTR_NOTX_RX_QUEUE	0
#define	I40E_INTR_NOTX_RX_MASK	(1 << I40E_PFINT_ICR0_QUEUE_0_SHIFT)
#define	I40E_INTR_NOTX_TX_QUEUE	1
#define	I40E_INTR_NOTX_TX_MASK	(1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT)

/*
 * Program the interrupt throttling rate 'val' for the given ITR index 'itr'
 * into the hardware. In MSI-X mode this covers every I/O vector (vectors one
 * and up); the 'other interrupts' ITR, and all ITRs when not using MSI-X,
 * live on interrupt zero instead.
 */
void
i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val)
{
	int i;
	i40e_hw_t *hw = &i40e->i40e_hw_space;

	VERIFY3U(val, <=, I40E_MAX_ITR);
	VERIFY3U(itr, <, I40E_ITR_INDEX_NONE);

	/*
	 * No matter the interrupt mode, the ITR for other interrupts is always
	 * on interrupt zero and the same is true if we're not using MSI-X.
	 */
	if (itr == I40E_ITR_INDEX_OTHER ||
	    i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
		I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val);
		return;
	}

	/*
	 * Vector zero is the adminq; the ITRN register array covers the I/O
	 * vectors and is indexed from zero, hence the 'i - 1' below.
	 */
	for (i = 1; i < i40e->i40e_intr_count; i++) {
		I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i - 1), val);
	}
}

/*
 * Re-enable the adminq. Note that the adminq doesn't have a traditional queue
 * associated with it from an interrupt perspective and just lives on ICR0.
 * However when MSI-X interrupts are not being used, then this also enables and
 * disables those interrupts.
 */
static void
i40e_intr_adminq_enable(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t reg;

	reg = I40E_PFINT_DYN_CTL0_INTENA_MASK |
	    I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
	    (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
	i40e_flush(hw);
}

/*
 * Disable the adminq interrupt by writing DYN_CTL0 without the INTENA bit
 * set.
 */
static void
i40e_intr_adminq_disable(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t reg;

	reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
}

/*
 * Enable the MSI-X interrupt for the given I/O vector. 'vector' is the
 * one-based vector number (vector zero is the adminq); the DYN_CTLN register
 * array covers only the I/O vectors, hence 'vector - 1'.
 */
static void
i40e_intr_io_enable(i40e_t *i40e, int vector)
{
	uint32_t reg;
	i40e_hw_t *hw = &i40e->i40e_hw_space;

	reg = I40E_PFINT_DYN_CTLN_INTENA_MASK |
	    I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
	    (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
}

/*
 * Disable the MSI-X interrupt for the given one-based I/O vector.
 */
static void
i40e_intr_io_disable(i40e_t *i40e, int vector)
{
	uint32_t reg;
	i40e_hw_t *hw = &i40e->i40e_hw_space;

	reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
}

/*
 * When MSI-X interrupts are being used, then we can enable the actual
 * interrupts themselves.
However, when they are not, we instead have to turn 261 * towards the queue's CAUSE_ENA bit and enable that. 262 */ 263 void 264 i40e_intr_io_enable_all(i40e_t *i40e) 265 { 266 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 267 int i; 268 269 for (i = 1; i < i40e->i40e_intr_count; i++) { 270 i40e_intr_io_enable(i40e, i); 271 } 272 } else { 273 uint32_t reg; 274 i40e_hw_t *hw = &i40e->i40e_hw_space; 275 276 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE)); 277 reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; 278 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); 279 280 reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE)); 281 reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK; 282 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); 283 } 284 } 285 286 /* 287 * When MSI-X interrupts are being used, then we can disable the actual 288 * interrupts themselves. However, when they are not, we instead have to turn 289 * towards the queue's CAUSE_ENA bit and disable that. 290 */ 291 void 292 i40e_intr_io_disable_all(i40e_t *i40e) 293 { 294 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 295 int i; 296 297 for (i = 1; i < i40e->i40e_intr_count; i++) { 298 i40e_intr_io_disable(i40e, i); 299 } 300 } else { 301 uint32_t reg; 302 i40e_hw_t *hw = &i40e->i40e_hw_space; 303 304 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE)); 305 reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; 306 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); 307 308 reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE)); 309 reg &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK; 310 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); 311 } 312 } 313 314 /* 315 * As part of disabling the tx and rx queue's we're technically supposed to 316 * remove the linked list entries. The simplest way is to clear the LNKLSTN 317 * register by setting it to I40E_QUEUE_TYPE_EOL (0x7FF). 318 * 319 * Note all of the FM register access checks are performed by the caller. 
320 */ 321 void 322 i40e_intr_io_clear_cause(i40e_t *i40e) 323 { 324 int i; 325 i40e_hw_t *hw = &i40e->i40e_hw_space; 326 327 if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) { 328 uint32_t reg; 329 reg = I40E_QUEUE_TYPE_EOL; 330 I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg); 331 return; 332 } 333 334 for (i = 1; i < i40e->i40e_intr_count; i++) { 335 uint32_t reg; 336 #ifdef DEBUG 337 /* 338 * Verify that the interrupt in question is disabled. This is a 339 * prerequisite of modifying the data in question. 340 */ 341 reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i - 1)); 342 VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK); 343 #endif 344 reg = I40E_QUEUE_TYPE_EOL; 345 I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i - 1), reg); 346 } 347 348 i40e_flush(hw); 349 } 350 351 /* 352 * Finalize interrupt handling. Mostly this disables the admin queue. 353 */ 354 void 355 i40e_intr_chip_fini(i40e_t *i40e) 356 { 357 #ifdef DEBUG 358 int i; 359 uint32_t reg; 360 361 i40e_hw_t *hw = &i40e->i40e_hw_space; 362 363 /* 364 * Take a look and verify that all other interrupts have been disabled 365 * and the interrupt linked lists have been zeroed. 366 */ 367 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 368 for (i = 1; i < i40e->i40e_intr_count; i++) { 369 reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i - 1)); 370 VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK); 371 372 reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1)); 373 VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL); 374 } 375 } 376 #endif 377 378 i40e_intr_adminq_disable(i40e); 379 } 380 381 /* 382 * Enable all of the queues and set the corresponding LNKLSTN registers. Note 383 * that we always enable queues as interrupt sources, even though we don't 384 * enable the MSI-X interrupt vectors. 385 */ 386 static void 387 i40e_intr_init_queue_msix(i40e_t *i40e) 388 { 389 i40e_hw_t *hw = &i40e->i40e_hw_space; 390 uint32_t reg; 391 392 /* 393 * Because we only have a single queue, just do something simple now. 
394 * How this all works will need to really be properly redone based on 395 * the bit maps, etc. Note that we skip the ITR logic for the moment, 396 * just to make our lives as explicit and simple as possible. 397 */ 398 reg = (0 << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) | 399 (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); 400 I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(0), reg); 401 402 reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | 403 (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | 404 (0 << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | 405 (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | 406 I40E_QINT_RQCTL_CAUSE_ENA_MASK; 407 408 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(0), reg); 409 410 reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | 411 (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | 412 (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | 413 (I40E_QUEUE_TYPE_RX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | 414 I40E_QINT_TQCTL_CAUSE_ENA_MASK; 415 416 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(0), reg); 417 418 } 419 420 /* 421 * Set up a single queue to share the admin queue interrupt in the non-MSI-X 422 * world. Note we do not enable the queue as an interrupt cause at this time. We 423 * don't have any other vector of control here, unlike with the MSI-X interrupt 424 * case. 
425 */ 426 static void 427 i40e_intr_init_queue_shared(i40e_t *i40e) 428 { 429 i40e_hw_t *hw = &i40e->i40e_hw_space; 430 uint32_t reg; 431 432 VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED || 433 i40e->i40e_intr_type == DDI_INTR_TYPE_MSI); 434 435 reg = (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) | 436 (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT); 437 I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg); 438 439 reg = (I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | 440 (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | 441 (I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT) | 442 (I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | 443 (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); 444 445 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg); 446 447 reg = (I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | 448 (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | 449 (I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT) | 450 (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | 451 (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); 452 453 I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg); 454 } 455 456 /* 457 * Enable the specified queue as a valid source of interrupts. Note, this should 458 * only be used as part of the GLDv3's interrupt blanking routines. The debug 459 * build assertions are specific to that. 460 */ 461 void 462 i40e_intr_rx_queue_enable(i40e_t *i40e, uint_t queue) 463 { 464 uint32_t reg; 465 i40e_hw_t *hw = &i40e->i40e_hw_space; 466 467 ASSERT(MUTEX_HELD(&i40e->i40e_general_lock)); 468 ASSERT(queue < i40e->i40e_num_trqpairs); 469 470 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue)); 471 ASSERT0(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK); 472 reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK; 473 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg); 474 } 475 476 /* 477 * Disable the specified queue as a valid source of interrupts. 
 * Note, this
 * should only be used as part of the GLDv3's interrupt blanking routines. The
 * debug build assertions are specific to that.
 */
void
i40e_intr_rx_queue_disable(i40e_t *i40e, uint_t queue)
{
	uint32_t reg;
	i40e_hw_t *hw = &i40e->i40e_hw_space;

	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
	ASSERT(queue < i40e->i40e_num_trqpairs);

	reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
	ASSERT3U(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==,
	    I40E_QINT_RQCTL_CAUSE_ENA_MASK);
	reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
}

/*
 * Start up the various chip's interrupt handling. We not only configure the
 * adminq here, but we also go through and configure all of the actual queues,
 * the interrupt linked lists, and others.
 */
void
i40e_intr_chip_init(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t reg;

	/*
	 * Ensure that all non adminq interrupts are disabled at the chip level.
	 */
	i40e_intr_io_disable_all(i40e);

	/* Mask everything in ICR0 and drain any pending cause bits. */
	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0);
	(void) I40E_READ_REG(hw, I40E_PFINT_ICR0);

	/*
	 * Always enable all of the other-class interrupts to be on their own
	 * ITR. This only needs to be set on interrupt zero, which has its own
	 * special setting.
	 */
	reg = I40E_ITR_INDEX_OTHER << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT;
	I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, reg);

	/*
	 * Enable interrupt types we expect to receive. At the moment, this
	 * is limited to the adminq; however, we'll want to review 11.2.2.9.22
	 * for more types here as we add support for detecting them, handling
	 * them, and resetting the device as appropriate.
	 */
	reg = I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);

	/*
	 * Always set the interrupt linked list to empty. We'll come back and
	 * change this if MSI-X are actually on the scene.
	 */
	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_TYPE_EOL);

	i40e_intr_adminq_enable(i40e);

	/*
	 * Set up all of the queues and map them to interrupts based on the bit
	 * assignments.
	 */
	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
		i40e_intr_init_queue_msix(i40e);
	} else {
		i40e_intr_init_queue_shared(i40e);
	}

	/*
	 * Finally set all of the default ITRs for the interrupts. Note that the
	 * queues will have been set up above.
	 */
	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr);
	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr);
	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr);
}

/*
 * Drain the admin receive queue, dispatching on the event opcode. Today the
 * only event we act on is a link status change.
 */
static void
i40e_intr_adminq_work(i40e_t *i40e)
{
	struct i40e_hw *hw = &i40e->i40e_hw_space;
	struct i40e_arq_event_info evt;
	uint16_t remain = 1;

	bzero(&evt, sizeof (struct i40e_arq_event_info));
	evt.buf_len = I40E_ADMINQ_BUFSZ;
	evt.msg_buf = i40e->i40e_aqbuf;

	while (remain != 0) {
		enum i40e_status_code ret;
		uint16_t opcode;

		/*
		 * At the moment, the only error code that seems to be returned
		 * is one saying that there's no work. In such a case we leave
		 * this be.
		 */
		ret = i40e_clean_arq_element(hw, &evt, &remain);
		if (ret != I40E_SUCCESS)
			break;

		opcode = LE_16(evt.desc.opcode);
		switch (opcode) {
		case i40e_aqc_opc_get_link_status:
			mutex_enter(&i40e->i40e_general_lock);
			i40e_link_check(i40e);
			mutex_exit(&i40e->i40e_general_lock);
			break;
		default:
			/*
			 * Longer term we'll want to enable other causes here
			 * and get these cleaned up and doing something.
			 */
			break;
		}
	}
}

/*
 * Service the rx side of the given queue: pull any received frames off the
 * ring and, if we got some, deliver them up to MAC.
 */
static void
i40e_intr_rx_work(i40e_t *i40e, int queue)
{
	mblk_t *mp;
	i40e_trqpair_t *itrq;

	ASSERT(queue < i40e->i40e_num_trqpairs);
	itrq = &i40e->i40e_trqpairs[queue];

	mutex_enter(&itrq->itrq_rx_lock);
	mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
	mutex_exit(&itrq->itrq_rx_lock);

	if (mp != NULL) {
		mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp,
		    itrq->itrq_rxgen);
	}
}

/*
 * Service the tx side of the given queue by recycling completed descriptors.
 */
static void
i40e_intr_tx_work(i40e_t *i40e, int queue)
{
	i40e_trqpair_t *itrq;

	itrq = &i40e->i40e_trqpairs[queue];
	i40e_tx_recycle_ring(itrq);
}

/*
 * At the moment, the only 'other' interrupt on ICR0 that we handle is the
 * adminq. We should go through and support the other notifications at some
 * point.
 */
static void
i40e_intr_other_work(i40e_t *i40e)
{
	struct i40e_hw *hw = &i40e->i40e_hw_space;
	uint32_t reg;

	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
		return;
	}

	if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
		i40e_intr_adminq_work(i40e);

	/*
	 * Make sure that the adminq interrupt is not masked and then explicitly
	 * enable the adminq and thus the other interrupt.
	 */
	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA);
	reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);

	i40e_intr_adminq_enable(i40e);
}

/*
 * MSI-X interrupt handler. arg2 carries the vector index: vector zero is the
 * adminq/'other' interrupt; currently the only other vector is one, which
 * services queue pair zero.
 */
uint_t
i40e_intr_msix(void *arg1, void *arg2)
{
	i40e_t *i40e = (i40e_t *)arg1;
	int vector_idx = (int)(uintptr_t)arg2;

	/*
	 * When using MSI-X interrupts, vector 0 is always reserved for the
	 * adminq at this time. Though longer term, we'll want to also bridge
	 * some I/O to them.
	 */
	if (vector_idx == 0) {
		i40e_intr_other_work(i40e);
		return (DDI_INTR_CLAIMED);
	}

	VERIFY(vector_idx == 1);

	/*
	 * Note that we explicitly do not check this value under the lock even
	 * though assignments to it are done so. In this case, the cost of
	 * getting this wrong is at worst a bit of additional contention and
	 * even more rarely, a duplicated packet. However, the cost on the other
	 * hand is a lot more. This is something that as we more generally
	 * implement ring support we should revisit.
	 */
	if (i40e->i40e_intr_poll != B_TRUE)
		i40e_intr_rx_work(i40e, 0);
	i40e_intr_tx_work(i40e, 0);
	i40e_intr_io_enable(i40e, 1);

	return (DDI_INTR_CLAIMED);
}

/*
 * Common handler for the MSI and INTx cases, where everything shares ICR0.
 * 'shared' indicates the interrupt line may be shared with other devices
 * (INTx), in which case we must be prepared to not claim the interrupt.
 */
static uint_t
i40e_intr_notx(i40e_t *i40e, boolean_t shared)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint32_t reg;
	int ret = DDI_INTR_CLAIMED;

	if (shared == B_TRUE) {
		mutex_enter(&i40e->i40e_general_lock);
		if (i40e->i40e_state & I40E_SUSPENDED) {
			mutex_exit(&i40e->i40e_general_lock);
			return (DDI_INTR_UNCLAIMED);
		}
		mutex_exit(&i40e->i40e_general_lock);
	}

	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
		return (DDI_INTR_CLAIMED);
	}

	if (reg == 0) {
		if (shared == B_TRUE)
			ret = DDI_INTR_UNCLAIMED;
		goto done;
	}

	if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
		i40e_intr_adminq_work(i40e);

	if (reg & I40E_INTR_NOTX_RX_MASK)
		i40e_intr_rx_work(i40e, 0);

	if (reg & I40E_INTR_NOTX_TX_MASK)
		i40e_intr_tx_work(i40e, 0);

done:
	i40e_intr_adminq_enable(i40e);
	return (ret);
}

/* ARGSUSED */
uint_t
i40e_intr_msi(void *arg1, void *arg2)
{
	i40e_t *i40e = (i40e_t *)arg1;

	return (i40e_intr_notx(i40e, B_FALSE));
}

/* ARGSUSED */
uint_t
i40e_intr_legacy(void *arg1, void *arg2)
{
	i40e_t *i40e = (i40e_t *)arg1;

	return (i40e_intr_notx(i40e, B_TRUE));
}