1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2019 Joyent, Inc.
14 * Copyright 2017 Tegile Systems, Inc. All rights reserved.
15 * Copyright 2020 RackTop Systems, Inc.
16 */
17
18 /*
19 * -------------------------
20 * Interrupt Handling Theory
21 * -------------------------
22 *
23 * There are a couple different sets of interrupts that we need to worry about:
24 *
25 * - Interrupts from receive queues
26 * - Interrupts from transmit queues
27 * - 'Other Interrupts', such as the administrative queue
28 *
29 * 'Other Interrupts' are asynchronous events such as a link status change event
30 * being posted to the administrative queue, unrecoverable ECC errors, and more.
31 * If we have something being posted to the administrative queue, then we go
32 * through and process it, because it's generally enabled as a separate logical
33 * interrupt. Note, we may need to do more here eventually. To re-enable the
34 * interrupts from the 'Other Interrupts' section, we need to clear the PBA and
35 * write ENA to PFINT_ICR0.
36 *
 * Interrupts from the transmit and receive queues indicate that our requests
38 * have been processed. In the rx case, it means that we have data that we
39 * should take a look at and send up the stack. In the tx case, it means that
40 * data which we got from MAC has now been sent out on the wire and we can free
41 * the associated data. Most of the logic for acting upon the presence of this
 * data can be found in i40e_transceiver.c which handles all of the DMA, rx, and
43 * tx operations. This file is dedicated to handling and dealing with interrupt
44 * processing.
45 *
46 * All devices supported by this driver support three kinds of interrupts:
47 *
48 * o Extended Message Signaled Interrupts (MSI-X)
49 * o Message Signaled Interrupts (MSI)
50 * o Legacy PCI interrupts (INTx)
51 *
52 * Generally speaking the hardware logically handles MSI and INTx the same and
53 * restricts us to only using a single interrupt, which isn't the interesting
54 * case. With MSI-X available, each physical function of the device provides the
55 * opportunity for multiple interrupts which is what we'll focus on.
56 *
57 * --------------------
58 * Interrupt Management
59 * --------------------
60 *
61 * By default, the admin queue, which consists of the asynchronous other
62 * interrupts is always bound to MSI-X vector zero. Next, we spread out all of
63 * the other interrupts that we have available to us over the remaining
64 * interrupt vectors.
65 *
66 * This means that there may be multiple queues, both tx and rx, which are
67 * mapped to the same interrupt. When the interrupt fires, we'll have to check
68 * all of them for servicing, before we go through and indicate that the
69 * interrupt is claimed.
70 *
71 * The hardware provides the means of mapping various queues to MSI-X interrupts
 * by programming the I40E_QINT_RQCTL() and I40E_QINT_TQCTL() registers. These
73 * registers can also be used to enable and disable whether or not the queue is
74 * a source of interrupts. As part of this, the hardware requires that we
75 * maintain a linked list of queues for each interrupt vector. While it may seem
 * like this is only there for the purposes of ITRs, that's not the case. The
77 * first queue must be programmed in I40E_QINT_LNKLSTN(%vector) register. Each
78 * queue defines the next one in either the I40E_QINT_RQCTL or I40E_QINT_TQCTL
79 * register.
80 *
81 * Finally, the individual interrupt vector itself has the ability to be enabled
82 * and disabled. The overall interrupt is controlled through the
83 * I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt
84 * as a whole.
85 *
86 * Note that this means that both the individual queue and the interrupt as a
87 * whole can be toggled and re-enabled.
88 *
89 * -------------------
90 * Non-MSIX Management
91 * -------------------
92 *
93 * We may have a case where the Operating System is unable to actually allocate
94 * any MSI-X to the system. In such a world, there is only one transmit/receive
95 * queue pair and it is bound to the same interrupt with index zero. The
96 * hardware doesn't allow us access to additional interrupt vectors in these
97 * modes. Note that technically we could support more transmit/receive queues if
98 * we wanted.
99 *
100 * In this world, because the interrupts for the admin queue and traffic are
101 * mixed together, we have to consult ICR0 to determine what has occurred. The
102 * QINT_TQCTL and QINT_RQCTL registers have a field, 'MSI-X 0 index' which
103 * allows us to set a specific bit in ICR0. There are up to seven such bits;
104 * however, we only use the bit 0 and 1 for the rx and tx queue respectively.
105 * These are contained by the I40E_INTR_NOTX_{R|T}X_QUEUE and
106 * I40E_INTR_NOTX_{R|T}X_MASK registers respectively.
107 *
108 * Unfortunately, these corresponding queue bits have no corresponding entry in
109 * the ICR0_ENA register. So instead, when enabling interrupts on the queues, we
110 * end up enabling it on the queue registers rather than on the MSI-X registers.
111 * In the MSI-X world, because they can be enabled and disabled, this is
112 * different and the queues can always be enabled and disabled, but the
113 * interrupts themselves are toggled (ignoring the question of interrupt
114 * blanking for polling on rings).
115 *
116 * Finally, we still have to set up the interrupt linked list, but the list is
117 * instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to
118 * one of the other MSI-X registers.
119 *
120 * --------------------
121 * Interrupt Moderation
122 * --------------------
123 *
124 * The XL710 hardware has three different interrupt moderation registers per
125 * interrupt. Unsurprisingly, we use these for:
126 *
127 * o RX interrupts
128 * o TX interrupts
129 * o 'Other interrupts' (link status change, admin queue, etc.)
130 *
131 * By default, we throttle 'other interrupts' the most, then TX interrupts, and
132 * then RX interrupts. The default values for these were based on trying to
133 * reason about both the importance and frequency of events. Generally speaking
134 * 'other interrupts' are not very frequent and they're not important for the
135 * I/O data path in and of itself (though they may indicate issues with the I/O
136 * data path).
137 *
138 * On the flip side, when we're not polling, RX interrupts are very important.
139 * The longer we wait for them, the more latency that we inject into the system.
140 * However, if we allow interrupts to occur too frequently, we risk a few
141 * problems:
142 *
143 * 1) Abusing system resources. Without proper interrupt blanking and polling,
144 * we can see upwards of 200k-300k interrupts per second on the system.
145 *
146 * 2) Not enough data coalescing to enable polling. In other words, the more
147 * data that we allow to build up, the more likely we'll be able to enable
148 * polling mode and allowing us to better handle bulk data.
149 *
150 * In-between the 'other interrupts' and the TX interrupts we have the
151 * reclamation of TX buffers. This operation is not quite as important as we
152 * generally size the ring large enough that we should be able to reclaim a
153 * substantial amount of the descriptors that we have used per interrupt. So
154 * while it's important that this interrupt occur, we don't necessarily need it
155 * firing as frequently as RX; it doesn't, on its own, induce additional latency
156 * into the system.
157 *
158 * Based on all this we currently assign static ITR values for the system. While
159 * we could move to a dynamic system (the hardware supports that), we'd want to
160 * make sure that we're seeing problems from this that we believe would be
161 * generally helped by the added complexity.
162 *
163 * Based on this, the default values that we have allow for the following
164 * interrupt thresholds:
165 *
166 * o 20k interrupts/s for RX
167 * o 5k interrupts/s for TX
 * o 2k interrupts/s for 'Other Interrupts'
169 */
170
171 #include "i40e_sw.h"
172
/*
 * Constants for the shared (non-MSI-X) interrupt mode, where a single
 * queue pair shares vector zero with the adminq.  The RX and TX queues are
 * told (via the 'MSI-X 0 index' field in QINT_RQCTL/QINT_TQCTL) to raise
 * PFINT_ICR0 queue bit 0 and bit 1 respectively, which is how the handler
 * distinguishes the two causes.
 */
#define	I40E_INTR_NOTX_QUEUE	0
#define	I40E_INTR_NOTX_INTR	0
#define	I40E_INTR_NOTX_RX_QUEUE	0
#define	I40E_INTR_NOTX_RX_MASK	(1 << I40E_PFINT_ICR0_QUEUE_0_SHIFT)
#define	I40E_INTR_NOTX_TX_QUEUE	1
#define	I40E_INTR_NOTX_TX_MASK	(1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT)
179
/*
 * Program the interrupt throttle registers for the given ITR index (RX, TX,
 * or 'other') with the throttle value 'val'.  'val' must not exceed
 * I40E_MAX_ITR, and 'itr' must be a real index (not I40E_ITR_INDEX_NONE).
 */
void
i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val)
{
    int i;
    i40e_hw_t *hw = &i40e->i40e_hw_space;

    VERIFY3U(val, <=, I40E_MAX_ITR);
    VERIFY3U(itr, <, I40E_ITR_INDEX_NONE);

    /*
     * No matter the interrupt mode, the ITR for other interrupts is always
     * on interrupt zero and the same is true if we're not using MSI-X.
     */
    if (itr == I40E_ITR_INDEX_OTHER ||
        i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
        I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val);
        return;
    }

    /* MSI-X: program this ITR index on each of the I/O interrupts. */
    for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
        I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i), val);
    }
}
203
204 /*
205 * Re-enable the adminq. Note that the adminq doesn't have a traditional queue
206 * associated with it from an interrupt perspective and just lives on ICR0.
207 * However when MSI-X interrupts are not being used, then this also enables and
208 * disables those interrupts.
209 */
210 static void
i40e_intr_adminq_enable(i40e_t * i40e)211 i40e_intr_adminq_enable(i40e_t *i40e)
212 {
213 i40e_hw_t *hw = &i40e->i40e_hw_space;
214 uint32_t reg;
215
216 reg = I40E_PFINT_DYN_CTL0_INTENA_MASK |
217 I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
218 (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
219 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
220 i40e_flush(hw);
221 }
222
223 static void
i40e_intr_adminq_disable(i40e_t * i40e)224 i40e_intr_adminq_disable(i40e_t *i40e)
225 {
226 i40e_hw_t *hw = &i40e->i40e_hw_space;
227 uint32_t reg;
228
229 reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
230 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
231 }
232
233 /*
234 * The next two functions enable/disable the reception of interrupts
235 * on the given vector. Only vectors 1..N are programmed by these
236 * functions; vector 0 is special and handled by a different register.
237 * We must subtract one from the vector because i40e implicitly adds
238 * one to the vector value. See section 10.2.2.10.13 for more details.
239 */
240 static void
i40e_intr_io_enable(i40e_t * i40e,int vector)241 i40e_intr_io_enable(i40e_t *i40e, int vector)
242 {
243 uint32_t reg;
244 i40e_hw_t *hw = &i40e->i40e_hw_space;
245
246 ASSERT3S(vector, >, 0);
247 reg = I40E_PFINT_DYN_CTLN_INTENA_MASK |
248 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
249 (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
250 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
251 }
252
253 static void
i40e_intr_io_disable(i40e_t * i40e,int vector)254 i40e_intr_io_disable(i40e_t *i40e, int vector)
255 {
256 uint32_t reg;
257 i40e_hw_t *hw = &i40e->i40e_hw_space;
258
259 ASSERT3S(vector, >, 0);
260 reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
261 I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
262 }
263
/*
 * When MSI-X interrupts are being used, then we can enable the actual
 * interrupts themselves. However, when they are not, we instead have to turn
 * towards the queue's CAUSE_ENA bit and enable that.
 */
void
i40e_intr_io_enable_all(i40e_t *i40e)
{
    if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
        int i;

        /* Vector 0 is reserved for the adminq; I/O vectors start at 1. */
        for (i = 1; i < i40e->i40e_intr_count; i++) {
            i40e_intr_io_enable(i40e, i);
        }
    } else {
        uint32_t reg;
        i40e_hw_t *hw = &i40e->i40e_hw_space;

        /*
         * Shared-interrupt mode: read-modify-write the CAUSE_ENA bit on
         * the single rx and tx queue that is in use.
         */
        reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
        reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
        I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);

        reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
        reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
        I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
    }
}
291
/*
 * When MSI-X interrupts are being used, then we can disable the actual
 * interrupts themselves. However, when they are not, we instead have to turn
 * towards the queue's CAUSE_ENA bit and disable that.
 */
void
i40e_intr_io_disable_all(i40e_t *i40e)
{
    if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
        int i;

        /* Vector 0 is reserved for the adminq; I/O vectors start at 1. */
        for (i = 1; i < i40e->i40e_intr_count; i++) {
            i40e_intr_io_disable(i40e, i);
        }
    } else {
        uint32_t reg;
        i40e_hw_t *hw = &i40e->i40e_hw_space;

        /*
         * Shared-interrupt mode: read-modify-write, clearing CAUSE_ENA
         * on the single rx and tx queue that is in use.
         */
        reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
        reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
        I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);

        reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
        reg &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
        I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
    }
}
319
/*
 * As part of disabling the tx and rx queue's we're technically supposed to
 * remove the linked list entries. The simplest way is to clear the LNKLSTN
 * register by setting it to I40E_QUEUE_TYPE_EOL (0x7FF).
 *
 * Note all of the FM register access checks are performed by the caller.
 */
void
i40e_intr_io_clear_cause(i40e_t *i40e)
{
    uint32_t i;
    i40e_hw_t *hw = &i40e->i40e_hw_space;

    if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
        uint32_t reg;
        /* Shared mode roots its single list at PFINT_LNKLST0. */
        reg = I40E_QUEUE_TYPE_EOL;
        I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);
        return;
    }

    /* One list per I/O vector; vector 0 (adminq) has no list. */
    for (i = 0; i < i40e->i40e_intr_count - 1; i++) {
        uint32_t reg;

        reg = I40E_QUEUE_TYPE_EOL;
        I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i), reg);
    }

    i40e_flush(hw);
}
349
/*
 * Finalize interrupt handling. Mostly this disables the admin queue.
 */
void
i40e_intr_chip_fini(i40e_t *i40e)
{
#ifdef DEBUG
    int i;
    uint32_t reg;

    i40e_hw_t *hw = &i40e->i40e_hw_space;

    /*
     * Take a look and verify that all other interrupts have been disabled
     * and the interrupt linked lists have been zeroed.
     */
    if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
        for (i = 0; i < i40e->i40e_intr_count - 1; i++) {
            /* Each I/O vector must already be masked ... */
            reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i));
            VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);

            /* ... and its linked list must be empty (EOL). */
            reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i));
            VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
        }
    }
#endif

    i40e_intr_adminq_disable(i40e);
}
379
/*
 * Set the head of the interrupt linked list. The PFINT_LNKLSTN[N]
 * register actually refers to the 'N + 1' interrupt vector. E.g.,
 * PFINT_LNKLSTN[0] refers to interrupt vector 1.
 */
static void
i40e_set_lnklstn(i40e_t *i40e, uint_t vector, uint_t queue)
{
    uint32_t reg;
    i40e_hw_t *hw = &i40e->i40e_hw_space;

    /* The list always begins with the Rx side of the given queue. */
    reg = (queue << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
        (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);

    I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(vector), reg);
    DEBUGOUT2("PFINT_LNKLSTN[%u] = 0x%x", vector, reg);
}
397
/*
 * Set the QINT_RQCTL[queue] register. The next queue is always the Tx
 * queue associated with this Rx queue. Unlike PFINT_LNKLSTN, the
 * vector should be the actual vector this queue is on -- i.e., it
 * should be equal to itrq_rx_intrvec.
 */
static void
i40e_set_rqctl(i40e_t *i40e, uint_t vector, uint_t queue)
{
    uint32_t reg;
    i40e_hw_t *hw = &i40e->i40e_hw_space;

    ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_rx_intrvec);

    /*
     * Bind the Rx queue to its vector on the RX ITR, point its NEXTQ
     * at the paired Tx queue, and enable it as an interrupt cause.
     */
    reg = (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
        (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
        (queue << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
        (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
        I40E_QINT_RQCTL_CAUSE_ENA_MASK;

    I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
    DEBUGOUT2("QINT_RQCTL[%u] = 0x%x", queue, reg);
}
421
/*
 * Like i40e_set_rqctl(), but for QINT_TQCTL[queue]. The next queue is
 * either the Rx queue of another TRQP, or EOL.
 */
static void
i40e_set_tqctl(i40e_t *i40e, uint_t vector, uint_t queue, uint_t next_queue)
{
    uint32_t reg;
    i40e_hw_t *hw = &i40e->i40e_hw_space;

    ASSERT3U(vector, ==, i40e->i40e_trqpairs[queue].itrq_tx_intrvec);

    /*
     * Bind the Tx queue to its vector on the TX ITR, link it to
     * 'next_queue' (an Rx queue index, or I40E_QUEUE_TYPE_EOL), and
     * enable it as an interrupt cause.
     */
    reg = (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
        (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
        (next_queue << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
        (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT) |
        I40E_QINT_TQCTL_CAUSE_ENA_MASK;

    I40E_WRITE_REG(hw, I40E_QINT_TQCTL(queue), reg);
    DEBUGOUT2("QINT_TQCTL[%u] = 0x%x", queue, reg);
}
443
444 /*
445 * Program the interrupt linked list. Each vector has a linked list of
446 * queues which act as event sources for that vector. When one of
447 * those sources has an event the associated interrupt vector is
448 * fired. This mapping must match the mapping found in
449 * i40e_map_intrs_to_vectors().
450 *
451 * See section 7.5.3 for more information about the configuration of
452 * the interrupt linked list.
453 */
454 static void
i40e_intr_init_queue_msix(i40e_t * i40e)455 i40e_intr_init_queue_msix(i40e_t *i40e)
456 {
457 uint_t intr_count;
458
459 /*
460 * The 0th vector is for 'Other Interrupts' only (subject to
461 * change in the future).
462 */
463 intr_count = i40e->i40e_intr_count - 1;
464
465 for (uint_t vec = 0; vec < intr_count; vec++) {
466 boolean_t head = B_TRUE;
467
468 for (uint_t qidx = vec; qidx < i40e->i40e_num_trqpairs;
469 qidx += intr_count) {
470 uint_t next_qidx = qidx + intr_count;
471
472 next_qidx = (next_qidx > i40e->i40e_num_trqpairs) ?
473 I40E_QUEUE_TYPE_EOL : next_qidx;
474
475 if (head) {
476 i40e_set_lnklstn(i40e, vec, qidx);
477 head = B_FALSE;
478 }
479
480 i40e_set_rqctl(i40e, vec + 1, qidx);
481 i40e_set_tqctl(i40e, vec + 1, qidx, next_qidx);
482 }
483 }
484 }
485
/*
 * Set up a single queue to share the admin queue interrupt in the non-MSI-X
 * world. Note we do not enable the queue as an interrupt cause at this time. We
 * don't have any other vector of control here, unlike with the MSI-X interrupt
 * case.
 */
static void
i40e_intr_init_queue_shared(i40e_t *i40e)
{
    i40e_hw_t *hw = &i40e->i40e_hw_space;
    uint32_t reg;

    VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED ||
        i40e->i40e_intr_type == DDI_INTR_TYPE_MSI);

    /* Root the interrupt linked list at the shared queue's Rx side. */
    reg = (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) |
        (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
    I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);

    /*
     * The Rx queue reports through ICR0 via the MSIX0 index field
     * (I40E_INTR_NOTX_RX_QUEUE selects the ICR0 queue bit) and links to
     * its Tx partner. CAUSE_ENA is deliberately left clear here.
     */
    reg = (I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
        (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
        (I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT) |
        (I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
        (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);

    I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);

    /* The Tx queue terminates the list (NEXTQ == EOL). */
    reg = (I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
        (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
        (I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT) |
        (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
        (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);

    I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
}
521
522 /*
523 * Enable the specified queue as a valid source of interrupts. Note, this should
524 * only be used as part of the GLDv3's interrupt blanking routines. The debug
525 * build assertions are specific to that.
526 */
527 void
i40e_intr_rx_queue_enable(i40e_trqpair_t * itrq)528 i40e_intr_rx_queue_enable(i40e_trqpair_t *itrq)
529 {
530 uint32_t reg;
531 uint_t queue = itrq->itrq_index;
532 i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space;
533
534 ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
535 ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs);
536
537 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
538 ASSERT0(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK);
539 reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
540 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
541 }
542
543 /*
544 * Disable the specified queue as a valid source of interrupts. Note, this
545 * should only be used as part of the GLDv3's interrupt blanking routines. The
546 * debug build assertions are specific to that.
547 */
548 void
i40e_intr_rx_queue_disable(i40e_trqpair_t * itrq)549 i40e_intr_rx_queue_disable(i40e_trqpair_t *itrq)
550 {
551 uint32_t reg;
552 uint_t queue = itrq->itrq_index;
553 i40e_hw_t *hw = &itrq->itrq_i40e->i40e_hw_space;
554
555 ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
556 ASSERT(queue < itrq->itrq_i40e->i40e_num_trqpairs);
557
558 reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
559 ASSERT3U(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==,
560 I40E_QINT_RQCTL_CAUSE_ENA_MASK);
561 reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
562 I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
563 }
564
/*
 * Start up the various chip's interrupt handling. We not only configure the
 * adminq here, but we also go through and configure all of the actual queues,
 * the interrupt linked lists, and others.
 */
void
i40e_intr_chip_init(i40e_t *i40e)
{
    i40e_hw_t *hw = &i40e->i40e_hw_space;
    uint32_t reg;

    /*
     * Ensure that all non adminq interrupts are disabled at the chip level.
     */
    i40e_intr_io_disable_all(i40e);

    /* Mask all 'other' causes; the ICR0 read discards pending ones. */
    I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0);
    (void) I40E_READ_REG(hw, I40E_PFINT_ICR0);

    /*
     * Always enable all of the other-class interrupts to be on their own
     * ITR. This only needs to be set on interrupt zero, which has its own
     * special setting.
     */
    reg = I40E_ITR_INDEX_OTHER << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT;
    I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, reg);

    /*
     * Enable interrupt types we expect to receive. At the moment, this
     * is limited to the adminq; however, we'll want to review 11.2.2.9.22
     * for more types here as we add support for detecting them, handling
     * them, and resetting the device as appropriate.
     */
    reg = I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
    I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);

    /*
     * Always set the interrupt linked list to empty. We'll come back and
     * change this if MSI-X are actually on the scene.
     */
    I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_TYPE_EOL);

    i40e_intr_adminq_enable(i40e);

    /*
     * Set up all of the queues and map them to interrupts based on the bit
     * assignments.
     */
    if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
        i40e_intr_init_queue_msix(i40e);
    } else {
        i40e_intr_init_queue_shared(i40e);
    }

    /*
     * Finally set all of the default ITRs for the interrupts. Note that the
     * queues will have been set up above.
     */
    i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr);
    i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr);
    i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr);
}
627
/*
 * Drain the admin receive queue, dispatching on each event's opcode.
 * Currently only link status change events are acted upon.
 */
static void
i40e_intr_adminq_work(i40e_t *i40e)
{
    struct i40e_hw *hw = &i40e->i40e_hw_space;
    struct i40e_arq_event_info evt;
    uint16_t remain = 1;

    /* 'remain = 1' primes the loop; the clean call updates it. */
    bzero(&evt, sizeof (struct i40e_arq_event_info));
    evt.buf_len = I40E_ADMINQ_BUFSZ;
    evt.msg_buf = i40e->i40e_aqbuf;

    while (remain != 0) {
        enum i40e_status_code ret;
        uint16_t opcode;

        /*
         * At the moment, the only error code that seems to be returned
         * is one saying that there's no work. In such a case we leave
         * this be.
         */
        ret = i40e_clean_arq_element(hw, &evt, &remain);
        if (ret != I40E_SUCCESS)
            break;

        opcode = LE_16(evt.desc.opcode);
        switch (opcode) {
        case i40e_aqc_opc_get_link_status:
            mutex_enter(&i40e->i40e_general_lock);
            i40e_link_check(i40e);
            mutex_exit(&i40e->i40e_general_lock);
            break;
        default:
            /*
             * Longer term we'll want to enable other causes here
             * and get these cleaned up and doing something.
             */
            break;
        }
    }
}
668
669 static void
i40e_intr_rx_work(i40e_t * i40e,i40e_trqpair_t * itrq)670 i40e_intr_rx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
671 {
672 mblk_t *mp = NULL;
673
674 mutex_enter(&itrq->itrq_rx_lock);
675 if (!itrq->itrq_intr_poll)
676 mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
677 mutex_exit(&itrq->itrq_rx_lock);
678
679 if (mp == NULL)
680 return;
681
682 mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp,
683 itrq->itrq_rxgen);
684 }
685
/* ARGSUSED */
static void
i40e_intr_tx_work(i40e_t *i40e, i40e_trqpair_t *itrq)
{
    /* Reclaim descriptors for frames the hardware has finished with. */
    i40e_tx_recycle_ring(itrq);
}
692
/*
 * At the moment, the only 'other' interrupt on ICR0 that we handle is the
 * adminq. We should go through and support the other notifications at some
 * point.
 */
static void
i40e_intr_other_work(i40e_t *i40e)
{
    struct i40e_hw *hw = &i40e->i40e_hw_space;
    uint32_t reg;

    /* Fetch the pending 'other' causes; bail if register access is bad. */
    reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
    if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
        DDI_FM_OK) {
        ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
        atomic_or_32(&i40e->i40e_state, I40E_ERROR);
        return;
    }

    if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
        i40e_intr_adminq_work(i40e);

    /*
     * Make sure that the adminq interrupt is not masked and then explicitly
     * enable the adminq and thus the other interrupt.
     */
    reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA);
    reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
    I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);

    i40e_intr_adminq_enable(i40e);
}
725
726 /*
727 * The prolog/epilog pair of functions ensure the integrity of the trqpair
728 * across ring stop/start operations.
729 *
730 * A ring stop operation will wait whilst an interrupt is processing a
731 * trqpair, and when a ring is stopped the interrupt handler will skip
732 * the trqpair.
733 */
734 static boolean_t
i40e_intr_trqpair_prolog(i40e_trqpair_t * itrq)735 i40e_intr_trqpair_prolog(i40e_trqpair_t *itrq)
736 {
737 boolean_t enabled;
738
739 mutex_enter(&itrq->itrq_intr_lock);
740 enabled = !itrq->itrq_intr_quiesce;
741 if (enabled)
742 itrq->itrq_intr_busy = B_TRUE;
743 mutex_exit(&itrq->itrq_intr_lock);
744
745 return (enabled);
746 }
747
static void
i40e_intr_trqpair_epilog(i40e_trqpair_t *itrq)
{
    /* Mark the trqpair idle; wake any waiter in i40e_intr_quiesce(). */
    mutex_enter(&itrq->itrq_intr_lock);
    itrq->itrq_intr_busy = B_FALSE;
    if (itrq->itrq_intr_quiesce)
        cv_signal(&itrq->itrq_intr_cv);
    mutex_exit(&itrq->itrq_intr_lock);
}
757
/*
 * Tell any active interrupt vectors the ring is quiescing, then
 * wait until any active interrupt thread has finished with this
 * trqpair.
 */
void
i40e_intr_quiesce(i40e_trqpair_t *itrq)
{
    mutex_enter(&itrq->itrq_intr_lock);
    itrq->itrq_intr_quiesce = B_TRUE;
    /* The epilog signals itrq_intr_cv once itrq_intr_busy is cleared. */
    while (itrq->itrq_intr_busy)
        cv_wait(&itrq->itrq_intr_cv, &itrq->itrq_intr_lock);
    mutex_exit(&itrq->itrq_intr_lock);
}
772
/*
 * Handle an MSI-X interrupt. See section 7.5.1.3 for an overview of
 * the MSI-X interrupt sequence.
 */
uint_t
i40e_intr_msix(void *arg1, void *arg2)
{
    i40e_t *i40e = (i40e_t *)arg1;
    uint_t vector_idx = (uint_t)(uintptr_t)arg2;

    ASSERT3U(vector_idx, <, i40e->i40e_intr_count);

    /*
     * When using MSI-X interrupts, vector 0 is always reserved for the
     * adminq at this time. Though longer term, we'll want to also bridge
     * some I/O to them.
     */
    if (vector_idx == 0) {
        i40e_intr_other_work(i40e);
        return (DDI_INTR_CLAIMED);
    }

    ASSERT3U(vector_idx, >, 0);

    /*
     * We determine the queue indexes via simple arithmetic (as
     * opposed to keeping explicit state like a bitmap). While
     * convenient, it does mean that i40e_map_intrs_to_vectors(),
     * i40e_intr_init_queue_msix(), and this function must be
     * modified as a unit.
     *
     * We subtract 1 from the vector to offset the addition we
     * performed during i40e_map_intrs_to_vectors().
     */
    for (uint_t i = vector_idx - 1; i < i40e->i40e_num_trqpairs;
        i += (i40e->i40e_intr_count - 1)) {
        i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];

        ASSERT3U(i, <, i40e->i40e_num_trqpairs);
        ASSERT3P(itrq, !=, NULL);
        /* Skip trqpairs that are being stopped. */
        if (!i40e_intr_trqpair_prolog(itrq))
            continue;

        i40e_intr_rx_work(i40e, itrq);
        i40e_intr_tx_work(i40e, itrq);

        i40e_intr_trqpair_epilog(itrq);
    }

    /* Re-arm this vector now that its queues have been serviced. */
    i40e_intr_io_enable(i40e, vector_idx);
    return (DDI_INTR_CLAIMED);
}
825
/*
 * Common MSI/INTx interrupt handler. In non-MSI-X mode all causes,
 * including the single trqpair's rx and tx, are multiplexed onto ICR0.
 * 'shared' indicates the line may be shared with other devices (legacy
 * INTx), in which case we must be prepared to return DDI_INTR_UNCLAIMED.
 */
static uint_t
i40e_intr_notx(i40e_t *i40e, boolean_t shared)
{
    i40e_hw_t *hw = &i40e->i40e_hw_space;
    uint32_t reg;
    i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[0];
    int ret = DDI_INTR_CLAIMED;

    /* A suspended device cannot be the source of a shared interrupt. */
    if (shared == B_TRUE) {
        mutex_enter(&i40e->i40e_general_lock);
        if (i40e->i40e_state & I40E_SUSPENDED) {
            mutex_exit(&i40e->i40e_general_lock);
            return (DDI_INTR_UNCLAIMED);
        }
        mutex_exit(&i40e->i40e_general_lock);
    }

    reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
    if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
        DDI_FM_OK) {
        ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
        atomic_or_32(&i40e->i40e_state, I40E_ERROR);
        return (DDI_INTR_CLAIMED);
    }

    /* No cause bits set: unclaim only if the line may be shared. */
    if (reg == 0) {
        if (shared == B_TRUE)
            ret = DDI_INTR_UNCLAIMED;
        goto done;
    }

    if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
        i40e_intr_adminq_work(i40e);

    /* Service the single trqpair unless it is being stopped. */
    if (i40e_intr_trqpair_prolog(itrq)) {
        if (reg & I40E_INTR_NOTX_RX_MASK)
            i40e_intr_rx_work(i40e, itrq);

        if (reg & I40E_INTR_NOTX_TX_MASK)
            i40e_intr_tx_work(i40e, itrq);

        i40e_intr_trqpair_epilog(itrq);
    }

done:
    i40e_intr_adminq_enable(i40e);
    return (ret);

}
875
/* ARGSUSED */
uint_t
i40e_intr_msi(void *arg1, void *arg2)
{
    i40e_t *i40e = (i40e_t *)arg1;

    /* MSI lines are not shared, so this interrupt is always ours. */
    return (i40e_intr_notx(i40e, B_FALSE));
}
884
/* ARGSUSED */
uint_t
i40e_intr_legacy(void *arg1, void *arg2)
{
    i40e_t *i40e = (i40e_t *)arg1;

    /* Legacy INTx may be shared; the common handler may unclaim it. */
    return (i40e_intr_notx(i40e, B_TRUE));
}
893