xref: /titanic_41/usr/src/uts/common/io/i40e/i40e_intr.c (revision fdc35dd8859c711d510ef5e9397204a1d98e23c5)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2016 Joyent, Inc.
14  */
15 
16 /*
17  * -------------------------
18  * Interrupt Handling Theory
19  * -------------------------
20  *
21  * There are a couple different sets of interrupts that we need to worry about:
22  *
23  *   - Interrupts from receive queues
24  *   - Interrupts from transmit queues
25  *   - 'Other Interrupts', such as the administrative queue
26  *
27  * 'Other Interrupts' are asynchronous events such as a link status change event
28  * being posted to the administrative queue, unrecoverable ECC errors, and more.
29  * If we have something being posted to the administrative queue, then we go
30  * through and process it, because it's generally enabled as a separate logical
31  * interrupt. Note, we may need to do more here eventually. To re-enable the
32  * interrupts from the 'Other Interrupts' section, we need to clear the PBA and
33  * write ENA to PFINT_ICR0.
34  *
35  * Interrupts from the transmit and receive queues indicate that our requests
36  * have been processed. In the rx case, it means that we have data that we
37  * should take a look at and send up the stack. In the tx case, it means that
38  * data which we got from MAC has now been sent out on the wire and we can free
39  * the associated data. Most of the logic for acting upon the presence of this
40  * data can be found in i40e_transceiver.c which handles all of the DMA, rx, and
41  * tx operations. This file is dedicated to handling and dealing with interrupt
42  * processing.
43  *
44  * All devices supported by this driver support three kinds of interrupts:
45  *
46  *   o Extended Message Signaled Interrupts (MSI-X)
47  *   o Message Signaled Interrupts (MSI)
48  *   o Legacy PCI interrupts (INTx)
49  *
50  * Generally speaking the hardware logically handles MSI and INTx the same and
51  * restricts us to only using a single interrupt, which isn't the interesting
52  * case. With MSI-X available, each physical function of the device provides the
53  * opportunity for multiple interrupts which is what we'll focus on.
54  *
55  * --------------------
56  * Interrupt Management
57  * --------------------
58  *
59  * By default, the admin queue, which consists of the asynchronous other
60  * interrupts is always bound to MSI-X vector zero. Next, we spread out all of
61  * the other interrupts that we have available to us over the remaining
62  * interrupt vectors.
63  *
64  * This means that there may be multiple queues, both tx and rx, which are
65  * mapped to the same interrupt. When the interrupt fires, we'll have to check
66  * all of them for servicing, before we go through and indicate that the
67  * interrupt is claimed.
68  *
69  * The hardware provides the means of mapping various queues to MSI-X interrupts
70  * by programming the I40E_QINT_RQCTL() and I40E_QINT_TQCTL() registers. These
71  * registers can also be used to enable and disable whether or not the queue is
72  * a source of interrupts. As part of this, the hardware requires that we
73  * maintain a linked list of queues for each interrupt vector. While it may seem
74  * like this is only there for the purposes of ITRs, that's not the case. The
75  * first queue must be programmed in I40E_QINT_LNKLSTN(%vector) register. Each
76  * queue defines the next one in either the I40E_QINT_RQCTL or I40E_QINT_TQCTL
77  * register.
78  *
79  * Because we only have a single queue enabled at the moment and we always have
80  * two interrupts, we do something pretty simple and just know that there's one
81  * data queue in the interrupt handler. Longer term, we'll need to think harder
82  * about this, but for the moment it'll have to suffice.
83  *
84  * Finally, the individual interrupt vector itself has the ability to be enabled
85  * and disabled. The overall interrupt is controlled through the
86  * I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt
87  * as a whole.
88  *
89  * Note that this means that both the individual queue and the interrupt as a
90  * whole can be toggled and re-enabled.
91  *
92  * -------------------
93  * Non-MSIX Management
94  * -------------------
95  *
96  * We may have a case where the Operating System is unable to actually allocate
97  * any MSI-X to the system. In such a world, there is only one transmit/receive
98  * queue pair and it is bound to the same interrupt with index zero. The
99  * hardware doesn't allow us access to additional interrupt vectors in these
100  * modes. Note that technically we could support more transmit/receive queues if
101  * we wanted.
102  *
103  * In this world, because the interrupts for the admin queue and traffic are
104  * mixed together, we have to consult ICR0 to determine what has occurred. The
105  * QINT_TQCTL and QINT_RQCTL registers have a field, 'MSI-X 0 index' which
106  * allows us to set a specific bit in ICR0. There are up to seven such bits;
107  * however, we only use the bit 0 and 1 for the rx and tx queue respectively.
108  * These are described by the I40E_INTR_NOTX_{R|T}X_QUEUE and
109  * I40E_INTR_NOTX_{R|T}X_MASK macros respectively.
110  *
111  * Unfortunately, these corresponding queue bits have no corresponding entry in
112  * the ICR0_ENA register. So instead, when enabling interrupts on the queues, we
113  * end up enabling it on the queue registers rather than on the MSI-X registers.
114  * In the MSI-X world, because they can be enabled and disabled, this is
115  * different and the queues can always be enabled and disabled, but the
116  * interrupts themselves are toggled (ignoring the question of interrupt
117  * blanking for polling on rings).
118  *
119  * Finally, we still have to set up the interrupt linked list, but the list is
120  * instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to
121  * one of the other MSI-X registers.
122  *
123  * --------------------
124  * Interrupt Moderation
125  * --------------------
126  *
127  * The XL710 hardware has three different interrupt moderation registers per
128  * interrupt. Unsurprisingly, we use these for:
129  *
130  *   o RX interrupts
131  *   o TX interrupts
132  *   o 'Other interrupts' (link status change, admin queue, etc.)
133  *
134  * By default, we throttle 'other interrupts' the most, then TX interrupts, and
135  * then RX interrupts. The default values for these were based on trying to
136  * reason about both the importance and frequency of events. Generally speaking
137  * 'other interrupts' are not very frequent and they're not important for the
138  * I/O data path in and of itself (though they may indicate issues with the I/O
139  * data path).
140  *
141  * On the flip side, when we're not polling, RX interrupts are very important.
142  * The longer we wait for them, the more latency that we inject into the system.
143  * However, if we allow interrupts to occur too frequently, we risk a few
144  * problems:
145  *
146  *  1) Abusing system resources. Without proper interrupt blanking and polling,
147  *     we can see upwards of 200k-300k interrupts per second on the system.
148  *
149  *  2) Not enough data coalescing to enable polling. In other words, the more
150  *     data that we allow to build up, the more likely we'll be able to enable
151  *     polling mode and allowing us to better handle bulk data.
152  *
153  * In-between the 'other interrupts' and the TX interrupts we have the
154  * reclamation of TX buffers. This operation is not quite as important as we
155  * generally size the ring large enough that we should be able to reclaim a
156  * substantial amount of the descriptors that we have used per interrupt. So
157  * while it's important that this interrupt occur, we don't necessarily need it
158  * firing as frequently as RX; it doesn't, on its own, induce additional latency
159  * into the system.
160  *
161  * Based on all this we currently assign static ITR values for the system. While
162  * we could move to a dynamic system (the hardware supports that), we'd want to
163  * make sure that we're seeing problems from this that we believe would be
164  * generally helped by the added complexity.
165  *
166  * Based on this, the default values that we have allow for the following
167  * interrupt thresholds:
168  *
169  *    o 20k interrupts/s for RX
170  *    o 5k interrupts/s for TX
171  *    o 2k interrupts/s for 'Other Interrupts'
172  */
173 
174 #include "i40e_sw.h"
175 
/*
 * Constants used when MSI-X is not in use and the single tx/rx queue pair
 * shares interrupt zero with the 'other' interrupt causes.
 */
/* Index of the queue pair whose QINT_RQCTL/QINT_TQCTL registers are used. */
#define	I40E_INTR_NOTX_QUEUE	0
/* Value placed in the MSIX_INDX field of the queue control registers. */
#define	I40E_INTR_NOTX_INTR	0
/* MSIX0_INDX value for the rx queue and the ICR0 bit tested for rx work. */
#define	I40E_INTR_NOTX_RX_QUEUE	0
#define	I40E_INTR_NOTX_RX_MASK	(1 << I40E_PFINT_ICR0_QUEUE_0_SHIFT)
/* MSIX0_INDX value for the tx queue and the ICR0 bit tested for tx work. */
#define	I40E_INTR_NOTX_TX_QUEUE	1
#define	I40E_INTR_NOTX_TX_MASK	(1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT)
182 
183 void
184 i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val)
185 {
186 	int i;
187 	i40e_hw_t *hw = &i40e->i40e_hw_space;
188 
189 	VERIFY3U(val, <=, I40E_MAX_ITR);
190 	VERIFY3U(itr, <, I40E_ITR_INDEX_NONE);
191 
192 	/*
193 	 * No matter the interrupt mode, the ITR for other interrupts is always
194 	 * on interrupt zero and the same is true if we're not using MSI-X.
195 	 */
196 	if (itr == I40E_ITR_INDEX_OTHER ||
197 	    i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
198 		I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val);
199 		return;
200 	}
201 
202 	for (i = 1; i < i40e->i40e_intr_count; i++) {
203 		I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i - 1), val);
204 	}
205 }
206 
207 /*
208  * Re-enable the adminq. Note that the adminq doesn't have a traditional queue
209  * associated with it from an interrupt perspective and just lives on ICR0.
210  * However when MSI-X interrupts are not being used, then this also enables and
211  * disables those interrupts.
212  */
213 static void
214 i40e_intr_adminq_enable(i40e_t *i40e)
215 {
216 	i40e_hw_t *hw = &i40e->i40e_hw_space;
217 	uint32_t reg;
218 
219 	reg = I40E_PFINT_DYN_CTL0_INTENA_MASK |
220 	    I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
221 	    (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
222 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
223 	i40e_flush(hw);
224 }
225 
226 static void
227 i40e_intr_adminq_disable(i40e_t *i40e)
228 {
229 	i40e_hw_t *hw = &i40e->i40e_hw_space;
230 	uint32_t reg;
231 
232 	reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
233 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
234 }
235 
236 static void
237 i40e_intr_io_enable(i40e_t *i40e, int vector)
238 {
239 	uint32_t reg;
240 	i40e_hw_t *hw = &i40e->i40e_hw_space;
241 
242 	reg = I40E_PFINT_DYN_CTLN_INTENA_MASK |
243 	    I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
244 	    (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
245 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
246 }
247 
248 static void
249 i40e_intr_io_disable(i40e_t *i40e, int vector)
250 {
251 	uint32_t reg;
252 	i40e_hw_t *hw = &i40e->i40e_hw_space;
253 
254 	reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
255 	I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
256 }
257 
258 /*
259  * When MSI-X interrupts are being used, then we can enable the actual
260  * interrupts themselves. However, when they are not, we instead have to turn
261  * towards the queue's CAUSE_ENA bit and enable that.
262  */
263 void
264 i40e_intr_io_enable_all(i40e_t *i40e)
265 {
266 	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
267 		int i;
268 
269 		for (i = 1; i < i40e->i40e_intr_count; i++) {
270 			i40e_intr_io_enable(i40e, i);
271 		}
272 	} else {
273 		uint32_t reg;
274 		i40e_hw_t *hw = &i40e->i40e_hw_space;
275 
276 		reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
277 		reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
278 		I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
279 
280 		reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
281 		reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
282 		I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
283 	}
284 }
285 
286 /*
287  * When MSI-X interrupts are being used, then we can disable the actual
288  * interrupts themselves. However, when they are not, we instead have to turn
289  * towards the queue's CAUSE_ENA bit and disable that.
290  */
291 void
292 i40e_intr_io_disable_all(i40e_t *i40e)
293 {
294 	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
295 		int i;
296 
297 		for (i = 1; i < i40e->i40e_intr_count; i++) {
298 			i40e_intr_io_disable(i40e, i);
299 		}
300 	} else {
301 		uint32_t reg;
302 		i40e_hw_t *hw = &i40e->i40e_hw_space;
303 
304 		reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
305 		reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
306 		I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
307 
308 		reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
309 		reg &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
310 		I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
311 	}
312 }
313 
314 /*
315  * As part of disabling the tx and rx queue's we're technically supposed to
316  * remove the linked list entries. The simplest way is to clear the LNKLSTN
317  * register by setting it to I40E_QUEUE_TYPE_EOL (0x7FF).
318  *
319  * Note all of the FM register access checks are performed by the caller.
320  */
321 void
322 i40e_intr_io_clear_cause(i40e_t *i40e)
323 {
324 	int i;
325 	i40e_hw_t *hw = &i40e->i40e_hw_space;
326 
327 	if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
328 		uint32_t reg;
329 		reg = I40E_QUEUE_TYPE_EOL;
330 		I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);
331 		return;
332 	}
333 
334 	for (i = 1; i < i40e->i40e_intr_count; i++) {
335 		uint32_t reg;
336 #ifdef DEBUG
337 		/*
338 		 * Verify that the interrupt in question is disabled. This is a
339 		 * prerequisite of modifying the data in question.
340 		 */
341 		reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i - 1));
342 		VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
343 #endif
344 		reg = I40E_QUEUE_TYPE_EOL;
345 		I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i - 1), reg);
346 	}
347 
348 	i40e_flush(hw);
349 }
350 
351 /*
352  * Finalize interrupt handling. Mostly this disables the admin queue.
353  */
354 void
355 i40e_intr_chip_fini(i40e_t *i40e)
356 {
357 #ifdef DEBUG
358 	int i;
359 	uint32_t reg;
360 
361 	i40e_hw_t *hw = &i40e->i40e_hw_space;
362 
363 	/*
364 	 * Take a look and verify that all other interrupts have been disabled
365 	 * and the interrupt linked lists have been zeroed.
366 	 */
367 	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
368 		for (i = 1; i < i40e->i40e_intr_count; i++) {
369 			reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i - 1));
370 			VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
371 
372 			reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1));
373 			VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
374 		}
375 	}
376 #endif
377 
378 	i40e_intr_adminq_disable(i40e);
379 }
380 
381 /*
382  * Enable all of the queues and set the corresponding LNKLSTN registers. Note
383  * that we always enable queues as interrupt sources, even though we don't
384  * enable the MSI-X interrupt vectors.
385  */
386 static void
387 i40e_intr_init_queue_msix(i40e_t *i40e)
388 {
389 	i40e_hw_t *hw = &i40e->i40e_hw_space;
390 	uint32_t reg;
391 
392 	/*
393 	 * Because we only have a single queue, just do something simple now.
394 	 * How this all works will need to really be properly redone based on
395 	 * the bit maps, etc. Note that we skip the ITR logic for the moment,
396 	 * just to make our lives as explicit and simple as possible.
397 	 */
398 	reg = (0 << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
399 	    (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
400 	I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(0), reg);
401 
402 	reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
403 	    (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
404 	    (0 << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
405 	    (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
406 	    I40E_QINT_RQCTL_CAUSE_ENA_MASK;
407 
408 	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(0), reg);
409 
410 	reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
411 	    (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
412 	    (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
413 	    (I40E_QUEUE_TYPE_RX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
414 	    I40E_QINT_TQCTL_CAUSE_ENA_MASK;
415 
416 	I40E_WRITE_REG(hw, I40E_QINT_TQCTL(0), reg);
417 
418 }
419 
420 /*
421  * Set up a single queue to share the admin queue interrupt in the non-MSI-X
422  * world. Note we do not enable the queue as an interrupt cause at this time. We
423  * don't have any other vector of control here, unlike with the MSI-X interrupt
424  * case.
425  */
426 static void
427 i40e_intr_init_queue_shared(i40e_t *i40e)
428 {
429 	i40e_hw_t *hw = &i40e->i40e_hw_space;
430 	uint32_t reg;
431 
432 	VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED ||
433 	    i40e->i40e_intr_type == DDI_INTR_TYPE_MSI);
434 
435 	reg = (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) |
436 	    (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
437 	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);
438 
439 	reg = (I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
440 	    (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
441 	    (I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT) |
442 	    (I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
443 	    (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);
444 
445 	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
446 
447 	reg = (I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
448 	    (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
449 	    (I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT) |
450 	    (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
451 	    (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
452 
453 	I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
454 }
455 
456 /*
457  * Enable the specified queue as a valid source of interrupts. Note, this should
458  * only be used as part of the GLDv3's interrupt blanking routines. The debug
459  * build assertions are specific to that.
460  */
461 void
462 i40e_intr_rx_queue_enable(i40e_t *i40e, uint_t queue)
463 {
464 	uint32_t reg;
465 	i40e_hw_t *hw = &i40e->i40e_hw_space;
466 
467 	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
468 	ASSERT(queue < i40e->i40e_num_trqpairs);
469 
470 	reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
471 	ASSERT0(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK);
472 	reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
473 	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
474 }
475 
476 /*
477  * Disable the specified queue as a valid source of interrupts. Note, this
478  * should only be used as part of the GLDv3's interrupt blanking routines. The
479  * debug build assertions are specific to that.
480  */
481 void
482 i40e_intr_rx_queue_disable(i40e_t *i40e, uint_t queue)
483 {
484 	uint32_t reg;
485 	i40e_hw_t *hw = &i40e->i40e_hw_space;
486 
487 	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
488 	ASSERT(queue < i40e->i40e_num_trqpairs);
489 
490 	reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
491 	ASSERT3U(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==,
492 	    I40E_QINT_RQCTL_CAUSE_ENA_MASK);
493 	reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
494 	I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
495 }
496 
497 /*
498  * Start up the various chip's interrupt handling. We not only configure the
499  * adminq here, but we also go through and configure all of the actual queues,
500  * the interrupt linked lists, and others.
501  */
502 void
503 i40e_intr_chip_init(i40e_t *i40e)
504 {
505 	i40e_hw_t *hw = &i40e->i40e_hw_space;
506 	uint32_t reg;
507 
508 	/*
509 	 * Ensure that all non adminq interrupts are disabled at the chip level.
510 	 */
511 	i40e_intr_io_disable_all(i40e);
512 
513 	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0);
514 	(void) I40E_READ_REG(hw, I40E_PFINT_ICR0);
515 
516 	/*
517 	 * Always enable all of the other-class interrupts to be on their own
518 	 * ITR. This only needs to be set on interrupt zero, which has its own
519 	 * special setting.
520 	 */
521 	reg = I40E_ITR_INDEX_OTHER << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT;
522 	I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, reg);
523 
524 	/*
525 	 * Enable interrupt types we expect to receive. At the moment, this
526 	 * is limited to the adminq; however, we'll want to review 11.2.2.9.22
527 	 * for more types here as we add support for detecting them, handling
528 	 * them, and resetting the device as appropriate.
529 	 */
530 	reg = I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
531 	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);
532 
533 	/*
534 	 * Always set the interrupt linked list to empty. We'll come back and
535 	 * change this if MSI-X are actually on the scene.
536 	 */
537 	I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_TYPE_EOL);
538 
539 	i40e_intr_adminq_enable(i40e);
540 
541 	/*
542 	 * Set up all of the queues and map them to interrupts based on the bit
543 	 * assignments.
544 	 */
545 	if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
546 		i40e_intr_init_queue_msix(i40e);
547 	} else {
548 		i40e_intr_init_queue_shared(i40e);
549 	}
550 
551 	/*
552 	 * Finally set all of the default ITRs for the interrupts. Note that the
553 	 * queues will have been set up above.
554 	 */
555 	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr);
556 	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr);
557 	i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr);
558 }
559 
560 static void
561 i40e_intr_adminq_work(i40e_t *i40e)
562 {
563 	struct i40e_hw *hw = &i40e->i40e_hw_space;
564 	struct i40e_arq_event_info evt;
565 	uint16_t remain = 1;
566 
567 	bzero(&evt, sizeof (struct i40e_arq_event_info));
568 	evt.buf_len = I40E_ADMINQ_BUFSZ;
569 	evt.msg_buf = i40e->i40e_aqbuf;
570 
571 	while (remain != 0) {
572 		enum i40e_status_code ret;
573 		uint16_t opcode;
574 
575 		/*
576 		 * At the moment, the only error code that seems to be returned
577 		 * is one saying that there's no work. In such a case we leave
578 		 * this be.
579 		 */
580 		ret = i40e_clean_arq_element(hw, &evt, &remain);
581 		if (ret != I40E_SUCCESS)
582 			break;
583 
584 		opcode = LE_16(evt.desc.opcode);
585 		switch (opcode) {
586 		case i40e_aqc_opc_get_link_status:
587 			mutex_enter(&i40e->i40e_general_lock);
588 			i40e_link_check(i40e);
589 			mutex_exit(&i40e->i40e_general_lock);
590 			break;
591 		default:
592 			/*
593 			 * Longer term we'll want to enable other causes here
594 			 * and get these cleaned up and doing something.
595 			 */
596 			break;
597 		}
598 	}
599 }
600 
601 static void
602 i40e_intr_rx_work(i40e_t *i40e, int queue)
603 {
604 	mblk_t *mp;
605 	i40e_trqpair_t *itrq;
606 
607 	ASSERT(queue < i40e->i40e_num_trqpairs);
608 	itrq = &i40e->i40e_trqpairs[queue];
609 
610 	mutex_enter(&itrq->itrq_rx_lock);
611 	mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
612 	mutex_exit(&itrq->itrq_rx_lock);
613 
614 	if (mp != NULL) {
615 		mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp,
616 		    itrq->itrq_rxgen);
617 	}
618 }
619 
620 static void
621 i40e_intr_tx_work(i40e_t *i40e, int queue)
622 {
623 	i40e_trqpair_t *itrq;
624 
625 	itrq = &i40e->i40e_trqpairs[queue];
626 	i40e_tx_recycle_ring(itrq);
627 }
628 
629 /*
630  * At the moment, the only 'other' interrupt on ICR0 that we handle is the
631  * adminq. We should go through and support the other notifications at some
632  * point.
633  */
634 static void
635 i40e_intr_other_work(i40e_t *i40e)
636 {
637 	struct i40e_hw *hw = &i40e->i40e_hw_space;
638 	uint32_t reg;
639 
640 	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
641 	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
642 	    DDI_FM_OK) {
643 		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
644 		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
645 		return;
646 	}
647 
648 	if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
649 		i40e_intr_adminq_work(i40e);
650 
651 	/*
652 	 * Make sure that the adminq interrupt is not masked and then explicitly
653 	 * enable the adminq and thus the other interrupt.
654 	 */
655 	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA);
656 	reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
657 	I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);
658 
659 	i40e_intr_adminq_enable(i40e);
660 }
661 
662 uint_t
663 i40e_intr_msix(void *arg1, void *arg2)
664 {
665 	i40e_t *i40e = (i40e_t *)arg1;
666 	int vector_idx = (int)(uintptr_t)arg2;
667 
668 	/*
669 	 * When using MSI-X interrupts, vector 0 is always reserved for the
670 	 * adminq at this time. Though longer term, we'll want to also bridge
671 	 * some I/O to them.
672 	 */
673 	if (vector_idx == 0) {
674 		i40e_intr_other_work(i40e);
675 		return (DDI_INTR_CLAIMED);
676 	}
677 
678 	VERIFY(vector_idx == 1);
679 
680 	/*
681 	 * Note that we explicitly do not check this value under the lock even
682 	 * though assignments to it are done so. In this case, the cost of
683 	 * getting this wrong is at worst a bit of additional contention and
684 	 * even more rarely, a duplicated packet. However, the cost on the other
685 	 * hand is a lot more. This is something that as we more generally
686 	 * implement ring support we should revisit.
687 	 */
688 	if (i40e->i40e_intr_poll != B_TRUE)
689 		i40e_intr_rx_work(i40e, 0);
690 	i40e_intr_tx_work(i40e, 0);
691 	i40e_intr_io_enable(i40e, 1);
692 
693 	return (DDI_INTR_CLAIMED);
694 }
695 
696 static uint_t
697 i40e_intr_notx(i40e_t *i40e, boolean_t shared)
698 {
699 	i40e_hw_t *hw = &i40e->i40e_hw_space;
700 	uint32_t reg;
701 	int ret = DDI_INTR_CLAIMED;
702 
703 	if (shared == B_TRUE) {
704 		mutex_enter(&i40e->i40e_general_lock);
705 		if (i40e->i40e_state & I40E_SUSPENDED) {
706 			mutex_exit(&i40e->i40e_general_lock);
707 			return (DDI_INTR_UNCLAIMED);
708 		}
709 		mutex_exit(&i40e->i40e_general_lock);
710 	}
711 
712 	reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
713 	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
714 	    DDI_FM_OK) {
715 		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
716 		atomic_or_32(&i40e->i40e_state, I40E_ERROR);
717 		return (DDI_INTR_CLAIMED);
718 	}
719 
720 	if (reg == 0) {
721 		if (shared == B_TRUE)
722 			ret = DDI_INTR_UNCLAIMED;
723 		goto done;
724 	}
725 
726 	if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
727 		i40e_intr_adminq_work(i40e);
728 
729 	if (reg & I40E_INTR_NOTX_RX_MASK)
730 		i40e_intr_rx_work(i40e, 0);
731 
732 	if (reg & I40E_INTR_NOTX_TX_MASK)
733 		i40e_intr_tx_work(i40e, 0);
734 
735 done:
736 	i40e_intr_adminq_enable(i40e);
737 	return (ret);
738 
739 }
740 
741 /* ARGSUSED */
742 uint_t
743 i40e_intr_msi(void *arg1, void *arg2)
744 {
745 	i40e_t *i40e = (i40e_t *)arg1;
746 
747 	return (i40e_intr_notx(i40e, B_FALSE));
748 }
749 
750 /* ARGSUSED */
751 uint_t
752 i40e_intr_legacy(void *arg1, void *arg2)
753 {
754 	i40e_t *i40e = (i40e_t *)arg1;
755 
756 	return (i40e_intr_notx(i40e, B_TRUE));
757 }
758