/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
 * Copyright (c) 2017, Joyent, Inc.
 * Copyright 2017 Tegile Systems, Inc. All rights reserved.
 */

/*
 * i40e - Intel 10/40 Gb Ethernet driver
 *
 * The i40e driver is the main software device driver for the Intel 40 Gb
 * family of devices. Note that these devices come in many flavors with both
 * 40 GbE ports and 10 GbE ports. This device is the successor to the 82599
 * family of devices (ixgbe).
 *
 * Unlike previous generations of Intel 1 GbE and 10 GbE devices, the 40 GbE
 * devices defined in the XL710 controller (previously known as Fortville) are
 * a rather different beast and have a small switch embedded inside of them. In
 * addition, the way that most of the programming is done has been overhauled.
 * As opposed to just using PCIe memory mapped registers, it also has an
 * administrative queue which is used to communicate with firmware running on
 * the chip.
 *
 * Each physical function in the hardware shows up as a device that this driver
 * will bind to. The hardware splits many resources evenly across all of the
 * physical functions present on the device, while other resources are instead
 * shared across the entire card and it's up to the device driver to
 * intelligently partition them.
 *
 * ------------
 * Organization
 * ------------
 *
 * This driver is made up of several files which have their own theory
 * statements spread across them. We'll touch on the high level purpose of each
 * file here, and then we'll get into more discussion on how the device is
 * generally modelled with respect to the interfaces in illumos.
 *
 * i40e_gld.c: This file contains all of the bindings to MAC and the networking
 *             stack.
 *
 * i40e_intr.c: This file contains all of the interrupt service routines and
 *              contains logic to enable and disable interrupts on the
 *              hardware. It also contains the logic to map hardware resources
 *              such as the rings to and from interrupts and controls their
 *              ability to fire.
 *
 *              There is a big theory statement on interrupts present there.
 *
 * i40e_main.c: The file that you're currently in. It interfaces with the
 *              traditional OS DDI interfaces and is in charge of configuring
 *              the device.
 *
 * i40e_osdep.[ch]: These files contain interfaces and definitions needed to
 *                  work with Intel's common code for the device.
 *
 * i40e_stats.c: This file contains the general work and logic around our
 *               kstats. A theory statement on their organization and use of
 *               the hardware exists there.
 *
 * i40e_sw.h: This header file contains all of the primary structure
 *            definitions and constants that are used across the entire driver.
 *
 * i40e_transceiver.c: This file contains all of the logic for sending and
 *                     receiving data. It contains all of the ring and DMA
 *                     allocation logic, as well as the actual interfaces to
 *                     send and receive data.
 *
 *                     A big theory statement on ring management, descriptors,
 *                     and how it ties into the OS is present there.
 *
 * --------------
 * General Design
 * --------------
 *
 * Before we go too far into the general way we've laid out data structures and
 * the like, it's worth taking some time to explain how the hardware is
 * organized. This organization informs a lot of how we do things at this time
 * in the driver.
 *
 * Each physical device consists of one or more ports, which are considered
 * physical functions in the PCI sense and thus each get enumerated by the
 * system, resulting in an instance being created and attached to. While there
 * are many resources that are unique to each physical function, e.g. instance
 * of the device, there are many that are shared across all of them. Several
 * resources have an amount reserved for each Virtual Station Interface (VSI)
 * and then a static pool of resources, available for all functions on the
 * card.
 *
 * The most important resources in hardware are its transmit and receive queue
 * pairs (i40e_trqpair_t). These should be thought of as rings in GLDv3
 * parlance. There are a set number of these on each device; however, they are
 * statically partitioned among all of the different physical functions.
 *
 * 'Fortville' (the code name for this device family) is basically a switch. To
 * map MAC addresses and other things to queues, we end up having to create
 * Virtual Station Interfaces (VSIs) and establish forwarding rules that direct
 * traffic to a queue. A VSI owns a collection of queues and has a series of
 * forwarding rules that point to it. One way to think of this is to treat it
 * like MAC does a VNIC. When MAC refers to a group, a collection of rings and
 * classification resources, that is a VSI in i40e.
 *
 * The set of VSIs is shared across the entire device, though there may be some
 * amount that are reserved to each PF. Because the GLDv3 does not let us change
 * the number of groups dynamically, we instead statically divide this amount
 * evenly between all the functions that exist. In addition, we have the same
 * problem with the mac address forwarding rules. There are a static number that
 * exist shared across all the functions.
 *
 * To handle both of these resources, what we end up doing is going through and
 * determining which functions belong to the same device. Nominally one might do
 * this by having a nexus driver; however, a prime requirement for a nexus
 * driver is identifying the various children and activating them. While it is
 * possible to get this information from NVRAM, we would end up duplicating a
 * lot of the PCI enumeration logic. Really, at the end of the day, the device
 * doesn't give us the traditional identification properties we want from a
 * nexus driver.
 *
 * Instead, we rely on some properties that are guaranteed to be unique. While
 * it might be tempting to leverage the PBA or serial number of the device from
 * NVRAM, there is nothing that says that two devices can't be mis-programmed to
 * have the same values in NVRAM. Instead, we uniquely identify a group of
 * functions based on their parent in the /devices tree and their PCI bus and
 * PCI device identifiers. Using any one of these on its own may not be
 * sufficient.
 *
 * For each unique PCI device that we encounter, we'll create an i40e_device_t.
 * From there, because we don't have a good way to tell the GLDv3 about sharing
 * resources between everything, we'll end up just dividing the resources
 * evenly between all of the functions. Longer term, if we don't have to declare
 * to the GLDv3 that these resources are shared, then we'll maintain a pool and
 * have each PF allocate from the pool in the device; thus if only two of four
 * ports are being used, for example, then all of the resources can still be
 * used.
 *
 * -------------------------------------------
 * Transmit and Receive Queue Pair Allocations
 * -------------------------------------------
 *
 * NVRAM ends up assigning each PF its own share of the transmit and receive
 * LAN queue pairs; we have no way of modifying it, only observing it. From
 * there, it's up to us to map these queues to VSIs and VFs. Since we don't
 * support any VFs at this time, we only focus on assignments to VSIs.
 *
 * At the moment, we use a static mapping of transmit/receive queue pairs to a
 * given VSI (e.g. rings to a group). Though in the fullness of time, we want to
 * make this something which is fully dynamic and take advantage of documented,
 * but not yet available functionality for adding filters based on VXLAN and
 * other encapsulation technologies.
 *
 * -------------------------------------
 * Broadcast, Multicast, and Promiscuous
 * -------------------------------------
 *
 * As part of the GLDv3, we need to make sure that we can handle receiving
 * broadcast and multicast traffic, as well as enabling promiscuous mode when
 * requested. GLDv3 requires that all broadcast and multicast traffic be
 * retrieved by the default group, e.g. the first one. This is the same thing
 * as the default VSI.
 *
 * To receive broadcast traffic, we enable it through the admin queue, rather
 * than use one of our filters for it. For multicast traffic, we reserve a
 * certain number of the hash filters and assign them to a given PF. When we
 * exceed those, we then switch to using promiscuous mode for multicast traffic.
 *
 * More specifically, once we exceed the number of filters (indicated because
 * the i40e_t`i40e_resources.ifr_nmcastfilt ==
 * i40e_t`i40e_resources.ifr_nmcastfilt_used), we then instead need to toggle
 * promiscuous mode. If promiscuous mode is toggled then we keep track of the
 * number of MACs added to it by incrementing i40e_t`i40e_mcast_promisc_count.
 * That will stay enabled until that count reaches zero, indicating that we have
 * only added multicast addresses that we have a corresponding entry for. A
 * sketch of this bookkeeping follows.
 *
 * Because MAC itself wants to toggle promiscuous mode, which includes both
 * unicast and multicast traffic, we go through and keep track of that
 * ourselves. That is maintained through the use of the i40e_t`i40e_promisc_on
 * member.
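 *
 * As a sketch of that bookkeeping (illustrative only; the actual filter
 * programming is elided and the parenthesized steps are not literal driver
 * functions), adding a multicast address looks roughly like:
 *
 *	if (i40e->i40e_resources.ifr_nmcastfilt_used <
 *	    i40e->i40e_resources.ifr_nmcastfilt) {
 *		(program a reserved multicast hash filter)
 *		i40e->i40e_resources.ifr_nmcastfilt_used++;
 *	} else {
 *		if (i40e->i40e_mcast_promisc_count == 0)
 *			(enable multicast promiscuous mode)
 *		i40e->i40e_mcast_promisc_count++;
 *	}
 *
 * Removal is the inverse: free the hash filter, or decrement
 * i40e_mcast_promisc_count and disable multicast promiscuous mode when the
 * count returns to zero.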
 *
 * --------------
 * VSI Management
 * --------------
 *
 * At this time, we currently only support a single MAC group, and thus a
 * single VSI. This VSI is considered the default VSI and should be the only
 * one that exists after a reset. Currently it is stored as the member
 * i40e_t`i40e_vsi_id. While this works for the moment and for an initial
 * driver, it's not sufficient for the longer-term path of the driver. Instead,
 * we'll want to actually have a unique i40e_vsi_t structure which is used
 * everywhere. Note that this means that every place that uses the
 * i40e_t`i40e_vsi_id will need to be refactored.
 *
 * ----------------
 * Structure Layout
 * ----------------
 *
 * The following image relates the core data structures together. The primary
 * structure in the system is the i40e_t. It itself contains multiple rings,
 * i40e_trqpair_t's which contain the various transmit and receive data. The
 * receive data is stored outside of the i40e_trqpair_t and instead in the
 * i40e_rx_data_t. The i40e_t has a corresponding i40e_device_t which keeps
 * track of per-physical device state. Finally, for every active descriptor,
 * there is a corresponding control block, which is where the
 * i40e_rx_control_block_t and the i40e_tx_control_block_t come from.
 *
 * +-----------------------+       +-----------------------+
 * | Global i40e_t list    |       | Global Device list    |
 * |                       |    +--|                       |
 * | i40e_glist            |    |  | i40e_dlist            |
 * +-----------------------+    |  +-----------------------+
 *     |                        v
 *     |    +------------------------+      +-----------------------+
 *     |    | Device-wide Structure  |----->| Device-wide Structure |--> ...
 *     |    | i40e_device_t          |      | i40e_device_t         |
 *     |    |                        |      +-----------------------+
 *     |    | dev_info_t *     ------+--> Parent in devices tree.
 *     |    | uint_t           ------+--> PCI bus number
 *     |    | uint_t           ------+--> PCI device number
 *     |    | uint_t           ------+--> Number of functions
 *     |    | i40e_switch_rsrcs_t ---+--> Captured total switch resources
 *     |    | list_t           ------+-------------+
 *     |    +------------------------+             |
 *     |                 ^                         |
 *     |                 +------------------+      |
 *     |                                    |      v
 *     |   +----------------------------+   |   +-------------------+
 *     +-->| GLDv3 Device, per PF       |---|-->| GLDv3 Device (PF) |--> ...
 *         | i40e_t                     |   |   | i40e_t            |
 *         | **Primary Structure**      |   |   +-------------------+
 *         |                            |   |
 *         | i40e_device_t *          --+---+
 *         | i40e_state_t             --+---> Device State
 *         | i40e_hw_t                --+---> Intel common code structure
 *         | mac_handle_t             --+---> GLDv3 handle to MAC
 *         | ddi_periodic_t           --+---> Link activity timer
 *         | int (vsi_id)             --+---> VSI ID, main identifier
 *         | i40e_func_rsrc_t         --+---> Available hardware resources
 *         | i40e_switch_rsrc_t *     --+---> Switch resource snapshot
 *         | i40e_sdu                 --+---> Current MTU
 *         | i40e_frame_max           --+---> Current HW frame size
 *         | i40e_uaddr_t *           --+---> Array of assigned unicast MACs
 *         | i40e_maddr_t *           --+---> Array of assigned multicast MACs
 *         | i40e_mcast_promisc_count --+---> Active multicast state
 *         | i40e_promisc_on          --+---> Current promiscuous mode state
 *         | int                      --+---> Number of transmit/receive pairs
 *         | kstat_t *                --+---> PF kstats
 *         | kstat_t *                --+---> VSI kstats
 *         | i40e_pf_stats_t          --+---> PF kstat backing data
 *         | i40e_vsi_stats_t         --+---> VSI kstat backing data
 *         | i40e_trqpair_t *         --+---------+
 *         +----------------------------+         |
 *                                                |
 *                                                v
 * +-------------------------------+     +-----------------------------+
 * | Transmit/Receive Queue Pair   |-----| Transmit/Receive Queue Pair |->...
 * | i40e_trqpair_t                |     | i40e_trqpair_t              |
 * | Ring Data Structure           |     +-----------------------------+
 * |                               |
 * | mac_ring_handle_t            +--> MAC RX ring handle
 * | mac_ring_handle_t            +--> MAC TX ring handle
 * | i40e_rxq_stat_t            --+--> RX Queue stats
 * | i40e_txq_stat_t            --+--> TX Queue stats
 * | uint32_t (tx ring size)      +--> TX Ring Size
 * | uint32_t (tx free list size) +--> TX Free List Size
 * | i40e_dma_buffer_t          --+--> TX Descriptor ring DMA
 * | i40e_tx_desc_t *           --+--> TX descriptor ring
 * | volatile uint32_t *          +--> TX Write back head
 * | uint32_t                   --+--> TX ring head
 * | uint32_t                   --+--> TX ring tail
 * | uint32_t                   --+--> Num TX desc free
 * | i40e_tx_control_block_t *  --+--> TX control block array   ---+
 * | i40e_tx_control_block_t ** --+--> TCB work list             --+
 * | i40e_tx_control_block_t ** --+--> TCB free list             --+
 * | uint32_t                   --+--> Free TCB count              |
 * | i40e_rx_data_t *           --+--+                             v
 * +-------------------------------+  |     +---------------------------+
 *                                    |     | Per-TX Frame Metadata     |
 *                                    |     | i40e_tx_control_block_t   |
 *              +---------------------+     |                           |
 *              |     mblk to transmit <--+--- mblk_t *                 |
 *              |     type of transmit <--+--- i40e_tx_type_t           |
 *              |        TX DMA handle <--+--- ddi_dma_handle_t         |
 *              v        TX DMA buffer <--+--- i40e_dma_buffer_t        |
 * +------------------------------+        +---------------------------+
 * | Core Receive Data            |
 * | i40e_rx_data_t               |
 * |                              |
 * | i40e_dma_buffer_t          --+--> RX descriptor DMA Data
 * | i40e_rx_desc_t             --+--> RX descriptor ring
 * | uint32_t                   --+--> Next free desc.
 * | i40e_rx_control_block_t *  --+--> RX Control Block Array   ---+
 * | i40e_rx_control_block_t ** --+--> RCB work list             --+
 * | i40e_rx_control_block_t ** --+--> RCB free list             --+
 * +------------------------------+                                |
 *             ^                                                   |
 *             |    +---------------------------+                  |
 *             |    | Per-RX Frame Metadata     |<-----------------+
 *             |    | i40e_rx_control_block_t   |
 *             |    |                           |
 *             |    | mblk_t *              ----+--> Received mblk_t data
 *             |    | uint32_t              ----+--> Reference count
 *             |    | i40e_dma_buffer_t     ----+--> Receive data DMA info
 *             |    | frtn_t                ----+--> mblk free function info
 *             +----+-- i40e_rx_data_t *        |
 *                  +---------------------------+
 *
 * -------------
 * Lock Ordering
 * -------------
 *
 * In order to ensure that we don't deadlock, the following represents the
 * lock ordering being used. When grabbing locks, follow this order. Lower
 * numbers are more important. Thus, the i40e_glock, which is number 0, must be
 * taken before any other locks in the driver. On the other hand, the
 * i40e_t`i40e_stat_lock has the highest number because it's the least
 * important lock. Note that just because one lock is higher than another does
 * not mean that all intermediary locks are required.
 *
 * 0) i40e_glock
 * 1) i40e_t`i40e_general_lock
 *
 * 2) i40e_trqpair_t`itrq_rx_lock
 * 3) i40e_trqpair_t`itrq_tx_lock
 * 4) i40e_t`i40e_rx_pending_lock
 * 5) i40e_trqpair_t`itrq_tcb_lock
 *
 * 6) i40e_t`i40e_stat_lock
 *
 * Rules and expectations (a short sketch follows the list):
 *
 * 1) A thread holding locks belonging to one PF should not hold locks
 * belonging to a second. If for some reason this becomes necessary, locks
 * should be grabbed based on the list order in the i40e_device_t, which
 * implies that the i40e_glock is held.
 *
 * 2) When grabbing locks between multiple transmit and receive queues, the
 * locks for the lowest number transmit/receive queue should be grabbed first.
 *
 * 3) When grabbing both the transmit and receive lock for a given queue,
 * always grab i40e_trqpair_t`itrq_rx_lock before the
 * i40e_trqpair_t`itrq_tx_lock.
 *
 * 4) The following pairs of locks are not expected to be held at the same
 * time:
 *
 * o i40e_t`i40e_rx_pending_lock and i40e_trqpair_t`itrq_tcb_lock
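 *
 * As an illustrative sketch of rules 2) and 3) combined (this is not code
 * from the driver, just the shape those rules imply), a thread that needed
 * both the rx and tx locks of queue pairs 0 and 2 would take them as:
 *
 *	mutex_enter(&i40e->i40e_trqpairs[0].itrq_rx_lock);
 *	mutex_enter(&i40e->i40e_trqpairs[0].itrq_tx_lock);
 *	mutex_enter(&i40e->i40e_trqpairs[2].itrq_rx_lock);
 *	mutex_enter(&i40e->i40e_trqpairs[2].itrq_tx_lock);
 *
 * That is, all of the lowest-numbered queue's locks come first, and within
 * a given queue, rx always precedes tx.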
 *
 * -----------
 * Future Work
 * -----------
 *
 * At the moment the i40e_t driver is rather bare bones, allowing us to start
 * getting data flowing and folks using it while we develop additional features.
 * While bugs have been filed to cover this future work, the following gives an
 * overview of expected work:
 *
 *  o TSO support
 *  o Multiple group support
 *  o DMA binding and breaking up the locking in ring recycling.
 *  o Enhanced detection of device errors
 *  o Participation in IRM
 *  o FMA device reset
 *  o Stall detection, temperature error detection, etc.
 *  o More dynamic resource pools
 */

#include "i40e_sw.h"

static char i40e_ident[] = "Intel 10/40Gb Ethernet v1.0.1";

/*
 * The i40e_glock primarily protects the lists below and the i40e_device_t
 * structures.
 */
static kmutex_t i40e_glock;
static list_t i40e_glist;
static list_t i40e_dlist;

/*
 * Access attributes for register mapping.
 */
static ddi_device_acc_attr_t i40e_regs_acc_attr = {
	DDI_DEVICE_ATTR_V1,
	DDI_STRUCTURE_LE_ACC,
	DDI_STRICTORDER_ACC,
	DDI_FLAGERR_ACC
};

/*
 * Logging function for this driver.
 */
static void
i40e_dev_err(i40e_t *i40e, int level, boolean_t console, const char *fmt,
    va_list ap)
{
	char buf[1024];

	(void) vsnprintf(buf, sizeof (buf), fmt, ap);

	if (i40e == NULL) {
		cmn_err(level, (console) ? "%s: %s" : "!%s: %s",
		    I40E_MODULE_NAME, buf);
	} else {
		dev_err(i40e->i40e_dip, level, (console) ? "%s" : "!%s",
		    buf);
	}
}

/*
 * Because there's the stupid trailing-comma problem with the C preprocessor
 * and variable arguments, I need to instantiate these. Pardon the redundant
 * code.
 */
/*PRINTFLIKE2*/
void
i40e_error(i40e_t *i40e, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	i40e_dev_err(i40e, CE_WARN, B_FALSE, fmt, ap);
	va_end(ap);
}

/*PRINTFLIKE2*/
void
i40e_log(i40e_t *i40e, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	i40e_dev_err(i40e, CE_NOTE, B_FALSE, fmt, ap);
	va_end(ap);
}

/*PRINTFLIKE2*/
void
i40e_notice(i40e_t *i40e, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	i40e_dev_err(i40e, CE_NOTE, B_TRUE, fmt, ap);
	va_end(ap);
}
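
/*
 * To illustrate the trailing-comma problem referenced above (a sketch with a
 * hypothetical helper, not driver code): a macro such as
 *
 *	#define	i40e_error(i40e, fmt, ...)	\
 *		i40e_dev_verr(i40e, CE_WARN, B_FALSE, fmt, __VA_ARGS__)
 *
 * breaks for calls with no variable arguments, e.g. i40e_error(i40e, "oops"),
 * because the expansion leaves a dangling comma before the closing
 * parenthesis. Standard C has no portable way to elide that comma, hence the
 * three explicit wrapper functions above.
 */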
/*
 * Various parts of the driver need to know if the controller is from the X722
 * family, which has a few additional capabilities and different programming
 * means. We don't consider virtual functions as part of this as they are quite
 * different and will require substantially more work.
 */
static boolean_t
i40e_is_x722(i40e_t *i40e)
{
	return (i40e->i40e_hw_space.mac.type == I40E_MAC_X722);
}

static void
i40e_device_rele(i40e_t *i40e)
{
	i40e_device_t *idp = i40e->i40e_device;

	if (idp == NULL)
		return;

	mutex_enter(&i40e_glock);
	VERIFY(idp->id_nreg > 0);
	list_remove(&idp->id_i40e_list, i40e);
	idp->id_nreg--;
	if (idp->id_nreg == 0) {
		list_remove(&i40e_dlist, idp);
		list_destroy(&idp->id_i40e_list);
		kmem_free(idp->id_rsrcs, sizeof (i40e_switch_rsrc_t) *
		    idp->id_rsrcs_alloc);
		kmem_free(idp, sizeof (i40e_device_t));
	}
	i40e->i40e_device = NULL;
	mutex_exit(&i40e_glock);
}

static i40e_device_t *
i40e_device_find(i40e_t *i40e, dev_info_t *parent, uint_t bus, uint_t device)
{
	i40e_device_t *idp;
	mutex_enter(&i40e_glock);
	for (idp = list_head(&i40e_dlist); idp != NULL;
	    idp = list_next(&i40e_dlist, idp)) {
		if (idp->id_parent == parent && idp->id_pci_bus == bus &&
		    idp->id_pci_device == device) {
			break;
		}
	}

	if (idp != NULL) {
		VERIFY(idp->id_nreg < idp->id_nfuncs);
		idp->id_nreg++;
	} else {
		i40e_hw_t *hw = &i40e->i40e_hw_space;
		ASSERT(hw->num_ports > 0);
		ASSERT(hw->num_partitions > 0);

		/*
		 * The Intel common code doesn't exactly keep the number of PCI
		 * functions. But it calculates it during discovery of
		 * partitions and ports. So what we do is undo the calculation
		 * that it does originally, as functions are evenly spread
		 * across ports in the rare case of partitions.
		 */
		idp = kmem_alloc(sizeof (i40e_device_t), KM_SLEEP);
		idp->id_parent = parent;
		idp->id_pci_bus = bus;
		idp->id_pci_device = device;
		idp->id_nfuncs = hw->num_ports * hw->num_partitions;
		idp->id_nreg = 1;
		idp->id_rsrcs_alloc = i40e->i40e_switch_rsrc_alloc;
		idp->id_rsrcs_act = i40e->i40e_switch_rsrc_actual;
		idp->id_rsrcs = kmem_alloc(sizeof (i40e_switch_rsrc_t) *
		    idp->id_rsrcs_alloc, KM_SLEEP);
		bcopy(i40e->i40e_switch_rsrcs, idp->id_rsrcs,
		    sizeof (i40e_switch_rsrc_t) * idp->id_rsrcs_alloc);
		list_create(&idp->id_i40e_list, sizeof (i40e_t),
		    offsetof(i40e_t, i40e_dlink));

		list_insert_tail(&i40e_dlist, idp);
	}

	list_insert_tail(&idp->id_i40e_list, i40e);
	mutex_exit(&i40e_glock);

	return (idp);
}

static void
i40e_link_state_set(i40e_t *i40e, link_state_t state)
{
	if (i40e->i40e_link_state == state)
		return;

	i40e->i40e_link_state = state;
	mac_link_update(i40e->i40e_mac_hdl, i40e->i40e_link_state);
}

/*
 * This is a basic link check routine. Mostly we're using this just to see
 * if we can get any accurate information about the state of the link being
 * up or down, as well as updating the link state, speed, etc. information.
 */
void
i40e_link_check(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	boolean_t ls;
	int ret;

	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));

	hw->phy.get_link_info = B_TRUE;
	if ((ret = i40e_get_link_status(hw, &ls)) != I40E_SUCCESS) {
		i40e->i40e_s_link_status_errs++;
		i40e->i40e_s_link_status_lasterr = ret;
		return;
	}

	/*
	 * Firmware abstracts all of the mac and phy information for us, so we
	 * can use i40e_get_link_status to determine the current state.
	 */
	if (ls == B_TRUE) {
		enum i40e_aq_link_speed speed;

		speed = i40e_get_link_speed(hw);

		/*
		 * Translate from an i40e value to a value in Mbits/s.
		 */
		switch (speed) {
		case I40E_LINK_SPEED_100MB:
			i40e->i40e_link_speed = 100;
			break;
		case I40E_LINK_SPEED_1GB:
			i40e->i40e_link_speed = 1000;
			break;
		case I40E_LINK_SPEED_10GB:
			i40e->i40e_link_speed = 10000;
			break;
		case I40E_LINK_SPEED_20GB:
			i40e->i40e_link_speed = 20000;
			break;
		case I40E_LINK_SPEED_40GB:
			i40e->i40e_link_speed = 40000;
			break;
		case I40E_LINK_SPEED_25GB:
			i40e->i40e_link_speed = 25000;
			break;
		default:
			i40e->i40e_link_speed = 0;
			break;
		}

		/*
		 * At this time, hardware does not support half-duplex
		 * operation, hence why we don't ask the hardware about our
		 * current duplex.
		 */
		i40e->i40e_link_duplex = LINK_DUPLEX_FULL;
		i40e_link_state_set(i40e, LINK_STATE_UP);
	} else {
		i40e->i40e_link_speed = 0;
		i40e->i40e_link_duplex = 0;
		i40e_link_state_set(i40e, LINK_STATE_DOWN);
	}
}

static void
i40e_rem_intrs(i40e_t *i40e)
{
	int i, rc;

	for (i = 0; i < i40e->i40e_intr_count; i++) {
		rc = ddi_intr_free(i40e->i40e_intr_handles[i]);
		if (rc != DDI_SUCCESS) {
			i40e_log(i40e, "failed to free interrupt %d: %d",
			    i, rc);
		}
	}

	kmem_free(i40e->i40e_intr_handles, i40e->i40e_intr_size);
	i40e->i40e_intr_handles = NULL;
}

static void
i40e_rem_intr_handlers(i40e_t *i40e)
{
	int i, rc;

	for (i = 0; i < i40e->i40e_intr_count; i++) {
		rc = ddi_intr_remove_handler(i40e->i40e_intr_handles[i]);
		if (rc != DDI_SUCCESS) {
			i40e_log(i40e, "failed to remove interrupt %d: %d",
			    i, rc);
		}
	}
}

/*
 * illumos Fault Management Architecture (FMA) support.
 */

int
i40e_check_acc_handle(ddi_acc_handle_t handle)
{
	ddi_fm_error_t de;

	ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
	ddi_fm_acc_err_clear(handle, DDI_FME_VERSION);
	return (de.fme_status);
}

int
i40e_check_dma_handle(ddi_dma_handle_t handle)
{
	ddi_fm_error_t de;

	ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
	return (de.fme_status);
}

/*
 * Fault service error handling callback function.
 */
/* ARGSUSED */
static int
i40e_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
{
	pci_ereport_post(dip, err, NULL);
	return (err->fme_status);
}

static void
i40e_fm_init(i40e_t *i40e)
{
	ddi_iblock_cookie_t iblk;

	i40e->i40e_fm_capabilities = ddi_prop_get_int(DDI_DEV_T_ANY,
	    i40e->i40e_dip, DDI_PROP_DONTPASS, "fm_capable",
	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);

	if (i40e->i40e_fm_capabilities < 0) {
		i40e->i40e_fm_capabilities = 0;
	} else if (i40e->i40e_fm_capabilities > 0xf) {
		i40e->i40e_fm_capabilities = DDI_FM_EREPORT_CAPABLE |
		    DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE |
		    DDI_FM_ERRCB_CAPABLE;
	}

	/*
	 * Only register with IO Fault Services if we have some capability
	 */
	if (i40e->i40e_fm_capabilities & DDI_FM_ACCCHK_CAPABLE) {
		i40e_regs_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;
	} else {
		i40e_regs_acc_attr.devacc_attr_access = DDI_DEFAULT_ACC;
	}

	if (i40e->i40e_fm_capabilities) {
		ddi_fm_init(i40e->i40e_dip, &i40e->i40e_fm_capabilities, &iblk);

		if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities) ||
		    DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities)) {
			pci_ereport_setup(i40e->i40e_dip);
		}

		if (DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities)) {
			ddi_fm_handler_register(i40e->i40e_dip,
			    i40e_fm_error_cb, (void *)i40e);
		}
	}

	if (i40e->i40e_fm_capabilities & DDI_FM_DMACHK_CAPABLE) {
		i40e_init_dma_attrs(i40e, B_TRUE);
	} else {
		i40e_init_dma_attrs(i40e, B_FALSE);
	}
}

static void
i40e_fm_fini(i40e_t *i40e)
{
	if (i40e->i40e_fm_capabilities) {

		if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities) ||
		    DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities))
			pci_ereport_teardown(i40e->i40e_dip);

		if (DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities))
			ddi_fm_handler_unregister(i40e->i40e_dip);

		ddi_fm_fini(i40e->i40e_dip);
	}
}

void
i40e_fm_ereport(i40e_t *i40e, char *detail)
{
	uint64_t ena;
	char buf[FM_MAX_CLASS];

	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
	ena = fm_ena_generate(0, FM_ENA_FMT1);
	if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities)) {
		ddi_fm_ereport_post(i40e->i40e_dip, buf, ena, DDI_NOSLEEP,
		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
	}
}

/*
 * Here we're trying to get the ID of the default VSI. In general, when we come
 * through and look at this shortly after attach, we expect there to only be a
 * single element present, which is the default VSI. Importantly, each PF seems
 * to not see any other devices, in part because of the simple switch mode that
 * we're using. If for some reason we see more artifacts, we'll need to revisit
 * what we're doing here.
 */
static int
i40e_get_vsi_id(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	struct i40e_aqc_get_switch_config_resp *sw_config;
	uint8_t aq_buf[I40E_AQ_LARGE_BUF];
	uint16_t next = 0;
	int rc;

	/* LINTED: E_BAD_PTR_CAST_ALIGN */
	sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf;
	rc = i40e_aq_get_switch_config(hw, sw_config, sizeof (aq_buf), &next,
	    NULL);
	if (rc != I40E_SUCCESS) {
		i40e_error(i40e, "i40e_aq_get_switch_config() failed %d: %d",
		    rc, hw->aq.asq_last_status);
		return (-1);
	}

	if (LE_16(sw_config->header.num_reported) != 1) {
		i40e_error(i40e, "encountered multiple (%d) switching units "
		    "during attach, not proceeding",
		    LE_16(sw_config->header.num_reported));
		return (-1);
	}

	return (sw_config->element[0].seid);
}

/*
 * We need to fill the i40e_hw_t structure with the capabilities of this PF. We
 * must also provide the memory for it; however, we don't need to keep it
 * around after the call to the common code, which takes the buffer and parses
 * it into an internal structure.
 */
static boolean_t
i40e_get_hw_capabilities(i40e_t *i40e, i40e_hw_t *hw)
{
	struct i40e_aqc_list_capabilities_element_resp *buf;
	int rc;
	size_t len;
	uint16_t needed;
	int nelems = I40E_HW_CAP_DEFAULT;

	len = nelems * sizeof (*buf);

	for (;;) {
		ASSERT(len > 0);
		buf = kmem_alloc(len, KM_SLEEP);
		rc = i40e_aq_discover_capabilities(hw, buf, len,
		    &needed, i40e_aqc_opc_list_func_capabilities, NULL);
		kmem_free(buf, len);

		if (hw->aq.asq_last_status == I40E_AQ_RC_ENOMEM &&
		    nelems == I40E_HW_CAP_DEFAULT) {
			if (nelems == needed) {
				i40e_error(i40e, "Capability discovery failed "
				    "due to byzantine common code");
				return (B_FALSE);
			}
			len = needed;
			continue;
		} else if (rc != I40E_SUCCESS ||
		    hw->aq.asq_last_status != I40E_AQ_RC_OK) {
			i40e_error(i40e, "Capability discovery failed: %d", rc);
			return (B_FALSE);
		}

		break;
	}

	return (B_TRUE);
}

/*
 * Obtain the switch's capabilities as seen by this PF and keep it around for
 * our later use.
 */
static boolean_t
i40e_get_switch_resources(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	uint8_t cnt = 2;
	uint8_t act;
	size_t size;
	i40e_switch_rsrc_t *buf;

	for (;;) {
		enum i40e_status_code ret;
		size = cnt * sizeof (i40e_switch_rsrc_t);
		ASSERT(size > 0);
		if (size > UINT16_MAX)
			return (B_FALSE);
		buf = kmem_alloc(size, KM_SLEEP);

		ret = i40e_aq_get_switch_resource_alloc(hw, &act, buf,
		    cnt, NULL);
		if (ret == I40E_ERR_ADMIN_QUEUE_ERROR &&
		    hw->aq.asq_last_status == I40E_AQ_RC_EINVAL) {
			kmem_free(buf, size);
			cnt += I40E_SWITCH_CAP_DEFAULT;
			continue;
		} else if (ret != I40E_SUCCESS) {
			kmem_free(buf, size);
			i40e_error(i40e,
			    "failed to retrieve switch resources: %d", ret);
			return (B_FALSE);
		}

		break;
	}

	i40e->i40e_switch_rsrc_alloc = cnt;
	i40e->i40e_switch_rsrc_actual = act;
	i40e->i40e_switch_rsrcs = buf;

	return (B_TRUE);
}

static void
i40e_cleanup_resources(i40e_t *i40e)
{
	if (i40e->i40e_uaddrs != NULL) {
		kmem_free(i40e->i40e_uaddrs, sizeof (i40e_uaddr_t) *
		    i40e->i40e_resources.ifr_nmacfilt);
		i40e->i40e_uaddrs = NULL;
	}

	if (i40e->i40e_maddrs != NULL) {
		kmem_free(i40e->i40e_maddrs, sizeof (i40e_maddr_t) *
		    i40e->i40e_resources.ifr_nmcastfilt);
		i40e->i40e_maddrs = NULL;
	}

	if (i40e->i40e_switch_rsrcs != NULL) {
		size_t sz = sizeof (i40e_switch_rsrc_t) *
		    i40e->i40e_switch_rsrc_alloc;
		ASSERT(sz > 0);
		kmem_free(i40e->i40e_switch_rsrcs, sz);
		i40e->i40e_switch_rsrcs = NULL;
	}

	if (i40e->i40e_device != NULL)
		i40e_device_rele(i40e);
}

static boolean_t
i40e_get_available_resources(i40e_t *i40e)
{
	dev_info_t *parent;
	uint16_t bus, device, func;
	uint_t nregs;
	int *regs, i;
	i40e_device_t *idp;
	i40e_hw_t *hw = &i40e->i40e_hw_space;

	parent = ddi_get_parent(i40e->i40e_dip);

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, i40e->i40e_dip, 0, "reg",
	    &regs, &nregs) != DDI_PROP_SUCCESS) {
		return (B_FALSE);
	}

	if (nregs < 1) {
		ddi_prop_free(regs);
		return (B_FALSE);
	}

	bus = PCI_REG_BUS_G(regs[0]);
	device = PCI_REG_DEV_G(regs[0]);
	func = PCI_REG_FUNC_G(regs[0]);
	ddi_prop_free(regs);

	i40e->i40e_hw_space.bus.func = func;
	i40e->i40e_hw_space.bus.device = device;

	if (i40e_get_switch_resources(i40e) == B_FALSE) {
		return (B_FALSE);
	}

	/*
	 * To calculate the total amount of a resource we have available, we
	 * need to add how many our i40e_t thinks it has guaranteed, if any,
	 * and then we need to go through and divide the number available on
	 * the device, which was snapshotted before anyone should have
	 * allocated anything, and use that to derive how many are available
	 * from the pool. Longer term, we may want to turn this into something
	 * that's more of a pool-like resource that everything can share
	 * (though that may require some more assistance from MAC).
	 *
	 * Though for transmit and receive queue pairs, we just have to ask
	 * firmware instead.
	 */
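	/*
	 * For example (numbers invented for illustration): if this PF is one
	 * of four functions on the device, is guaranteed 8 MAC filters, and
	 * the device-wide snapshot shows 64 unallocated MAC filters, then
	 * this PF will advertise 8 + (64 / 4) = 24 MAC filters.
	 */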
	idp = i40e_device_find(i40e, parent, bus, device);
	i40e->i40e_device = idp;
	i40e->i40e_resources.ifr_nvsis = 0;
	i40e->i40e_resources.ifr_nvsis_used = 0;
	i40e->i40e_resources.ifr_nmacfilt = 0;
	i40e->i40e_resources.ifr_nmacfilt_used = 0;
	i40e->i40e_resources.ifr_nmcastfilt = 0;
	i40e->i40e_resources.ifr_nmcastfilt_used = 0;

	for (i = 0; i < i40e->i40e_switch_rsrc_actual; i++) {
		i40e_switch_rsrc_t *srp = &i40e->i40e_switch_rsrcs[i];

		switch (srp->resource_type) {
		case I40E_AQ_RESOURCE_TYPE_VSI:
			i40e->i40e_resources.ifr_nvsis +=
			    LE_16(srp->guaranteed);
			i40e->i40e_resources.ifr_nvsis_used = LE_16(srp->used);
			break;
		case I40E_AQ_RESOURCE_TYPE_MACADDR:
			i40e->i40e_resources.ifr_nmacfilt +=
			    LE_16(srp->guaranteed);
			i40e->i40e_resources.ifr_nmacfilt_used =
			    LE_16(srp->used);
			break;
		case I40E_AQ_RESOURCE_TYPE_MULTICAST_HASH:
			i40e->i40e_resources.ifr_nmcastfilt +=
			    LE_16(srp->guaranteed);
			i40e->i40e_resources.ifr_nmcastfilt_used =
			    LE_16(srp->used);
			break;
		default:
			break;
		}
	}

	for (i = 0; i < idp->id_rsrcs_act; i++) {
		i40e_switch_rsrc_t *srp = &i40e->i40e_switch_rsrcs[i];
		switch (srp->resource_type) {
		case I40E_AQ_RESOURCE_TYPE_VSI:
			i40e->i40e_resources.ifr_nvsis +=
			    LE_16(srp->total_unalloced) / idp->id_nfuncs;
			break;
		case I40E_AQ_RESOURCE_TYPE_MACADDR:
			i40e->i40e_resources.ifr_nmacfilt +=
			    LE_16(srp->total_unalloced) / idp->id_nfuncs;
			break;
		case I40E_AQ_RESOURCE_TYPE_MULTICAST_HASH:
			i40e->i40e_resources.ifr_nmcastfilt +=
			    LE_16(srp->total_unalloced) / idp->id_nfuncs;
			break;
		default:
			break;
		}
	}

	i40e->i40e_resources.ifr_nrx_queue = hw->func_caps.num_rx_qp;
	i40e->i40e_resources.ifr_ntx_queue = hw->func_caps.num_tx_qp;

	i40e->i40e_uaddrs = kmem_zalloc(sizeof (i40e_uaddr_t) *
	    i40e->i40e_resources.ifr_nmacfilt, KM_SLEEP);
	i40e->i40e_maddrs = kmem_zalloc(sizeof (i40e_maddr_t) *
	    i40e->i40e_resources.ifr_nmcastfilt, KM_SLEEP);
	/*
	 * Initialize these as multicast addresses to indicate that they're
	 * invalid, for sanity purposes. Think of it like 0xdeadbeef.
	 */
	for (i = 0; i < i40e->i40e_resources.ifr_nmacfilt; i++)
		i40e->i40e_uaddrs[i].iua_mac[0] = 0x01;

	return (B_TRUE);
}

static boolean_t
i40e_enable_interrupts(i40e_t *i40e)
{
	int i, rc;

	if (i40e->i40e_intr_cap & DDI_INTR_FLAG_BLOCK) {
		rc = ddi_intr_block_enable(i40e->i40e_intr_handles,
		    i40e->i40e_intr_count);
		if (rc != DDI_SUCCESS) {
			i40e_error(i40e, "Interrupt block-enable failed: %d",
			    rc);
			return (B_FALSE);
		}
	} else {
		for (i = 0; i < i40e->i40e_intr_count; i++) {
			rc = ddi_intr_enable(i40e->i40e_intr_handles[i]);
			if (rc != DDI_SUCCESS) {
				i40e_error(i40e,
				    "Failed to enable interrupt %d: %d", i, rc);
				while (--i >= 0) {
					(void) ddi_intr_disable(
					    i40e->i40e_intr_handles[i]);
				}
				return (B_FALSE);
			}
		}
	}

	return (B_TRUE);
}

static boolean_t
i40e_disable_interrupts(i40e_t *i40e)
{
	int i, rc;

	if (i40e->i40e_intr_cap & DDI_INTR_FLAG_BLOCK) {
		rc = ddi_intr_block_disable(i40e->i40e_intr_handles,
		    i40e->i40e_intr_count);
		if (rc != DDI_SUCCESS) {
			i40e_error(i40e,
			    "Interrupt block-disable failed: %d", rc);
			return (B_FALSE);
		}
	} else {
		for (i = 0; i < i40e->i40e_intr_count; i++) {
			rc = ddi_intr_disable(i40e->i40e_intr_handles[i]);
			if (rc != DDI_SUCCESS) {
				i40e_error(i40e,
				    "Failed to disable interrupt %d: %d",
				    i, rc);
				return (B_FALSE);
			}
		}
	}

	return (B_TRUE);
}

/*
 * Free receive & transmit rings.
 */
static void
i40e_free_trqpairs(i40e_t *i40e)
{
	int i;
	i40e_trqpair_t *itrq;

	if (i40e->i40e_trqpairs != NULL) {
		for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
			itrq = &i40e->i40e_trqpairs[i];
			mutex_destroy(&itrq->itrq_rx_lock);
			mutex_destroy(&itrq->itrq_tx_lock);
			mutex_destroy(&itrq->itrq_tcb_lock);

			/*
			 * Should have already been cleaned up by start/stop,
			 * etc.
			 */
			ASSERT(itrq->itrq_txkstat == NULL);
			ASSERT(itrq->itrq_rxkstat == NULL);
		}

		kmem_free(i40e->i40e_trqpairs,
		    sizeof (i40e_trqpair_t) * i40e->i40e_num_trqpairs);
		i40e->i40e_trqpairs = NULL;
	}

	cv_destroy(&i40e->i40e_rx_pending_cv);
	mutex_destroy(&i40e->i40e_rx_pending_lock);
	mutex_destroy(&i40e->i40e_general_lock);
}

/*
 * Allocate transmit and receive rings, as well as other data structures that
 * we need.
 */
static boolean_t
i40e_alloc_trqpairs(i40e_t *i40e)
{
	int i;
	void *mutexpri = DDI_INTR_PRI(i40e->i40e_intr_pri);

	/*
	 * Now that we have the priority for the interrupts, initialize
	 * all relevant locks.
	 */
	mutex_init(&i40e->i40e_general_lock, NULL, MUTEX_DRIVER, mutexpri);
	mutex_init(&i40e->i40e_rx_pending_lock, NULL, MUTEX_DRIVER, mutexpri);
	cv_init(&i40e->i40e_rx_pending_cv, NULL, CV_DRIVER, NULL);

	i40e->i40e_trqpairs = kmem_zalloc(sizeof (i40e_trqpair_t) *
	    i40e->i40e_num_trqpairs, KM_SLEEP);
	for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
		i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];

		itrq->itrq_i40e = i40e;
		mutex_init(&itrq->itrq_rx_lock, NULL, MUTEX_DRIVER, mutexpri);
		mutex_init(&itrq->itrq_tx_lock, NULL, MUTEX_DRIVER, mutexpri);
		mutex_init(&itrq->itrq_tcb_lock, NULL, MUTEX_DRIVER, mutexpri);
		itrq->itrq_index = i;
	}

	return (B_TRUE);
}

/*
 * Unless a .conf file already overrode i40e_t structure values, they will
 * be 0, and need to be set in conjunction with the now-available HW report.
 *
 * However, at the moment, we cap all of these resources as we only support a
 * single receive ring and a single group.
 */
/* ARGSUSED */
static void
i40e_hw_to_instance(i40e_t *i40e, i40e_hw_t *hw)
{
	if (i40e->i40e_num_trqpairs == 0) {
		i40e->i40e_num_trqpairs = I40E_TRQPAIR_MAX;
	}

	if (i40e->i40e_num_rx_groups == 0) {
		i40e->i40e_num_rx_groups = I40E_GROUP_MAX;
	}
}

/*
 * Free any resources required by, or setup by, the Intel common code.
 */
static void
i40e_common_code_fini(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	int rc;

	rc = i40e_shutdown_lan_hmc(hw);
	if (rc != I40E_SUCCESS)
		i40e_error(i40e, "failed to shutdown LAN hmc: %d", rc);

	rc = i40e_shutdown_adminq(hw);
	if (rc != I40E_SUCCESS)
		i40e_error(i40e, "failed to shutdown admin queue: %d", rc);
}

/*
 * Initialize and call Intel common-code routines, includes some setup
 * the common code expects from the driver. Also prints on failure, so
 * the caller doesn't have to.
 */
static boolean_t
i40e_common_code_init(i40e_t *i40e, i40e_hw_t *hw)
{
	int rc;

	i40e_clear_hw(hw);
	rc = i40e_pf_reset(hw);
	if (rc != 0) {
		i40e_error(i40e, "failed to reset hardware: %d", rc);
		i40e_fm_ereport(i40e, DDI_FM_DEVICE_NO_RESPONSE);
		return (B_FALSE);
	}

	rc = i40e_init_shared_code(hw);
	if (rc != 0) {
		i40e_error(i40e, "failed to initialize i40e core: %d", rc);
		return (B_FALSE);
	}

	hw->aq.num_arq_entries = I40E_DEF_ADMINQ_SIZE;
	hw->aq.num_asq_entries = I40E_DEF_ADMINQ_SIZE;
	hw->aq.arq_buf_size = I40E_ADMINQ_BUFSZ;
	hw->aq.asq_buf_size = I40E_ADMINQ_BUFSZ;

	rc = i40e_init_adminq(hw);
	if (rc != 0) {
		i40e_error(i40e, "failed to initialize firmware admin queue: "
		    "%d, potential firmware version mismatch", rc);
		i40e_fm_ereport(i40e, DDI_FM_DEVICE_INVAL_STATE);
		return (B_FALSE);
	}

	if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
	    hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) {
		i40e_log(i40e, "The driver for the device detected a newer "
		    "version of the NVM image (%d.%d) than expected (%d.%d).\n"
		    "Please install the most recent version of the network "
		    "driver.\n", hw->aq.api_maj_ver, hw->aq.api_min_ver,
		    I40E_FW_API_VERSION_MAJOR, I40E_FW_API_VERSION_MINOR);
	} else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR ||
	    hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1)) {
		i40e_log(i40e, "The driver for the device detected an older "
		    "version of the NVM image (%d.%d) than expected (%d.%d)."
		    "\nPlease update the NVM image.\n",
		    hw->aq.api_maj_ver, hw->aq.api_min_ver,
		    I40E_FW_API_VERSION_MAJOR, I40E_FW_API_VERSION_MINOR - 1);
	}

	i40e_clear_pxe_mode(hw);

	/*
	 * We need to call this so that the common code can discover
	 * capabilities of the hardware, which it uses throughout the rest.
	 */
	if (!i40e_get_hw_capabilities(i40e, hw)) {
		i40e_error(i40e, "failed to obtain hardware capabilities");
		return (B_FALSE);
	}

	if (i40e_get_available_resources(i40e) == B_FALSE) {
		i40e_error(i40e, "failed to obtain hardware resources");
		return (B_FALSE);
	}

	i40e_hw_to_instance(i40e, hw);

	rc = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
	    hw->func_caps.num_rx_qp, 0, 0);
	if (rc != 0) {
		i40e_error(i40e, "failed to initialize hardware memory cache: "
		    "%d", rc);
		return (B_FALSE);
	}

	rc = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY);
	if (rc != 0) {
		i40e_error(i40e, "failed to configure hardware memory cache: "
		    "%d", rc);
		return (B_FALSE);
	}

	(void) i40e_aq_stop_lldp(hw, TRUE, NULL);

	rc = i40e_get_mac_addr(hw, hw->mac.addr);
	if (rc != I40E_SUCCESS) {
		i40e_error(i40e, "failed to retrieve hardware mac address: %d",
		    rc);
		return (B_FALSE);
	}

	rc = i40e_validate_mac_addr(hw->mac.addr);
	if (rc != 0) {
		i40e_error(i40e, "failed to validate internal mac address: "
		    "%d", rc);
		return (B_FALSE);
	}
	bcopy(hw->mac.addr, hw->mac.perm_addr, ETHERADDRL);
	if ((rc = i40e_get_port_mac_addr(hw, hw->mac.port_addr)) !=
	    I40E_SUCCESS) {
		i40e_error(i40e, "failed to retrieve port mac address: %d",
		    rc);
		return (B_FALSE);
	}

	/*
	 * We need to obtain the ID of the default Virtual Station Interface
	 * (VSI) before we can perform other operations on the device.
	 */
	i40e->i40e_vsi_id = i40e_get_vsi_id(i40e);
	if (i40e->i40e_vsi_id == -1) {
		i40e_error(i40e, "failed to obtain VSI ID");
		return (B_FALSE);
	}

	return (B_TRUE);
}

static void
i40e_unconfigure(dev_info_t *devinfo, i40e_t *i40e)
{
	int rc;

	if (i40e->i40e_attach_progress & I40E_ATTACH_ENABLE_INTR)
		(void) i40e_disable_interrupts(i40e);

	if ((i40e->i40e_attach_progress & I40E_ATTACH_LINK_TIMER) &&
	    i40e->i40e_periodic_id != 0) {
		ddi_periodic_delete(i40e->i40e_periodic_id);
		i40e->i40e_periodic_id = 0;
	}

	if (i40e->i40e_attach_progress & I40E_ATTACH_MAC) {
		rc = mac_unregister(i40e->i40e_mac_hdl);
		if (rc != 0) {
			i40e_error(i40e, "failed to unregister from mac: %d",
			    rc);
		}
	}

	if (i40e->i40e_attach_progress & I40E_ATTACH_STATS) {
		i40e_stats_fini(i40e);
	}

	if (i40e->i40e_attach_progress & I40E_ATTACH_ADD_INTR)
		i40e_rem_intr_handlers(i40e);

	if (i40e->i40e_attach_progress & I40E_ATTACH_ALLOC_RINGSLOCKS)
		i40e_free_trqpairs(i40e);

	if (i40e->i40e_attach_progress & I40E_ATTACH_ALLOC_INTR)
		i40e_rem_intrs(i40e);

	if (i40e->i40e_attach_progress & I40E_ATTACH_COMMON_CODE)
		i40e_common_code_fini(i40e);

	i40e_cleanup_resources(i40e);

	if (i40e->i40e_attach_progress & I40E_ATTACH_PROPS)
		(void) ddi_prop_remove_all(devinfo);

	if (i40e->i40e_attach_progress & I40E_ATTACH_REGS_MAP &&
	    i40e->i40e_osdep_space.ios_reg_handle != NULL) {
		ddi_regs_map_free(&i40e->i40e_osdep_space.ios_reg_handle);
		i40e->i40e_osdep_space.ios_reg_handle = NULL;
	}

	if ((i40e->i40e_attach_progress & I40E_ATTACH_PCI_CONFIG) &&
	    i40e->i40e_osdep_space.ios_cfg_handle != NULL) {
		pci_config_teardown(&i40e->i40e_osdep_space.ios_cfg_handle);
		i40e->i40e_osdep_space.ios_cfg_handle = NULL;
	}

	if (i40e->i40e_attach_progress & I40E_ATTACH_FM_INIT)
		i40e_fm_fini(i40e);

	kmem_free(i40e->i40e_aqbuf, I40E_ADMINQ_BUFSZ);
	kmem_free(i40e, sizeof (i40e_t));

	ddi_set_driver_private(devinfo, NULL);
}

static boolean_t
i40e_final_init(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	struct i40e_osdep *osdep = OS_DEP(hw);
	uint8_t pbanum[I40E_PBANUM_STRLEN];
	enum i40e_status_code irc;
	char buf[I40E_DDI_PROP_LEN];

	pbanum[0] = '\0';
	irc = i40e_read_pba_string(hw, pbanum, sizeof (pbanum));
	if (irc != I40E_SUCCESS) {
		i40e_log(i40e, "failed to read PBA string: %d", irc);
	} else {
		(void) ddi_prop_update_string(DDI_DEV_T_NONE, i40e->i40e_dip,
		    "printed-board-assembly", (char *)pbanum);
	}

#ifdef DEBUG
	ASSERT(snprintf(NULL, 0, "%d.%d", hw->aq.fw_maj_ver,
	    hw->aq.fw_min_ver) < sizeof (buf));
	ASSERT(snprintf(NULL, 0, "%x", hw->aq.fw_build) < sizeof (buf));
	ASSERT(snprintf(NULL, 0, "%d.%d", hw->aq.api_maj_ver,
	    hw->aq.api_min_ver) < sizeof (buf));
#endif

	(void) snprintf(buf, sizeof (buf), "%d.%d", hw->aq.fw_maj_ver,
	    hw->aq.fw_min_ver);
	(void) ddi_prop_update_string(DDI_DEV_T_NONE, i40e->i40e_dip,
	    "firmware-version", buf);
	(void) snprintf(buf, sizeof (buf), "%x", hw->aq.fw_build);
	(void) ddi_prop_update_string(DDI_DEV_T_NONE, i40e->i40e_dip,
	    "firmware-build", buf);
	(void) snprintf(buf, sizeof (buf), "%d.%d", hw->aq.api_maj_ver,
	    hw->aq.api_min_ver);
	(void) ddi_prop_update_string(DDI_DEV_T_NONE, i40e->i40e_dip,
	    "api-version", buf);

	if (!i40e_set_hw_bus_info(hw))
		return (B_FALSE);

	if (i40e_check_acc_handle(osdep->ios_reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
		return (B_FALSE);
	}

	return (B_TRUE);
}

static void
i40e_identify_hardware(i40e_t *i40e)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	struct i40e_osdep *osdep = &i40e->i40e_osdep_space;

	hw->vendor_id = pci_config_get16(osdep->ios_cfg_handle, PCI_CONF_VENID);
	hw->device_id = pci_config_get16(osdep->ios_cfg_handle, PCI_CONF_DEVID);
	hw->revision_id = pci_config_get8(osdep->ios_cfg_handle,
	    PCI_CONF_REVID);
	hw->subsystem_device_id =
	    pci_config_get16(osdep->ios_cfg_handle, PCI_CONF_SUBSYSID);
	hw->subsystem_vendor_id =
	    pci_config_get16(osdep->ios_cfg_handle, PCI_CONF_SUBVENID);

	/*
	 * Note that we set the hardware's bus information later on, in
	 * i40e_get_available_resources(). The common code doesn't seem to
	 * require that it be set in any way; it seems to be mostly for
	 * book-keeping.
	 */
}

static boolean_t
i40e_regs_map(i40e_t *i40e)
{
	dev_info_t *devinfo = i40e->i40e_dip;
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	struct i40e_osdep *osdep = &i40e->i40e_osdep_space;
	off_t memsize;
	int ret;

	if (ddi_dev_regsize(devinfo, I40E_ADAPTER_REGSET, &memsize) !=
	    DDI_SUCCESS) {
		i40e_error(i40e, "Used invalid register set to map PCIe regs");
		return (B_FALSE);
	}

	if ((ret = ddi_regs_map_setup(devinfo, I40E_ADAPTER_REGSET,
	    (caddr_t *)&hw->hw_addr, 0, memsize, &i40e_regs_acc_attr,
	    &osdep->ios_reg_handle)) != DDI_SUCCESS) {
		i40e_error(i40e, "failed to map device registers: %d", ret);
		return (B_FALSE);
	}

	osdep->ios_reg_size = memsize;
	return (B_TRUE);
}

/*
 * Update parameters required when a new MTU has been configured. Calculate the
 * maximum frame size, as well as the size of our DMA buffers, which we size in
 * increments of 1K.
 */
void
i40e_update_mtu(i40e_t *i40e)
{
	uint32_t rx, tx;

	i40e->i40e_frame_max = i40e->i40e_sdu +
	    sizeof (struct ether_vlan_header) + ETHERFCSL;

	/*
	 * Round each buffer size up to the next 1 KiB boundary: take the
	 * count of whole 1 KiB units, add one more if any low-order bits
	 * remain, and shift back up.
	 */
	rx = i40e->i40e_frame_max + I40E_BUF_IPHDR_ALIGNMENT;
	i40e->i40e_rx_buf_size = ((rx >> 10) +
	    ((rx & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;

	tx = i40e->i40e_frame_max;
	i40e->i40e_tx_buf_size = ((tx >> 10) +
	    ((tx & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
}

static int
i40e_get_prop(i40e_t *i40e, char *prop, int min, int max, int def)
{
	int val;

	val = ddi_prop_get_int(DDI_DEV_T_ANY, i40e->i40e_dip, DDI_PROP_DONTPASS,
	    prop, def);
	if (val > max)
		val = max;
	if (val < min)
		val = min;
	return (val);
}

static void
i40e_init_properties(i40e_t *i40e)
{
	i40e->i40e_sdu = i40e_get_prop(i40e, "default_mtu",
	    I40E_MIN_MTU, I40E_MAX_MTU, I40E_DEF_MTU);

	i40e->i40e_intr_force = i40e_get_prop(i40e, "intr_force",
	    I40E_INTR_NONE, I40E_INTR_LEGACY, I40E_INTR_NONE);

	i40e->i40e_mr_enable = i40e_get_prop(i40e, "mr_enable",
	    B_FALSE, B_TRUE, B_TRUE);

	i40e->i40e_tx_ring_size = i40e_get_prop(i40e, "tx_ring_size",
	    I40E_MIN_TX_RING_SIZE, I40E_MAX_TX_RING_SIZE,
	    I40E_DEF_TX_RING_SIZE);
	if ((i40e->i40e_tx_ring_size % I40E_DESC_ALIGN) != 0) {
		i40e->i40e_tx_ring_size = P2ROUNDUP(i40e->i40e_tx_ring_size,
		    I40E_DESC_ALIGN);
	}

	i40e->i40e_tx_block_thresh = i40e_get_prop(i40e, "tx_resched_threshold",
	    I40E_MIN_TX_BLOCK_THRESH,
	    i40e->i40e_tx_ring_size - I40E_TX_MAX_COOKIE,
	    I40E_DEF_TX_BLOCK_THRESH);

	i40e->i40e_rx_ring_size = i40e_get_prop(i40e, "rx_ring_size",
	    I40E_MIN_RX_RING_SIZE, I40E_MAX_RX_RING_SIZE,
	    I40E_DEF_RX_RING_SIZE);
	if ((i40e->i40e_rx_ring_size % I40E_DESC_ALIGN) != 0) {
		i40e->i40e_rx_ring_size = P2ROUNDUP(i40e->i40e_rx_ring_size,
		    I40E_DESC_ALIGN);
	}

	i40e->i40e_rx_limit_per_intr = i40e_get_prop(i40e, "rx_limit_per_intr",
	    I40E_MIN_RX_LIMIT_PER_INTR, I40E_MAX_RX_LIMIT_PER_INTR,
	    I40E_DEF_RX_LIMIT_PER_INTR);

	i40e->i40e_tx_hcksum_enable = i40e_get_prop(i40e, "tx_hcksum_enable",
	    B_FALSE, B_TRUE, B_TRUE);

	i40e->i40e_rx_hcksum_enable = i40e_get_prop(i40e, "rx_hcksum_enable",
	    B_FALSE, B_TRUE, B_TRUE);

	i40e->i40e_rx_dma_min = i40e_get_prop(i40e, "rx_dma_threshold",
	    I40E_MIN_RX_DMA_THRESH, I40E_MAX_RX_DMA_THRESH,
	    I40E_DEF_RX_DMA_THRESH);

	i40e->i40e_tx_dma_min = i40e_get_prop(i40e, "tx_dma_threshold",
	    I40E_MIN_TX_DMA_THRESH, I40E_MAX_TX_DMA_THRESH,
	    I40E_DEF_TX_DMA_THRESH);

	i40e->i40e_tx_itr = i40e_get_prop(i40e, "tx_intr_throttle",
	    I40E_MIN_ITR, I40E_MAX_ITR, I40E_DEF_TX_ITR);

	i40e->i40e_rx_itr = i40e_get_prop(i40e, "rx_intr_throttle",
	    I40E_MIN_ITR, I40E_MAX_ITR, I40E_DEF_RX_ITR);

	i40e->i40e_other_itr = i40e_get_prop(i40e, "other_intr_throttle",
	    I40E_MIN_ITR, I40E_MAX_ITR, I40E_DEF_OTHER_ITR);

	if (!i40e->i40e_mr_enable) {
		i40e->i40e_num_trqpairs = I40E_TRQPAIR_NOMSIX;
		i40e->i40e_num_rx_groups = I40E_GROUP_NOMSIX;
	}

	i40e_update_mtu(i40e);
}

/*
 * There are a few constraints on interrupts that we're currently imposing,
 * some of which are restrictions from hardware. For a fuller treatment, see
 * i40e_intr.c.
 *
 * Currently, to use MSI-X we require two interrupts be available, though in
 * theory we should participate in IRM and happily use more interrupts.
 *
 * Hardware only supports a single MSI being programmed and therefore if we
 * don't have MSI-X interrupts available at this time, then we ratchet down the
 * number of rings and groups available. Obviously, we only bother with a single
 * fixed interrupt.
 */
static boolean_t
i40e_alloc_intr_handles(i40e_t *i40e, dev_info_t *devinfo, int intr_type)
{
	i40e_hw_t *hw = &i40e->i40e_hw_space;
	ddi_acc_handle_t rh = i40e->i40e_osdep_space.ios_reg_handle;
	int request, count, actual, rc, min;
	uint32_t reg;

	switch (intr_type) {
	case DDI_INTR_TYPE_FIXED:
	case DDI_INTR_TYPE_MSI:
		request = 1;
		min = 1;
		break;
	case DDI_INTR_TYPE_MSIX:
		min = 2;
		if (!i40e->i40e_mr_enable) {
			request = 2;
			break;
		}
		reg = I40E_READ_REG(hw, I40E_GLPCI_CNF2);
		/*
		 * Should this read fail, we will drop back to using
		 * MSI or fixed interrupts.
		 */
		if (i40e_check_acc_handle(rh) != DDI_FM_OK) {
			ddi_fm_service_impact(i40e->i40e_dip,
			    DDI_SERVICE_DEGRADED);
			return (B_FALSE);
		}
		request = (reg & I40E_GLPCI_CNF2_MSI_X_PF_N_MASK) >>
		    I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT;
		request++;	/* the register value is n - 1 */
		break;
	default:
		panic("bad interrupt type passed to i40e_alloc_intr_handles: "
		    "%d", intr_type);
		return (B_FALSE);
	}

	rc = ddi_intr_get_nintrs(devinfo, intr_type, &count);
	if (rc != DDI_SUCCESS || count < min) {
		i40e_log(i40e, "Get interrupt number failed, "
		    "returned %d, count %d", rc, count);
		return (B_FALSE);
	}

	rc = ddi_intr_get_navail(devinfo, intr_type, &count);
	if (rc != DDI_SUCCESS || count < min) {
		i40e_log(i40e, "Get AVAILABLE interrupt number failed, "
		    "returned %d, count %d", rc, count);
		return (B_FALSE);
	}

	actual = 0;
	i40e->i40e_intr_count = 0;
	i40e->i40e_intr_count_max = 0;
	i40e->i40e_intr_count_min = 0;

	i40e->i40e_intr_size = request * sizeof (ddi_intr_handle_t);
	ASSERT(i40e->i40e_intr_size != 0);
	i40e->i40e_intr_handles = kmem_alloc(i40e->i40e_intr_size, KM_SLEEP);

	rc = ddi_intr_alloc(devinfo, i40e->i40e_intr_handles, intr_type, 0,
	    min(request, count), &actual, DDI_INTR_ALLOC_NORMAL);
	if (rc != DDI_SUCCESS) {
		i40e_log(i40e, "Interrupt allocation failed with %d.", rc);
		goto alloc_handle_fail;
	}

	i40e->i40e_intr_count = actual;
	i40e->i40e_intr_count_max = request;
	i40e->i40e_intr_count_min = min;

	if (actual < min) {
		i40e_log(i40e, "actual (%d) is less than minimum (%d).",
		    actual, min);
		goto alloc_handle_fail;
	}

	/*
	 * Record the priority and capabilities for our first vector. Once
	 * we have it, that's our priority until detach time. Even if we
	 * eventually participate in IRM, our priority shouldn't change.
1687 	 */
1688 	rc = ddi_intr_get_pri(i40e->i40e_intr_handles[0], &i40e->i40e_intr_pri);
1689 	if (rc != DDI_SUCCESS) {
1690 		i40e_log(i40e,
1691 		    "Getting interrupt priority failed with %d.", rc);
1692 		goto alloc_handle_fail;
1693 	}
1694 
1695 	rc = ddi_intr_get_cap(i40e->i40e_intr_handles[0], &i40e->i40e_intr_cap);
1696 	if (rc != DDI_SUCCESS) {
1697 		i40e_log(i40e,
1698 		    "Getting interrupt capabilities failed with %d.", rc);
1699 		goto alloc_handle_fail;
1700 	}
1701 
1702 	i40e->i40e_intr_type = intr_type;
1703 	return (B_TRUE);
1704 
1705 alloc_handle_fail:
1706 
1707 	i40e_rem_intrs(i40e);
1708 	return (B_FALSE);
1709 }
1710 
1711 static boolean_t
1712 i40e_alloc_intrs(i40e_t *i40e, dev_info_t *devinfo)
1713 {
1714 	int intr_types, rc;
1715 	uint_t max_trqpairs;
1716 
1717 	if (i40e_is_x722(i40e)) {
1718 		max_trqpairs = I40E_722_MAX_TC_QUEUES;
1719 	} else {
1720 		max_trqpairs = I40E_710_MAX_TC_QUEUES;
1721 	}
1722 
1723 	rc = ddi_intr_get_supported_types(devinfo, &intr_types);
1724 	if (rc != DDI_SUCCESS) {
1725 		i40e_error(i40e, "failed to get supported interrupt types: %d",
1726 		    rc);
1727 		return (B_FALSE);
1728 	}
1729 
1730 	i40e->i40e_intr_type = 0;
1731 
1732 	if ((intr_types & DDI_INTR_TYPE_MSIX) &&
1733 	    i40e->i40e_intr_force <= I40E_INTR_MSIX) {
1734 		if (i40e_alloc_intr_handles(i40e, devinfo,
1735 		    DDI_INTR_TYPE_MSIX)) {
1736 			i40e->i40e_num_trqpairs =
1737 			    MIN(i40e->i40e_intr_count - 1, max_trqpairs);
1738 			return (B_TRUE);
1739 		}
1740 	}
1741 
1742 	/*
1743 	 * We only use multiple transmit/receive pairs when MSI-X interrupts are
1744 	 * available, because the device basically only supports a single MSI
1745 	 * interrupt.
1746 	 */
1747 	i40e->i40e_num_trqpairs = I40E_TRQPAIR_NOMSIX;
1748 	i40e->i40e_num_rx_groups = I40E_GROUP_NOMSIX;
1749 
1750 	if ((intr_types & DDI_INTR_TYPE_MSI) &&
1751 	    (i40e->i40e_intr_force <= I40E_INTR_MSI)) {
1752 		if (i40e_alloc_intr_handles(i40e, devinfo, DDI_INTR_TYPE_MSI))
1753 			return (B_TRUE);
1754 	}
1755 
1756 	if (intr_types & DDI_INTR_TYPE_FIXED) {
1757 		if (i40e_alloc_intr_handles(i40e, devinfo, DDI_INTR_TYPE_FIXED))
1758 			return (B_TRUE);
1759 	}
1760 
1761 	return (B_FALSE);
1762 }
1763 
1764 /*
1765  * Map different interrupts to MSI-X vectors.
1766  */
1767 static boolean_t
1768 i40e_map_intrs_to_vectors(i40e_t *i40e)
1769 {
1770 	int i;
1771 
1772 	if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
1773 		return (B_TRUE);
1774 	}
1775 
1776 	/*
1777 	 * Each queue pair is mapped to a single interrupt, so transmit
1778 	 * and receive interrupts for a given queue share the same vector.
1779 	 * The number of queue pairs is one less than the number of interrupt
1780 	 * vectors; each queue pair is assigned the vector one higher than its
1781 	 * index. Vector zero is reserved for the admin queue.
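	 *
	 * For example (illustrative values only, not additional driver
	 * logic), with five MSI-X vectors allocated the mapping works
	 * out as follows:
	 *
	 *	vector 0	admin queue and other causes
	 *	vector 1	trqpair 0 (rx and tx)
	 *	vector 2	trqpair 1 (rx and tx)
	 *	vector 3	trqpair 2 (rx and tx)
	 *	vector 4	trqpair 3 (rx and tx)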
1782 	 */
1783 	ASSERT(i40e->i40e_intr_count == i40e->i40e_num_trqpairs + 1);
1784 
1785 	for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
1786 		i40e->i40e_trqpairs[i].itrq_rx_intrvec = i + 1;
1787 		i40e->i40e_trqpairs[i].itrq_tx_intrvec = i + 1;
1788 	}
1789 
1790 	return (B_TRUE);
1791 }
1792 
1793 static boolean_t
1794 i40e_add_intr_handlers(i40e_t *i40e)
1795 {
1796 	int rc, vector;
1797 
1798 	switch (i40e->i40e_intr_type) {
1799 	case DDI_INTR_TYPE_MSIX:
1800 		for (vector = 0; vector < i40e->i40e_intr_count; vector++) {
1801 			rc = ddi_intr_add_handler(
1802 			    i40e->i40e_intr_handles[vector],
1803 			    (ddi_intr_handler_t *)i40e_intr_msix, i40e,
1804 			    (void *)(uintptr_t)vector);
1805 			if (rc != DDI_SUCCESS) {
1806 				i40e_log(i40e, "Add interrupt handler (MSI-X) "
1807 				    "failed: return %d, vector %d", rc, vector);
1808 				for (vector--; vector >= 0; vector--) {
1809 					(void) ddi_intr_remove_handler(
1810 					    i40e->i40e_intr_handles[vector]);
1811 				}
1812 				return (B_FALSE);
1813 			}
1814 		}
1815 		break;
1816 	case DDI_INTR_TYPE_MSI:
1817 		rc = ddi_intr_add_handler(i40e->i40e_intr_handles[0],
1818 		    (ddi_intr_handler_t *)i40e_intr_msi, i40e, NULL);
1819 		if (rc != DDI_SUCCESS) {
1820 			i40e_log(i40e, "Add interrupt handler (MSI) failed: "
1821 			    "return %d", rc);
1822 			return (B_FALSE);
1823 		}
1824 		break;
1825 	case DDI_INTR_TYPE_FIXED:
1826 		rc = ddi_intr_add_handler(i40e->i40e_intr_handles[0],
1827 		    (ddi_intr_handler_t *)i40e_intr_legacy, i40e, NULL);
1828 		if (rc != DDI_SUCCESS) {
1829 			i40e_log(i40e, "Add interrupt handler (legacy) failed:"
1830 			    " return %d", rc);
1831 			return (B_FALSE);
1832 		}
1833 		break;
1834 	default:
1835 		/* Cast to pacify lint */
1836 		panic("i40e_intr_type %p contains an unknown type: %d",
1837 		    (void *)i40e, i40e->i40e_intr_type);
1838 	}
1839 
1840 	return (B_TRUE);
1841 }
1842 
1843 /*
1844  * Perform periodic checks. Longer term, we should be thinking about additional
1845  * things here:
1846  *
1847  * o Stall Detection
1848  * o Temperature sensor detection
1849  * o Device resetting
1850  * o Statistics updating to avoid wraparound
1851  */
1852 static void
1853 i40e_timer(void *arg)
1854 {
1855 	i40e_t *i40e = arg;
1856 
1857 	mutex_enter(&i40e->i40e_general_lock);
1858 	i40e_link_check(i40e);
1859 	mutex_exit(&i40e->i40e_general_lock);
1860 }
1861 
1862 /*
1863  * Get the hardware state, and scribble away anything that needs scribbling.
1864  */
1865 static void
1866 i40e_get_hw_state(i40e_t *i40e, i40e_hw_t *hw)
1867 {
1868 	int rc;
1869 
1870 	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
1871 
1872 	(void) i40e_aq_get_link_info(hw, TRUE, NULL, NULL);
1873 	i40e_link_check(i40e);
1874 
1875 	/*
1876 	 * Try and determine our PHY. Note that we may have to retry and
1877 	 * delay to detect fiber correctly.
1878 */ 1879 rc = i40e_aq_get_phy_capabilities(hw, B_FALSE, B_TRUE, &i40e->i40e_phy, 1880 NULL); 1881 if (rc == I40E_ERR_UNKNOWN_PHY) { 1882 i40e_msec_delay(200); 1883 rc = i40e_aq_get_phy_capabilities(hw, B_FALSE, B_TRUE, 1884 &i40e->i40e_phy, NULL); 1885 } 1886 1887 if (rc != I40E_SUCCESS) { 1888 if (rc == I40E_ERR_UNKNOWN_PHY) { 1889 i40e_error(i40e, "encountered unknown PHY type, " 1890 "not attaching."); 1891 } else { 1892 i40e_error(i40e, "error getting physical capabilities: " 1893 "%d, %d", rc, hw->aq.asq_last_status); 1894 } 1895 } 1896 1897 rc = i40e_update_link_info(hw); 1898 if (rc != I40E_SUCCESS) { 1899 i40e_error(i40e, "failed to update link information: %d", rc); 1900 } 1901 1902 /* 1903 * In general, we don't want to mask off (as in stop from being a cause) 1904 * any of the interrupts that the phy might be able to generate. 1905 */ 1906 rc = i40e_aq_set_phy_int_mask(hw, 0, NULL); 1907 if (rc != I40E_SUCCESS) { 1908 i40e_error(i40e, "failed to update phy link mask: %d", rc); 1909 } 1910 } 1911 1912 /* 1913 * Go through and re-initialize any existing filters that we may have set up for 1914 * this device. Note that we would only expect them to exist if hardware had 1915 * already been initialized and we had just reset it. While we're not 1916 * implementing this yet, we're keeping this around for when we add reset 1917 * capabilities, so this isn't forgotten. 1918 */ 1919 /* ARGSUSED */ 1920 static void 1921 i40e_init_macaddrs(i40e_t *i40e, i40e_hw_t *hw) 1922 { 1923 } 1924 1925 /* 1926 * Configure the hardware for the Virtual Station Interface (VSI). Currently 1927 * we only support one, but in the future we could instantiate more than one 1928 * per attach-point. 1929 */ 1930 static boolean_t 1931 i40e_config_vsi(i40e_t *i40e, i40e_hw_t *hw) 1932 { 1933 struct i40e_vsi_context context; 1934 int err, tc_queues; 1935 1936 bzero(&context, sizeof (struct i40e_vsi_context)); 1937 context.seid = i40e->i40e_vsi_id; 1938 context.pf_num = hw->pf_id; 1939 err = i40e_aq_get_vsi_params(hw, &context, NULL); 1940 if (err != I40E_SUCCESS) { 1941 i40e_error(i40e, "get VSI params failed with %d", err); 1942 return (B_FALSE); 1943 } 1944 1945 i40e->i40e_vsi_num = context.vsi_number; 1946 1947 /* 1948 * Set the queue and traffic class bits. Keep it simple for now. 1949 */ 1950 context.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; 1951 context.info.mapping_flags = I40E_AQ_VSI_QUE_MAP_CONTIG; 1952 context.info.queue_mapping[0] = I40E_ASSIGN_ALL_QUEUES; 1953 1954 /* 1955 * tc_queues determines the size of the traffic class, where the size is 1956 * 2^^tc_queues to a maximum of 64 for the X710 and 128 for the X722. 1957 * 1958 * Some examples: 1959 * i40e_num_trqpairs == 1 => tc_queues = 0, 2^^0 = 1. 1960 * i40e_num_trqpairs == 7 => tc_queues = 3, 2^^3 = 8. 1961 * i40e_num_trqpairs == 8 => tc_queues = 3, 2^^3 = 8. 1962 * i40e_num_trqpairs == 9 => tc_queues = 4, 2^^4 = 16. 1963 * i40e_num_trqpairs == 17 => tc_queues = 5, 2^^5 = 32. 1964 * i40e_num_trqpairs == 64 => tc_queues = 6, 2^^6 = 64. 
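	 *
	 * Because ddi_fls() returns the one-based position of the highest
	 * bit set (and 0 for an argument of 0), the computation below is
	 * equivalent to ceil(log2(i40e_num_trqpairs)). A userland sketch
	 * of the same calculation (hypothetical helper, not part of the
	 * driver):
	 *
	 *	uint_t
	 *	tc_queues_for(uint_t ntrqpairs)
	 *	{
	 *		uint_t bits = 0;
	 *
	 *		while ((1U << bits) < ntrqpairs)
	 *			bits++;
	 *		return (bits);
	 *	}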
1965 	 */
1966 	tc_queues = ddi_fls(i40e->i40e_num_trqpairs - 1);
1967 
1968 	context.info.tc_mapping[0] = ((0 << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) &
1969 	    I40E_AQ_VSI_TC_QUE_OFFSET_MASK) |
1970 	    ((tc_queues << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) &
1971 	    I40E_AQ_VSI_TC_QUE_NUMBER_MASK);
1972 
1973 	context.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID;
1974 	context.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
1975 	    I40E_AQ_VSI_PVLAN_EMOD_NOTHING;
1976 
1977 	context.flags = LE16_TO_CPU(I40E_AQ_VSI_TYPE_PF);
1978 
1979 	i40e->i40e_vsi_stat_id = LE16_TO_CPU(context.info.stat_counter_idx);
1980 	if (i40e_stat_vsi_init(i40e) == B_FALSE)
1981 		return (B_FALSE);
1982 
1983 	err = i40e_aq_update_vsi_params(hw, &context, NULL);
1984 	if (err != I40E_SUCCESS) {
1985 		i40e_error(i40e, "Update VSI params failed with %d", err);
1986 		return (B_FALSE);
1987 	}
1988 
1989 
1990 	return (B_TRUE);
1991 }
1992 
1993 /*
1994  * Configure the RSS key. For the X710 controller family, this is set on a
1995  * per-PF basis via registers. For the X722, this is done on a per-VSI basis
1996  * through the admin queue.
1997  */
1998 static boolean_t
1999 i40e_config_rss_key(i40e_t *i40e, i40e_hw_t *hw)
2000 {
2001 	uint32_t seed[I40E_PFQF_HKEY_MAX_INDEX + 1];
2002 
2003 	(void) random_get_pseudo_bytes((uint8_t *)seed, sizeof (seed));
2004 
2005 	if (i40e_is_x722(i40e)) {
2006 		struct i40e_aqc_get_set_rss_key_data key;
2007 		const char *u8seed = (char *)seed;
2008 		enum i40e_status_code status;
2009 
2010 		CTASSERT(sizeof (key) >= (sizeof (key.standard_rss_key) +
2011 		    sizeof (key.extended_hash_key)));
2012 
2013 		bcopy(u8seed, key.standard_rss_key,
2014 		    sizeof (key.standard_rss_key));
2015 		bcopy(&u8seed[sizeof (key.standard_rss_key)],
2016 		    key.extended_hash_key, sizeof (key.extended_hash_key));
2017 
2018 		status = i40e_aq_set_rss_key(hw, i40e->i40e_vsi_num, &key);
2019 		if (status != I40E_SUCCESS) {
2020 			i40e_error(i40e, "failed to set rss key: %d", status);
2021 			return (B_FALSE);
2022 		}
2023 	} else {
2024 		uint_t i;
2025 		for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
2026 			i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), seed[i]);
2027 	}
2028 
2029 	return (B_TRUE);
2030 }
2031 
2032 /*
2033  * Populate the LUT. The size of each entry in the LUT depends on the controller
2034  * family, with the X722 using a known 7-bit width. On the X710 controller, this
2035  * is programmed through its control registers, whereas on the X722 this is
2036  * configured through the admin queue. Also of note, the X722 allows the LUT to
2037  * be set on a per-PF or per-VSI basis. At this time, as we only have a single
2038  * VSI, we use the PF setting as it is the primary VSI.
2039  *
2040  * We populate the LUT in a round-robin fashion with the rx queue indices from 0
2041  * to i40e_num_trqpairs - 1.
2042  */
2043 static boolean_t
2044 i40e_config_rss_hlut(i40e_t *i40e, i40e_hw_t *hw)
2045 {
2046 	uint32_t *hlut;
2047 	uint8_t lut_mask;
2048 	uint_t i;
2049 	boolean_t ret = B_FALSE;
2050 
2051 	/*
2052 	 * We always configure the PF with a table size of 512 bytes in
2053 	 * i40e_chip_start().
2054 	 */
2055 	hlut = kmem_alloc(I40E_HLUT_TABLE_SIZE, KM_NOSLEEP);
2056 	if (hlut == NULL) {
2057 		i40e_error(i40e, "i40e_config_rss_hlut() buffer allocation "
2058 		    "failed");
2059 		return (B_FALSE);
2060 	}
2061 
2062 	/*
2063 	 * The width of the X722 is apparently defined to be 7 bits, regardless
2064 	 * of the capability.
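	 *
	 * To make the round-robin fill below concrete: with, say, four rx
	 * queues, the 512 LUT bytes would read 0, 1, 2, 3, 0, 1, 2, 3, ...,
	 * spreading the hash buckets evenly across the queues (illustrative
	 * values only).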
2064 */ 2065 if (i40e_is_x722(i40e)) { 2066 lut_mask = (1 << 7) - 1; 2067 } else { 2068 lut_mask = (1 << hw->func_caps.rss_table_entry_width) - 1; 2069 } 2070 2071 for (i = 0; i < I40E_HLUT_TABLE_SIZE; i++) 2072 ((uint8_t *)hlut)[i] = (i % i40e->i40e_num_trqpairs) & lut_mask; 2073 2074 if (i40e_is_x722(i40e)) { 2075 enum i40e_status_code status; 2076 status = i40e_aq_set_rss_lut(hw, i40e->i40e_vsi_num, B_TRUE, 2077 (uint8_t *)hlut, I40E_HLUT_TABLE_SIZE); 2078 if (status != I40E_SUCCESS) { 2079 i40e_error(i40e, "failed to set RSS LUT: %d", status); 2080 goto out; 2081 } 2082 } else { 2083 for (i = 0; i < I40E_HLUT_TABLE_SIZE >> 2; i++) { 2084 I40E_WRITE_REG(hw, I40E_PFQF_HLUT(i), hlut[i]); 2085 } 2086 } 2087 ret = B_TRUE; 2088 out: 2089 kmem_free(hlut, I40E_HLUT_TABLE_SIZE); 2090 return (ret); 2091 } 2092 2093 /* 2094 * Set up RSS. 2095 * 1. Seed the hash key. 2096 * 2. Enable PCTYPEs for the hash filter. 2097 * 3. Populate the LUT. 2098 */ 2099 static boolean_t 2100 i40e_config_rss(i40e_t *i40e, i40e_hw_t *hw) 2101 { 2102 uint64_t hena; 2103 2104 /* 2105 * 1. Seed the hash key 2106 */ 2107 if (!i40e_config_rss_key(i40e, hw)) 2108 return (B_FALSE); 2109 2110 /* 2111 * 2. Configure PCTYPES 2112 */ 2113 hena = (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | 2114 (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | 2115 (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | 2116 (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | 2117 (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4) | 2118 (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | 2119 (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | 2120 (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) | 2121 (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | 2122 (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6) | 2123 (1ULL << I40E_FILTER_PCTYPE_L2_PAYLOAD); 2124 2125 /* 2126 * Add additional types supported by the X722 controller. 2127 */ 2128 if (i40e_is_x722(i40e)) { 2129 hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | 2130 (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | 2131 (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | 2132 (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | 2133 (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) | 2134 (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); 2135 } 2136 2137 i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena); 2138 i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32)); 2139 2140 /* 2141 * 3. Populate LUT 2142 */ 2143 return (i40e_config_rss_hlut(i40e, hw)); 2144 } 2145 2146 /* 2147 * Wrapper to kick the chipset on. 2148 */ 2149 static boolean_t 2150 i40e_chip_start(i40e_t *i40e) 2151 { 2152 i40e_hw_t *hw = &i40e->i40e_hw_space; 2153 struct i40e_filter_control_settings filter; 2154 int rc; 2155 2156 if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || 2157 (hw->aq.fw_maj_ver < 4)) { 2158 i40e_msec_delay(75); 2159 if (i40e_aq_set_link_restart_an(hw, TRUE, NULL) != 2160 I40E_SUCCESS) { 2161 i40e_error(i40e, "failed to restart link: admin queue " 2162 "error: %d", hw->aq.asq_last_status); 2163 return (B_FALSE); 2164 } 2165 } 2166 2167 /* Determine hardware state */ 2168 i40e_get_hw_state(i40e, hw); 2169 2170 /* Initialize mac addresses. */ 2171 i40e_init_macaddrs(i40e, hw); 2172 2173 /* 2174 * Set up the filter control. If the hash lut size is changed from 2175 * I40E_HASH_LUT_SIZE_512 then I40E_HLUT_TABLE_SIZE and 2176 * i40e_config_rss_hlut() will need to be updated. 
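	 *
	 * A compile-time guard placed next to those definitions could catch
	 * a mismatch early; a minimal sketch, assuming I40E_HLUT_TABLE_SIZE
	 * is expressed in bytes:
	 *
	 *	CTASSERT(I40E_HLUT_TABLE_SIZE == 512);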
2177 */ 2178 bzero(&filter, sizeof (filter)); 2179 filter.enable_ethtype = TRUE; 2180 filter.enable_macvlan = TRUE; 2181 filter.hash_lut_size = I40E_HASH_LUT_SIZE_512; 2182 2183 rc = i40e_set_filter_control(hw, &filter); 2184 if (rc != I40E_SUCCESS) { 2185 i40e_error(i40e, "i40e_set_filter_control() returned %d", rc); 2186 return (B_FALSE); 2187 } 2188 2189 i40e_intr_chip_init(i40e); 2190 2191 if (!i40e_config_vsi(i40e, hw)) 2192 return (B_FALSE); 2193 2194 if (!i40e_config_rss(i40e, hw)) 2195 return (B_FALSE); 2196 2197 i40e_flush(hw); 2198 2199 return (B_TRUE); 2200 } 2201 2202 /* 2203 * Take care of tearing down the rx ring. See 8.3.3.1.2 for more information. 2204 */ 2205 static void 2206 i40e_shutdown_rx_rings(i40e_t *i40e) 2207 { 2208 int i; 2209 uint32_t reg; 2210 2211 i40e_hw_t *hw = &i40e->i40e_hw_space; 2212 2213 /* 2214 * Step 1. The interrupt linked list (see i40e_intr.c for more 2215 * information) should have already been cleared before calling this 2216 * function. 2217 */ 2218 #ifdef DEBUG 2219 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 2220 for (i = 1; i < i40e->i40e_intr_count; i++) { 2221 reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1)); 2222 VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL); 2223 } 2224 } else { 2225 reg = I40E_READ_REG(hw, I40E_PFINT_LNKLST0); 2226 VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL); 2227 } 2228 2229 #endif /* DEBUG */ 2230 2231 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 2232 /* 2233 * Step 1. Request the queue by clearing QENA_REQ. It may not be 2234 * set due to unwinding from failures and a partially enabled 2235 * ring set. 2236 */ 2237 reg = I40E_READ_REG(hw, I40E_QRX_ENA(i)); 2238 if (!(reg & I40E_QRX_ENA_QENA_REQ_MASK)) 2239 continue; 2240 VERIFY((reg & I40E_QRX_ENA_QENA_REQ_MASK) == 2241 I40E_QRX_ENA_QENA_REQ_MASK); 2242 reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; 2243 I40E_WRITE_REG(hw, I40E_QRX_ENA(i), reg); 2244 } 2245 2246 /* 2247 * Step 2. Wait for the disable to take, by having QENA_STAT in the FPM 2248 * be cleared. Note that we could still receive data in the queue during 2249 * this time. We don't actually wait for this now and instead defer this 2250 * to i40e_shutdown_rings_wait(), after we've interleaved disabling the 2251 * TX queues as well. 2252 */ 2253 } 2254 2255 static void 2256 i40e_shutdown_tx_rings(i40e_t *i40e) 2257 { 2258 int i; 2259 uint32_t reg; 2260 2261 i40e_hw_t *hw = &i40e->i40e_hw_space; 2262 2263 /* 2264 * Step 1. The interrupt linked list should already have been cleared. 2265 */ 2266 #ifdef DEBUG 2267 if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) { 2268 for (i = 1; i < i40e->i40e_intr_count; i++) { 2269 reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1)); 2270 VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL); 2271 } 2272 } else { 2273 reg = I40E_READ_REG(hw, I40E_PFINT_LNKLST0); 2274 VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL); 2275 2276 } 2277 #endif /* DEBUG */ 2278 2279 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 2280 /* 2281 * Step 2. Set the SET_QDIS flag for every queue. 2282 */ 2283 i40e_pre_tx_queue_cfg(hw, i, B_FALSE); 2284 } 2285 2286 /* 2287 * Step 3. Wait at least 400 usec (can be done once for all queues). 2288 */ 2289 drv_usecwait(500); 2290 2291 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 2292 /* 2293 * Step 4. Clear the QENA_REQ flag which tells hardware to 2294 * quiesce. If QENA_REQ is not already set then that means that 2295 * we likely already tried to disable this queue. 
2296 */ 2297 reg = I40E_READ_REG(hw, I40E_QTX_ENA(i)); 2298 if (!(reg & I40E_QTX_ENA_QENA_REQ_MASK)) 2299 continue; 2300 reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; 2301 I40E_WRITE_REG(hw, I40E_QTX_ENA(i), reg); 2302 } 2303 2304 /* 2305 * Step 5. Wait for all drains to finish. This will be done by the 2306 * hardware removing the QENA_STAT flag from the queue. Rather than 2307 * waiting here, we interleave it with all the others in 2308 * i40e_shutdown_rings_wait(). 2309 */ 2310 } 2311 2312 /* 2313 * Wait for all the rings to be shut down. e.g. Steps 2 and 5 from the above 2314 * functions. 2315 */ 2316 static boolean_t 2317 i40e_shutdown_rings_wait(i40e_t *i40e) 2318 { 2319 int i, try; 2320 i40e_hw_t *hw = &i40e->i40e_hw_space; 2321 2322 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 2323 uint32_t reg; 2324 2325 for (try = 0; try < I40E_RING_WAIT_NTRIES; try++) { 2326 reg = I40E_READ_REG(hw, I40E_QRX_ENA(i)); 2327 if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) 2328 break; 2329 i40e_msec_delay(I40E_RING_WAIT_PAUSE); 2330 } 2331 2332 if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) != 0) { 2333 i40e_error(i40e, "timed out disabling rx queue %d", 2334 i); 2335 return (B_FALSE); 2336 } 2337 2338 for (try = 0; try < I40E_RING_WAIT_NTRIES; try++) { 2339 reg = I40E_READ_REG(hw, I40E_QTX_ENA(i)); 2340 if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) 2341 break; 2342 i40e_msec_delay(I40E_RING_WAIT_PAUSE); 2343 } 2344 2345 if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) != 0) { 2346 i40e_error(i40e, "timed out disabling tx queue %d", 2347 i); 2348 return (B_FALSE); 2349 } 2350 } 2351 2352 return (B_TRUE); 2353 } 2354 2355 static boolean_t 2356 i40e_shutdown_rings(i40e_t *i40e) 2357 { 2358 i40e_shutdown_rx_rings(i40e); 2359 i40e_shutdown_tx_rings(i40e); 2360 return (i40e_shutdown_rings_wait(i40e)); 2361 } 2362 2363 static void 2364 i40e_setup_rx_descs(i40e_trqpair_t *itrq) 2365 { 2366 int i; 2367 i40e_rx_data_t *rxd = itrq->itrq_rxdata; 2368 2369 for (i = 0; i < rxd->rxd_ring_size; i++) { 2370 i40e_rx_control_block_t *rcb; 2371 i40e_rx_desc_t *rdesc; 2372 2373 rcb = rxd->rxd_work_list[i]; 2374 rdesc = &rxd->rxd_desc_ring[i]; 2375 2376 rdesc->read.pkt_addr = 2377 CPU_TO_LE64((uintptr_t)rcb->rcb_dma.dmab_dma_address); 2378 rdesc->read.hdr_addr = 0; 2379 } 2380 } 2381 2382 static boolean_t 2383 i40e_setup_rx_hmc(i40e_trqpair_t *itrq) 2384 { 2385 i40e_rx_data_t *rxd = itrq->itrq_rxdata; 2386 i40e_t *i40e = itrq->itrq_i40e; 2387 i40e_hw_t *hw = &i40e->i40e_hw_space; 2388 2389 struct i40e_hmc_obj_rxq rctx; 2390 int err; 2391 2392 bzero(&rctx, sizeof (struct i40e_hmc_obj_rxq)); 2393 rctx.base = rxd->rxd_desc_area.dmab_dma_address / 2394 I40E_HMC_RX_CTX_UNIT; 2395 rctx.qlen = rxd->rxd_ring_size; 2396 VERIFY(i40e->i40e_rx_buf_size >= I40E_HMC_RX_DBUFF_MIN); 2397 VERIFY(i40e->i40e_rx_buf_size <= I40E_HMC_RX_DBUFF_MAX); 2398 rctx.dbuff = i40e->i40e_rx_buf_size >> I40E_RXQ_CTX_DBUFF_SHIFT; 2399 rctx.hbuff = 0 >> I40E_RXQ_CTX_HBUFF_SHIFT; 2400 rctx.dtype = I40E_HMC_RX_DTYPE_NOSPLIT; 2401 rctx.dsize = I40E_HMC_RX_DSIZE_32BYTE; 2402 rctx.crcstrip = I40E_HMC_RX_CRCSTRIP_ENABLE; 2403 rctx.fc_ena = I40E_HMC_RX_FC_DISABLE; 2404 rctx.l2tsel = I40E_HMC_RX_L2TAGORDER; 2405 rctx.hsplit_0 = I40E_HMC_RX_HDRSPLIT_DISABLE; 2406 rctx.hsplit_1 = I40E_HMC_RX_HDRSPLIT_DISABLE; 2407 rctx.showiv = I40E_HMC_RX_INVLAN_DONTSTRIP; 2408 rctx.rxmax = i40e->i40e_frame_max; 2409 rctx.tphrdesc_ena = I40E_HMC_RX_TPH_DISABLE; 2410 rctx.tphwdesc_ena = I40E_HMC_RX_TPH_DISABLE; 2411 rctx.tphdata_ena = I40E_HMC_RX_TPH_DISABLE; 2412 rctx.tphhead_ena = 
I40E_HMC_RX_TPH_DISABLE; 2413 rctx.lrxqthresh = I40E_HMC_RX_LOWRXQ_NOINTR; 2414 2415 /* 2416 * This must be set to 0x1, see Table 8-12 in section 8.3.3.2.2. 2417 */ 2418 rctx.prefena = I40E_HMC_RX_PREFENA; 2419 2420 err = i40e_clear_lan_rx_queue_context(hw, itrq->itrq_index); 2421 if (err != I40E_SUCCESS) { 2422 i40e_error(i40e, "failed to clear rx queue %d context: %d", 2423 itrq->itrq_index, err); 2424 return (B_FALSE); 2425 } 2426 2427 err = i40e_set_lan_rx_queue_context(hw, itrq->itrq_index, &rctx); 2428 if (err != I40E_SUCCESS) { 2429 i40e_error(i40e, "failed to set rx queue %d context: %d", 2430 itrq->itrq_index, err); 2431 return (B_FALSE); 2432 } 2433 2434 return (B_TRUE); 2435 } 2436 2437 /* 2438 * Take care of setting up the descriptor rings and actually programming the 2439 * device. See 8.3.3.1.1 for the full list of steps we need to do to enable the 2440 * rx rings. 2441 */ 2442 static boolean_t 2443 i40e_setup_rx_rings(i40e_t *i40e) 2444 { 2445 int i; 2446 i40e_hw_t *hw = &i40e->i40e_hw_space; 2447 2448 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 2449 i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i]; 2450 i40e_rx_data_t *rxd = itrq->itrq_rxdata; 2451 uint32_t reg; 2452 2453 /* 2454 * Step 1. Program all receive ring descriptors. 2455 */ 2456 i40e_setup_rx_descs(itrq); 2457 2458 /* 2459 * Step 2. Program the queue's FPM/HMC context. 2460 */ 2461 if (i40e_setup_rx_hmc(itrq) == B_FALSE) 2462 return (B_FALSE); 2463 2464 /* 2465 * Step 3. Clear the queue's tail pointer and set it to the end 2466 * of the space. 2467 */ 2468 I40E_WRITE_REG(hw, I40E_QRX_TAIL(i), 0); 2469 I40E_WRITE_REG(hw, I40E_QRX_TAIL(i), rxd->rxd_ring_size - 1); 2470 2471 /* 2472 * Step 4. Enable the queue via the QENA_REQ. 2473 */ 2474 reg = I40E_READ_REG(hw, I40E_QRX_ENA(i)); 2475 VERIFY0(reg & (I40E_QRX_ENA_QENA_REQ_MASK | 2476 I40E_QRX_ENA_QENA_STAT_MASK)); 2477 reg |= I40E_QRX_ENA_QENA_REQ_MASK; 2478 I40E_WRITE_REG(hw, I40E_QRX_ENA(i), reg); 2479 } 2480 2481 /* 2482 * Note, we wait for every queue to be enabled before we start checking. 2483 * This will hopefully cause most queues to be enabled at this point. 2484 */ 2485 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 2486 uint32_t j, reg; 2487 2488 /* 2489 * Step 5. Verify that QENA_STAT has been set. It's promised 2490 * that this should occur within about 10 us, but like other 2491 * systems, we give the card a bit more time. 
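		 * The loop below bounds that wait at I40E_RING_WAIT_NTRIES
		 * polls of I40E_RING_WAIT_PAUSE milliseconds each; with the
		 * same constants, the tx enable path below describes this as
		 * up to 100 ms per queue.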
2492 */ 2493 for (j = 0; j < I40E_RING_WAIT_NTRIES; j++) { 2494 reg = I40E_READ_REG(hw, I40E_QRX_ENA(i)); 2495 2496 if (reg & I40E_QRX_ENA_QENA_STAT_MASK) 2497 break; 2498 i40e_msec_delay(I40E_RING_WAIT_PAUSE); 2499 } 2500 2501 if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) { 2502 i40e_error(i40e, "failed to enable rx queue %d, timed " 2503 "out.", i); 2504 return (B_FALSE); 2505 } 2506 } 2507 2508 return (B_TRUE); 2509 } 2510 2511 static boolean_t 2512 i40e_setup_tx_hmc(i40e_trqpair_t *itrq) 2513 { 2514 i40e_t *i40e = itrq->itrq_i40e; 2515 i40e_hw_t *hw = &i40e->i40e_hw_space; 2516 2517 struct i40e_hmc_obj_txq tctx; 2518 struct i40e_vsi_context context; 2519 int err; 2520 2521 bzero(&tctx, sizeof (struct i40e_hmc_obj_txq)); 2522 tctx.new_context = I40E_HMC_TX_NEW_CONTEXT; 2523 tctx.base = itrq->itrq_desc_area.dmab_dma_address / 2524 I40E_HMC_TX_CTX_UNIT; 2525 tctx.fc_ena = I40E_HMC_TX_FC_DISABLE; 2526 tctx.timesync_ena = I40E_HMC_TX_TS_DISABLE; 2527 tctx.fd_ena = I40E_HMC_TX_FD_DISABLE; 2528 tctx.alt_vlan_ena = I40E_HMC_TX_ALT_VLAN_DISABLE; 2529 tctx.head_wb_ena = I40E_HMC_TX_WB_ENABLE; 2530 tctx.qlen = itrq->itrq_tx_ring_size; 2531 tctx.tphrdesc_ena = I40E_HMC_TX_TPH_DISABLE; 2532 tctx.tphrpacket_ena = I40E_HMC_TX_TPH_DISABLE; 2533 tctx.tphwdesc_ena = I40E_HMC_TX_TPH_DISABLE; 2534 tctx.head_wb_addr = itrq->itrq_desc_area.dmab_dma_address + 2535 sizeof (i40e_tx_desc_t) * itrq->itrq_tx_ring_size; 2536 2537 /* 2538 * This field isn't actually documented, like crc, but it suggests that 2539 * it should be zeroed. We leave both of these here because of that for 2540 * now. We should check with Intel on why these are here even. 2541 */ 2542 tctx.crc = 0; 2543 tctx.rdylist_act = 0; 2544 2545 /* 2546 * We're supposed to assign the rdylist field with the value of the 2547 * traffic class index for the first device. We query the VSI parameters 2548 * again to get what the handle is. Note that every queue is always 2549 * assigned to traffic class zero, because we don't actually use them. 2550 */ 2551 bzero(&context, sizeof (struct i40e_vsi_context)); 2552 context.seid = i40e->i40e_vsi_id; 2553 context.pf_num = hw->pf_id; 2554 err = i40e_aq_get_vsi_params(hw, &context, NULL); 2555 if (err != I40E_SUCCESS) { 2556 i40e_error(i40e, "get VSI params failed with %d", err); 2557 return (B_FALSE); 2558 } 2559 tctx.rdylist = LE_16(context.info.qs_handle[0]); 2560 2561 err = i40e_clear_lan_tx_queue_context(hw, itrq->itrq_index); 2562 if (err != I40E_SUCCESS) { 2563 i40e_error(i40e, "failed to clear tx queue %d context: %d", 2564 itrq->itrq_index, err); 2565 return (B_FALSE); 2566 } 2567 2568 err = i40e_set_lan_tx_queue_context(hw, itrq->itrq_index, &tctx); 2569 if (err != I40E_SUCCESS) { 2570 i40e_error(i40e, "failed to set tx queue %d context: %d", 2571 itrq->itrq_index, err); 2572 return (B_FALSE); 2573 } 2574 2575 return (B_TRUE); 2576 } 2577 2578 /* 2579 * Take care of setting up the descriptor rings and actually programming the 2580 * device. See 8.4.3.1.1 for what we need to do here. 2581 */ 2582 static boolean_t 2583 i40e_setup_tx_rings(i40e_t *i40e) 2584 { 2585 int i; 2586 i40e_hw_t *hw = &i40e->i40e_hw_space; 2587 2588 for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 2589 i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i]; 2590 uint32_t reg; 2591 2592 /* 2593 * Step 1. Clear the queue disable flag and verify that the 2594 * index is set correctly. 2595 */ 2596 i40e_pre_tx_queue_cfg(hw, i, B_TRUE); 2597 2598 /* 2599 * Step 2. Prepare the queue's FPM/HMC context. 
2600 		 */
2601 		if (i40e_setup_tx_hmc(itrq) == B_FALSE)
2602 			return (B_FALSE);
2603 
2604 		/*
2605 		 * Step 3. Verify that it's clear that this PF owns this queue.
2606 		 */
2607 		reg = I40E_QTX_CTL_PF_QUEUE;
2608 		reg |= (hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
2609 		    I40E_QTX_CTL_PF_INDX_MASK;
2610 		I40E_WRITE_REG(hw, I40E_QTX_CTL(itrq->itrq_index), reg);
2611 		i40e_flush(hw);
2612 
2613 		/*
2614 		 * Step 4. Set the QENA_REQ flag.
2615 		 */
2616 		reg = I40E_READ_REG(hw, I40E_QTX_ENA(i));
2617 		VERIFY0(reg & (I40E_QTX_ENA_QENA_REQ_MASK |
2618 		    I40E_QTX_ENA_QENA_STAT_MASK));
2619 		reg |= I40E_QTX_ENA_QENA_REQ_MASK;
2620 		I40E_WRITE_REG(hw, I40E_QTX_ENA(i), reg);
2621 	}
2622 
2623 	/*
2624 	 * Note, we wait for every queue to be enabled before we start checking.
2625 	 * This will hopefully cause most queues to be enabled at this point.
2626 	 */
2627 	for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
2628 		uint32_t j, reg;
2629 
2630 		/*
2631 		 * Step 5. Verify that QENA_STAT has been set. It's promised
2632 		 * that this should occur within about 10 us, but like BSD,
2633 		 * we'll try for up to 100 ms for this queue.
2634 		 */
2635 		for (j = 0; j < I40E_RING_WAIT_NTRIES; j++) {
2636 			reg = I40E_READ_REG(hw, I40E_QTX_ENA(i));
2637 
2638 			if (reg & I40E_QTX_ENA_QENA_STAT_MASK)
2639 				break;
2640 			i40e_msec_delay(I40E_RING_WAIT_PAUSE);
2641 		}
2642 
2643 		if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) {
2644 			i40e_error(i40e, "failed to enable tx queue %d, timed "
2645 			    "out", i);
2646 			return (B_FALSE);
2647 		}
2648 	}
2649 
2650 	return (B_TRUE);
2651 }
2652 
2653 void
2654 i40e_stop(i40e_t *i40e, boolean_t free_allocations)
2655 {
2656 	int i;
2657 
2658 	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
2659 
2660 	/*
2661 	 * Shutdown and drain the tx and rx pipeline. We do this using the
2662 	 * following steps.
2663 	 *
2664 	 * 1) Shutdown interrupts to all the queues (trying to keep the admin
2665 	 *    queue alive).
2666 	 *
2667 	 * 2) Remove all of the interrupt tx and rx causes by setting the
2668 	 *    interrupt linked lists to zero.
2669 	 *
2670 	 * 3) Shutdown the tx and rx rings. Because i40e_shutdown_rings() should
2671 	 *    wait for all the queues to be disabled, once we reach that point
2672 	 *    it should be safe to free associated data.
2673 	 *
2674 	 * 4) Wait 50ms after all that is done. This ensures that the rings are
2675 	 *    ready for programming again and we don't have to think about this
2676 	 *    in other parts of the driver.
2677 	 *
2678 	 * 5) Disable remaining chip interrupts (admin queue, etc.).
2679 	 *
2680 	 * 6) Verify that FM is happy with all the register accesses we
2681 	 *    performed.
2682 	 */
2683 	i40e_intr_io_disable_all(i40e);
2684 	i40e_intr_io_clear_cause(i40e);
2685 
2686 	if (i40e_shutdown_rings(i40e) == B_FALSE) {
2687 		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
2688 	}
2689 
2690 	delay(50 * drv_usectohz(1000));
2691 
2692 	i40e_intr_chip_fini(i40e);
2693 
2694 	for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
2695 		mutex_enter(&i40e->i40e_trqpairs[i].itrq_rx_lock);
2696 		mutex_enter(&i40e->i40e_trqpairs[i].itrq_tx_lock);
2697 	}
2698 
2699 	/*
2700 	 * We should consider refactoring this to be part of the ring start /
2701 	 * stop routines at some point.
2702 	 */
2703 	for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
2704 		i40e_stats_trqpair_fini(&i40e->i40e_trqpairs[i]);
2705 	}
2706 
2707 	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_cfg_handle) !=
2708 	    DDI_FM_OK) {
2709 		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
2710 	}
2711 
2712 	for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
2713 		i40e_tx_cleanup_ring(&i40e->i40e_trqpairs[i]);
2714 	}
2715 
2716 	for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
2717 		mutex_exit(&i40e->i40e_trqpairs[i].itrq_rx_lock);
2718 		mutex_exit(&i40e->i40e_trqpairs[i].itrq_tx_lock);
2719 	}
2720 
2721 	i40e_stat_vsi_fini(i40e);
2722 
2723 	i40e->i40e_link_speed = 0;
2724 	i40e->i40e_link_duplex = 0;
2725 	i40e_link_state_set(i40e, LINK_STATE_UNKNOWN);
2726 
2727 	if (free_allocations) {
2728 		i40e_free_ring_mem(i40e, B_FALSE);
2729 	}
2730 }
2731 
2732 boolean_t
2733 i40e_start(i40e_t *i40e, boolean_t alloc)
2734 {
2735 	i40e_hw_t *hw = &i40e->i40e_hw_space;
2736 	boolean_t rc = B_TRUE;
2737 	int i, err;
2738 
2739 	ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
2740 
2741 	if (alloc) {
2742 		if (i40e_alloc_ring_mem(i40e) == B_FALSE) {
2743 			i40e_error(i40e,
2744 			    "Failed to allocate ring memory");
2745 			return (B_FALSE);
2746 		}
2747 	}
2748 
2749 	/*
2750 	 * This should get refactored to be part of ring start and stop at
2751 	 * some point, along with most of the logic here.
2752 	 */
2753 	for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
2754 		if (i40e_stats_trqpair_init(&i40e->i40e_trqpairs[i]) ==
2755 		    B_FALSE) {
2756 			int j;
2757 
2758 			for (j = 0; j < i; j++) {
2759 				i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[j];
2760 				i40e_stats_trqpair_fini(itrq);
2761 			}
2762 			return (B_FALSE);
2763 		}
2764 	}
2765 
2766 	if (!i40e_chip_start(i40e)) {
2767 		i40e_fm_ereport(i40e, DDI_FM_DEVICE_INVAL_STATE);
2768 		rc = B_FALSE;
2769 		goto done;
2770 	}
2771 
2772 	if (i40e_setup_rx_rings(i40e) == B_FALSE) {
2773 		rc = B_FALSE;
2774 		goto done;
2775 	}
2776 
2777 	if (i40e_setup_tx_rings(i40e) == B_FALSE) {
2778 		rc = B_FALSE;
2779 		goto done;
2780 	}
2781 
2782 	/*
2783 	 * Enable broadcast traffic; however, do not enable multicast traffic.
2784 	 * That's handled exclusively through MAC's mc_multicst routines.
2785 	 */
2786 	err = i40e_aq_set_vsi_broadcast(hw, i40e->i40e_vsi_id, B_TRUE, NULL);
2787 	if (err != I40E_SUCCESS) {
2788 		i40e_error(i40e, "failed to enable broadcast traffic: %d", err);
2789 		rc = B_FALSE;
2790 		goto done;
2791 	}
2792 
2793 	err = i40e_aq_set_mac_config(hw, i40e->i40e_frame_max, B_TRUE, 0, NULL);
2794 	if (err != I40E_SUCCESS) {
2795 		i40e_error(i40e, "failed to set MAC config: %d", err);
2796 		rc = B_FALSE;
2797 		goto done;
2798 	}
2799 
2800 	/*
2801 	 * Finally, make sure that we're happy from an FM perspective.
2802 	 */
2803 	if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
2804 	    DDI_FM_OK) {
2805 		rc = B_FALSE;
2806 		goto done;
2807 	}
2808 
2809 	/* Clear state bits prior to final interrupt enabling. */
2810 	atomic_and_32(&i40e->i40e_state,
2811 	    ~(I40E_ERROR | I40E_STALL | I40E_OVERTEMP));
2812 
2813 	i40e_intr_io_enable_all(i40e);
2814 
2815 done:
2816 	if (rc == B_FALSE) {
2817 		i40e_stop(i40e, B_FALSE);
2818 		if (alloc == B_TRUE) {
2819 			i40e_free_ring_mem(i40e, B_TRUE);
2820 		}
2821 		ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
2822 	}
2823 
2824 	return (rc);
2825 }
2826 
2827 /*
2828  * We may have loaned rx buffers up to the stack. As such, if we still have
2829  * any of them outstanding, then we will not continue with detach.
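 *
 * (The pending count below refers to rx buffers that were loaned to the
 * stack as mblks; when the stack frees such a buffer, its free routine
 * decrements i40e_rx_pending and signals i40e_rx_pending_cv, which is what
 * i40e_drain_rx() sleeps on. See i40e_transceiver.c for the details.)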
2830 */ 2831 static boolean_t 2832 i40e_drain_rx(i40e_t *i40e) 2833 { 2834 mutex_enter(&i40e->i40e_rx_pending_lock); 2835 while (i40e->i40e_rx_pending > 0) { 2836 if (cv_reltimedwait(&i40e->i40e_rx_pending_cv, 2837 &i40e->i40e_rx_pending_lock, 2838 drv_usectohz(I40E_DRAIN_RX_WAIT), TR_CLOCK_TICK) == -1) { 2839 mutex_exit(&i40e->i40e_rx_pending_lock); 2840 return (B_FALSE); 2841 } 2842 } 2843 mutex_exit(&i40e->i40e_rx_pending_lock); 2844 2845 return (B_TRUE); 2846 } 2847 2848 static int 2849 i40e_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 2850 { 2851 i40e_t *i40e; 2852 struct i40e_osdep *osdep; 2853 i40e_hw_t *hw; 2854 int instance; 2855 2856 if (cmd != DDI_ATTACH) 2857 return (DDI_FAILURE); 2858 2859 instance = ddi_get_instance(devinfo); 2860 i40e = kmem_zalloc(sizeof (i40e_t), KM_SLEEP); 2861 2862 i40e->i40e_aqbuf = kmem_zalloc(I40E_ADMINQ_BUFSZ, KM_SLEEP); 2863 i40e->i40e_instance = instance; 2864 i40e->i40e_dip = devinfo; 2865 2866 hw = &i40e->i40e_hw_space; 2867 osdep = &i40e->i40e_osdep_space; 2868 hw->back = osdep; 2869 osdep->ios_i40e = i40e; 2870 2871 ddi_set_driver_private(devinfo, i40e); 2872 2873 i40e_fm_init(i40e); 2874 i40e->i40e_attach_progress |= I40E_ATTACH_FM_INIT; 2875 2876 if (pci_config_setup(devinfo, &osdep->ios_cfg_handle) != DDI_SUCCESS) { 2877 i40e_error(i40e, "Failed to map PCI configurations."); 2878 goto attach_fail; 2879 } 2880 i40e->i40e_attach_progress |= I40E_ATTACH_PCI_CONFIG; 2881 2882 i40e_identify_hardware(i40e); 2883 2884 if (!i40e_regs_map(i40e)) { 2885 i40e_error(i40e, "Failed to map device registers."); 2886 goto attach_fail; 2887 } 2888 i40e->i40e_attach_progress |= I40E_ATTACH_REGS_MAP; 2889 2890 i40e_init_properties(i40e); 2891 i40e->i40e_attach_progress |= I40E_ATTACH_PROPS; 2892 2893 if (!i40e_common_code_init(i40e, hw)) 2894 goto attach_fail; 2895 i40e->i40e_attach_progress |= I40E_ATTACH_COMMON_CODE; 2896 2897 /* 2898 * When we participate in IRM, we should make sure that we register 2899 * ourselves with it before callbacks. 
2900 */ 2901 if (!i40e_alloc_intrs(i40e, devinfo)) { 2902 i40e_error(i40e, "Failed to allocate interrupts."); 2903 goto attach_fail; 2904 } 2905 i40e->i40e_attach_progress |= I40E_ATTACH_ALLOC_INTR; 2906 2907 if (!i40e_alloc_trqpairs(i40e)) { 2908 i40e_error(i40e, 2909 "Failed to allocate receive & transmit rings."); 2910 goto attach_fail; 2911 } 2912 i40e->i40e_attach_progress |= I40E_ATTACH_ALLOC_RINGSLOCKS; 2913 2914 if (!i40e_map_intrs_to_vectors(i40e)) { 2915 i40e_error(i40e, "Failed to map interrupts to vectors."); 2916 goto attach_fail; 2917 } 2918 2919 if (!i40e_add_intr_handlers(i40e)) { 2920 i40e_error(i40e, "Failed to add the interrupt handlers."); 2921 goto attach_fail; 2922 } 2923 i40e->i40e_attach_progress |= I40E_ATTACH_ADD_INTR; 2924 2925 if (!i40e_final_init(i40e)) { 2926 i40e_error(i40e, "Final initialization failed."); 2927 goto attach_fail; 2928 } 2929 i40e->i40e_attach_progress |= I40E_ATTACH_INIT; 2930 2931 if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_cfg_handle) != 2932 DDI_FM_OK) { 2933 ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST); 2934 goto attach_fail; 2935 } 2936 2937 if (!i40e_stats_init(i40e)) { 2938 i40e_error(i40e, "Stats initialization failed."); 2939 goto attach_fail; 2940 } 2941 i40e->i40e_attach_progress |= I40E_ATTACH_STATS; 2942 2943 if (!i40e_register_mac(i40e)) { 2944 i40e_error(i40e, "Failed to register to MAC/GLDv3"); 2945 goto attach_fail; 2946 } 2947 i40e->i40e_attach_progress |= I40E_ATTACH_MAC; 2948 2949 i40e->i40e_periodic_id = ddi_periodic_add(i40e_timer, i40e, 2950 I40E_CYCLIC_PERIOD, DDI_IPL_0); 2951 if (i40e->i40e_periodic_id == 0) { 2952 i40e_error(i40e, "Failed to add the link-check timer"); 2953 goto attach_fail; 2954 } 2955 i40e->i40e_attach_progress |= I40E_ATTACH_LINK_TIMER; 2956 2957 if (!i40e_enable_interrupts(i40e)) { 2958 i40e_error(i40e, "Failed to enable DDI interrupts"); 2959 goto attach_fail; 2960 } 2961 i40e->i40e_attach_progress |= I40E_ATTACH_ENABLE_INTR; 2962 2963 atomic_or_32(&i40e->i40e_state, I40E_INITIALIZED); 2964 2965 mutex_enter(&i40e_glock); 2966 list_insert_tail(&i40e_glist, i40e); 2967 mutex_exit(&i40e_glock); 2968 2969 return (DDI_SUCCESS); 2970 2971 attach_fail: 2972 i40e_unconfigure(devinfo, i40e); 2973 return (DDI_FAILURE); 2974 } 2975 2976 static int 2977 i40e_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 2978 { 2979 i40e_t *i40e; 2980 2981 if (cmd != DDI_DETACH) 2982 return (DDI_FAILURE); 2983 2984 i40e = (i40e_t *)ddi_get_driver_private(devinfo); 2985 if (i40e == NULL) { 2986 i40e_log(NULL, "i40e_detach() called with no i40e pointer!"); 2987 return (DDI_FAILURE); 2988 } 2989 2990 if (i40e_drain_rx(i40e) == B_FALSE) { 2991 i40e_log(i40e, "timed out draining DMA resources, %d buffers " 2992 "remain", i40e->i40e_rx_pending); 2993 return (DDI_FAILURE); 2994 } 2995 2996 mutex_enter(&i40e_glock); 2997 list_remove(&i40e_glist, i40e); 2998 mutex_exit(&i40e_glock); 2999 3000 i40e_unconfigure(devinfo, i40e); 3001 3002 return (DDI_SUCCESS); 3003 } 3004 3005 static struct cb_ops i40e_cb_ops = { 3006 nulldev, /* cb_open */ 3007 nulldev, /* cb_close */ 3008 nodev, /* cb_strategy */ 3009 nodev, /* cb_print */ 3010 nodev, /* cb_dump */ 3011 nodev, /* cb_read */ 3012 nodev, /* cb_write */ 3013 nodev, /* cb_ioctl */ 3014 nodev, /* cb_devmap */ 3015 nodev, /* cb_mmap */ 3016 nodev, /* cb_segmap */ 3017 nochpoll, /* cb_chpoll */ 3018 ddi_prop_op, /* cb_prop_op */ 3019 NULL, /* cb_stream */ 3020 D_MP | D_HOTPLUG, /* cb_flag */ 3021 CB_REV, /* cb_rev */ 3022 nodev, /* cb_aread */ 3023 nodev /* cb_awrite 
*/ 3024 }; 3025 3026 static struct dev_ops i40e_dev_ops = { 3027 DEVO_REV, /* devo_rev */ 3028 0, /* devo_refcnt */ 3029 NULL, /* devo_getinfo */ 3030 nulldev, /* devo_identify */ 3031 nulldev, /* devo_probe */ 3032 i40e_attach, /* devo_attach */ 3033 i40e_detach, /* devo_detach */ 3034 nodev, /* devo_reset */ 3035 &i40e_cb_ops, /* devo_cb_ops */ 3036 NULL, /* devo_bus_ops */ 3037 ddi_power, /* devo_power */ 3038 ddi_quiesce_not_supported /* devo_quiesce */ 3039 }; 3040 3041 static struct modldrv i40e_modldrv = { 3042 &mod_driverops, 3043 i40e_ident, 3044 &i40e_dev_ops 3045 }; 3046 3047 static struct modlinkage i40e_modlinkage = { 3048 MODREV_1, 3049 &i40e_modldrv, 3050 NULL 3051 }; 3052 3053 /* 3054 * Module Initialization Functions. 3055 */ 3056 int 3057 _init(void) 3058 { 3059 int status; 3060 3061 list_create(&i40e_glist, sizeof (i40e_t), offsetof(i40e_t, i40e_glink)); 3062 list_create(&i40e_dlist, sizeof (i40e_device_t), 3063 offsetof(i40e_device_t, id_link)); 3064 mutex_init(&i40e_glock, NULL, MUTEX_DRIVER, NULL); 3065 mac_init_ops(&i40e_dev_ops, I40E_MODULE_NAME); 3066 3067 status = mod_install(&i40e_modlinkage); 3068 if (status != DDI_SUCCESS) { 3069 mac_fini_ops(&i40e_dev_ops); 3070 mutex_destroy(&i40e_glock); 3071 list_destroy(&i40e_dlist); 3072 list_destroy(&i40e_glist); 3073 } 3074 3075 return (status); 3076 } 3077 3078 int 3079 _info(struct modinfo *modinfop) 3080 { 3081 return (mod_info(&i40e_modlinkage, modinfop)); 3082 } 3083 3084 int 3085 _fini(void) 3086 { 3087 int status; 3088 3089 status = mod_remove(&i40e_modlinkage); 3090 if (status == DDI_SUCCESS) { 3091 mac_fini_ops(&i40e_dev_ops); 3092 mutex_destroy(&i40e_glock); 3093 list_destroy(&i40e_dlist); 3094 list_destroy(&i40e_glist); 3095 } 3096 3097 return (status); 3098 } 3099