1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015 Bjoern A. Zeeb 5 * Copyright (c) 2020 Denis Salopek 6 * 7 * This software was developed by SRI International and the University of 8 * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249 9 * ("MRC2"), as part of the DARPA MRC research programme. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <machine/bus.h>

#include "adapter.h"

/* PCI IDs the probe routine matches against. */
#define PCI_VENDOR_ID_XILINX	0x10ee
#define PCI_DEVICE_ID_SUME	0x7028

/* SUME bus driver interface */
static int sume_probe(device_t);
static int sume_attach(device_t);
static int sume_detach(device_t);

static device_method_t sume_methods[] = {
	DEVMETHOD(device_probe,		sume_probe),
	DEVMETHOD(device_attach,	sume_attach),
	DEVMETHOD(device_detach,	sume_detach),
	DEVMETHOD_END
};

static driver_t sume_driver = {
	"sume",
	sume_methods,
	sizeof(struct sume_adapter)
};

/*
 * The DMA engine for SUME generates interrupts for each RX/TX transaction.
 * Depending on the channel (0 if packet transaction, 1 if register transaction)
 * the used bits of the interrupt vector will be the lowest or the second lowest
 * 5 bits.
 *
 * When receiving packets from SUME (RX):
 * (1) SUME received a packet on one of the interfaces.
 * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX
 *     transaction).
 * (3) We read the length of the incoming packet and the offset along with the
 *     'last' flag from the SUME registers.
 * (4) We prepare for the DMA transaction by setting the bouncebuffer on the
 *     address buf_addr. For now, this is how it's done:
 *     - First 3*sizeof(uint32_t) bytes are: lower and upper 32 bits of physical
 *     address where we want the data to arrive (buf_addr[0] and buf_addr[1]),
 *     and length of incoming data (buf_addr[2]).
 *     - Data will start right after, at buf_addr+3*sizeof(uint32_t). The
 *     physical address buf_hw_addr is a block of contiguous memory mapped to
 *     buf_addr, so we can set the incoming data's physical address (buf_addr[0]
 *     and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t).
 * (5) We notify SUME that the bouncebuffer is ready for the transaction by
 *     writing the lower/upper physical address buf_hw_addr to the SUME
 *     registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as
 *     well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF.
 * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 -
 *     bouncebuffer received).
 * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 -
 *     transaction is done).
 * (8) SUME can do both steps (6) and (7) using the same interrupt.
 * (9) We read the first 16 bytes (metadata) of the received data and note the
 *     incoming interface so we can later forward it to the right one in the OS
 *     (sume0, sume1, sume2 or sume3).
 * (10) We create an mbuf and copy the data from the bouncebuffer to the mbuf
 *     and set the mbuf rcvif to the incoming interface.
 * (11) We forward the mbuf to the appropriate interface via ifp->if_input.
 *
 * When sending packets to SUME (TX):
 * (1) The OS calls sume_if_start() function on TX.
 * (2) We get the mbuf packet data and copy it to the
 *     buf_addr+3*sizeof(uint32_t) + metadata 16 bytes.
 * (3) We create the metadata based on the output interface and copy it to the
 *     buf_addr+3*sizeof(uint32_t).
 * (4) We write the offset/last and length of the packet to the SUME registers
 *     RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF.
 * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes
 *     with the physical address and length just as in RX step (4).
 * (6) We notify SUME that the bouncebuffer is ready by writing to SUME
 *     registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and
 *     RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5).
 * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 -
 *     bouncebuffer is read).
 * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 -
 *     transaction is done).
 * (9) SUME can do both steps (7) and (8) using the same interrupt.
 *
 * Internal registers
 * Every module in the SUME hardware has its own set of internal registers
 * (IDs, for debugging and statistic purposes, etc.). Their base addresses are
 * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the
 * offsets to different memory locations of every module are defined in their
 * corresponding folder inside the library. These registers can be RO/RW and
 * there is a special method to fetch/change this data over 1 or 2 DMA
 * transactions. For writing, by calling the sume_module_reg_write(). For
 * reading, by calling the sume_module_reg_write() and then
 * sume_module_reg_read(). Check those functions for more information.
 */

MALLOC_DECLARE(M_SUME);
MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver");

static void check_tx_queues(struct sume_adapter *);
static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *,
    uint64_t);

/* Unit-number allocator for the sumeN interfaces. */
static struct unrhdr *unr;

/* Table of supported PCI device IDs (vendor is checked separately). */
static struct {
	uint16_t	device;
	char		*desc;
} sume_pciids[] = {
	{PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"},
};

/*
 * BAR0 register accessors. The RIFFA register offsets are word indices,
 * hence the << 2 conversion to a byte offset.
 */
static inline uint32_t
read_reg(struct sume_adapter *adapter, int offset)
{

	return (bus_space_read_4(adapter->bt, adapter->bh, offset << 2));
}

static inline void
write_reg(struct sume_adapter *adapter, int offset, uint32_t val)
{

	bus_space_write_4(adapter->bt, adapter->bh, offset << 2, val);
}

/*
 * Match the Xilinx vendor ID and any device ID listed in sume_pciids[];
 * set the device description on a hit.
 */
static int
sume_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);

	if (v != PCI_VENDOR_ID_XILINX)
		return (ENXIO);

	for (i = 0; i < nitems(sume_pciids); i++) {
		if (d == sume_pciids[i].device) {
			device_set_desc(dev, sume_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

/*
 * Building mbuf for packet received from SUME. We expect to receive 'len'
 * bytes of data (including metadata) written from the bouncebuffer address
 * buf_addr+3*sizeof(uint32_t). Metadata will tell us which SUME interface
 * received the packet (sport will be 1, 2, 4 or 8), the packet length (plen),
 * and the magic word needs to be 0xcafe. When we have the packet data, we
 * create an mbuf and copy the data to it using m_copyback() function, set the
 * correct interface to rcvif and return the mbuf to be later sent to the OS
 * with if_input.
209 */ 210 static struct mbuf * 211 sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len) 212 { 213 struct nf_priv *nf_priv; 214 struct mbuf *m; 215 if_t ifp = NULL; 216 int np; 217 uint16_t dport, plen, magic; 218 device_t dev = adapter->dev; 219 uint8_t *indata = (uint8_t *) 220 adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr + 221 sizeof(struct nf_bb_desc); 222 struct nf_metadata *mdata = (struct nf_metadata *) indata; 223 224 /* The metadata header is 16 bytes. */ 225 if (len < sizeof(struct nf_metadata)) { 226 device_printf(dev, "short frame (%d)\n", len); 227 adapter->packets_err++; 228 adapter->bytes_err += len; 229 return (NULL); 230 } 231 232 dport = le16toh(mdata->dport); 233 plen = le16toh(mdata->plen); 234 magic = le16toh(mdata->magic); 235 236 if (sizeof(struct nf_metadata) + plen > len || 237 magic != SUME_RIFFA_MAGIC) { 238 device_printf(dev, "corrupted packet (%zd + %d > %d || magic " 239 "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen, 240 len, magic, SUME_RIFFA_MAGIC); 241 return (NULL); 242 } 243 244 /* We got the packet from one of the even bits */ 245 np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1; 246 if (np > SUME_NPORTS) { 247 device_printf(dev, "invalid destination port 0x%04x (%d)\n", 248 dport, np); 249 adapter->packets_err++; 250 adapter->bytes_err += plen; 251 return (NULL); 252 } 253 ifp = adapter->ifp[np]; 254 nf_priv = if_getsoftc(ifp); 255 nf_priv->stats.rx_packets++; 256 nf_priv->stats.rx_bytes += plen; 257 258 /* If the interface is down, well, we are done. */ 259 if (!(if_getflags(ifp) & IFF_UP)) { 260 nf_priv->stats.ifc_down_packets++; 261 nf_priv->stats.ifc_down_bytes += plen; 262 return (NULL); 263 } 264 265 if (adapter->sume_debug) 266 printf("Building mbuf with length: %d\n", plen); 267 268 m = m_getm(NULL, plen, M_NOWAIT, MT_DATA); 269 if (m == NULL) { 270 adapter->packets_err++; 271 adapter->bytes_err += plen; 272 return (NULL); 273 } 274 275 /* Copy the data in at the right offset. 
*/ 276 m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata))); 277 m->m_pkthdr.rcvif = ifp; 278 279 return (m); 280 } 281 282 /* 283 * SUME interrupt handler for when we get a valid interrupt from the board. 284 * Theoretically, we can receive interrupt for any of the available channels, 285 * but RIFFA DMA uses only 2: 0 and 1, so we use only vect0. The vector is a 32 286 * bit number, using 5 bits for every channel, the least significant bits 287 * correspond to channel 0 and the next 5 bits correspond to channel 1. Vector 288 * bits for RX/TX are: 289 * RX 290 * bit 0 - new transaction from SUME 291 * bit 1 - SUME received our bouncebuffer address 292 * bit 2 - SUME copied the received data to our bouncebuffer, transaction done 293 * TX 294 * bit 3 - SUME received our bouncebuffer address 295 * bit 4 - SUME copied the data from our bouncebuffer, transaction done 296 * 297 * There are two finite state machines (one for TX, one for RX). We loop 298 * through channels 0 and 1 to check and our current state and which interrupt 299 * bit is set. 300 * TX 301 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction. 302 * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer 303 * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3 304 * to go to the next state. 305 * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send 306 * our packet). Then we get the length of the sent data and go back to the 307 * IDLE state. 308 * RX 309 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the interrupt bit 0 (new RX 310 * transaction). When we get it, we prepare our bouncebuffer for reading and 311 * trigger the SUME to start the transaction. Go to the next state. 312 * SUME_RIFFA_CHAN_STATE_READY: waiting for the interrupt bit 1 (SUME got our 313 * bouncebuffer). Go to the next state. 
 * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready,
 * we can build the mbuf and go back to the IDLE state.
 */
static void
sume_intr_handler(void *arg)
{
	struct sume_adapter *adapter = arg;
	uint32_t vect, vect0, len;
	int ch, loops;
	device_t dev = adapter->dev;
	struct mbuf *m = NULL;
	if_t ifp = NULL;
	struct riffa_chnl_dir *send, *recv;

	SUME_LOCK(adapter);

	/* Reading the IRQ register also acknowledges the interrupt. */
	vect0 = read_reg(adapter, RIFFA_IRQ_REG0_OFF);
	if ((vect0 & SUME_INVALID_VECT) != 0) {
		SUME_UNLOCK(adapter);
		return;
	}

	/*
	 * We only have one interrupt for all channels and no way
	 * to quickly lookup for which channel(s) we got an interrupt?
	 */
	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
		/* Each channel owns 5 bits of the vector. */
		vect = vect0 >> (5 * ch);
		send = adapter->send[ch];
		recv = adapter->recv[ch];

		/*
		 * TX state machine; 'loops' bounds the iterations in case
		 * the hardware keeps a bit asserted that we never consume.
		 */
		loops = 0;
		while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
		    loops <= 5) {
			/*
			 * NOTE(review): the TX loop bound is 'loops <= 5'
			 * while the RX loop below uses 'loops < 5' —
			 * asymmetry looks unintentional; confirm.
			 */
			if (adapter->sume_debug)
				device_printf(dev, "TX ch %d state %u vect = "
				    "0x%08x\n", ch, send->state, vect);
			switch (send->state) {
			case SUME_RIFFA_CHAN_STATE_IDLE:
				break;
			case SUME_RIFFA_CHAN_STATE_READY:
				/* Expect bit 3: SUME got our bouncebuffer. */
				if (!(vect & SUME_MSI_TXBUF)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in send+3 state %u: "
					    "vect = 0x%08x\n", ch, send->state,
					    vect);
					send->recovery = 1;
					break;
				}
				send->state = SUME_RIFFA_CHAN_STATE_READ;
				vect &= ~SUME_MSI_TXBUF;
				break;
			case SUME_RIFFA_CHAN_STATE_READ:
				/* Expect bit 4: transaction done. */
				if (!(vect & SUME_MSI_TXDONE)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in send+4 state %u: "
					    "vect = 0x%08x\n", ch, send->state,
					    vect);
					send->recovery = 1;
					break;
				}
				send->state = SUME_RIFFA_CHAN_STATE_LEN;

				/* Read back the transferred length (words). */
				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_RX_TNFR_LEN_REG_OFF));
				if (ch == SUME_RIFFA_CHANNEL_DATA) {
					send->state =
					    SUME_RIFFA_CHAN_STATE_IDLE;
					check_tx_queues(adapter);
				} else if (ch == SUME_RIFFA_CHANNEL_REG)
					/* Wake the sleeping register writer. */
					wakeup(&send->event);
				else {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in send+4 state %u: "
					    "vect = 0x%08x\n", ch, send->state,
					    vect);
					send->recovery = 1;
				}
				vect &= ~SUME_MSI_TXDONE;
				break;
			case SUME_RIFFA_CHAN_STATE_LEN:
				break;
			default:
				device_printf(dev, "unknown TX state!\n");
			}
			loops++;
		}

		/*
		 * NOTE(review): this fires when send->recovery IS set, but
		 * the message text says "not in recovery" — wording looks
		 * inverted; confirm intent before trusting the log.
		 */
		if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
		    send->recovery)
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during TX; not in recovery; state = %d loops = "
			    "%d\n", ch, vect, send->state, loops);

		/* RX state machine, same bounded-loop pattern. */
		loops = 0;
		while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
		    SUME_MSI_RXDONE)) && loops < 5) {
			if (adapter->sume_debug)
				device_printf(dev, "RX ch %d state %u vect = "
				    "0x%08x\n", ch, recv->state, vect);
			switch (recv->state) {
			case SUME_RIFFA_CHAN_STATE_IDLE:
				/* Expect bit 0: new RX transaction. */
				if (!(vect & SUME_MSI_RXQUE)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+0 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				uint32_t max_ptr;

				/* Clear recovery state. */
				recv->recovery = 0;

				/* Get offset and length. */
				recv->offlast = read_reg(adapter,
				    RIFFA_CHNL_REG(ch,
				    RIFFA_TX_OFFLAST_REG_OFF));
				recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_LEN_REG_OFF));

				/* Boundary checks. */
				max_ptr = (uint32_t)((uintptr_t)recv->buf_addr
				    + SUME_RIFFA_OFFSET(recv->offlast)
				    + SUME_RIFFA_LEN(recv->len) - 1);
				if (max_ptr <
				    (uint32_t)((uintptr_t)recv->buf_addr))
					device_printf(dev, "receive buffer "
					    "wrap-around overflow.\n");
				if (SUME_RIFFA_OFFSET(recv->offlast) +
				    SUME_RIFFA_LEN(recv->len) >
				    adapter->sg_buf_size)
					device_printf(dev, "receive buffer too"
					    " small.\n");

				/* Fill the bouncebuf "descriptor". */
				sume_fill_bb_desc(adapter, recv,
				    SUME_RIFFA_LEN(recv->len));

				/* Hand the scatter-gather list to SUME. */
				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
				    BUS_DMASYNC_PREREAD |
				    BUS_DMASYNC_PREWRITE);
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_ADDR_LO_REG_OFF),
				    SUME_RIFFA_LO_ADDR(recv->buf_hw_addr));
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_ADDR_HI_REG_OFF),
				    SUME_RIFFA_HI_ADDR(recv->buf_hw_addr));
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_LEN_REG_OFF),
				    4 * recv->num_sg);
				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
				    BUS_DMASYNC_POSTREAD |
				    BUS_DMASYNC_POSTWRITE);

				recv->state = SUME_RIFFA_CHAN_STATE_READY;
				vect &= ~SUME_MSI_RXQUE;
				break;
			case SUME_RIFFA_CHAN_STATE_READY:
				/* Expect bit 1: SUME got our bouncebuffer. */
				if (!(vect & SUME_MSI_RXBUF)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+1 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				recv->state = SUME_RIFFA_CHAN_STATE_READ;
				vect &= ~SUME_MSI_RXBUF;
				break;
			case SUME_RIFFA_CHAN_STATE_READ:
				/* Expect bit 2: data landed, transaction done. */
				if (!(vect & SUME_MSI_RXDONE)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+2 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_TNFR_LEN_REG_OFF));

				/* Remember, len and recv->len are words. */
				if (ch == SUME_RIFFA_CHANNEL_DATA) {
					/* len << 2 converts words to bytes. */
					m = sume_rx_build_mbuf(adapter,
					    len << 2);
					recv->state =
					    SUME_RIFFA_CHAN_STATE_IDLE;
				} else if (ch == SUME_RIFFA_CHANNEL_REG)
					/* Wake the sleeping register reader. */
					wakeup(&recv->event);
				else {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+2 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
				}
				vect &= ~SUME_MSI_RXDONE;
				break;
			case SUME_RIFFA_CHAN_STATE_LEN:
				break;
			default:
				device_printf(dev, "unknown RX state!\n");
			}
			loops++;
		}

		/*
		 * NOTE(review): as with TX above, this runs when
		 * recv->recovery IS set despite the "not in recovery" text.
		 */
		if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
		    SUME_MSI_RXDONE)) && recv->recovery) {
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during RX; not in recovery; state = %d, loops = "
			    "%d\n", ch, vect, recv->state, loops);

			/* Clean the unfinished transaction. */
			if (ch == SUME_RIFFA_CHANNEL_REG &&
			    vect & SUME_MSI_RXDONE) {
				read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_TNFR_LEN_REG_OFF));
				recv->recovery = 0;
			}
		}
	}
	SUME_UNLOCK(adapter);

	/* Deliver the (at most one) received packet outside the lock. */
	if (m != NULL) {
		ifp = m->m_pkthdr.rcvif;
		if_input(ifp, m);
	}
}

/*
 * As we cannot disable interrupt generation, ignore early interrupts by waiting
 * for the adapter to go into the 'running' state.
 */
static int
sume_intr_filter(void *arg)
{
	struct sume_adapter *adapter = arg;

	/* Drop interrupts that arrive before attach completes. */
	if (adapter->running == 0)
		return (FILTER_STRAY);

	return (FILTER_SCHEDULE_THREAD);
}

/*
 * Map BAR0, set up the MSI interrupt, enable PCIe features and validate the
 * RIFFA configuration read back from the device. Returns 0 on success or an
 * errno; resources acquired here are released by the caller's detach path.
 */
static int
sume_probe_riffa_pci(struct sume_adapter *adapter)
{
	device_t dev = adapter->dev;
	int error, count, capmem;
	uint32_t reg, devctl, linkctl;

	pci_enable_busmaster(dev);

	adapter->rid = PCIR_BAR(0);
	adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &adapter->rid, RF_ACTIVE);
	if (adapter->bar0_addr == NULL) {
		device_printf(dev, "unable to allocate bus resource: "
		    "BAR0 address\n");
		return (ENXIO);
	}
	adapter->bt = rman_get_bustag(adapter->bar0_addr);
	adapter->bh = rman_get_bushandle(adapter->bar0_addr);
	adapter->bar0_len = rman_get_size(adapter->bar0_addr);
	/* The RIFFA register window is exactly 1 KB. */
	if (adapter->bar0_len != 1024) {
		device_printf(dev, "BAR0 resource length %lu != 1024\n",
		    adapter->bar0_len);
		return (ENXIO);
	}

	count = pci_msi_count(dev);
	error = pci_alloc_msi(dev, &count);
	if (error) {
		device_printf(dev, "unable to allocate bus resource: PCI "
		    "MSI\n");
		return (error);
	}

	adapter->irq.rid = 1; /* Should be 1, thus says pci_alloc_msi() */
	adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->irq.res == NULL) {
		device_printf(dev, "unable to allocate bus resource: IRQ "
		    "memory\n");
		return (ENXIO);
	}

	error = bus_setup_intr(dev, adapter->irq.res, INTR_MPSAFE |
	    INTR_TYPE_NET, sume_intr_filter, sume_intr_handler, adapter,
	    &adapter->irq.tag);
	if (error) {
		device_printf(dev, "failed to setup interrupt for rid %d, name"
		    " %s: %d\n", adapter->irq.rid, "SUME_INTR", error);
		return (ENXIO);
	}

	if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) {
		device_printf(dev, "PCI not PCIe capable\n");
		return (ENXIO);
	}

	/* Enable extended tags for more outstanding transactions. */
	devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL, 2);
	pci_write_config(dev, capmem + PCIER_DEVICE_CTL, (devctl |
	    PCIEM_CTL_EXT_TAG_FIELD), 2);

	devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL2, 2);
	pci_write_config(dev, capmem + PCIER_DEVICE_CTL2, (devctl |
	    PCIEM_CTL2_ID_ORDERED_REQ_EN), 2);

	linkctl = pci_read_config(dev, capmem + PCIER_LINK_CTL, 2);
	pci_write_config(dev, capmem + PCIER_LINK_CTL, (linkctl |
	    PCIEM_LINK_CTL_RCB), 2);

	/* RIFFA info register: bit-packed capability/status fields. */
	reg = read_reg(adapter, RIFFA_INFO_REG_OFF);
	adapter->num_sg = RIFFA_SG_ELEMS * ((reg >> 19) & 0xf);
	adapter->sg_buf_size = RIFFA_SG_BUF_SIZE * ((reg >> 19) & 0xf);

	error = ENODEV;
	/* Check bus master is enabled. */
	if (((reg >> 4) & 0x1) != 1) {
		device_printf(dev, "bus master not enabled: %d\n",
		    (reg >> 4) & 0x1);
		return (error);
	}
	/* Check link parameters are valid. */
	if (((reg >> 5) & 0x3f) == 0 || ((reg >> 11) & 0x3) == 0) {
		device_printf(dev, "link parameters not valid: %d %d\n",
		    (reg >> 5) & 0x3f, (reg >> 11) & 0x3);
		return (error);
	}
	/* Check # of channels are within valid range. */
	if ((reg & 0xf) == 0 || (reg & 0xf) > RIFFA_MAX_CHNLS) {
		device_printf(dev, "number of channels out of range: %d\n",
		    reg & 0xf);
		return (error);
	}
	/* Check bus width. */
	if (((reg >> 19) & 0xf) == 0 ||
	    ((reg >> 19) & 0xf) > RIFFA_MAX_BUS_WIDTH_PARAM) {
		device_printf(dev, "bus width out of range: %d\n",
		    (reg >> 19) & 0xf);
		return (error);
	}

	device_printf(dev, "[riffa] # of channels: %d\n",
	    reg & 0xf);
	device_printf(dev, "[riffa] bus interface width: %d\n",
	    ((reg >> 19) & 0xf) << 5);
	device_printf(dev, "[riffa] bus master enabled: %d\n",
	    (reg >> 4) & 0x1);
	device_printf(dev, "[riffa] negotiated link width: %d\n",
	    (reg >> 5) & 0x3f);
	device_printf(dev, "[riffa] negotiated rate width: %d MTs\n",
	    ((reg >> 11) & 0x3) * 2500);
	device_printf(dev, "[riffa] max downstream payload: %d B\n",
	    128 << ((reg >> 13) & 0x7));
	device_printf(dev, "[riffa] max upstream payload: %d B\n",
	    128 << ((reg >> 16) & 0x7));

	return (0);
}

/* If there is no sume_if_init, the ether_ioctl panics. */
static void
sume_if_init(void *sc)
{
}

/* Write the address and length for our incoming / outgoing transaction. */
static void
sume_fill_bb_desc(struct sume_adapter *adapter, struct riffa_chnl_dir *p,
    uint64_t len)
{
	struct nf_bb_desc *bouncebuf = (struct nf_bb_desc *) p->buf_addr;

	/* Payload lands right after this descriptor; length is in words. */
	bouncebuf->lower = (p->buf_hw_addr + sizeof(struct nf_bb_desc));
	bouncebuf->upper = (p->buf_hw_addr + sizeof(struct nf_bb_desc)) >> 32;
	bouncebuf->len = len >> 2;
}

/*
 * Module register locked write. Caller must hold the adapter lock and have
 * already filled send->len (in words) and the register payload.
 */
static int
sume_modreg_write_locked(struct sume_adapter *adapter)
{
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];

	/* Let the FPGA know about the transfer. */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_LEN_REG_OFF), send->len);		/* words */

	/* Fill the bouncebuf "descriptor". */
	sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));

	/* Update the state before intiating the DMA to avoid races. */
	send->state = SUME_RIFFA_CHAN_STATE_READY;

	bus_dmamap_sync(send->ch_tag, send->ch_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/* DMA. */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_SG_ADDR_LO_REG_OFF),
	    SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_SG_ADDR_HI_REG_OFF),
	    SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
	bus_dmamap_sync(send->ch_tag, send->ch_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	return (0);
}

/*
 * Request a register read or write (depending on optype).
 * If optype is set (0x1f) this will result in a register write,
 * otherwise this will result in a register read request at the given
 * address and the result will need to be DMAed back.
 */
static int
sume_module_reg_write(struct nf_priv *nf_priv, struct sume_ifreq *sifr,
    uint32_t optype)
{
	struct sume_adapter *adapter = nf_priv->adapter;
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
	struct nf_regop_data *data;
	int error;

	/*
	 * 1. Make sure the channel is free; otherwise return EBUSY.
	 * 2. Prepare the memory in the bounce buffer (which we always
	 *    use for regs).
	 * 3. Start the DMA process.
	 * 4. Sleep and wait for result and return success or error.
	 */
	SUME_LOCK(adapter);

	if (send->state != SUME_RIFFA_CHAN_STATE_IDLE) {
		SUME_UNLOCK(adapter);
		return (EBUSY);
	}

	data = (struct nf_regop_data *) (send->buf_addr +
	    sizeof(struct nf_bb_desc));
	data->addr = htole32(sifr->addr);
	data->val = htole32(sifr->val);
	/* Tag to identify request. */
	data->rtag = htole32(++send->rtag);
	data->optype = htole32(optype);
	send->len = sizeof(struct nf_regop_data) / 4; /* words */

	error = sume_modreg_write_locked(adapter);
	if (error) {
		SUME_UNLOCK(adapter);
		return (EFAULT);
	}

	/* Timeout after 1s. */
	if (send->state != SUME_RIFFA_CHAN_STATE_LEN)
		error = msleep(&send->event, &adapter->lock, 0,
		    "Waiting recv finish", 1 * hz);

	/* This was a write so we are done; were interrupted, or timed out. */
	if (optype != SUME_MR_READ || error != 0 || error == EWOULDBLOCK) {
		send->state = SUME_RIFFA_CHAN_STATE_IDLE;
		if (optype == SUME_MR_READ)
			error = EWOULDBLOCK;
		else
			error = 0;
	} else
		error = 0;

	/*
	 * For read requests we will update state once we are done
	 * having read the result to avoid any two outstanding
	 * transactions, or we need a queue and validate tags,
	 * which is a lot of work for a low priority, infrequent
	 * event.
	 */

	SUME_UNLOCK(adapter);

	return (error);
}

/* Module register read. */
static int
sume_module_reg_read(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
{
	struct sume_adapter *adapter = nf_priv->adapter;
	struct riffa_chnl_dir *recv = adapter->recv[SUME_RIFFA_CHANNEL_REG];
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
	struct nf_regop_data *data;
	int error = 0;

	/*
	 * 0. Sleep waiting for result if needed (unless condition is
	 *    true already).
	 * 1. Read DMA results.
	 * 2. Update state on *TX* to IDLE to allow next read to start.
	 */
	SUME_LOCK(adapter);

	bus_dmamap_sync(recv->ch_tag, recv->ch_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * We only need to be woken up at the end of the transaction.
	 * Timeout after 1s.
	 */
	if (recv->state != SUME_RIFFA_CHAN_STATE_READ)
		error = msleep(&recv->event, &adapter->lock, 0,
		    "Waiting transaction finish", 1 * hz);

	if (recv->state != SUME_RIFFA_CHAN_STATE_READ || error == EWOULDBLOCK) {
		SUME_UNLOCK(adapter);
		device_printf(adapter->dev, "wait error: %d\n", error);
		return (EWOULDBLOCK);
	}

	bus_dmamap_sync(recv->ch_tag, recv->ch_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * Read reply data and validate address and tag.
	 * Note: we do access the send side without lock but the state
	 * machine does prevent the data from changing.
	 */
	data = (struct nf_regop_data *) (recv->buf_addr +
	    sizeof(struct nf_bb_desc));

	if (le32toh(data->rtag) != send->rtag)
		device_printf(adapter->dev, "rtag error: 0x%08x 0x%08x\n",
		    le32toh(data->rtag), send->rtag);

	sifr->val = le32toh(data->val);
	recv->state = SUME_RIFFA_CHAN_STATE_IDLE;

	/* We are done. */
	send->state = SUME_RIFFA_CHAN_STATE_IDLE;

	SUME_UNLOCK(adapter);

	return (0);
}

/* Read value from a module register and return it to a sume_ifreq.
 */
static int
get_modreg_value(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
{
	int error;

	/* A read is a write of the request followed by a DMA'd reply. */
	error = sume_module_reg_write(nf_priv, sifr, SUME_MR_READ);
	if (!error)
		error = sume_module_reg_read(nf_priv, sifr);

	return (error);
}

/*
 * Interface ioctl handler: media queries, the driver's private register
 * read/write ioctls, and a pass-through to ether_ioctl() for the rest.
 */
static int
sume_if_ioctl(if_t ifp, unsigned long cmd, caddr_t data)
{
	struct ifreq *ifr = (struct ifreq *) data;
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_ifreq sifr;
	int error = 0;

	switch (cmd) {
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &nf_priv->media, cmd);
		break;

	case SUME_IOCTL_CMD_WRITE_REG:
		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
		if (error) {
			error = EINVAL;
			break;
		}
		error = sume_module_reg_write(nf_priv, &sifr, SUME_MR_WRITE);
		break;

	case SUME_IOCTL_CMD_READ_REG:
		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
		if (error) {
			error = EINVAL;
			break;
		}

		error = get_modreg_value(nf_priv, &sifr);
		if (error)
			break;

		error = copyout(&sifr, ifr_data_get_ptr(ifr), sizeof(sifr));
		if (error)
			error = EINVAL;

		break;

	case SIOCSIFFLAGS:
		/* Silence tcpdump 'promisc mode not supported' warning. */
		if (if_getflags(ifp) & IFF_PROMISC)
			break;

		/* FALLTHROUGH */
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	return (error);
}

/* Validate a media change request; only Ethernet media are accepted. */
static int
sume_media_change(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct ifmedia *ifm = &nf_priv->media;

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	if (IFM_SUBTYPE(ifm->ifm_media) == IFM_10G_SR)
		if_setbaudrate(ifp, ifmedia_baudrate(IFM_ETHER | IFM_10G_SR));
	else
		if_setbaudrate(ifp, ifmedia_baudrate(ifm->ifm_media));

	return (0);
}

/*
 * Poll the port's status register over DMA and report link transitions
 * to the network stack.
 */
static void
sume_update_link_status(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;
	struct sume_ifreq sifr;
	int link_status;

	sifr.addr = SUME_STATUS_ADDR(nf_priv->port);
	sifr.val = 0;

	/* Best effort: a failed register read leaves state unchanged. */
	if (get_modreg_value(nf_priv, &sifr))
		return;

	link_status = SUME_LINK_STATUS(sifr.val);

	if (!link_status && nf_priv->link_up) {
		if_link_state_change(ifp, LINK_STATE_DOWN);
		nf_priv->link_up = 0;
		if (adapter->sume_debug)
			device_printf(adapter->dev, "port %d link state "
			    "changed to DOWN\n", nf_priv->unit);
	} else if (link_status && !nf_priv->link_up) {
		nf_priv->link_up = 1;
		if_link_state_change(ifp, LINK_STATE_UP);
		if (adapter->sume_debug)
			device_printf(adapter->dev, "port %d link state "
			    "changed to UP\n", nf_priv->unit);
	}
}

/* Report the current media and (freshly polled) link status. */
static void
sume_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct ifmedia *ifm = &nf_priv->media;

	if (ifm->ifm_cur->ifm_media == (IFM_ETHER | IFM_10G_SR) &&
	    (if_getflags(ifp) & IFF_UP))
		ifmr->ifm_active = IFM_ETHER | IFM_10G_SR;
	else
		ifmr->ifm_active = ifm->ifm_cur->ifm_media;

	ifmr->ifm_status |= IFM_AVALID;

	sume_update_link_status(ifp);

	if (nf_priv->link_up)
		ifmr->ifm_status
		    |= IFM_ACTIVE;
}

/*
 * Packet to transmit. We take the packet data from the mbuf and copy it to the
 * bouncebuffer address buf_addr+3*sizeof(uint32_t)+16. The 16 bytes before the
 * packet data are for metadata: sport/dport (depending on our source
 * interface), packet length and magic 0xcafe. We tell the SUME about the
 * transfer, fill the first 3*sizeof(uint32_t) bytes of the bouncebuffer with
 * the information about the start and length of the packet and trigger the
 * transaction.
 */
static int
sume_if_start_locked(if_t ifp)
{
	struct mbuf *m;
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_DATA];
	uint8_t *outbuf;
	struct nf_metadata *mdata;
	int plen = SUME_MIN_PKT_SIZE;

	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
	KASSERT(send->state == SUME_RIFFA_CHAN_STATE_IDLE,
	    ("SUME not in IDLE state"));

	m = if_dequeue(ifp);
	if (m == NULL)
		return (EINVAL);

	/* Packets large enough do not need to be padded */
	if (m->m_pkthdr.len > SUME_MIN_PKT_SIZE)
		plen = m->m_pkthdr.len;

	if (adapter->sume_debug)
		device_printf(adapter->dev, "sending %d bytes to %s%d\n", plen,
		    SUME_ETH_DEVICE_NAME, nf_priv->unit);

	/* Payload area starts after the bouncebuffer descriptor. */
	outbuf = (uint8_t *) send->buf_addr + sizeof(struct nf_bb_desc);
	mdata = (struct nf_metadata *) outbuf;

	/* Clear the recovery flag. */
	send->recovery = 0;

	/* Make sure we fit with the 16 bytes nf_metadata. */
	if (m->m_pkthdr.len + sizeof(struct nf_metadata) >
	    adapter->sg_buf_size) {
		device_printf(adapter->dev, "packet too big for bounce buffer "
		    "(%d)\n", m->m_pkthdr.len);
		m_freem(m);
		nf_priv->stats.tx_dropped++;
		return (ENOMEM);
	}

	bus_dmamap_sync(send->ch_tag, send->ch_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	 * Zero out the padded data: the whole minimum-size payload area is
	 * cleared first; the packet data copied below overwrites the leading
	 * bytes.
	 */
	if (m->m_pkthdr.len < SUME_MIN_PKT_SIZE)
		bzero(outbuf + sizeof(struct nf_metadata), SUME_MIN_PKT_SIZE);
	/* Skip the first 16 bytes for the metadata. */
	m_copydata(m, 0, m->m_pkthdr.len, outbuf + sizeof(struct nf_metadata));
	/* Transfer length in 32-bit words, rounded up. */
	send->len = (sizeof(struct nf_metadata) + plen + 3) / 4;

	/* Fill in the metadata: CPU(DMA) ports are odd, MAC ports are even. */
	mdata->sport = htole16(1 << (nf_priv->port * 2 + 1));
	mdata->dport = htole16(1 << (nf_priv->port * 2));
	mdata->plen = htole16(plen);
	mdata->magic = htole16(SUME_RIFFA_MAGIC);
	mdata->t1 = htole32(0);
	mdata->t2 = htole32(0);

	/* Let the FPGA know about the transfer. */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_LEN_REG_OFF), send->len);

	/* Fill the bouncebuf "descriptor". */
	sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));

	/* Update the state before intiating the DMA to avoid races. */
	send->state = SUME_RIFFA_CHAN_STATE_READY;

	/*
	 * DMA: program the scatter-gather address (low then high half) and
	 * length; writing the SG length register triggers the transaction.
	 */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_SG_ADDR_LO_REG_OFF),
	    SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_SG_ADDR_HI_REG_OFF),
	    SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);

	bus_dmamap_sync(send->ch_tag, send->ch_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	nf_priv->stats.tx_packets++;
	nf_priv->stats.tx_bytes += plen;

	/* We can free as long as we use the bounce buffer. */
	m_freem(m);

	adapter->last_ifc = nf_priv->port;

	/* Reset watchdog counter. */
	adapter->wd_counter = 0;

	return (0);
}

/*
 * if_start entry point: transmit from this interface's queue, but only if
 * the adapter is running, the interface is UP, and the DATA channel is
 * idle.  Actual work is done by sume_if_start_locked() under the adapter
 * lock.
 */
static void
sume_if_start(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;

	if (!adapter->running || !(if_getflags(ifp) & IFF_UP))
		return;

	SUME_LOCK(adapter);
	if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state ==
	    SUME_RIFFA_CHAN_STATE_IDLE)
		sume_if_start_locked(ifp);
	SUME_UNLOCK(adapter);
}

/*
 * We call this function at the end of every TX transaction to check for
 * remaining packets in the TX queues for every UP interface.
1125 */ 1126 static void 1127 check_tx_queues(struct sume_adapter *adapter) 1128 { 1129 int i, last_ifc; 1130 1131 KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned")); 1132 1133 last_ifc = adapter->last_ifc; 1134 1135 /* Check all interfaces */ 1136 for (i = last_ifc + 1; i < last_ifc + SUME_NPORTS + 1; i++) { 1137 if_t ifp = adapter->ifp[i % SUME_NPORTS]; 1138 1139 if (!(if_getflags(ifp) & IFF_UP)) 1140 continue; 1141 1142 if (!sume_if_start_locked(ifp)) 1143 break; 1144 } 1145 } 1146 1147 static int 1148 sume_ifp_alloc(struct sume_adapter *adapter, uint32_t port) 1149 { 1150 if_t ifp; 1151 struct nf_priv *nf_priv = malloc(sizeof(struct nf_priv), M_SUME, 1152 M_ZERO | M_WAITOK); 1153 1154 ifp = if_alloc(IFT_ETHER); 1155 if (ifp == NULL) { 1156 device_printf(adapter->dev, "cannot allocate ifnet\n"); 1157 return (ENOMEM); 1158 } 1159 1160 adapter->ifp[port] = ifp; 1161 if_setsoftc(ifp, nf_priv); 1162 1163 nf_priv->adapter = adapter; 1164 nf_priv->unit = alloc_unr(unr); 1165 nf_priv->port = port; 1166 nf_priv->link_up = 0; 1167 1168 if_initname(ifp, SUME_ETH_DEVICE_NAME, nf_priv->unit); 1169 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 1170 1171 if_setinitfn(ifp, sume_if_init); 1172 if_setstartfn(ifp, sume_if_start); 1173 if_setioctlfn(ifp, sume_if_ioctl); 1174 1175 uint8_t hw_addr[ETHER_ADDR_LEN] = DEFAULT_ETHER_ADDRESS; 1176 hw_addr[ETHER_ADDR_LEN-1] = nf_priv->unit; 1177 ether_ifattach(ifp, hw_addr); 1178 1179 ifmedia_init(&nf_priv->media, IFM_IMASK, sume_media_change, 1180 sume_media_status); 1181 ifmedia_add(&nf_priv->media, IFM_ETHER | IFM_10G_SR, 0, NULL); 1182 ifmedia_set(&nf_priv->media, IFM_ETHER | IFM_10G_SR); 1183 1184 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); 1185 1186 return (0); 1187 } 1188 1189 static void 1190 callback_dma(void *arg, bus_dma_segment_t *segs, int nseg, int err) 1191 { 1192 if (err) 1193 return; 1194 1195 KASSERT(nseg == 1, ("%d segments returned!", nseg)); 1196 1197 *(bus_addr_t *) arg = segs[0].ds_addr; 
}

/*
 * Allocate the RIFFA channel bookkeeping for one direction (send or
 * receive): the array of riffa_chnl_dir pointers, one DMA tag, one
 * coherent DMA buffer and its loaded map per channel.  'dir' is only used
 * in diagnostic messages.  On failure the caller is expected to clean up
 * via sume_remove_riffa_buffer() (partial allocations are left in *p).
 */
static int
sume_probe_riffa_buffer(const struct sume_adapter *adapter,
    struct riffa_chnl_dir ***p, const char *dir)
{
	struct riffa_chnl_dir **rp;
	bus_addr_t hw_addr;
	int error, ch;
	device_t dev = adapter->dev;

	error = ENOMEM;
	/* NOTE(review): with M_WAITOK malloc(9) cannot return NULL, so the
	 * NULL checks below are effectively dead code. */
	*p = malloc(SUME_RIFFA_CHANNELS * sizeof(struct riffa_chnl_dir *),
	    M_SUME, M_ZERO | M_WAITOK);
	if (*p == NULL) {
		device_printf(dev, "malloc(%s) failed.\n", dir);
		return (error);
	}

	rp = *p;
	/* Allocate the chnl_dir structs themselves. */
	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
		/* One direction. */
		rp[ch] = malloc(sizeof(struct riffa_chnl_dir), M_SUME,
		    M_ZERO | M_WAITOK);
		if (rp[ch] == NULL) {
			device_printf(dev, "malloc(%s[%d]) riffa_chnl_dir "
			    "failed.\n", dir, ch);
			return (error);
		}

		/* One single-segment buffer of sg_buf_size, 4-byte aligned. */
		int err = bus_dma_tag_create(bus_get_dma_tag(dev),
		    4, 0,			/* alignment, boundary */
		    BUS_SPACE_MAXADDR,		/* lowaddr */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    adapter->sg_buf_size,	/* maxsize */
		    1,				/* nsegments */
		    adapter->sg_buf_size,	/* maxsegsize */
		    0,				/* flags */
		    NULL,			/* lockfunc */
		    NULL,			/* lockfuncarg */
		    &rp[ch]->ch_tag);

		if (err) {
			device_printf(dev, "bus_dma_tag_create(%s[%d]) "
			    "failed.\n", dir, ch);
			return (err);
		}

		err = bus_dmamem_alloc(rp[ch]->ch_tag, (void **)
		    &rp[ch]->buf_addr, BUS_DMA_WAITOK | BUS_DMA_COHERENT |
		    BUS_DMA_ZERO, &rp[ch]->ch_map);
		if (err) {
			device_printf(dev, "bus_dmamem_alloc(%s[%d]) failed.\n",
			    dir, ch);
			return (err);
		}

		bzero(rp[ch]->buf_addr, adapter->sg_buf_size);

		/* Load the map; callback_dma() records the bus address. */
		err = bus_dmamap_load(rp[ch]->ch_tag, rp[ch]->ch_map,
		    rp[ch]->buf_addr, adapter->sg_buf_size, callback_dma,
		    &hw_addr, BUS_DMA_NOWAIT);
		if (err) {
			device_printf(dev, "bus_dmamap_load(%s[%d]) failed.\n",
			    dir, ch);
			return (err);
		}
		rp[ch]->buf_hw_addr = hw_addr;
		rp[ch]->num_sg = 1;
		rp[ch]->state = SUME_RIFFA_CHAN_STATE_IDLE;

		rp[ch]->rtag = SUME_INIT_RTAG;
	}

	return (0);
}

/*
 * Allocate the RIFFA buffers for both directions.  On error the partial
 * allocation is torn down later by sume_remove_riffa_buffers() from the
 * attach error path.
 */
static int
sume_probe_riffa_buffers(struct sume_adapter *adapter)
{
	int error;

	error = sume_probe_riffa_buffer(adapter, &adapter->recv, "recv");
	if (error)
		return (error);

	error = sume_probe_riffa_buffer(adapter, &adapter->send, "send");

	return (error);
}

/*
 * Register the driver's sysctl tree: a global debug knob and RX error
 * counters on the device node, plus a per-interface subtree with
 * drop/HW/RX/TX statistics.
 */
static void
sume_sysctl_init(struct sume_adapter *adapter)
{
	device_t dev = adapter->dev;
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct sysctl_oid *tmp_tree;
	char namebuf[MAX_IFC_NAME_LEN];
	int i;

	/* NOTE(review): the "sume" node created here appears unused — the
	 * leaves below attach to 'child' (the device node), not to this
	 * subtree; confirm whether that is intentional. */
	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "sume", CTLFLAG_RW,
	    0, "SUME top-level tree");
	if (tree == NULL) {
		device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
		return;
	}
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug", CTLFLAG_RW,
	    &adapter->sume_debug, 0, "debug int leaf");

	/* total RX error stats */
	SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_epkts",
	    CTLFLAG_RD, &adapter->packets_err, 0, "rx errors");
	SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_ebytes",
	    CTLFLAG_RD, &adapter->bytes_err, 0, "rx error bytes");

	for (i = SUME_NPORTS - 1; i >= 0; i--) {
		if_t ifp = adapter->ifp[i];
		if (ifp == NULL)
			continue;

		struct nf_priv *nf_priv = if_getsoftc(ifp);

		snprintf(namebuf, MAX_IFC_NAME_LEN, "%s%d",
		    SUME_ETH_DEVICE_NAME, nf_priv->unit);
		tmp_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
		    CTLFLAG_RW, 0, "SUME ifc tree");
		if (tmp_tree == NULL) {
			device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
			return;
		}

		/* Packets dropped by down interface.
 */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "ifc_down_bytes", CTLFLAG_RD,
		    &nf_priv->stats.ifc_down_bytes, 0, "ifc_down bytes");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "ifc_down_packets", CTLFLAG_RD,
		    &nf_priv->stats.ifc_down_packets, 0, "ifc_down packets");

		/* HW RX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "hw_rx_packets", CTLFLAG_RD, &nf_priv->stats.hw_rx_packets,
		    0, "hw_rx packets");

		/* HW TX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "hw_tx_packets", CTLFLAG_RD, &nf_priv->stats.hw_tx_packets,
		    0, "hw_tx packets");

		/* RX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "rx_bytes", CTLFLAG_RD, &nf_priv->stats.rx_bytes, 0,
		    "rx bytes");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "rx_dropped", CTLFLAG_RD, &nf_priv->stats.rx_dropped, 0,
		    "rx dropped");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "rx_packets", CTLFLAG_RD, &nf_priv->stats.rx_packets, 0,
		    "rx packets");

		/* TX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "tx_bytes", CTLFLAG_RD, &nf_priv->stats.tx_bytes, 0,
		    "tx bytes");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "tx_dropped", CTLFLAG_RD, &nf_priv->stats.tx_dropped, 0,
		    "tx dropped");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "tx_packets", CTLFLAG_RD, &nf_priv->stats.tx_packets, 0,
		    "tx packets");
	}
}

/*
 * Per-second housekeeping callout: queue the statistics task and run a
 * TX watchdog that resets the DATA channel if it has been stuck in a
 * non-IDLE state for several ticks.  Reschedules itself while the
 * adapter is running.
 */
static void
sume_local_timer(void *arg)
{
	struct sume_adapter *adapter = arg;

	if (!adapter->running)
		return;

	taskqueue_enqueue(adapter->tq, &adapter->stat_task);

	SUME_LOCK(adapter);
	if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state !=
	    SUME_RIFFA_CHAN_STATE_IDLE && ++adapter->wd_counter >= 3) {
		/* Resetting interfaces if stuck for 3 seconds. */
		device_printf(adapter->dev, "TX stuck, resetting adapter.\n");
		/* Reading RIFFA_INFO_REG_OFF resets the hardware. */
		read_reg(adapter, RIFFA_INFO_REG_OFF);

		adapter->send[SUME_RIFFA_CHANNEL_DATA]->state =
		    SUME_RIFFA_CHAN_STATE_IDLE;
		adapter->wd_counter = 0;

		check_tx_queues(adapter);
	}
	SUME_UNLOCK(adapter);

	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
}

/*
 * Statistics task: for every UP interface refresh the link state and
 * accumulate the hardware RX/TX packet counters into the softc stats.
 * Register read failures are silently skipped (best effort).
 */
static void
sume_get_stats(void *context, int pending)
{
	struct sume_adapter *adapter = context;
	int i;

	for (i = 0; i < SUME_NPORTS; i++) {
		if_t ifp = adapter->ifp[i];

		if (if_getflags(ifp) & IFF_UP) {
			struct nf_priv *nf_priv = if_getsoftc(ifp);
			struct sume_ifreq sifr;

			sume_update_link_status(ifp);

			/* Get RX counter. */
			sifr.addr = SUME_STAT_RX_ADDR(nf_priv->port);
			sifr.val = 0;

			if (!get_modreg_value(nf_priv, &sifr))
				nf_priv->stats.hw_rx_packets += sifr.val;

			/* Get TX counter. */
			sifr.addr = SUME_STAT_TX_ADDR(nf_priv->port);
			sifr.val = 0;

			if (!get_modreg_value(nf_priv, &sifr))
				nf_priv->stats.hw_tx_packets += sifr.val;
		}
	}
}

/*
 * Device attach: set up the adapter lock, probe the RIFFA PCI resources
 * and DMA buffers, create the network interfaces, register sysctls and
 * start the periodic timer/statistics machinery.  On any failure the
 * partially initialized state is torn down via sume_detach().
 */
static int
sume_attach(device_t dev)
{
	struct sume_adapter *adapter = device_get_softc(dev);
	adapter->dev = dev;
	int error, i;

	mtx_init(&adapter->lock, "Global lock", NULL, MTX_DEF);

	adapter->running = 0;

	/* OK finish up RIFFA. */
	error = sume_probe_riffa_pci(adapter);
	if (error != 0)
		goto error;

	error = sume_probe_riffa_buffers(adapter);
	if (error != 0)
		goto error;

	/* Now do the network interfaces. */
	for (i = 0; i < SUME_NPORTS; i++) {
		error = sume_ifp_alloc(adapter, i);
		if (error != 0)
			goto error;
	}

	/* Register stats and register sysctls. */
	sume_sysctl_init(adapter);

	/* Reset the HW.
*/ 1466 read_reg(adapter, RIFFA_INFO_REG_OFF); 1467 1468 /* Ready to go, "enable" IRQ. */ 1469 adapter->running = 1; 1470 1471 callout_init(&adapter->timer, 1); 1472 TASK_INIT(&adapter->stat_task, 0, sume_get_stats, adapter); 1473 1474 adapter->tq = taskqueue_create("sume_stats", M_NOWAIT, 1475 taskqueue_thread_enqueue, &adapter->tq); 1476 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s stattaskq", 1477 device_get_nameunit(adapter->dev)); 1478 1479 callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter); 1480 1481 return (0); 1482 1483 error: 1484 sume_detach(dev); 1485 1486 return (error); 1487 } 1488 1489 static void 1490 sume_remove_riffa_buffer(const struct sume_adapter *adapter, 1491 struct riffa_chnl_dir **pp) 1492 { 1493 int ch; 1494 1495 for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) { 1496 if (pp[ch] == NULL) 1497 continue; 1498 1499 if (pp[ch]->buf_hw_addr != 0) { 1500 bus_dmamem_free(pp[ch]->ch_tag, pp[ch]->buf_addr, 1501 pp[ch]->ch_map); 1502 pp[ch]->buf_hw_addr = 0; 1503 } 1504 1505 free(pp[ch], M_SUME); 1506 } 1507 } 1508 1509 static void 1510 sume_remove_riffa_buffers(struct sume_adapter *adapter) 1511 { 1512 if (adapter->send != NULL) { 1513 sume_remove_riffa_buffer(adapter, adapter->send); 1514 free(adapter->send, M_SUME); 1515 adapter->send = NULL; 1516 } 1517 if (adapter->recv != NULL) { 1518 sume_remove_riffa_buffer(adapter, adapter->recv); 1519 free(adapter->recv, M_SUME); 1520 adapter->recv = NULL; 1521 } 1522 } 1523 1524 static int 1525 sume_detach(device_t dev) 1526 { 1527 struct sume_adapter *adapter = device_get_softc(dev); 1528 int i; 1529 struct nf_priv *nf_priv; 1530 1531 KASSERT(mtx_initialized(&adapter->lock), ("SUME mutex not " 1532 "initialized")); 1533 adapter->running = 0; 1534 1535 /* Drain the stats callout and task queue. 
*/ 1536 callout_drain(&adapter->timer); 1537 1538 if (adapter->tq) { 1539 taskqueue_drain(adapter->tq, &adapter->stat_task); 1540 taskqueue_free(adapter->tq); 1541 } 1542 1543 for (i = 0; i < SUME_NPORTS; i++) { 1544 if_t ifp = adapter->ifp[i]; 1545 if (ifp == NULL) 1546 continue; 1547 1548 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 1549 nf_priv = if_getsoftc(ifp); 1550 1551 if (if_getflags(ifp) & IFF_UP) 1552 if_down(ifp); 1553 ifmedia_removeall(&nf_priv->media); 1554 free_unr(unr, nf_priv->unit); 1555 1556 if_setflagbits(ifp, 0, IFF_UP); 1557 ether_ifdetach(ifp); 1558 if_free(ifp); 1559 1560 free(nf_priv, M_SUME); 1561 } 1562 1563 sume_remove_riffa_buffers(adapter); 1564 1565 if (adapter->irq.tag) 1566 bus_teardown_intr(dev, adapter->irq.res, adapter->irq.tag); 1567 if (adapter->irq.res) 1568 bus_release_resource(dev, SYS_RES_IRQ, adapter->irq.rid, 1569 adapter->irq.res); 1570 1571 pci_release_msi(dev); 1572 1573 if (adapter->bar0_addr) 1574 bus_release_resource(dev, SYS_RES_MEMORY, adapter->rid, 1575 adapter->bar0_addr); 1576 1577 mtx_destroy(&adapter->lock); 1578 1579 return (0); 1580 } 1581 1582 static int 1583 mod_event(module_t mod, int cmd, void *arg) 1584 { 1585 switch (cmd) { 1586 case MOD_LOAD: 1587 unr = new_unrhdr(0, INT_MAX, NULL); 1588 break; 1589 1590 case MOD_UNLOAD: 1591 delete_unrhdr(unr); 1592 break; 1593 } 1594 1595 return (0); 1596 } 1597 1598 DRIVER_MODULE(sume, pci, sume_driver, mod_event, NULL); 1599 MODULE_VERSION(sume, 1); 1600