/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2015 Bjoern A. Zeeb
 * Copyright (c) 2020 Denis Salopek
 *
 * This software was developed by SRI International and the University of
 * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249
 * ("MRC2"), as part of the DARPA MRC research programme.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <machine/bus.h>

#include "adapter.h"

#define	PCI_VENDOR_ID_XILINX	0x10ee
#define	PCI_DEVICE_ID_SUME	0x7028

/* SUME bus driver interface */
static int sume_probe(device_t);
static int sume_attach(device_t);
static int sume_detach(device_t);

static device_method_t sume_methods[] = {
	DEVMETHOD(device_probe,		sume_probe),
	DEVMETHOD(device_attach,	sume_attach),
	DEVMETHOD(device_detach,	sume_detach),
	DEVMETHOD_END
};

static driver_t sume_driver = {
	"sume",
	sume_methods,
	sizeof(struct sume_adapter)
};

/*
 * The DMA engine for SUME generates interrupts for each RX/TX transaction.
 * Depending on the channel (0 if packet transaction, 1 if register transaction)
 * the used bits of the interrupt vector will be the lowest or the second lowest
 * 5 bits.
 *
 * When receiving packets from SUME (RX):
 * (1) SUME received a packet on one of the interfaces.
 * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX
 *     transaction).
 * (3) We read the length of the incoming packet and the offset along with the
 *     'last' flag from the SUME registers.
 * (4) We prepare for the DMA transaction by setting the bouncebuffer on the
 *     address buf_addr. For now, this is how it's done:
 *     - First 3*sizeof(uint32_t) bytes are: lower and upper 32 bits of physical
 *       address where we want the data to arrive (buf_addr[0] and buf_addr[1]),
 *       and length of incoming data (buf_addr[2]).
 *     - Data will start right after, at buf_addr+3*sizeof(uint32_t). The
 *       physical address buf_hw_addr is a block of contiguous memory mapped to
 *       buf_addr, so we can set the incoming data's physical address
 *       (buf_addr[0] and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t).
 * (5) We notify SUME that the bouncebuffer is ready for the transaction by
 *     writing the lower/upper physical address buf_hw_addr to the SUME
 *     registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as
 *     well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF.
 * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 -
 *     bouncebuffer received).
 * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 -
 *     transaction is done).
 * (8) SUME can do both steps (6) and (7) using the same interrupt.
 * (9) We read the first 16 bytes (metadata) of the received data and note the
 *     incoming interface so we can later forward it to the right one in the OS
 *     (sume0, sume1, sume2 or sume3).
 * (10) We create an mbuf, copy the data from the bouncebuffer to the mbuf
 *     and set the mbuf rcvif to the incoming interface.
 * (11) We forward the mbuf to the appropriate interface via ifp->if_input.
 *
 * When sending packets to SUME (TX):
 * (1) The OS calls the sume_if_start() function on TX.
 * (2) We get the mbuf packet data and copy it to
 *     buf_addr+3*sizeof(uint32_t)+16, i.e. right after the 16-byte metadata.
 * (3) We create the metadata based on the output interface and copy it to
 *     buf_addr+3*sizeof(uint32_t).
 * (4) We write the offset/last and length of the packet to the SUME registers
 *     RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF.
 * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes
 *     with the physical address and length just as in RX step (4).
 * (6) We notify SUME that the bouncebuffer is ready by writing to SUME
 *     registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and
 *     RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5).
 * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 -
 *     bouncebuffer is read).
 * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 -
 *     transaction is done).
 * (9) SUME can do both steps (7) and (8) using the same interrupt.
 *
 * Internal registers
 * Every module in the SUME hardware has its own set of internal registers
 * (IDs, for debugging and statistic purposes, etc.). Their base addresses are
 * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the
 * offsets to different memory locations of every module are defined in their
 * corresponding folder inside the library. These registers can be RO/RW and
 * there is a special method to fetch/change this data over 1 or 2 DMA
 * transactions: writes are done by calling sume_module_reg_write(); reads by
 * calling sume_module_reg_write() and then sume_module_reg_read(). Check those
 * functions for more information.
 */
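
/*
 * Illustrative bouncebuffer layout (a sketch only, not authoritative; the
 * real struct definitions live in adapter.h and this comment merely mirrors
 * how their fields are used further down in this file):
 *
 *	buf_addr + 0:	struct nf_bb_desc
 *			    uint32_t lower;  physical data address, low 32 bits
 *			    uint32_t upper;  physical data address, high 32 bits
 *			    uint32_t len;    length, written by sume_fill_bb_desc()
 *	buf_addr + sizeof(struct nf_bb_desc):
 *			struct nf_metadata (16 bytes)
 *			    uint16_t sport, dport;  one-hot port masks
 *			    uint16_t plen;   payload length in bytes
 *			    uint16_t magic;  must be SUME_RIFFA_MAGIC (0xcafe)
 *			    uint32_t t1, t2; unused here, written as 0
 *	buf_addr + sizeof(struct nf_bb_desc) + sizeof(struct nf_metadata):
 *			packet payload
 */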

MALLOC_DECLARE(M_SUME);
MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver");

static void check_tx_queues(struct sume_adapter *);
static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *,
    uint64_t);

static struct unrhdr *unr;

static struct {
	uint16_t device;
	char *desc;
} sume_pciids[] = {
	{PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"},
};

static inline uint32_t
read_reg(struct sume_adapter *adapter, int offset)
{

	return (bus_space_read_4(adapter->bt, adapter->bh, offset << 2));
}

static inline void
write_reg(struct sume_adapter *adapter, int offset, uint32_t val)
{

	bus_space_write_4(adapter->bt, adapter->bh, offset << 2, val);
}

static int
sume_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);

	if (v != PCI_VENDOR_ID_XILINX)
		return (ENXIO);

	for (i = 0; i < nitems(sume_pciids); i++) {
		if (d == sume_pciids[i].device) {
			device_set_desc(dev, sume_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

/*
 * Building an mbuf for a packet received from SUME. We expect to receive 'len'
 * bytes of data (including metadata) written from the bouncebuffer address
 * buf_addr+3*sizeof(uint32_t). The metadata tells us which SUME interface
 * received the packet (sport will be 1, 2, 4 or 8), the packet length (plen),
 * and the magic word, which needs to be 0xcafe. Once we have the packet data,
 * we create an mbuf, copy the data into it using m_copyback(), set the correct
 * interface in rcvif and return the mbuf to be later passed to the OS with
 * if_input.
 */
static struct mbuf *
sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len)
{
	struct nf_priv *nf_priv;
	struct mbuf *m;
	if_t ifp = NULL;
	int np;
	uint16_t dport, plen, magic;
	device_t dev = adapter->dev;
	uint8_t *indata = (uint8_t *)
	    adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr +
	    sizeof(struct nf_bb_desc);
	struct nf_metadata *mdata = (struct nf_metadata *) indata;

	/* The metadata header is 16 bytes. */
	if (len < sizeof(struct nf_metadata)) {
		device_printf(dev, "short frame (%d)\n", len);
		adapter->packets_err++;
		adapter->bytes_err += len;
		return (NULL);
	}

	dport = le16toh(mdata->dport);
	plen = le16toh(mdata->plen);
	magic = le16toh(mdata->magic);

	if (sizeof(struct nf_metadata) + plen > len ||
	    magic != SUME_RIFFA_MAGIC) {
		device_printf(dev, "corrupted packet (%zd + %d > %d || magic "
		    "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen,
		    len, magic, SUME_RIFFA_MAGIC);
		return (NULL);
	}

	/* We got the packet from one of the even bits */
	np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1;
	if (np >= SUME_NPORTS) {
		device_printf(dev, "invalid destination port 0x%04x (%d)\n",
		    dport, np);
		adapter->packets_err++;
		adapter->bytes_err += plen;
		return (NULL);
	}
	ifp = adapter->ifp[np];
	nf_priv = if_getsoftc(ifp);
	nf_priv->stats.rx_packets++;
	nf_priv->stats.rx_bytes += plen;

	/* If the interface is down, well, we are done. */
	if (!(if_getflags(ifp) & IFF_UP)) {
		nf_priv->stats.ifc_down_packets++;
		nf_priv->stats.ifc_down_bytes += plen;
		return (NULL);
	}

	if (adapter->sume_debug)
		printf("Building mbuf with length: %d\n", plen);

	m = m_getm(NULL, plen, M_NOWAIT, MT_DATA);
	if (m == NULL) {
		adapter->packets_err++;
		adapter->bytes_err += plen;
		return (NULL);
	}

	/* Copy the data in at the right offset. */
	m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata)));
	m->m_pkthdr.rcvif = ifp;

	return (m);
}

/*
 * SUME interrupt handler for when we get a valid interrupt from the board.
 * Theoretically, we can receive an interrupt for any of the available
 * channels, but RIFFA DMA uses only 2: 0 and 1, so we use only vect0. The
 * vector is a 32 bit number, using 5 bits for every channel; the least
 * significant bits correspond to channel 0 and the next 5 bits correspond to
 * channel 1. Vector bits for RX/TX are:
 * RX
 * bit 0 - new transaction from SUME
 * bit 1 - SUME received our bouncebuffer address
 * bit 2 - SUME copied the received data to our bouncebuffer, transaction done
 * TX
 * bit 3 - SUME received our bouncebuffer address
 * bit 4 - SUME copied the data from our bouncebuffer, transaction done
 *
 * There are two finite state machines (one for TX, one for RX). We loop
 * through channels 0 and 1 to check our current state and which interrupt
 * bit is set.
 * TX
 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction.
 * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer
 * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3
 * to go to the next state.
 * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send
 * our packet). Then we get the length of the sent data and go back to the
 * IDLE state.
 * RX
 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the interrupt bit 0 (new RX
 * transaction). When we get it, we prepare our bouncebuffer for reading and
 * trigger the SUME to start the transaction. Go to the next state.
 * SUME_RIFFA_CHAN_STATE_READY: waiting for the interrupt bit 1 (SUME got our
 * bouncebuffer). Go to the next state.
 * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready,
 * we can build the mbuf and go back to the IDLE state.
 */
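
/*
 * Worked example (illustrative only): vect0 == 0x00000021 decodes as
 * (0x21 & 0x1f) == 0x01 for channel 0, i.e. bit 0 set - a new RX data
 * transaction - and ((0x21 >> 5) & 0x1f) == 0x01 for channel 1, i.e. a new
 * RX transaction on the register channel (a register read result is on its
 * way). Each channel's 5-bit slice is walked independently by the loop
 * below.
 */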
341 */ 342 for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) { 343 vect = vect0 >> (5 * ch); 344 send = adapter->send[ch]; 345 recv = adapter->recv[ch]; 346 347 loops = 0; 348 while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) && 349 loops <= 5) { 350 if (adapter->sume_debug) 351 device_printf(dev, "TX ch %d state %u vect = " 352 "0x%08x\n", ch, send->state, vect); 353 switch (send->state) { 354 case SUME_RIFFA_CHAN_STATE_IDLE: 355 break; 356 case SUME_RIFFA_CHAN_STATE_READY: 357 if (!(vect & SUME_MSI_TXBUF)) { 358 device_printf(dev, "ch %d unexpected " 359 "interrupt in send+3 state %u: " 360 "vect = 0x%08x\n", ch, send->state, 361 vect); 362 send->recovery = 1; 363 break; 364 } 365 send->state = SUME_RIFFA_CHAN_STATE_READ; 366 vect &= ~SUME_MSI_TXBUF; 367 break; 368 case SUME_RIFFA_CHAN_STATE_READ: 369 if (!(vect & SUME_MSI_TXDONE)) { 370 device_printf(dev, "ch %d unexpected " 371 "interrupt in send+4 state %u: " 372 "vect = 0x%08x\n", ch, send->state, 373 vect); 374 send->recovery = 1; 375 break; 376 } 377 send->state = SUME_RIFFA_CHAN_STATE_LEN; 378 379 len = read_reg(adapter, RIFFA_CHNL_REG(ch, 380 RIFFA_RX_TNFR_LEN_REG_OFF)); 381 if (ch == SUME_RIFFA_CHANNEL_DATA) { 382 send->state = 383 SUME_RIFFA_CHAN_STATE_IDLE; 384 check_tx_queues(adapter); 385 } else if (ch == SUME_RIFFA_CHANNEL_REG) 386 wakeup(&send->event); 387 else { 388 device_printf(dev, "ch %d unexpected " 389 "interrupt in send+4 state %u: " 390 "vect = 0x%08x\n", ch, send->state, 391 vect); 392 send->recovery = 1; 393 } 394 vect &= ~SUME_MSI_TXDONE; 395 break; 396 case SUME_RIFFA_CHAN_STATE_LEN: 397 break; 398 default: 399 device_printf(dev, "unknown TX state!\n"); 400 } 401 loops++; 402 } 403 404 if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) && 405 send->recovery) 406 device_printf(dev, "ch %d ignoring vect = 0x%08x " 407 "during TX; not in recovery; state = %d loops = " 408 "%d\n", ch, vect, send->state, loops); 409 410 loops = 0; 411 while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF | 412 SUME_MSI_RXDONE)) && loops < 5) { 413 if (adapter->sume_debug) 414 device_printf(dev, "RX ch %d state %u vect = " 415 "0x%08x\n", ch, recv->state, vect); 416 switch (recv->state) { 417 case SUME_RIFFA_CHAN_STATE_IDLE: 418 if (!(vect & SUME_MSI_RXQUE)) { 419 device_printf(dev, "ch %d unexpected " 420 "interrupt in recv+0 state %u: " 421 "vect = 0x%08x\n", ch, recv->state, 422 vect); 423 recv->recovery = 1; 424 break; 425 } 426 uint32_t max_ptr; 427 428 /* Clear recovery state. */ 429 recv->recovery = 0; 430 431 /* Get offset and length. */ 432 recv->offlast = read_reg(adapter, 433 RIFFA_CHNL_REG(ch, 434 RIFFA_TX_OFFLAST_REG_OFF)); 435 recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch, 436 RIFFA_TX_LEN_REG_OFF)); 437 438 /* Boundary checks. */ 439 max_ptr = (uint32_t)((uintptr_t)recv->buf_addr 440 + SUME_RIFFA_OFFSET(recv->offlast) 441 + SUME_RIFFA_LEN(recv->len) - 1); 442 if (max_ptr < 443 (uint32_t)((uintptr_t)recv->buf_addr)) 444 device_printf(dev, "receive buffer " 445 "wrap-around overflow.\n"); 446 if (SUME_RIFFA_OFFSET(recv->offlast) + 447 SUME_RIFFA_LEN(recv->len) > 448 adapter->sg_buf_size) 449 device_printf(dev, "receive buffer too" 450 " small.\n"); 451 452 /* Fill the bouncebuf "descriptor". 
				sume_fill_bb_desc(adapter, recv,
				    SUME_RIFFA_LEN(recv->len));

				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
				    BUS_DMASYNC_PREREAD |
				    BUS_DMASYNC_PREWRITE);
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_ADDR_LO_REG_OFF),
				    SUME_RIFFA_LO_ADDR(recv->buf_hw_addr));
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_ADDR_HI_REG_OFF),
				    SUME_RIFFA_HI_ADDR(recv->buf_hw_addr));
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_LEN_REG_OFF),
				    4 * recv->num_sg);
				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
				    BUS_DMASYNC_POSTREAD |
				    BUS_DMASYNC_POSTWRITE);

				recv->state = SUME_RIFFA_CHAN_STATE_READY;
				vect &= ~SUME_MSI_RXQUE;
				break;
			case SUME_RIFFA_CHAN_STATE_READY:
				if (!(vect & SUME_MSI_RXBUF)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+1 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				recv->state = SUME_RIFFA_CHAN_STATE_READ;
				vect &= ~SUME_MSI_RXBUF;
				break;
			case SUME_RIFFA_CHAN_STATE_READ:
				if (!(vect & SUME_MSI_RXDONE)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+2 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_TNFR_LEN_REG_OFF));

				/* Remember, len and recv->len are words. */
				if (ch == SUME_RIFFA_CHANNEL_DATA) {
					m = sume_rx_build_mbuf(adapter,
					    len << 2);
					recv->state =
					    SUME_RIFFA_CHAN_STATE_IDLE;
				} else if (ch == SUME_RIFFA_CHANNEL_REG)
					wakeup(&recv->event);
				else {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+2 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
				}
				vect &= ~SUME_MSI_RXDONE;
				break;
			case SUME_RIFFA_CHAN_STATE_LEN:
				break;
			default:
				device_printf(dev, "unknown RX state!\n");
			}
			loops++;
		}

		if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
		    SUME_MSI_RXDONE)) && recv->recovery) {
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during RX; in recovery; state = %d, loops = "
			    "%d\n", ch, vect, recv->state, loops);

			/* Clean the unfinished transaction. */
			if (ch == SUME_RIFFA_CHANNEL_REG &&
			    vect & SUME_MSI_RXDONE) {
				read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_TNFR_LEN_REG_OFF));
				recv->recovery = 0;
			}
		}
	}
	SUME_UNLOCK(adapter);

	if (m != NULL) {
		ifp = m->m_pkthdr.rcvif;
		if_input(ifp, m);
	}
}

/*
 * As we cannot disable interrupt generation, ignore early interrupts by
 * waiting for the adapter to go into the 'running' state.
 */
550 */ 551 static int 552 sume_intr_filter(void *arg) 553 { 554 struct sume_adapter *adapter = arg; 555 556 if (adapter->running == 0) 557 return (FILTER_STRAY); 558 559 return (FILTER_SCHEDULE_THREAD); 560 } 561 562 static int 563 sume_probe_riffa_pci(struct sume_adapter *adapter) 564 { 565 device_t dev = adapter->dev; 566 int error, count, capmem; 567 uint32_t reg, devctl, linkctl; 568 569 pci_enable_busmaster(dev); 570 571 adapter->rid = PCIR_BAR(0); 572 adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 573 &adapter->rid, RF_ACTIVE); 574 if (adapter->bar0_addr == NULL) { 575 device_printf(dev, "unable to allocate bus resource: " 576 "BAR0 address\n"); 577 return (ENXIO); 578 } 579 adapter->bt = rman_get_bustag(adapter->bar0_addr); 580 adapter->bh = rman_get_bushandle(adapter->bar0_addr); 581 adapter->bar0_len = rman_get_size(adapter->bar0_addr); 582 if (adapter->bar0_len != 1024) { 583 device_printf(dev, "BAR0 resource length %lu != 1024\n", 584 adapter->bar0_len); 585 return (ENXIO); 586 } 587 588 count = pci_msi_count(dev); 589 error = pci_alloc_msi(dev, &count); 590 if (error) { 591 device_printf(dev, "unable to allocate bus resource: PCI " 592 "MSI\n"); 593 return (error); 594 } 595 596 adapter->irq.rid = 1; /* Should be 1, thus says pci_alloc_msi() */ 597 adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ, 598 &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE); 599 if (adapter->irq.res == NULL) { 600 device_printf(dev, "unable to allocate bus resource: IRQ " 601 "memory\n"); 602 return (ENXIO); 603 } 604 605 error = bus_setup_intr(dev, adapter->irq.res, INTR_MPSAFE | 606 INTR_TYPE_NET, sume_intr_filter, sume_intr_handler, adapter, 607 &adapter->irq.tag); 608 if (error) { 609 device_printf(dev, "failed to setup interrupt for rid %d, name" 610 " %s: %d\n", adapter->irq.rid, "SUME_INTR", error); 611 return (ENXIO); 612 } 613 614 if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) { 615 device_printf(dev, "PCI not PCIe capable\n"); 616 return (ENXIO); 617 } 618 619 devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL, 2); 620 pci_write_config(dev, capmem + PCIER_DEVICE_CTL, (devctl | 621 PCIEM_CTL_EXT_TAG_FIELD), 2); 622 623 devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL2, 2); 624 pci_write_config(dev, capmem + PCIER_DEVICE_CTL2, (devctl | 625 PCIEM_CTL2_ID_ORDERED_REQ_EN), 2); 626 627 linkctl = pci_read_config(dev, capmem + PCIER_LINK_CTL, 2); 628 pci_write_config(dev, capmem + PCIER_LINK_CTL, (linkctl | 629 PCIEM_LINK_CTL_RCB), 2); 630 631 reg = read_reg(adapter, RIFFA_INFO_REG_OFF); 632 adapter->num_sg = RIFFA_SG_ELEMS * ((reg >> 19) & 0xf); 633 adapter->sg_buf_size = RIFFA_SG_BUF_SIZE * ((reg >> 19) & 0xf); 634 635 error = ENODEV; 636 /* Check bus master is enabled. */ 637 if (((reg >> 4) & 0x1) != 1) { 638 device_printf(dev, "bus master not enabled: %d\n", 639 (reg >> 4) & 0x1); 640 return (error); 641 } 642 /* Check link parameters are valid. */ 643 if (((reg >> 5) & 0x3f) == 0 || ((reg >> 11) & 0x3) == 0) { 644 device_printf(dev, "link parameters not valid: %d %d\n", 645 (reg >> 5) & 0x3f, (reg >> 11) & 0x3); 646 return (error); 647 } 648 /* Check # of channels are within valid range. */ 649 if ((reg & 0xf) == 0 || (reg & 0xf) > RIFFA_MAX_CHNLS) { 650 device_printf(dev, "number of channels out of range: %d\n", 651 reg & 0xf); 652 return (error); 653 } 654 /* Check bus width. 
	if (((reg >> 19) & 0xf) == 0 ||
	    ((reg >> 19) & 0xf) > RIFFA_MAX_BUS_WIDTH_PARAM) {
		device_printf(dev, "bus width out of range: %d\n",
		    (reg >> 19) & 0xf);
		return (error);
	}

	device_printf(dev, "[riffa] # of channels: %d\n",
	    reg & 0xf);
	device_printf(dev, "[riffa] bus interface width: %d\n",
	    ((reg >> 19) & 0xf) << 5);
	device_printf(dev, "[riffa] bus master enabled: %d\n",
	    (reg >> 4) & 0x1);
	device_printf(dev, "[riffa] negotiated link width: %d\n",
	    (reg >> 5) & 0x3f);
	device_printf(dev, "[riffa] negotiated rate width: %d MTs\n",
	    ((reg >> 11) & 0x3) * 2500);
	device_printf(dev, "[riffa] max downstream payload: %d B\n",
	    128 << ((reg >> 13) & 0x7));
	device_printf(dev, "[riffa] max upstream payload: %d B\n",
	    128 << ((reg >> 16) & 0x7));

	return (0);
}

/* If there is no sume_if_init, ether_ioctl() panics. */
static void
sume_if_init(void *sc)
{
}

/* Write the address and length for our incoming / outgoing transaction. */
static void
sume_fill_bb_desc(struct sume_adapter *adapter, struct riffa_chnl_dir *p,
    uint64_t len)
{
	struct nf_bb_desc *bouncebuf = (struct nf_bb_desc *) p->buf_addr;

	bouncebuf->lower = (p->buf_hw_addr + sizeof(struct nf_bb_desc));
	bouncebuf->upper = (p->buf_hw_addr + sizeof(struct nf_bb_desc)) >> 32;
	bouncebuf->len = len >> 2;
}

/* Module register locked write. */
static int
sume_modreg_write_locked(struct sume_adapter *adapter)
{
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];

	/* Let the FPGA know about the transfer. */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_LEN_REG_OFF), send->len); /* words */

	/* Fill the bouncebuf "descriptor". */
	sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));

	/* Update the state before initiating the DMA to avoid races. */
	send->state = SUME_RIFFA_CHAN_STATE_READY;

	bus_dmamap_sync(send->ch_tag, send->ch_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/* DMA. */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_SG_ADDR_LO_REG_OFF),
	    SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_SG_ADDR_HI_REG_OFF),
	    SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);
	bus_dmamap_sync(send->ch_tag, send->ch_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	return (0);
}

/*
 * Request a register read or write (depending on optype).
 * If optype is set (0x1f) this will result in a register write,
 * otherwise this will result in a register read request at the given
 * address and the result will need to be DMAed back.
 */
static int
sume_module_reg_write(struct nf_priv *nf_priv, struct sume_ifreq *sifr,
    uint32_t optype)
{
	struct sume_adapter *adapter = nf_priv->adapter;
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
	struct nf_regop_data *data;
	int error;

	/*
	 * 1. Make sure the channel is free; otherwise return EBUSY.
	 * 2. Prepare the memory in the bounce buffer (which we always
	 *    use for regs).
	 * 3. Start the DMA process.
	 * 4. Sleep and wait for result and return success or error.
	 */
	SUME_LOCK(adapter);

	if (send->state != SUME_RIFFA_CHAN_STATE_IDLE) {
		SUME_UNLOCK(adapter);
		return (EBUSY);
	}

	data = (struct nf_regop_data *) (send->buf_addr +
	    sizeof(struct nf_bb_desc));
	data->addr = htole32(sifr->addr);
	data->val = htole32(sifr->val);
	/* Tag to identify request. */
	data->rtag = htole32(++send->rtag);
	data->optype = htole32(optype);
	send->len = sizeof(struct nf_regop_data) / 4; /* words */

	error = sume_modreg_write_locked(adapter);
	if (error) {
		SUME_UNLOCK(adapter);
		return (EFAULT);
	}

	/* Timeout after 1s. */
	if (send->state != SUME_RIFFA_CHAN_STATE_LEN)
		error = msleep(&send->event, &adapter->lock, 0,
		    "Waiting recv finish", 1 * hz);

	/* This was a write so we are done; or we were interrupted or timed out. */
	if (optype != SUME_MR_READ || error != 0 || error == EWOULDBLOCK) {
		send->state = SUME_RIFFA_CHAN_STATE_IDLE;
		if (optype == SUME_MR_READ)
			error = EWOULDBLOCK;
		else
			error = 0;
	} else
		error = 0;

	/*
	 * For read requests we will update state once we are done
	 * having read the result to avoid any two outstanding
	 * transactions, or we need a queue and validate tags,
	 * which is a lot of work for a low priority, infrequent
	 * event.
	 */

	SUME_UNLOCK(adapter);

	return (error);
}

/* Module register read. */
static int
sume_module_reg_read(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
{
	struct sume_adapter *adapter = nf_priv->adapter;
	struct riffa_chnl_dir *recv = adapter->recv[SUME_RIFFA_CHANNEL_REG];
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];
	struct nf_regop_data *data;
	int error = 0;

	/*
	 * 0. Sleep waiting for result if needed (unless condition is
	 *    true already).
	 * 1. Read DMA results.
	 * 2. Update state on *TX* to IDLE to allow next read to start.
	 */
	SUME_LOCK(adapter);

	bus_dmamap_sync(recv->ch_tag, recv->ch_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * We only need to be woken up at the end of the transaction.
	 * Timeout after 1s.
	 */
	if (recv->state != SUME_RIFFA_CHAN_STATE_READ)
		error = msleep(&recv->event, &adapter->lock, 0,
		    "Waiting transaction finish", 1 * hz);

	if (recv->state != SUME_RIFFA_CHAN_STATE_READ || error == EWOULDBLOCK) {
		SUME_UNLOCK(adapter);
		device_printf(adapter->dev, "wait error: %d\n", error);
		return (EWOULDBLOCK);
	}

	bus_dmamap_sync(recv->ch_tag, recv->ch_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * Read reply data and validate address and tag.
	 * Note: we do access the send side without lock but the state
	 * machine does prevent the data from changing.
	 */
	data = (struct nf_regop_data *) (recv->buf_addr +
	    sizeof(struct nf_bb_desc));

	if (le32toh(data->rtag) != send->rtag)
		device_printf(adapter->dev, "rtag error: 0x%08x 0x%08x\n",
		    le32toh(data->rtag), send->rtag);

	sifr->val = le32toh(data->val);
	recv->state = SUME_RIFFA_CHAN_STATE_IDLE;

	/* We are done. */
	send->state = SUME_RIFFA_CHAN_STATE_IDLE;

	SUME_UNLOCK(adapter);

	return (0);
}
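
/*
 * Userspace sketch (illustrative only, not compiled here): the module
 * register path above is driven through the per-interface ioctls handled
 * in sume_if_ioctl() below, with a struct sume_ifreq hanging off ifr_data.
 * Roughly, and assuming the definitions shipped alongside this driver:
 *
 *	struct sume_ifreq sifr = { .addr = <module register address> };
 *	struct ifreq ifr;
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strlcpy(ifr.ifr_name, "sume0", sizeof(ifr.ifr_name));
 *	ifr.ifr_data = (caddr_t)&sifr;
 *	if (ioctl(s, SUME_IOCTL_CMD_READ_REG, &ifr) == 0)
 *		printf("0x%08x\n", sifr.val);
 */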

/* Read value from a module register and return it to a sume_ifreq. */
static int
get_modreg_value(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
{
	int error;

	error = sume_module_reg_write(nf_priv, sifr, SUME_MR_READ);
	if (!error)
		error = sume_module_reg_read(nf_priv, sifr);

	return (error);
}

static int
sume_if_ioctl(if_t ifp, unsigned long cmd, caddr_t data)
{
	struct ifreq *ifr = (struct ifreq *) data;
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_ifreq sifr;
	int error = 0;

	switch (cmd) {
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &nf_priv->media, cmd);
		break;

	case SUME_IOCTL_CMD_WRITE_REG:
		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
		if (error) {
			error = EINVAL;
			break;
		}
		error = sume_module_reg_write(nf_priv, &sifr, SUME_MR_WRITE);
		break;

	case SUME_IOCTL_CMD_READ_REG:
		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
		if (error) {
			error = EINVAL;
			break;
		}

		error = get_modreg_value(nf_priv, &sifr);
		if (error)
			break;

		error = copyout(&sifr, ifr_data_get_ptr(ifr), sizeof(sifr));
		if (error)
			error = EINVAL;

		break;

	case SIOCSIFFLAGS:
		/* Silence tcpdump 'promisc mode not supported' warning. */
		if (if_getflags(ifp) & IFF_PROMISC)
			break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	return (error);
}

static int
sume_media_change(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct ifmedia *ifm = &nf_priv->media;

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	if (IFM_SUBTYPE(ifm->ifm_media) == IFM_10G_SR)
		if_setbaudrate(ifp, ifmedia_baudrate(IFM_ETHER | IFM_10G_SR));
	else
		if_setbaudrate(ifp, ifmedia_baudrate(ifm->ifm_media));

	return (0);
}

static void
sume_update_link_status(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;
	struct sume_ifreq sifr;
	int link_status;

	sifr.addr = SUME_STATUS_ADDR(nf_priv->port);
	sifr.val = 0;

	if (get_modreg_value(nf_priv, &sifr))
		return;

	link_status = SUME_LINK_STATUS(sifr.val);

	if (!link_status && nf_priv->link_up) {
		if_link_state_change(ifp, LINK_STATE_DOWN);
		nf_priv->link_up = 0;
		if (adapter->sume_debug)
			device_printf(adapter->dev, "port %d link state "
			    "changed to DOWN\n", nf_priv->unit);
	} else if (link_status && !nf_priv->link_up) {
		nf_priv->link_up = 1;
		if_link_state_change(ifp, LINK_STATE_UP);
		if (adapter->sume_debug)
			device_printf(adapter->dev, "port %d link state "
			    "changed to UP\n", nf_priv->unit);
	}
}

static void
sume_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct ifmedia *ifm = &nf_priv->media;

	if (ifm->ifm_cur->ifm_media == (IFM_ETHER | IFM_10G_SR) &&
	    (if_getflags(ifp) & IFF_UP))
		ifmr->ifm_active = IFM_ETHER | IFM_10G_SR;
	else
		ifmr->ifm_active = ifm->ifm_cur->ifm_media;

	ifmr->ifm_status |= IFM_AVALID;

	sume_update_link_status(ifp);

	if (nf_priv->link_up)
		ifmr->ifm_status |= IFM_ACTIVE;
}

/*
 * Packet to transmit. We take the packet data from the mbuf and copy it to the
 * bouncebuffer address buf_addr+3*sizeof(uint32_t)+16. The 16 bytes before the
 * packet data are for metadata: sport/dport (depending on our source
 * interface), packet length and magic 0xcafe. We tell the SUME about the
 * transfer, fill the first 3*sizeof(uint32_t) bytes of the bouncebuffer with
 * the information about the start and length of the packet and trigger the
 * transaction.
 */
static int
sume_if_start_locked(if_t ifp)
{
	struct mbuf *m;
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_DATA];
	uint8_t *outbuf;
	struct nf_metadata *mdata;
	int plen = SUME_MIN_PKT_SIZE;

	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
	KASSERT(send->state == SUME_RIFFA_CHAN_STATE_IDLE,
	    ("SUME not in IDLE state"));

	m = if_dequeue(ifp);
	if (m == NULL)
		return (EINVAL);

	/* Packets large enough do not need to be padded. */
	if (m->m_pkthdr.len > SUME_MIN_PKT_SIZE)
		plen = m->m_pkthdr.len;

	if (adapter->sume_debug)
		device_printf(adapter->dev, "sending %d bytes to %s%d\n", plen,
		    SUME_ETH_DEVICE_NAME, nf_priv->unit);

	outbuf = (uint8_t *) send->buf_addr + sizeof(struct nf_bb_desc);
	mdata = (struct nf_metadata *) outbuf;

	/* Clear the recovery flag. */
	send->recovery = 0;

	/* Make sure we fit with the 16 bytes nf_metadata. */
	if (m->m_pkthdr.len + sizeof(struct nf_metadata) >
	    adapter->sg_buf_size) {
		device_printf(adapter->dev, "packet too big for bounce buffer "
		    "(%d)\n", m->m_pkthdr.len);
		m_freem(m);
		nf_priv->stats.tx_dropped++;
		return (ENOMEM);
	}

	bus_dmamap_sync(send->ch_tag, send->ch_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* Zero out the padded data. */
	if (m->m_pkthdr.len < SUME_MIN_PKT_SIZE)
		bzero(outbuf + sizeof(struct nf_metadata), SUME_MIN_PKT_SIZE);
	/* Skip the first 16 bytes for the metadata. */
	m_copydata(m, 0, m->m_pkthdr.len, outbuf + sizeof(struct nf_metadata));
	send->len = (sizeof(struct nf_metadata) + plen + 3) / 4;

	/* Fill in the metadata: CPU(DMA) ports are odd, MAC ports are even. */
	mdata->sport = htole16(1 << (nf_priv->port * 2 + 1));
	mdata->dport = htole16(1 << (nf_priv->port * 2));
	mdata->plen = htole16(plen);
	mdata->magic = htole16(SUME_RIFFA_MAGIC);
	mdata->t1 = htole32(0);
	mdata->t2 = htole32(0);

	/* Let the FPGA know about the transfer. */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_LEN_REG_OFF), send->len);

	/* Fill the bouncebuf "descriptor". */
	sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));

	/* Update the state before initiating the DMA to avoid races. */
	send->state = SUME_RIFFA_CHAN_STATE_READY;

	/* DMA. */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_SG_ADDR_LO_REG_OFF),
	    SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_SG_ADDR_HI_REG_OFF),
	    SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);

	bus_dmamap_sync(send->ch_tag, send->ch_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	nf_priv->stats.tx_packets++;
	nf_priv->stats.tx_bytes += plen;

	/* We can free as long as we use the bounce buffer. */
	m_freem(m);

	adapter->last_ifc = nf_priv->port;

	/* Reset watchdog counter. */
	adapter->wd_counter = 0;

	return (0);
}

static void
sume_if_start(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;

	if (!adapter->running || !(if_getflags(ifp) & IFF_UP))
		return;

	SUME_LOCK(adapter);
	if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state ==
	    SUME_RIFFA_CHAN_STATE_IDLE)
		sume_if_start_locked(ifp);
	SUME_UNLOCK(adapter);
}

/*
 * We call this function at the end of every TX transaction to check for
 * remaining packets in the TX queues for every UP interface.
 */
static void
check_tx_queues(struct sume_adapter *adapter)
{
	int i, last_ifc;

	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));

	last_ifc = adapter->last_ifc;

	/* Check all interfaces. */
	for (i = last_ifc + 1; i < last_ifc + SUME_NPORTS + 1; i++) {
		if_t ifp = adapter->ifp[i % SUME_NPORTS];

		if (!(if_getflags(ifp) & IFF_UP))
			continue;

		if (!sume_if_start_locked(ifp))
			break;
	}
}

static int
sume_ifp_alloc(struct sume_adapter *adapter, uint32_t port)
{
	if_t ifp;
	struct nf_priv *nf_priv = malloc(sizeof(struct nf_priv), M_SUME,
	    M_ZERO | M_WAITOK);

	ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(adapter->dev, "cannot allocate ifnet\n");
		return (ENOMEM);
	}

	adapter->ifp[port] = ifp;
	if_setsoftc(ifp, nf_priv);

	nf_priv->adapter = adapter;
	nf_priv->unit = alloc_unr(unr);
	nf_priv->port = port;
	nf_priv->link_up = 0;

	if_initname(ifp, SUME_ETH_DEVICE_NAME, nf_priv->unit);
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);

	if_setinitfn(ifp, sume_if_init);
	if_setstartfn(ifp, sume_if_start);
	if_setioctlfn(ifp, sume_if_ioctl);

	uint8_t hw_addr[ETHER_ADDR_LEN] = DEFAULT_ETHER_ADDRESS;
	hw_addr[ETHER_ADDR_LEN - 1] = nf_priv->unit;
	ether_ifattach(ifp, hw_addr);

	ifmedia_init(&nf_priv->media, IFM_IMASK, sume_media_change,
	    sume_media_status);
	ifmedia_add(&nf_priv->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
	ifmedia_set(&nf_priv->media, IFM_ETHER | IFM_10G_SR);

	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);

	return (0);
}

static void
callback_dma(void *arg, bus_dma_segment_t *segs, int nseg, int err)
{
	if (err)
		return;

	KASSERT(nseg == 1, ("%d segments returned!", nseg));

	*(bus_addr_t *) arg = segs[0].ds_addr;
}

static int
sume_probe_riffa_buffer(const struct sume_adapter *adapter,
    struct riffa_chnl_dir ***p, const char *dir)
{
	struct riffa_chnl_dir **rp;
	bus_addr_t hw_addr;
	int error, ch;
	device_t dev = adapter->dev;

	error = ENOMEM;
	*p = malloc(SUME_RIFFA_CHANNELS * sizeof(struct riffa_chnl_dir *),
	    M_SUME, M_ZERO | M_WAITOK);
	if (*p == NULL) {
		device_printf(dev, "malloc(%s) failed.\n", dir);
		return (error);
	}

	rp = *p;
	/* Allocate the chnl_dir structs themselves. */
	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
		/* One direction. */
		rp[ch] = malloc(sizeof(struct riffa_chnl_dir), M_SUME,
		    M_ZERO | M_WAITOK);
		if (rp[ch] == NULL) {
			device_printf(dev, "malloc(%s[%d]) riffa_chnl_dir "
			    "failed.\n", dir, ch);
			return (error);
		}

		int err = bus_dma_tag_create(bus_get_dma_tag(dev),
		    4, 0,			/* alignment, boundary */
		    BUS_SPACE_MAXADDR,		/* lowaddr */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    adapter->sg_buf_size,	/* maxsize */
		    1,				/* nsegments */
		    adapter->sg_buf_size,	/* maxsegsize */
		    0,				/* flags */
		    NULL,			/* lockfunc */
		    NULL,			/* lockfuncarg */
		    &rp[ch]->ch_tag);

		if (err) {
			device_printf(dev, "bus_dma_tag_create(%s[%d]) "
			    "failed.\n", dir, ch);
			return (err);
		}

		err = bus_dmamem_alloc(rp[ch]->ch_tag, (void **)
		    &rp[ch]->buf_addr, BUS_DMA_WAITOK | BUS_DMA_COHERENT |
		    BUS_DMA_ZERO, &rp[ch]->ch_map);
		if (err) {
			device_printf(dev, "bus_dmamem_alloc(%s[%d]) failed.\n",
			    dir, ch);
			return (err);
		}

		bzero(rp[ch]->buf_addr, adapter->sg_buf_size);

		err = bus_dmamap_load(rp[ch]->ch_tag, rp[ch]->ch_map,
		    rp[ch]->buf_addr, adapter->sg_buf_size, callback_dma,
		    &hw_addr, BUS_DMA_NOWAIT);
		if (err) {
			device_printf(dev, "bus_dmamap_load(%s[%d]) failed.\n",
			    dir, ch);
			return (err);
		}
		rp[ch]->buf_hw_addr = hw_addr;
		rp[ch]->num_sg = 1;
		rp[ch]->state = SUME_RIFFA_CHAN_STATE_IDLE;

		rp[ch]->rtag = SUME_INIT_RTAG;
	}

	return (0);
}

static int
sume_probe_riffa_buffers(struct sume_adapter *adapter)
{
	int error;

	error = sume_probe_riffa_buffer(adapter, &adapter->recv, "recv");
	if (error)
		return (error);

	error = sume_probe_riffa_buffer(adapter, &adapter->send, "send");

	return (error);
}

static void
sume_sysctl_init(struct sume_adapter *adapter)
{
	device_t dev = adapter->dev;
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct sysctl_oid *tmp_tree;
	char namebuf[MAX_IFC_NAME_LEN];
	int i;

	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "sume", CTLFLAG_RW,
	    0, "SUME top-level tree");
	if (tree == NULL) {
		device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
		return;
	}
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug", CTLFLAG_RW,
	    &adapter->sume_debug, 0, "debug int leaf");

	/* total RX error stats */
	SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_epkts",
	    CTLFLAG_RD, &adapter->packets_err, 0, "rx errors");
	SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_ebytes",
	    CTLFLAG_RD, &adapter->bytes_err, 0, "rx error bytes");

	for (i = SUME_NPORTS - 1; i >= 0; i--) {
		if_t ifp = adapter->ifp[i];
		if (ifp == NULL)
			continue;

		struct nf_priv *nf_priv = if_getsoftc(ifp);

		snprintf(namebuf, MAX_IFC_NAME_LEN, "%s%d",
		    SUME_ETH_DEVICE_NAME, nf_priv->unit);
		tmp_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
		    CTLFLAG_RW, 0, "SUME ifc tree");
		if (tmp_tree == NULL) {
			device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
			return;
		}

		/* Packets dropped by down interface. */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "ifc_down_bytes", CTLFLAG_RD,
		    &nf_priv->stats.ifc_down_bytes, 0, "ifc_down bytes");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "ifc_down_packets", CTLFLAG_RD,
		    &nf_priv->stats.ifc_down_packets, 0, "ifc_down packets");

		/* HW RX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "hw_rx_packets", CTLFLAG_RD, &nf_priv->stats.hw_rx_packets,
		    0, "hw_rx packets");

		/* HW TX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "hw_tx_packets", CTLFLAG_RD, &nf_priv->stats.hw_tx_packets,
		    0, "hw_tx packets");

		/* RX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "rx_bytes", CTLFLAG_RD, &nf_priv->stats.rx_bytes, 0,
		    "rx bytes");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "rx_dropped", CTLFLAG_RD, &nf_priv->stats.rx_dropped, 0,
		    "rx dropped");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "rx_packets", CTLFLAG_RD, &nf_priv->stats.rx_packets, 0,
		    "rx packets");

		/* TX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "tx_bytes", CTLFLAG_RD, &nf_priv->stats.tx_bytes, 0,
		    "tx bytes");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "tx_dropped", CTLFLAG_RD, &nf_priv->stats.tx_dropped, 0,
		    "tx dropped");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "tx_packets", CTLFLAG_RD, &nf_priv->stats.tx_packets, 0,
		    "tx packets");
	}
}

static void
sume_local_timer(void *arg)
{
	struct sume_adapter *adapter = arg;

	if (!adapter->running)
		return;

	taskqueue_enqueue(adapter->tq, &adapter->stat_task);

	SUME_LOCK(adapter);
	if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state !=
	    SUME_RIFFA_CHAN_STATE_IDLE && ++adapter->wd_counter >= 3) {
		/* Reset the interfaces if stuck for 3 seconds. */
		device_printf(adapter->dev, "TX stuck, resetting adapter.\n");
		read_reg(adapter, RIFFA_INFO_REG_OFF);

		adapter->send[SUME_RIFFA_CHANNEL_DATA]->state =
		    SUME_RIFFA_CHAN_STATE_IDLE;
		adapter->wd_counter = 0;

		check_tx_queues(adapter);
	}
	SUME_UNLOCK(adapter);

	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
}

static void
sume_get_stats(void *context, int pending)
{
	struct sume_adapter *adapter = context;
	int i;

	for (i = 0; i < SUME_NPORTS; i++) {
		if_t ifp = adapter->ifp[i];

		if (if_getflags(ifp) & IFF_UP) {
			struct nf_priv *nf_priv = if_getsoftc(ifp);
			struct sume_ifreq sifr;

			sume_update_link_status(ifp);

			/* Get RX counter. */
			sifr.addr = SUME_STAT_RX_ADDR(nf_priv->port);
			sifr.val = 0;

			if (!get_modreg_value(nf_priv, &sifr))
				nf_priv->stats.hw_rx_packets += sifr.val;

			/* Get TX counter. */
			sifr.addr = SUME_STAT_TX_ADDR(nf_priv->port);
			sifr.val = 0;

			if (!get_modreg_value(nf_priv, &sifr))
				nf_priv->stats.hw_tx_packets += sifr.val;
		}
	}
}

static int
sume_attach(device_t dev)
{
	struct sume_adapter *adapter = device_get_softc(dev);
	int error, i;

	adapter->dev = dev;

	mtx_init(&adapter->lock, "Global lock", NULL, MTX_DEF);

	adapter->running = 0;

	/* OK finish up RIFFA. */
	error = sume_probe_riffa_pci(adapter);
	if (error != 0)
		goto error;

	error = sume_probe_riffa_buffers(adapter);
	if (error != 0)
		goto error;

	/* Now do the network interfaces. */
	for (i = 0; i < SUME_NPORTS; i++) {
		error = sume_ifp_alloc(adapter, i);
		if (error != 0)
			goto error;
	}

	/* Register stats and register sysctls. */
	sume_sysctl_init(adapter);

	/* Reset the HW. */
	read_reg(adapter, RIFFA_INFO_REG_OFF);

	/* Ready to go, "enable" IRQ. */
	adapter->running = 1;

	callout_init(&adapter->timer, 1);
	TASK_INIT(&adapter->stat_task, 0, sume_get_stats, adapter);

	adapter->tq = taskqueue_create("sume_stats", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s stattaskq",
	    device_get_nameunit(adapter->dev));

	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);

	return (0);

error:
	sume_detach(dev);

	return (error);
}

static void
sume_remove_riffa_buffer(const struct sume_adapter *adapter,
    struct riffa_chnl_dir **pp)
{
	int ch;

	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
		if (pp[ch] == NULL)
			continue;

		if (pp[ch]->buf_hw_addr != 0) {
			bus_dmamem_free(pp[ch]->ch_tag, pp[ch]->buf_addr,
			    pp[ch]->ch_map);
			pp[ch]->buf_hw_addr = 0;
		}

		free(pp[ch], M_SUME);
	}
}

static void
sume_remove_riffa_buffers(struct sume_adapter *adapter)
{
	if (adapter->send != NULL) {
		sume_remove_riffa_buffer(adapter, adapter->send);
		free(adapter->send, M_SUME);
		adapter->send = NULL;
	}
	if (adapter->recv != NULL) {
		sume_remove_riffa_buffer(adapter, adapter->recv);
		free(adapter->recv, M_SUME);
		adapter->recv = NULL;
	}
}

static int
sume_detach(device_t dev)
{
	struct sume_adapter *adapter = device_get_softc(dev);
	int i;
	struct nf_priv *nf_priv;

	KASSERT(mtx_initialized(&adapter->lock), ("SUME mutex not "
	    "initialized"));
	adapter->running = 0;

	/* Drain the stats callout and task queue. */
	callout_drain(&adapter->timer);

	if (adapter->tq) {
		taskqueue_drain(adapter->tq, &adapter->stat_task);
		taskqueue_free(adapter->tq);
	}

	for (i = 0; i < SUME_NPORTS; i++) {
		if_t ifp = adapter->ifp[i];
		if (ifp == NULL)
			continue;

		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
		nf_priv = if_getsoftc(ifp);

		if (if_getflags(ifp) & IFF_UP)
			if_down(ifp);
		ifmedia_removeall(&nf_priv->media);
		free_unr(unr, nf_priv->unit);

		if_setflagbits(ifp, 0, IFF_UP);
		ether_ifdetach(ifp);
		if_free(ifp);

		free(nf_priv, M_SUME);
	}

	sume_remove_riffa_buffers(adapter);

	if (adapter->irq.tag)
		bus_teardown_intr(dev, adapter->irq.res, adapter->irq.tag);
	if (adapter->irq.res)
		bus_release_resource(dev, SYS_RES_IRQ, adapter->irq.rid,
		    adapter->irq.res);

	pci_release_msi(dev);

	if (adapter->bar0_addr)
		bus_release_resource(dev, SYS_RES_MEMORY, adapter->rid,
		    adapter->bar0_addr);

	mtx_destroy(&adapter->lock);

	return (0);
}

static int
mod_event(module_t mod, int cmd, void *arg)
{
	switch (cmd) {
	case MOD_LOAD:
		unr = new_unrhdr(0, INT_MAX, NULL);
		break;

	case MOD_UNLOAD:
		delete_unrhdr(unr);
		break;
	}

	return (0);
}

DRIVER_MODULE(sume, pci, sume_driver, mod_event, NULL);
MODULE_VERSION(sume, 1);