1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015 Bjoern A. Zeeb 5 * Copyright (c) 2020 Denis Salopek 6 * 7 * This software was developed by SRI International and the University of 8 * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249 9 * ("MRC2"), as part of the DARPA MRC research programme. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include <machine/bus.h>

#include "adapter.h"

/* Xilinx PCI vendor ID and the device ID of the SUME reference NIC. */
#define PCI_VENDOR_ID_XILINX 0x10ee
#define PCI_DEVICE_ID_SUME 0x7028

/* SUME bus driver interface */
static int sume_probe(device_t);
static int sume_attach(device_t);
static int sume_detach(device_t);

static device_method_t sume_methods[] = {
	DEVMETHOD(device_probe, sume_probe),
	DEVMETHOD(device_attach, sume_attach),
	DEVMETHOD(device_detach, sume_detach),
	DEVMETHOD_END
};

static driver_t sume_driver = {
	"sume",
	sume_methods,
	sizeof(struct sume_adapter)
};

/*
 * The DMA engine for SUME generates interrupts for each RX/TX transaction.
 * Depending on the channel (0 if packet transaction, 1 if register transaction)
 * the used bits of the interrupt vector will be the lowest or the second lowest
 * 5 bits.
 *
 * When receiving packets from SUME (RX):
 * (1) SUME received a packet on one of the interfaces.
 * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX
 *     transaction).
 * (3) We read the length of the incoming packet and the offset along with the
 *     'last' flag from the SUME registers.
 * (4) We prepare for the DMA transaction by setting the bouncebuffer on the
 *     address buf_addr. For now, this is how it's done:
 *     - First 3*sizeof(uint32_t) bytes are: lower and upper 32 bits of physical
 *     address where we want the data to arrive (buf_addr[0] and buf_addr[1]),
 *     and length of incoming data (buf_addr[2]).
 *     - Data will start right after, at buf_addr+3*sizeof(uint32_t). The
 *     physical address buf_hw_addr is a block of contiguous memory mapped to
 *     buf_addr, so we can set the incoming data's physical address (buf_addr[0]
 *     and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t).
 * (5) We notify SUME that the bouncebuffer is ready for the transaction by
 *     writing the lower/upper physical address buf_hw_addr to the SUME
 *     registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as
 *     well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF.
 * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 -
 *     bouncebuffer received).
 * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 -
 *     transaction is done).
 * (8) SUME can do both steps (6) and (7) using the same interrupt.
 * (9) We read the first 16 bytes (metadata) of the received data and note the
 *     incoming interface so we can later forward it to the right one in the OS
 *     (sume0, sume1, sume2 or sume3).
 * (10) We create an mbuf and copy the data from the bouncebuffer to the mbuf
 *     and set the mbuf rcvif to the incoming interface.
 * (11) We forward the mbuf to the appropriate interface via ifp->if_input.
 *
 * When sending packets to SUME (TX):
 * (1) The OS calls sume_if_start() function on TX.
 * (2) We get the mbuf packet data and copy it to the
 *     buf_addr+3*sizeof(uint32_t) + metadata 16 bytes.
 * (3) We create the metadata based on the output interface and copy it to the
 *     buf_addr+3*sizeof(uint32_t).
 * (4) We write the offset/last and length of the packet to the SUME registers
 *     RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF.
 * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes
 *     with the physical address and length just as in RX step (4).
 * (6) We notify SUME that the bouncebuffer is ready by writing to SUME
 *     registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and
 *     RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5).
 * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 -
 *     bouncebuffer is read).
 * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 -
 *     transaction is done).
 * (9) SUME can do both steps (7) and (8) using the same interrupt.
 *
 * Internal registers
 * Every module in the SUME hardware has its own set of internal registers
 * (IDs, for debugging and statistic purposes, etc.). Their base addresses are
 * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the
 * offsets to different memory locations of every module are defined in their
 * corresponding folder inside the library. These registers can be RO/RW and
 * there is a special method to fetch/change this data over 1 or 2 DMA
 * transactions. For writing, by calling the sume_module_reg_write(). For
 * reading, by calling the sume_module_reg_write() and then
 * sume_module_reg_read(). Check those functions for more information.
 */

MALLOC_DECLARE(M_SUME);
MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver");

static void check_tx_queues(struct sume_adapter *);
static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *,
    uint64_t);

/*
 * Unit-number allocator; presumably used to number the created ifnets —
 * the allocation/free sites are not visible in this chunk (TODO confirm).
 */
static struct unrhdr *unr;

/* PCI device IDs (all with the Xilinx vendor ID) this driver attaches to. */
static struct {
	uint16_t device;
	char *desc;
} sume_pciids[] = {
	{PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"},
};

/*
 * RIFFA registers are accessed by 32-bit word index; the shift by 2
 * converts the word offset into the byte offset bus_space expects.
 */
static inline uint32_t
read_reg(struct sume_adapter *adapter, int offset)
{

	return (bus_space_read_4(adapter->bt, adapter->bh, offset << 2));
}

static inline void
write_reg(struct sume_adapter *adapter, int offset, uint32_t val)
{

	bus_space_write_4(adapter->bt, adapter->bh, offset << 2, val);
}

/*
 * Standard newbus probe: accept only Xilinx devices whose device ID is
 * listed in sume_pciids[].
 */
static int
sume_probe(device_t dev)
{
	int i;
	uint16_t v = pci_get_vendor(dev);
	uint16_t d = pci_get_device(dev);

	if (v != PCI_VENDOR_ID_XILINX)
		return (ENXIO);

	for (i = 0; i < nitems(sume_pciids); i++) {
		if (d == sume_pciids[i].device) {
			device_set_desc(dev, sume_pciids[i].desc);
			return (BUS_PROBE_DEFAULT);
		}
	}

	return (ENXIO);
}

/*
 * Building mbuf for packet received from SUME. We expect to receive 'len'
 * bytes of data (including metadata) written from the bouncebuffer address
 * buf_addr+3*sizeof(uint32_t). Metadata will tell us which SUME interface
 * received the packet (sport will be 1, 2, 4 or 8), the packet length (plen),
 * and the magic word needs to be 0xcafe. When we have the packet data, we
 * create an mbuf and copy the data to it using m_copyback() function, set the
 * correct interface to rcvif and return the mbuf to be later sent to the OS
 * with if_input.
208 */ 209 static struct mbuf * 210 sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len) 211 { 212 struct nf_priv *nf_priv; 213 struct mbuf *m; 214 if_t ifp = NULL; 215 int np; 216 uint16_t dport, plen, magic; 217 device_t dev = adapter->dev; 218 uint8_t *indata = (uint8_t *) 219 adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr + 220 sizeof(struct nf_bb_desc); 221 struct nf_metadata *mdata = (struct nf_metadata *) indata; 222 223 /* The metadata header is 16 bytes. */ 224 if (len < sizeof(struct nf_metadata)) { 225 device_printf(dev, "short frame (%d)\n", len); 226 adapter->packets_err++; 227 adapter->bytes_err += len; 228 return (NULL); 229 } 230 231 dport = le16toh(mdata->dport); 232 plen = le16toh(mdata->plen); 233 magic = le16toh(mdata->magic); 234 235 if (sizeof(struct nf_metadata) + plen > len || 236 magic != SUME_RIFFA_MAGIC) { 237 device_printf(dev, "corrupted packet (%zd + %d > %d || magic " 238 "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen, 239 len, magic, SUME_RIFFA_MAGIC); 240 return (NULL); 241 } 242 243 /* We got the packet from one of the even bits */ 244 np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1; 245 if (np > SUME_NPORTS) { 246 device_printf(dev, "invalid destination port 0x%04x (%d)\n", 247 dport, np); 248 adapter->packets_err++; 249 adapter->bytes_err += plen; 250 return (NULL); 251 } 252 ifp = adapter->ifp[np]; 253 nf_priv = if_getsoftc(ifp); 254 nf_priv->stats.rx_packets++; 255 nf_priv->stats.rx_bytes += plen; 256 257 /* If the interface is down, well, we are done. */ 258 if (!(if_getflags(ifp) & IFF_UP)) { 259 nf_priv->stats.ifc_down_packets++; 260 nf_priv->stats.ifc_down_bytes += plen; 261 return (NULL); 262 } 263 264 if (adapter->sume_debug) 265 printf("Building mbuf with length: %d\n", plen); 266 267 m = m_getm(NULL, plen, M_NOWAIT, MT_DATA); 268 if (m == NULL) { 269 adapter->packets_err++; 270 adapter->bytes_err += plen; 271 return (NULL); 272 } 273 274 /* Copy the data in at the right offset. 
*/ 275 m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata))); 276 m->m_pkthdr.rcvif = ifp; 277 278 return (m); 279 } 280 281 /* 282 * SUME interrupt handler for when we get a valid interrupt from the board. 283 * Theoretically, we can receive interrupt for any of the available channels, 284 * but RIFFA DMA uses only 2: 0 and 1, so we use only vect0. The vector is a 32 285 * bit number, using 5 bits for every channel, the least significant bits 286 * correspond to channel 0 and the next 5 bits correspond to channel 1. Vector 287 * bits for RX/TX are: 288 * RX 289 * bit 0 - new transaction from SUME 290 * bit 1 - SUME received our bouncebuffer address 291 * bit 2 - SUME copied the received data to our bouncebuffer, transaction done 292 * TX 293 * bit 3 - SUME received our bouncebuffer address 294 * bit 4 - SUME copied the data from our bouncebuffer, transaction done 295 * 296 * There are two finite state machines (one for TX, one for RX). We loop 297 * through channels 0 and 1 to check and our current state and which interrupt 298 * bit is set. 299 * TX 300 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction. 301 * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer 302 * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3 303 * to go to the next state. 304 * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send 305 * our packet). Then we get the length of the sent data and go back to the 306 * IDLE state. 307 * RX 308 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the interrupt bit 0 (new RX 309 * transaction). When we get it, we prepare our bouncebuffer for reading and 310 * trigger the SUME to start the transaction. Go to the next state. 311 * SUME_RIFFA_CHAN_STATE_READY: waiting for the interrupt bit 1 (SUME got our 312 * bouncebuffer). Go to the next state. 
 * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready,
 * we can build the mbuf and go back to the IDLE state.
 */
static void
sume_intr_handler(void *arg)
{
	struct sume_adapter *adapter = arg;
	uint32_t vect, vect0, len;
	int ch, loops;
	device_t dev = adapter->dev;
	struct mbuf *m = NULL;
	if_t ifp = NULL;
	struct riffa_chnl_dir *send, *recv;

	SUME_LOCK(adapter);

	vect0 = read_reg(adapter, RIFFA_IRQ_REG0_OFF);
	if ((vect0 & SUME_INVALID_VECT) != 0) {
		SUME_UNLOCK(adapter);
		return;
	}

	/*
	 * We only have one interrupt for all channels and no way
	 * to quickly lookup for which channel(s) we got an interrupt?
	 */
	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
		/* Each channel owns 5 bits of the vector. */
		vect = vect0 >> (5 * ch);
		send = adapter->send[ch];
		recv = adapter->recv[ch];

		/*
		 * TX state machine.  One interrupt may carry several TX
		 * bits, so loop (bounded) until they are all consumed.
		 */
		loops = 0;
		while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
		    loops <= 5) {
			if (adapter->sume_debug)
				device_printf(dev, "TX ch %d state %u vect = "
				    "0x%08x\n", ch, send->state, vect);
			switch (send->state) {
			case SUME_RIFFA_CHAN_STATE_IDLE:
				break;
			case SUME_RIFFA_CHAN_STATE_READY:
				/* Expect bit 3: SUME took our bouncebuffer. */
				if (!(vect & SUME_MSI_TXBUF)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in send+3 state %u: "
					    "vect = 0x%08x\n", ch, send->state,
					    vect);
					send->recovery = 1;
					break;
				}
				send->state = SUME_RIFFA_CHAN_STATE_READ;
				vect &= ~SUME_MSI_TXBUF;
				break;
			case SUME_RIFFA_CHAN_STATE_READ:
				/* Expect bit 4: transaction done. */
				if (!(vect & SUME_MSI_TXDONE)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in send+4 state %u: "
					    "vect = 0x%08x\n", ch, send->state,
					    vect);
					send->recovery = 1;
					break;
				}
				send->state = SUME_RIFFA_CHAN_STATE_LEN;

				/* Read (and discard) the transferred length. */
				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_RX_TNFR_LEN_REG_OFF));
				if (ch == SUME_RIFFA_CHANNEL_DATA) {
					send->state =
					    SUME_RIFFA_CHAN_STATE_IDLE;
					check_tx_queues(adapter);
				} else if (ch == SUME_RIFFA_CHANNEL_REG)
					/* Wake the sleeping register writer. */
					wakeup(&send->event);
				else {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in send+4 state %u: "
					    "vect = 0x%08x\n", ch, send->state,
					    vect);
					send->recovery = 1;
				}
				vect &= ~SUME_MSI_TXDONE;
				break;
			case SUME_RIFFA_CHAN_STATE_LEN:
				break;
			default:
				device_printf(dev, "unknown TX state!\n");
			}
			loops++;
		}

		/*
		 * NOTE(review): this fires only when send->recovery IS set,
		 * yet the message text says "not in recovery" — the wording
		 * looks inverted; the logic is kept as-is.
		 */
		if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) &&
		    send->recovery)
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during TX; not in recovery; state = %d loops = "
			    "%d\n", ch, vect, send->state, loops);

		/* RX state machine, same bounded-loop idea as TX above. */
		loops = 0;
		while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
		    SUME_MSI_RXDONE)) && loops < 5) {
			if (adapter->sume_debug)
				device_printf(dev, "RX ch %d state %u vect = "
				    "0x%08x\n", ch, recv->state, vect);
			switch (recv->state) {
			case SUME_RIFFA_CHAN_STATE_IDLE:
				/* Expect bit 0: new RX transaction. */
				if (!(vect & SUME_MSI_RXQUE)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+0 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				uint32_t max_ptr;

				/* Clear recovery state. */
				recv->recovery = 0;

				/* Get offset and length. */
				recv->offlast = read_reg(adapter,
				    RIFFA_CHNL_REG(ch,
				    RIFFA_TX_OFFLAST_REG_OFF));
				recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_LEN_REG_OFF));

				/* Boundary checks. */
				max_ptr = (uint32_t)((uintptr_t)recv->buf_addr
				    + SUME_RIFFA_OFFSET(recv->offlast)
				    + SUME_RIFFA_LEN(recv->len) - 1);
				if (max_ptr <
				    (uint32_t)((uintptr_t)recv->buf_addr))
					device_printf(dev, "receive buffer "
					    "wrap-around overflow.\n");
				if (SUME_RIFFA_OFFSET(recv->offlast) +
				    SUME_RIFFA_LEN(recv->len) >
				    adapter->sg_buf_size)
					device_printf(dev, "receive buffer too"
					    " small.\n");

				/* Fill the bouncebuf "descriptor". */
				sume_fill_bb_desc(adapter, recv,
				    SUME_RIFFA_LEN(recv->len));

				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
				    BUS_DMASYNC_PREREAD |
				    BUS_DMASYNC_PREWRITE);
				/* Hand the bouncebuffer address to SUME. */
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_ADDR_LO_REG_OFF),
				    SUME_RIFFA_LO_ADDR(recv->buf_hw_addr));
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_ADDR_HI_REG_OFF),
				    SUME_RIFFA_HI_ADDR(recv->buf_hw_addr));
				write_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_SG_LEN_REG_OFF),
				    4 * recv->num_sg);
				bus_dmamap_sync(recv->ch_tag, recv->ch_map,
				    BUS_DMASYNC_POSTREAD |
				    BUS_DMASYNC_POSTWRITE);

				recv->state = SUME_RIFFA_CHAN_STATE_READY;
				vect &= ~SUME_MSI_RXQUE;
				break;
			case SUME_RIFFA_CHAN_STATE_READY:
				/* Expect bit 1: SUME got the bouncebuffer. */
				if (!(vect & SUME_MSI_RXBUF)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+1 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				recv->state = SUME_RIFFA_CHAN_STATE_READ;
				vect &= ~SUME_MSI_RXBUF;
				break;
			case SUME_RIFFA_CHAN_STATE_READ:
				/* Expect bit 2: data landed, transaction done. */
				if (!(vect & SUME_MSI_RXDONE)) {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+2 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
					break;
				}
				len = read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_TNFR_LEN_REG_OFF));

				/* Remember, len and recv->len are words. */
				if (ch == SUME_RIFFA_CHANNEL_DATA) {
					m = sume_rx_build_mbuf(adapter,
					    len << 2);
					recv->state =
					    SUME_RIFFA_CHAN_STATE_IDLE;
				} else if (ch == SUME_RIFFA_CHANNEL_REG)
					/* Wake the sleeping register reader. */
					wakeup(&recv->event);
				else {
					device_printf(dev, "ch %d unexpected "
					    "interrupt in recv+2 state %u: "
					    "vect = 0x%08x\n", ch, recv->state,
					    vect);
					recv->recovery = 1;
				}
				vect &= ~SUME_MSI_RXDONE;
				break;
			case SUME_RIFFA_CHAN_STATE_LEN:
				break;
			default:
				device_printf(dev, "unknown RX state!\n");
			}
			loops++;
		}

		/*
		 * NOTE(review): as with TX above, the message wording
		 * ("not in recovery") contradicts the guarding condition.
		 */
		if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF |
		    SUME_MSI_RXDONE)) && recv->recovery) {
			device_printf(dev, "ch %d ignoring vect = 0x%08x "
			    "during RX; not in recovery; state = %d, loops = "
			    "%d\n", ch, vect, recv->state, loops);

			/* Clean the unfinished transaction. */
			if (ch == SUME_RIFFA_CHANNEL_REG &&
			    vect & SUME_MSI_RXDONE) {
				read_reg(adapter, RIFFA_CHNL_REG(ch,
				    RIFFA_TX_TNFR_LEN_REG_OFF));
				recv->recovery = 0;
			}
		}
	}
	SUME_UNLOCK(adapter);

	/* Hand a completed RX packet to the stack outside the driver lock. */
	if (m != NULL) {
		ifp = m->m_pkthdr.rcvif;
		if_input(ifp, m);
	}
}

/*
 * As we cannot disable interrupt generation, ignore early interrupts by waiting
 * for the adapter to go into the 'running' state.
 */
static int
sume_intr_filter(void *arg)
{
	struct sume_adapter *adapter = arg;

	/* Drop interrupts until attach has marked the adapter running. */
	if (adapter->running == 0)
		return (FILTER_STRAY);

	return (FILTER_SCHEDULE_THREAD);
}

/*
 * Map BAR0, set up the MSI interrupt, tweak a few PCIe control bits and
 * read/validate the RIFFA configuration register.  Returns 0 on success
 * or an errno.
 *
 * NOTE(review): the error paths below do not release the BAR0 resource,
 * the MSI allocation or the IRQ — presumably the caller (attach/detach)
 * cleans these up; confirm against the rest of the file.
 */
static int
sume_probe_riffa_pci(struct sume_adapter *adapter)
{
	device_t dev = adapter->dev;
	int error, count, capmem;
	uint32_t reg, devctl, linkctl;

	pci_enable_busmaster(dev);

	adapter->rid = PCIR_BAR(0);
	adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &adapter->rid, RF_ACTIVE);
	if (adapter->bar0_addr == NULL) {
		device_printf(dev, "unable to allocate bus resource: "
		    "BAR0 address\n");
		return (ENXIO);
	}
	adapter->bt = rman_get_bustag(adapter->bar0_addr);
	adapter->bh = rman_get_bushandle(adapter->bar0_addr);
	adapter->bar0_len = rman_get_size(adapter->bar0_addr);
	/* The RIFFA register window is expected to be exactly 1 KiB. */
	if (adapter->bar0_len != 1024) {
		device_printf(dev, "BAR0 resource length %lu != 1024\n",
		    adapter->bar0_len);
		return (ENXIO);
	}

	count = pci_msi_count(dev);
	error = pci_alloc_msi(dev, &count);
	if (error) {
		device_printf(dev, "unable to allocate bus resource: PCI "
		    "MSI\n");
		return (error);
	}

	adapter->irq.rid = 1; /* Should be 1, thus says pci_alloc_msi() */
	adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
	    &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->irq.res == NULL) {
		device_printf(dev, "unable to allocate bus resource: IRQ "
		    "memory\n");
		return (ENXIO);
	}

	error = bus_setup_intr(dev, adapter->irq.res, INTR_MPSAFE |
	    INTR_TYPE_NET, sume_intr_filter, sume_intr_handler, adapter,
	    &adapter->irq.tag);
	if (error) {
		device_printf(dev, "failed to setup interrupt for rid %d, name"
		    " %s: %d\n", adapter->irq.rid, "SUME_INTR", error);
		return (ENXIO);
	}

	if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) {
		device_printf(dev, "PCI not PCIe capable\n");
		return (ENXIO);
	}

	/* Enable extended tags, ID-based ordering and RCB (RIFFA wants them). */
	devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL, 2);
	pci_write_config(dev, capmem + PCIER_DEVICE_CTL, (devctl |
	    PCIEM_CTL_EXT_TAG_FIELD), 2);

	devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL2, 2);
	pci_write_config(dev, capmem + PCIER_DEVICE_CTL2, (devctl |
	    PCIEM_CTL2_ID_ORDERED_REQ_EN), 2);

	linkctl = pci_read_config(dev, capmem + PCIER_LINK_CTL, 2);
	pci_write_config(dev, capmem + PCIER_LINK_CTL, (linkctl |
	    PCIEM_LINK_CTL_RCB), 2);

	/* Bits 19-22 of the info register scale the SG list and buffers. */
	reg = read_reg(adapter, RIFFA_INFO_REG_OFF);
	adapter->num_sg = RIFFA_SG_ELEMS * ((reg >> 19) & 0xf);
	adapter->sg_buf_size = RIFFA_SG_BUF_SIZE * ((reg >> 19) & 0xf);

	error = ENODEV;
	/* Check bus master is enabled. */
	if (((reg >> 4) & 0x1) != 1) {
		device_printf(dev, "bus master not enabled: %d\n",
		    (reg >> 4) & 0x1);
		return (error);
	}
	/* Check link parameters are valid. */
	if (((reg >> 5) & 0x3f) == 0 || ((reg >> 11) & 0x3) == 0) {
		device_printf(dev, "link parameters not valid: %d %d\n",
		    (reg >> 5) & 0x3f, (reg >> 11) & 0x3);
		return (error);
	}
	/* Check # of channels are within valid range. */
	if ((reg & 0xf) == 0 || (reg & 0xf) > RIFFA_MAX_CHNLS) {
		device_printf(dev, "number of channels out of range: %d\n",
		    reg & 0xf);
		return (error);
	}
	/* Check bus width. */
	if (((reg >> 19) & 0xf) == 0 ||
	    ((reg >> 19) & 0xf) > RIFFA_MAX_BUS_WIDTH_PARAM) {
		device_printf(dev, "bus width out of range: %d\n",
		    (reg >> 19) & 0xf);
		return (error);
	}

	device_printf(dev, "[riffa] # of channels: %d\n",
	    reg & 0xf);
	device_printf(dev, "[riffa] bus interface width: %d\n",
	    ((reg >> 19) & 0xf) << 5);
	device_printf(dev, "[riffa] bus master enabled: %d\n",
	    (reg >> 4) & 0x1);
	device_printf(dev, "[riffa] negotiated link width: %d\n",
	    (reg >> 5) & 0x3f);
	device_printf(dev, "[riffa] negotiated rate width: %d MTs\n",
	    ((reg >> 11) & 0x3) * 2500);
	device_printf(dev, "[riffa] max downstream payload: %d B\n",
	    128 << ((reg >> 13) & 0x7));
	device_printf(dev, "[riffa] max upstream payload: %d B\n",
	    128 << ((reg >> 16) & 0x7));

	return (0);
}

/* If there is no sume_if_init, the ether_ioctl panics. */
static void
sume_if_init(void *sc)
{
}

/*
 * Write the address and length for our incoming / outgoing transaction.
 * 'adapter' is unused here; the descriptor lives at the start of the
 * channel's bouncebuffer and points just past itself (hw address + header).
 */
static void
sume_fill_bb_desc(struct sume_adapter *adapter, struct riffa_chnl_dir *p,
    uint64_t len)
{
	struct nf_bb_desc *bouncebuf = (struct nf_bb_desc *) p->buf_addr;

	bouncebuf->lower = (p->buf_hw_addr + sizeof(struct nf_bb_desc));
	bouncebuf->upper = (p->buf_hw_addr + sizeof(struct nf_bb_desc)) >> 32;
	bouncebuf->len = len >> 2;	/* hardware counts 32-bit words */
}

/*
 * Module register locked write: kick off the register-channel DMA for a
 * request already staged in the send bouncebuffer.  Caller holds the
 * adapter lock and has set send->len.
 */
static int
sume_modreg_write_locked(struct sume_adapter *adapter)
{
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG];

	/* Let the FPGA know about the transfer. */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG,
	    RIFFA_RX_LEN_REG_OFF), send->len); /* words */

	/* Fill the bouncebuf "descriptor".
*/ 708 sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len)); 709 710 /* Update the state before intiating the DMA to avoid races. */ 711 send->state = SUME_RIFFA_CHAN_STATE_READY; 712 713 bus_dmamap_sync(send->ch_tag, send->ch_map, 714 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 715 /* DMA. */ 716 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG, 717 RIFFA_RX_SG_ADDR_LO_REG_OFF), 718 SUME_RIFFA_LO_ADDR(send->buf_hw_addr)); 719 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG, 720 RIFFA_RX_SG_ADDR_HI_REG_OFF), 721 SUME_RIFFA_HI_ADDR(send->buf_hw_addr)); 722 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG, 723 RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg); 724 bus_dmamap_sync(send->ch_tag, send->ch_map, 725 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 726 727 return (0); 728 } 729 730 /* 731 * Request a register read or write (depending on optype). 732 * If optype is set (0x1f) this will result in a register write, 733 * otherwise this will result in a register read request at the given 734 * address and the result will need to be DMAed back. 735 */ 736 static int 737 sume_module_reg_write(struct nf_priv *nf_priv, struct sume_ifreq *sifr, 738 uint32_t optype) 739 { 740 struct sume_adapter *adapter = nf_priv->adapter; 741 struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG]; 742 struct nf_regop_data *data; 743 int error; 744 745 /* 746 * 1. Make sure the channel is free; otherwise return EBUSY. 747 * 2. Prepare the memory in the bounce buffer (which we always 748 * use for regs). 749 * 3. Start the DMA process. 750 * 4. Sleep and wait for result and return success or error. 751 */ 752 SUME_LOCK(adapter); 753 754 if (send->state != SUME_RIFFA_CHAN_STATE_IDLE) { 755 SUME_UNLOCK(adapter); 756 return (EBUSY); 757 } 758 759 data = (struct nf_regop_data *) (send->buf_addr + 760 sizeof(struct nf_bb_desc)); 761 data->addr = htole32(sifr->addr); 762 data->val = htole32(sifr->val); 763 /* Tag to indentify request. 
*/ 764 data->rtag = htole32(++send->rtag); 765 data->optype = htole32(optype); 766 send->len = sizeof(struct nf_regop_data) / 4; /* words */ 767 768 error = sume_modreg_write_locked(adapter); 769 if (error) { 770 SUME_UNLOCK(adapter); 771 return (EFAULT); 772 } 773 774 /* Timeout after 1s. */ 775 if (send->state != SUME_RIFFA_CHAN_STATE_LEN) 776 error = msleep(&send->event, &adapter->lock, 0, 777 "Waiting recv finish", 1 * hz); 778 779 /* This was a write so we are done; were interrupted, or timed out. */ 780 if (optype != SUME_MR_READ || error != 0 || error == EWOULDBLOCK) { 781 send->state = SUME_RIFFA_CHAN_STATE_IDLE; 782 if (optype == SUME_MR_READ) 783 error = EWOULDBLOCK; 784 else 785 error = 0; 786 } else 787 error = 0; 788 789 /* 790 * For read requests we will update state once we are done 791 * having read the result to avoid any two outstanding 792 * transactions, or we need a queue and validate tags, 793 * which is a lot of work for a low priority, infrequent 794 * event. 795 */ 796 797 SUME_UNLOCK(adapter); 798 799 return (error); 800 } 801 802 /* Module register read. */ 803 static int 804 sume_module_reg_read(struct nf_priv *nf_priv, struct sume_ifreq *sifr) 805 { 806 struct sume_adapter *adapter = nf_priv->adapter; 807 struct riffa_chnl_dir *recv = adapter->recv[SUME_RIFFA_CHANNEL_REG]; 808 struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG]; 809 struct nf_regop_data *data; 810 int error = 0; 811 812 /* 813 * 0. Sleep waiting for result if needed (unless condition is 814 * true already). 815 * 1. Read DMA results. 816 * 2. Update state on *TX* to IDLE to allow next read to start. 817 */ 818 SUME_LOCK(adapter); 819 820 bus_dmamap_sync(recv->ch_tag, recv->ch_map, 821 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 822 /* 823 * We only need to be woken up at the end of the transaction. 824 * Timeout after 1s. 
825 */ 826 if (recv->state != SUME_RIFFA_CHAN_STATE_READ) 827 error = msleep(&recv->event, &adapter->lock, 0, 828 "Waiting transaction finish", 1 * hz); 829 830 if (recv->state != SUME_RIFFA_CHAN_STATE_READ || error == EWOULDBLOCK) { 831 SUME_UNLOCK(adapter); 832 device_printf(adapter->dev, "wait error: %d\n", error); 833 return (EWOULDBLOCK); 834 } 835 836 bus_dmamap_sync(recv->ch_tag, recv->ch_map, 837 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 838 839 /* 840 * Read reply data and validate address and tag. 841 * Note: we do access the send side without lock but the state 842 * machine does prevent the data from changing. 843 */ 844 data = (struct nf_regop_data *) (recv->buf_addr + 845 sizeof(struct nf_bb_desc)); 846 847 if (le32toh(data->rtag) != send->rtag) 848 device_printf(adapter->dev, "rtag error: 0x%08x 0x%08x\n", 849 le32toh(data->rtag), send->rtag); 850 851 sifr->val = le32toh(data->val); 852 recv->state = SUME_RIFFA_CHAN_STATE_IDLE; 853 854 /* We are done. */ 855 send->state = SUME_RIFFA_CHAN_STATE_IDLE; 856 857 SUME_UNLOCK(adapter); 858 859 return (0); 860 } 861 862 /* Read value from a module register and return it to a sume_ifreq. 
 */
static int
get_modreg_value(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
{
	int error;

	/* A read is a write of the request followed by a DMA'd reply. */
	error = sume_module_reg_write(nf_priv, sifr, SUME_MR_READ);
	if (!error)
		error = sume_module_reg_read(nf_priv, sifr);

	return (error);
}

/*
 * Interface ioctl handler: media queries, the driver-private register
 * read/write ioctls, and everything else delegated to ether_ioctl().
 */
static int
sume_if_ioctl(if_t ifp, unsigned long cmd, caddr_t data)
{
	struct ifreq *ifr = (struct ifreq *) data;
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_ifreq sifr;
	int error = 0;

	switch (cmd) {
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &nf_priv->media, cmd);
		break;

	case SUME_IOCTL_CMD_WRITE_REG:
		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
		if (error) {
			/* Bad user pointer is reported as EINVAL. */
			error = EINVAL;
			break;
		}
		error = sume_module_reg_write(nf_priv, &sifr, SUME_MR_WRITE);
		break;

	case SUME_IOCTL_CMD_READ_REG:
		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
		if (error) {
			error = EINVAL;
			break;
		}

		error = get_modreg_value(nf_priv, &sifr);
		if (error)
			break;

		error = copyout(&sifr, ifr_data_get_ptr(ifr), sizeof(sifr));
		if (error)
			error = EINVAL;

		break;

	case SIOCSIFFLAGS:
		/* Silence tcpdump 'promisc mode not supported' warning. */
		if (if_getflags(ifp) & IFF_PROMISC)
			break;

		/* FALLTHROUGH */
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	return (error);
}

/* Media change: only Ethernet is accepted; baudrate tracks the subtype. */
static int
sume_media_change(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct ifmedia *ifm = &nf_priv->media;

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	if (IFM_SUBTYPE(ifm->ifm_media) == IFM_10G_SR)
		if_setbaudrate(ifp, ifmedia_baudrate(IFM_ETHER | IFM_10G_SR));
	else
		if_setbaudrate(ifp, ifmedia_baudrate(ifm->ifm_media));

	return (0);
}

/*
 * Poll the port's hardware status register (via the module-register
 * DMA path) and push a link-state change to the stack on transitions.
 */
static void
sume_update_link_status(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;
	struct sume_ifreq sifr;
	int link_status;

	sifr.addr = SUME_STATUS_ADDR(nf_priv->port);
	sifr.val = 0;

	/* Best effort: silently keep the old state if the read fails. */
	if (get_modreg_value(nf_priv, &sifr))
		return;

	link_status = SUME_LINK_STATUS(sifr.val);

	if (!link_status && nf_priv->link_up) {
		if_link_state_change(ifp, LINK_STATE_DOWN);
		nf_priv->link_up = 0;
		if (adapter->sume_debug)
			device_printf(adapter->dev, "port %d link state "
			    "changed to DOWN\n", nf_priv->unit);
	} else if (link_status && !nf_priv->link_up) {
		nf_priv->link_up = 1;
		if_link_state_change(ifp, LINK_STATE_UP);
		if (adapter->sume_debug)
			device_printf(adapter->dev, "port %d link state "
			    "changed to UP\n", nf_priv->unit);
	}
}

/* ifmedia status callback; refreshes the link state as a side effect. */
static void
sume_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct ifmedia *ifm = &nf_priv->media;

	if (ifm->ifm_cur->ifm_media == (IFM_ETHER | IFM_10G_SR) &&
	    (if_getflags(ifp) & IFF_UP))
		ifmr->ifm_active = IFM_ETHER | IFM_10G_SR;
	else
		ifmr->ifm_active = ifm->ifm_cur->ifm_media;

	ifmr->ifm_status |= IFM_AVALID;

	sume_update_link_status(ifp);

	if (nf_priv->link_up)
		ifmr->ifm_status |= IFM_ACTIVE;
}

/*
 * Packet to transmit. We take the packet data from the mbuf and copy it to the
 * bouncebuffer address buf_addr+3*sizeof(uint32_t)+16. The 16 bytes before the
 * packet data are for metadata: sport/dport (depending on our source
 * interface), packet length and magic 0xcafe. We tell the SUME about the
 * transfer, fill the first 3*sizeof(uint32_t) bytes of the bouncebuffer with
 * the information about the start and length of the packet and trigger the
 * transaction.
 */
static int
sume_if_start_locked(if_t ifp)
{
	struct mbuf *m;
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_DATA];
	uint8_t *outbuf;
	struct nf_metadata *mdata;
	int plen = SUME_MIN_PKT_SIZE;

	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
	KASSERT(send->state == SUME_RIFFA_CHAN_STATE_IDLE,
	    ("SUME not in IDLE state"));

	m = if_dequeue(ifp);
	if (m == NULL)
		return (EINVAL);

	/* Packets large enough do not need to be padded */
	if (m->m_pkthdr.len > SUME_MIN_PKT_SIZE)
		plen = m->m_pkthdr.len;

	if (adapter->sume_debug)
		device_printf(adapter->dev, "sending %d bytes to %s%d\n", plen,
		    SUME_ETH_DEVICE_NAME, nf_priv->unit);

	/* Payload goes just past the descriptor; metadata leads the payload. */
	outbuf = (uint8_t *) send->buf_addr + sizeof(struct nf_bb_desc);
	mdata = (struct nf_metadata *) outbuf;

	/* Clear the recovery flag. */
	send->recovery = 0;

	/* Make sure we fit with the 16 bytes nf_metadata.
*/ 1039 if (m->m_pkthdr.len + sizeof(struct nf_metadata) > 1040 adapter->sg_buf_size) { 1041 device_printf(adapter->dev, "packet too big for bounce buffer " 1042 "(%d)\n", m->m_pkthdr.len); 1043 m_freem(m); 1044 nf_priv->stats.tx_dropped++; 1045 return (ENOMEM); 1046 } 1047 1048 bus_dmamap_sync(send->ch_tag, send->ch_map, 1049 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1050 1051 /* Zero out the padded data */ 1052 if (m->m_pkthdr.len < SUME_MIN_PKT_SIZE) 1053 bzero(outbuf + sizeof(struct nf_metadata), SUME_MIN_PKT_SIZE); 1054 /* Skip the first 16 bytes for the metadata. */ 1055 m_copydata(m, 0, m->m_pkthdr.len, outbuf + sizeof(struct nf_metadata)); 1056 send->len = (sizeof(struct nf_metadata) + plen + 3) / 4; 1057 1058 /* Fill in the metadata: CPU(DMA) ports are odd, MAC ports are even. */ 1059 mdata->sport = htole16(1 << (nf_priv->port * 2 + 1)); 1060 mdata->dport = htole16(1 << (nf_priv->port * 2)); 1061 mdata->plen = htole16(plen); 1062 mdata->magic = htole16(SUME_RIFFA_MAGIC); 1063 mdata->t1 = htole32(0); 1064 mdata->t2 = htole32(0); 1065 1066 /* Let the FPGA know about the transfer. */ 1067 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA, 1068 RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST); 1069 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA, 1070 RIFFA_RX_LEN_REG_OFF), send->len); 1071 1072 /* Fill the bouncebuf "descriptor". */ 1073 sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len)); 1074 1075 /* Update the state before intiating the DMA to avoid races. */ 1076 send->state = SUME_RIFFA_CHAN_STATE_READY; 1077 1078 /* DMA. 
*/ 1079 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA, 1080 RIFFA_RX_SG_ADDR_LO_REG_OFF), 1081 SUME_RIFFA_LO_ADDR(send->buf_hw_addr)); 1082 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA, 1083 RIFFA_RX_SG_ADDR_HI_REG_OFF), 1084 SUME_RIFFA_HI_ADDR(send->buf_hw_addr)); 1085 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA, 1086 RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg); 1087 1088 bus_dmamap_sync(send->ch_tag, send->ch_map, 1089 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1090 1091 nf_priv->stats.tx_packets++; 1092 nf_priv->stats.tx_bytes += plen; 1093 1094 /* We can free as long as we use the bounce buffer. */ 1095 m_freem(m); 1096 1097 adapter->last_ifc = nf_priv->port; 1098 1099 /* Reset watchdog counter. */ 1100 adapter->wd_counter = 0; 1101 1102 return (0); 1103 } 1104 1105 static void 1106 sume_if_start(if_t ifp) 1107 { 1108 struct nf_priv *nf_priv = if_getsoftc(ifp); 1109 struct sume_adapter *adapter = nf_priv->adapter; 1110 1111 if (!adapter->running || !(if_getflags(ifp) & IFF_UP)) 1112 return; 1113 1114 SUME_LOCK(adapter); 1115 if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state == 1116 SUME_RIFFA_CHAN_STATE_IDLE) 1117 sume_if_start_locked(ifp); 1118 SUME_UNLOCK(adapter); 1119 } 1120 1121 /* 1122 * We call this function at the end of every TX transaction to check for 1123 * remaining packets in the TX queues for every UP interface. 
1124 */ 1125 static void 1126 check_tx_queues(struct sume_adapter *adapter) 1127 { 1128 int i, last_ifc; 1129 1130 KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned")); 1131 1132 last_ifc = adapter->last_ifc; 1133 1134 /* Check all interfaces */ 1135 for (i = last_ifc + 1; i < last_ifc + SUME_NPORTS + 1; i++) { 1136 if_t ifp = adapter->ifp[i % SUME_NPORTS]; 1137 1138 if (!(if_getflags(ifp) & IFF_UP)) 1139 continue; 1140 1141 if (!sume_if_start_locked(ifp)) 1142 break; 1143 } 1144 } 1145 1146 static int 1147 sume_ifp_alloc(struct sume_adapter *adapter, uint32_t port) 1148 { 1149 if_t ifp; 1150 struct nf_priv *nf_priv = malloc(sizeof(struct nf_priv), M_SUME, 1151 M_ZERO | M_WAITOK); 1152 1153 ifp = if_alloc(IFT_ETHER); 1154 if (ifp == NULL) { 1155 device_printf(adapter->dev, "cannot allocate ifnet\n"); 1156 return (ENOMEM); 1157 } 1158 1159 adapter->ifp[port] = ifp; 1160 if_setsoftc(ifp, nf_priv); 1161 1162 nf_priv->adapter = adapter; 1163 nf_priv->unit = alloc_unr(unr); 1164 nf_priv->port = port; 1165 nf_priv->link_up = 0; 1166 1167 if_initname(ifp, SUME_ETH_DEVICE_NAME, nf_priv->unit); 1168 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); 1169 1170 if_setinitfn(ifp, sume_if_init); 1171 if_setstartfn(ifp, sume_if_start); 1172 if_setioctlfn(ifp, sume_if_ioctl); 1173 1174 uint8_t hw_addr[ETHER_ADDR_LEN] = DEFAULT_ETHER_ADDRESS; 1175 hw_addr[ETHER_ADDR_LEN-1] = nf_priv->unit; 1176 ether_ifattach(ifp, hw_addr); 1177 1178 ifmedia_init(&nf_priv->media, IFM_IMASK, sume_media_change, 1179 sume_media_status); 1180 ifmedia_add(&nf_priv->media, IFM_ETHER | IFM_10G_SR, 0, NULL); 1181 ifmedia_set(&nf_priv->media, IFM_ETHER | IFM_10G_SR); 1182 1183 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); 1184 1185 return (0); 1186 } 1187 1188 static void 1189 callback_dma(void *arg, bus_dma_segment_t *segs, int nseg, int err) 1190 { 1191 if (err) 1192 return; 1193 1194 KASSERT(nseg == 1, ("%d segments returned!", nseg)); 1195 1196 *(bus_addr_t *) arg = segs[0].ds_addr; 
1197 } 1198 1199 static int 1200 sume_probe_riffa_buffer(const struct sume_adapter *adapter, 1201 struct riffa_chnl_dir ***p, const char *dir) 1202 { 1203 struct riffa_chnl_dir **rp; 1204 bus_addr_t hw_addr; 1205 int error, ch; 1206 device_t dev = adapter->dev; 1207 1208 error = ENOMEM; 1209 *p = malloc(SUME_RIFFA_CHANNELS * sizeof(struct riffa_chnl_dir *), 1210 M_SUME, M_ZERO | M_WAITOK); 1211 if (*p == NULL) { 1212 device_printf(dev, "malloc(%s) failed.\n", dir); 1213 return (error); 1214 } 1215 1216 rp = *p; 1217 /* Allocate the chnl_dir structs themselves. */ 1218 for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) { 1219 /* One direction. */ 1220 rp[ch] = malloc(sizeof(struct riffa_chnl_dir), M_SUME, 1221 M_ZERO | M_WAITOK); 1222 if (rp[ch] == NULL) { 1223 device_printf(dev, "malloc(%s[%d]) riffa_chnl_dir " 1224 "failed.\n", dir, ch); 1225 return (error); 1226 } 1227 1228 int err = bus_dma_tag_create(bus_get_dma_tag(dev), 1229 4, 0, 1230 BUS_SPACE_MAXADDR, 1231 BUS_SPACE_MAXADDR, 1232 NULL, NULL, 1233 adapter->sg_buf_size, 1234 1, 1235 adapter->sg_buf_size, 1236 0, 1237 NULL, 1238 NULL, 1239 &rp[ch]->ch_tag); 1240 1241 if (err) { 1242 device_printf(dev, "bus_dma_tag_create(%s[%d]) " 1243 "failed.\n", dir, ch); 1244 return (err); 1245 } 1246 1247 err = bus_dmamem_alloc(rp[ch]->ch_tag, (void **) 1248 &rp[ch]->buf_addr, BUS_DMA_WAITOK | BUS_DMA_COHERENT | 1249 BUS_DMA_ZERO, &rp[ch]->ch_map); 1250 if (err) { 1251 device_printf(dev, "bus_dmamem_alloc(%s[%d]) failed.\n", 1252 dir, ch); 1253 return (err); 1254 } 1255 1256 bzero(rp[ch]->buf_addr, adapter->sg_buf_size); 1257 1258 err = bus_dmamap_load(rp[ch]->ch_tag, rp[ch]->ch_map, 1259 rp[ch]->buf_addr, adapter->sg_buf_size, callback_dma, 1260 &hw_addr, BUS_DMA_NOWAIT); 1261 if (err) { 1262 device_printf(dev, "bus_dmamap_load(%s[%d]) failed.\n", 1263 dir, ch); 1264 return (err); 1265 } 1266 rp[ch]->buf_hw_addr = hw_addr; 1267 rp[ch]->num_sg = 1; 1268 rp[ch]->state = SUME_RIFFA_CHAN_STATE_IDLE; 1269 1270 rp[ch]->rtag = 
SUME_INIT_RTAG; 1271 } 1272 1273 return (0); 1274 } 1275 1276 static int 1277 sume_probe_riffa_buffers(struct sume_adapter *adapter) 1278 { 1279 int error; 1280 1281 error = sume_probe_riffa_buffer(adapter, &adapter->recv, "recv"); 1282 if (error) 1283 return (error); 1284 1285 error = sume_probe_riffa_buffer(adapter, &adapter->send, "send"); 1286 1287 return (error); 1288 } 1289 1290 static void 1291 sume_sysctl_init(struct sume_adapter *adapter) 1292 { 1293 device_t dev = adapter->dev; 1294 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); 1295 struct sysctl_oid *tree = device_get_sysctl_tree(dev); 1296 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); 1297 struct sysctl_oid *tmp_tree; 1298 char namebuf[MAX_IFC_NAME_LEN]; 1299 int i; 1300 1301 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "sume", CTLFLAG_RW, 1302 0, "SUME top-level tree"); 1303 if (tree == NULL) { 1304 device_printf(dev, "SYSCTL_ADD_NODE failed.\n"); 1305 return; 1306 } 1307 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug", CTLFLAG_RW, 1308 &adapter->sume_debug, 0, "debug int leaf"); 1309 1310 /* total RX error stats */ 1311 SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_epkts", 1312 CTLFLAG_RD, &adapter->packets_err, 0, "rx errors"); 1313 SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_ebytes", 1314 CTLFLAG_RD, &adapter->bytes_err, 0, "rx error bytes"); 1315 1316 for (i = SUME_NPORTS - 1; i >= 0; i--) { 1317 if_t ifp = adapter->ifp[i]; 1318 if (ifp == NULL) 1319 continue; 1320 1321 struct nf_priv *nf_priv = if_getsoftc(ifp); 1322 1323 snprintf(namebuf, MAX_IFC_NAME_LEN, "%s%d", 1324 SUME_ETH_DEVICE_NAME, nf_priv->unit); 1325 tmp_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 1326 CTLFLAG_RW, 0, "SUME ifc tree"); 1327 if (tmp_tree == NULL) { 1328 device_printf(dev, "SYSCTL_ADD_NODE failed.\n"); 1329 return; 1330 } 1331 1332 /* Packets dropped by down interface. 
*/ 1333 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO, 1334 "ifc_down_bytes", CTLFLAG_RD, 1335 &nf_priv->stats.ifc_down_bytes, 0, "ifc_down bytes"); 1336 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO, 1337 "ifc_down_packets", CTLFLAG_RD, 1338 &nf_priv->stats.ifc_down_packets, 0, "ifc_down packets"); 1339 1340 /* HW RX stats */ 1341 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO, 1342 "hw_rx_packets", CTLFLAG_RD, &nf_priv->stats.hw_rx_packets, 1343 0, "hw_rx packets"); 1344 1345 /* HW TX stats */ 1346 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO, 1347 "hw_tx_packets", CTLFLAG_RD, &nf_priv->stats.hw_tx_packets, 1348 0, "hw_tx packets"); 1349 1350 /* RX stats */ 1351 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO, 1352 "rx_bytes", CTLFLAG_RD, &nf_priv->stats.rx_bytes, 0, 1353 "rx bytes"); 1354 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO, 1355 "rx_dropped", CTLFLAG_RD, &nf_priv->stats.rx_dropped, 0, 1356 "rx dropped"); 1357 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO, 1358 "rx_packets", CTLFLAG_RD, &nf_priv->stats.rx_packets, 0, 1359 "rx packets"); 1360 1361 /* TX stats */ 1362 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO, 1363 "tx_bytes", CTLFLAG_RD, &nf_priv->stats.tx_bytes, 0, 1364 "tx bytes"); 1365 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO, 1366 "tx_dropped", CTLFLAG_RD, &nf_priv->stats.tx_dropped, 0, 1367 "tx dropped"); 1368 SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO, 1369 "tx_packets", CTLFLAG_RD, &nf_priv->stats.tx_packets, 0, 1370 "tx packets"); 1371 } 1372 } 1373 1374 static void 1375 sume_local_timer(void *arg) 1376 { 1377 struct sume_adapter *adapter = arg; 1378 1379 if (!adapter->running) 1380 return; 1381 1382 taskqueue_enqueue(adapter->tq, &adapter->stat_task); 1383 1384 SUME_LOCK(adapter); 1385 if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state != 1386 SUME_RIFFA_CHAN_STATE_IDLE && ++adapter->wd_counter >= 3) { 1387 /* Resetting interfaces if 
stuck for 3 seconds. */ 1388 device_printf(adapter->dev, "TX stuck, resetting adapter.\n"); 1389 read_reg(adapter, RIFFA_INFO_REG_OFF); 1390 1391 adapter->send[SUME_RIFFA_CHANNEL_DATA]->state = 1392 SUME_RIFFA_CHAN_STATE_IDLE; 1393 adapter->wd_counter = 0; 1394 1395 check_tx_queues(adapter); 1396 } 1397 SUME_UNLOCK(adapter); 1398 1399 callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter); 1400 } 1401 1402 static void 1403 sume_get_stats(void *context, int pending) 1404 { 1405 struct sume_adapter *adapter = context; 1406 int i; 1407 1408 for (i = 0; i < SUME_NPORTS; i++) { 1409 if_t ifp = adapter->ifp[i]; 1410 1411 if (if_getflags(ifp) & IFF_UP) { 1412 struct nf_priv *nf_priv = if_getsoftc(ifp); 1413 struct sume_ifreq sifr; 1414 1415 sume_update_link_status(ifp); 1416 1417 /* Get RX counter. */ 1418 sifr.addr = SUME_STAT_RX_ADDR(nf_priv->port); 1419 sifr.val = 0; 1420 1421 if (!get_modreg_value(nf_priv, &sifr)) 1422 nf_priv->stats.hw_rx_packets += sifr.val; 1423 1424 /* Get TX counter. */ 1425 sifr.addr = SUME_STAT_TX_ADDR(nf_priv->port); 1426 sifr.val = 0; 1427 1428 if (!get_modreg_value(nf_priv, &sifr)) 1429 nf_priv->stats.hw_tx_packets += sifr.val; 1430 } 1431 } 1432 } 1433 1434 static int 1435 sume_attach(device_t dev) 1436 { 1437 struct sume_adapter *adapter = device_get_softc(dev); 1438 adapter->dev = dev; 1439 int error, i; 1440 1441 mtx_init(&adapter->lock, "Global lock", NULL, MTX_DEF); 1442 1443 adapter->running = 0; 1444 1445 /* OK finish up RIFFA. */ 1446 error = sume_probe_riffa_pci(adapter); 1447 if (error != 0) 1448 goto error; 1449 1450 error = sume_probe_riffa_buffers(adapter); 1451 if (error != 0) 1452 goto error; 1453 1454 /* Now do the network interfaces. */ 1455 for (i = 0; i < SUME_NPORTS; i++) { 1456 error = sume_ifp_alloc(adapter, i); 1457 if (error != 0) 1458 goto error; 1459 } 1460 1461 /* Register stats and register sysctls. */ 1462 sume_sysctl_init(adapter); 1463 1464 /* Reset the HW. 
*/ 1465 read_reg(adapter, RIFFA_INFO_REG_OFF); 1466 1467 /* Ready to go, "enable" IRQ. */ 1468 adapter->running = 1; 1469 1470 callout_init(&adapter->timer, 1); 1471 TASK_INIT(&adapter->stat_task, 0, sume_get_stats, adapter); 1472 1473 adapter->tq = taskqueue_create("sume_stats", M_NOWAIT, 1474 taskqueue_thread_enqueue, &adapter->tq); 1475 taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s stattaskq", 1476 device_get_nameunit(adapter->dev)); 1477 1478 callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter); 1479 1480 return (0); 1481 1482 error: 1483 sume_detach(dev); 1484 1485 return (error); 1486 } 1487 1488 static void 1489 sume_remove_riffa_buffer(const struct sume_adapter *adapter, 1490 struct riffa_chnl_dir **pp) 1491 { 1492 int ch; 1493 1494 for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) { 1495 if (pp[ch] == NULL) 1496 continue; 1497 1498 if (pp[ch]->buf_hw_addr != 0) { 1499 bus_dmamem_free(pp[ch]->ch_tag, pp[ch]->buf_addr, 1500 pp[ch]->ch_map); 1501 pp[ch]->buf_hw_addr = 0; 1502 } 1503 1504 free(pp[ch], M_SUME); 1505 } 1506 } 1507 1508 static void 1509 sume_remove_riffa_buffers(struct sume_adapter *adapter) 1510 { 1511 if (adapter->send != NULL) { 1512 sume_remove_riffa_buffer(adapter, adapter->send); 1513 free(adapter->send, M_SUME); 1514 adapter->send = NULL; 1515 } 1516 if (adapter->recv != NULL) { 1517 sume_remove_riffa_buffer(adapter, adapter->recv); 1518 free(adapter->recv, M_SUME); 1519 adapter->recv = NULL; 1520 } 1521 } 1522 1523 static int 1524 sume_detach(device_t dev) 1525 { 1526 struct sume_adapter *adapter = device_get_softc(dev); 1527 int i; 1528 struct nf_priv *nf_priv; 1529 1530 KASSERT(mtx_initialized(&adapter->lock), ("SUME mutex not " 1531 "initialized")); 1532 adapter->running = 0; 1533 1534 /* Drain the stats callout and task queue. 
*/ 1535 callout_drain(&adapter->timer); 1536 1537 if (adapter->tq) { 1538 taskqueue_drain(adapter->tq, &adapter->stat_task); 1539 taskqueue_free(adapter->tq); 1540 } 1541 1542 for (i = 0; i < SUME_NPORTS; i++) { 1543 if_t ifp = adapter->ifp[i]; 1544 if (ifp == NULL) 1545 continue; 1546 1547 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 1548 nf_priv = if_getsoftc(ifp); 1549 1550 if (if_getflags(ifp) & IFF_UP) 1551 if_down(ifp); 1552 ifmedia_removeall(&nf_priv->media); 1553 free_unr(unr, nf_priv->unit); 1554 1555 if_setflagbits(ifp, 0, IFF_UP); 1556 ether_ifdetach(ifp); 1557 if_free(ifp); 1558 1559 free(nf_priv, M_SUME); 1560 } 1561 1562 sume_remove_riffa_buffers(adapter); 1563 1564 if (adapter->irq.tag) 1565 bus_teardown_intr(dev, adapter->irq.res, adapter->irq.tag); 1566 if (adapter->irq.res) 1567 bus_release_resource(dev, SYS_RES_IRQ, adapter->irq.rid, 1568 adapter->irq.res); 1569 1570 pci_release_msi(dev); 1571 1572 if (adapter->bar0_addr) 1573 bus_release_resource(dev, SYS_RES_MEMORY, adapter->rid, 1574 adapter->bar0_addr); 1575 1576 mtx_destroy(&adapter->lock); 1577 1578 return (0); 1579 } 1580 1581 static int 1582 mod_event(module_t mod, int cmd, void *arg) 1583 { 1584 switch (cmd) { 1585 case MOD_LOAD: 1586 unr = new_unrhdr(0, INT_MAX, NULL); 1587 break; 1588 1589 case MOD_UNLOAD: 1590 delete_unrhdr(unr); 1591 break; 1592 } 1593 1594 return (0); 1595 } 1596 1597 DRIVER_MODULE(sume, pci, sume_driver, mod_event, NULL); 1598 MODULE_VERSION(sume, 1); 1599