1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015 Bjoern A. Zeeb 5 * Copyright (c) 2020 Denis Salopek 6 * 7 * This software was developed by SRI International and the University of 8 * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-11-C-0249 9 * ("MRC2"), as part of the DARPA MRC research programme. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 
31 */ 32 33 #include <sys/param.h> 34 #include <sys/bus.h> 35 #include <sys/endian.h> 36 #include <sys/kernel.h> 37 #include <sys/limits.h> 38 #include <sys/module.h> 39 #include <sys/rman.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <sys/sysctl.h> 43 #include <sys/taskqueue.h> 44 45 #include <net/if.h> 46 #include <net/if_media.h> 47 #include <net/if_types.h> 48 #include <net/if_var.h> 49 50 #include <netinet/in.h> 51 #include <netinet/if_ether.h> 52 53 #include <dev/pci/pcivar.h> 54 #include <dev/pci/pcireg.h> 55 56 #include <machine/bus.h> 57 58 #include "adapter.h" 59 60 #define PCI_VENDOR_ID_XILINX 0x10ee 61 #define PCI_DEVICE_ID_SUME 0x7028 62 63 /* SUME bus driver interface */ 64 static int sume_probe(device_t); 65 static int sume_attach(device_t); 66 static int sume_detach(device_t); 67 68 static device_method_t sume_methods[] = { 69 DEVMETHOD(device_probe, sume_probe), 70 DEVMETHOD(device_attach, sume_attach), 71 DEVMETHOD(device_detach, sume_detach), 72 DEVMETHOD_END 73 }; 74 75 static driver_t sume_driver = { 76 "sume", 77 sume_methods, 78 sizeof(struct sume_adapter) 79 }; 80 81 /* 82 * The DMA engine for SUME generates interrupts for each RX/TX transaction. 83 * Depending on the channel (0 if packet transaction, 1 if register transaction) 84 * the used bits of the interrupt vector will be the lowest or the second lowest 85 * 5 bits. 86 * 87 * When receiving packets from SUME (RX): 88 * (1) SUME received a packet on one of the interfaces. 89 * (2) SUME generates an interrupt vector, bit 00001 is set (channel 0 - new RX 90 * transaction). 91 * (3) We read the length of the incoming packet and the offset along with the 92 * 'last' flag from the SUME registers. 93 * (4) We prepare for the DMA transaction by setting the bouncebuffer on the 94 * address buf_addr. 
For now, this is how it's done: 95 * - First 3*sizeof(uint32_t) bytes are: lower and upper 32 bits of physical 96 * address where we want the data to arrive (buf_addr[0] and buf_addr[1]), 97 * and length of incoming data (buf_addr[2]). 98 * - Data will start right after, at buf_addr+3*sizeof(uint32_t). The 99 * physical address buf_hw_addr is a block of contiguous memory mapped to 100 * buf_addr, so we can set the incoming data's physical address (buf_addr[0] 101 * and buf_addr[1]) to buf_hw_addr+3*sizeof(uint32_t). 102 * (5) We notify SUME that the bouncebuffer is ready for the transaction by 103 * writing the lower/upper physical address buf_hw_addr to the SUME 104 * registers RIFFA_TX_SG_ADDR_LO_REG_OFF and RIFFA_TX_SG_ADDR_HI_REG_OFF as 105 * well as the number of segments to the register RIFFA_TX_SG_LEN_REG_OFF. 106 * (6) SUME generates an interrupt vector, bit 00010 is set (channel 0 - 107 * bouncebuffer received). 108 * (7) SUME generates an interrupt vector, bit 00100 is set (channel 0 - 109 * transaction is done). 110 * (8) SUME can do both steps (6) and (7) using the same interrupt. 111 * (8) We read the first 16 bytes (metadata) of the received data and note the 112 * incoming interface so we can later forward it to the right one in the OS 113 * (sume0, sume1, sume2 or sume3). 114 * (10) We create an mbuf and copy the data from the bouncebuffer to the mbuf 115 * and set the mbuf rcvif to the incoming interface. 116 * (11) We forward the mbuf to the appropriate interface via ifp->if_input. 117 * 118 * When sending packets to SUME (TX): 119 * (1) The OS calls sume_if_start() function on TX. 120 * (2) We get the mbuf packet data and copy it to the 121 * buf_addr+3*sizeof(uint32_t) + metadata 16 bytes. 122 * (3) We create the metadata based on the output interface and copy it to the 123 * buf_addr+3*sizeof(uint32_t). 124 * (4) We write the offset/last and length of the packet to the SUME registers 125 * RIFFA_RX_OFFLAST_REG_OFF and RIFFA_RX_LEN_REG_OFF. 
126 * (5) We fill the bouncebuffer by filling the first 3*sizeof(uint32_t) bytes 127 * with the physical address and length just as in RX step (4). 128 * (6) We notify SUME that the bouncebuffer is ready by writing to SUME 129 * registers RIFFA_RX_SG_ADDR_LO_REG_OFF, RIFFA_RX_SG_ADDR_HI_REG_OFF and 130 * RIFFA_RX_SG_LEN_REG_OFF just as in RX step (5). 131 * (7) SUME generates an interrupt vector, bit 01000 is set (channel 0 - 132 * bouncebuffer is read). 133 * (8) SUME generates an interrupt vector, bit 10000 is set (channel 0 - 134 * transaction is done). 135 * (9) SUME can do both steps (7) and (8) using the same interrupt. 136 * 137 * Internal registers 138 * Every module in the SUME hardware has its own set of internal registers 139 * (IDs, for debugging and statistic purposes, etc.). Their base addresses are 140 * defined in 'projects/reference_nic/hw/tcl/reference_nic_defines.tcl' and the 141 * offsets to different memory locations of every module are defined in their 142 * corresponding folder inside the library. These registers can be RO/RW and 143 * there is a special method to fetch/change this data over 1 or 2 DMA 144 * transactions. For writing, by calling the sume_module_reg_write(). For 145 * reading, by calling the sume_module_reg_write() and then 146 * sume_module_reg_read(). Check those functions for more information. 
147 */ 148 149 MALLOC_DECLARE(M_SUME); 150 MALLOC_DEFINE(M_SUME, "sume", "NetFPGA SUME device driver"); 151 152 static void check_tx_queues(struct sume_adapter *); 153 static void sume_fill_bb_desc(struct sume_adapter *, struct riffa_chnl_dir *, 154 uint64_t); 155 156 static struct unrhdr *unr; 157 158 static struct { 159 uint16_t device; 160 char *desc; 161 } sume_pciids[] = { 162 {PCI_DEVICE_ID_SUME, "NetFPGA SUME reference NIC"}, 163 }; 164 165 static inline uint32_t 166 read_reg(struct sume_adapter *adapter, int offset) 167 { 168 169 return (bus_space_read_4(adapter->bt, adapter->bh, offset << 2)); 170 } 171 172 static inline void 173 write_reg(struct sume_adapter *adapter, int offset, uint32_t val) 174 { 175 176 bus_space_write_4(adapter->bt, adapter->bh, offset << 2, val); 177 } 178 179 static int 180 sume_probe(device_t dev) 181 { 182 int i; 183 uint16_t v = pci_get_vendor(dev); 184 uint16_t d = pci_get_device(dev); 185 186 if (v != PCI_VENDOR_ID_XILINX) 187 return (ENXIO); 188 189 for (i = 0; i < nitems(sume_pciids); i++) { 190 if (d == sume_pciids[i].device) { 191 device_set_desc(dev, sume_pciids[i].desc); 192 return (BUS_PROBE_DEFAULT); 193 } 194 } 195 196 return (ENXIO); 197 } 198 199 /* 200 * Building mbuf for packet received from SUME. We expect to receive 'len' 201 * bytes of data (including metadata) written from the bouncebuffer address 202 * buf_addr+3*sizeof(uint32_t). Metadata will tell us which SUME interface 203 * received the packet (sport will be 1, 2, 4 or 8), the packet length (plen), 204 * and the magic word needs to be 0xcafe. When we have the packet data, we 205 * create an mbuf and copy the data to it using m_copyback() function, set the 206 * correct interface to rcvif and return the mbuf to be later sent to the OS 207 * with if_input. 
208 */ 209 static struct mbuf * 210 sume_rx_build_mbuf(struct sume_adapter *adapter, uint32_t len) 211 { 212 struct nf_priv *nf_priv; 213 struct mbuf *m; 214 if_t ifp = NULL; 215 int np; 216 uint16_t dport, plen, magic; 217 device_t dev = adapter->dev; 218 uint8_t *indata = (uint8_t *) 219 adapter->recv[SUME_RIFFA_CHANNEL_DATA]->buf_addr + 220 sizeof(struct nf_bb_desc); 221 struct nf_metadata *mdata = (struct nf_metadata *) indata; 222 223 /* The metadata header is 16 bytes. */ 224 if (len < sizeof(struct nf_metadata)) { 225 device_printf(dev, "short frame (%d)\n", len); 226 adapter->packets_err++; 227 adapter->bytes_err += len; 228 return (NULL); 229 } 230 231 dport = le16toh(mdata->dport); 232 plen = le16toh(mdata->plen); 233 magic = le16toh(mdata->magic); 234 235 if (sizeof(struct nf_metadata) + plen > len || 236 magic != SUME_RIFFA_MAGIC) { 237 device_printf(dev, "corrupted packet (%zd + %d > %d || magic " 238 "0x%04x != 0x%04x)\n", sizeof(struct nf_metadata), plen, 239 len, magic, SUME_RIFFA_MAGIC); 240 return (NULL); 241 } 242 243 /* We got the packet from one of the even bits */ 244 np = (ffs(dport & SUME_DPORT_MASK) >> 1) - 1; 245 if (np > SUME_NPORTS) { 246 device_printf(dev, "invalid destination port 0x%04x (%d)\n", 247 dport, np); 248 adapter->packets_err++; 249 adapter->bytes_err += plen; 250 return (NULL); 251 } 252 ifp = adapter->ifp[np]; 253 nf_priv = if_getsoftc(ifp); 254 nf_priv->stats.rx_packets++; 255 nf_priv->stats.rx_bytes += plen; 256 257 /* If the interface is down, well, we are done. */ 258 if (!(if_getflags(ifp) & IFF_UP)) { 259 nf_priv->stats.ifc_down_packets++; 260 nf_priv->stats.ifc_down_bytes += plen; 261 return (NULL); 262 } 263 264 if (adapter->sume_debug) 265 printf("Building mbuf with length: %d\n", plen); 266 267 m = m_getm(NULL, plen, M_NOWAIT, MT_DATA); 268 if (m == NULL) { 269 adapter->packets_err++; 270 adapter->bytes_err += plen; 271 return (NULL); 272 } 273 274 /* Copy the data in at the right offset. 
*/ 275 m_copyback(m, 0, plen, (void *) (indata + sizeof(struct nf_metadata))); 276 m->m_pkthdr.rcvif = ifp; 277 278 return (m); 279 } 280 281 /* 282 * SUME interrupt handler for when we get a valid interrupt from the board. 283 * Theoretically, we can receive interrupt for any of the available channels, 284 * but RIFFA DMA uses only 2: 0 and 1, so we use only vect0. The vector is a 32 285 * bit number, using 5 bits for every channel, the least significant bits 286 * correspond to channel 0 and the next 5 bits correspond to channel 1. Vector 287 * bits for RX/TX are: 288 * RX 289 * bit 0 - new transaction from SUME 290 * bit 1 - SUME received our bouncebuffer address 291 * bit 2 - SUME copied the received data to our bouncebuffer, transaction done 292 * TX 293 * bit 3 - SUME received our bouncebuffer address 294 * bit 4 - SUME copied the data from our bouncebuffer, transaction done 295 * 296 * There are two finite state machines (one for TX, one for RX). We loop 297 * through channels 0 and 1 to check and our current state and which interrupt 298 * bit is set. 299 * TX 300 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the first TX transaction. 301 * SUME_RIFFA_CHAN_STATE_READY: we prepared (filled with data) the bouncebuffer 302 * and triggered the SUME for the TX transaction. Waiting for interrupt bit 3 303 * to go to the next state. 304 * SUME_RIFFA_CHAN_STATE_READ: waiting for interrupt bit 4 (for SUME to send 305 * our packet). Then we get the length of the sent data and go back to the 306 * IDLE state. 307 * RX 308 * SUME_RIFFA_CHAN_STATE_IDLE: waiting for the interrupt bit 0 (new RX 309 * transaction). When we get it, we prepare our bouncebuffer for reading and 310 * trigger the SUME to start the transaction. Go to the next state. 311 * SUME_RIFFA_CHAN_STATE_READY: waiting for the interrupt bit 1 (SUME got our 312 * bouncebuffer). Go to the next state. 
313 * SUME_RIFFA_CHAN_STATE_READ: SUME copied data and our bouncebuffer is ready, 314 * we can build the mbuf and go back to the IDLE state. 315 */ 316 static void 317 sume_intr_handler(void *arg) 318 { 319 struct sume_adapter *adapter = arg; 320 uint32_t vect, vect0, len; 321 int ch, loops; 322 device_t dev = adapter->dev; 323 struct mbuf *m = NULL; 324 if_t ifp = NULL; 325 struct riffa_chnl_dir *send, *recv; 326 327 SUME_LOCK(adapter); 328 329 vect0 = read_reg(adapter, RIFFA_IRQ_REG0_OFF); 330 if ((vect0 & SUME_INVALID_VECT) != 0) { 331 SUME_UNLOCK(adapter); 332 return; 333 } 334 335 /* 336 * We only have one interrupt for all channels and no way 337 * to quickly lookup for which channel(s) we got an interrupt? 338 */ 339 for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) { 340 vect = vect0 >> (5 * ch); 341 send = adapter->send[ch]; 342 recv = adapter->recv[ch]; 343 344 loops = 0; 345 while ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) && 346 loops <= 5) { 347 if (adapter->sume_debug) 348 device_printf(dev, "TX ch %d state %u vect = " 349 "0x%08x\n", ch, send->state, vect); 350 switch (send->state) { 351 case SUME_RIFFA_CHAN_STATE_IDLE: 352 break; 353 case SUME_RIFFA_CHAN_STATE_READY: 354 if (!(vect & SUME_MSI_TXBUF)) { 355 device_printf(dev, "ch %d unexpected " 356 "interrupt in send+3 state %u: " 357 "vect = 0x%08x\n", ch, send->state, 358 vect); 359 send->recovery = 1; 360 break; 361 } 362 send->state = SUME_RIFFA_CHAN_STATE_READ; 363 vect &= ~SUME_MSI_TXBUF; 364 break; 365 case SUME_RIFFA_CHAN_STATE_READ: 366 if (!(vect & SUME_MSI_TXDONE)) { 367 device_printf(dev, "ch %d unexpected " 368 "interrupt in send+4 state %u: " 369 "vect = 0x%08x\n", ch, send->state, 370 vect); 371 send->recovery = 1; 372 break; 373 } 374 send->state = SUME_RIFFA_CHAN_STATE_LEN; 375 376 len = read_reg(adapter, RIFFA_CHNL_REG(ch, 377 RIFFA_RX_TNFR_LEN_REG_OFF)); 378 if (ch == SUME_RIFFA_CHANNEL_DATA) { 379 send->state = 380 SUME_RIFFA_CHAN_STATE_IDLE; 381 check_tx_queues(adapter); 382 } else 
if (ch == SUME_RIFFA_CHANNEL_REG) 383 wakeup(&send->event); 384 else { 385 device_printf(dev, "ch %d unexpected " 386 "interrupt in send+4 state %u: " 387 "vect = 0x%08x\n", ch, send->state, 388 vect); 389 send->recovery = 1; 390 } 391 vect &= ~SUME_MSI_TXDONE; 392 break; 393 case SUME_RIFFA_CHAN_STATE_LEN: 394 break; 395 default: 396 device_printf(dev, "unknown TX state!\n"); 397 } 398 loops++; 399 } 400 401 if ((vect & (SUME_MSI_TXBUF | SUME_MSI_TXDONE)) && 402 send->recovery) 403 device_printf(dev, "ch %d ignoring vect = 0x%08x " 404 "during TX; not in recovery; state = %d loops = " 405 "%d\n", ch, vect, send->state, loops); 406 407 loops = 0; 408 while ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF | 409 SUME_MSI_RXDONE)) && loops < 5) { 410 if (adapter->sume_debug) 411 device_printf(dev, "RX ch %d state %u vect = " 412 "0x%08x\n", ch, recv->state, vect); 413 switch (recv->state) { 414 case SUME_RIFFA_CHAN_STATE_IDLE: 415 if (!(vect & SUME_MSI_RXQUE)) { 416 device_printf(dev, "ch %d unexpected " 417 "interrupt in recv+0 state %u: " 418 "vect = 0x%08x\n", ch, recv->state, 419 vect); 420 recv->recovery = 1; 421 break; 422 } 423 uint32_t max_ptr; 424 425 /* Clear recovery state. */ 426 recv->recovery = 0; 427 428 /* Get offset and length. */ 429 recv->offlast = read_reg(adapter, 430 RIFFA_CHNL_REG(ch, 431 RIFFA_TX_OFFLAST_REG_OFF)); 432 recv->len = read_reg(adapter, RIFFA_CHNL_REG(ch, 433 RIFFA_TX_LEN_REG_OFF)); 434 435 /* Boundary checks. */ 436 max_ptr = (uint32_t)((uintptr_t)recv->buf_addr 437 + SUME_RIFFA_OFFSET(recv->offlast) 438 + SUME_RIFFA_LEN(recv->len) - 1); 439 if (max_ptr < 440 (uint32_t)((uintptr_t)recv->buf_addr)) 441 device_printf(dev, "receive buffer " 442 "wrap-around overflow.\n"); 443 if (SUME_RIFFA_OFFSET(recv->offlast) + 444 SUME_RIFFA_LEN(recv->len) > 445 adapter->sg_buf_size) 446 device_printf(dev, "receive buffer too" 447 " small.\n"); 448 449 /* Fill the bouncebuf "descriptor". 
*/ 450 sume_fill_bb_desc(adapter, recv, 451 SUME_RIFFA_LEN(recv->len)); 452 453 bus_dmamap_sync(recv->ch_tag, recv->ch_map, 454 BUS_DMASYNC_PREREAD | 455 BUS_DMASYNC_PREWRITE); 456 write_reg(adapter, RIFFA_CHNL_REG(ch, 457 RIFFA_TX_SG_ADDR_LO_REG_OFF), 458 SUME_RIFFA_LO_ADDR(recv->buf_hw_addr)); 459 write_reg(adapter, RIFFA_CHNL_REG(ch, 460 RIFFA_TX_SG_ADDR_HI_REG_OFF), 461 SUME_RIFFA_HI_ADDR(recv->buf_hw_addr)); 462 write_reg(adapter, RIFFA_CHNL_REG(ch, 463 RIFFA_TX_SG_LEN_REG_OFF), 464 4 * recv->num_sg); 465 bus_dmamap_sync(recv->ch_tag, recv->ch_map, 466 BUS_DMASYNC_POSTREAD | 467 BUS_DMASYNC_POSTWRITE); 468 469 recv->state = SUME_RIFFA_CHAN_STATE_READY; 470 vect &= ~SUME_MSI_RXQUE; 471 break; 472 case SUME_RIFFA_CHAN_STATE_READY: 473 if (!(vect & SUME_MSI_RXBUF)) { 474 device_printf(dev, "ch %d unexpected " 475 "interrupt in recv+1 state %u: " 476 "vect = 0x%08x\n", ch, recv->state, 477 vect); 478 recv->recovery = 1; 479 break; 480 } 481 recv->state = SUME_RIFFA_CHAN_STATE_READ; 482 vect &= ~SUME_MSI_RXBUF; 483 break; 484 case SUME_RIFFA_CHAN_STATE_READ: 485 if (!(vect & SUME_MSI_RXDONE)) { 486 device_printf(dev, "ch %d unexpected " 487 "interrupt in recv+2 state %u: " 488 "vect = 0x%08x\n", ch, recv->state, 489 vect); 490 recv->recovery = 1; 491 break; 492 } 493 len = read_reg(adapter, RIFFA_CHNL_REG(ch, 494 RIFFA_TX_TNFR_LEN_REG_OFF)); 495 496 /* Remember, len and recv->len are words. 
*/ 497 if (ch == SUME_RIFFA_CHANNEL_DATA) { 498 m = sume_rx_build_mbuf(adapter, 499 len << 2); 500 recv->state = 501 SUME_RIFFA_CHAN_STATE_IDLE; 502 } else if (ch == SUME_RIFFA_CHANNEL_REG) 503 wakeup(&recv->event); 504 else { 505 device_printf(dev, "ch %d unexpected " 506 "interrupt in recv+2 state %u: " 507 "vect = 0x%08x\n", ch, recv->state, 508 vect); 509 recv->recovery = 1; 510 } 511 vect &= ~SUME_MSI_RXDONE; 512 break; 513 case SUME_RIFFA_CHAN_STATE_LEN: 514 break; 515 default: 516 device_printf(dev, "unknown RX state!\n"); 517 } 518 loops++; 519 } 520 521 if ((vect & (SUME_MSI_RXQUE | SUME_MSI_RXBUF | 522 SUME_MSI_RXDONE)) && recv->recovery) { 523 device_printf(dev, "ch %d ignoring vect = 0x%08x " 524 "during RX; not in recovery; state = %d, loops = " 525 "%d\n", ch, vect, recv->state, loops); 526 527 /* Clean the unfinished transaction. */ 528 if (ch == SUME_RIFFA_CHANNEL_REG && 529 vect & SUME_MSI_RXDONE) { 530 read_reg(adapter, RIFFA_CHNL_REG(ch, 531 RIFFA_TX_TNFR_LEN_REG_OFF)); 532 recv->recovery = 0; 533 } 534 } 535 } 536 SUME_UNLOCK(adapter); 537 538 if (m != NULL) { 539 ifp = m->m_pkthdr.rcvif; 540 if_input(ifp, m); 541 } 542 } 543 544 /* 545 * As we cannot disable interrupt generation, ignore early interrupts by waiting 546 * for the adapter to go into the 'running' state. 
547 */ 548 static int 549 sume_intr_filter(void *arg) 550 { 551 struct sume_adapter *adapter = arg; 552 553 if (adapter->running == 0) 554 return (FILTER_STRAY); 555 556 return (FILTER_SCHEDULE_THREAD); 557 } 558 559 static int 560 sume_probe_riffa_pci(struct sume_adapter *adapter) 561 { 562 device_t dev = adapter->dev; 563 int error, count, capmem; 564 uint32_t reg, devctl, linkctl; 565 566 pci_enable_busmaster(dev); 567 568 adapter->rid = PCIR_BAR(0); 569 adapter->bar0_addr = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 570 &adapter->rid, RF_ACTIVE); 571 if (adapter->bar0_addr == NULL) { 572 device_printf(dev, "unable to allocate bus resource: " 573 "BAR0 address\n"); 574 return (ENXIO); 575 } 576 adapter->bt = rman_get_bustag(adapter->bar0_addr); 577 adapter->bh = rman_get_bushandle(adapter->bar0_addr); 578 adapter->bar0_len = rman_get_size(adapter->bar0_addr); 579 if (adapter->bar0_len != 1024) { 580 device_printf(dev, "BAR0 resource length %lu != 1024\n", 581 adapter->bar0_len); 582 return (ENXIO); 583 } 584 585 count = pci_msi_count(dev); 586 error = pci_alloc_msi(dev, &count); 587 if (error) { 588 device_printf(dev, "unable to allocate bus resource: PCI " 589 "MSI\n"); 590 return (error); 591 } 592 593 adapter->irq.rid = 1; /* Should be 1, thus says pci_alloc_msi() */ 594 adapter->irq.res = bus_alloc_resource_any(dev, SYS_RES_IRQ, 595 &adapter->irq.rid, RF_SHAREABLE | RF_ACTIVE); 596 if (adapter->irq.res == NULL) { 597 device_printf(dev, "unable to allocate bus resource: IRQ " 598 "memory\n"); 599 return (ENXIO); 600 } 601 602 error = bus_setup_intr(dev, adapter->irq.res, INTR_MPSAFE | 603 INTR_TYPE_NET, sume_intr_filter, sume_intr_handler, adapter, 604 &adapter->irq.tag); 605 if (error) { 606 device_printf(dev, "failed to setup interrupt for rid %d, name" 607 " %s: %d\n", adapter->irq.rid, "SUME_INTR", error); 608 return (ENXIO); 609 } 610 611 if (pci_find_cap(dev, PCIY_EXPRESS, &capmem) != 0) { 612 device_printf(dev, "PCI not PCIe capable\n"); 613 return 
(ENXIO); 614 } 615 616 devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL, 2); 617 pci_write_config(dev, capmem + PCIER_DEVICE_CTL, (devctl | 618 PCIEM_CTL_EXT_TAG_FIELD), 2); 619 620 devctl = pci_read_config(dev, capmem + PCIER_DEVICE_CTL2, 2); 621 pci_write_config(dev, capmem + PCIER_DEVICE_CTL2, (devctl | 622 PCIEM_CTL2_ID_ORDERED_REQ_EN), 2); 623 624 linkctl = pci_read_config(dev, capmem + PCIER_LINK_CTL, 2); 625 pci_write_config(dev, capmem + PCIER_LINK_CTL, (linkctl | 626 PCIEM_LINK_CTL_RCB), 2); 627 628 reg = read_reg(adapter, RIFFA_INFO_REG_OFF); 629 adapter->num_sg = RIFFA_SG_ELEMS * ((reg >> 19) & 0xf); 630 adapter->sg_buf_size = RIFFA_SG_BUF_SIZE * ((reg >> 19) & 0xf); 631 632 error = ENODEV; 633 /* Check bus master is enabled. */ 634 if (((reg >> 4) & 0x1) != 1) { 635 device_printf(dev, "bus master not enabled: %d\n", 636 (reg >> 4) & 0x1); 637 return (error); 638 } 639 /* Check link parameters are valid. */ 640 if (((reg >> 5) & 0x3f) == 0 || ((reg >> 11) & 0x3) == 0) { 641 device_printf(dev, "link parameters not valid: %d %d\n", 642 (reg >> 5) & 0x3f, (reg >> 11) & 0x3); 643 return (error); 644 } 645 /* Check # of channels are within valid range. */ 646 if ((reg & 0xf) == 0 || (reg & 0xf) > RIFFA_MAX_CHNLS) { 647 device_printf(dev, "number of channels out of range: %d\n", 648 reg & 0xf); 649 return (error); 650 } 651 /* Check bus width. 
*/ 652 if (((reg >> 19) & 0xf) == 0 || 653 ((reg >> 19) & 0xf) > RIFFA_MAX_BUS_WIDTH_PARAM) { 654 device_printf(dev, "bus width out of range: %d\n", 655 (reg >> 19) & 0xf); 656 return (error); 657 } 658 659 device_printf(dev, "[riffa] # of channels: %d\n", 660 reg & 0xf); 661 device_printf(dev, "[riffa] bus interface width: %d\n", 662 ((reg >> 19) & 0xf) << 5); 663 device_printf(dev, "[riffa] bus master enabled: %d\n", 664 (reg >> 4) & 0x1); 665 device_printf(dev, "[riffa] negotiated link width: %d\n", 666 (reg >> 5) & 0x3f); 667 device_printf(dev, "[riffa] negotiated rate width: %d MTs\n", 668 ((reg >> 11) & 0x3) * 2500); 669 device_printf(dev, "[riffa] max downstream payload: %d B\n", 670 128 << ((reg >> 13) & 0x7)); 671 device_printf(dev, "[riffa] max upstream payload: %d B\n", 672 128 << ((reg >> 16) & 0x7)); 673 674 return (0); 675 } 676 677 /* If there is no sume_if_init, the ether_ioctl panics. */ 678 static void 679 sume_if_init(void *sc) 680 { 681 } 682 683 /* Write the address and length for our incoming / outgoing transaction. */ 684 static void 685 sume_fill_bb_desc(struct sume_adapter *adapter, struct riffa_chnl_dir *p, 686 uint64_t len) 687 { 688 struct nf_bb_desc *bouncebuf = (struct nf_bb_desc *) p->buf_addr; 689 690 bouncebuf->lower = (p->buf_hw_addr + sizeof(struct nf_bb_desc)); 691 bouncebuf->upper = (p->buf_hw_addr + sizeof(struct nf_bb_desc)) >> 32; 692 bouncebuf->len = len >> 2; 693 } 694 695 /* Module register locked write. */ 696 static int 697 sume_modreg_write_locked(struct sume_adapter *adapter) 698 { 699 struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG]; 700 701 /* Let the FPGA know about the transfer. */ 702 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG, 703 RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST); 704 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG, 705 RIFFA_RX_LEN_REG_OFF), send->len); /* words */ 706 707 /* Fill the bouncebuf "descriptor". 
*/ 708 sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len)); 709 710 /* Update the state before intiating the DMA to avoid races. */ 711 send->state = SUME_RIFFA_CHAN_STATE_READY; 712 713 bus_dmamap_sync(send->ch_tag, send->ch_map, 714 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 715 /* DMA. */ 716 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG, 717 RIFFA_RX_SG_ADDR_LO_REG_OFF), 718 SUME_RIFFA_LO_ADDR(send->buf_hw_addr)); 719 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG, 720 RIFFA_RX_SG_ADDR_HI_REG_OFF), 721 SUME_RIFFA_HI_ADDR(send->buf_hw_addr)); 722 write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_REG, 723 RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg); 724 bus_dmamap_sync(send->ch_tag, send->ch_map, 725 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 726 727 return (0); 728 } 729 730 /* 731 * Request a register read or write (depending on optype). 732 * If optype is set (0x1f) this will result in a register write, 733 * otherwise this will result in a register read request at the given 734 * address and the result will need to be DMAed back. 735 */ 736 static int 737 sume_module_reg_write(struct nf_priv *nf_priv, struct sume_ifreq *sifr, 738 uint32_t optype) 739 { 740 struct sume_adapter *adapter = nf_priv->adapter; 741 struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG]; 742 struct nf_regop_data *data; 743 int error; 744 745 /* 746 * 1. Make sure the channel is free; otherwise return EBUSY. 747 * 2. Prepare the memory in the bounce buffer (which we always 748 * use for regs). 749 * 3. Start the DMA process. 750 * 4. Sleep and wait for result and return success or error. 751 */ 752 SUME_LOCK(adapter); 753 754 if (send->state != SUME_RIFFA_CHAN_STATE_IDLE) { 755 SUME_UNLOCK(adapter); 756 return (EBUSY); 757 } 758 759 data = (struct nf_regop_data *) (send->buf_addr + 760 sizeof(struct nf_bb_desc)); 761 data->addr = htole32(sifr->addr); 762 data->val = htole32(sifr->val); 763 /* Tag to indentify request. 
*/ 764 data->rtag = htole32(++send->rtag); 765 data->optype = htole32(optype); 766 send->len = sizeof(struct nf_regop_data) / 4; /* words */ 767 768 error = sume_modreg_write_locked(adapter); 769 if (error) { 770 SUME_UNLOCK(adapter); 771 return (EFAULT); 772 } 773 774 /* Timeout after 1s. */ 775 if (send->state != SUME_RIFFA_CHAN_STATE_LEN) 776 error = msleep(&send->event, &adapter->lock, 0, 777 "Waiting recv finish", 1 * hz); 778 779 /* This was a write so we are done; were interrupted, or timed out. */ 780 if (optype != SUME_MR_READ || error != 0 || error == EWOULDBLOCK) { 781 send->state = SUME_RIFFA_CHAN_STATE_IDLE; 782 if (optype == SUME_MR_READ) 783 error = EWOULDBLOCK; 784 else 785 error = 0; 786 } else 787 error = 0; 788 789 /* 790 * For read requests we will update state once we are done 791 * having read the result to avoid any two outstanding 792 * transactions, or we need a queue and validate tags, 793 * which is a lot of work for a low priority, infrequent 794 * event. 795 */ 796 797 SUME_UNLOCK(adapter); 798 799 return (error); 800 } 801 802 /* Module register read. */ 803 static int 804 sume_module_reg_read(struct nf_priv *nf_priv, struct sume_ifreq *sifr) 805 { 806 struct sume_adapter *adapter = nf_priv->adapter; 807 struct riffa_chnl_dir *recv = adapter->recv[SUME_RIFFA_CHANNEL_REG]; 808 struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_REG]; 809 struct nf_regop_data *data; 810 int error = 0; 811 812 /* 813 * 0. Sleep waiting for result if needed (unless condition is 814 * true already). 815 * 1. Read DMA results. 816 * 2. Update state on *TX* to IDLE to allow next read to start. 817 */ 818 SUME_LOCK(adapter); 819 820 bus_dmamap_sync(recv->ch_tag, recv->ch_map, 821 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 822 /* 823 * We only need to be woken up at the end of the transaction. 824 * Timeout after 1s. 
825 */ 826 if (recv->state != SUME_RIFFA_CHAN_STATE_READ) 827 error = msleep(&recv->event, &adapter->lock, 0, 828 "Waiting transaction finish", 1 * hz); 829 830 if (recv->state != SUME_RIFFA_CHAN_STATE_READ || error == EWOULDBLOCK) { 831 SUME_UNLOCK(adapter); 832 device_printf(adapter->dev, "wait error: %d\n", error); 833 return (EWOULDBLOCK); 834 } 835 836 bus_dmamap_sync(recv->ch_tag, recv->ch_map, 837 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 838 839 /* 840 * Read reply data and validate address and tag. 841 * Note: we do access the send side without lock but the state 842 * machine does prevent the data from changing. 843 */ 844 data = (struct nf_regop_data *) (recv->buf_addr + 845 sizeof(struct nf_bb_desc)); 846 847 if (le32toh(data->rtag) != send->rtag) 848 device_printf(adapter->dev, "rtag error: 0x%08x 0x%08x\n", 849 le32toh(data->rtag), send->rtag); 850 851 sifr->val = le32toh(data->val); 852 recv->state = SUME_RIFFA_CHAN_STATE_IDLE; 853 854 /* We are done. */ 855 send->state = SUME_RIFFA_CHAN_STATE_IDLE; 856 857 SUME_UNLOCK(adapter); 858 859 return (0); 860 } 861 862 /* Read value from a module register and return it to a sume_ifreq. 
 */
/*
 * Read the value of a SUME module register: post a read request through
 * the module register interface, then collect the reply.  Returns 0 on
 * success or the error from the underlying register write/read.
 */
static int
get_modreg_value(struct nf_priv *nf_priv, struct sume_ifreq *sifr)
{
	int error;

	error = sume_module_reg_write(nf_priv, sifr, SUME_MR_READ);
	if (!error)
		error = sume_module_reg_read(nf_priv, sifr);

	return (error);
}

/*
 * Ioctl handler for a SUME network interface.  Handles the standard
 * media ioctls plus two private commands (SUME_IOCTL_CMD_WRITE_REG /
 * SUME_IOCTL_CMD_READ_REG) that let userspace access FPGA module
 * registers; everything else is passed on to ether_ioctl().
 */
static int
sume_if_ioctl(if_t ifp, unsigned long cmd, caddr_t data)
{
	struct ifreq *ifr = (struct ifreq *) data;
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_ifreq sifr;
	int error = 0;

	switch (cmd) {
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &nf_priv->media, cmd);
		break;

	case SUME_IOCTL_CMD_WRITE_REG:
		/* Copy the register address/value pair in from userspace. */
		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
		if (error) {
			error = EINVAL;
			break;
		}
		error = sume_module_reg_write(nf_priv, &sifr, SUME_MR_WRITE);
		break;

	case SUME_IOCTL_CMD_READ_REG:
		error = copyin(ifr_data_get_ptr(ifr), &sifr, sizeof(sifr));
		if (error) {
			error = EINVAL;
			break;
		}

		error = get_modreg_value(nf_priv, &sifr);
		if (error)
			break;

		/* Return the register value to userspace. */
		error = copyout(&sifr, ifr_data_get_ptr(ifr), sizeof(sifr));
		if (error)
			error = EINVAL;

		break;

	case SIOCSIFFLAGS:
		/* Silence tcpdump 'promisc mode not supported' warning. */
		if (if_getflags(ifp) & IFF_PROMISC)
			break;

		/* FALLTHROUGH */
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	return (error);
}

/*
 * Media change callback: only Ethernet media are accepted; the
 * interface baudrate is updated to match the selected media word.
 */
static int
sume_media_change(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct ifmedia *ifm = &nf_priv->media;

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	if (IFM_SUBTYPE(ifm->ifm_media) == IFM_10G_SR)
		if_setbaudrate(ifp, ifmedia_baudrate(IFM_ETHER | IFM_10G_SR));
	else
		if_setbaudrate(ifp, ifmedia_baudrate(ifm->ifm_media));

	return (0);
}

/*
 * Poll the per-port status register on the FPGA and propagate any
 * link UP/DOWN transition to the network stack.  Silently returns if
 * the register read fails.
 */
static void
sume_update_link_status(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;
	struct sume_ifreq sifr;
	int link_status;

	sifr.addr = SUME_STATUS_ADDR(nf_priv->port);
	sifr.val = 0;

	if (get_modreg_value(nf_priv, &sifr))
		return;

	link_status = SUME_LINK_STATUS(sifr.val);

	if (!link_status && nf_priv->link_up) {
		/* Link went away. */
		if_link_state_change(ifp, LINK_STATE_DOWN);
		nf_priv->link_up = 0;
		if (adapter->sume_debug)
			device_printf(adapter->dev, "port %d link state "
			    "changed to DOWN\n", nf_priv->unit);
	} else if (link_status && !nf_priv->link_up) {
		/* Link came up. */
		nf_priv->link_up = 1;
		if_link_state_change(ifp, LINK_STATE_UP);
		if (adapter->sume_debug)
			device_printf(adapter->dev, "port %d link state "
			    "changed to UP\n", nf_priv->unit);
	}
}

/*
 * Media status callback: report the active media word and refresh the
 * link state so IFM_ACTIVE reflects the current hardware status.
 */
static void
sume_media_status(if_t ifp, struct ifmediareq *ifmr)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct ifmedia *ifm = &nf_priv->media;

	if (ifm->ifm_cur->ifm_media == (IFM_ETHER | IFM_10G_SR) &&
	    (if_getflags(ifp) & IFF_UP))
		ifmr->ifm_active = IFM_ETHER | IFM_10G_SR;
	else
		ifmr->ifm_active = ifm->ifm_cur->ifm_media;

	ifmr->ifm_status |= IFM_AVALID;

	sume_update_link_status(ifp);

	if (nf_priv->link_up)
		ifmr->ifm_status |= IFM_ACTIVE;
}

/*
 * Packet to transmit. We take the packet data from the mbuf and copy it to the
 * bouncebuffer address buf_addr+3*sizeof(uint32_t)+16. The 16 bytes before the
 * packet data are for metadata: sport/dport (depending on our source
 * interface), packet length and magic 0xcafe. We tell the SUME about the
 * transfer, fill the first 3*sizeof(uint32_t) bytes of the bouncebuffer with
 * the information about the start and length of the packet and trigger the
 * transaction.
 */
static int
sume_if_start_locked(if_t ifp)
{
	struct mbuf *m;
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;
	struct riffa_chnl_dir *send = adapter->send[SUME_RIFFA_CHANNEL_DATA];
	uint8_t *outbuf;
	struct nf_metadata *mdata;
	int plen = SUME_MIN_PKT_SIZE;	/* Short frames are padded to this. */

	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));
	KASSERT(send->state == SUME_RIFFA_CHAN_STATE_IDLE,
	    ("SUME not in IDLE state"));

	m = if_dequeue(ifp);
	if (m == NULL)
		return (EINVAL);

	/* Packets large enough do not need to be padded */
	if (m->m_pkthdr.len > SUME_MIN_PKT_SIZE)
		plen = m->m_pkthdr.len;

	if (adapter->sume_debug)
		device_printf(adapter->dev, "sending %d bytes to %s%d\n", plen,
		    SUME_ETH_DEVICE_NAME, nf_priv->unit);

	/* Metadata + packet data go right after the bouncebuf descriptor. */
	outbuf = (uint8_t *) send->buf_addr + sizeof(struct nf_bb_desc);
	mdata = (struct nf_metadata *) outbuf;

	/* Clear the recovery flag. */
	send->recovery = 0;

	/* Make sure we fit with the 16 bytes nf_metadata. */
	if (m->m_pkthdr.len + sizeof(struct nf_metadata) >
	    adapter->sg_buf_size) {
		device_printf(adapter->dev, "packet too big for bounce buffer "
		    "(%d)\n", m->m_pkthdr.len);
		m_freem(m);
		nf_priv->stats.tx_dropped++;
		return (ENOMEM);
	}

	bus_dmamap_sync(send->ch_tag, send->ch_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* Zero out the padded data */
	if (m->m_pkthdr.len < SUME_MIN_PKT_SIZE)
		bzero(outbuf + sizeof(struct nf_metadata), SUME_MIN_PKT_SIZE);
	/* Skip the first 16 bytes for the metadata. */
	m_copydata(m, 0, m->m_pkthdr.len, outbuf + sizeof(struct nf_metadata));
	/* Transfer length in 32-bit words, rounded up. */
	send->len = (sizeof(struct nf_metadata) + plen + 3) / 4;

	/* Fill in the metadata: CPU(DMA) ports are odd, MAC ports are even. */
	mdata->sport = htole16(1 << (nf_priv->port * 2 + 1));
	mdata->dport = htole16(1 << (nf_priv->port * 2));
	mdata->plen = htole16(plen);
	mdata->magic = htole16(SUME_RIFFA_MAGIC);
	mdata->t1 = htole32(0);
	mdata->t2 = htole32(0);

	/* Let the FPGA know about the transfer. */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_OFFLAST_REG_OFF), SUME_OFFLAST);
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_LEN_REG_OFF), send->len);

	/* Fill the bouncebuf "descriptor". */
	sume_fill_bb_desc(adapter, send, SUME_RIFFA_LEN(send->len));

	/* Update the state before initiating the DMA to avoid races. */
	send->state = SUME_RIFFA_CHAN_STATE_READY;

	/* DMA: program the scatter/gather address and length registers. */
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_SG_ADDR_LO_REG_OFF),
	    SUME_RIFFA_LO_ADDR(send->buf_hw_addr));
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_SG_ADDR_HI_REG_OFF),
	    SUME_RIFFA_HI_ADDR(send->buf_hw_addr));
	write_reg(adapter, RIFFA_CHNL_REG(SUME_RIFFA_CHANNEL_DATA,
	    RIFFA_RX_SG_LEN_REG_OFF), 4 * send->num_sg);

	bus_dmamap_sync(send->ch_tag, send->ch_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	nf_priv->stats.tx_packets++;
	nf_priv->stats.tx_bytes += plen;

	/* We can free as long as we use the bounce buffer. */
	m_freem(m);

	adapter->last_ifc = nf_priv->port;

	/* Reset watchdog counter. */
	adapter->wd_counter = 0;

	return (0);
}

/*
 * if_start entry point: take the adapter lock and kick off a transfer,
 * but only when the adapter is running, the interface is UP and the TX
 * DMA channel is idle.
 */
static void
sume_if_start(if_t ifp)
{
	struct nf_priv *nf_priv = if_getsoftc(ifp);
	struct sume_adapter *adapter = nf_priv->adapter;

	if (!adapter->running || !(if_getflags(ifp) & IFF_UP))
		return;

	SUME_LOCK(adapter);
	if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state ==
	    SUME_RIFFA_CHAN_STATE_IDLE)
		sume_if_start_locked(ifp);
	SUME_UNLOCK(adapter);
}

/*
 * We call this function at the end of every TX transaction to check for
 * remaining packets in the TX queues for every UP interface.
 */
static void
check_tx_queues(struct sume_adapter *adapter)
{
	int i, last_ifc;

	KASSERT(mtx_owned(&adapter->lock), ("SUME lock not owned"));

	last_ifc = adapter->last_ifc;

	/*
	 * Check all interfaces, round-robin starting after the interface
	 * that transmitted last; stop as soon as one transfer has been
	 * started successfully (the channel is busy again).
	 */
	for (i = last_ifc + 1; i < last_ifc + SUME_NPORTS + 1; i++) {
		if_t ifp = adapter->ifp[i % SUME_NPORTS];

		if (!(if_getflags(ifp) & IFF_UP))
			continue;

		if (!sume_if_start_locked(ifp))
			break;
	}
}

/*
 * Allocate and attach one Ethernet ifnet for the given SUME port and
 * register the default 10G-SR media.
 */
static void
sume_ifp_alloc(struct sume_adapter *adapter, uint32_t port)
{
	if_t ifp;
	struct nf_priv *nf_priv = malloc(sizeof(struct nf_priv), M_SUME,
	    M_ZERO | M_WAITOK);

	ifp = if_alloc(IFT_ETHER);
	adapter->ifp[port] = ifp;
	if_setsoftc(ifp, nf_priv);

	nf_priv->adapter = adapter;
	nf_priv->unit = alloc_unr(unr);	/* Driver-global unit number. */
	nf_priv->port = port;
	nf_priv->link_up = 0;

	if_initname(ifp, SUME_ETH_DEVICE_NAME, nf_priv->unit);
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);

	if_setinitfn(ifp, sume_if_init);
	if_setstartfn(ifp, sume_if_start);
	if_setioctlfn(ifp, sume_if_ioctl);

	/* Derive a per-unit MAC address from the default one. */
	uint8_t hw_addr[ETHER_ADDR_LEN] = DEFAULT_ETHER_ADDRESS;
	hw_addr[ETHER_ADDR_LEN-1] = nf_priv->unit;
	ether_ifattach(ifp, hw_addr);

	ifmedia_init(&nf_priv->media, IFM_IMASK, sume_media_change,
	    sume_media_status);
	ifmedia_add(&nf_priv->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
	ifmedia_set(&nf_priv->media, IFM_ETHER | IFM_10G_SR);

	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
}

/*
 * busdma load callback: store the physical address of the (single)
 * segment into the caller-supplied bus_addr_t.
 */
static void
callback_dma(void *arg, bus_dma_segment_t *segs, int nseg, int err)
{
	if (err)
		return;

	/* The tag is created with nsegments = 1. */
	KASSERT(nseg == 1, ("%d segments returned!", nseg));

	*(bus_addr_t *) arg = segs[0].ds_addr;
}

/*
 * Allocate the riffa_chnl_dir array for one direction ("recv"/"send")
 * and, for every channel, create a DMA tag, allocate and load a
 * coherent bounce buffer of sg_buf_size bytes and initialize the
 * channel bookkeeping.  Returns 0 or an errno; on failure the caller
 * unwinds through sume_detach().
 */
static int
sume_probe_riffa_buffer(const struct sume_adapter *adapter,
    struct riffa_chnl_dir ***p, const char *dir)
{
	struct riffa_chnl_dir **rp;
	bus_addr_t hw_addr;
	int error, ch;
	device_t dev = adapter->dev;

	error = ENOMEM;
	*p = malloc(SUME_RIFFA_CHANNELS * sizeof(struct riffa_chnl_dir *),
	    M_SUME, M_ZERO | M_WAITOK);
	/* NOTE(review): malloc(M_WAITOK) cannot fail; this check is dead. */
	if (*p == NULL) {
		device_printf(dev, "malloc(%s) failed.\n", dir);
		return (error);
	}

	rp = *p;
	/* Allocate the chnl_dir structs themselves. */
	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
		/* One direction. */
		rp[ch] = malloc(sizeof(struct riffa_chnl_dir), M_SUME,
		    M_ZERO | M_WAITOK);
		if (rp[ch] == NULL) {
			device_printf(dev, "malloc(%s[%d]) riffa_chnl_dir "
			    "failed.\n", dir, ch);
			return (error);
		}

		/* One contiguous, 4-byte aligned buffer per channel. */
		int err = bus_dma_tag_create(bus_get_dma_tag(dev),
		    4, 0,			/* alignment, boundary */
		    BUS_SPACE_MAXADDR,		/* lowaddr */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    adapter->sg_buf_size,	/* maxsize */
		    1,				/* nsegments */
		    adapter->sg_buf_size,	/* maxsegsize */
		    0,				/* flags */
		    NULL,			/* lockfunc */
		    NULL,			/* lockfuncarg */
		    &rp[ch]->ch_tag);

		if (err) {
			device_printf(dev, "bus_dma_tag_create(%s[%d]) "
			    "failed.\n", dir, ch);
			return (err);
		}

		err = bus_dmamem_alloc(rp[ch]->ch_tag, (void **)
		    &rp[ch]->buf_addr, BUS_DMA_WAITOK | BUS_DMA_COHERENT |
		    BUS_DMA_ZERO, &rp[ch]->ch_map);
		if (err) {
			device_printf(dev, "bus_dmamem_alloc(%s[%d]) failed.\n",
			    dir, ch);
			return (err);
		}

		bzero(rp[ch]->buf_addr, adapter->sg_buf_size);

		err = bus_dmamap_load(rp[ch]->ch_tag, rp[ch]->ch_map,
		    rp[ch]->buf_addr, adapter->sg_buf_size, callback_dma,
		    &hw_addr, BUS_DMA_NOWAIT);
		if (err) {
			device_printf(dev, "bus_dmamap_load(%s[%d]) failed.\n",
			    dir, ch);
			return (err);
		}
		rp[ch]->buf_hw_addr = hw_addr;
		rp[ch]->num_sg = 1;
		rp[ch]->state = SUME_RIFFA_CHAN_STATE_IDLE;

		rp[ch]->rtag = SUME_INIT_RTAG;
	}

	return (0);
}

/* Allocate the bounce buffers for both directions (recv, then send). */
static int
sume_probe_riffa_buffers(struct sume_adapter *adapter)
{
	int error;

	error = sume_probe_riffa_buffer(adapter, &adapter->recv, "recv");
	if (error)
		return (error);

	error = sume_probe_riffa_buffer(adapter, &adapter->send, "send");

	return (error);
}

/*
 * Register the sysctl leaves: a global debug knob and RX error totals
 * on the device node, plus per-interface counters under one sub-node
 * per interface.
 */
static void
sume_sysctl_init(struct sume_adapter *adapter)
{
	device_t dev = adapter->dev;
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct sysctl_oid *tmp_tree;
	char namebuf[MAX_IFC_NAME_LEN];
	int i;

	/*
	 * NOTE(review): the "sume" node is created but the leaves below
	 * are attached to 'child' (the device node), not to the new
	 * node -- confirm this is intentional.
	 */
	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "sume", CTLFLAG_RW,
	    0, "SUME top-level tree");
	if (tree == NULL) {
		device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
		return;
	}
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug", CTLFLAG_RW,
	    &adapter->sume_debug, 0, "debug int leaf");

	/* total RX error stats */
	SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_epkts",
	    CTLFLAG_RD, &adapter->packets_err, 0, "rx errors");
	SYSCTL_ADD_U64(ctx, child, OID_AUTO, "rx_ebytes",
	    CTLFLAG_RD, &adapter->bytes_err, 0, "rx error bytes");

	for (i = SUME_NPORTS - 1; i >= 0; i--) {
		if_t ifp = adapter->ifp[i];
		if (ifp == NULL)
			continue;

		struct nf_priv *nf_priv = if_getsoftc(ifp);

		snprintf(namebuf, MAX_IFC_NAME_LEN, "%s%d",
		    SUME_ETH_DEVICE_NAME, nf_priv->unit);
		tmp_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
		    CTLFLAG_RW, 0, "SUME ifc tree");
		if (tmp_tree == NULL) {
			device_printf(dev, "SYSCTL_ADD_NODE failed.\n");
			return;
		}

		/* Packets dropped by down interface. */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "ifc_down_bytes", CTLFLAG_RD,
		    &nf_priv->stats.ifc_down_bytes, 0, "ifc_down bytes");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "ifc_down_packets", CTLFLAG_RD,
		    &nf_priv->stats.ifc_down_packets, 0, "ifc_down packets");

		/* HW RX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "hw_rx_packets", CTLFLAG_RD, &nf_priv->stats.hw_rx_packets,
		    0, "hw_rx packets");

		/* HW TX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "hw_tx_packets", CTLFLAG_RD, &nf_priv->stats.hw_tx_packets,
		    0, "hw_tx packets");

		/* RX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "rx_bytes", CTLFLAG_RD, &nf_priv->stats.rx_bytes, 0,
		    "rx bytes");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "rx_dropped", CTLFLAG_RD, &nf_priv->stats.rx_dropped, 0,
		    "rx dropped");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "rx_packets", CTLFLAG_RD, &nf_priv->stats.rx_packets, 0,
		    "rx packets");

		/* TX stats */
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "tx_bytes", CTLFLAG_RD, &nf_priv->stats.tx_bytes, 0,
		    "tx bytes");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "tx_dropped", CTLFLAG_RD, &nf_priv->stats.tx_dropped, 0,
		    "tx dropped");
		SYSCTL_ADD_U64(ctx, SYSCTL_CHILDREN(tmp_tree), OID_AUTO,
		    "tx_packets", CTLFLAG_RD, &nf_priv->stats.tx_packets, 0,
		    "tx packets");
	}
}

/*
 * 1 Hz watchdog callout: queue the statistics task and reset the TX
 * DMA channel if it has been non-idle for three consecutive ticks.
 */
static void
sume_local_timer(void *arg)
{
	struct sume_adapter *adapter = arg;

	if (!adapter->running)
		return;

	taskqueue_enqueue(adapter->tq, &adapter->stat_task);

	SUME_LOCK(adapter);
	if (adapter->send[SUME_RIFFA_CHANNEL_DATA]->state !=
	    SUME_RIFFA_CHAN_STATE_IDLE && ++adapter->wd_counter >= 3) {
		/* Resetting interfaces if stuck for 3 seconds. */
		device_printf(adapter->dev, "TX stuck, resetting adapter.\n");
		read_reg(adapter, RIFFA_INFO_REG_OFF);

		adapter->send[SUME_RIFFA_CHANNEL_DATA]->state =
		    SUME_RIFFA_CHAN_STATE_IDLE;
		adapter->wd_counter = 0;

		check_tx_queues(adapter);
	}
	SUME_UNLOCK(adapter);

	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);
}

/*
 * Taskqueue handler: refresh link state and accumulate the hardware
 * RX/TX packet counters for every interface that is UP.
 */
static void
sume_get_stats(void *context, int pending)
{
	struct sume_adapter *adapter = context;
	int i;

	for (i = 0; i < SUME_NPORTS; i++) {
		if_t ifp = adapter->ifp[i];

		if (if_getflags(ifp) & IFF_UP) {
			struct nf_priv *nf_priv = if_getsoftc(ifp);
			struct sume_ifreq sifr;

			sume_update_link_status(ifp);

			/* Get RX counter. */
			sifr.addr = SUME_STAT_RX_ADDR(nf_priv->port);
			sifr.val = 0;

			if (!get_modreg_value(nf_priv, &sifr))
				nf_priv->stats.hw_rx_packets += sifr.val;

			/* Get TX counter. */
			sifr.addr = SUME_STAT_TX_ADDR(nf_priv->port);
			sifr.val = 0;

			if (!get_modreg_value(nf_priv, &sifr))
				nf_priv->stats.hw_tx_packets += sifr.val;
		}
	}
}

/*
 * Device attach: set up the RIFFA PCI resources and DMA buffers,
 * create the network interfaces and sysctls, reset the hardware and
 * start the watchdog/statistics machinery.  On any failure
 * sume_detach() unwinds the partial initialization.
 */
static int
sume_attach(device_t dev)
{
	struct sume_adapter *adapter = device_get_softc(dev);
	adapter->dev = dev;
	int error, i;

	mtx_init(&adapter->lock, "Global lock", NULL, MTX_DEF);

	adapter->running = 0;

	/* OK finish up RIFFA. */
	error = sume_probe_riffa_pci(adapter);
	if (error != 0)
		goto error;

	error = sume_probe_riffa_buffers(adapter);
	if (error != 0)
		goto error;

	/* Now do the network interfaces. */
	for (i = 0; i < SUME_NPORTS; i++)
		sume_ifp_alloc(adapter, i);

	/* Register stats and register sysctls. */
	sume_sysctl_init(adapter);

	/* Reset the HW. */
	read_reg(adapter, RIFFA_INFO_REG_OFF);

	/* Ready to go, "enable" IRQ. */
	adapter->running = 1;

	callout_init(&adapter->timer, 1);
	TASK_INIT(&adapter->stat_task, 0, sume_get_stats, adapter);

	adapter->tq = taskqueue_create("sume_stats", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s stattaskq",
	    device_get_nameunit(adapter->dev));

	callout_reset(&adapter->timer, 1 * hz, sume_local_timer, adapter);

	return (0);

error:
	sume_detach(dev);

	return (error);
}

/*
 * Free the per-channel bounce buffers for one direction.
 * NOTE(review): the DMA map is not unloaded and the tag is not
 * destroyed here -- confirm that is intentional.
 */
static void
sume_remove_riffa_buffer(const struct sume_adapter *adapter,
    struct riffa_chnl_dir **pp)
{
	int ch;

	for (ch = 0; ch < SUME_RIFFA_CHANNELS; ch++) {
		if (pp[ch] == NULL)
			continue;

		if (pp[ch]->buf_hw_addr != 0) {
			bus_dmamem_free(pp[ch]->ch_tag, pp[ch]->buf_addr,
			    pp[ch]->ch_map);
			pp[ch]->buf_hw_addr = 0;
		}

		free(pp[ch], M_SUME);
	}
}

/* Free the send and recv channel arrays, if they were allocated. */
static void
sume_remove_riffa_buffers(struct sume_adapter *adapter)
{
	if (adapter->send != NULL) {
		sume_remove_riffa_buffer(adapter, adapter->send);
		free(adapter->send, M_SUME);
		adapter->send = NULL;
	}
	if (adapter->recv != NULL) {
		sume_remove_riffa_buffer(adapter, adapter->recv);
		free(adapter->recv, M_SUME);
		adapter->recv = NULL;
	}
}

/*
 * Device detach: stop the watchdog and stats task, detach and free all
 * interfaces, then release the DMA buffers, interrupt and BAR
 * resources.  Also called by sume_attach() to unwind a partially
 * completed attach, so every step tolerates missing pieces.
 */
static int
sume_detach(device_t dev)
{
	struct sume_adapter *adapter = device_get_softc(dev);
	int i;
	struct nf_priv *nf_priv;

	KASSERT(mtx_initialized(&adapter->lock), ("SUME mutex not "
	    "initialized"));
	adapter->running = 0;

	/* Drain the stats callout and task queue. */
	callout_drain(&adapter->timer);

	if (adapter->tq) {
		taskqueue_drain(adapter->tq, &adapter->stat_task);
		taskqueue_free(adapter->tq);
	}

	for (i = 0; i < SUME_NPORTS; i++) {
		if_t ifp = adapter->ifp[i];
		if (ifp == NULL)
			continue;

		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
		nf_priv = if_getsoftc(ifp);

		if (if_getflags(ifp) & IFF_UP)
			if_down(ifp);
		ifmedia_removeall(&nf_priv->media);
		free_unr(unr, nf_priv->unit);

		if_setflagbits(ifp, 0, IFF_UP);
		ether_ifdetach(ifp);
		if_free(ifp);

		free(nf_priv, M_SUME);
	}

	sume_remove_riffa_buffers(adapter);

	if (adapter->irq.tag)
		bus_teardown_intr(dev, adapter->irq.res, adapter->irq.tag);
	if (adapter->irq.res)
		bus_release_resource(dev, SYS_RES_IRQ, adapter->irq.rid,
		    adapter->irq.res);

	pci_release_msi(dev);

	if (adapter->bar0_addr)
		bus_release_resource(dev, SYS_RES_MEMORY, adapter->rid,
		    adapter->bar0_addr);

	mtx_destroy(&adapter->lock);

	return (0);
}

/* Module event handler: manage the driver-global unit number allocator. */
static int
mod_event(module_t mod, int cmd, void *arg)
{
	switch (cmd) {
	case MOD_LOAD:
		unr = new_unrhdr(0, INT_MAX, NULL);
		break;

	case MOD_UNLOAD:
		delete_unrhdr(unr);
		break;
	}

	return (0);
}

DRIVER_MODULE(sume, pci, sume_driver, mod_event, NULL);
MODULE_VERSION(sume, 1);