1 /* 2 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org> 3 * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org> 4 * Copyright (c) 2013 Jeremiah Lott, Avere Systems 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/types.h> 34 #include <sys/limits.h> 35 #include <sys/ioctl.h> 36 #include <sys/uio.h> 37 #include <net/ethernet.h> 38 #include <netinet/in.h> 39 #include <netinet/tcp.h> 40 41 #include <errno.h> 42 #include <fcntl.h> 43 #include <md5.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 #include <pthread.h> 49 #include <pthread_np.h> 50 51 #include "e1000_regs.h" 52 #include "e1000_defines.h" 53 #include "mii.h" 54 55 #include "bhyverun.h" 56 #include "pci_emul.h" 57 #include "mevent.h" 58 59 /* Hardware/register definitions XXX: move some to common code. */ 60 #define E82545_VENDOR_ID_INTEL 0x8086 61 #define E82545_DEV_ID_82545EM_COPPER 0x100F 62 #define E82545_SUBDEV_ID 0x1008 63 64 #define E82545_REVISION_4 4 65 66 #define E82545_MDIC_DATA_MASK 0x0000FFFF 67 #define E82545_MDIC_OP_MASK 0x0c000000 68 #define E82545_MDIC_IE 0x20000000 69 70 #define E82545_EECD_FWE_DIS 0x00000010 /* Flash writes disabled */ 71 #define E82545_EECD_FWE_EN 0x00000020 /* Flash writes enabled */ 72 #define E82545_EECD_FWE_MASK 0x00000030 /* Flash writes mask */ 73 74 #define E82545_BAR_REGISTER 0 75 #define E82545_BAR_REGISTER_LEN (128*1024) 76 #define E82545_BAR_FLASH 1 77 #define E82545_BAR_FLASH_LEN (64*1024) 78 #define E82545_BAR_IO 2 79 #define E82545_BAR_IO_LEN 8 80 81 #define E82545_IOADDR 0x00000000 82 #define E82545_IODATA 0x00000004 83 #define E82545_IO_REGISTER_MAX 0x0001FFFF 84 #define E82545_IO_FLASH_BASE 0x00080000 85 #define E82545_IO_FLASH_MAX 0x000FFFFF 86 87 #define E82545_ARRAY_ENTRY(reg, offset) (reg + (offset<<2)) 88 #define E82545_RAR_MAX 15 89 #define E82545_MTA_MAX 127 90 #define E82545_VFTA_MAX 127 91 92 /* Slightly modified from the driver versions, hardcoded for 3 opcode bits, 93 * followed by 6 address bits. 94 * TODO: make opcode bits and addr bits configurable? 
95 * NVM Commands - Microwire */ 96 #define E82545_NVM_OPCODE_BITS 3 97 #define E82545_NVM_ADDR_BITS 6 98 #define E82545_NVM_DATA_BITS 16 99 #define E82545_NVM_OPADDR_BITS (E82545_NVM_OPCODE_BITS + E82545_NVM_ADDR_BITS) 100 #define E82545_NVM_ADDR_MASK ((1 << E82545_NVM_ADDR_BITS)-1) 101 #define E82545_NVM_OPCODE_MASK \ 102 (((1 << E82545_NVM_OPCODE_BITS) - 1) << E82545_NVM_ADDR_BITS) 103 #define E82545_NVM_OPCODE_READ (0x6 << E82545_NVM_ADDR_BITS) /* read */ 104 #define E82545_NVM_OPCODE_WRITE (0x5 << E82545_NVM_ADDR_BITS) /* write */ 105 #define E82545_NVM_OPCODE_ERASE (0x7 << E82545_NVM_ADDR_BITS) /* erase */ 106 #define E82545_NVM_OPCODE_EWEN (0x4 << E82545_NVM_ADDR_BITS) /* wr-enable */ 107 108 #define E82545_NVM_EEPROM_SIZE 64 /* 64 * 16-bit values == 128K */ 109 110 #define E1000_ICR_SRPD 0x00010000 111 112 /* 113 * XXX does this actually have a limit on the 82545 ? 114 * There is a limit on the max number of bytes, but perhaps not 115 * on descriptors ?? 116 */ 117 #define I82545_MAX_TXSEGS 20 118 119 /* Legacy receive descriptor */ 120 struct e1000_rx_desc { 121 uint64_t buffer_addr; /* Address of the descriptor's data buffer */ 122 uint16_t length; /* Length of data DMAed into data buffer */ 123 uint16_t csum; /* Packet checksum */ 124 uint8_t status; /* Descriptor status */ 125 uint8_t errors; /* Descriptor Errors */ 126 uint16_t special; 127 }; 128 129 /* Transmit descriptor types */ 130 #define E1000_TXD_MASK (E1000_TXD_CMD_DEXT | 0x00F00000) 131 #define E1000_TXD_TYP_L (0) 132 #define E1000_TXD_TYP_C (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_C) 133 #define E1000_TXD_TYP_D (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D) 134 135 /* Legacy transmit descriptor */ 136 struct e1000_tx_desc { 137 uint64_t buffer_addr; /* Address of the descriptor's data buffer */ 138 union { 139 uint32_t data; 140 struct { 141 uint16_t length; /* Data buffer length */ 142 uint8_t cso; /* Checksum offset */ 143 uint8_t cmd; /* Descriptor control */ 144 } flags; 145 } lower; 146 union { 147 uint32_t data; 148 struct { 149 uint8_t status; /* Descriptor status */ 150 uint8_t css; /* Checksum start */ 151 uint16_t special; 152 } fields; 153 } upper; 154 }; 155 156 /* Context descriptor */ 157 struct e1000_context_desc { 158 union { 159 uint32_t ip_config; 160 struct { 161 uint8_t ipcss; /* IP checksum start */ 162 uint8_t ipcso; /* IP checksum offset */ 163 uint16_t ipcse; /* IP checksum end */ 164 } ip_fields; 165 } lower_setup; 166 union { 167 uint32_t tcp_config; 168 struct { 169 uint8_t tucss; /* TCP checksum start */ 170 uint8_t tucso; /* TCP checksum offset */ 171 uint16_t tucse; /* TCP checksum end */ 172 } tcp_fields; 173 } upper_setup; 174 uint32_t cmd_and_length; 175 union { 176 uint32_t data; 177 struct { 178 uint8_t status; /* Descriptor status */ 179 uint8_t hdr_len; /* Header length */ 180 uint16_t mss; /* Maximum segment size */ 181 } fields; 182 } tcp_seg_setup; 183 }; 184 185 /* Data descriptor */ 186 struct e1000_data_desc { 187 uint64_t buffer_addr; /* Address of the descriptor's buffer address */ 188 union { 189 uint32_t data; 190 struct { 191 uint16_t length; /* Data buffer length */ 192 uint8_t typ_len_ext; 193 uint8_t cmd; 194 } flags; 195 } lower; 196 union { 197 uint32_t data; 198 struct { 199 uint8_t status; /* Descriptor status */ 200 uint8_t popts; /* Packet Options */ 201 uint16_t special; 202 } fields; 203 } upper; 204 }; 205 206 union e1000_tx_udesc { 207 struct e1000_tx_desc td; 208 struct e1000_context_desc cd; 209 struct e1000_data_desc dd; 210 }; 211 212 /* Tx checksum info for a 
packet. */ 213 struct ck_info { 214 int ck_valid; /* ck_info is valid */ 215 uint8_t ck_start; /* start byte of cksum calcuation */ 216 uint8_t ck_off; /* offset of cksum insertion */ 217 uint16_t ck_len; /* length of cksum calc: 0 is to packet-end */ 218 }; 219 220 /* 221 * Debug printf 222 */ 223 static int e82545_debug = 0; 224 #define DPRINTF(msg,params...) if (e82545_debug) fprintf(stderr, "e82545: " msg, params) 225 #define WPRINTF(msg,params...) fprintf(stderr, "e82545: " msg, params) 226 227 #define MIN(a,b) (((a)<(b))?(a):(b)) 228 #define MAX(a,b) (((a)>(b))?(a):(b)) 229 230 /* s/w representation of the RAL/RAH regs */ 231 struct eth_uni { 232 int eu_valid; 233 int eu_addrsel; 234 struct ether_addr eu_eth; 235 }; 236 237 238 struct e82545_softc { 239 struct pci_devinst *esc_pi; 240 struct vmctx *esc_ctx; 241 struct mevent *esc_mevp; 242 struct mevent *esc_mevpitr; 243 pthread_mutex_t esc_mtx; 244 struct ether_addr esc_mac; 245 int esc_tapfd; 246 247 /* General */ 248 uint32_t esc_CTRL; /* x0000 device ctl */ 249 uint32_t esc_FCAL; /* x0028 flow ctl addr lo */ 250 uint32_t esc_FCAH; /* x002C flow ctl addr hi */ 251 uint32_t esc_FCT; /* x0030 flow ctl type */ 252 uint32_t esc_VET; /* x0038 VLAN eth type */ 253 uint32_t esc_FCTTV; /* x0170 flow ctl tx timer */ 254 uint32_t esc_LEDCTL; /* x0E00 LED control */ 255 uint32_t esc_PBA; /* x1000 pkt buffer allocation */ 256 257 /* Interrupt control */ 258 int esc_irq_asserted; 259 uint32_t esc_ICR; /* x00C0 cause read/clear */ 260 uint32_t esc_ITR; /* x00C4 intr throttling */ 261 uint32_t esc_ICS; /* x00C8 cause set */ 262 uint32_t esc_IMS; /* x00D0 mask set/read */ 263 uint32_t esc_IMC; /* x00D8 mask clear */ 264 265 /* Transmit */ 266 union e1000_tx_udesc *esc_txdesc; 267 struct e1000_context_desc esc_txctx; 268 pthread_t esc_tx_tid; 269 pthread_cond_t esc_tx_cond; 270 int esc_tx_enabled; 271 int esc_tx_active; 272 uint32_t esc_TXCW; /* x0178 transmit config */ 273 uint32_t esc_TCTL; /* x0400 transmit ctl */ 274 uint32_t esc_TIPG; /* x0410 inter-packet gap */ 275 uint16_t esc_AIT; /* x0458 Adaptive Interframe Throttle */ 276 uint64_t esc_tdba; /* verified 64-bit desc table addr */ 277 uint32_t esc_TDBAL; /* x3800 desc table addr, low bits */ 278 uint32_t esc_TDBAH; /* x3804 desc table addr, hi 32-bits */ 279 uint32_t esc_TDLEN; /* x3808 # descriptors in bytes */ 280 uint16_t esc_TDH; /* x3810 desc table head idx */ 281 uint16_t esc_TDHr; /* internal read version of TDH */ 282 uint16_t esc_TDT; /* x3818 desc table tail idx */ 283 uint32_t esc_TIDV; /* x3820 intr delay */ 284 uint32_t esc_TXDCTL; /* x3828 desc control */ 285 uint32_t esc_TADV; /* x382C intr absolute delay */ 286 287 /* L2 frame acceptance */ 288 struct eth_uni esc_uni[16]; /* 16 x unicast MAC addresses */ 289 uint32_t esc_fmcast[128]; /* Multicast filter bit-match */ 290 uint32_t esc_fvlan[128]; /* VLAN 4096-bit filter */ 291 292 /* Receive */ 293 struct e1000_rx_desc *esc_rxdesc; 294 pthread_cond_t esc_rx_cond; 295 int esc_rx_enabled; 296 int esc_rx_active; 297 int esc_rx_loopback; 298 uint32_t esc_RCTL; /* x0100 receive ctl */ 299 uint32_t esc_FCRTL; /* x2160 flow cntl thresh, low */ 300 uint32_t esc_FCRTH; /* x2168 flow cntl thresh, hi */ 301 uint64_t esc_rdba; /* verified 64-bit desc table addr */ 302 uint32_t esc_RDBAL; /* x2800 desc table addr, low bits */ 303 uint32_t esc_RDBAH; /* x2804 desc table addr, hi 32-bits*/ 304 uint32_t esc_RDLEN; /* x2808 #descriptors */ 305 uint16_t esc_RDH; /* x2810 desc table head idx */ 306 uint16_t esc_RDT; /* x2818 desc table tail 
idx */ 307 uint32_t esc_RDTR; /* x2820 intr delay */ 308 uint32_t esc_RXDCTL; /* x2828 desc control */ 309 uint32_t esc_RADV; /* x282C intr absolute delay */ 310 uint32_t esc_RSRPD; /* x2C00 recv small packet detect */ 311 uint32_t esc_RXCSUM; /* x5000 receive cksum ctl */ 312 313 /* IO Port register access */ 314 uint32_t io_addr; 315 316 /* Shadow copy of MDIC */ 317 uint32_t mdi_control; 318 /* Shadow copy of EECD */ 319 uint32_t eeprom_control; 320 /* Latest NVM in/out */ 321 uint16_t nvm_data; 322 uint16_t nvm_opaddr; 323 /* stats */ 324 uint32_t missed_pkt_count; /* dropped for no room in rx queue */ 325 uint32_t pkt_rx_by_size[6]; 326 uint32_t pkt_tx_by_size[6]; 327 uint32_t good_pkt_rx_count; 328 uint32_t bcast_pkt_rx_count; 329 uint32_t mcast_pkt_rx_count; 330 uint32_t good_pkt_tx_count; 331 uint32_t bcast_pkt_tx_count; 332 uint32_t mcast_pkt_tx_count; 333 uint32_t oversize_rx_count; 334 uint32_t tso_tx_count; 335 uint64_t good_octets_rx; 336 uint64_t good_octets_tx; 337 uint64_t missed_octets; /* counts missed and oversized */ 338 339 uint8_t nvm_bits:6; /* number of bits remaining in/out */ 340 uint8_t nvm_mode:2; 341 #define E82545_NVM_MODE_OPADDR 0x0 342 #define E82545_NVM_MODE_DATAIN 0x1 343 #define E82545_NVM_MODE_DATAOUT 0x2 344 /* EEPROM data */ 345 uint16_t eeprom_data[E82545_NVM_EEPROM_SIZE]; 346 }; 347 348 static void e82545_reset(struct e82545_softc *sc, int dev); 349 static void e82545_rx_enable(struct e82545_softc *sc); 350 static void e82545_rx_disable(struct e82545_softc *sc); 351 static void e82545_tap_callback(int fd, enum ev_type type, void *param); 352 static void e82545_tx_start(struct e82545_softc *sc); 353 static void e82545_tx_enable(struct e82545_softc *sc); 354 static void e82545_tx_disable(struct e82545_softc *sc); 355 356 static inline int 357 e82545_size_stat_index(uint32_t size) 358 { 359 if (size <= 64) { 360 return 0; 361 } else if (size >= 1024) { 362 return 5; 363 } else { 364 /* should be 1-4 */ 365 return (ffs(size) - 6); 366 } 367 } 368 369 static void 370 e82545_init_eeprom(struct e82545_softc *sc) 371 { 372 uint16_t checksum, i; 373 374 /* mac addr */ 375 sc->eeprom_data[NVM_MAC_ADDR] = ((uint16_t)sc->esc_mac.octet[0]) | 376 (((uint16_t)sc->esc_mac.octet[1]) << 8); 377 sc->eeprom_data[NVM_MAC_ADDR+1] = ((uint16_t)sc->esc_mac.octet[2]) | 378 (((uint16_t)sc->esc_mac.octet[3]) << 8); 379 sc->eeprom_data[NVM_MAC_ADDR+2] = ((uint16_t)sc->esc_mac.octet[4]) | 380 (((uint16_t)sc->esc_mac.octet[5]) << 8); 381 382 /* pci ids */ 383 sc->eeprom_data[NVM_SUB_DEV_ID] = E82545_SUBDEV_ID; 384 sc->eeprom_data[NVM_SUB_VEN_ID] = E82545_VENDOR_ID_INTEL; 385 sc->eeprom_data[NVM_DEV_ID] = E82545_DEV_ID_82545EM_COPPER; 386 sc->eeprom_data[NVM_VEN_ID] = E82545_VENDOR_ID_INTEL; 387 388 /* fill in the checksum */ 389 checksum = 0; 390 for (i = 0; i < NVM_CHECKSUM_REG; i++) { 391 checksum += sc->eeprom_data[i]; 392 } 393 checksum = NVM_SUM - checksum; 394 sc->eeprom_data[NVM_CHECKSUM_REG] = checksum; 395 DPRINTF("eeprom checksum: 0x%x\r\n", checksum); 396 } 397 398 static void 399 e82545_write_mdi(struct e82545_softc *sc, uint8_t reg_addr, 400 uint8_t phy_addr, uint32_t data) 401 { 402 DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x\r\n", reg_addr, phy_addr, data); 403 } 404 405 static uint32_t 406 e82545_read_mdi(struct e82545_softc *sc, uint8_t reg_addr, 407 uint8_t phy_addr) 408 { 409 //DPRINTF("Read mdi reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr); 410 switch (reg_addr) { 411 case PHY_STATUS: 412 return (MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS | 413 
MII_SR_AUTONEG_COMPLETE); 414 case PHY_AUTONEG_ADV: 415 return NWAY_AR_SELECTOR_FIELD; 416 case PHY_LP_ABILITY: 417 return 0; 418 case PHY_1000T_STATUS: 419 return (SR_1000T_LP_FD_CAPS | SR_1000T_REMOTE_RX_STATUS | 420 SR_1000T_LOCAL_RX_STATUS); 421 case PHY_ID1: 422 return (M88E1011_I_PHY_ID >> 16) & 0xFFFF; 423 case PHY_ID2: 424 return (M88E1011_I_PHY_ID | E82545_REVISION_4) & 0xFFFF; 425 default: 426 DPRINTF("Unknown mdi read reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr); 427 return 0; 428 } 429 /* not reached */ 430 } 431 432 static void 433 e82545_eecd_strobe(struct e82545_softc *sc) 434 { 435 /* Microwire state machine */ 436 /* 437 DPRINTF("eeprom state machine srtobe " 438 "0x%x 0x%x 0x%x 0x%x\r\n", 439 sc->nvm_mode, sc->nvm_bits, 440 sc->nvm_opaddr, sc->nvm_data);*/ 441 442 if (sc->nvm_bits == 0) { 443 DPRINTF("eeprom state machine not expecting data! " 444 "0x%x 0x%x 0x%x 0x%x\r\n", 445 sc->nvm_mode, sc->nvm_bits, 446 sc->nvm_opaddr, sc->nvm_data); 447 return; 448 } 449 sc->nvm_bits--; 450 if (sc->nvm_mode == E82545_NVM_MODE_DATAOUT) { 451 /* shifting out */ 452 if (sc->nvm_data & 0x8000) { 453 sc->eeprom_control |= E1000_EECD_DO; 454 } else { 455 sc->eeprom_control &= ~E1000_EECD_DO; 456 } 457 sc->nvm_data <<= 1; 458 if (sc->nvm_bits == 0) { 459 /* read done, back to opcode mode. */ 460 sc->nvm_opaddr = 0; 461 sc->nvm_mode = E82545_NVM_MODE_OPADDR; 462 sc->nvm_bits = E82545_NVM_OPADDR_BITS; 463 } 464 } else if (sc->nvm_mode == E82545_NVM_MODE_DATAIN) { 465 /* shifting in */ 466 sc->nvm_data <<= 1; 467 if (sc->eeprom_control & E1000_EECD_DI) { 468 sc->nvm_data |= 1; 469 } 470 if (sc->nvm_bits == 0) { 471 /* eeprom write */ 472 uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; 473 uint16_t addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK; 474 if (op != E82545_NVM_OPCODE_WRITE) { 475 DPRINTF("Illegal eeprom write op 0x%x\r\n", 476 sc->nvm_opaddr); 477 } else if (addr >= E82545_NVM_EEPROM_SIZE) { 478 DPRINTF("Illegal eeprom write addr 0x%x\r\n", 479 sc->nvm_opaddr); 480 } else { 481 DPRINTF("eeprom write eeprom[0x%x] = 0x%x\r\n", 482 addr, sc->nvm_data); 483 sc->eeprom_data[addr] = sc->nvm_data; 484 } 485 /* back to opcode mode */ 486 sc->nvm_opaddr = 0; 487 sc->nvm_mode = E82545_NVM_MODE_OPADDR; 488 sc->nvm_bits = E82545_NVM_OPADDR_BITS; 489 } 490 } else if (sc->nvm_mode == E82545_NVM_MODE_OPADDR) { 491 sc->nvm_opaddr <<= 1; 492 if (sc->eeprom_control & E1000_EECD_DI) { 493 sc->nvm_opaddr |= 1; 494 } 495 if (sc->nvm_bits == 0) { 496 uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; 497 switch (op) { 498 case E82545_NVM_OPCODE_EWEN: 499 DPRINTF("eeprom write enable: 0x%x\r\n", 500 sc->nvm_opaddr); 501 /* back to opcode mode */ 502 sc->nvm_opaddr = 0; 503 sc->nvm_mode = E82545_NVM_MODE_OPADDR; 504 sc->nvm_bits = E82545_NVM_OPADDR_BITS; 505 break; 506 case E82545_NVM_OPCODE_READ: 507 { 508 uint16_t addr = sc->nvm_opaddr & 509 E82545_NVM_ADDR_MASK; 510 sc->nvm_mode = E82545_NVM_MODE_DATAOUT; 511 sc->nvm_bits = E82545_NVM_DATA_BITS; 512 if (addr < E82545_NVM_EEPROM_SIZE) { 513 sc->nvm_data = sc->eeprom_data[addr]; 514 DPRINTF("eeprom read: eeprom[0x%x] = 0x%x\r\n", 515 addr, sc->nvm_data); 516 } else { 517 DPRINTF("eeprom illegal read: 0x%x\r\n", 518 sc->nvm_opaddr); 519 sc->nvm_data = 0; 520 } 521 break; 522 } 523 case E82545_NVM_OPCODE_WRITE: 524 sc->nvm_mode = E82545_NVM_MODE_DATAIN; 525 sc->nvm_bits = E82545_NVM_DATA_BITS; 526 sc->nvm_data = 0; 527 break; 528 default: 529 DPRINTF("eeprom unknown op: 0x%x\r\r", 530 sc->nvm_opaddr); 531 /* back to opcode mode */ 532 sc->nvm_opaddr 
= 0; 533 sc->nvm_mode = E82545_NVM_MODE_OPADDR; 534 sc->nvm_bits = E82545_NVM_OPADDR_BITS; 535 } 536 } 537 } else { 538 DPRINTF("eeprom state machine wrong state! " 539 "0x%x 0x%x 0x%x 0x%x\r\n", 540 sc->nvm_mode, sc->nvm_bits, 541 sc->nvm_opaddr, sc->nvm_data); 542 } 543 } 544 545 static void 546 e82545_itr_callback(int fd, enum ev_type type, void *param) 547 { 548 uint32_t new; 549 struct e82545_softc *sc = param; 550 551 pthread_mutex_lock(&sc->esc_mtx); 552 new = sc->esc_ICR & sc->esc_IMS; 553 if (new && !sc->esc_irq_asserted) { 554 DPRINTF("itr callback: lintr assert %x\r\n", new); 555 sc->esc_irq_asserted = 1; 556 pci_lintr_assert(sc->esc_pi); 557 } else { 558 mevent_delete(sc->esc_mevpitr); 559 sc->esc_mevpitr = NULL; 560 } 561 pthread_mutex_unlock(&sc->esc_mtx); 562 } 563 564 static void 565 e82545_icr_assert(struct e82545_softc *sc, uint32_t bits) 566 { 567 uint32_t new; 568 569 DPRINTF("icr assert: 0x%x\r\n", bits); 570 571 /* 572 * An interrupt is only generated if bits are set that 573 * aren't already in the ICR, these bits are unmasked, 574 * and there isn't an interrupt already pending. 575 */ 576 new = bits & ~sc->esc_ICR & sc->esc_IMS; 577 sc->esc_ICR |= bits; 578 579 if (new == 0) { 580 DPRINTF("icr assert: masked %x, ims %x\r\n", new, sc->esc_IMS); 581 } else if (sc->esc_mevpitr != NULL) { 582 DPRINTF("icr assert: throttled %x, ims %x\r\n", new, sc->esc_IMS); 583 } else if (!sc->esc_irq_asserted) { 584 DPRINTF("icr assert: lintr assert %x\r\n", new); 585 sc->esc_irq_asserted = 1; 586 pci_lintr_assert(sc->esc_pi); 587 if (sc->esc_ITR != 0) { 588 sc->esc_mevpitr = mevent_add( 589 (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ 590 EVF_TIMER, e82545_itr_callback, sc); 591 } 592 } 593 } 594 595 static void 596 e82545_ims_change(struct e82545_softc *sc, uint32_t bits) 597 { 598 uint32_t new; 599 600 /* 601 * Changing the mask may allow previously asserted 602 * but masked interrupt requests to generate an interrupt. 
603 */ 604 new = bits & sc->esc_ICR & ~sc->esc_IMS; 605 sc->esc_IMS |= bits; 606 607 if (new == 0) { 608 DPRINTF("ims change: masked %x, ims %x\r\n", new, sc->esc_IMS); 609 } else if (sc->esc_mevpitr != NULL) { 610 DPRINTF("ims change: throttled %x, ims %x\r\n", new, sc->esc_IMS); 611 } else if (!sc->esc_irq_asserted) { 612 DPRINTF("ims change: lintr assert %x\n\r", new); 613 sc->esc_irq_asserted = 1; 614 pci_lintr_assert(sc->esc_pi); 615 if (sc->esc_ITR != 0) { 616 sc->esc_mevpitr = mevent_add( 617 (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ 618 EVF_TIMER, e82545_itr_callback, sc); 619 } 620 } 621 } 622 623 static void 624 e82545_icr_deassert(struct e82545_softc *sc, uint32_t bits) 625 { 626 627 DPRINTF("icr deassert: 0x%x\r\n", bits); 628 sc->esc_ICR &= ~bits; 629 630 /* 631 * If there are no longer any interrupt sources and there 632 * was an asserted interrupt, clear it 633 */ 634 if (sc->esc_irq_asserted && !(sc->esc_ICR & sc->esc_IMS)) { 635 DPRINTF("icr deassert: lintr deassert %x\r\n", bits); 636 pci_lintr_deassert(sc->esc_pi); 637 sc->esc_irq_asserted = 0; 638 } 639 } 640 641 static void 642 e82545_intr_write(struct e82545_softc *sc, uint32_t offset, uint32_t value) 643 { 644 645 DPRINTF("intr_write: off %x, val %x\n\r", offset, value); 646 647 switch (offset) { 648 case E1000_ICR: 649 e82545_icr_deassert(sc, value); 650 break; 651 case E1000_ITR: 652 sc->esc_ITR = value; 653 break; 654 case E1000_ICS: 655 sc->esc_ICS = value; /* not used: store for debug */ 656 e82545_icr_assert(sc, value); 657 break; 658 case E1000_IMS: 659 e82545_ims_change(sc, value); 660 break; 661 case E1000_IMC: 662 sc->esc_IMC = value; /* for debug */ 663 sc->esc_IMS &= ~value; 664 // XXX clear interrupts if all ICR bits now masked 665 // and interrupt was pending ? 666 break; 667 default: 668 break; 669 } 670 } 671 672 static uint32_t 673 e82545_intr_read(struct e82545_softc *sc, uint32_t offset) 674 { 675 uint32_t retval; 676 677 retval = 0; 678 679 DPRINTF("intr_read: off %x\n\r", offset); 680 681 switch (offset) { 682 case E1000_ICR: 683 retval = sc->esc_ICR; 684 sc->esc_ICR = 0; 685 e82545_icr_deassert(sc, ~0); 686 break; 687 case E1000_ITR: 688 retval = sc->esc_ITR; 689 break; 690 case E1000_ICS: 691 /* write-only register */ 692 break; 693 case E1000_IMS: 694 retval = sc->esc_IMS; 695 break; 696 case E1000_IMC: 697 /* write-only register */ 698 break; 699 default: 700 break; 701 } 702 703 return (retval); 704 } 705 706 static void 707 e82545_devctl(struct e82545_softc *sc, uint32_t val) 708 { 709 710 sc->esc_CTRL = val & ~E1000_CTRL_RST; 711 712 if (val & E1000_CTRL_RST) { 713 DPRINTF("e1k: s/w reset, ctl %x\n", val); 714 e82545_reset(sc, 1); 715 } 716 /* XXX check for phy reset ? */ 717 } 718 719 static void 720 e82545_rx_update_rdba(struct e82545_softc *sc) 721 { 722 723 /* XXX verify desc base/len within phys mem range */ 724 sc->esc_rdba = (uint64_t)sc->esc_RDBAH << 32 | 725 sc->esc_RDBAL; 726 727 /* Cache host mapping of guest descriptor array */ 728 sc->esc_rxdesc = paddr_guest2host(sc->esc_ctx, 729 sc->esc_rdba, sc->esc_RDLEN); 730 } 731 732 static void 733 e82545_rx_ctl(struct e82545_softc *sc, uint32_t val) 734 { 735 int on; 736 737 on = ((val & E1000_RCTL_EN) == E1000_RCTL_EN); 738 739 /* Save RCTL after stripping reserved bits 31:27,24,21,14,11:10,0 */ 740 sc->esc_RCTL = val & ~0xF9204c01; 741 742 DPRINTF("rx_ctl - %s RCTL %x, val %x\n", 743 on ? 
"on" : "off", sc->esc_RCTL, val); 744 745 /* state change requested */ 746 if (on != sc->esc_rx_enabled) { 747 if (on) { 748 /* Catch disallowed/unimplemented settings */ 749 //assert(!(val & E1000_RCTL_LBM_TCVR)); 750 751 if (sc->esc_RCTL & E1000_RCTL_LBM_TCVR) { 752 sc->esc_rx_loopback = 1; 753 } else { 754 sc->esc_rx_loopback = 0; 755 } 756 757 e82545_rx_update_rdba(sc); 758 e82545_rx_enable(sc); 759 } else { 760 e82545_rx_disable(sc); 761 sc->esc_rx_loopback = 0; 762 sc->esc_rdba = 0; 763 sc->esc_rxdesc = NULL; 764 } 765 } 766 } 767 768 static void 769 e82545_tx_update_tdba(struct e82545_softc *sc) 770 { 771 772 /* XXX verify desc base/len within phys mem range */ 773 sc->esc_tdba = (uint64_t)sc->esc_TDBAH << 32 | sc->esc_TDBAL; 774 775 /* Cache host mapping of guest descriptor array */ 776 sc->esc_txdesc = paddr_guest2host(sc->esc_ctx, sc->esc_tdba, 777 sc->esc_TDLEN); 778 } 779 780 static void 781 e82545_tx_ctl(struct e82545_softc *sc, uint32_t val) 782 { 783 int on; 784 785 on = ((val & E1000_TCTL_EN) == E1000_TCTL_EN); 786 787 /* ignore TCTL_EN settings that don't change state */ 788 if (on == sc->esc_tx_enabled) 789 return; 790 791 if (on) { 792 e82545_tx_update_tdba(sc); 793 e82545_tx_enable(sc); 794 } else { 795 e82545_tx_disable(sc); 796 sc->esc_tdba = 0; 797 sc->esc_txdesc = NULL; 798 } 799 800 /* Save TCTL value after stripping reserved bits 31:25,23,2,0 */ 801 sc->esc_TCTL = val & ~0xFE800005; 802 } 803 804 int 805 e82545_bufsz(uint32_t rctl) 806 { 807 808 switch (rctl & (E1000_RCTL_BSEX | E1000_RCTL_SZ_256)) { 809 case (E1000_RCTL_SZ_2048): return (2048); 810 case (E1000_RCTL_SZ_1024): return (1024); 811 case (E1000_RCTL_SZ_512): return (512); 812 case (E1000_RCTL_SZ_256): return (256); 813 case (E1000_RCTL_BSEX|E1000_RCTL_SZ_16384): return (16384); 814 case (E1000_RCTL_BSEX|E1000_RCTL_SZ_8192): return (8192); 815 case (E1000_RCTL_BSEX|E1000_RCTL_SZ_4096): return (4096); 816 } 817 return (256); /* Forbidden value. */ 818 } 819 820 static uint8_t dummybuf[2048]; 821 822 /* XXX one packet at a time until this is debugged */ 823 static void 824 e82545_tap_callback(int fd, enum ev_type type, void *param) 825 { 826 struct e82545_softc *sc = param; 827 struct e1000_rx_desc *rxd; 828 struct iovec vec[64]; 829 int left, len, lim, maxpktsz, maxpktdesc, bufsz, i, n, size; 830 uint32_t cause = 0; 831 uint16_t *tp, tag, head; 832 833 pthread_mutex_lock(&sc->esc_mtx); 834 DPRINTF("rx_run: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT); 835 836 if (!sc->esc_rx_enabled || sc->esc_rx_loopback) { 837 DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped\r\n", 838 sc->esc_rx_enabled, sc->esc_rx_loopback); 839 while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) { 840 } 841 goto done1; 842 } 843 bufsz = e82545_bufsz(sc->esc_RCTL); 844 maxpktsz = (sc->esc_RCTL & E1000_RCTL_LPE) ? 
16384 : 1522; 845 maxpktdesc = (maxpktsz + bufsz - 1) / bufsz; 846 size = sc->esc_RDLEN / 16; 847 head = sc->esc_RDH; 848 left = (size + sc->esc_RDT - head) % size; 849 if (left < maxpktdesc) { 850 DPRINTF("rx overflow (%d < %d) -- packet(s) dropped\r\n", 851 left, maxpktdesc); 852 while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) { 853 } 854 goto done1; 855 } 856 857 sc->esc_rx_active = 1; 858 pthread_mutex_unlock(&sc->esc_mtx); 859 860 for (lim = size / 4; lim > 0 && left >= maxpktdesc; lim -= n) { 861 862 /* Grab rx descriptor pointed to by the head pointer */ 863 for (i = 0; i < maxpktdesc; i++) { 864 rxd = &sc->esc_rxdesc[(head + i) % size]; 865 vec[i].iov_base = paddr_guest2host(sc->esc_ctx, 866 rxd->buffer_addr, bufsz); 867 vec[i].iov_len = bufsz; 868 } 869 len = readv(sc->esc_tapfd, vec, maxpktdesc); 870 if (len <= 0) { 871 DPRINTF("tap: readv() returned %d\n", len); 872 goto done; 873 } 874 875 /* 876 * Adjust the packet length based on whether the CRC needs 877 * to be stripped or if the packet is less than the minimum 878 * eth packet size. 879 */ 880 if (len < ETHER_MIN_LEN - ETHER_CRC_LEN) 881 len = ETHER_MIN_LEN - ETHER_CRC_LEN; 882 if (!(sc->esc_RCTL & E1000_RCTL_SECRC)) 883 len += ETHER_CRC_LEN; 884 n = (len + bufsz - 1) / bufsz; 885 886 DPRINTF("packet read %d bytes, %d segs, head %d\r\n", 887 len, n, head); 888 889 /* Apply VLAN filter. */ 890 tp = (uint16_t *)vec[0].iov_base + 6; 891 if ((sc->esc_RCTL & E1000_RCTL_VFE) && 892 (ntohs(tp[0]) == sc->esc_VET)) { 893 tag = ntohs(tp[1]) & 0x0fff; 894 if ((sc->esc_fvlan[tag >> 5] & 895 (1 << (tag & 0x1f))) != 0) { 896 DPRINTF("known VLAN %d\r\n", tag); 897 } else { 898 DPRINTF("unknown VLAN %d\r\n", tag); 899 n = 0; 900 continue; 901 } 902 } 903 904 /* Update all consumed descriptors. */ 905 for (i = 0; i < n - 1; i++) { 906 rxd = &sc->esc_rxdesc[(head + i) % size]; 907 rxd->length = bufsz; 908 rxd->csum = 0; 909 rxd->errors = 0; 910 rxd->special = 0; 911 rxd->status = E1000_RXD_STAT_DD; 912 } 913 rxd = &sc->esc_rxdesc[(head + i) % size]; 914 rxd->length = len % bufsz; 915 rxd->csum = 0; 916 rxd->errors = 0; 917 rxd->special = 0; 918 /* XXX signal no checksum for now */ 919 rxd->status = E1000_RXD_STAT_PIF | E1000_RXD_STAT_IXSM | 920 E1000_RXD_STAT_EOP | E1000_RXD_STAT_DD; 921 922 /* Schedule receive interrupts. */ 923 if (len <= sc->esc_RSRPD) { 924 cause |= E1000_ICR_SRPD | E1000_ICR_RXT0; 925 } else { 926 /* XXX: RDRT and RADV timers should be here. */ 927 cause |= E1000_ICR_RXT0; 928 } 929 930 head = (head + n) % size; 931 left -= n; 932 } 933 934 done: 935 pthread_mutex_lock(&sc->esc_mtx); 936 sc->esc_rx_active = 0; 937 if (sc->esc_rx_enabled == 0) 938 pthread_cond_signal(&sc->esc_rx_cond); 939 940 sc->esc_RDH = head; 941 /* Respect E1000_RCTL_RDMTS */ 942 left = (size + sc->esc_RDT - head) % size; 943 if (left < (size >> (((sc->esc_RCTL >> 8) & 3) + 1))) 944 cause |= E1000_ICR_RXDMT0; 945 /* Assert all accumulated interrupts. */ 946 if (cause != 0) 947 e82545_icr_assert(sc, cause); 948 done1: 949 DPRINTF("rx_run done: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT); 950 pthread_mutex_unlock(&sc->esc_mtx); 951 } 952 953 static uint16_t 954 e82545_carry(uint32_t sum) 955 { 956 957 sum = (sum & 0xFFFF) + (sum >> 16); 958 if (sum > 0xFFFF) 959 sum -= 0xFFFF; 960 return (sum); 961 } 962 963 static uint16_t 964 e82545_buf_checksum(uint8_t *buf, int len) 965 { 966 int i; 967 uint32_t sum = 0; 968 969 /* Checksum all the pairs of bytes first... 
*/ 970 for (i = 0; i < (len & ~1U); i += 2) 971 sum += *((u_int16_t *)(buf + i)); 972 973 /* 974 * If there's a single byte left over, checksum it, too. 975 * Network byte order is big-endian, so the remaining byte is 976 * the high byte. 977 */ 978 if (i < len) 979 sum += htons(buf[i] << 8); 980 981 return (e82545_carry(sum)); 982 } 983 984 static uint16_t 985 e82545_iov_checksum(struct iovec *iov, int iovcnt, int off, int len) 986 { 987 int now, odd; 988 uint32_t sum = 0, s; 989 990 /* Skip completely unneeded vectors. */ 991 while (iovcnt > 0 && iov->iov_len <= off && off > 0) { 992 off -= iov->iov_len; 993 iov++; 994 iovcnt--; 995 } 996 997 /* Calculate checksum of requested range. */ 998 odd = 0; 999 while (len > 0 && iovcnt > 0) { 1000 now = MIN(len, iov->iov_len - off); 1001 s = e82545_buf_checksum(iov->iov_base + off, now); 1002 sum += odd ? (s << 8) : s; 1003 odd ^= (now & 1); 1004 len -= now; 1005 off = 0; 1006 iov++; 1007 iovcnt--; 1008 } 1009 1010 return (e82545_carry(sum)); 1011 } 1012 1013 /* 1014 * Return the transmit descriptor type. 1015 */ 1016 int 1017 e82545_txdesc_type(uint32_t lower) 1018 { 1019 int type; 1020 1021 type = 0; 1022 1023 if (lower & E1000_TXD_CMD_DEXT) 1024 type = lower & E1000_TXD_MASK; 1025 1026 return (type); 1027 } 1028 1029 static void 1030 e82545_transmit_checksum(struct iovec *iov, int iovcnt, struct ck_info *ck) 1031 { 1032 uint16_t cksum; 1033 int cklen; 1034 1035 DPRINTF("tx cksum: iovcnt/s/off/len %d/%d/%d/%d\r\n", 1036 iovcnt, ck->ck_start, ck->ck_off, ck->ck_len); 1037 cklen = ck->ck_len ? ck->ck_len - ck->ck_start + 1 : INT_MAX; 1038 cksum = e82545_iov_checksum(iov, iovcnt, ck->ck_start, cklen); 1039 *(uint16_t *)((uint8_t *)iov[0].iov_base + ck->ck_off) = ~cksum; 1040 } 1041 1042 static void 1043 e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt) 1044 { 1045 1046 if (sc->esc_tapfd == -1) 1047 return; 1048 1049 (void) writev(sc->esc_tapfd, iov, iovcnt); 1050 } 1051 1052 static void 1053 e82545_transmit_done(struct e82545_softc *sc, union e1000_tx_udesc **txwb, 1054 int nwb) 1055 { 1056 int i; 1057 1058 /* Write-back tx descriptor status */ 1059 for (i = 0; i < nwb; i++) 1060 txwb[i]->td.upper.data |= E1000_TXD_STAT_DD; 1061 /* XXX wmb() */ 1062 } 1063 1064 static int 1065 e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, 1066 uint16_t dsize, uint16_t *rhead, int *tdwb) 1067 { 1068 uint8_t *hdr, *hdrp; 1069 struct iovec iovb[I82545_MAX_TXSEGS + 2]; 1070 struct iovec tiov[I82545_MAX_TXSEGS + 2]; 1071 union e1000_tx_udesc *txwb[I82545_MAX_TXSEGS]; 1072 struct e1000_context_desc *cd; 1073 struct ck_info ckinfo[2]; 1074 struct iovec *iov; 1075 union e1000_tx_udesc *dsc; 1076 int desc, dtype, len, ntype, nwb, iovcnt, tlen, hdrlen, vlen, tcp, tso; 1077 int mss, paylen, seg, tiovcnt, left, now, nleft, nnow, pv, pvoff; 1078 uint32_t tcpsum, tcpseq; 1079 uint16_t ipcs, tcpcs, ipid; 1080 1081 ckinfo[0].ck_valid = ckinfo[1].ck_valid = 0; 1082 iovcnt = 0; 1083 tlen = 0; 1084 nwb = 0; 1085 ntype = 0; 1086 tso = 0; 1087 1088 /* iovb[0/1] may be used for writable copy of headers. 
*/ 1089 iov = &iovb[2]; 1090 1091 for (desc = 0; ; desc++, head = (head + 1) % dsize) { 1092 if (head == tail) { 1093 *rhead = head; 1094 return (0); 1095 } 1096 dsc = &sc->esc_txdesc[head]; 1097 dtype = e82545_txdesc_type(dsc->td.lower.data); 1098 1099 if (desc == 0) { 1100 switch (dtype) { 1101 case E1000_TXD_TYP_C: 1102 DPRINTF("tx ctxt desc idx %d: %016jx " 1103 "%08x%08x\r\n", 1104 head, dsc->td.buffer_addr, 1105 dsc->td.upper.data, dsc->td.lower.data); 1106 /* Save context and return */ 1107 /* XXX ignore DD processing here */ 1108 sc->esc_txctx = dsc->cd; 1109 *rhead = (head + 1) % dsize; 1110 return (1); 1111 break; 1112 case E1000_TXD_TYP_L: 1113 DPRINTF("tx legacy desc idx %d: %08x%08x\r\n", 1114 head, dsc->td.upper.data, dsc->td.lower.data); 1115 /* 1116 * legacy cksum start valid in first descriptor 1117 */ 1118 ntype = dtype; 1119 ckinfo[0].ck_start = dsc->td.upper.fields.css; 1120 break; 1121 case E1000_TXD_TYP_D: 1122 DPRINTF("tx data desc idx %d: %08x%08x\r\n", 1123 head, dsc->td.upper.data, dsc->td.lower.data); 1124 ntype = dtype; 1125 break; 1126 default: 1127 break; 1128 } 1129 } else { 1130 /* Descriptor type must be consistent */ 1131 assert(dtype == ntype); 1132 DPRINTF("tx next desc idx %d: %08x%08x\r\n", 1133 head, dsc->td.upper.data, dsc->td.lower.data); 1134 } 1135 1136 len = (dtype == E1000_TXD_TYP_L) ? dsc->td.lower.flags.length : 1137 dsc->dd.lower.data & 0xFFFFF; 1138 1139 if (len > 0) { 1140 /* Strip checksum supplied by guest. */ 1141 if ((dsc->td.lower.data & E1000_TXD_CMD_EOP) != 0 && 1142 (dsc->td.lower.data & E1000_TXD_CMD_IFCS) == 0) 1143 len -= 2; 1144 tlen += len; 1145 iov[iovcnt].iov_base = paddr_guest2host(sc->esc_ctx, 1146 dsc->td.buffer_addr, len); 1147 iov[iovcnt].iov_len = len; 1148 iovcnt++; 1149 } 1150 1151 /* Record the descriptor addres if write-back requested */ 1152 if (dsc->td.lower.data & E1000_TXD_CMD_RS) 1153 txwb[nwb++] = dsc; 1154 1155 /* 1156 * Pull out info that is valid in the final descriptor 1157 * and exit descriptor loop. 1158 */ 1159 if (dsc->td.lower.data & E1000_TXD_CMD_EOP) { 1160 if (dtype == E1000_TXD_TYP_L) { 1161 if (dsc->td.lower.data & E1000_TXD_CMD_IC) { 1162 ckinfo[0].ck_valid = 1; 1163 ckinfo[0].ck_off = 1164 dsc->td.lower.flags.cso; 1165 ckinfo[0].ck_len = 0; 1166 } 1167 } else { 1168 cd = &sc->esc_txctx; 1169 if (dsc->dd.lower.data & E1000_TXD_CMD_TSE) 1170 tso = 1; 1171 if (dsc->dd.upper.fields.popts & 1172 E1000_TXD_POPTS_IXSM) 1173 ckinfo[0].ck_valid = 1; 1174 if (dsc->dd.upper.fields.popts & 1175 E1000_TXD_POPTS_IXSM || tso) { 1176 ckinfo[0].ck_start = 1177 cd->lower_setup.ip_fields.ipcss; 1178 ckinfo[0].ck_off = 1179 cd->lower_setup.ip_fields.ipcso; 1180 ckinfo[0].ck_len = 1181 cd->lower_setup.ip_fields.ipcse; 1182 } 1183 if (dsc->dd.upper.fields.popts & 1184 E1000_TXD_POPTS_TXSM) 1185 ckinfo[1].ck_valid = 1; 1186 if (dsc->dd.upper.fields.popts & 1187 E1000_TXD_POPTS_TXSM || tso) { 1188 ckinfo[1].ck_start = 1189 cd->upper_setup.tcp_fields.tucss; 1190 ckinfo[1].ck_off = 1191 cd->upper_setup.tcp_fields.tucso; 1192 ckinfo[1].ck_len = 1193 cd->upper_setup.tcp_fields.tucse; 1194 } 1195 } 1196 break; 1197 } 1198 } 1199 1200 hdrlen = vlen = 0; 1201 /* Estimate writable space for VLAN header insertion. */ 1202 if ((sc->esc_CTRL & E1000_CTRL_VME) && 1203 (dsc->td.lower.data & E1000_TXD_CMD_VLE)) { 1204 hdrlen = ETHER_ADDR_LEN*2; 1205 vlen = ETHER_VLAN_ENCAP_LEN; 1206 } 1207 if (!tso) { 1208 /* Estimate required writable space for checksums. 
*/ 1209 if (ckinfo[0].ck_valid) 1210 hdrlen = MAX(hdrlen, ckinfo[0].ck_off + 2); 1211 if (ckinfo[1].ck_valid) 1212 hdrlen = MAX(hdrlen, ckinfo[1].ck_off + 2); 1213 /* Round up writable space to the first vector. */ 1214 if (hdrlen != 0 && iov[0].iov_len > hdrlen && 1215 iov[0].iov_len < hdrlen + 100) 1216 hdrlen = iov[0].iov_len; 1217 } else { 1218 /* In case of TSO header length provided by software. */ 1219 hdrlen = sc->esc_txctx.tcp_seg_setup.fields.hdr_len; 1220 } 1221 1222 /* Allocate, fill and prepend writable header vector. */ 1223 if (hdrlen != 0) { 1224 hdr = __builtin_alloca(hdrlen + vlen); 1225 hdr += vlen; 1226 for (left = hdrlen, hdrp = hdr; left > 0; 1227 left -= now, hdrp += now) { 1228 now = MIN(left, iov->iov_len); 1229 memcpy(hdrp, iov->iov_base, now); 1230 iov->iov_base += now; 1231 iov->iov_len -= now; 1232 if (iov->iov_len == 0) { 1233 iov++; 1234 iovcnt--; 1235 } 1236 } 1237 iov--; 1238 iovcnt++; 1239 iov->iov_base = hdr; 1240 iov->iov_len = hdrlen; 1241 } 1242 1243 /* Insert VLAN tag. */ 1244 if (vlen != 0) { 1245 hdr -= ETHER_VLAN_ENCAP_LEN; 1246 memmove(hdr, hdr + ETHER_VLAN_ENCAP_LEN, ETHER_ADDR_LEN*2); 1247 hdrlen += ETHER_VLAN_ENCAP_LEN; 1248 hdr[ETHER_ADDR_LEN*2 + 0] = sc->esc_VET >> 8; 1249 hdr[ETHER_ADDR_LEN*2 + 1] = sc->esc_VET & 0xff; 1250 hdr[ETHER_ADDR_LEN*2 + 2] = dsc->td.upper.fields.special >> 8; 1251 hdr[ETHER_ADDR_LEN*2 + 3] = dsc->td.upper.fields.special & 0xff; 1252 iov->iov_base = hdr; 1253 iov->iov_len += ETHER_VLAN_ENCAP_LEN; 1254 /* Correct checksum offsets after VLAN tag insertion. */ 1255 ckinfo[0].ck_start += ETHER_VLAN_ENCAP_LEN; 1256 ckinfo[0].ck_off += ETHER_VLAN_ENCAP_LEN; 1257 if (ckinfo[0].ck_len != 0) 1258 ckinfo[0].ck_len += ETHER_VLAN_ENCAP_LEN; 1259 ckinfo[1].ck_start += ETHER_VLAN_ENCAP_LEN; 1260 ckinfo[1].ck_off += ETHER_VLAN_ENCAP_LEN; 1261 if (ckinfo[1].ck_len != 0) 1262 ckinfo[1].ck_len += ETHER_VLAN_ENCAP_LEN; 1263 } 1264 1265 /* Simple non-TSO case. */ 1266 if (!tso) { 1267 /* Calculate checksums and transmit. */ 1268 if (ckinfo[0].ck_valid) 1269 e82545_transmit_checksum(iov, iovcnt, &ckinfo[0]); 1270 if (ckinfo[1].ck_valid) 1271 e82545_transmit_checksum(iov, iovcnt, &ckinfo[1]); 1272 e82545_transmit_backend(sc, iov, iovcnt); 1273 goto done; 1274 } 1275 1276 /* Doing TSO. */ 1277 tcp = (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) != 0; 1278 mss = sc->esc_txctx.tcp_seg_setup.fields.mss; 1279 paylen = (sc->esc_txctx.cmd_and_length & 0x000fffff); 1280 DPRINTF("tx %s segmentation offload %d+%d/%d bytes %d iovs\r\n", 1281 tcp ? "TCP" : "UDP", hdrlen, paylen, mss, iovcnt); 1282 ipid = ntohs(*(uint16_t *)&hdr[ckinfo[0].ck_start + 4]); 1283 tcpseq = ntohl(*(uint32_t *)&hdr[ckinfo[1].ck_start + 4]); 1284 ipcs = *(uint16_t *)&hdr[ckinfo[0].ck_off]; 1285 tcpcs = 0; 1286 if (ckinfo[1].ck_valid) /* Save partial pseudo-header checksum. */ 1287 tcpcs = *(uint16_t *)&hdr[ckinfo[1].ck_off]; 1288 pv = 1; 1289 pvoff = 0; 1290 for (seg = 0, left = paylen; left > 0; seg++, left -= now) { 1291 now = MIN(left, mss); 1292 1293 /* Construct IOVs for the segment. */ 1294 /* Include whole original header. */ 1295 tiov[0].iov_base = hdr; 1296 tiov[0].iov_len = hdrlen; 1297 tiovcnt = 1; 1298 /* Include respective part of payload IOV. 
*/ 1299 for (nleft = now; pv < iovcnt && nleft > 0; nleft -= nnow) { 1300 nnow = MIN(nleft, iov[pv].iov_len - pvoff); 1301 tiov[tiovcnt].iov_base = iov[pv].iov_base + pvoff; 1302 tiov[tiovcnt++].iov_len = nnow; 1303 if (pvoff + nnow == iov[pv].iov_len) { 1304 pv++; 1305 pvoff = 0; 1306 } else 1307 pvoff += nnow; 1308 } 1309 DPRINTF("tx segment %d %d+%d bytes %d iovs\r\n", 1310 seg, hdrlen, now, tiovcnt); 1311 1312 /* Update IP header. */ 1313 if (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_IP) { 1314 /* IPv4 -- set length and ID */ 1315 *(uint16_t *)&hdr[ckinfo[0].ck_start + 2] = 1316 htons(hdrlen - ckinfo[0].ck_start + now); 1317 *(uint16_t *)&hdr[ckinfo[0].ck_start + 4] = 1318 htons(ipid + seg); 1319 } else { 1320 /* IPv6 -- set length */ 1321 *(uint16_t *)&hdr[ckinfo[0].ck_start + 4] = 1322 htons(hdrlen - ckinfo[0].ck_start - 40 + 1323 now); 1324 } 1325 1326 /* Update pseudo-header checksum. */ 1327 tcpsum = tcpcs; 1328 tcpsum += htons(hdrlen - ckinfo[1].ck_start + now); 1329 1330 /* Update TCP/UDP headers. */ 1331 if (tcp) { 1332 /* Update sequence number and FIN/PUSH flags. */ 1333 *(uint32_t *)&hdr[ckinfo[1].ck_start + 4] = 1334 htonl(tcpseq + paylen - left); 1335 if (now < left) { 1336 hdr[ckinfo[1].ck_start + 13] &= 1337 ~(TH_FIN | TH_PUSH); 1338 } 1339 } else { 1340 /* Update payload length. */ 1341 *(uint32_t *)&hdr[ckinfo[1].ck_start + 4] = 1342 hdrlen - ckinfo[1].ck_start + now; 1343 } 1344 1345 /* Calculate checksums and transmit. */ 1346 if (ckinfo[0].ck_valid) { 1347 *(uint16_t *)&hdr[ckinfo[0].ck_off] = ipcs; 1348 e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[0]); 1349 } 1350 if (ckinfo[1].ck_valid) { 1351 *(uint16_t *)&hdr[ckinfo[1].ck_off] = 1352 e82545_carry(tcpsum); 1353 e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[1]); 1354 } 1355 e82545_transmit_backend(sc, tiov, tiovcnt); 1356 } 1357 1358 done: 1359 /* Record if tx descs were written back */ 1360 e82545_transmit_done(sc, txwb, nwb); 1361 if (nwb) 1362 *tdwb = 1; 1363 1364 *rhead = (head + 1) % dsize; 1365 return (desc + 1); 1366 } 1367 1368 static void 1369 e82545_tx_run(struct e82545_softc *sc) 1370 { 1371 uint32_t cause; 1372 uint16_t head, rhead, tail, size; 1373 int lim, tdwb, sent; 1374 1375 head = sc->esc_TDH; 1376 tail = sc->esc_TDT; 1377 size = sc->esc_TDLEN / 16; 1378 DPRINTF("tx_run: head %x, rhead %x, tail %x\r\n", 1379 sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); 1380 1381 pthread_mutex_unlock(&sc->esc_mtx); 1382 rhead = head; 1383 tdwb = 0; 1384 for (lim = size / 4; sc->esc_tx_enabled && lim > 0; lim -= sent) { 1385 sent = e82545_transmit(sc, head, tail, size, &rhead, &tdwb); 1386 if (sent == 0) 1387 break; 1388 head = rhead; 1389 } 1390 pthread_mutex_lock(&sc->esc_mtx); 1391 1392 sc->esc_TDH = head; 1393 sc->esc_TDHr = rhead; 1394 cause = 0; 1395 if (tdwb) 1396 cause |= E1000_ICR_TXDW; 1397 if (lim != size / 4 && sc->esc_TDH == sc->esc_TDT) 1398 cause |= E1000_ICR_TXQE; 1399 if (cause) 1400 e82545_icr_assert(sc, cause); 1401 1402 DPRINTF("tx_run done: head %x, rhead %x, tail %x\r\n", 1403 sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); 1404 } 1405 1406 static void * 1407 e82545_tx_thread(void *param) 1408 { 1409 struct e82545_softc *sc = param; 1410 1411 pthread_mutex_lock(&sc->esc_mtx); 1412 for (;;) { 1413 while (!sc->esc_tx_enabled || sc->esc_TDHr == sc->esc_TDT) { 1414 if (sc->esc_tx_enabled && sc->esc_TDHr != sc->esc_TDT) 1415 break; 1416 sc->esc_tx_active = 0; 1417 if (sc->esc_tx_enabled == 0) 1418 pthread_cond_signal(&sc->esc_tx_cond); 1419 pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx); 1420 } 
1421 sc->esc_tx_active = 1; 1422 1423 /* Process some tx descriptors. Lock dropped inside. */ 1424 e82545_tx_run(sc); 1425 } 1426 } 1427 1428 static void 1429 e82545_tx_start(struct e82545_softc *sc) 1430 { 1431 1432 if (sc->esc_tx_active == 0) 1433 pthread_cond_signal(&sc->esc_tx_cond); 1434 } 1435 1436 static void 1437 e82545_tx_enable(struct e82545_softc *sc) 1438 { 1439 1440 sc->esc_tx_enabled = 1; 1441 } 1442 1443 static void 1444 e82545_tx_disable(struct e82545_softc *sc) 1445 { 1446 1447 sc->esc_tx_enabled = 0; 1448 while (sc->esc_tx_active) 1449 pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx); 1450 } 1451 1452 static void 1453 e82545_rx_enable(struct e82545_softc *sc) 1454 { 1455 1456 sc->esc_rx_enabled = 1; 1457 } 1458 1459 static void 1460 e82545_rx_disable(struct e82545_softc *sc) 1461 { 1462 1463 sc->esc_rx_enabled = 0; 1464 while (sc->esc_rx_active) 1465 pthread_cond_wait(&sc->esc_rx_cond, &sc->esc_mtx); 1466 } 1467 1468 static void 1469 e82545_write_ra(struct e82545_softc *sc, int reg, uint32_t wval) 1470 { 1471 struct eth_uni *eu; 1472 int idx; 1473 1474 idx = reg >> 1; 1475 assert(idx < 15); 1476 1477 eu = &sc->esc_uni[idx]; 1478 1479 if (reg & 0x1) { 1480 /* RAH */ 1481 eu->eu_valid = ((wval & E1000_RAH_AV) == E1000_RAH_AV); 1482 eu->eu_addrsel = (wval >> 16) & 0x3; 1483 eu->eu_eth.octet[5] = wval >> 8; 1484 eu->eu_eth.octet[4] = wval; 1485 } else { 1486 /* RAL */ 1487 eu->eu_eth.octet[3] = wval >> 24; 1488 eu->eu_eth.octet[2] = wval >> 16; 1489 eu->eu_eth.octet[1] = wval >> 8; 1490 eu->eu_eth.octet[0] = wval; 1491 } 1492 } 1493 1494 static uint32_t 1495 e82545_read_ra(struct e82545_softc *sc, int reg) 1496 { 1497 struct eth_uni *eu; 1498 uint32_t retval; 1499 int idx; 1500 1501 idx = reg >> 1; 1502 assert(idx < 15); 1503 1504 eu = &sc->esc_uni[idx]; 1505 1506 if (reg & 0x1) { 1507 /* RAH */ 1508 retval = (eu->eu_valid << 31) | 1509 (eu->eu_addrsel << 16) | 1510 (eu->eu_eth.octet[5] << 8) | 1511 eu->eu_eth.octet[4]; 1512 } else { 1513 /* RAL */ 1514 retval = (eu->eu_eth.octet[3] << 24) | 1515 (eu->eu_eth.octet[2] << 16) | 1516 (eu->eu_eth.octet[1] << 8) | 1517 eu->eu_eth.octet[0]; 1518 } 1519 1520 return (retval); 1521 } 1522 1523 static void 1524 e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value) 1525 { 1526 int ridx; 1527 1528 if (offset & 0x3) { 1529 DPRINTF("Unaligned register write offset:0x%x value:0x%x\r\n", offset, value); 1530 return; 1531 } 1532 DPRINTF("Register write: 0x%x value: 0x%x\r\n", offset, value); 1533 1534 switch (offset) { 1535 case E1000_CTRL: 1536 case E1000_CTRL_DUP: 1537 e82545_devctl(sc, value); 1538 break; 1539 case E1000_FCAL: 1540 sc->esc_FCAL = value; 1541 break; 1542 case E1000_FCAH: 1543 sc->esc_FCAH = value & ~0xFFFF0000; 1544 break; 1545 case E1000_FCT: 1546 sc->esc_FCT = value & ~0xFFFF0000; 1547 break; 1548 case E1000_VET: 1549 sc->esc_VET = value & ~0xFFFF0000; 1550 break; 1551 case E1000_FCTTV: 1552 sc->esc_FCTTV = value & ~0xFFFF0000; 1553 break; 1554 case E1000_LEDCTL: 1555 sc->esc_LEDCTL = value & ~0x30303000; 1556 break; 1557 case E1000_PBA: 1558 sc->esc_PBA = value & 0x0000FF80; 1559 break; 1560 case E1000_ICR: 1561 case E1000_ITR: 1562 case E1000_ICS: 1563 case E1000_IMS: 1564 case E1000_IMC: 1565 e82545_intr_write(sc, offset, value); 1566 break; 1567 case E1000_RCTL: 1568 e82545_rx_ctl(sc, value); 1569 break; 1570 case E1000_FCRTL: 1571 sc->esc_FCRTL = value & ~0xFFFF0007; 1572 break; 1573 case E1000_FCRTH: 1574 sc->esc_FCRTH = value & ~0xFFFF0007; 1575 break; 1576 case E1000_RDBAL(0): 1577 
sc->esc_RDBAL = value & ~0xF; 1578 if (sc->esc_rx_enabled) { 1579 /* Apparently legal: update cached address */ 1580 e82545_rx_update_rdba(sc); 1581 } 1582 break; 1583 case E1000_RDBAH(0): 1584 assert(!sc->esc_rx_enabled); 1585 sc->esc_RDBAH = value; 1586 break; 1587 case E1000_RDLEN(0): 1588 assert(!sc->esc_rx_enabled); 1589 sc->esc_RDLEN = value & ~0xFFF0007F; 1590 break; 1591 case E1000_RDH(0): 1592 /* XXX should only ever be zero ? Range check ? */ 1593 sc->esc_RDH = value; 1594 break; 1595 case E1000_RDT(0): 1596 /* XXX if this opens up the rx ring, do something ? */ 1597 sc->esc_RDT = value; 1598 break; 1599 case E1000_RDTR: 1600 /* ignore FPD bit 31 */ 1601 sc->esc_RDTR = value & ~0xFFFF0000; 1602 break; 1603 case E1000_RXDCTL(0): 1604 sc->esc_RXDCTL = value & ~0xFEC0C0C0; 1605 break; 1606 case E1000_RADV: 1607 sc->esc_RADV = value & ~0xFFFF0000; 1608 break; 1609 case E1000_RSRPD: 1610 sc->esc_RSRPD = value & ~0xFFFFF000; 1611 break; 1612 case E1000_RXCSUM: 1613 sc->esc_RXCSUM = value & ~0xFFFFF800; 1614 break; 1615 case E1000_TXCW: 1616 sc->esc_TXCW = value & ~0x3FFF0000; 1617 break; 1618 case E1000_TCTL: 1619 e82545_tx_ctl(sc, value); 1620 break; 1621 case E1000_TIPG: 1622 sc->esc_TIPG = value; 1623 break; 1624 case E1000_AIT: 1625 sc->esc_AIT = value; 1626 break; 1627 case E1000_TDBAL(0): 1628 sc->esc_TDBAL = value & ~0xF; 1629 if (sc->esc_tx_enabled) { 1630 /* Apparently legal */ 1631 e82545_tx_update_tdba(sc); 1632 } 1633 break; 1634 case E1000_TDBAH(0): 1635 //assert(!sc->esc_tx_enabled); 1636 sc->esc_TDBAH = value; 1637 break; 1638 case E1000_TDLEN(0): 1639 //assert(!sc->esc_tx_enabled); 1640 sc->esc_TDLEN = value & ~0xFFF0007F; 1641 break; 1642 case E1000_TDH(0): 1643 //assert(!sc->esc_tx_enabled); 1644 /* XXX should only ever be zero ? Range check ? */ 1645 sc->esc_TDHr = sc->esc_TDH = value; 1646 break; 1647 case E1000_TDT(0): 1648 /* XXX range check ? */ 1649 sc->esc_TDT = value; 1650 if (sc->esc_tx_enabled) 1651 e82545_tx_start(sc); 1652 break; 1653 case E1000_TIDV: 1654 sc->esc_TIDV = value & ~0xFFFF0000; 1655 break; 1656 case E1000_TXDCTL(0): 1657 //assert(!sc->esc_tx_enabled); 1658 sc->esc_TXDCTL = value & ~0xC0C0C0; 1659 break; 1660 case E1000_TADV: 1661 sc->esc_TADV = value & ~0xFFFF0000; 1662 break; 1663 case E1000_RAL(0) ... E1000_RAH(15): 1664 /* convert to u32 offset */ 1665 ridx = (offset - E1000_RAL(0)) >> 2; 1666 e82545_write_ra(sc, ridx, value); 1667 break; 1668 case E1000_MTA ... (E1000_MTA + (127*4)): 1669 sc->esc_fmcast[(offset - E1000_MTA) >> 2] = value; 1670 break; 1671 case E1000_VFTA ... (E1000_VFTA + (127*4)): 1672 sc->esc_fvlan[(offset - E1000_VFTA) >> 2] = value; 1673 break; 1674 case E1000_EECD: 1675 { 1676 //DPRINTF("EECD write 0x%x -> 0x%x\r\n", sc->eeprom_control, value); 1677 /* edge triggered low->high */ 1678 uint32_t eecd_strobe = ((sc->eeprom_control & E1000_EECD_SK) ? 
1679 0 : (value & E1000_EECD_SK)); 1680 uint32_t eecd_mask = (E1000_EECD_SK|E1000_EECD_CS| 1681 E1000_EECD_DI|E1000_EECD_REQ); 1682 sc->eeprom_control &= ~eecd_mask; 1683 sc->eeprom_control |= (value & eecd_mask); 1684 /* grant/revoke immediately */ 1685 if (value & E1000_EECD_REQ) { 1686 sc->eeprom_control |= E1000_EECD_GNT; 1687 } else { 1688 sc->eeprom_control &= ~E1000_EECD_GNT; 1689 } 1690 if (eecd_strobe && (sc->eeprom_control & E1000_EECD_CS)) { 1691 e82545_eecd_strobe(sc); 1692 } 1693 return; 1694 } 1695 case E1000_MDIC: 1696 { 1697 uint8_t reg_addr = (uint8_t)((value & E1000_MDIC_REG_MASK) >> 1698 E1000_MDIC_REG_SHIFT); 1699 uint8_t phy_addr = (uint8_t)((value & E1000_MDIC_PHY_MASK) >> 1700 E1000_MDIC_PHY_SHIFT); 1701 sc->mdi_control = 1702 (value & ~(E1000_MDIC_ERROR|E1000_MDIC_DEST)); 1703 if ((value & E1000_MDIC_READY) != 0) { 1704 DPRINTF("Incorrect MDIC ready bit: 0x%x\r\n", value); 1705 return; 1706 } 1707 switch (value & E82545_MDIC_OP_MASK) { 1708 case E1000_MDIC_OP_READ: 1709 sc->mdi_control &= ~E82545_MDIC_DATA_MASK; 1710 sc->mdi_control |= e82545_read_mdi(sc, reg_addr, phy_addr); 1711 break; 1712 case E1000_MDIC_OP_WRITE: 1713 e82545_write_mdi(sc, reg_addr, phy_addr, 1714 value & E82545_MDIC_DATA_MASK); 1715 break; 1716 default: 1717 DPRINTF("Unknown MDIC op: 0x%x\r\n", value); 1718 return; 1719 } 1720 /* TODO: barrier? */ 1721 sc->mdi_control |= E1000_MDIC_READY; 1722 if (value & E82545_MDIC_IE) { 1723 // TODO: generate interrupt 1724 } 1725 return; 1726 } 1727 case E1000_MANC: 1728 case E1000_STATUS: 1729 return; 1730 default: 1731 DPRINTF("Unknown write register: 0x%x value:%x\r\n", offset, value); 1732 return; 1733 } 1734 } 1735 1736 static uint32_t 1737 e82545_read_register(struct e82545_softc *sc, uint32_t offset) 1738 { 1739 uint32_t retval; 1740 int ridx; 1741 1742 if (offset & 0x3) { 1743 DPRINTF("Unaligned register read offset:0x%x\r\n", offset); 1744 return 0; 1745 } 1746 1747 DPRINTF("Register read: 0x%x\r\n", offset); 1748 1749 switch (offset) { 1750 case E1000_CTRL: 1751 retval = sc->esc_CTRL; 1752 break; 1753 case E1000_STATUS: 1754 retval = E1000_STATUS_FD | E1000_STATUS_LU | 1755 E1000_STATUS_SPEED_1000; 1756 break; 1757 case E1000_FCAL: 1758 retval = sc->esc_FCAL; 1759 break; 1760 case E1000_FCAH: 1761 retval = sc->esc_FCAH; 1762 break; 1763 case E1000_FCT: 1764 retval = sc->esc_FCT; 1765 break; 1766 case E1000_VET: 1767 retval = sc->esc_VET; 1768 break; 1769 case E1000_FCTTV: 1770 retval = sc->esc_FCTTV; 1771 break; 1772 case E1000_LEDCTL: 1773 retval = sc->esc_LEDCTL; 1774 break; 1775 case E1000_PBA: 1776 retval = sc->esc_PBA; 1777 break; 1778 case E1000_ICR: 1779 case E1000_ITR: 1780 case E1000_ICS: 1781 case E1000_IMS: 1782 case E1000_IMC: 1783 retval = e82545_intr_read(sc, offset); 1784 break; 1785 case E1000_RCTL: 1786 retval = sc->esc_RCTL; 1787 break; 1788 case E1000_FCRTL: 1789 retval = sc->esc_FCRTL; 1790 break; 1791 case E1000_FCRTH: 1792 retval = sc->esc_FCRTH; 1793 break; 1794 case E1000_RDBAL(0): 1795 retval = sc->esc_RDBAL; 1796 break; 1797 case E1000_RDBAH(0): 1798 retval = sc->esc_RDBAH; 1799 break; 1800 case E1000_RDLEN(0): 1801 retval = sc->esc_RDLEN; 1802 break; 1803 case E1000_RDH(0): 1804 retval = sc->esc_RDH; 1805 break; 1806 case E1000_RDT(0): 1807 retval = sc->esc_RDT; 1808 break; 1809 case E1000_RDTR: 1810 retval = sc->esc_RDTR; 1811 break; 1812 case E1000_RXDCTL(0): 1813 retval = sc->esc_RXDCTL; 1814 break; 1815 case E1000_RADV: 1816 retval = sc->esc_RADV; 1817 break; 1818 case E1000_RSRPD: 1819 retval = sc->esc_RSRPD; 1820 
break; 1821 case E1000_RXCSUM: 1822 retval = sc->esc_RXCSUM; 1823 break; 1824 case E1000_TXCW: 1825 retval = sc->esc_TXCW; 1826 break; 1827 case E1000_TCTL: 1828 retval = sc->esc_TCTL; 1829 break; 1830 case E1000_TIPG: 1831 retval = sc->esc_TIPG; 1832 break; 1833 case E1000_AIT: 1834 retval = sc->esc_AIT; 1835 break; 1836 case E1000_TDBAL(0): 1837 retval = sc->esc_TDBAL; 1838 break; 1839 case E1000_TDBAH(0): 1840 retval = sc->esc_TDBAH; 1841 break; 1842 case E1000_TDLEN(0): 1843 retval = sc->esc_TDLEN; 1844 break; 1845 case E1000_TDH(0): 1846 retval = sc->esc_TDH; 1847 break; 1848 case E1000_TDT(0): 1849 retval = sc->esc_TDT; 1850 break; 1851 case E1000_TIDV: 1852 retval = sc->esc_TIDV; 1853 break; 1854 case E1000_TXDCTL(0): 1855 retval = sc->esc_TXDCTL; 1856 break; 1857 case E1000_TADV: 1858 retval = sc->esc_TADV; 1859 break; 1860 case E1000_RAL(0) ... E1000_RAH(15): 1861 /* convert to u32 offset */ 1862 ridx = (offset - E1000_RAL(0)) >> 2; 1863 retval = e82545_read_ra(sc, ridx); 1864 break; 1865 case E1000_MTA ... (E1000_MTA + (127*4)): 1866 retval = sc->esc_fmcast[(offset - E1000_MTA) >> 2]; 1867 break; 1868 case E1000_VFTA ... (E1000_VFTA + (127*4)): 1869 retval = sc->esc_fvlan[(offset - E1000_VFTA) >> 2]; 1870 break; 1871 case E1000_EECD: 1872 //DPRINTF("EECD read %x\r\n", sc->eeprom_control); 1873 retval = sc->eeprom_control; 1874 break; 1875 case E1000_MDIC: 1876 retval = sc->mdi_control; 1877 break; 1878 case E1000_MANC: 1879 retval = 0; 1880 break; 1881 /* stats that we emulate. */ 1882 case E1000_MPC: 1883 retval = sc->missed_pkt_count; 1884 break; 1885 case E1000_PRC64: 1886 retval = sc->pkt_rx_by_size[0]; 1887 break; 1888 case E1000_PRC127: 1889 retval = sc->pkt_rx_by_size[1]; 1890 break; 1891 case E1000_PRC255: 1892 retval = sc->pkt_rx_by_size[2]; 1893 break; 1894 case E1000_PRC511: 1895 retval = sc->pkt_rx_by_size[3]; 1896 break; 1897 case E1000_PRC1023: 1898 retval = sc->pkt_rx_by_size[4]; 1899 break; 1900 case E1000_PRC1522: 1901 retval = sc->pkt_rx_by_size[5]; 1902 break; 1903 case E1000_GPRC: 1904 retval = sc->good_pkt_rx_count; 1905 break; 1906 case E1000_BPRC: 1907 retval = sc->bcast_pkt_rx_count; 1908 break; 1909 case E1000_MPRC: 1910 retval = sc->mcast_pkt_rx_count; 1911 break; 1912 case E1000_GPTC: 1913 case E1000_TPT: 1914 retval = sc->good_pkt_tx_count; 1915 break; 1916 case E1000_GORCL: 1917 retval = (uint32_t)sc->good_octets_rx; 1918 break; 1919 case E1000_GORCH: 1920 retval = (uint32_t)(sc->good_octets_rx >> 32); 1921 break; 1922 case E1000_TOTL: 1923 case E1000_GOTCL: 1924 retval = (uint32_t)sc->good_octets_tx; 1925 break; 1926 case E1000_TOTH: 1927 case E1000_GOTCH: 1928 retval = (uint32_t)(sc->good_octets_tx >> 32); 1929 break; 1930 case E1000_ROC: 1931 retval = sc->oversize_rx_count; 1932 break; 1933 case E1000_TORL: 1934 retval = (uint32_t)(sc->good_octets_rx + sc->missed_octets); 1935 break; 1936 case E1000_TORH: 1937 retval = (uint32_t)((sc->good_octets_rx + 1938 sc->missed_octets) >> 32); 1939 break; 1940 case E1000_TPR: 1941 retval = sc->good_pkt_rx_count + sc->missed_pkt_count + 1942 sc->oversize_rx_count; 1943 break; 1944 case E1000_PTC64: 1945 retval = sc->pkt_tx_by_size[0]; 1946 break; 1947 case E1000_PTC127: 1948 retval = sc->pkt_tx_by_size[1]; 1949 break; 1950 case E1000_PTC255: 1951 retval = sc->pkt_tx_by_size[2]; 1952 break; 1953 case E1000_PTC511: 1954 retval = sc->pkt_tx_by_size[3]; 1955 break; 1956 case E1000_PTC1023: 1957 retval = sc->pkt_tx_by_size[4]; 1958 break; 1959 case E1000_PTC1522: 1960 retval = sc->pkt_tx_by_size[5]; 1961 break; 
1962 case E1000_MPTC: 1963 retval = sc->mcast_pkt_tx_count; 1964 break; 1965 case E1000_BPTC: 1966 retval = sc->bcast_pkt_tx_count; 1967 break; 1968 case E1000_TSCTC: 1969 retval = sc->tso_tx_count; 1970 break; 1971 /* stats that are always 0. */ 1972 case E1000_CRCERRS: 1973 case E1000_ALGNERRC: 1974 case E1000_SYMERRS: 1975 case E1000_RXERRC: 1976 case E1000_SCC: 1977 case E1000_ECOL: 1978 case E1000_MCC: 1979 case E1000_LATECOL: 1980 case E1000_COLC: 1981 case E1000_DC: 1982 case E1000_TNCRS: 1983 case E1000_SEC: 1984 case E1000_CEXTERR: 1985 case E1000_RLEC: 1986 case E1000_XONRXC: 1987 case E1000_XONTXC: 1988 case E1000_XOFFRXC: 1989 case E1000_XOFFTXC: 1990 case E1000_FCRUC: 1991 case E1000_RNBC: 1992 case E1000_RUC: 1993 case E1000_RFC: 1994 case E1000_RJC: 1995 case E1000_MGTPRC: 1996 case E1000_MGTPDC: 1997 case E1000_MGTPTC: 1998 case E1000_TSCTFC: 1999 retval = 0; 2000 break; 2001 default: 2002 DPRINTF("Unknown read register: 0x%x\r\n", offset); 2003 retval = 0; 2004 break; 2005 } 2006 2007 return (retval); 2008 } 2009 2010 static void 2011 e82545_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2012 uint64_t offset, int size, uint64_t value) 2013 { 2014 struct e82545_softc *sc; 2015 2016 //DPRINTF("Write bar:%d offset:0x%lx value:0x%lx size:%d\r\n", baridx, offset, value, size); 2017 2018 sc = pi->pi_arg; 2019 2020 pthread_mutex_lock(&sc->esc_mtx); 2021 2022 switch (baridx) { 2023 case E82545_BAR_IO: 2024 switch (offset) { 2025 case E82545_IOADDR: 2026 if (size != 4) { 2027 DPRINTF("Wrong io addr write sz:%d value:0x%lx\r\n", size, value); 2028 } else 2029 sc->io_addr = (uint32_t)value; 2030 break; 2031 case E82545_IODATA: 2032 if (size != 4) { 2033 DPRINTF("Wrong io data write size:%d value:0x%lx\r\n", size, value); 2034 } else if (sc->io_addr > E82545_IO_REGISTER_MAX) { 2035 DPRINTF("Non-register io write addr:0x%x value:0x%lx\r\n", sc->io_addr, value); 2036 } else 2037 e82545_write_register(sc, sc->io_addr, 2038 (uint32_t)value); 2039 break; 2040 default: 2041 DPRINTF("Unknown io bar write offset:0x%lx value:0x%lx size:%d\r\n", offset, value, size); 2042 break; 2043 } 2044 break; 2045 case E82545_BAR_REGISTER: 2046 if (size != 4) { 2047 DPRINTF("Wrong register write size:%d offset:0x%lx value:0x%lx\r\n", size, offset, value); 2048 } else 2049 e82545_write_register(sc, (uint32_t)offset, 2050 (uint32_t)value); 2051 break; 2052 default: 2053 DPRINTF("Unknown write bar:%d off:0x%lx val:0x%lx size:%d\r\n", 2054 baridx, offset, value, size); 2055 } 2056 2057 pthread_mutex_unlock(&sc->esc_mtx); 2058 } 2059 2060 static uint64_t 2061 e82545_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2062 uint64_t offset, int size) 2063 { 2064 struct e82545_softc *sc; 2065 uint64_t retval; 2066 2067 //DPRINTF("Read bar:%d offset:0x%lx size:%d\r\n", baridx, offset, size); 2068 sc = pi->pi_arg; 2069 retval = 0; 2070 2071 pthread_mutex_lock(&sc->esc_mtx); 2072 2073 switch (baridx) { 2074 case E82545_BAR_IO: 2075 switch (offset) { 2076 case E82545_IOADDR: 2077 if (size != 4) { 2078 DPRINTF("Wrong io addr read sz:%d\r\n", size); 2079 } else 2080 retval = sc->io_addr; 2081 break; 2082 case E82545_IODATA: 2083 if (size != 4) { 2084 DPRINTF("Wrong io data read sz:%d\r\n", size); 2085 } 2086 if (sc->io_addr > E82545_IO_REGISTER_MAX) { 2087 DPRINTF("Non-register io read addr:0x%x\r\n", 2088 sc->io_addr); 2089 } else 2090 retval = e82545_read_register(sc, sc->io_addr); 2091 break; 2092 default: 2093 DPRINTF("Unknown io bar read offset:0x%lx size:%d\r\n", 2094 
static uint64_t
e82545_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size)
{
	struct e82545_softc *sc;
	uint64_t retval;

	//DPRINTF("Read bar:%d offset:0x%lx size:%d\r\n", baridx, offset, size);
	sc = pi->pi_arg;
	retval = 0;

	pthread_mutex_lock(&sc->esc_mtx);

	switch (baridx) {
	case E82545_BAR_IO:
		switch (offset) {
		case E82545_IOADDR:
			if (size != 4) {
				DPRINTF("Wrong io addr read sz:%d\r\n", size);
			} else
				retval = sc->io_addr;
			break;
		case E82545_IODATA:
			if (size != 4) {
				DPRINTF("Wrong io data read sz:%d\r\n", size);
			}
			if (sc->io_addr > E82545_IO_REGISTER_MAX) {
				DPRINTF("Non-register io read addr:0x%x\r\n",
				    sc->io_addr);
			} else
				retval = e82545_read_register(sc, sc->io_addr);
			break;
		default:
			DPRINTF("Unknown io bar read offset:0x%lx size:%d\r\n",
			    offset, size);
			break;
		}
		break;
	case E82545_BAR_REGISTER:
		if (size != 4) {
			DPRINTF("Wrong register read size:%d offset:0x%lx\r\n",
			    size, offset);
		} else
			retval = e82545_read_register(sc, (uint32_t)offset);
		break;
	default:
		DPRINTF("Unknown read bar:%d offset:0x%lx size:%d\r\n",
		    baridx, offset, size);
		break;
	}

	pthread_mutex_unlock(&sc->esc_mtx);

	return (retval);
}

static void
e82545_reset(struct e82545_softc *sc, int drvr)
{
	int i;

	e82545_rx_disable(sc);
	e82545_tx_disable(sc);

	/* clear outstanding interrupts */
	if (sc->esc_irq_asserted)
		pci_lintr_deassert(sc->esc_pi);

	/* misc */
	if (!drvr) {
		sc->esc_FCAL = 0;
		sc->esc_FCAH = 0;
		sc->esc_FCT = 0;
		sc->esc_VET = 0;
		sc->esc_FCTTV = 0;
	}
	sc->esc_LEDCTL = 0x07061302;
	sc->esc_PBA = 0x00100030;

	/* start nvm in opcode mode. */
	sc->nvm_opaddr = 0;
	sc->nvm_mode = E82545_NVM_MODE_OPADDR;
	sc->nvm_bits = E82545_NVM_OPADDR_BITS;
	sc->eeprom_control = E1000_EECD_PRES | E82545_EECD_FWE_EN;
	e82545_init_eeprom(sc);

	/* interrupt */
	sc->esc_ICR = 0;
	sc->esc_ITR = 250;
	sc->esc_ICS = 0;
	sc->esc_IMS = 0;
	sc->esc_IMC = 0;

	/* L2 filters */
	if (!drvr) {
		memset(sc->esc_fvlan, 0, sizeof(sc->esc_fvlan));
		memset(sc->esc_fmcast, 0, sizeof(sc->esc_fmcast));
		memset(sc->esc_uni, 0, sizeof(sc->esc_uni));

		/* XXX not necessary on 82545 ?? */
		sc->esc_uni[0].eu_valid = 1;
		memcpy(sc->esc_uni[0].eu_eth.octet, sc->esc_mac.octet,
		    ETHER_ADDR_LEN);
	} else {
		/* Clear RAH valid bits */
		for (i = 0; i < 16; i++)
			sc->esc_uni[i].eu_valid = 0;
	}

	/* receive */
	if (!drvr) {
		sc->esc_RDBAL = 0;
		sc->esc_RDBAH = 0;
	}
	sc->esc_RCTL = 0;
	sc->esc_FCRTL = 0;
	sc->esc_FCRTH = 0;
	sc->esc_RDLEN = 0;
	sc->esc_RDH = 0;
	sc->esc_RDT = 0;
	sc->esc_RDTR = 0;
	sc->esc_RXDCTL = (1 << 24) | (1 << 16); /* default GRAN/WTHRESH */
	sc->esc_RADV = 0;
	sc->esc_RXCSUM = 0;

	/* transmit */
	if (!drvr) {
		sc->esc_TDBAL = 0;
		sc->esc_TDBAH = 0;
		sc->esc_TIPG = 0;
		sc->esc_AIT = 0;
		sc->esc_TIDV = 0;
		sc->esc_TADV = 0;
	}
	sc->esc_tdba = 0;
	sc->esc_txdesc = NULL;
	sc->esc_TXCW = 0;
	sc->esc_TCTL = 0;
	sc->esc_TDLEN = 0;
	sc->esc_TDT = 0;
	sc->esc_TDHr = sc->esc_TDH = 0;
	sc->esc_TXDCTL = 0;
}

static void
e82545_open_tap(struct e82545_softc *sc, char *opts)
{
	char tbuf[80];
	int opt = 1;

	if (opts == NULL) {
		sc->esc_tapfd = -1;
		return;
	}

	strcpy(tbuf, "/dev/");
	strlcat(tbuf, opts, sizeof(tbuf));

	sc->esc_tapfd = open(tbuf, O_RDWR);
	if (sc->esc_tapfd == -1) {
		DPRINTF("unable to open tap device %s\n", opts);
		exit(1);
	}

	/*
	 * Set non-blocking and register for read
	 * notifications with the event loop.
	 */
	if (ioctl(sc->esc_tapfd, FIONBIO, &opt) < 0) {
		WPRINTF("tap device O_NONBLOCK failed: %d\n", errno);
		close(sc->esc_tapfd);
		sc->esc_tapfd = -1;
		/* The fd is no longer valid; do not register it below. */
		return;
	}

	sc->esc_mevp = mevent_add(sc->esc_tapfd,
				  EVF_READ,
				  e82545_tap_callback,
				  sc);
	if (sc->esc_mevp == NULL) {
		DPRINTF("Could not register mevent %d\n", EVF_READ);
		close(sc->esc_tapfd);
		sc->esc_tapfd = -1;
	}
}
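
/*
 * Parse a "mac=xx:xx:xx:xx:xx:xx" option.  A valid unicast, non-zero
 * address is copied into mac_addr and 0 is returned; an invalid
 * address is reported and 1 is returned.
 */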
static int
e82545_parsemac(char *mac_str, uint8_t *mac_addr)
{
	struct ether_addr *ea;
	char *tmpstr;
	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };

	tmpstr = strsep(&mac_str, "=");
	if ((mac_str != NULL) && (!strcmp(tmpstr, "mac"))) {
		ea = ether_aton(mac_str);
		if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
		    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
			fprintf(stderr, "Invalid MAC %s\n", mac_str);
			return (1);
		} else
			memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
	}
	return (0);
}
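
/*
 * Initialize an instance of the emulated NIC: allocate and link the
 * softc, start the transmit thread, program PCI config space and the
 * three BARs, attach an optional tap/vmnet backend with an optional
 * "mac=" override, and finally perform a hardware-style reset.
 */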
static int
e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	MD5_CTX mdctx;
	unsigned char digest[16];
	char nstr[80];
	struct e82545_softc *sc;
	char *devname;
	char *vtopts;
	int mac_provided;

	DPRINTF("Loading with options: %s\r\n", opts);

	/* Setup our softc */
	sc = calloc(1, sizeof(*sc));

	pi->pi_arg = sc;
	sc->esc_pi = pi;
	sc->esc_ctx = ctx;

	pthread_mutex_init(&sc->esc_mtx, NULL);
	pthread_cond_init(&sc->esc_rx_cond, NULL);
	pthread_cond_init(&sc->esc_tx_cond, NULL);
	pthread_create(&sc->esc_tx_tid, NULL, e82545_tx_thread, sc);
	snprintf(nstr, sizeof(nstr), "e82545-%d:%d tx", pi->pi_slot,
	    pi->pi_func);
	pthread_set_name_np(sc->esc_tx_tid, nstr);

	pci_set_cfgdata16(pi, PCIR_DEVICE, E82545_DEV_ID_82545EM_COPPER);
	pci_set_cfgdata16(pi, PCIR_VENDOR, E82545_VENDOR_ID_INTEL);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_NETWORK_ETHERNET);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, E82545_SUBDEV_ID);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, E82545_VENDOR_ID_INTEL);

	pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL);
	pci_set_cfgdata8(pi, PCIR_INTPIN, 0x1);

	/* TODO: this card also supports msi, but the freebsd driver for it
	 * does not, so I have not implemented it. */
	pci_lintr_request(pi);

	pci_emul_alloc_bar(pi, E82545_BAR_REGISTER, PCIBAR_MEM32,
	    E82545_BAR_REGISTER_LEN);
	pci_emul_alloc_bar(pi, E82545_BAR_FLASH, PCIBAR_MEM32,
	    E82545_BAR_FLASH_LEN);
	pci_emul_alloc_bar(pi, E82545_BAR_IO, PCIBAR_IO,
	    E82545_BAR_IO_LEN);

	/*
	 * Attempt to open the tap device and read the MAC address
	 * if specified.  Copied from virtio-net, slightly modified.
	 */
	mac_provided = 0;
	sc->esc_tapfd = -1;
	if (opts != NULL) {
		int err;

		devname = vtopts = strdup(opts);
		(void) strsep(&vtopts, ",");

		if (vtopts != NULL) {
			err = e82545_parsemac(vtopts, sc->esc_mac.octet);
			if (err != 0) {
				free(devname);
				return (err);
			}
			mac_provided = 1;
		}

		if (strncmp(devname, "tap", 3) == 0 ||
		    strncmp(devname, "vmnet", 5) == 0)
			e82545_open_tap(sc, devname);

		free(devname);
	}

	/*
	 * The default MAC address is the standard NetApp OUI of 00-a0-98,
	 * followed by an MD5 of the PCI slot/func number and dev name.
	 */
	if (!mac_provided) {
		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
		    pi->pi_func, vmname);

		MD5Init(&mdctx);
		MD5Update(&mdctx, nstr, strlen(nstr));
		MD5Final(digest, &mdctx);

		sc->esc_mac.octet[0] = 0x00;
		sc->esc_mac.octet[1] = 0xa0;
		sc->esc_mac.octet[2] = 0x98;
		sc->esc_mac.octet[3] = digest[0];
		sc->esc_mac.octet[4] = digest[1];
		sc->esc_mac.octet[5] = digest[2];
	}

	/* H/w initiated reset */
	e82545_reset(sc, 0);

	return (0);
}

struct pci_devemu pci_de_e82545 = {
	.pe_emu =	"e1000",
	.pe_init =	e82545_init,
	.pe_barwrite =	e82545_write,
	.pe_barread =	e82545_read
};
PCI_EMUL_SET(pci_de_e82545);
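
/*
 * Usage sketch, assuming the standard bhyve "-s" device syntax (the
 * exact form may differ between versions):
 *
 *   bhyve ... -s 2:0,e1000,tap0,mac=00:a0:98:12:34:56 ... <vmname>
 *
 * The backend name must begin with "tap" or "vmnet", and the optional
 * "mac=" entry overrides the generated NetApp-OUI address.
 */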