1 /* 2 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org> 3 * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org> 4 * Copyright (c) 2013 Jeremiah Lott, Avere Systems 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/types.h> 34 #ifndef WITHOUT_CAPSICUM 35 #include <sys/capsicum.h> 36 #endif 37 #include <sys/limits.h> 38 #include <sys/ioctl.h> 39 #include <sys/uio.h> 40 #include <net/ethernet.h> 41 #include <netinet/in.h> 42 #include <netinet/tcp.h> 43 44 #include <err.h> 45 #include <errno.h> 46 #include <fcntl.h> 47 #include <md5.h> 48 #include <stdio.h> 49 #include <stdlib.h> 50 #include <string.h> 51 #include <sysexits.h> 52 #include <unistd.h> 53 #include <pthread.h> 54 #include <pthread_np.h> 55 56 #include "e1000_regs.h" 57 #include "e1000_defines.h" 58 #include "mii.h" 59 60 #include "bhyverun.h" 61 #include "pci_emul.h" 62 #include "mevent.h" 63 64 /* Hardware/register definitions XXX: move some to common code. */ 65 #define E82545_VENDOR_ID_INTEL 0x8086 66 #define E82545_DEV_ID_82545EM_COPPER 0x100F 67 #define E82545_SUBDEV_ID 0x1008 68 69 #define E82545_REVISION_4 4 70 71 #define E82545_MDIC_DATA_MASK 0x0000FFFF 72 #define E82545_MDIC_OP_MASK 0x0c000000 73 #define E82545_MDIC_IE 0x20000000 74 75 #define E82545_EECD_FWE_DIS 0x00000010 /* Flash writes disabled */ 76 #define E82545_EECD_FWE_EN 0x00000020 /* Flash writes enabled */ 77 #define E82545_EECD_FWE_MASK 0x00000030 /* Flash writes mask */ 78 79 #define E82545_BAR_REGISTER 0 80 #define E82545_BAR_REGISTER_LEN (128*1024) 81 #define E82545_BAR_FLASH 1 82 #define E82545_BAR_FLASH_LEN (64*1024) 83 #define E82545_BAR_IO 2 84 #define E82545_BAR_IO_LEN 8 85 86 #define E82545_IOADDR 0x00000000 87 #define E82545_IODATA 0x00000004 88 #define E82545_IO_REGISTER_MAX 0x0001FFFF 89 #define E82545_IO_FLASH_BASE 0x00080000 90 #define E82545_IO_FLASH_MAX 0x000FFFFF 91 92 #define E82545_ARRAY_ENTRY(reg, offset) (reg + (offset<<2)) 93 #define E82545_RAR_MAX 15 94 #define E82545_MTA_MAX 127 95 #define E82545_VFTA_MAX 127 96 97 /* Slightly modified from the driver versions, hardcoded for 3 opcode bits, 98 * followed by 6 address bits. 99 * TODO: make opcode bits and addr bits configurable? 100 * NVM Commands - Microwire */ 101 #define E82545_NVM_OPCODE_BITS 3 102 #define E82545_NVM_ADDR_BITS 6 103 #define E82545_NVM_DATA_BITS 16 104 #define E82545_NVM_OPADDR_BITS (E82545_NVM_OPCODE_BITS + E82545_NVM_ADDR_BITS) 105 #define E82545_NVM_ADDR_MASK ((1 << E82545_NVM_ADDR_BITS)-1) 106 #define E82545_NVM_OPCODE_MASK \ 107 (((1 << E82545_NVM_OPCODE_BITS) - 1) << E82545_NVM_ADDR_BITS) 108 #define E82545_NVM_OPCODE_READ (0x6 << E82545_NVM_ADDR_BITS) /* read */ 109 #define E82545_NVM_OPCODE_WRITE (0x5 << E82545_NVM_ADDR_BITS) /* write */ 110 #define E82545_NVM_OPCODE_ERASE (0x7 << E82545_NVM_ADDR_BITS) /* erase */ 111 #define E82545_NVM_OPCODE_EWEN (0x4 << E82545_NVM_ADDR_BITS) /* wr-enable */ 112 113 #define E82545_NVM_EEPROM_SIZE 64 /* 64 * 16-bit values == 128K */ 114 115 #define E1000_ICR_SRPD 0x00010000 116 117 /* This is an arbitrary number. There is no hard limit on the chip. */ 118 #define I82545_MAX_TXSEGS 64 119 120 /* Legacy receive descriptor */ 121 struct e1000_rx_desc { 122 uint64_t buffer_addr; /* Address of the descriptor's data buffer */ 123 uint16_t length; /* Length of data DMAed into data buffer */ 124 uint16_t csum; /* Packet checksum */ 125 uint8_t status; /* Descriptor status */ 126 uint8_t errors; /* Descriptor Errors */ 127 uint16_t special; 128 }; 129 130 /* Transmit descriptor types */ 131 #define E1000_TXD_MASK (E1000_TXD_CMD_DEXT | 0x00F00000) 132 #define E1000_TXD_TYP_L (0) 133 #define E1000_TXD_TYP_C (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_C) 134 #define E1000_TXD_TYP_D (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D) 135 136 /* Legacy transmit descriptor */ 137 struct e1000_tx_desc { 138 uint64_t buffer_addr; /* Address of the descriptor's data buffer */ 139 union { 140 uint32_t data; 141 struct { 142 uint16_t length; /* Data buffer length */ 143 uint8_t cso; /* Checksum offset */ 144 uint8_t cmd; /* Descriptor control */ 145 } flags; 146 } lower; 147 union { 148 uint32_t data; 149 struct { 150 uint8_t status; /* Descriptor status */ 151 uint8_t css; /* Checksum start */ 152 uint16_t special; 153 } fields; 154 } upper; 155 }; 156 157 /* Context descriptor */ 158 struct e1000_context_desc { 159 union { 160 uint32_t ip_config; 161 struct { 162 uint8_t ipcss; /* IP checksum start */ 163 uint8_t ipcso; /* IP checksum offset */ 164 uint16_t ipcse; /* IP checksum end */ 165 } ip_fields; 166 } lower_setup; 167 union { 168 uint32_t tcp_config; 169 struct { 170 uint8_t tucss; /* TCP checksum start */ 171 uint8_t tucso; /* TCP checksum offset */ 172 uint16_t tucse; /* TCP checksum end */ 173 } tcp_fields; 174 } upper_setup; 175 uint32_t cmd_and_length; 176 union { 177 uint32_t data; 178 struct { 179 uint8_t status; /* Descriptor status */ 180 uint8_t hdr_len; /* Header length */ 181 uint16_t mss; /* Maximum segment size */ 182 } fields; 183 } tcp_seg_setup; 184 }; 185 186 /* Data descriptor */ 187 struct e1000_data_desc { 188 uint64_t buffer_addr; /* Address of the descriptor's buffer address */ 189 union { 190 uint32_t data; 191 struct { 192 uint16_t length; /* Data buffer length */ 193 uint8_t typ_len_ext; 194 uint8_t cmd; 195 } flags; 196 } lower; 197 union { 198 uint32_t data; 199 struct { 200 uint8_t status; /* Descriptor status */ 201 uint8_t popts; /* Packet Options */ 202 uint16_t special; 203 } fields; 204 } upper; 205 }; 206 207 union e1000_tx_udesc { 208 struct e1000_tx_desc td; 209 struct e1000_context_desc cd; 210 struct e1000_data_desc dd; 211 }; 212 213 /* Tx checksum info for a packet. */ 214 struct ck_info { 215 int ck_valid; /* ck_info is valid */ 216 uint8_t ck_start; /* start byte of cksum calcuation */ 217 uint8_t ck_off; /* offset of cksum insertion */ 218 uint16_t ck_len; /* length of cksum calc: 0 is to packet-end */ 219 }; 220 221 /* 222 * Debug printf 223 */ 224 static int e82545_debug = 0; 225 #define DPRINTF(msg,params...) if (e82545_debug) fprintf(stderr, "e82545: " msg, params) 226 #define WPRINTF(msg,params...) fprintf(stderr, "e82545: " msg, params) 227 228 #define MIN(a,b) (((a)<(b))?(a):(b)) 229 #define MAX(a,b) (((a)>(b))?(a):(b)) 230 231 /* s/w representation of the RAL/RAH regs */ 232 struct eth_uni { 233 int eu_valid; 234 int eu_addrsel; 235 struct ether_addr eu_eth; 236 }; 237 238 239 struct e82545_softc { 240 struct pci_devinst *esc_pi; 241 struct vmctx *esc_ctx; 242 struct mevent *esc_mevp; 243 struct mevent *esc_mevpitr; 244 pthread_mutex_t esc_mtx; 245 struct ether_addr esc_mac; 246 int esc_tapfd; 247 248 /* General */ 249 uint32_t esc_CTRL; /* x0000 device ctl */ 250 uint32_t esc_FCAL; /* x0028 flow ctl addr lo */ 251 uint32_t esc_FCAH; /* x002C flow ctl addr hi */ 252 uint32_t esc_FCT; /* x0030 flow ctl type */ 253 uint32_t esc_VET; /* x0038 VLAN eth type */ 254 uint32_t esc_FCTTV; /* x0170 flow ctl tx timer */ 255 uint32_t esc_LEDCTL; /* x0E00 LED control */ 256 uint32_t esc_PBA; /* x1000 pkt buffer allocation */ 257 258 /* Interrupt control */ 259 int esc_irq_asserted; 260 uint32_t esc_ICR; /* x00C0 cause read/clear */ 261 uint32_t esc_ITR; /* x00C4 intr throttling */ 262 uint32_t esc_ICS; /* x00C8 cause set */ 263 uint32_t esc_IMS; /* x00D0 mask set/read */ 264 uint32_t esc_IMC; /* x00D8 mask clear */ 265 266 /* Transmit */ 267 union e1000_tx_udesc *esc_txdesc; 268 struct e1000_context_desc esc_txctx; 269 pthread_t esc_tx_tid; 270 pthread_cond_t esc_tx_cond; 271 int esc_tx_enabled; 272 int esc_tx_active; 273 uint32_t esc_TXCW; /* x0178 transmit config */ 274 uint32_t esc_TCTL; /* x0400 transmit ctl */ 275 uint32_t esc_TIPG; /* x0410 inter-packet gap */ 276 uint16_t esc_AIT; /* x0458 Adaptive Interframe Throttle */ 277 uint64_t esc_tdba; /* verified 64-bit desc table addr */ 278 uint32_t esc_TDBAL; /* x3800 desc table addr, low bits */ 279 uint32_t esc_TDBAH; /* x3804 desc table addr, hi 32-bits */ 280 uint32_t esc_TDLEN; /* x3808 # descriptors in bytes */ 281 uint16_t esc_TDH; /* x3810 desc table head idx */ 282 uint16_t esc_TDHr; /* internal read version of TDH */ 283 uint16_t esc_TDT; /* x3818 desc table tail idx */ 284 uint32_t esc_TIDV; /* x3820 intr delay */ 285 uint32_t esc_TXDCTL; /* x3828 desc control */ 286 uint32_t esc_TADV; /* x382C intr absolute delay */ 287 288 /* L2 frame acceptance */ 289 struct eth_uni esc_uni[16]; /* 16 x unicast MAC addresses */ 290 uint32_t esc_fmcast[128]; /* Multicast filter bit-match */ 291 uint32_t esc_fvlan[128]; /* VLAN 4096-bit filter */ 292 293 /* Receive */ 294 struct e1000_rx_desc *esc_rxdesc; 295 pthread_cond_t esc_rx_cond; 296 int esc_rx_enabled; 297 int esc_rx_active; 298 int esc_rx_loopback; 299 uint32_t esc_RCTL; /* x0100 receive ctl */ 300 uint32_t esc_FCRTL; /* x2160 flow cntl thresh, low */ 301 uint32_t esc_FCRTH; /* x2168 flow cntl thresh, hi */ 302 uint64_t esc_rdba; /* verified 64-bit desc table addr */ 303 uint32_t esc_RDBAL; /* x2800 desc table addr, low bits */ 304 uint32_t esc_RDBAH; /* x2804 desc table addr, hi 32-bits*/ 305 uint32_t esc_RDLEN; /* x2808 #descriptors */ 306 uint16_t esc_RDH; /* x2810 desc table head idx */ 307 uint16_t esc_RDT; /* x2818 desc table tail idx */ 308 uint32_t esc_RDTR; /* x2820 intr delay */ 309 uint32_t esc_RXDCTL; /* x2828 desc control */ 310 uint32_t esc_RADV; /* x282C intr absolute delay */ 311 uint32_t esc_RSRPD; /* x2C00 recv small packet detect */ 312 uint32_t esc_RXCSUM; /* x5000 receive cksum ctl */ 313 314 /* IO Port register access */ 315 uint32_t io_addr; 316 317 /* Shadow copy of MDIC */ 318 uint32_t mdi_control; 319 /* Shadow copy of EECD */ 320 uint32_t eeprom_control; 321 /* Latest NVM in/out */ 322 uint16_t nvm_data; 323 uint16_t nvm_opaddr; 324 /* stats */ 325 uint32_t missed_pkt_count; /* dropped for no room in rx queue */ 326 uint32_t pkt_rx_by_size[6]; 327 uint32_t pkt_tx_by_size[6]; 328 uint32_t good_pkt_rx_count; 329 uint32_t bcast_pkt_rx_count; 330 uint32_t mcast_pkt_rx_count; 331 uint32_t good_pkt_tx_count; 332 uint32_t bcast_pkt_tx_count; 333 uint32_t mcast_pkt_tx_count; 334 uint32_t oversize_rx_count; 335 uint32_t tso_tx_count; 336 uint64_t good_octets_rx; 337 uint64_t good_octets_tx; 338 uint64_t missed_octets; /* counts missed and oversized */ 339 340 uint8_t nvm_bits:6; /* number of bits remaining in/out */ 341 uint8_t nvm_mode:2; 342 #define E82545_NVM_MODE_OPADDR 0x0 343 #define E82545_NVM_MODE_DATAIN 0x1 344 #define E82545_NVM_MODE_DATAOUT 0x2 345 /* EEPROM data */ 346 uint16_t eeprom_data[E82545_NVM_EEPROM_SIZE]; 347 }; 348 349 static void e82545_reset(struct e82545_softc *sc, int dev); 350 static void e82545_rx_enable(struct e82545_softc *sc); 351 static void e82545_rx_disable(struct e82545_softc *sc); 352 static void e82545_tap_callback(int fd, enum ev_type type, void *param); 353 static void e82545_tx_start(struct e82545_softc *sc); 354 static void e82545_tx_enable(struct e82545_softc *sc); 355 static void e82545_tx_disable(struct e82545_softc *sc); 356 357 static inline int 358 e82545_size_stat_index(uint32_t size) 359 { 360 if (size <= 64) { 361 return 0; 362 } else if (size >= 1024) { 363 return 5; 364 } else { 365 /* should be 1-4 */ 366 return (ffs(size) - 6); 367 } 368 } 369 370 static void 371 e82545_init_eeprom(struct e82545_softc *sc) 372 { 373 uint16_t checksum, i; 374 375 /* mac addr */ 376 sc->eeprom_data[NVM_MAC_ADDR] = ((uint16_t)sc->esc_mac.octet[0]) | 377 (((uint16_t)sc->esc_mac.octet[1]) << 8); 378 sc->eeprom_data[NVM_MAC_ADDR+1] = ((uint16_t)sc->esc_mac.octet[2]) | 379 (((uint16_t)sc->esc_mac.octet[3]) << 8); 380 sc->eeprom_data[NVM_MAC_ADDR+2] = ((uint16_t)sc->esc_mac.octet[4]) | 381 (((uint16_t)sc->esc_mac.octet[5]) << 8); 382 383 /* pci ids */ 384 sc->eeprom_data[NVM_SUB_DEV_ID] = E82545_SUBDEV_ID; 385 sc->eeprom_data[NVM_SUB_VEN_ID] = E82545_VENDOR_ID_INTEL; 386 sc->eeprom_data[NVM_DEV_ID] = E82545_DEV_ID_82545EM_COPPER; 387 sc->eeprom_data[NVM_VEN_ID] = E82545_VENDOR_ID_INTEL; 388 389 /* fill in the checksum */ 390 checksum = 0; 391 for (i = 0; i < NVM_CHECKSUM_REG; i++) { 392 checksum += sc->eeprom_data[i]; 393 } 394 checksum = NVM_SUM - checksum; 395 sc->eeprom_data[NVM_CHECKSUM_REG] = checksum; 396 DPRINTF("eeprom checksum: 0x%x\r\n", checksum); 397 } 398 399 static void 400 e82545_write_mdi(struct e82545_softc *sc, uint8_t reg_addr, 401 uint8_t phy_addr, uint32_t data) 402 { 403 DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x\r\n", reg_addr, phy_addr, data); 404 } 405 406 static uint32_t 407 e82545_read_mdi(struct e82545_softc *sc, uint8_t reg_addr, 408 uint8_t phy_addr) 409 { 410 //DPRINTF("Read mdi reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr); 411 switch (reg_addr) { 412 case PHY_STATUS: 413 return (MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS | 414 MII_SR_AUTONEG_COMPLETE); 415 case PHY_AUTONEG_ADV: 416 return NWAY_AR_SELECTOR_FIELD; 417 case PHY_LP_ABILITY: 418 return 0; 419 case PHY_1000T_STATUS: 420 return (SR_1000T_LP_FD_CAPS | SR_1000T_REMOTE_RX_STATUS | 421 SR_1000T_LOCAL_RX_STATUS); 422 case PHY_ID1: 423 return (M88E1011_I_PHY_ID >> 16) & 0xFFFF; 424 case PHY_ID2: 425 return (M88E1011_I_PHY_ID | E82545_REVISION_4) & 0xFFFF; 426 default: 427 DPRINTF("Unknown mdi read reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr); 428 return 0; 429 } 430 /* not reached */ 431 } 432 433 static void 434 e82545_eecd_strobe(struct e82545_softc *sc) 435 { 436 /* Microwire state machine */ 437 /* 438 DPRINTF("eeprom state machine srtobe " 439 "0x%x 0x%x 0x%x 0x%x\r\n", 440 sc->nvm_mode, sc->nvm_bits, 441 sc->nvm_opaddr, sc->nvm_data);*/ 442 443 if (sc->nvm_bits == 0) { 444 DPRINTF("eeprom state machine not expecting data! " 445 "0x%x 0x%x 0x%x 0x%x\r\n", 446 sc->nvm_mode, sc->nvm_bits, 447 sc->nvm_opaddr, sc->nvm_data); 448 return; 449 } 450 sc->nvm_bits--; 451 if (sc->nvm_mode == E82545_NVM_MODE_DATAOUT) { 452 /* shifting out */ 453 if (sc->nvm_data & 0x8000) { 454 sc->eeprom_control |= E1000_EECD_DO; 455 } else { 456 sc->eeprom_control &= ~E1000_EECD_DO; 457 } 458 sc->nvm_data <<= 1; 459 if (sc->nvm_bits == 0) { 460 /* read done, back to opcode mode. */ 461 sc->nvm_opaddr = 0; 462 sc->nvm_mode = E82545_NVM_MODE_OPADDR; 463 sc->nvm_bits = E82545_NVM_OPADDR_BITS; 464 } 465 } else if (sc->nvm_mode == E82545_NVM_MODE_DATAIN) { 466 /* shifting in */ 467 sc->nvm_data <<= 1; 468 if (sc->eeprom_control & E1000_EECD_DI) { 469 sc->nvm_data |= 1; 470 } 471 if (sc->nvm_bits == 0) { 472 /* eeprom write */ 473 uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; 474 uint16_t addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK; 475 if (op != E82545_NVM_OPCODE_WRITE) { 476 DPRINTF("Illegal eeprom write op 0x%x\r\n", 477 sc->nvm_opaddr); 478 } else if (addr >= E82545_NVM_EEPROM_SIZE) { 479 DPRINTF("Illegal eeprom write addr 0x%x\r\n", 480 sc->nvm_opaddr); 481 } else { 482 DPRINTF("eeprom write eeprom[0x%x] = 0x%x\r\n", 483 addr, sc->nvm_data); 484 sc->eeprom_data[addr] = sc->nvm_data; 485 } 486 /* back to opcode mode */ 487 sc->nvm_opaddr = 0; 488 sc->nvm_mode = E82545_NVM_MODE_OPADDR; 489 sc->nvm_bits = E82545_NVM_OPADDR_BITS; 490 } 491 } else if (sc->nvm_mode == E82545_NVM_MODE_OPADDR) { 492 sc->nvm_opaddr <<= 1; 493 if (sc->eeprom_control & E1000_EECD_DI) { 494 sc->nvm_opaddr |= 1; 495 } 496 if (sc->nvm_bits == 0) { 497 uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK; 498 switch (op) { 499 case E82545_NVM_OPCODE_EWEN: 500 DPRINTF("eeprom write enable: 0x%x\r\n", 501 sc->nvm_opaddr); 502 /* back to opcode mode */ 503 sc->nvm_opaddr = 0; 504 sc->nvm_mode = E82545_NVM_MODE_OPADDR; 505 sc->nvm_bits = E82545_NVM_OPADDR_BITS; 506 break; 507 case E82545_NVM_OPCODE_READ: 508 { 509 uint16_t addr = sc->nvm_opaddr & 510 E82545_NVM_ADDR_MASK; 511 sc->nvm_mode = E82545_NVM_MODE_DATAOUT; 512 sc->nvm_bits = E82545_NVM_DATA_BITS; 513 if (addr < E82545_NVM_EEPROM_SIZE) { 514 sc->nvm_data = sc->eeprom_data[addr]; 515 DPRINTF("eeprom read: eeprom[0x%x] = 0x%x\r\n", 516 addr, sc->nvm_data); 517 } else { 518 DPRINTF("eeprom illegal read: 0x%x\r\n", 519 sc->nvm_opaddr); 520 sc->nvm_data = 0; 521 } 522 break; 523 } 524 case E82545_NVM_OPCODE_WRITE: 525 sc->nvm_mode = E82545_NVM_MODE_DATAIN; 526 sc->nvm_bits = E82545_NVM_DATA_BITS; 527 sc->nvm_data = 0; 528 break; 529 default: 530 DPRINTF("eeprom unknown op: 0x%x\r\r", 531 sc->nvm_opaddr); 532 /* back to opcode mode */ 533 sc->nvm_opaddr = 0; 534 sc->nvm_mode = E82545_NVM_MODE_OPADDR; 535 sc->nvm_bits = E82545_NVM_OPADDR_BITS; 536 } 537 } 538 } else { 539 DPRINTF("eeprom state machine wrong state! " 540 "0x%x 0x%x 0x%x 0x%x\r\n", 541 sc->nvm_mode, sc->nvm_bits, 542 sc->nvm_opaddr, sc->nvm_data); 543 } 544 } 545 546 static void 547 e82545_itr_callback(int fd, enum ev_type type, void *param) 548 { 549 uint32_t new; 550 struct e82545_softc *sc = param; 551 552 pthread_mutex_lock(&sc->esc_mtx); 553 new = sc->esc_ICR & sc->esc_IMS; 554 if (new && !sc->esc_irq_asserted) { 555 DPRINTF("itr callback: lintr assert %x\r\n", new); 556 sc->esc_irq_asserted = 1; 557 pci_lintr_assert(sc->esc_pi); 558 } else { 559 mevent_delete(sc->esc_mevpitr); 560 sc->esc_mevpitr = NULL; 561 } 562 pthread_mutex_unlock(&sc->esc_mtx); 563 } 564 565 static void 566 e82545_icr_assert(struct e82545_softc *sc, uint32_t bits) 567 { 568 uint32_t new; 569 570 DPRINTF("icr assert: 0x%x\r\n", bits); 571 572 /* 573 * An interrupt is only generated if bits are set that 574 * aren't already in the ICR, these bits are unmasked, 575 * and there isn't an interrupt already pending. 576 */ 577 new = bits & ~sc->esc_ICR & sc->esc_IMS; 578 sc->esc_ICR |= bits; 579 580 if (new == 0) { 581 DPRINTF("icr assert: masked %x, ims %x\r\n", new, sc->esc_IMS); 582 } else if (sc->esc_mevpitr != NULL) { 583 DPRINTF("icr assert: throttled %x, ims %x\r\n", new, sc->esc_IMS); 584 } else if (!sc->esc_irq_asserted) { 585 DPRINTF("icr assert: lintr assert %x\r\n", new); 586 sc->esc_irq_asserted = 1; 587 pci_lintr_assert(sc->esc_pi); 588 if (sc->esc_ITR != 0) { 589 sc->esc_mevpitr = mevent_add( 590 (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ 591 EVF_TIMER, e82545_itr_callback, sc); 592 } 593 } 594 } 595 596 static void 597 e82545_ims_change(struct e82545_softc *sc, uint32_t bits) 598 { 599 uint32_t new; 600 601 /* 602 * Changing the mask may allow previously asserted 603 * but masked interrupt requests to generate an interrupt. 604 */ 605 new = bits & sc->esc_ICR & ~sc->esc_IMS; 606 sc->esc_IMS |= bits; 607 608 if (new == 0) { 609 DPRINTF("ims change: masked %x, ims %x\r\n", new, sc->esc_IMS); 610 } else if (sc->esc_mevpitr != NULL) { 611 DPRINTF("ims change: throttled %x, ims %x\r\n", new, sc->esc_IMS); 612 } else if (!sc->esc_irq_asserted) { 613 DPRINTF("ims change: lintr assert %x\n\r", new); 614 sc->esc_irq_asserted = 1; 615 pci_lintr_assert(sc->esc_pi); 616 if (sc->esc_ITR != 0) { 617 sc->esc_mevpitr = mevent_add( 618 (sc->esc_ITR + 3905) / 3906, /* 256ns -> 1ms */ 619 EVF_TIMER, e82545_itr_callback, sc); 620 } 621 } 622 } 623 624 static void 625 e82545_icr_deassert(struct e82545_softc *sc, uint32_t bits) 626 { 627 628 DPRINTF("icr deassert: 0x%x\r\n", bits); 629 sc->esc_ICR &= ~bits; 630 631 /* 632 * If there are no longer any interrupt sources and there 633 * was an asserted interrupt, clear it 634 */ 635 if (sc->esc_irq_asserted && !(sc->esc_ICR & sc->esc_IMS)) { 636 DPRINTF("icr deassert: lintr deassert %x\r\n", bits); 637 pci_lintr_deassert(sc->esc_pi); 638 sc->esc_irq_asserted = 0; 639 } 640 } 641 642 static void 643 e82545_intr_write(struct e82545_softc *sc, uint32_t offset, uint32_t value) 644 { 645 646 DPRINTF("intr_write: off %x, val %x\n\r", offset, value); 647 648 switch (offset) { 649 case E1000_ICR: 650 e82545_icr_deassert(sc, value); 651 break; 652 case E1000_ITR: 653 sc->esc_ITR = value; 654 break; 655 case E1000_ICS: 656 sc->esc_ICS = value; /* not used: store for debug */ 657 e82545_icr_assert(sc, value); 658 break; 659 case E1000_IMS: 660 e82545_ims_change(sc, value); 661 break; 662 case E1000_IMC: 663 sc->esc_IMC = value; /* for debug */ 664 sc->esc_IMS &= ~value; 665 // XXX clear interrupts if all ICR bits now masked 666 // and interrupt was pending ? 667 break; 668 default: 669 break; 670 } 671 } 672 673 static uint32_t 674 e82545_intr_read(struct e82545_softc *sc, uint32_t offset) 675 { 676 uint32_t retval; 677 678 retval = 0; 679 680 DPRINTF("intr_read: off %x\n\r", offset); 681 682 switch (offset) { 683 case E1000_ICR: 684 retval = sc->esc_ICR; 685 sc->esc_ICR = 0; 686 e82545_icr_deassert(sc, ~0); 687 break; 688 case E1000_ITR: 689 retval = sc->esc_ITR; 690 break; 691 case E1000_ICS: 692 /* write-only register */ 693 break; 694 case E1000_IMS: 695 retval = sc->esc_IMS; 696 break; 697 case E1000_IMC: 698 /* write-only register */ 699 break; 700 default: 701 break; 702 } 703 704 return (retval); 705 } 706 707 static void 708 e82545_devctl(struct e82545_softc *sc, uint32_t val) 709 { 710 711 sc->esc_CTRL = val & ~E1000_CTRL_RST; 712 713 if (val & E1000_CTRL_RST) { 714 DPRINTF("e1k: s/w reset, ctl %x\n", val); 715 e82545_reset(sc, 1); 716 } 717 /* XXX check for phy reset ? */ 718 } 719 720 static void 721 e82545_rx_update_rdba(struct e82545_softc *sc) 722 { 723 724 /* XXX verify desc base/len within phys mem range */ 725 sc->esc_rdba = (uint64_t)sc->esc_RDBAH << 32 | 726 sc->esc_RDBAL; 727 728 /* Cache host mapping of guest descriptor array */ 729 sc->esc_rxdesc = paddr_guest2host(sc->esc_ctx, 730 sc->esc_rdba, sc->esc_RDLEN); 731 } 732 733 static void 734 e82545_rx_ctl(struct e82545_softc *sc, uint32_t val) 735 { 736 int on; 737 738 on = ((val & E1000_RCTL_EN) == E1000_RCTL_EN); 739 740 /* Save RCTL after stripping reserved bits 31:27,24,21,14,11:10,0 */ 741 sc->esc_RCTL = val & ~0xF9204c01; 742 743 DPRINTF("rx_ctl - %s RCTL %x, val %x\n", 744 on ? "on" : "off", sc->esc_RCTL, val); 745 746 /* state change requested */ 747 if (on != sc->esc_rx_enabled) { 748 if (on) { 749 /* Catch disallowed/unimplemented settings */ 750 //assert(!(val & E1000_RCTL_LBM_TCVR)); 751 752 if (sc->esc_RCTL & E1000_RCTL_LBM_TCVR) { 753 sc->esc_rx_loopback = 1; 754 } else { 755 sc->esc_rx_loopback = 0; 756 } 757 758 e82545_rx_update_rdba(sc); 759 e82545_rx_enable(sc); 760 } else { 761 e82545_rx_disable(sc); 762 sc->esc_rx_loopback = 0; 763 sc->esc_rdba = 0; 764 sc->esc_rxdesc = NULL; 765 } 766 } 767 } 768 769 static void 770 e82545_tx_update_tdba(struct e82545_softc *sc) 771 { 772 773 /* XXX verify desc base/len within phys mem range */ 774 sc->esc_tdba = (uint64_t)sc->esc_TDBAH << 32 | sc->esc_TDBAL; 775 776 /* Cache host mapping of guest descriptor array */ 777 sc->esc_txdesc = paddr_guest2host(sc->esc_ctx, sc->esc_tdba, 778 sc->esc_TDLEN); 779 } 780 781 static void 782 e82545_tx_ctl(struct e82545_softc *sc, uint32_t val) 783 { 784 int on; 785 786 on = ((val & E1000_TCTL_EN) == E1000_TCTL_EN); 787 788 /* ignore TCTL_EN settings that don't change state */ 789 if (on == sc->esc_tx_enabled) 790 return; 791 792 if (on) { 793 e82545_tx_update_tdba(sc); 794 e82545_tx_enable(sc); 795 } else { 796 e82545_tx_disable(sc); 797 sc->esc_tdba = 0; 798 sc->esc_txdesc = NULL; 799 } 800 801 /* Save TCTL value after stripping reserved bits 31:25,23,2,0 */ 802 sc->esc_TCTL = val & ~0xFE800005; 803 } 804 805 int 806 e82545_bufsz(uint32_t rctl) 807 { 808 809 switch (rctl & (E1000_RCTL_BSEX | E1000_RCTL_SZ_256)) { 810 case (E1000_RCTL_SZ_2048): return (2048); 811 case (E1000_RCTL_SZ_1024): return (1024); 812 case (E1000_RCTL_SZ_512): return (512); 813 case (E1000_RCTL_SZ_256): return (256); 814 case (E1000_RCTL_BSEX|E1000_RCTL_SZ_16384): return (16384); 815 case (E1000_RCTL_BSEX|E1000_RCTL_SZ_8192): return (8192); 816 case (E1000_RCTL_BSEX|E1000_RCTL_SZ_4096): return (4096); 817 } 818 return (256); /* Forbidden value. */ 819 } 820 821 static uint8_t dummybuf[2048]; 822 823 /* XXX one packet at a time until this is debugged */ 824 static void 825 e82545_tap_callback(int fd, enum ev_type type, void *param) 826 { 827 struct e82545_softc *sc = param; 828 struct e1000_rx_desc *rxd; 829 struct iovec vec[64]; 830 int left, len, lim, maxpktsz, maxpktdesc, bufsz, i, n, size; 831 uint32_t cause = 0; 832 uint16_t *tp, tag, head; 833 834 pthread_mutex_lock(&sc->esc_mtx); 835 DPRINTF("rx_run: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT); 836 837 if (!sc->esc_rx_enabled || sc->esc_rx_loopback) { 838 DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped\r\n", 839 sc->esc_rx_enabled, sc->esc_rx_loopback); 840 while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) { 841 } 842 goto done1; 843 } 844 bufsz = e82545_bufsz(sc->esc_RCTL); 845 maxpktsz = (sc->esc_RCTL & E1000_RCTL_LPE) ? 16384 : 1522; 846 maxpktdesc = (maxpktsz + bufsz - 1) / bufsz; 847 size = sc->esc_RDLEN / 16; 848 head = sc->esc_RDH; 849 left = (size + sc->esc_RDT - head) % size; 850 if (left < maxpktdesc) { 851 DPRINTF("rx overflow (%d < %d) -- packet(s) dropped\r\n", 852 left, maxpktdesc); 853 while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) { 854 } 855 goto done1; 856 } 857 858 sc->esc_rx_active = 1; 859 pthread_mutex_unlock(&sc->esc_mtx); 860 861 for (lim = size / 4; lim > 0 && left >= maxpktdesc; lim -= n) { 862 863 /* Grab rx descriptor pointed to by the head pointer */ 864 for (i = 0; i < maxpktdesc; i++) { 865 rxd = &sc->esc_rxdesc[(head + i) % size]; 866 vec[i].iov_base = paddr_guest2host(sc->esc_ctx, 867 rxd->buffer_addr, bufsz); 868 vec[i].iov_len = bufsz; 869 } 870 len = readv(sc->esc_tapfd, vec, maxpktdesc); 871 if (len <= 0) { 872 DPRINTF("tap: readv() returned %d\n", len); 873 goto done; 874 } 875 876 /* 877 * Adjust the packet length based on whether the CRC needs 878 * to be stripped or if the packet is less than the minimum 879 * eth packet size. 880 */ 881 if (len < ETHER_MIN_LEN - ETHER_CRC_LEN) 882 len = ETHER_MIN_LEN - ETHER_CRC_LEN; 883 if (!(sc->esc_RCTL & E1000_RCTL_SECRC)) 884 len += ETHER_CRC_LEN; 885 n = (len + bufsz - 1) / bufsz; 886 887 DPRINTF("packet read %d bytes, %d segs, head %d\r\n", 888 len, n, head); 889 890 /* Apply VLAN filter. */ 891 tp = (uint16_t *)vec[0].iov_base + 6; 892 if ((sc->esc_RCTL & E1000_RCTL_VFE) && 893 (ntohs(tp[0]) == sc->esc_VET)) { 894 tag = ntohs(tp[1]) & 0x0fff; 895 if ((sc->esc_fvlan[tag >> 5] & 896 (1 << (tag & 0x1f))) != 0) { 897 DPRINTF("known VLAN %d\r\n", tag); 898 } else { 899 DPRINTF("unknown VLAN %d\r\n", tag); 900 n = 0; 901 continue; 902 } 903 } 904 905 /* Update all consumed descriptors. */ 906 for (i = 0; i < n - 1; i++) { 907 rxd = &sc->esc_rxdesc[(head + i) % size]; 908 rxd->length = bufsz; 909 rxd->csum = 0; 910 rxd->errors = 0; 911 rxd->special = 0; 912 rxd->status = E1000_RXD_STAT_DD; 913 } 914 rxd = &sc->esc_rxdesc[(head + i) % size]; 915 rxd->length = len % bufsz; 916 rxd->csum = 0; 917 rxd->errors = 0; 918 rxd->special = 0; 919 /* XXX signal no checksum for now */ 920 rxd->status = E1000_RXD_STAT_PIF | E1000_RXD_STAT_IXSM | 921 E1000_RXD_STAT_EOP | E1000_RXD_STAT_DD; 922 923 /* Schedule receive interrupts. */ 924 if (len <= sc->esc_RSRPD) { 925 cause |= E1000_ICR_SRPD | E1000_ICR_RXT0; 926 } else { 927 /* XXX: RDRT and RADV timers should be here. */ 928 cause |= E1000_ICR_RXT0; 929 } 930 931 head = (head + n) % size; 932 left -= n; 933 } 934 935 done: 936 pthread_mutex_lock(&sc->esc_mtx); 937 sc->esc_rx_active = 0; 938 if (sc->esc_rx_enabled == 0) 939 pthread_cond_signal(&sc->esc_rx_cond); 940 941 sc->esc_RDH = head; 942 /* Respect E1000_RCTL_RDMTS */ 943 left = (size + sc->esc_RDT - head) % size; 944 if (left < (size >> (((sc->esc_RCTL >> 8) & 3) + 1))) 945 cause |= E1000_ICR_RXDMT0; 946 /* Assert all accumulated interrupts. */ 947 if (cause != 0) 948 e82545_icr_assert(sc, cause); 949 done1: 950 DPRINTF("rx_run done: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT); 951 pthread_mutex_unlock(&sc->esc_mtx); 952 } 953 954 static uint16_t 955 e82545_carry(uint32_t sum) 956 { 957 958 sum = (sum & 0xFFFF) + (sum >> 16); 959 if (sum > 0xFFFF) 960 sum -= 0xFFFF; 961 return (sum); 962 } 963 964 static uint16_t 965 e82545_buf_checksum(uint8_t *buf, int len) 966 { 967 int i; 968 uint32_t sum = 0; 969 970 /* Checksum all the pairs of bytes first... */ 971 for (i = 0; i < (len & ~1U); i += 2) 972 sum += *((u_int16_t *)(buf + i)); 973 974 /* 975 * If there's a single byte left over, checksum it, too. 976 * Network byte order is big-endian, so the remaining byte is 977 * the high byte. 978 */ 979 if (i < len) 980 sum += htons(buf[i] << 8); 981 982 return (e82545_carry(sum)); 983 } 984 985 static uint16_t 986 e82545_iov_checksum(struct iovec *iov, int iovcnt, int off, int len) 987 { 988 int now, odd; 989 uint32_t sum = 0, s; 990 991 /* Skip completely unneeded vectors. */ 992 while (iovcnt > 0 && iov->iov_len <= off && off > 0) { 993 off -= iov->iov_len; 994 iov++; 995 iovcnt--; 996 } 997 998 /* Calculate checksum of requested range. */ 999 odd = 0; 1000 while (len > 0 && iovcnt > 0) { 1001 now = MIN(len, iov->iov_len - off); 1002 s = e82545_buf_checksum(iov->iov_base + off, now); 1003 sum += odd ? (s << 8) : s; 1004 odd ^= (now & 1); 1005 len -= now; 1006 off = 0; 1007 iov++; 1008 iovcnt--; 1009 } 1010 1011 return (e82545_carry(sum)); 1012 } 1013 1014 /* 1015 * Return the transmit descriptor type. 1016 */ 1017 int 1018 e82545_txdesc_type(uint32_t lower) 1019 { 1020 int type; 1021 1022 type = 0; 1023 1024 if (lower & E1000_TXD_CMD_DEXT) 1025 type = lower & E1000_TXD_MASK; 1026 1027 return (type); 1028 } 1029 1030 static void 1031 e82545_transmit_checksum(struct iovec *iov, int iovcnt, struct ck_info *ck) 1032 { 1033 uint16_t cksum; 1034 int cklen; 1035 1036 DPRINTF("tx cksum: iovcnt/s/off/len %d/%d/%d/%d\r\n", 1037 iovcnt, ck->ck_start, ck->ck_off, ck->ck_len); 1038 cklen = ck->ck_len ? ck->ck_len - ck->ck_start + 1 : INT_MAX; 1039 cksum = e82545_iov_checksum(iov, iovcnt, ck->ck_start, cklen); 1040 *(uint16_t *)((uint8_t *)iov[0].iov_base + ck->ck_off) = ~cksum; 1041 } 1042 1043 static void 1044 e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt) 1045 { 1046 1047 if (sc->esc_tapfd == -1) 1048 return; 1049 1050 (void) writev(sc->esc_tapfd, iov, iovcnt); 1051 } 1052 1053 static void 1054 e82545_transmit_done(struct e82545_softc *sc, uint16_t head, uint16_t tail, 1055 uint16_t dsize, int *tdwb) 1056 { 1057 union e1000_tx_udesc *dsc; 1058 1059 for ( ; head != tail; head = (head + 1) % dsize) { 1060 dsc = &sc->esc_txdesc[head]; 1061 if (dsc->td.lower.data & E1000_TXD_CMD_RS) { 1062 dsc->td.upper.data |= E1000_TXD_STAT_DD; 1063 *tdwb = 1; 1064 } 1065 } 1066 } 1067 1068 static int 1069 e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail, 1070 uint16_t dsize, uint16_t *rhead, int *tdwb) 1071 { 1072 uint8_t *hdr, *hdrp; 1073 struct iovec iovb[I82545_MAX_TXSEGS + 2]; 1074 struct iovec tiov[I82545_MAX_TXSEGS + 2]; 1075 struct e1000_context_desc *cd; 1076 struct ck_info ckinfo[2]; 1077 struct iovec *iov; 1078 union e1000_tx_udesc *dsc; 1079 int desc, dtype, len, ntype, iovcnt, tlen, hdrlen, vlen, tcp, tso; 1080 int mss, paylen, seg, tiovcnt, left, now, nleft, nnow, pv, pvoff; 1081 uint32_t tcpsum, tcpseq; 1082 uint16_t ipcs, tcpcs, ipid, ohead; 1083 1084 ckinfo[0].ck_valid = ckinfo[1].ck_valid = 0; 1085 iovcnt = 0; 1086 tlen = 0; 1087 ntype = 0; 1088 tso = 0; 1089 ohead = head; 1090 1091 /* iovb[0/1] may be used for writable copy of headers. */ 1092 iov = &iovb[2]; 1093 1094 for (desc = 0; ; desc++, head = (head + 1) % dsize) { 1095 if (head == tail) { 1096 *rhead = head; 1097 return (0); 1098 } 1099 dsc = &sc->esc_txdesc[head]; 1100 dtype = e82545_txdesc_type(dsc->td.lower.data); 1101 1102 if (desc == 0) { 1103 switch (dtype) { 1104 case E1000_TXD_TYP_C: 1105 DPRINTF("tx ctxt desc idx %d: %016jx " 1106 "%08x%08x\r\n", 1107 head, dsc->td.buffer_addr, 1108 dsc->td.upper.data, dsc->td.lower.data); 1109 /* Save context and return */ 1110 sc->esc_txctx = dsc->cd; 1111 goto done; 1112 case E1000_TXD_TYP_L: 1113 DPRINTF("tx legacy desc idx %d: %08x%08x\r\n", 1114 head, dsc->td.upper.data, dsc->td.lower.data); 1115 /* 1116 * legacy cksum start valid in first descriptor 1117 */ 1118 ntype = dtype; 1119 ckinfo[0].ck_start = dsc->td.upper.fields.css; 1120 break; 1121 case E1000_TXD_TYP_D: 1122 DPRINTF("tx data desc idx %d: %08x%08x\r\n", 1123 head, dsc->td.upper.data, dsc->td.lower.data); 1124 ntype = dtype; 1125 break; 1126 default: 1127 break; 1128 } 1129 } else { 1130 /* Descriptor type must be consistent */ 1131 assert(dtype == ntype); 1132 DPRINTF("tx next desc idx %d: %08x%08x\r\n", 1133 head, dsc->td.upper.data, dsc->td.lower.data); 1134 } 1135 1136 len = (dtype == E1000_TXD_TYP_L) ? dsc->td.lower.flags.length : 1137 dsc->dd.lower.data & 0xFFFFF; 1138 1139 if (len > 0) { 1140 /* Strip checksum supplied by guest. */ 1141 if ((dsc->td.lower.data & E1000_TXD_CMD_EOP) != 0 && 1142 (dsc->td.lower.data & E1000_TXD_CMD_IFCS) == 0) 1143 len -= 2; 1144 tlen += len; 1145 if (iovcnt < I82545_MAX_TXSEGS) { 1146 iov[iovcnt].iov_base = paddr_guest2host( 1147 sc->esc_ctx, dsc->td.buffer_addr, len); 1148 iov[iovcnt].iov_len = len; 1149 } 1150 iovcnt++; 1151 } 1152 1153 /* 1154 * Pull out info that is valid in the final descriptor 1155 * and exit descriptor loop. 1156 */ 1157 if (dsc->td.lower.data & E1000_TXD_CMD_EOP) { 1158 if (dtype == E1000_TXD_TYP_L) { 1159 if (dsc->td.lower.data & E1000_TXD_CMD_IC) { 1160 ckinfo[0].ck_valid = 1; 1161 ckinfo[0].ck_off = 1162 dsc->td.lower.flags.cso; 1163 ckinfo[0].ck_len = 0; 1164 } 1165 } else { 1166 cd = &sc->esc_txctx; 1167 if (dsc->dd.lower.data & E1000_TXD_CMD_TSE) 1168 tso = 1; 1169 if (dsc->dd.upper.fields.popts & 1170 E1000_TXD_POPTS_IXSM) 1171 ckinfo[0].ck_valid = 1; 1172 if (dsc->dd.upper.fields.popts & 1173 E1000_TXD_POPTS_IXSM || tso) { 1174 ckinfo[0].ck_start = 1175 cd->lower_setup.ip_fields.ipcss; 1176 ckinfo[0].ck_off = 1177 cd->lower_setup.ip_fields.ipcso; 1178 ckinfo[0].ck_len = 1179 cd->lower_setup.ip_fields.ipcse; 1180 } 1181 if (dsc->dd.upper.fields.popts & 1182 E1000_TXD_POPTS_TXSM) 1183 ckinfo[1].ck_valid = 1; 1184 if (dsc->dd.upper.fields.popts & 1185 E1000_TXD_POPTS_TXSM || tso) { 1186 ckinfo[1].ck_start = 1187 cd->upper_setup.tcp_fields.tucss; 1188 ckinfo[1].ck_off = 1189 cd->upper_setup.tcp_fields.tucso; 1190 ckinfo[1].ck_len = 1191 cd->upper_setup.tcp_fields.tucse; 1192 } 1193 } 1194 break; 1195 } 1196 } 1197 1198 if (iovcnt > I82545_MAX_TXSEGS) { 1199 WPRINTF("tx too many descriptors (%d > %d) -- dropped\r\n", 1200 iovcnt, I82545_MAX_TXSEGS); 1201 goto done; 1202 } 1203 1204 hdrlen = vlen = 0; 1205 /* Estimate writable space for VLAN header insertion. */ 1206 if ((sc->esc_CTRL & E1000_CTRL_VME) && 1207 (dsc->td.lower.data & E1000_TXD_CMD_VLE)) { 1208 hdrlen = ETHER_ADDR_LEN*2; 1209 vlen = ETHER_VLAN_ENCAP_LEN; 1210 } 1211 if (!tso) { 1212 /* Estimate required writable space for checksums. */ 1213 if (ckinfo[0].ck_valid) 1214 hdrlen = MAX(hdrlen, ckinfo[0].ck_off + 2); 1215 if (ckinfo[1].ck_valid) 1216 hdrlen = MAX(hdrlen, ckinfo[1].ck_off + 2); 1217 /* Round up writable space to the first vector. */ 1218 if (hdrlen != 0 && iov[0].iov_len > hdrlen && 1219 iov[0].iov_len < hdrlen + 100) 1220 hdrlen = iov[0].iov_len; 1221 } else { 1222 /* In case of TSO header length provided by software. */ 1223 hdrlen = sc->esc_txctx.tcp_seg_setup.fields.hdr_len; 1224 } 1225 1226 /* Allocate, fill and prepend writable header vector. */ 1227 if (hdrlen != 0) { 1228 hdr = __builtin_alloca(hdrlen + vlen); 1229 hdr += vlen; 1230 for (left = hdrlen, hdrp = hdr; left > 0; 1231 left -= now, hdrp += now) { 1232 now = MIN(left, iov->iov_len); 1233 memcpy(hdrp, iov->iov_base, now); 1234 iov->iov_base += now; 1235 iov->iov_len -= now; 1236 if (iov->iov_len == 0) { 1237 iov++; 1238 iovcnt--; 1239 } 1240 } 1241 iov--; 1242 iovcnt++; 1243 iov->iov_base = hdr; 1244 iov->iov_len = hdrlen; 1245 } 1246 1247 /* Insert VLAN tag. */ 1248 if (vlen != 0) { 1249 hdr -= ETHER_VLAN_ENCAP_LEN; 1250 memmove(hdr, hdr + ETHER_VLAN_ENCAP_LEN, ETHER_ADDR_LEN*2); 1251 hdrlen += ETHER_VLAN_ENCAP_LEN; 1252 hdr[ETHER_ADDR_LEN*2 + 0] = sc->esc_VET >> 8; 1253 hdr[ETHER_ADDR_LEN*2 + 1] = sc->esc_VET & 0xff; 1254 hdr[ETHER_ADDR_LEN*2 + 2] = dsc->td.upper.fields.special >> 8; 1255 hdr[ETHER_ADDR_LEN*2 + 3] = dsc->td.upper.fields.special & 0xff; 1256 iov->iov_base = hdr; 1257 iov->iov_len += ETHER_VLAN_ENCAP_LEN; 1258 /* Correct checksum offsets after VLAN tag insertion. */ 1259 ckinfo[0].ck_start += ETHER_VLAN_ENCAP_LEN; 1260 ckinfo[0].ck_off += ETHER_VLAN_ENCAP_LEN; 1261 if (ckinfo[0].ck_len != 0) 1262 ckinfo[0].ck_len += ETHER_VLAN_ENCAP_LEN; 1263 ckinfo[1].ck_start += ETHER_VLAN_ENCAP_LEN; 1264 ckinfo[1].ck_off += ETHER_VLAN_ENCAP_LEN; 1265 if (ckinfo[1].ck_len != 0) 1266 ckinfo[1].ck_len += ETHER_VLAN_ENCAP_LEN; 1267 } 1268 1269 /* Simple non-TSO case. */ 1270 if (!tso) { 1271 /* Calculate checksums and transmit. */ 1272 if (ckinfo[0].ck_valid) 1273 e82545_transmit_checksum(iov, iovcnt, &ckinfo[0]); 1274 if (ckinfo[1].ck_valid) 1275 e82545_transmit_checksum(iov, iovcnt, &ckinfo[1]); 1276 e82545_transmit_backend(sc, iov, iovcnt); 1277 goto done; 1278 } 1279 1280 /* Doing TSO. */ 1281 tcp = (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) != 0; 1282 mss = sc->esc_txctx.tcp_seg_setup.fields.mss; 1283 paylen = (sc->esc_txctx.cmd_and_length & 0x000fffff); 1284 DPRINTF("tx %s segmentation offload %d+%d/%d bytes %d iovs\r\n", 1285 tcp ? "TCP" : "UDP", hdrlen, paylen, mss, iovcnt); 1286 ipid = ntohs(*(uint16_t *)&hdr[ckinfo[0].ck_start + 4]); 1287 tcpseq = ntohl(*(uint32_t *)&hdr[ckinfo[1].ck_start + 4]); 1288 ipcs = *(uint16_t *)&hdr[ckinfo[0].ck_off]; 1289 tcpcs = 0; 1290 if (ckinfo[1].ck_valid) /* Save partial pseudo-header checksum. */ 1291 tcpcs = *(uint16_t *)&hdr[ckinfo[1].ck_off]; 1292 pv = 1; 1293 pvoff = 0; 1294 for (seg = 0, left = paylen; left > 0; seg++, left -= now) { 1295 now = MIN(left, mss); 1296 1297 /* Construct IOVs for the segment. */ 1298 /* Include whole original header. */ 1299 tiov[0].iov_base = hdr; 1300 tiov[0].iov_len = hdrlen; 1301 tiovcnt = 1; 1302 /* Include respective part of payload IOV. */ 1303 for (nleft = now; pv < iovcnt && nleft > 0; nleft -= nnow) { 1304 nnow = MIN(nleft, iov[pv].iov_len - pvoff); 1305 tiov[tiovcnt].iov_base = iov[pv].iov_base + pvoff; 1306 tiov[tiovcnt++].iov_len = nnow; 1307 if (pvoff + nnow == iov[pv].iov_len) { 1308 pv++; 1309 pvoff = 0; 1310 } else 1311 pvoff += nnow; 1312 } 1313 DPRINTF("tx segment %d %d+%d bytes %d iovs\r\n", 1314 seg, hdrlen, now, tiovcnt); 1315 1316 /* Update IP header. */ 1317 if (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_IP) { 1318 /* IPv4 -- set length and ID */ 1319 *(uint16_t *)&hdr[ckinfo[0].ck_start + 2] = 1320 htons(hdrlen - ckinfo[0].ck_start + now); 1321 *(uint16_t *)&hdr[ckinfo[0].ck_start + 4] = 1322 htons(ipid + seg); 1323 } else { 1324 /* IPv6 -- set length */ 1325 *(uint16_t *)&hdr[ckinfo[0].ck_start + 4] = 1326 htons(hdrlen - ckinfo[0].ck_start - 40 + 1327 now); 1328 } 1329 1330 /* Update pseudo-header checksum. */ 1331 tcpsum = tcpcs; 1332 tcpsum += htons(hdrlen - ckinfo[1].ck_start + now); 1333 1334 /* Update TCP/UDP headers. */ 1335 if (tcp) { 1336 /* Update sequence number and FIN/PUSH flags. */ 1337 *(uint32_t *)&hdr[ckinfo[1].ck_start + 4] = 1338 htonl(tcpseq + paylen - left); 1339 if (now < left) { 1340 hdr[ckinfo[1].ck_start + 13] &= 1341 ~(TH_FIN | TH_PUSH); 1342 } 1343 } else { 1344 /* Update payload length. */ 1345 *(uint32_t *)&hdr[ckinfo[1].ck_start + 4] = 1346 hdrlen - ckinfo[1].ck_start + now; 1347 } 1348 1349 /* Calculate checksums and transmit. */ 1350 if (ckinfo[0].ck_valid) { 1351 *(uint16_t *)&hdr[ckinfo[0].ck_off] = ipcs; 1352 e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[0]); 1353 } 1354 if (ckinfo[1].ck_valid) { 1355 *(uint16_t *)&hdr[ckinfo[1].ck_off] = 1356 e82545_carry(tcpsum); 1357 e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[1]); 1358 } 1359 e82545_transmit_backend(sc, tiov, tiovcnt); 1360 } 1361 1362 done: 1363 head = (head + 1) % dsize; 1364 e82545_transmit_done(sc, ohead, head, dsize, tdwb); 1365 1366 *rhead = head; 1367 return (desc + 1); 1368 } 1369 1370 static void 1371 e82545_tx_run(struct e82545_softc *sc) 1372 { 1373 uint32_t cause; 1374 uint16_t head, rhead, tail, size; 1375 int lim, tdwb, sent; 1376 1377 head = sc->esc_TDH; 1378 tail = sc->esc_TDT; 1379 size = sc->esc_TDLEN / 16; 1380 DPRINTF("tx_run: head %x, rhead %x, tail %x\r\n", 1381 sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); 1382 1383 pthread_mutex_unlock(&sc->esc_mtx); 1384 rhead = head; 1385 tdwb = 0; 1386 for (lim = size / 4; sc->esc_tx_enabled && lim > 0; lim -= sent) { 1387 sent = e82545_transmit(sc, head, tail, size, &rhead, &tdwb); 1388 if (sent == 0) 1389 break; 1390 head = rhead; 1391 } 1392 pthread_mutex_lock(&sc->esc_mtx); 1393 1394 sc->esc_TDH = head; 1395 sc->esc_TDHr = rhead; 1396 cause = 0; 1397 if (tdwb) 1398 cause |= E1000_ICR_TXDW; 1399 if (lim != size / 4 && sc->esc_TDH == sc->esc_TDT) 1400 cause |= E1000_ICR_TXQE; 1401 if (cause) 1402 e82545_icr_assert(sc, cause); 1403 1404 DPRINTF("tx_run done: head %x, rhead %x, tail %x\r\n", 1405 sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT); 1406 } 1407 1408 static void * 1409 e82545_tx_thread(void *param) 1410 { 1411 struct e82545_softc *sc = param; 1412 1413 pthread_mutex_lock(&sc->esc_mtx); 1414 for (;;) { 1415 while (!sc->esc_tx_enabled || sc->esc_TDHr == sc->esc_TDT) { 1416 if (sc->esc_tx_enabled && sc->esc_TDHr != sc->esc_TDT) 1417 break; 1418 sc->esc_tx_active = 0; 1419 if (sc->esc_tx_enabled == 0) 1420 pthread_cond_signal(&sc->esc_tx_cond); 1421 pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx); 1422 } 1423 sc->esc_tx_active = 1; 1424 1425 /* Process some tx descriptors. Lock dropped inside. */ 1426 e82545_tx_run(sc); 1427 } 1428 } 1429 1430 static void 1431 e82545_tx_start(struct e82545_softc *sc) 1432 { 1433 1434 if (sc->esc_tx_active == 0) 1435 pthread_cond_signal(&sc->esc_tx_cond); 1436 } 1437 1438 static void 1439 e82545_tx_enable(struct e82545_softc *sc) 1440 { 1441 1442 sc->esc_tx_enabled = 1; 1443 } 1444 1445 static void 1446 e82545_tx_disable(struct e82545_softc *sc) 1447 { 1448 1449 sc->esc_tx_enabled = 0; 1450 while (sc->esc_tx_active) 1451 pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx); 1452 } 1453 1454 static void 1455 e82545_rx_enable(struct e82545_softc *sc) 1456 { 1457 1458 sc->esc_rx_enabled = 1; 1459 } 1460 1461 static void 1462 e82545_rx_disable(struct e82545_softc *sc) 1463 { 1464 1465 sc->esc_rx_enabled = 0; 1466 while (sc->esc_rx_active) 1467 pthread_cond_wait(&sc->esc_rx_cond, &sc->esc_mtx); 1468 } 1469 1470 static void 1471 e82545_write_ra(struct e82545_softc *sc, int reg, uint32_t wval) 1472 { 1473 struct eth_uni *eu; 1474 int idx; 1475 1476 idx = reg >> 1; 1477 assert(idx < 15); 1478 1479 eu = &sc->esc_uni[idx]; 1480 1481 if (reg & 0x1) { 1482 /* RAH */ 1483 eu->eu_valid = ((wval & E1000_RAH_AV) == E1000_RAH_AV); 1484 eu->eu_addrsel = (wval >> 16) & 0x3; 1485 eu->eu_eth.octet[5] = wval >> 8; 1486 eu->eu_eth.octet[4] = wval; 1487 } else { 1488 /* RAL */ 1489 eu->eu_eth.octet[3] = wval >> 24; 1490 eu->eu_eth.octet[2] = wval >> 16; 1491 eu->eu_eth.octet[1] = wval >> 8; 1492 eu->eu_eth.octet[0] = wval; 1493 } 1494 } 1495 1496 static uint32_t 1497 e82545_read_ra(struct e82545_softc *sc, int reg) 1498 { 1499 struct eth_uni *eu; 1500 uint32_t retval; 1501 int idx; 1502 1503 idx = reg >> 1; 1504 assert(idx < 15); 1505 1506 eu = &sc->esc_uni[idx]; 1507 1508 if (reg & 0x1) { 1509 /* RAH */ 1510 retval = (eu->eu_valid << 31) | 1511 (eu->eu_addrsel << 16) | 1512 (eu->eu_eth.octet[5] << 8) | 1513 eu->eu_eth.octet[4]; 1514 } else { 1515 /* RAL */ 1516 retval = (eu->eu_eth.octet[3] << 24) | 1517 (eu->eu_eth.octet[2] << 16) | 1518 (eu->eu_eth.octet[1] << 8) | 1519 eu->eu_eth.octet[0]; 1520 } 1521 1522 return (retval); 1523 } 1524 1525 static void 1526 e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value) 1527 { 1528 int ridx; 1529 1530 if (offset & 0x3) { 1531 DPRINTF("Unaligned register write offset:0x%x value:0x%x\r\n", offset, value); 1532 return; 1533 } 1534 DPRINTF("Register write: 0x%x value: 0x%x\r\n", offset, value); 1535 1536 switch (offset) { 1537 case E1000_CTRL: 1538 case E1000_CTRL_DUP: 1539 e82545_devctl(sc, value); 1540 break; 1541 case E1000_FCAL: 1542 sc->esc_FCAL = value; 1543 break; 1544 case E1000_FCAH: 1545 sc->esc_FCAH = value & ~0xFFFF0000; 1546 break; 1547 case E1000_FCT: 1548 sc->esc_FCT = value & ~0xFFFF0000; 1549 break; 1550 case E1000_VET: 1551 sc->esc_VET = value & ~0xFFFF0000; 1552 break; 1553 case E1000_FCTTV: 1554 sc->esc_FCTTV = value & ~0xFFFF0000; 1555 break; 1556 case E1000_LEDCTL: 1557 sc->esc_LEDCTL = value & ~0x30303000; 1558 break; 1559 case E1000_PBA: 1560 sc->esc_PBA = value & 0x0000FF80; 1561 break; 1562 case E1000_ICR: 1563 case E1000_ITR: 1564 case E1000_ICS: 1565 case E1000_IMS: 1566 case E1000_IMC: 1567 e82545_intr_write(sc, offset, value); 1568 break; 1569 case E1000_RCTL: 1570 e82545_rx_ctl(sc, value); 1571 break; 1572 case E1000_FCRTL: 1573 sc->esc_FCRTL = value & ~0xFFFF0007; 1574 break; 1575 case E1000_FCRTH: 1576 sc->esc_FCRTH = value & ~0xFFFF0007; 1577 break; 1578 case E1000_RDBAL(0): 1579 sc->esc_RDBAL = value & ~0xF; 1580 if (sc->esc_rx_enabled) { 1581 /* Apparently legal: update cached address */ 1582 e82545_rx_update_rdba(sc); 1583 } 1584 break; 1585 case E1000_RDBAH(0): 1586 assert(!sc->esc_rx_enabled); 1587 sc->esc_RDBAH = value; 1588 break; 1589 case E1000_RDLEN(0): 1590 assert(!sc->esc_rx_enabled); 1591 sc->esc_RDLEN = value & ~0xFFF0007F; 1592 break; 1593 case E1000_RDH(0): 1594 /* XXX should only ever be zero ? Range check ? */ 1595 sc->esc_RDH = value; 1596 break; 1597 case E1000_RDT(0): 1598 /* XXX if this opens up the rx ring, do something ? */ 1599 sc->esc_RDT = value; 1600 break; 1601 case E1000_RDTR: 1602 /* ignore FPD bit 31 */ 1603 sc->esc_RDTR = value & ~0xFFFF0000; 1604 break; 1605 case E1000_RXDCTL(0): 1606 sc->esc_RXDCTL = value & ~0xFEC0C0C0; 1607 break; 1608 case E1000_RADV: 1609 sc->esc_RADV = value & ~0xFFFF0000; 1610 break; 1611 case E1000_RSRPD: 1612 sc->esc_RSRPD = value & ~0xFFFFF000; 1613 break; 1614 case E1000_RXCSUM: 1615 sc->esc_RXCSUM = value & ~0xFFFFF800; 1616 break; 1617 case E1000_TXCW: 1618 sc->esc_TXCW = value & ~0x3FFF0000; 1619 break; 1620 case E1000_TCTL: 1621 e82545_tx_ctl(sc, value); 1622 break; 1623 case E1000_TIPG: 1624 sc->esc_TIPG = value; 1625 break; 1626 case E1000_AIT: 1627 sc->esc_AIT = value; 1628 break; 1629 case E1000_TDBAL(0): 1630 sc->esc_TDBAL = value & ~0xF; 1631 if (sc->esc_tx_enabled) { 1632 /* Apparently legal */ 1633 e82545_tx_update_tdba(sc); 1634 } 1635 break; 1636 case E1000_TDBAH(0): 1637 //assert(!sc->esc_tx_enabled); 1638 sc->esc_TDBAH = value; 1639 break; 1640 case E1000_TDLEN(0): 1641 //assert(!sc->esc_tx_enabled); 1642 sc->esc_TDLEN = value & ~0xFFF0007F; 1643 break; 1644 case E1000_TDH(0): 1645 //assert(!sc->esc_tx_enabled); 1646 /* XXX should only ever be zero ? Range check ? */ 1647 sc->esc_TDHr = sc->esc_TDH = value; 1648 break; 1649 case E1000_TDT(0): 1650 /* XXX range check ? */ 1651 sc->esc_TDT = value; 1652 if (sc->esc_tx_enabled) 1653 e82545_tx_start(sc); 1654 break; 1655 case E1000_TIDV: 1656 sc->esc_TIDV = value & ~0xFFFF0000; 1657 break; 1658 case E1000_TXDCTL(0): 1659 //assert(!sc->esc_tx_enabled); 1660 sc->esc_TXDCTL = value & ~0xC0C0C0; 1661 break; 1662 case E1000_TADV: 1663 sc->esc_TADV = value & ~0xFFFF0000; 1664 break; 1665 case E1000_RAL(0) ... E1000_RAH(15): 1666 /* convert to u32 offset */ 1667 ridx = (offset - E1000_RAL(0)) >> 2; 1668 e82545_write_ra(sc, ridx, value); 1669 break; 1670 case E1000_MTA ... (E1000_MTA + (127*4)): 1671 sc->esc_fmcast[(offset - E1000_MTA) >> 2] = value; 1672 break; 1673 case E1000_VFTA ... (E1000_VFTA + (127*4)): 1674 sc->esc_fvlan[(offset - E1000_VFTA) >> 2] = value; 1675 break; 1676 case E1000_EECD: 1677 { 1678 //DPRINTF("EECD write 0x%x -> 0x%x\r\n", sc->eeprom_control, value); 1679 /* edge triggered low->high */ 1680 uint32_t eecd_strobe = ((sc->eeprom_control & E1000_EECD_SK) ? 1681 0 : (value & E1000_EECD_SK)); 1682 uint32_t eecd_mask = (E1000_EECD_SK|E1000_EECD_CS| 1683 E1000_EECD_DI|E1000_EECD_REQ); 1684 sc->eeprom_control &= ~eecd_mask; 1685 sc->eeprom_control |= (value & eecd_mask); 1686 /* grant/revoke immediately */ 1687 if (value & E1000_EECD_REQ) { 1688 sc->eeprom_control |= E1000_EECD_GNT; 1689 } else { 1690 sc->eeprom_control &= ~E1000_EECD_GNT; 1691 } 1692 if (eecd_strobe && (sc->eeprom_control & E1000_EECD_CS)) { 1693 e82545_eecd_strobe(sc); 1694 } 1695 return; 1696 } 1697 case E1000_MDIC: 1698 { 1699 uint8_t reg_addr = (uint8_t)((value & E1000_MDIC_REG_MASK) >> 1700 E1000_MDIC_REG_SHIFT); 1701 uint8_t phy_addr = (uint8_t)((value & E1000_MDIC_PHY_MASK) >> 1702 E1000_MDIC_PHY_SHIFT); 1703 sc->mdi_control = 1704 (value & ~(E1000_MDIC_ERROR|E1000_MDIC_DEST)); 1705 if ((value & E1000_MDIC_READY) != 0) { 1706 DPRINTF("Incorrect MDIC ready bit: 0x%x\r\n", value); 1707 return; 1708 } 1709 switch (value & E82545_MDIC_OP_MASK) { 1710 case E1000_MDIC_OP_READ: 1711 sc->mdi_control &= ~E82545_MDIC_DATA_MASK; 1712 sc->mdi_control |= e82545_read_mdi(sc, reg_addr, phy_addr); 1713 break; 1714 case E1000_MDIC_OP_WRITE: 1715 e82545_write_mdi(sc, reg_addr, phy_addr, 1716 value & E82545_MDIC_DATA_MASK); 1717 break; 1718 default: 1719 DPRINTF("Unknown MDIC op: 0x%x\r\n", value); 1720 return; 1721 } 1722 /* TODO: barrier? */ 1723 sc->mdi_control |= E1000_MDIC_READY; 1724 if (value & E82545_MDIC_IE) { 1725 // TODO: generate interrupt 1726 } 1727 return; 1728 } 1729 case E1000_MANC: 1730 case E1000_STATUS: 1731 return; 1732 default: 1733 DPRINTF("Unknown write register: 0x%x value:%x\r\n", offset, value); 1734 return; 1735 } 1736 } 1737 1738 static uint32_t 1739 e82545_read_register(struct e82545_softc *sc, uint32_t offset) 1740 { 1741 uint32_t retval; 1742 int ridx; 1743 1744 if (offset & 0x3) { 1745 DPRINTF("Unaligned register read offset:0x%x\r\n", offset); 1746 return 0; 1747 } 1748 1749 DPRINTF("Register read: 0x%x\r\n", offset); 1750 1751 switch (offset) { 1752 case E1000_CTRL: 1753 retval = sc->esc_CTRL; 1754 break; 1755 case E1000_STATUS: 1756 retval = E1000_STATUS_FD | E1000_STATUS_LU | 1757 E1000_STATUS_SPEED_1000; 1758 break; 1759 case E1000_FCAL: 1760 retval = sc->esc_FCAL; 1761 break; 1762 case E1000_FCAH: 1763 retval = sc->esc_FCAH; 1764 break; 1765 case E1000_FCT: 1766 retval = sc->esc_FCT; 1767 break; 1768 case E1000_VET: 1769 retval = sc->esc_VET; 1770 break; 1771 case E1000_FCTTV: 1772 retval = sc->esc_FCTTV; 1773 break; 1774 case E1000_LEDCTL: 1775 retval = sc->esc_LEDCTL; 1776 break; 1777 case E1000_PBA: 1778 retval = sc->esc_PBA; 1779 break; 1780 case E1000_ICR: 1781 case E1000_ITR: 1782 case E1000_ICS: 1783 case E1000_IMS: 1784 case E1000_IMC: 1785 retval = e82545_intr_read(sc, offset); 1786 break; 1787 case E1000_RCTL: 1788 retval = sc->esc_RCTL; 1789 break; 1790 case E1000_FCRTL: 1791 retval = sc->esc_FCRTL; 1792 break; 1793 case E1000_FCRTH: 1794 retval = sc->esc_FCRTH; 1795 break; 1796 case E1000_RDBAL(0): 1797 retval = sc->esc_RDBAL; 1798 break; 1799 case E1000_RDBAH(0): 1800 retval = sc->esc_RDBAH; 1801 break; 1802 case E1000_RDLEN(0): 1803 retval = sc->esc_RDLEN; 1804 break; 1805 case E1000_RDH(0): 1806 retval = sc->esc_RDH; 1807 break; 1808 case E1000_RDT(0): 1809 retval = sc->esc_RDT; 1810 break; 1811 case E1000_RDTR: 1812 retval = sc->esc_RDTR; 1813 break; 1814 case E1000_RXDCTL(0): 1815 retval = sc->esc_RXDCTL; 1816 break; 1817 case E1000_RADV: 1818 retval = sc->esc_RADV; 1819 break; 1820 case E1000_RSRPD: 1821 retval = sc->esc_RSRPD; 1822 break; 1823 case E1000_RXCSUM: 1824 retval = sc->esc_RXCSUM; 1825 break; 1826 case E1000_TXCW: 1827 retval = sc->esc_TXCW; 1828 break; 1829 case E1000_TCTL: 1830 retval = sc->esc_TCTL; 1831 break; 1832 case E1000_TIPG: 1833 retval = sc->esc_TIPG; 1834 break; 1835 case E1000_AIT: 1836 retval = sc->esc_AIT; 1837 break; 1838 case E1000_TDBAL(0): 1839 retval = sc->esc_TDBAL; 1840 break; 1841 case E1000_TDBAH(0): 1842 retval = sc->esc_TDBAH; 1843 break; 1844 case E1000_TDLEN(0): 1845 retval = sc->esc_TDLEN; 1846 break; 1847 case E1000_TDH(0): 1848 retval = sc->esc_TDH; 1849 break; 1850 case E1000_TDT(0): 1851 retval = sc->esc_TDT; 1852 break; 1853 case E1000_TIDV: 1854 retval = sc->esc_TIDV; 1855 break; 1856 case E1000_TXDCTL(0): 1857 retval = sc->esc_TXDCTL; 1858 break; 1859 case E1000_TADV: 1860 retval = sc->esc_TADV; 1861 break; 1862 case E1000_RAL(0) ... E1000_RAH(15): 1863 /* convert to u32 offset */ 1864 ridx = (offset - E1000_RAL(0)) >> 2; 1865 retval = e82545_read_ra(sc, ridx); 1866 break; 1867 case E1000_MTA ... (E1000_MTA + (127*4)): 1868 retval = sc->esc_fmcast[(offset - E1000_MTA) >> 2]; 1869 break; 1870 case E1000_VFTA ... (E1000_VFTA + (127*4)): 1871 retval = sc->esc_fvlan[(offset - E1000_VFTA) >> 2]; 1872 break; 1873 case E1000_EECD: 1874 //DPRINTF("EECD read %x\r\n", sc->eeprom_control); 1875 retval = sc->eeprom_control; 1876 break; 1877 case E1000_MDIC: 1878 retval = sc->mdi_control; 1879 break; 1880 case E1000_MANC: 1881 retval = 0; 1882 break; 1883 /* stats that we emulate. */ 1884 case E1000_MPC: 1885 retval = sc->missed_pkt_count; 1886 break; 1887 case E1000_PRC64: 1888 retval = sc->pkt_rx_by_size[0]; 1889 break; 1890 case E1000_PRC127: 1891 retval = sc->pkt_rx_by_size[1]; 1892 break; 1893 case E1000_PRC255: 1894 retval = sc->pkt_rx_by_size[2]; 1895 break; 1896 case E1000_PRC511: 1897 retval = sc->pkt_rx_by_size[3]; 1898 break; 1899 case E1000_PRC1023: 1900 retval = sc->pkt_rx_by_size[4]; 1901 break; 1902 case E1000_PRC1522: 1903 retval = sc->pkt_rx_by_size[5]; 1904 break; 1905 case E1000_GPRC: 1906 retval = sc->good_pkt_rx_count; 1907 break; 1908 case E1000_BPRC: 1909 retval = sc->bcast_pkt_rx_count; 1910 break; 1911 case E1000_MPRC: 1912 retval = sc->mcast_pkt_rx_count; 1913 break; 1914 case E1000_GPTC: 1915 case E1000_TPT: 1916 retval = sc->good_pkt_tx_count; 1917 break; 1918 case E1000_GORCL: 1919 retval = (uint32_t)sc->good_octets_rx; 1920 break; 1921 case E1000_GORCH: 1922 retval = (uint32_t)(sc->good_octets_rx >> 32); 1923 break; 1924 case E1000_TOTL: 1925 case E1000_GOTCL: 1926 retval = (uint32_t)sc->good_octets_tx; 1927 break; 1928 case E1000_TOTH: 1929 case E1000_GOTCH: 1930 retval = (uint32_t)(sc->good_octets_tx >> 32); 1931 break; 1932 case E1000_ROC: 1933 retval = sc->oversize_rx_count; 1934 break; 1935 case E1000_TORL: 1936 retval = (uint32_t)(sc->good_octets_rx + sc->missed_octets); 1937 break; 1938 case E1000_TORH: 1939 retval = (uint32_t)((sc->good_octets_rx + 1940 sc->missed_octets) >> 32); 1941 break; 1942 case E1000_TPR: 1943 retval = sc->good_pkt_rx_count + sc->missed_pkt_count + 1944 sc->oversize_rx_count; 1945 break; 1946 case E1000_PTC64: 1947 retval = sc->pkt_tx_by_size[0]; 1948 break; 1949 case E1000_PTC127: 1950 retval = sc->pkt_tx_by_size[1]; 1951 break; 1952 case E1000_PTC255: 1953 retval = sc->pkt_tx_by_size[2]; 1954 break; 1955 case E1000_PTC511: 1956 retval = sc->pkt_tx_by_size[3]; 1957 break; 1958 case E1000_PTC1023: 1959 retval = sc->pkt_tx_by_size[4]; 1960 break; 1961 case E1000_PTC1522: 1962 retval = sc->pkt_tx_by_size[5]; 1963 break; 1964 case E1000_MPTC: 1965 retval = sc->mcast_pkt_tx_count; 1966 break; 1967 case E1000_BPTC: 1968 retval = sc->bcast_pkt_tx_count; 1969 break; 1970 case E1000_TSCTC: 1971 retval = sc->tso_tx_count; 1972 break; 1973 /* stats that are always 0. */ 1974 case E1000_CRCERRS: 1975 case E1000_ALGNERRC: 1976 case E1000_SYMERRS: 1977 case E1000_RXERRC: 1978 case E1000_SCC: 1979 case E1000_ECOL: 1980 case E1000_MCC: 1981 case E1000_LATECOL: 1982 case E1000_COLC: 1983 case E1000_DC: 1984 case E1000_TNCRS: 1985 case E1000_SEC: 1986 case E1000_CEXTERR: 1987 case E1000_RLEC: 1988 case E1000_XONRXC: 1989 case E1000_XONTXC: 1990 case E1000_XOFFRXC: 1991 case E1000_XOFFTXC: 1992 case E1000_FCRUC: 1993 case E1000_RNBC: 1994 case E1000_RUC: 1995 case E1000_RFC: 1996 case E1000_RJC: 1997 case E1000_MGTPRC: 1998 case E1000_MGTPDC: 1999 case E1000_MGTPTC: 2000 case E1000_TSCTFC: 2001 retval = 0; 2002 break; 2003 default: 2004 DPRINTF("Unknown read register: 0x%x\r\n", offset); 2005 retval = 0; 2006 break; 2007 } 2008 2009 return (retval); 2010 } 2011 2012 static void 2013 e82545_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2014 uint64_t offset, int size, uint64_t value) 2015 { 2016 struct e82545_softc *sc; 2017 2018 //DPRINTF("Write bar:%d offset:0x%lx value:0x%lx size:%d\r\n", baridx, offset, value, size); 2019 2020 sc = pi->pi_arg; 2021 2022 pthread_mutex_lock(&sc->esc_mtx); 2023 2024 switch (baridx) { 2025 case E82545_BAR_IO: 2026 switch (offset) { 2027 case E82545_IOADDR: 2028 if (size != 4) { 2029 DPRINTF("Wrong io addr write sz:%d value:0x%lx\r\n", size, value); 2030 } else 2031 sc->io_addr = (uint32_t)value; 2032 break; 2033 case E82545_IODATA: 2034 if (size != 4) { 2035 DPRINTF("Wrong io data write size:%d value:0x%lx\r\n", size, value); 2036 } else if (sc->io_addr > E82545_IO_REGISTER_MAX) { 2037 DPRINTF("Non-register io write addr:0x%x value:0x%lx\r\n", sc->io_addr, value); 2038 } else 2039 e82545_write_register(sc, sc->io_addr, 2040 (uint32_t)value); 2041 break; 2042 default: 2043 DPRINTF("Unknown io bar write offset:0x%lx value:0x%lx size:%d\r\n", offset, value, size); 2044 break; 2045 } 2046 break; 2047 case E82545_BAR_REGISTER: 2048 if (size != 4) { 2049 DPRINTF("Wrong register write size:%d offset:0x%lx value:0x%lx\r\n", size, offset, value); 2050 } else 2051 e82545_write_register(sc, (uint32_t)offset, 2052 (uint32_t)value); 2053 break; 2054 default: 2055 DPRINTF("Unknown write bar:%d off:0x%lx val:0x%lx size:%d\r\n", 2056 baridx, offset, value, size); 2057 } 2058 2059 pthread_mutex_unlock(&sc->esc_mtx); 2060 } 2061 2062 static uint64_t 2063 e82545_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, 2064 uint64_t offset, int size) 2065 { 2066 struct e82545_softc *sc; 2067 uint64_t retval; 2068 2069 //DPRINTF("Read bar:%d offset:0x%lx size:%d\r\n", baridx, offset, size); 2070 sc = pi->pi_arg; 2071 retval = 0; 2072 2073 pthread_mutex_lock(&sc->esc_mtx); 2074 2075 switch (baridx) { 2076 case E82545_BAR_IO: 2077 switch (offset) { 2078 case E82545_IOADDR: 2079 if (size != 4) { 2080 DPRINTF("Wrong io addr read sz:%d\r\n", size); 2081 } else 2082 retval = sc->io_addr; 2083 break; 2084 case E82545_IODATA: 2085 if (size != 4) { 2086 DPRINTF("Wrong io data read sz:%d\r\n", size); 2087 } 2088 if (sc->io_addr > E82545_IO_REGISTER_MAX) { 2089 DPRINTF("Non-register io read addr:0x%x\r\n", 2090 sc->io_addr); 2091 } else 2092 retval = e82545_read_register(sc, sc->io_addr); 2093 break; 2094 default: 2095 DPRINTF("Unknown io bar read offset:0x%lx size:%d\r\n", 2096 offset, size); 2097 break; 2098 } 2099 break; 2100 case E82545_BAR_REGISTER: 2101 if (size != 4) { 2102 DPRINTF("Wrong register read size:%d offset:0x%lx\r\n", 2103 size, offset); 2104 } else 2105 retval = e82545_read_register(sc, (uint32_t)offset); 2106 break; 2107 default: 2108 DPRINTF("Unknown read bar:%d offset:0x%lx size:%d\r\n", 2109 baridx, offset, size); 2110 break; 2111 } 2112 2113 pthread_mutex_unlock(&sc->esc_mtx); 2114 2115 return (retval); 2116 } 2117 2118 static void 2119 e82545_reset(struct e82545_softc *sc, int drvr) 2120 { 2121 int i; 2122 2123 e82545_rx_disable(sc); 2124 e82545_tx_disable(sc); 2125 2126 /* clear outstanding interrupts */ 2127 if (sc->esc_irq_asserted) 2128 pci_lintr_deassert(sc->esc_pi); 2129 2130 /* misc */ 2131 if (!drvr) { 2132 sc->esc_FCAL = 0; 2133 sc->esc_FCAH = 0; 2134 sc->esc_FCT = 0; 2135 sc->esc_VET = 0; 2136 sc->esc_FCTTV = 0; 2137 } 2138 sc->esc_LEDCTL = 0x07061302; 2139 sc->esc_PBA = 0x00100030; 2140 2141 /* start nvm in opcode mode. */ 2142 sc->nvm_opaddr = 0; 2143 sc->nvm_mode = E82545_NVM_MODE_OPADDR; 2144 sc->nvm_bits = E82545_NVM_OPADDR_BITS; 2145 sc->eeprom_control = E1000_EECD_PRES | E82545_EECD_FWE_EN; 2146 e82545_init_eeprom(sc); 2147 2148 /* interrupt */ 2149 sc->esc_ICR = 0; 2150 sc->esc_ITR = 250; 2151 sc->esc_ICS = 0; 2152 sc->esc_IMS = 0; 2153 sc->esc_IMC = 0; 2154 2155 /* L2 filters */ 2156 if (!drvr) { 2157 memset(sc->esc_fvlan, 0, sizeof(sc->esc_fvlan)); 2158 memset(sc->esc_fmcast, 0, sizeof(sc->esc_fmcast)); 2159 memset(sc->esc_uni, 0, sizeof(sc->esc_uni)); 2160 2161 /* XXX not necessary on 82545 ?? */ 2162 sc->esc_uni[0].eu_valid = 1; 2163 memcpy(sc->esc_uni[0].eu_eth.octet, sc->esc_mac.octet, 2164 ETHER_ADDR_LEN); 2165 } else { 2166 /* Clear RAH valid bits */ 2167 for (i = 0; i < 16; i++) 2168 sc->esc_uni[i].eu_valid = 0; 2169 } 2170 2171 /* receive */ 2172 if (!drvr) { 2173 sc->esc_RDBAL = 0; 2174 sc->esc_RDBAH = 0; 2175 } 2176 sc->esc_RCTL = 0; 2177 sc->esc_FCRTL = 0; 2178 sc->esc_FCRTH = 0; 2179 sc->esc_RDLEN = 0; 2180 sc->esc_RDH = 0; 2181 sc->esc_RDT = 0; 2182 sc->esc_RDTR = 0; 2183 sc->esc_RXDCTL = (1 << 24) | (1 << 16); /* default GRAN/WTHRESH */ 2184 sc->esc_RADV = 0; 2185 sc->esc_RXCSUM = 0; 2186 2187 /* transmit */ 2188 if (!drvr) { 2189 sc->esc_TDBAL = 0; 2190 sc->esc_TDBAH = 0; 2191 sc->esc_TIPG = 0; 2192 sc->esc_AIT = 0; 2193 sc->esc_TIDV = 0; 2194 sc->esc_TADV = 0; 2195 } 2196 sc->esc_tdba = 0; 2197 sc->esc_txdesc = NULL; 2198 sc->esc_TXCW = 0; 2199 sc->esc_TCTL = 0; 2200 sc->esc_TDLEN = 0; 2201 sc->esc_TDT = 0; 2202 sc->esc_TDHr = sc->esc_TDH = 0; 2203 sc->esc_TXDCTL = 0; 2204 } 2205 2206 static void 2207 e82545_open_tap(struct e82545_softc *sc, char *opts) 2208 { 2209 char tbuf[80]; 2210 #ifndef WITHOUT_CAPSICUM 2211 cap_rights_t rights; 2212 #endif 2213 2214 if (opts == NULL) { 2215 sc->esc_tapfd = -1; 2216 return; 2217 } 2218 2219 strcpy(tbuf, "/dev/"); 2220 strlcat(tbuf, opts, sizeof(tbuf)); 2221 2222 sc->esc_tapfd = open(tbuf, O_RDWR); 2223 if (sc->esc_tapfd == -1) { 2224 DPRINTF("unable to open tap device %s\n", opts); 2225 exit(1); 2226 } 2227 2228 /* 2229 * Set non-blocking and register for read 2230 * notifications with the event loop 2231 */ 2232 int opt = 1; 2233 if (ioctl(sc->esc_tapfd, FIONBIO, &opt) < 0) { 2234 WPRINTF("tap device O_NONBLOCK failed: %d\n", errno); 2235 close(sc->esc_tapfd); 2236 sc->esc_tapfd = -1; 2237 } 2238 2239 #ifndef WITHOUT_CAPSICUM 2240 cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); 2241 if (cap_rights_limit(sc->esc_tapfd, &rights) == -1 && errno != ENOSYS) 2242 errx(EX_OSERR, "Unable to apply rights for sandbox"); 2243 #endif 2244 2245 sc->esc_mevp = mevent_add(sc->esc_tapfd, 2246 EVF_READ, 2247 e82545_tap_callback, 2248 sc); 2249 if (sc->esc_mevp == NULL) { 2250 DPRINTF("Could not register mevent %d\n", EVF_READ); 2251 close(sc->esc_tapfd); 2252 sc->esc_tapfd = -1; 2253 } 2254 } 2255 2256 static int 2257 e82545_parsemac(char *mac_str, uint8_t *mac_addr) 2258 { 2259 struct ether_addr *ea; 2260 char *tmpstr; 2261 char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; 2262 2263 tmpstr = strsep(&mac_str,"="); 2264 if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) { 2265 ea = ether_aton(mac_str); 2266 if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) || 2267 memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) { 2268 fprintf(stderr, "Invalid MAC %s\n", mac_str); 2269 return (1); 2270 } else 2271 memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN); 2272 } 2273 return (0); 2274 } 2275 2276 static int 2277 e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 2278 { 2279 DPRINTF("Loading with options: %s\r\n", opts); 2280 2281 MD5_CTX mdctx; 2282 unsigned char digest[16]; 2283 char nstr[80]; 2284 struct e82545_softc *sc; 2285 char *devname; 2286 char *vtopts; 2287 int mac_provided; 2288 2289 /* Setup our softc */ 2290 sc = calloc(1, sizeof(*sc)); 2291 2292 pi->pi_arg = sc; 2293 sc->esc_pi = pi; 2294 sc->esc_ctx = ctx; 2295 2296 pthread_mutex_init(&sc->esc_mtx, NULL); 2297 pthread_cond_init(&sc->esc_rx_cond, NULL); 2298 pthread_cond_init(&sc->esc_tx_cond, NULL); 2299 pthread_create(&sc->esc_tx_tid, NULL, e82545_tx_thread, sc); 2300 snprintf(nstr, sizeof(nstr), "e82545-%d:%d tx", pi->pi_slot, 2301 pi->pi_func); 2302 pthread_set_name_np(sc->esc_tx_tid, nstr); 2303 2304 pci_set_cfgdata16(pi, PCIR_DEVICE, E82545_DEV_ID_82545EM_COPPER); 2305 pci_set_cfgdata16(pi, PCIR_VENDOR, E82545_VENDOR_ID_INTEL); 2306 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 2307 pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_NETWORK_ETHERNET); 2308 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, E82545_SUBDEV_ID); 2309 pci_set_cfgdata16(pi, PCIR_SUBVEND_0, E82545_VENDOR_ID_INTEL); 2310 2311 pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL); 2312 pci_set_cfgdata8(pi, PCIR_INTPIN, 0x1); 2313 2314 /* TODO: this card also supports msi, but the freebsd driver for it 2315 * does not, so I have not implemented it. */ 2316 pci_lintr_request(pi); 2317 2318 pci_emul_alloc_bar(pi, E82545_BAR_REGISTER, PCIBAR_MEM32, 2319 E82545_BAR_REGISTER_LEN); 2320 pci_emul_alloc_bar(pi, E82545_BAR_FLASH, PCIBAR_MEM32, 2321 E82545_BAR_FLASH_LEN); 2322 pci_emul_alloc_bar(pi, E82545_BAR_IO, PCIBAR_IO, 2323 E82545_BAR_IO_LEN); 2324 2325 /* 2326 * Attempt to open the tap device and read the MAC address 2327 * if specified. Copied from virtio-net, slightly modified. 2328 */ 2329 mac_provided = 0; 2330 sc->esc_tapfd = -1; 2331 if (opts != NULL) { 2332 int err; 2333 2334 devname = vtopts = strdup(opts); 2335 (void) strsep(&vtopts, ","); 2336 2337 if (vtopts != NULL) { 2338 err = e82545_parsemac(vtopts, sc->esc_mac.octet); 2339 if (err != 0) { 2340 free(devname); 2341 return (err); 2342 } 2343 mac_provided = 1; 2344 } 2345 2346 if (strncmp(devname, "tap", 3) == 0 || 2347 strncmp(devname, "vmnet", 5) == 0) 2348 e82545_open_tap(sc, devname); 2349 2350 free(devname); 2351 } 2352 2353 /* 2354 * The default MAC address is the standard NetApp OUI of 00-a0-98, 2355 * followed by an MD5 of the PCI slot/func number and dev name 2356 */ 2357 if (!mac_provided) { 2358 snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, 2359 pi->pi_func, vmname); 2360 2361 MD5Init(&mdctx); 2362 MD5Update(&mdctx, nstr, strlen(nstr)); 2363 MD5Final(digest, &mdctx); 2364 2365 sc->esc_mac.octet[0] = 0x00; 2366 sc->esc_mac.octet[1] = 0xa0; 2367 sc->esc_mac.octet[2] = 0x98; 2368 sc->esc_mac.octet[3] = digest[0]; 2369 sc->esc_mac.octet[4] = digest[1]; 2370 sc->esc_mac.octet[5] = digest[2]; 2371 } 2372 2373 /* H/w initiated reset */ 2374 e82545_reset(sc, 0); 2375 2376 return (0); 2377 } 2378 2379 struct pci_devemu pci_de_e82545 = { 2380 .pe_emu = "e1000", 2381 .pe_init = e82545_init, 2382 .pe_barwrite = e82545_write, 2383 .pe_barread = e82545_read 2384 }; 2385 PCI_EMUL_SET(pci_de_e82545); 2386 2387