/*
 * Copyright (c) 2016 Alexander Motin <mav@FreeBSD.org>
 * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
 * Copyright (c) 2013 Jeremiah Lott, Avere Systems
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/limits.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <net/ethernet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

#include <assert.h>	/* needed for the assert() calls below */
#include <errno.h>
#include <fcntl.h>
#include <md5.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <pthread_np.h>

#include "e1000_regs.h"
#include "e1000_defines.h"
#include "mii.h"

#include "bhyverun.h"
#include "pci_emul.h"
#include "mevent.h"

/* Hardware/register definitions XXX: move some to common code. */
#define E82545_VENDOR_ID_INTEL		0x8086
#define E82545_DEV_ID_82545EM_COPPER	0x100F
#define E82545_SUBDEV_ID		0x1008

#define E82545_REVISION_4		4

#define E82545_MDIC_DATA_MASK		0x0000FFFF
#define E82545_MDIC_OP_MASK		0x0c000000
#define E82545_MDIC_IE			0x20000000

#define E82545_EECD_FWE_DIS	0x00000010 /* Flash writes disabled */
#define E82545_EECD_FWE_EN	0x00000020 /* Flash writes enabled */
#define E82545_EECD_FWE_MASK	0x00000030 /* Flash writes mask */

#define E82545_BAR_REGISTER		0
#define E82545_BAR_REGISTER_LEN		(128*1024)
#define E82545_BAR_FLASH		1
#define E82545_BAR_FLASH_LEN		(64*1024)
#define E82545_BAR_IO			2
#define E82545_BAR_IO_LEN		8

#define E82545_IOADDR			0x00000000
#define E82545_IODATA			0x00000004
#define E82545_IO_REGISTER_MAX		0x0001FFFF
#define E82545_IO_FLASH_BASE		0x00080000
#define E82545_IO_FLASH_MAX		0x000FFFFF

#define E82545_ARRAY_ENTRY(reg, offset)	(reg + (offset<<2))
#define E82545_RAR_MAX			15
#define E82545_MTA_MAX			127
#define E82545_VFTA_MAX			127

/* Slightly modified from the driver versions, hardcoded for 3 opcode bits,
 * followed by 6 address bits.
 * TODO: make opcode bits and addr bits configurable?
 * NVM Commands - Microwire */
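/*
 * Illustrative framing sketch (not from a datasheet, just restating what
 * the defines below and e82545_eecd_strobe() implement): commands are
 * shifted in MSB-first as 3 opcode bits followed by 6 address bits.  A
 * READ of EEPROM word 0x0a is therefore clocked in as the 9-bit value
 * 0b110001010 (opcode 0x6 in the top 3 bits, address 0x0a in the low 6),
 * after which the 16 bits of eeprom_data[0x0a] are clocked out, MSB first.
 */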
#define E82545_NVM_OPCODE_BITS	3
#define E82545_NVM_ADDR_BITS	6
#define E82545_NVM_DATA_BITS	16
#define E82545_NVM_OPADDR_BITS	(E82545_NVM_OPCODE_BITS + E82545_NVM_ADDR_BITS)
#define E82545_NVM_ADDR_MASK	((1 << E82545_NVM_ADDR_BITS)-1)
#define E82545_NVM_OPCODE_MASK	\
    (((1 << E82545_NVM_OPCODE_BITS) - 1) << E82545_NVM_ADDR_BITS)
#define E82545_NVM_OPCODE_READ	(0x6 << E82545_NVM_ADDR_BITS)	/* read */
#define E82545_NVM_OPCODE_WRITE	(0x5 << E82545_NVM_ADDR_BITS)	/* write */
#define E82545_NVM_OPCODE_ERASE	(0x7 << E82545_NVM_ADDR_BITS)	/* erase */
#define E82545_NVM_OPCODE_EWEN	(0x4 << E82545_NVM_ADDR_BITS)	/* wr-enable */

#define E82545_NVM_EEPROM_SIZE	64 /* 64 * 16-bit values == 128 bytes */

#define E1000_ICR_SRPD		0x00010000

/* This is an arbitrary number.  There is no hard limit on the chip. */
#define I82545_MAX_TXSEGS	64

/* Legacy receive descriptor */
struct e1000_rx_desc {
	uint64_t buffer_addr;	/* Address of the descriptor's data buffer */
	uint16_t length;	/* Length of data DMAed into data buffer */
	uint16_t csum;		/* Packet checksum */
	uint8_t	 status;	/* Descriptor status */
	uint8_t  errors;	/* Descriptor Errors */
	uint16_t special;
};

/* Transmit descriptor types */
#define E1000_TXD_MASK		(E1000_TXD_CMD_DEXT | 0x00F00000)
#define E1000_TXD_TYP_L		(0)
#define E1000_TXD_TYP_C		(E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_C)
#define E1000_TXD_TYP_D		(E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)

/* Legacy transmit descriptor */
struct e1000_tx_desc {
	uint64_t buffer_addr;	/* Address of the descriptor's data buffer */
	union {
		uint32_t data;
		struct {
			uint16_t length;  /* Data buffer length */
			uint8_t  cso;	  /* Checksum offset */
			uint8_t  cmd;	  /* Descriptor control */
		} flags;
	} lower;
	union {
		uint32_t data;
		struct {
			uint8_t status;	  /* Descriptor status */
			uint8_t css;	  /* Checksum start */
			uint16_t special;
		} fields;
	} upper;
};

/* Context descriptor */
struct e1000_context_desc {
	union {
		uint32_t ip_config;
		struct {
			uint8_t ipcss;	  /* IP checksum start */
			uint8_t ipcso;	  /* IP checksum offset */
			uint16_t ipcse;	  /* IP checksum end */
		} ip_fields;
	} lower_setup;
	union {
		uint32_t tcp_config;
		struct {
			uint8_t tucss;	  /* TCP checksum start */
			uint8_t tucso;	  /* TCP checksum offset */
			uint16_t tucse;	  /* TCP checksum end */
		} tcp_fields;
	} upper_setup;
	uint32_t cmd_and_length;
	union {
		uint32_t data;
		struct {
			uint8_t status;	  /* Descriptor status */
			uint8_t hdr_len;  /* Header length */
			uint16_t mss;	  /* Maximum segment size */
		} fields;
	} tcp_seg_setup;
};

/* Data descriptor */
struct e1000_data_desc {
	uint64_t buffer_addr;	/* Address of the descriptor's data buffer */
	union {
		uint32_t data;
		struct {
			uint16_t length;  /* Data buffer length */
			uint8_t typ_len_ext;
			uint8_t cmd;
		} flags;
	} lower;
	union {
		uint32_t data;
		struct {
			uint8_t status;	  /* Descriptor status */
			uint8_t popts;	  /* Packet Options */
			uint16_t special;
		} fields;
	} upper;
};

union e1000_tx_udesc {
	struct e1000_tx_desc td;
	struct e1000_context_desc cd;
	struct e1000_data_desc dd;
};
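/*
 * Note (summarizing the definitions above): every 16-byte transmit ring
 * entry is viewed through this union, and the DEXT bit together with the
 * DTYP field (bits 23:20 of the lower dword, covered by E1000_TXD_MASK)
 * selects whether it is a legacy, context, or data descriptor; see
 * e82545_txdesc_type() below.
 */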
/* Tx checksum info for a packet. */
struct ck_info {
	int	ck_valid;	/* ck_info is valid */
	uint8_t	ck_start;	/* start byte of cksum calculation */
	uint8_t	ck_off;		/* offset of cksum insertion */
	uint16_t ck_len;	/* length of cksum calc: 0 is to packet-end */
};

/*
 * Debug printf
 */
static int e82545_debug = 0;
#define DPRINTF(msg,params...) if (e82545_debug) fprintf(stderr, "e82545: " msg, params)
#define WPRINTF(msg,params...) fprintf(stderr, "e82545: " msg, params)

#define	MIN(a,b) (((a)<(b))?(a):(b))
#define	MAX(a,b) (((a)>(b))?(a):(b))

/* s/w representation of the RAL/RAH regs */
struct eth_uni {
	int		eu_valid;
	int		eu_addrsel;
	struct ether_addr eu_eth;
};


struct e82545_softc {
	struct pci_devinst *esc_pi;
	struct vmctx	*esc_ctx;
	struct mevent	*esc_mevp;
	struct mevent	*esc_mevpitr;
	pthread_mutex_t	esc_mtx;
	struct ether_addr esc_mac;
	int		esc_tapfd;

	/* General */
	uint32_t	esc_CTRL;	/* x0000 device ctl */
	uint32_t	esc_FCAL;	/* x0028 flow ctl addr lo */
	uint32_t	esc_FCAH;	/* x002C flow ctl addr hi */
	uint32_t	esc_FCT;	/* x0030 flow ctl type */
	uint32_t	esc_VET;	/* x0038 VLAN eth type */
	uint32_t	esc_FCTTV;	/* x0170 flow ctl tx timer */
	uint32_t	esc_LEDCTL;	/* x0E00 LED control */
	uint32_t	esc_PBA;	/* x1000 pkt buffer allocation */

	/* Interrupt control */
	int		esc_irq_asserted;
	uint32_t	esc_ICR;	/* x00C0 cause read/clear */
	uint32_t	esc_ITR;	/* x00C4 intr throttling */
	uint32_t	esc_ICS;	/* x00C8 cause set */
	uint32_t	esc_IMS;	/* x00D0 mask set/read */
	uint32_t	esc_IMC;	/* x00D8 mask clear */

	/* Transmit */
	union e1000_tx_udesc *esc_txdesc;
	struct e1000_context_desc esc_txctx;
	pthread_t	esc_tx_tid;
	pthread_cond_t	esc_tx_cond;
	int		esc_tx_enabled;
	int		esc_tx_active;
	uint32_t	esc_TXCW;	/* x0178 transmit config */
	uint32_t	esc_TCTL;	/* x0400 transmit ctl */
	uint32_t	esc_TIPG;	/* x0410 inter-packet gap */
	uint16_t	esc_AIT;	/* x0458 Adaptive Interframe Throttle */
	uint64_t	esc_tdba;	/* verified 64-bit desc table addr */
	uint32_t	esc_TDBAL;	/* x3800 desc table addr, low bits */
	uint32_t	esc_TDBAH;	/* x3804 desc table addr, hi 32-bits */
	uint32_t	esc_TDLEN;	/* x3808 # descriptors in bytes */
	uint16_t	esc_TDH;	/* x3810 desc table head idx */
	uint16_t	esc_TDHr;	/* internal read version of TDH */
	uint16_t	esc_TDT;	/* x3818 desc table tail idx */
	uint32_t	esc_TIDV;	/* x3820 intr delay */
	uint32_t	esc_TXDCTL;	/* x3828 desc control */
	uint32_t	esc_TADV;	/* x382C intr absolute delay */

	/* L2 frame acceptance */
	struct eth_uni	esc_uni[16];	/* 16 x unicast MAC addresses */
	uint32_t	esc_fmcast[128]; /* Multicast filter bit-match */
	uint32_t	esc_fvlan[128]; /* VLAN 4096-bit filter */

	/* Receive */
	struct e1000_rx_desc *esc_rxdesc;
	pthread_cond_t	esc_rx_cond;
	int		esc_rx_enabled;
	int		esc_rx_active;
	int		esc_rx_loopback;
	uint32_t	esc_RCTL;	/* x0100 receive ctl */
	uint32_t	esc_FCRTL;	/* x2160 flow cntl thresh, low */
	uint32_t	esc_FCRTH;	/* x2168 flow cntl thresh, hi */
	uint64_t	esc_rdba;	/* verified 64-bit desc table addr */
	uint32_t	esc_RDBAL;	/* x2800 desc table addr, low bits */
	uint32_t	esc_RDBAH;	/* x2804 desc table addr, hi 32-bits*/
	uint32_t	esc_RDLEN;	/* x2808 #descriptors */
	uint16_t	esc_RDH;	/* x2810 desc table head idx */
	uint16_t	esc_RDT;	/* x2818 desc table tail idx */
	uint32_t	esc_RDTR;	/* x2820 intr delay */
	uint32_t	esc_RXDCTL;	/* x2828 desc control */
	uint32_t	esc_RADV;	/* x282C intr absolute delay */
	uint32_t	esc_RSRPD;	/* x2C00 recv small packet detect */
	uint32_t	esc_RXCSUM;	/* x5000 receive cksum ctl */

	/* IO Port register access */
	uint32_t io_addr;

	/* Shadow copy of MDIC */
	uint32_t mdi_control;
	/* Shadow copy of EECD */
	uint32_t eeprom_control;
	/* Latest NVM in/out */
	uint16_t nvm_data;
	uint16_t nvm_opaddr;
	/* stats */
	uint32_t missed_pkt_count; /* dropped for no room in rx queue */
	uint32_t pkt_rx_by_size[6];
	uint32_t pkt_tx_by_size[6];
	uint32_t good_pkt_rx_count;
	uint32_t bcast_pkt_rx_count;
	uint32_t mcast_pkt_rx_count;
	uint32_t good_pkt_tx_count;
	uint32_t bcast_pkt_tx_count;
	uint32_t mcast_pkt_tx_count;
	uint32_t oversize_rx_count;
	uint32_t tso_tx_count;
	uint64_t good_octets_rx;
	uint64_t good_octets_tx;
	uint64_t missed_octets; /* counts missed and oversized */

	uint8_t nvm_bits:6; /* number of bits remaining in/out */
	uint8_t nvm_mode:2;
#define E82545_NVM_MODE_OPADDR  0x0
#define E82545_NVM_MODE_DATAIN  0x1
#define E82545_NVM_MODE_DATAOUT 0x2
	/* EEPROM data */
	uint16_t eeprom_data[E82545_NVM_EEPROM_SIZE];
};

static void e82545_reset(struct e82545_softc *sc, int dev);
static void e82545_rx_enable(struct e82545_softc *sc);
static void e82545_rx_disable(struct e82545_softc *sc);
static void e82545_tap_callback(int fd, enum ev_type type, void *param);
static void e82545_tx_start(struct e82545_softc *sc);
static void e82545_tx_enable(struct e82545_softc *sc);
static void e82545_tx_disable(struct e82545_softc *sc);

static inline int
e82545_size_stat_index(uint32_t size)
{
	if (size <= 64) {
		return 0;
	} else if (size >= 1024) {
		return 5;
	} else {
		/* should be 1-4 */
		return (ffs(size) - 6);
	}
}

static void
e82545_init_eeprom(struct e82545_softc *sc)
{
	uint16_t checksum, i;

	/* mac addr */
	sc->eeprom_data[NVM_MAC_ADDR] = ((uint16_t)sc->esc_mac.octet[0]) |
		(((uint16_t)sc->esc_mac.octet[1]) << 8);
	sc->eeprom_data[NVM_MAC_ADDR+1] = ((uint16_t)sc->esc_mac.octet[2]) |
		(((uint16_t)sc->esc_mac.octet[3]) << 8);
	sc->eeprom_data[NVM_MAC_ADDR+2] = ((uint16_t)sc->esc_mac.octet[4]) |
		(((uint16_t)sc->esc_mac.octet[5]) << 8);

	/* pci ids */
	sc->eeprom_data[NVM_SUB_DEV_ID] = E82545_SUBDEV_ID;
	sc->eeprom_data[NVM_SUB_VEN_ID] = E82545_VENDOR_ID_INTEL;
	sc->eeprom_data[NVM_DEV_ID] = E82545_DEV_ID_82545EM_COPPER;
	sc->eeprom_data[NVM_VEN_ID] = E82545_VENDOR_ID_INTEL;

	/* fill in the checksum */
	checksum = 0;
	for (i = 0; i < NVM_CHECKSUM_REG; i++) {
		checksum += sc->eeprom_data[i];
	}
	checksum = NVM_SUM - checksum;
	sc->eeprom_data[NVM_CHECKSUM_REG] = checksum;
	DPRINTF("eeprom checksum: 0x%x\r\n", checksum);
}

static void
e82545_write_mdi(struct e82545_softc *sc, uint8_t reg_addr,
			uint8_t phy_addr, uint32_t data)
{
	DPRINTF("Write mdi reg:0x%x phy:0x%x data: 0x%x\r\n", reg_addr, phy_addr, data);
}
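/*
 * MII/PHY emulation note: there is no PHY model behind MDIC.  The reads
 * below simply return fixed values describing a completed autonegotiation
 * with a 1000Base-T full-duplex link partner, which is consistent with the
 * always-up link reported through the STATUS register further down.
 */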
static uint32_t
e82545_read_mdi(struct e82545_softc *sc, uint8_t reg_addr,
			uint8_t phy_addr)
{
	//DPRINTF("Read mdi reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr);
	switch (reg_addr) {
	case PHY_STATUS:
		return (MII_SR_LINK_STATUS | MII_SR_AUTONEG_CAPS |
			MII_SR_AUTONEG_COMPLETE);
	case PHY_AUTONEG_ADV:
		return NWAY_AR_SELECTOR_FIELD;
	case PHY_LP_ABILITY:
		return 0;
	case PHY_1000T_STATUS:
		return (SR_1000T_LP_FD_CAPS | SR_1000T_REMOTE_RX_STATUS |
			SR_1000T_LOCAL_RX_STATUS);
	case PHY_ID1:
		return (M88E1011_I_PHY_ID >> 16) & 0xFFFF;
	case PHY_ID2:
		return (M88E1011_I_PHY_ID | E82545_REVISION_4) & 0xFFFF;
	default:
		DPRINTF("Unknown mdi read reg:0x%x phy:0x%x\r\n", reg_addr, phy_addr);
		return 0;
	}
	/* not reached */
}

static void
e82545_eecd_strobe(struct e82545_softc *sc)
{
	/* Microwire state machine */
	/*
	DPRINTF("eeprom state machine strobe "
		"0x%x 0x%x 0x%x 0x%x\r\n",
		sc->nvm_mode, sc->nvm_bits,
		sc->nvm_opaddr, sc->nvm_data);*/

	if (sc->nvm_bits == 0) {
		DPRINTF("eeprom state machine not expecting data! "
			"0x%x 0x%x 0x%x 0x%x\r\n",
			sc->nvm_mode, sc->nvm_bits,
			sc->nvm_opaddr, sc->nvm_data);
		return;
	}
	sc->nvm_bits--;
	if (sc->nvm_mode == E82545_NVM_MODE_DATAOUT) {
		/* shifting out */
		if (sc->nvm_data & 0x8000) {
			sc->eeprom_control |= E1000_EECD_DO;
		} else {
			sc->eeprom_control &= ~E1000_EECD_DO;
		}
		sc->nvm_data <<= 1;
		if (sc->nvm_bits == 0) {
			/* read done, back to opcode mode. */
			sc->nvm_opaddr = 0;
			sc->nvm_mode = E82545_NVM_MODE_OPADDR;
			sc->nvm_bits = E82545_NVM_OPADDR_BITS;
		}
	} else if (sc->nvm_mode == E82545_NVM_MODE_DATAIN) {
		/* shifting in */
		sc->nvm_data <<= 1;
		if (sc->eeprom_control & E1000_EECD_DI) {
			sc->nvm_data |= 1;
		}
		if (sc->nvm_bits == 0) {
			/* eeprom write */
			uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK;
			uint16_t addr = sc->nvm_opaddr & E82545_NVM_ADDR_MASK;
			if (op != E82545_NVM_OPCODE_WRITE) {
				DPRINTF("Illegal eeprom write op 0x%x\r\n",
					sc->nvm_opaddr);
			} else if (addr >= E82545_NVM_EEPROM_SIZE) {
				DPRINTF("Illegal eeprom write addr 0x%x\r\n",
					sc->nvm_opaddr);
			} else {
				DPRINTF("eeprom write eeprom[0x%x] = 0x%x\r\n",
					addr, sc->nvm_data);
				sc->eeprom_data[addr] = sc->nvm_data;
			}
			/* back to opcode mode */
			sc->nvm_opaddr = 0;
			sc->nvm_mode = E82545_NVM_MODE_OPADDR;
			sc->nvm_bits = E82545_NVM_OPADDR_BITS;
		}
	} else if (sc->nvm_mode == E82545_NVM_MODE_OPADDR) {
		sc->nvm_opaddr <<= 1;
		if (sc->eeprom_control & E1000_EECD_DI) {
			sc->nvm_opaddr |= 1;
		}
		if (sc->nvm_bits == 0) {
			uint16_t op = sc->nvm_opaddr & E82545_NVM_OPCODE_MASK;
			switch (op) {
			case E82545_NVM_OPCODE_EWEN:
				DPRINTF("eeprom write enable: 0x%x\r\n",
					sc->nvm_opaddr);
				/* back to opcode mode */
				sc->nvm_opaddr = 0;
				sc->nvm_mode = E82545_NVM_MODE_OPADDR;
				sc->nvm_bits = E82545_NVM_OPADDR_BITS;
				break;
			case E82545_NVM_OPCODE_READ:
			{
				uint16_t addr = sc->nvm_opaddr &
					E82545_NVM_ADDR_MASK;
				sc->nvm_mode = E82545_NVM_MODE_DATAOUT;
				sc->nvm_bits = E82545_NVM_DATA_BITS;
				if (addr < E82545_NVM_EEPROM_SIZE) {
					sc->nvm_data = sc->eeprom_data[addr];
					DPRINTF("eeprom read: eeprom[0x%x] = 0x%x\r\n",
						addr, sc->nvm_data);
				} else {
					DPRINTF("eeprom illegal read: 0x%x\r\n",
						sc->nvm_opaddr);
					sc->nvm_data = 0;
				}
				break;
			}
			case E82545_NVM_OPCODE_WRITE:
				sc->nvm_mode = E82545_NVM_MODE_DATAIN;
				sc->nvm_bits = E82545_NVM_DATA_BITS;
				sc->nvm_data = 0;
				break;
			default:
				DPRINTF("eeprom unknown op: 0x%x\r\n",
					sc->nvm_opaddr);
				/* back to opcode mode */
				sc->nvm_opaddr = 0;
				sc->nvm_mode = E82545_NVM_MODE_OPADDR;
				sc->nvm_bits = E82545_NVM_OPADDR_BITS;
			}
		}
	} else {
		DPRINTF("eeprom state machine wrong state! "
			"0x%x 0x%x 0x%x 0x%x\r\n",
			sc->nvm_mode, sc->nvm_bits,
			sc->nvm_opaddr, sc->nvm_data);
	}
}
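/*
 * Interrupt-throttling arithmetic, spelled out (illustrative only): the
 * guest programs ITR in 256 ns units while the mevent timer used below
 * ticks in milliseconds, so (ITR + 3905) / 3906 is roughly
 * ceil(ITR * 256 ns / 1 ms).  E.g. the reset default of ITR = 250 (64 us)
 * still arms a 1 ms timer; ITR = 0 disables throttling entirely.
 */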
static void
e82545_itr_callback(int fd, enum ev_type type, void *param)
{
	uint32_t new;
	struct e82545_softc *sc = param;

	pthread_mutex_lock(&sc->esc_mtx);
	new = sc->esc_ICR & sc->esc_IMS;
	if (new && !sc->esc_irq_asserted) {
		DPRINTF("itr callback: lintr assert %x\r\n", new);
		sc->esc_irq_asserted = 1;
		pci_lintr_assert(sc->esc_pi);
	} else {
		mevent_delete(sc->esc_mevpitr);
		sc->esc_mevpitr = NULL;
	}
	pthread_mutex_unlock(&sc->esc_mtx);
}

static void
e82545_icr_assert(struct e82545_softc *sc, uint32_t bits)
{
	uint32_t new;

	DPRINTF("icr assert: 0x%x\r\n", bits);

	/*
	 * An interrupt is only generated if bits are set that
	 * aren't already in the ICR, these bits are unmasked,
	 * and there isn't an interrupt already pending.
	 */
	new = bits & ~sc->esc_ICR & sc->esc_IMS;
	sc->esc_ICR |= bits;

	if (new == 0) {
		DPRINTF("icr assert: masked %x, ims %x\r\n", new, sc->esc_IMS);
	} else if (sc->esc_mevpitr != NULL) {
		DPRINTF("icr assert: throttled %x, ims %x\r\n", new, sc->esc_IMS);
	} else if (!sc->esc_irq_asserted) {
		DPRINTF("icr assert: lintr assert %x\r\n", new);
		sc->esc_irq_asserted = 1;
		pci_lintr_assert(sc->esc_pi);
		if (sc->esc_ITR != 0) {
			sc->esc_mevpitr = mevent_add(
			    (sc->esc_ITR + 3905) / 3906,  /* 256ns -> 1ms */
			    EVF_TIMER, e82545_itr_callback, sc);
		}
	}
}
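/*
 * Example of the ICR/IMS interplay (an illustration, not additional logic):
 * if the guest has not set E1000_ICR_RXT0 in IMS, a receive completion only
 * latches the bit in ICR above and the INTx line stays deasserted; the
 * interrupt is delivered later from e82545_ims_change() when the guest
 * unmasks RXT0 by writing it to the IMS register.
 */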
static void
e82545_ims_change(struct e82545_softc *sc, uint32_t bits)
{
	uint32_t new;

	/*
	 * Changing the mask may allow previously asserted
	 * but masked interrupt requests to generate an interrupt.
	 */
	new = bits & sc->esc_ICR & ~sc->esc_IMS;
	sc->esc_IMS |= bits;

	if (new == 0) {
		DPRINTF("ims change: masked %x, ims %x\r\n", new, sc->esc_IMS);
	} else if (sc->esc_mevpitr != NULL) {
		DPRINTF("ims change: throttled %x, ims %x\r\n", new, sc->esc_IMS);
	} else if (!sc->esc_irq_asserted) {
		DPRINTF("ims change: lintr assert %x\r\n", new);
		sc->esc_irq_asserted = 1;
		pci_lintr_assert(sc->esc_pi);
		if (sc->esc_ITR != 0) {
			sc->esc_mevpitr = mevent_add(
			    (sc->esc_ITR + 3905) / 3906,  /* 256ns -> 1ms */
			    EVF_TIMER, e82545_itr_callback, sc);
		}
	}
}

static void
e82545_icr_deassert(struct e82545_softc *sc, uint32_t bits)
{

	DPRINTF("icr deassert: 0x%x\r\n", bits);
	sc->esc_ICR &= ~bits;

	/*
	 * If there are no longer any interrupt sources and there
	 * was an asserted interrupt, clear it
	 */
	if (sc->esc_irq_asserted && !(sc->esc_ICR & sc->esc_IMS)) {
		DPRINTF("icr deassert: lintr deassert %x\r\n", bits);
		pci_lintr_deassert(sc->esc_pi);
		sc->esc_irq_asserted = 0;
	}
}

static void
e82545_intr_write(struct e82545_softc *sc, uint32_t offset, uint32_t value)
{

	DPRINTF("intr_write: off %x, val %x\r\n", offset, value);

	switch (offset) {
	case E1000_ICR:
		e82545_icr_deassert(sc, value);
		break;
	case E1000_ITR:
		sc->esc_ITR = value;
		break;
	case E1000_ICS:
		sc->esc_ICS = value;	/* not used: store for debug */
		e82545_icr_assert(sc, value);
		break;
	case E1000_IMS:
		e82545_ims_change(sc, value);
		break;
	case E1000_IMC:
		sc->esc_IMC = value;	/* for debug */
		sc->esc_IMS &= ~value;
		// XXX clear interrupts if all ICR bits now masked
		// and interrupt was pending ?
		break;
	default:
		break;
	}
}

static uint32_t
e82545_intr_read(struct e82545_softc *sc, uint32_t offset)
{
	uint32_t retval;

	retval = 0;

	DPRINTF("intr_read: off %x\r\n", offset);

	switch (offset) {
	case E1000_ICR:
		retval = sc->esc_ICR;
		sc->esc_ICR = 0;
		e82545_icr_deassert(sc, ~0);
		break;
	case E1000_ITR:
		retval = sc->esc_ITR;
		break;
	case E1000_ICS:
		/* write-only register */
		break;
	case E1000_IMS:
		retval = sc->esc_IMS;
		break;
	case E1000_IMC:
		/* write-only register */
		break;
	default:
		break;
	}

	return (retval);
}

static void
e82545_devctl(struct e82545_softc *sc, uint32_t val)
{

	sc->esc_CTRL = val & ~E1000_CTRL_RST;

	if (val & E1000_CTRL_RST) {
		DPRINTF("e1k: s/w reset, ctl %x\n", val);
		e82545_reset(sc, 1);
	}
	/* XXX check for phy reset ? */
}

static void
e82545_rx_update_rdba(struct e82545_softc *sc)
{

	/* XXX verify desc base/len within phys mem range */
	sc->esc_rdba = (uint64_t)sc->esc_RDBAH << 32 |
	    sc->esc_RDBAL;

	/* Cache host mapping of guest descriptor array */
	sc->esc_rxdesc = paddr_guest2host(sc->esc_ctx,
	    sc->esc_rdba, sc->esc_RDLEN);
}

static void
e82545_rx_ctl(struct e82545_softc *sc, uint32_t val)
{
	int on;

	on = ((val & E1000_RCTL_EN) == E1000_RCTL_EN);

	/* Save RCTL after stripping reserved bits 31:27,24,21,14,11:10,0 */
	sc->esc_RCTL = val & ~0xF9204c01;

	DPRINTF("rx_ctl - %s RCTL %x, val %x\n",
	    on ? "on" : "off", sc->esc_RCTL, val);

	/* state change requested */
	if (on != sc->esc_rx_enabled) {
		if (on) {
			/* Catch disallowed/unimplemented settings */
			//assert(!(val & E1000_RCTL_LBM_TCVR));

			if (sc->esc_RCTL & E1000_RCTL_LBM_TCVR) {
				sc->esc_rx_loopback = 1;
			} else {
				sc->esc_rx_loopback = 0;
			}

			e82545_rx_update_rdba(sc);
			e82545_rx_enable(sc);
		} else {
			e82545_rx_disable(sc);
			sc->esc_rx_loopback = 0;
			sc->esc_rdba = 0;
			sc->esc_rxdesc = NULL;
		}
	}
}

static void
e82545_tx_update_tdba(struct e82545_softc *sc)
{

	/* XXX verify desc base/len within phys mem range */
	sc->esc_tdba = (uint64_t)sc->esc_TDBAH << 32 | sc->esc_TDBAL;

	/* Cache host mapping of guest descriptor array */
	sc->esc_txdesc = paddr_guest2host(sc->esc_ctx, sc->esc_tdba,
	    sc->esc_TDLEN);
}

static void
e82545_tx_ctl(struct e82545_softc *sc, uint32_t val)
{
	int on;

	on = ((val & E1000_TCTL_EN) == E1000_TCTL_EN);

	/* ignore TCTL_EN settings that don't change state */
	if (on == sc->esc_tx_enabled)
		return;

	if (on) {
		e82545_tx_update_tdba(sc);
		e82545_tx_enable(sc);
	} else {
		e82545_tx_disable(sc);
		sc->esc_tdba = 0;
		sc->esc_txdesc = NULL;
	}

	/* Save TCTL value after stripping reserved bits 31:25,23,2,0 */
	sc->esc_TCTL = val & ~0xFE800005;
}

int
e82545_bufsz(uint32_t rctl)
{

	switch (rctl & (E1000_RCTL_BSEX | E1000_RCTL_SZ_256)) {
	case (E1000_RCTL_SZ_2048): return (2048);
	case (E1000_RCTL_SZ_1024): return (1024);
	case (E1000_RCTL_SZ_512): return (512);
	case (E1000_RCTL_SZ_256): return (256);
	case (E1000_RCTL_BSEX|E1000_RCTL_SZ_16384): return (16384);
	case (E1000_RCTL_BSEX|E1000_RCTL_SZ_8192): return (8192);
	case (E1000_RCTL_BSEX|E1000_RCTL_SZ_4096): return (4096);
	}
	return (256);	/* Forbidden value. */
}
static uint8_t dummybuf[2048];

/* XXX one packet at a time until this is debugged */
static void
e82545_tap_callback(int fd, enum ev_type type, void *param)
{
	struct e82545_softc *sc = param;
	struct e1000_rx_desc *rxd;
	struct iovec vec[64];
	int left, len, lim, maxpktsz, maxpktdesc, bufsz, i, n, size;
	uint32_t cause = 0;
	uint16_t *tp, tag, head;

	pthread_mutex_lock(&sc->esc_mtx);
	DPRINTF("rx_run: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT);

	if (!sc->esc_rx_enabled || sc->esc_rx_loopback) {
		DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped\r\n",
		    sc->esc_rx_enabled, sc->esc_rx_loopback);
		while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
		}
		goto done1;
	}
	bufsz = e82545_bufsz(sc->esc_RCTL);
	maxpktsz = (sc->esc_RCTL & E1000_RCTL_LPE) ? 16384 : 1522;
	maxpktdesc = (maxpktsz + bufsz - 1) / bufsz;
	size = sc->esc_RDLEN / 16;
	head = sc->esc_RDH;
	left = (size + sc->esc_RDT - head) % size;
	if (left < maxpktdesc) {
		DPRINTF("rx overflow (%d < %d) -- packet(s) dropped\r\n",
		    left, maxpktdesc);
		while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
		}
		goto done1;
	}

	sc->esc_rx_active = 1;
	pthread_mutex_unlock(&sc->esc_mtx);

	for (lim = size / 4; lim > 0 && left >= maxpktdesc; lim -= n) {

		/* Grab rx descriptor pointed to by the head pointer */
		for (i = 0; i < maxpktdesc; i++) {
			rxd = &sc->esc_rxdesc[(head + i) % size];
			vec[i].iov_base = paddr_guest2host(sc->esc_ctx,
			    rxd->buffer_addr, bufsz);
			vec[i].iov_len = bufsz;
		}
		len = readv(sc->esc_tapfd, vec, maxpktdesc);
		if (len <= 0) {
			DPRINTF("tap: readv() returned %d\n", len);
			goto done;
		}

		/*
		 * Adjust the packet length based on whether the CRC needs
		 * to be stripped or if the packet is less than the minimum
		 * eth packet size.
		 */
		if (len < ETHER_MIN_LEN - ETHER_CRC_LEN)
			len = ETHER_MIN_LEN - ETHER_CRC_LEN;
		if (!(sc->esc_RCTL & E1000_RCTL_SECRC))
			len += ETHER_CRC_LEN;
		n = (len + bufsz - 1) / bufsz;

		DPRINTF("packet read %d bytes, %d segs, head %d\r\n",
		    len, n, head);

		/* Apply VLAN filter. */
		tp = (uint16_t *)vec[0].iov_base + 6;
		if ((sc->esc_RCTL & E1000_RCTL_VFE) &&
		    (ntohs(tp[0]) == sc->esc_VET)) {
			tag = ntohs(tp[1]) & 0x0fff;
			if ((sc->esc_fvlan[tag >> 5] &
			    (1 << (tag & 0x1f))) != 0) {
				DPRINTF("known VLAN %d\r\n", tag);
			} else {
				DPRINTF("unknown VLAN %d\r\n", tag);
				n = 0;
				continue;
			}
		}

		/* Update all consumed descriptors. */
		for (i = 0; i < n - 1; i++) {
			rxd = &sc->esc_rxdesc[(head + i) % size];
			rxd->length = bufsz;
			rxd->csum = 0;
			rxd->errors = 0;
			rxd->special = 0;
			rxd->status = E1000_RXD_STAT_DD;
		}
		rxd = &sc->esc_rxdesc[(head + i) % size];
		rxd->length = len % bufsz;
		rxd->csum = 0;
		rxd->errors = 0;
		rxd->special = 0;
		/* XXX signal no checksum for now */
		rxd->status = E1000_RXD_STAT_PIF | E1000_RXD_STAT_IXSM |
		    E1000_RXD_STAT_EOP | E1000_RXD_STAT_DD;

		/* Schedule receive interrupts. */
		if (len <= sc->esc_RSRPD) {
			cause |= E1000_ICR_SRPD | E1000_ICR_RXT0;
		} else {
			/* XXX: RDRT and RADV timers should be here. */
			cause |= E1000_ICR_RXT0;
		}

		head = (head + n) % size;
		left -= n;
	}

done:
	pthread_mutex_lock(&sc->esc_mtx);
	sc->esc_rx_active = 0;
	if (sc->esc_rx_enabled == 0)
		pthread_cond_signal(&sc->esc_rx_cond);

	sc->esc_RDH = head;
	/* Respect E1000_RCTL_RDMTS */
	left = (size + sc->esc_RDT - head) % size;
	if (left < (size >> (((sc->esc_RCTL >> 8) & 3) + 1)))
		cause |= E1000_ICR_RXDMT0;
	/* Assert all accumulated interrupts. */
	if (cause != 0)
		e82545_icr_assert(sc, cause);
done1:
	DPRINTF("rx_run done: head %x, tail %x\r\n", sc->esc_RDH, sc->esc_RDT);
	pthread_mutex_unlock(&sc->esc_mtx);
}

static uint16_t
e82545_carry(uint32_t sum)
{

	sum = (sum & 0xFFFF) + (sum >> 16);
	if (sum > 0xFFFF)
		sum -= 0xFFFF;
	return (sum);
}
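/*
 * Worked example for the carry fold above (illustration only): an
 * intermediate sum of 0x2F0FF folds to (0xF0FF + 0x2) = 0xF101, which is
 * the 16-bit ones'-complement partial sum used by the callers below.
 */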
static uint16_t
e82545_buf_checksum(uint8_t *buf, int len)
{
	int i;
	uint32_t sum = 0;

	/* Checksum all the pairs of bytes first... */
	for (i = 0; i < (len & ~1U); i += 2)
		sum += *((u_int16_t *)(buf + i));

	/*
	 * If there's a single byte left over, checksum it, too.
	 * Network byte order is big-endian, so the remaining byte is
	 * the high byte.
	 */
	if (i < len)
		sum += htons(buf[i] << 8);

	return (e82545_carry(sum));
}

static uint16_t
e82545_iov_checksum(struct iovec *iov, int iovcnt, int off, int len)
{
	int now, odd;
	uint32_t sum = 0, s;

	/* Skip completely unneeded vectors. */
	while (iovcnt > 0 && iov->iov_len <= off && off > 0) {
		off -= iov->iov_len;
		iov++;
		iovcnt--;
	}

	/* Calculate checksum of requested range. */
	odd = 0;
	while (len > 0 && iovcnt > 0) {
		now = MIN(len, iov->iov_len - off);
		s = e82545_buf_checksum(iov->iov_base + off, now);
		sum += odd ? (s << 8) : s;
		odd ^= (now & 1);
		len -= now;
		off = 0;
		iov++;
		iovcnt--;
	}

	return (e82545_carry(sum));
}

/*
 * Return the transmit descriptor type.
 */
int
e82545_txdesc_type(uint32_t lower)
{
	int type;

	type = 0;

	if (lower & E1000_TXD_CMD_DEXT)
		type = lower & E1000_TXD_MASK;

	return (type);
}

static void
e82545_transmit_checksum(struct iovec *iov, int iovcnt, struct ck_info *ck)
{
	uint16_t cksum;
	int cklen;

	DPRINTF("tx cksum: iovcnt/s/off/len %d/%d/%d/%d\r\n",
	    iovcnt, ck->ck_start, ck->ck_off, ck->ck_len);
	cklen = ck->ck_len ? ck->ck_len - ck->ck_start + 1 : INT_MAX;
	cksum = e82545_iov_checksum(iov, iovcnt, ck->ck_start, cklen);
	*(uint16_t *)((uint8_t *)iov[0].iov_base + ck->ck_off) = ~cksum;
}

static void
e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt)
{

	if (sc->esc_tapfd == -1)
		return;

	(void) writev(sc->esc_tapfd, iov, iovcnt);
}

static void
e82545_transmit_done(struct e82545_softc *sc, uint16_t head, uint16_t tail,
    uint16_t dsize, int *tdwb)
{
	union e1000_tx_udesc *dsc;

	for ( ; head != tail; head = (head + 1) % dsize) {
		dsc = &sc->esc_txdesc[head];
		if (dsc->td.lower.data & E1000_TXD_CMD_RS) {
			dsc->td.upper.data |= E1000_TXD_STAT_DD;
			*tdwb = 1;
		}
	}
}

static int
e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
    uint16_t dsize, uint16_t *rhead, int *tdwb)
{
	uint8_t *hdr, *hdrp;
	struct iovec iovb[I82545_MAX_TXSEGS + 2];
	struct iovec tiov[I82545_MAX_TXSEGS + 2];
	struct e1000_context_desc *cd;
	struct ck_info ckinfo[2];
	struct iovec *iov;
	union  e1000_tx_udesc *dsc;
	int desc, dtype, len, ntype, iovcnt, tlen, hdrlen, vlen, tcp, tso;
	int mss, paylen, seg, tiovcnt, left, now, nleft, nnow, pv, pvoff;
	uint32_t tcpsum, tcpseq;
	uint16_t ipcs, tcpcs, ipid, ohead;

	ckinfo[0].ck_valid = ckinfo[1].ck_valid = 0;
	iovcnt = 0;
	tlen = 0;
	ntype = 0;
	tso = 0;
	ohead = head;

	/* iovb[0/1] may be used for writable copy of headers. */
	iov = &iovb[2];

	for (desc = 0; ; desc++, head = (head + 1) % dsize) {
		if (head == tail) {
			*rhead = head;
			return (0);
		}
		dsc = &sc->esc_txdesc[head];
		dtype = e82545_txdesc_type(dsc->td.lower.data);

		if (desc == 0) {
			switch (dtype) {
			case E1000_TXD_TYP_C:
				DPRINTF("tx ctxt desc idx %d: %016jx "
				    "%08x%08x\r\n",
				    head, dsc->td.buffer_addr,
				    dsc->td.upper.data, dsc->td.lower.data);
				/* Save context and return */
				sc->esc_txctx = dsc->cd;
				goto done;
			case E1000_TXD_TYP_L:
				DPRINTF("tx legacy desc idx %d: %08x%08x\r\n",
				    head, dsc->td.upper.data, dsc->td.lower.data);
				/*
				 * legacy cksum start valid in first descriptor
				 */
				ntype = dtype;
				ckinfo[0].ck_start = dsc->td.upper.fields.css;
				break;
			case E1000_TXD_TYP_D:
				DPRINTF("tx data desc idx %d: %08x%08x\r\n",
				    head, dsc->td.upper.data, dsc->td.lower.data);
				ntype = dtype;
				break;
			default:
				break;
			}
		} else {
			/* Descriptor type must be consistent */
			assert(dtype == ntype);
			DPRINTF("tx next desc idx %d: %08x%08x\r\n",
			    head, dsc->td.upper.data, dsc->td.lower.data);
		}

		len = (dtype == E1000_TXD_TYP_L) ? dsc->td.lower.flags.length :
		    dsc->dd.lower.data & 0xFFFFF;

		if (len > 0) {
			/* Strip checksum supplied by guest. */
			if ((dsc->td.lower.data & E1000_TXD_CMD_EOP) != 0 &&
			    (dsc->td.lower.data & E1000_TXD_CMD_IFCS) == 0)
				len -= 2;
			tlen += len;
			if (iovcnt < I82545_MAX_TXSEGS) {
				iov[iovcnt].iov_base = paddr_guest2host(
				    sc->esc_ctx, dsc->td.buffer_addr, len);
				iov[iovcnt].iov_len = len;
			}
			iovcnt++;
		}

		/*
		 * Pull out info that is valid in the final descriptor
		 * and exit descriptor loop.
		 */
		if (dsc->td.lower.data & E1000_TXD_CMD_EOP) {
			if (dtype == E1000_TXD_TYP_L) {
				if (dsc->td.lower.data & E1000_TXD_CMD_IC) {
					ckinfo[0].ck_valid = 1;
					ckinfo[0].ck_off =
					    dsc->td.lower.flags.cso;
					ckinfo[0].ck_len = 0;
				}
			} else {
				cd = &sc->esc_txctx;
				if (dsc->dd.lower.data & E1000_TXD_CMD_TSE)
					tso = 1;
				if (dsc->dd.upper.fields.popts &
				    E1000_TXD_POPTS_IXSM)
					ckinfo[0].ck_valid = 1;
				if (dsc->dd.upper.fields.popts &
				    E1000_TXD_POPTS_IXSM || tso) {
					ckinfo[0].ck_start =
					    cd->lower_setup.ip_fields.ipcss;
					ckinfo[0].ck_off =
					    cd->lower_setup.ip_fields.ipcso;
					ckinfo[0].ck_len =
					    cd->lower_setup.ip_fields.ipcse;
				}
				if (dsc->dd.upper.fields.popts &
				    E1000_TXD_POPTS_TXSM)
					ckinfo[1].ck_valid = 1;
				if (dsc->dd.upper.fields.popts &
				    E1000_TXD_POPTS_TXSM || tso) {
					ckinfo[1].ck_start =
					    cd->upper_setup.tcp_fields.tucss;
					ckinfo[1].ck_off =
					    cd->upper_setup.tcp_fields.tucso;
					ckinfo[1].ck_len =
					    cd->upper_setup.tcp_fields.tucse;
				}
			}
			break;
		}
	}

	if (iovcnt > I82545_MAX_TXSEGS) {
		WPRINTF("tx too many descriptors (%d > %d) -- dropped\r\n",
		    iovcnt, I82545_MAX_TXSEGS);
		goto done;
	}

	hdrlen = vlen = 0;
	/* Estimate writable space for VLAN header insertion. */
	if ((sc->esc_CTRL & E1000_CTRL_VME) &&
	    (dsc->td.lower.data & E1000_TXD_CMD_VLE)) {
		hdrlen = ETHER_ADDR_LEN*2;
		vlen = ETHER_VLAN_ENCAP_LEN;
	}
	if (!tso) {
		/* Estimate required writable space for checksums. */
		if (ckinfo[0].ck_valid)
			hdrlen = MAX(hdrlen, ckinfo[0].ck_off + 2);
		if (ckinfo[1].ck_valid)
			hdrlen = MAX(hdrlen, ckinfo[1].ck_off + 2);
		/* Round up writable space to the first vector. */
		if (hdrlen != 0 && iov[0].iov_len > hdrlen &&
		    iov[0].iov_len < hdrlen + 100)
			hdrlen = iov[0].iov_len;
	} else {
		/* In case of TSO header length provided by software. */
		hdrlen = sc->esc_txctx.tcp_seg_setup.fields.hdr_len;
	}

	/* Allocate, fill and prepend writable header vector. */
	if (hdrlen != 0) {
		hdr = __builtin_alloca(hdrlen + vlen);
		hdr += vlen;
		for (left = hdrlen, hdrp = hdr; left > 0;
		    left -= now, hdrp += now) {
			now = MIN(left, iov->iov_len);
			memcpy(hdrp, iov->iov_base, now);
			iov->iov_base += now;
			iov->iov_len -= now;
			if (iov->iov_len == 0) {
				iov++;
				iovcnt--;
			}
		}
		iov--;
		iovcnt++;
		iov->iov_base = hdr;
		iov->iov_len = hdrlen;
	}

	/* Insert VLAN tag. */
	if (vlen != 0) {
		hdr -= ETHER_VLAN_ENCAP_LEN;
		memmove(hdr, hdr + ETHER_VLAN_ENCAP_LEN, ETHER_ADDR_LEN*2);
		hdrlen += ETHER_VLAN_ENCAP_LEN;
		hdr[ETHER_ADDR_LEN*2 + 0] = sc->esc_VET >> 8;
		hdr[ETHER_ADDR_LEN*2 + 1] = sc->esc_VET & 0xff;
		hdr[ETHER_ADDR_LEN*2 + 2] = dsc->td.upper.fields.special >> 8;
		hdr[ETHER_ADDR_LEN*2 + 3] = dsc->td.upper.fields.special & 0xff;
		iov->iov_base = hdr;
		iov->iov_len += ETHER_VLAN_ENCAP_LEN;
		/* Correct checksum offsets after VLAN tag insertion. */
		ckinfo[0].ck_start += ETHER_VLAN_ENCAP_LEN;
		ckinfo[0].ck_off += ETHER_VLAN_ENCAP_LEN;
		if (ckinfo[0].ck_len != 0)
			ckinfo[0].ck_len += ETHER_VLAN_ENCAP_LEN;
		ckinfo[1].ck_start += ETHER_VLAN_ENCAP_LEN;
		ckinfo[1].ck_off += ETHER_VLAN_ENCAP_LEN;
		if (ckinfo[1].ck_len != 0)
			ckinfo[1].ck_len += ETHER_VLAN_ENCAP_LEN;
	}

	/* Simple non-TSO case. */
	if (!tso) {
		/* Calculate checksums and transmit. */
		if (ckinfo[0].ck_valid)
			e82545_transmit_checksum(iov, iovcnt, &ckinfo[0]);
		if (ckinfo[1].ck_valid)
			e82545_transmit_checksum(iov, iovcnt, &ckinfo[1]);
		e82545_transmit_backend(sc, iov, iovcnt);
		goto done;
	}

	/* Doing TSO. */
	tcp = (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_TCP) != 0;
	mss = sc->esc_txctx.tcp_seg_setup.fields.mss;
	paylen = (sc->esc_txctx.cmd_and_length & 0x000fffff);
	DPRINTF("tx %s segmentation offload %d+%d/%d bytes %d iovs\r\n",
	    tcp ? "TCP" : "UDP", hdrlen, paylen, mss, iovcnt);
	ipid = ntohs(*(uint16_t *)&hdr[ckinfo[0].ck_start + 4]);
	tcpseq = ntohl(*(uint32_t *)&hdr[ckinfo[1].ck_start + 4]);
	ipcs = *(uint16_t *)&hdr[ckinfo[0].ck_off];
	tcpcs = 0;
	if (ckinfo[1].ck_valid)	/* Save partial pseudo-header checksum. */
		tcpcs = *(uint16_t *)&hdr[ckinfo[1].ck_off];
	pv = 1;
	pvoff = 0;
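	/*
	 * Worked example of the segmentation loop below (numbers are
	 * illustrative): with hdrlen = 54, paylen = 4344 and mss = 1448,
	 * three segments of 1448 payload bytes are emitted, each prefixed
	 * with its own copy of the 54-byte header; the IP length/ID and TCP
	 * sequence number are rewritten per segment, and FIN/PUSH are
	 * cleared on every segment except the last.
	 */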
	for (seg = 0, left = paylen; left > 0; seg++, left -= now) {
		now = MIN(left, mss);

		/* Construct IOVs for the segment. */
		/* Include whole original header. */
		tiov[0].iov_base = hdr;
		tiov[0].iov_len = hdrlen;
		tiovcnt = 1;
		/* Include respective part of payload IOV. */
		for (nleft = now; pv < iovcnt && nleft > 0; nleft -= nnow) {
			nnow = MIN(nleft, iov[pv].iov_len - pvoff);
			tiov[tiovcnt].iov_base = iov[pv].iov_base + pvoff;
			tiov[tiovcnt++].iov_len = nnow;
			if (pvoff + nnow == iov[pv].iov_len) {
				pv++;
				pvoff = 0;
			} else
				pvoff += nnow;
		}
		DPRINTF("tx segment %d %d+%d bytes %d iovs\r\n",
		    seg, hdrlen, now, tiovcnt);

		/* Update IP header. */
		if (sc->esc_txctx.cmd_and_length & E1000_TXD_CMD_IP) {
			/* IPv4 -- set length and ID */
			*(uint16_t *)&hdr[ckinfo[0].ck_start + 2] =
			    htons(hdrlen - ckinfo[0].ck_start + now);
			*(uint16_t *)&hdr[ckinfo[0].ck_start + 4] =
			    htons(ipid + seg);
		} else {
			/* IPv6 -- set length */
			*(uint16_t *)&hdr[ckinfo[0].ck_start + 4] =
			    htons(hdrlen - ckinfo[0].ck_start - 40 +
				now);
		}

		/* Update pseudo-header checksum. */
		tcpsum = tcpcs;
		tcpsum += htons(hdrlen - ckinfo[1].ck_start + now);

		/* Update TCP/UDP headers. */
		if (tcp) {
			/* Update sequence number and FIN/PUSH flags. */
			*(uint32_t *)&hdr[ckinfo[1].ck_start + 4] =
			    htonl(tcpseq + paylen - left);
			if (now < left) {
				hdr[ckinfo[1].ck_start + 13] &=
				    ~(TH_FIN | TH_PUSH);
			}
		} else {
			/* Update payload length. */
			*(uint32_t *)&hdr[ckinfo[1].ck_start + 4] =
			    hdrlen - ckinfo[1].ck_start + now;
		}

		/* Calculate checksums and transmit. */
		if (ckinfo[0].ck_valid) {
			*(uint16_t *)&hdr[ckinfo[0].ck_off] = ipcs;
			e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[0]);
		}
		if (ckinfo[1].ck_valid) {
			*(uint16_t *)&hdr[ckinfo[1].ck_off] =
			    e82545_carry(tcpsum);
			e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[1]);
		}
		e82545_transmit_backend(sc, tiov, tiovcnt);
	}

done:
	head = (head + 1) % dsize;
	e82545_transmit_done(sc, ohead, head, dsize, tdwb);

	*rhead = head;
	return (desc + 1);
}

static void
e82545_tx_run(struct e82545_softc *sc)
{
	uint32_t cause;
	uint16_t head, rhead, tail, size;
	int lim, tdwb, sent;

	head = sc->esc_TDH;
	tail = sc->esc_TDT;
	size = sc->esc_TDLEN / 16;
	DPRINTF("tx_run: head %x, rhead %x, tail %x\r\n",
	    sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT);

	pthread_mutex_unlock(&sc->esc_mtx);
	rhead = head;
	tdwb = 0;
	for (lim = size / 4; sc->esc_tx_enabled && lim > 0; lim -= sent) {
		sent = e82545_transmit(sc, head, tail, size, &rhead, &tdwb);
		if (sent == 0)
			break;
		head = rhead;
	}
	pthread_mutex_lock(&sc->esc_mtx);

	sc->esc_TDH = head;
	sc->esc_TDHr = rhead;
	cause = 0;
	if (tdwb)
		cause |= E1000_ICR_TXDW;
	if (lim != size / 4 && sc->esc_TDH == sc->esc_TDT)
		cause |= E1000_ICR_TXQE;
	if (cause)
		e82545_icr_assert(sc, cause);

	DPRINTF("tx_run done: head %x, rhead %x, tail %x\r\n",
	    sc->esc_TDH, sc->esc_TDHr, sc->esc_TDT);
}

static void *
e82545_tx_thread(void *param)
{
	struct e82545_softc *sc = param;

	pthread_mutex_lock(&sc->esc_mtx);
	for (;;) {
		while (!sc->esc_tx_enabled || sc->esc_TDHr == sc->esc_TDT) {
			if (sc->esc_tx_enabled && sc->esc_TDHr != sc->esc_TDT)
				break;
			sc->esc_tx_active = 0;
			if (sc->esc_tx_enabled == 0)
				pthread_cond_signal(&sc->esc_tx_cond);
			pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx);
		}
		sc->esc_tx_active = 1;

		/* Process some tx descriptors.  Lock dropped inside. */
		e82545_tx_run(sc);
	}
}

static void
e82545_tx_start(struct e82545_softc *sc)
{

	if (sc->esc_tx_active == 0)
		pthread_cond_signal(&sc->esc_tx_cond);
}

static void
e82545_tx_enable(struct e82545_softc *sc)
{

	sc->esc_tx_enabled = 1;
}

static void
e82545_tx_disable(struct e82545_softc *sc)
{

	sc->esc_tx_enabled = 0;
	while (sc->esc_tx_active)
		pthread_cond_wait(&sc->esc_tx_cond, &sc->esc_mtx);
}

static void
e82545_rx_enable(struct e82545_softc *sc)
{

	sc->esc_rx_enabled = 1;
}

static void
e82545_rx_disable(struct e82545_softc *sc)
{

	sc->esc_rx_enabled = 0;
	while (sc->esc_rx_active)
		pthread_cond_wait(&sc->esc_rx_cond, &sc->esc_mtx);
}

static void
e82545_write_ra(struct e82545_softc *sc, int reg, uint32_t wval)
{
	struct eth_uni *eu;
	int idx;

	idx = reg >> 1;
	assert(idx < 15);

	eu = &sc->esc_uni[idx];

	if (reg & 0x1) {
		/* RAH */
		eu->eu_valid = ((wval & E1000_RAH_AV) == E1000_RAH_AV);
		eu->eu_addrsel = (wval >> 16) & 0x3;
		eu->eu_eth.octet[5] = wval >> 8;
		eu->eu_eth.octet[4] = wval;
	} else {
		/* RAL */
		eu->eu_eth.octet[3] = wval >> 24;
		eu->eu_eth.octet[2] = wval >> 16;
		eu->eu_eth.octet[1] = wval >> 8;
		eu->eu_eth.octet[0] = wval;
	}
}

static uint32_t
e82545_read_ra(struct e82545_softc *sc, int reg)
{
	struct eth_uni *eu;
	uint32_t retval;
	int idx;

	idx = reg >> 1;
	assert(idx < 15);

	eu = &sc->esc_uni[idx];

	if (reg & 0x1) {
		/* RAH */
		retval = (eu->eu_valid << 31) |
			 (eu->eu_addrsel << 16) |
			 (eu->eu_eth.octet[5] << 8) |
			 eu->eu_eth.octet[4];
	} else {
		/* RAL */
		retval = (eu->eu_eth.octet[3] << 24) |
			 (eu->eu_eth.octet[2] << 16) |
			 (eu->eu_eth.octet[1] << 8) |
			 eu->eu_eth.octet[0];
	}

	return (retval);
}

static void
e82545_write_register(struct e82545_softc *sc, uint32_t offset, uint32_t value)
{
	int ridx;

	if (offset & 0x3) {
		DPRINTF("Unaligned register write offset:0x%x value:0x%x\r\n", offset, value);
		return;
	}
	DPRINTF("Register write: 0x%x value: 0x%x\r\n", offset, value);

	switch (offset) {
	case E1000_CTRL:
	case E1000_CTRL_DUP:
		e82545_devctl(sc, value);
		break;
	case E1000_FCAL:
		sc->esc_FCAL = value;
		break;
	case E1000_FCAH:
		sc->esc_FCAH = value & ~0xFFFF0000;
		break;
	case E1000_FCT:
		sc->esc_FCT = value & ~0xFFFF0000;
		break;
	case E1000_VET:
		sc->esc_VET = value & ~0xFFFF0000;
		break;
	case E1000_FCTTV:
		sc->esc_FCTTV = value & ~0xFFFF0000;
		break;
	case E1000_LEDCTL:
		sc->esc_LEDCTL = value & ~0x30303000;
		break;
	case E1000_PBA:
		sc->esc_PBA = value & 0x0000FF80;
		break;
	case E1000_ICR:
	case E1000_ITR:
	case E1000_ICS:
	case E1000_IMS:
	case E1000_IMC:
		e82545_intr_write(sc, offset, value);
		break;
	case E1000_RCTL:
		e82545_rx_ctl(sc, value);
		break;
	case E1000_FCRTL:
		sc->esc_FCRTL = value & ~0xFFFF0007;
		break;
	case E1000_FCRTH:
		sc->esc_FCRTH = value & ~0xFFFF0007;
		break;
	case E1000_RDBAL(0):
		sc->esc_RDBAL = value & ~0xF;
		if (sc->esc_rx_enabled) {
			/* Apparently legal: update cached address */
			e82545_rx_update_rdba(sc);
		}
		break;
	case E1000_RDBAH(0):
		assert(!sc->esc_rx_enabled);
		sc->esc_RDBAH = value;
		break;
	case E1000_RDLEN(0):
		assert(!sc->esc_rx_enabled);
		sc->esc_RDLEN = value & ~0xFFF0007F;
		break;
	case E1000_RDH(0):
		/* XXX should only ever be zero ? Range check ? */
		sc->esc_RDH = value;
		break;
	case E1000_RDT(0):
		/* XXX if this opens up the rx ring, do something ? */
		sc->esc_RDT = value;
		break;
	case E1000_RDTR:
		/* ignore FPD bit 31 */
		sc->esc_RDTR = value & ~0xFFFF0000;
		break;
	case E1000_RXDCTL(0):
		sc->esc_RXDCTL = value & ~0xFEC0C0C0;
		break;
	case E1000_RADV:
		sc->esc_RADV = value & ~0xFFFF0000;
		break;
	case E1000_RSRPD:
		sc->esc_RSRPD = value & ~0xFFFFF000;
		break;
	case E1000_RXCSUM:
		sc->esc_RXCSUM = value & ~0xFFFFF800;
		break;
	case E1000_TXCW:
		sc->esc_TXCW = value & ~0x3FFF0000;
		break;
	case E1000_TCTL:
		e82545_tx_ctl(sc, value);
		break;
	case E1000_TIPG:
		sc->esc_TIPG = value;
		break;
	case E1000_AIT:
		sc->esc_AIT = value;
		break;
	case E1000_TDBAL(0):
		sc->esc_TDBAL = value & ~0xF;
		if (sc->esc_tx_enabled) {
			/* Apparently legal */
			e82545_tx_update_tdba(sc);
		}
		break;
	case E1000_TDBAH(0):
		//assert(!sc->esc_tx_enabled);
		sc->esc_TDBAH = value;
		break;
	case E1000_TDLEN(0):
		//assert(!sc->esc_tx_enabled);
		sc->esc_TDLEN = value & ~0xFFF0007F;
		break;
	case E1000_TDH(0):
		//assert(!sc->esc_tx_enabled);
		/* XXX should only ever be zero ? Range check ? */
		sc->esc_TDHr = sc->esc_TDH = value;
		break;
	case E1000_TDT(0):
		/* XXX range check ? */
		sc->esc_TDT = value;
		if (sc->esc_tx_enabled)
			e82545_tx_start(sc);
		break;
	case E1000_TIDV:
		sc->esc_TIDV = value & ~0xFFFF0000;
		break;
	case E1000_TXDCTL(0):
		//assert(!sc->esc_tx_enabled);
		sc->esc_TXDCTL = value & ~0xC0C0C0;
		break;
	case E1000_TADV:
		sc->esc_TADV = value & ~0xFFFF0000;
		break;
	case E1000_RAL(0) ... E1000_RAH(15):
		/* convert to u32 offset */
		ridx = (offset - E1000_RAL(0)) >> 2;
		e82545_write_ra(sc, ridx, value);
		break;
	case E1000_MTA ... (E1000_MTA + (127*4)):
		sc->esc_fmcast[(offset - E1000_MTA) >> 2] = value;
		break;
	case E1000_VFTA ... (E1000_VFTA + (127*4)):
		sc->esc_fvlan[(offset - E1000_VFTA) >> 2] = value;
		break;
	case E1000_EECD:
	{
		//DPRINTF("EECD write 0x%x -> 0x%x\r\n", sc->eeprom_control, value);
		/* edge triggered low->high */
		uint32_t eecd_strobe = ((sc->eeprom_control & E1000_EECD_SK) ?
			0 : (value & E1000_EECD_SK));
		uint32_t eecd_mask = (E1000_EECD_SK|E1000_EECD_CS|
					E1000_EECD_DI|E1000_EECD_REQ);
		sc->eeprom_control &= ~eecd_mask;
		sc->eeprom_control |= (value & eecd_mask);
		/* grant/revoke immediately */
		if (value & E1000_EECD_REQ) {
			sc->eeprom_control |= E1000_EECD_GNT;
		} else {
			sc->eeprom_control &= ~E1000_EECD_GNT;
		}
		if (eecd_strobe && (sc->eeprom_control & E1000_EECD_CS)) {
			e82545_eecd_strobe(sc);
		}
		return;
	}
	case E1000_MDIC:
	{
		uint8_t reg_addr = (uint8_t)((value & E1000_MDIC_REG_MASK) >>
					E1000_MDIC_REG_SHIFT);
		uint8_t phy_addr = (uint8_t)((value & E1000_MDIC_PHY_MASK) >>
					E1000_MDIC_PHY_SHIFT);
		sc->mdi_control =
			(value & ~(E1000_MDIC_ERROR|E1000_MDIC_DEST));
		if ((value & E1000_MDIC_READY) != 0) {
			DPRINTF("Incorrect MDIC ready bit: 0x%x\r\n", value);
			return;
		}
		switch (value & E82545_MDIC_OP_MASK) {
		case E1000_MDIC_OP_READ:
			sc->mdi_control &= ~E82545_MDIC_DATA_MASK;
			sc->mdi_control |= e82545_read_mdi(sc, reg_addr, phy_addr);
			break;
		case E1000_MDIC_OP_WRITE:
			e82545_write_mdi(sc, reg_addr, phy_addr,
				value & E82545_MDIC_DATA_MASK);
			break;
		default:
			DPRINTF("Unknown MDIC op: 0x%x\r\n", value);
			return;
		}
		/* TODO: barrier? */
		sc->mdi_control |= E1000_MDIC_READY;
		if (value & E82545_MDIC_IE) {
			// TODO: generate interrupt
		}
		return;
	}
	case E1000_MANC:
	case E1000_STATUS:
		return;
	default:
		DPRINTF("Unknown write register: 0x%x value:%x\r\n", offset, value);
		return;
	}
}

static uint32_t
e82545_read_register(struct e82545_softc *sc, uint32_t offset)
{
	uint32_t retval;
	int ridx;

	if (offset & 0x3) {
		DPRINTF("Unaligned register read offset:0x%x\r\n", offset);
		return 0;
	}

	DPRINTF("Register read: 0x%x\r\n", offset);

	switch (offset) {
	case E1000_CTRL:
		retval = sc->esc_CTRL;
		break;
	case E1000_STATUS:
		retval = E1000_STATUS_FD | E1000_STATUS_LU |
		    E1000_STATUS_SPEED_1000;
		break;
	case E1000_FCAL:
		retval = sc->esc_FCAL;
		break;
	case E1000_FCAH:
		retval = sc->esc_FCAH;
		break;
	case E1000_FCT:
		retval = sc->esc_FCT;
		break;
	case E1000_VET:
		retval = sc->esc_VET;
		break;
	case E1000_FCTTV:
		retval = sc->esc_FCTTV;
		break;
	case E1000_LEDCTL:
		retval = sc->esc_LEDCTL;
		break;
	case E1000_PBA:
		retval = sc->esc_PBA;
		break;
	case E1000_ICR:
	case E1000_ITR:
	case E1000_ICS:
	case E1000_IMS:
	case E1000_IMC:
		retval = e82545_intr_read(sc, offset);
		break;
	case E1000_RCTL:
		retval = sc->esc_RCTL;
		break;
	case E1000_FCRTL:
		retval = sc->esc_FCRTL;
		break;
	case E1000_FCRTH:
		retval = sc->esc_FCRTH;
		break;
	case E1000_RDBAL(0):
		retval = sc->esc_RDBAL;
		break;
	case E1000_RDBAH(0):
		retval = sc->esc_RDBAH;
		break;
	case E1000_RDLEN(0):
		retval = sc->esc_RDLEN;
		break;
	case E1000_RDH(0):
		retval = sc->esc_RDH;
		break;
	case E1000_RDT(0):
		retval = sc->esc_RDT;
		break;
	case E1000_RDTR:
		retval = sc->esc_RDTR;
		break;
	case E1000_RXDCTL(0):
		retval = sc->esc_RXDCTL;
		break;
	case E1000_RADV:
		retval = sc->esc_RADV;
		break;
	case E1000_RSRPD:
		retval = sc->esc_RSRPD;
		break;
	case E1000_RXCSUM:
		retval = sc->esc_RXCSUM;
		break;
	case E1000_TXCW:
		retval = sc->esc_TXCW;
		break;
	case E1000_TCTL:
		retval = sc->esc_TCTL;
		break;
	case E1000_TIPG:
		retval = sc->esc_TIPG;
		break;
	case E1000_AIT:
		retval = sc->esc_AIT;
		break;
	case E1000_TDBAL(0):
		retval = sc->esc_TDBAL;
		break;
	case E1000_TDBAH(0):
		retval = sc->esc_TDBAH;
		break;
	case E1000_TDLEN(0):
		retval = sc->esc_TDLEN;
		break;
	case E1000_TDH(0):
		retval = sc->esc_TDH;
		break;
	case E1000_TDT(0):
		retval = sc->esc_TDT;
		break;
	case E1000_TIDV:
		retval = sc->esc_TIDV;
		break;
	case E1000_TXDCTL(0):
		retval = sc->esc_TXDCTL;
		break;
	case E1000_TADV:
		retval = sc->esc_TADV;
		break;
	case E1000_RAL(0) ... E1000_RAH(15):
		/* convert to u32 offset */
		ridx = (offset - E1000_RAL(0)) >> 2;
		retval = e82545_read_ra(sc, ridx);
		break;
	case E1000_MTA ... (E1000_MTA + (127*4)):
		retval = sc->esc_fmcast[(offset - E1000_MTA) >> 2];
		break;
	case E1000_VFTA ... (E1000_VFTA + (127*4)):
		retval = sc->esc_fvlan[(offset - E1000_VFTA) >> 2];
		break;
	case E1000_EECD:
		//DPRINTF("EECD read %x\r\n", sc->eeprom_control);
		retval = sc->eeprom_control;
		break;
	case E1000_MDIC:
		retval = sc->mdi_control;
		break;
	case E1000_MANC:
		retval = 0;
		break;
	/* stats that we emulate. */
	case E1000_MPC:
		retval = sc->missed_pkt_count;
		break;
	case E1000_PRC64:
		retval = sc->pkt_rx_by_size[0];
		break;
	case E1000_PRC127:
		retval = sc->pkt_rx_by_size[1];
		break;
	case E1000_PRC255:
		retval = sc->pkt_rx_by_size[2];
		break;
	case E1000_PRC511:
		retval = sc->pkt_rx_by_size[3];
		break;
	case E1000_PRC1023:
		retval = sc->pkt_rx_by_size[4];
		break;
	case E1000_PRC1522:
		retval = sc->pkt_rx_by_size[5];
		break;
	case E1000_GPRC:
		retval = sc->good_pkt_rx_count;
		break;
	case E1000_BPRC:
		retval = sc->bcast_pkt_rx_count;
		break;
	case E1000_MPRC:
		retval = sc->mcast_pkt_rx_count;
		break;
	case E1000_GPTC:
	case E1000_TPT:
		retval = sc->good_pkt_tx_count;
		break;
	case E1000_GORCL:
		retval = (uint32_t)sc->good_octets_rx;
		break;
	case E1000_GORCH:
		retval = (uint32_t)(sc->good_octets_rx >> 32);
		break;
	case E1000_TOTL:
	case E1000_GOTCL:
		retval = (uint32_t)sc->good_octets_tx;
		break;
	case E1000_TOTH:
	case E1000_GOTCH:
		retval = (uint32_t)(sc->good_octets_tx >> 32);
		break;
	case E1000_ROC:
		retval = sc->oversize_rx_count;
		break;
	case E1000_TORL:
		retval = (uint32_t)(sc->good_octets_rx + sc->missed_octets);
		break;
	case E1000_TORH:
		retval = (uint32_t)((sc->good_octets_rx +
		    sc->missed_octets) >> 32);
		break;
	case E1000_TPR:
		retval = sc->good_pkt_rx_count + sc->missed_pkt_count +
		    sc->oversize_rx_count;
		break;
	case E1000_PTC64:
		retval = sc->pkt_tx_by_size[0];
		break;
	case E1000_PTC127:
		retval = sc->pkt_tx_by_size[1];
		break;
	case E1000_PTC255:
		retval = sc->pkt_tx_by_size[2];
		break;
	case E1000_PTC511:
		retval = sc->pkt_tx_by_size[3];
		break;
	case E1000_PTC1023:
		retval = sc->pkt_tx_by_size[4];
		break;
	case E1000_PTC1522:
		retval = sc->pkt_tx_by_size[5];
		break;
	case E1000_MPTC:
		retval = sc->mcast_pkt_tx_count;
		break;
	case E1000_BPTC:
		retval = sc->bcast_pkt_tx_count;
		break;
	case E1000_TSCTC:
		retval = sc->tso_tx_count;
		break;
	/* stats that are always 0. */
	case E1000_CRCERRS:
	case E1000_ALGNERRC:
	case E1000_SYMERRS:
	case E1000_RXERRC:
	case E1000_SCC:
	case E1000_ECOL:
	case E1000_MCC:
	case E1000_LATECOL:
	case E1000_COLC:
	case E1000_DC:
	case E1000_TNCRS:
	case E1000_SEC:
	case E1000_CEXTERR:
	case E1000_RLEC:
	case E1000_XONRXC:
	case E1000_XONTXC:
	case E1000_XOFFRXC:
	case E1000_XOFFTXC:
	case E1000_FCRUC:
	case E1000_RNBC:
	case E1000_RUC:
	case E1000_RFC:
	case E1000_RJC:
	case E1000_MGTPRC:
	case E1000_MGTPDC:
	case E1000_MGTPTC:
	case E1000_TSCTFC:
		retval = 0;
		break;
	default:
		DPRINTF("Unknown read register: 0x%x\r\n", offset);
		retval = 0;
		break;
	}

	return (retval);
}

static void
e82545_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
	     uint64_t offset, int size, uint64_t value)
{
	struct e82545_softc *sc;

	//DPRINTF("Write bar:%d offset:0x%lx value:0x%lx size:%d\r\n", baridx, offset, value, size);

	sc = pi->pi_arg;

	pthread_mutex_lock(&sc->esc_mtx);

	switch (baridx) {
	case E82545_BAR_IO:
		switch (offset) {
		case E82545_IOADDR:
			if (size != 4) {
				DPRINTF("Wrong io addr write sz:%d value:0x%lx\r\n", size, value);
			} else
				sc->io_addr = (uint32_t)value;
			break;
		case E82545_IODATA:
			if (size != 4) {
				DPRINTF("Wrong io data write size:%d value:0x%lx\r\n", size, value);
			} else if (sc->io_addr > E82545_IO_REGISTER_MAX) {
				DPRINTF("Non-register io write addr:0x%x value:0x%lx\r\n", sc->io_addr, value);
			} else
				e82545_write_register(sc, sc->io_addr,
						      (uint32_t)value);
			break;
		default:
			DPRINTF("Unknown io bar write offset:0x%lx value:0x%lx size:%d\r\n", offset, value, size);
			break;
		}
		break;
	case E82545_BAR_REGISTER:
		if (size != 4) {
			DPRINTF("Wrong register write size:%d offset:0x%lx value:0x%lx\r\n", size, offset, value);
		} else
			e82545_write_register(sc, (uint32_t)offset,
					      (uint32_t)value);
		break;
	default:
		DPRINTF("Unknown write bar:%d off:0x%lx val:0x%lx size:%d\r\n",
			baridx, offset, value, size);
	}

	pthread_mutex_unlock(&sc->esc_mtx);
}

static uint64_t
e82545_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
	    uint64_t offset, int size)
{
	struct e82545_softc *sc;
	uint64_t retval;

	//DPRINTF("Read bar:%d offset:0x%lx size:%d\r\n", baridx, offset, size);
	sc = pi->pi_arg;
	retval = 0;

	pthread_mutex_lock(&sc->esc_mtx);

	switch (baridx) {
	case E82545_BAR_IO:
		switch (offset) {
		case E82545_IOADDR:
			if (size != 4) {
				DPRINTF("Wrong io addr read sz:%d\r\n", size);
			} else
				retval = sc->io_addr;
			break;
		case E82545_IODATA:
			if (size != 4) {
				DPRINTF("Wrong io data read sz:%d\r\n", size);
			}
			if (sc->io_addr > E82545_IO_REGISTER_MAX) {
				DPRINTF("Non-register io read addr:0x%x\r\n",
					sc->io_addr);
			} else
				retval = e82545_read_register(sc, sc->io_addr);
			break;
		default:
			DPRINTF("Unknown io bar read offset:0x%lx size:%d\r\n",
			    offset, size);
			break;
		}
		break;
	case E82545_BAR_REGISTER:
		if (size != 4) {
			DPRINTF("Wrong register read size:%d offset:0x%lx\r\n",
			    size, offset);
		} else
			retval = e82545_read_register(sc, (uint32_t)offset);
		break;
	default:
		DPRINTF("Unknown read bar:%d offset:0x%lx size:%d\r\n",
		    baridx, offset, size);
		break;
	}

	pthread_mutex_unlock(&sc->esc_mtx);

	return (retval);
}

static void
e82545_reset(struct e82545_softc *sc, int drvr)
{
	int i;

	e82545_rx_disable(sc);
	e82545_tx_disable(sc);

	/* clear outstanding interrupts */
	if (sc->esc_irq_asserted)
		pci_lintr_deassert(sc->esc_pi);

	/* misc */
	if (!drvr) {
		sc->esc_FCAL = 0;
		sc->esc_FCAH = 0;
		sc->esc_FCT = 0;
		sc->esc_VET = 0;
		sc->esc_FCTTV = 0;
	}
	sc->esc_LEDCTL = 0x07061302;
	sc->esc_PBA = 0x00100030;

	/* start nvm in opcode mode. */
	sc->nvm_opaddr = 0;
	sc->nvm_mode = E82545_NVM_MODE_OPADDR;
	sc->nvm_bits = E82545_NVM_OPADDR_BITS;
	sc->eeprom_control = E1000_EECD_PRES | E82545_EECD_FWE_EN;
	e82545_init_eeprom(sc);

	/* interrupt */
	sc->esc_ICR = 0;
	sc->esc_ITR = 250;
	sc->esc_ICS = 0;
	sc->esc_IMS = 0;
	sc->esc_IMC = 0;

	/* L2 filters */
	if (!drvr) {
		memset(sc->esc_fvlan, 0, sizeof(sc->esc_fvlan));
		memset(sc->esc_fmcast, 0, sizeof(sc->esc_fmcast));
		memset(sc->esc_uni, 0, sizeof(sc->esc_uni));

		/* XXX not necessary on 82545 ?? */
		sc->esc_uni[0].eu_valid = 1;
		memcpy(sc->esc_uni[0].eu_eth.octet, sc->esc_mac.octet,
		    ETHER_ADDR_LEN);
	} else {
		/* Clear RAH valid bits */
		for (i = 0; i < 16; i++)
			sc->esc_uni[i].eu_valid = 0;
	}

	/* receive */
	if (!drvr) {
		sc->esc_RDBAL = 0;
		sc->esc_RDBAH = 0;
	}
	sc->esc_RCTL = 0;
	sc->esc_FCRTL = 0;
	sc->esc_FCRTH = 0;
	sc->esc_RDLEN = 0;
	sc->esc_RDH = 0;
	sc->esc_RDT = 0;
	sc->esc_RDTR = 0;
	sc->esc_RXDCTL = (1 << 24) | (1 << 16); /* default GRAN/WTHRESH */
	sc->esc_RADV = 0;
	sc->esc_RXCSUM = 0;

	/* transmit */
	if (!drvr) {
		sc->esc_TDBAL = 0;
		sc->esc_TDBAH = 0;
		sc->esc_TIPG = 0;
		sc->esc_AIT = 0;
		sc->esc_TIDV = 0;
		sc->esc_TADV = 0;
	}
	sc->esc_tdba = 0;
	sc->esc_txdesc = NULL;
	sc->esc_TXCW = 0;
	sc->esc_TCTL = 0;
	sc->esc_TDLEN = 0;
	sc->esc_TDT = 0;
	sc->esc_TDHr = sc->esc_TDH = 0;
	sc->esc_TXDCTL = 0;
}

static void
e82545_open_tap(struct e82545_softc *sc, char *opts)
{
	char tbuf[80];

	if (opts == NULL) {
		sc->esc_tapfd = -1;
		return;
	}

	strcpy(tbuf, "/dev/");
	strlcat(tbuf, opts, sizeof(tbuf));

	sc->esc_tapfd = open(tbuf, O_RDWR);
	if (sc->esc_tapfd == -1) {
		DPRINTF("unable to open tap device %s\n", opts);
		exit(1);
	}

	/*
	 * Set non-blocking and register for read
	 * notifications with the event loop
	 */
	int opt = 1;
	if (ioctl(sc->esc_tapfd, FIONBIO, &opt) < 0) {
		WPRINTF("tap device O_NONBLOCK failed: %d\n", errno);
		close(sc->esc_tapfd);
		sc->esc_tapfd = -1;
	}

	sc->esc_mevp = mevent_add(sc->esc_tapfd,
	    EVF_READ,
	    e82545_tap_callback,
	    sc);
	if (sc->esc_mevp == NULL) {
		DPRINTF("Could not register mevent %d\n", EVF_READ);
		close(sc->esc_tapfd);
		sc->esc_tapfd = -1;
	}
}
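
/*
 * Parse a "mac=xx:xx:xx:xx:xx:xx" option.  A valid unicast, non-zero
 * address is copied into mac_addr; multicast, all-zero, and unparseable
 * addresses are rejected and reported with a non-zero return value.
 */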
static int
e82545_parsemac(char *mac_str, uint8_t *mac_addr)
{
	struct ether_addr *ea;
	char *tmpstr;
	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };

	tmpstr = strsep(&mac_str,"=");
	if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
		ea = ether_aton(mac_str);
		if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
		    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
			fprintf(stderr, "Invalid MAC %s\n", mac_str);
			return (1);
		} else
			memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
	}
	return (0);
}

static int
e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	DPRINTF("Loading with options: %s\r\n", opts);

	MD5_CTX mdctx;
	unsigned char digest[16];
	char nstr[80];
	struct e82545_softc *sc;
	char *devname;
	char *vtopts;
	int mac_provided;

	/* Set up our softc */
	sc = calloc(1, sizeof(*sc));

	pi->pi_arg = sc;
	sc->esc_pi = pi;
	sc->esc_ctx = ctx;

	pthread_mutex_init(&sc->esc_mtx, NULL);
	pthread_cond_init(&sc->esc_rx_cond, NULL);
	pthread_cond_init(&sc->esc_tx_cond, NULL);
	pthread_create(&sc->esc_tx_tid, NULL, e82545_tx_thread, sc);
	snprintf(nstr, sizeof(nstr), "e82545-%d:%d tx", pi->pi_slot,
	    pi->pi_func);
	pthread_set_name_np(sc->esc_tx_tid, nstr);

	pci_set_cfgdata16(pi, PCIR_DEVICE, E82545_DEV_ID_82545EM_COPPER);
	pci_set_cfgdata16(pi, PCIR_VENDOR, E82545_VENDOR_ID_INTEL);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_NETWORK_ETHERNET);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, E82545_SUBDEV_ID);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, E82545_VENDOR_ID_INTEL);

	pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL);
	pci_set_cfgdata8(pi, PCIR_INTPIN, 0x1);

	/* TODO: this card also supports MSI, but the FreeBSD driver for it
	 * does not, so I have not implemented it. */
	pci_lintr_request(pi);

	pci_emul_alloc_bar(pi, E82545_BAR_REGISTER, PCIBAR_MEM32,
	    E82545_BAR_REGISTER_LEN);
	pci_emul_alloc_bar(pi, E82545_BAR_FLASH, PCIBAR_MEM32,
	    E82545_BAR_FLASH_LEN);
	pci_emul_alloc_bar(pi, E82545_BAR_IO, PCIBAR_IO,
	    E82545_BAR_IO_LEN);

	/*
	 * Attempt to open the tap device and read the MAC address
	 * if specified. Copied from virtio-net, slightly modified.
	 */
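	/*
	 * For example (illustrative option string, matching the parsing
	 * below), opts may be "tap0" or "tap0,mac=00:a0:98:01:02:03": the
	 * first comma-separated token names the tap/vmnet device and an
	 * optional "mac=" token overrides the generated MAC address.
	 */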
	mac_provided = 0;
	sc->esc_tapfd = -1;
	if (opts != NULL) {
		int err;

		devname = vtopts = strdup(opts);
		(void) strsep(&vtopts, ",");

		if (vtopts != NULL) {
			err = e82545_parsemac(vtopts, sc->esc_mac.octet);
			if (err != 0) {
				free(devname);
				return (err);
			}
			mac_provided = 1;
		}

		if (strncmp(devname, "tap", 3) == 0 ||
		    strncmp(devname, "vmnet", 5) == 0)
			e82545_open_tap(sc, devname);

		free(devname);
	}

	/*
	 * The default MAC address is the standard NetApp OUI of 00-a0-98,
	 * followed by an MD5 of the PCI slot/func number and dev name
	 */
	if (!mac_provided) {
		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
		    pi->pi_func, vmname);

		MD5Init(&mdctx);
		MD5Update(&mdctx, nstr, strlen(nstr));
		MD5Final(digest, &mdctx);

		sc->esc_mac.octet[0] = 0x00;
		sc->esc_mac.octet[1] = 0xa0;
		sc->esc_mac.octet[2] = 0x98;
		sc->esc_mac.octet[3] = digest[0];
		sc->esc_mac.octet[4] = digest[1];
		sc->esc_mac.octet[5] = digest[2];
	}

	/* H/w initiated reset */
	e82545_reset(sc, 0);

	return (0);
}

struct pci_devemu pci_de_e82545 = {
	.pe_emu = "e1000",
	.pe_init = e82545_init,
	.pe_barwrite = e82545_write,
	.pe_barread = e82545_read
};
PCI_EMUL_SET(pci_de_e82545);
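
/*
 * Example (illustrative): the emulation registers under the name "e1000",
 * so a guest NIC backed by a tap interface is typically attached with a
 * bhyve PCI slot option such as "-s 2:0,e1000,tap0" or
 * "-s 2:0,e1000,tap0,mac=00:a0:98:01:02:03".
 */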