1 /* 2 * drivers/net/veth.c 3 * 4 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 5 * 6 * Author: Pavel Emelianov <xemul@openvz.org> 7 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 8 * 9 */ 10 11 #include <linux/netdevice.h> 12 #include <linux/slab.h> 13 #include <linux/ethtool.h> 14 #include <linux/etherdevice.h> 15 #include <linux/u64_stats_sync.h> 16 17 #include <net/rtnetlink.h> 18 #include <net/dst.h> 19 #include <net/xfrm.h> 20 #include <linux/veth.h> 21 #include <linux/module.h> 22 23 #define DRV_NAME "veth" 24 #define DRV_VERSION "1.0" 25 26 struct pcpu_vstats { 27 u64 packets; 28 u64 bytes; 29 struct u64_stats_sync syncp; 30 }; 31 32 struct veth_priv { 33 struct net_device __rcu *peer; 34 atomic64_t dropped; 35 unsigned requested_headroom; 36 }; 37 38 /* 39 * ethtool interface 40 */ 41 42 static struct { 43 const char string[ETH_GSTRING_LEN]; 44 } ethtool_stats_keys[] = { 45 { "peer_ifindex" }, 46 }; 47 48 static int veth_get_link_ksettings(struct net_device *dev, 49 struct ethtool_link_ksettings *cmd) 50 { 51 cmd->base.speed = SPEED_10000; 52 cmd->base.duplex = DUPLEX_FULL; 53 cmd->base.port = PORT_TP; 54 cmd->base.autoneg = AUTONEG_DISABLE; 55 return 0; 56 } 57 58 static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 59 { 60 strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); 61 strlcpy(info->version, DRV_VERSION, sizeof(info->version)); 62 } 63 64 static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 65 { 66 switch(stringset) { 67 case ETH_SS_STATS: 68 memcpy(buf, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 69 break; 70 } 71 } 72 73 static int veth_get_sset_count(struct net_device *dev, int sset) 74 { 75 switch (sset) { 76 case ETH_SS_STATS: 77 return ARRAY_SIZE(ethtool_stats_keys); 78 default: 79 return -EOPNOTSUPP; 80 } 81 } 82 83 static void veth_get_ethtool_stats(struct net_device *dev, 84 struct ethtool_stats *stats, u64 *data) 85 { 86 struct veth_priv *priv = netdev_priv(dev); 87 struct net_device *peer = rtnl_dereference(priv->peer); 88 89 data[0] = peer ? peer->ifindex : 0; 90 } 91 92 static const struct ethtool_ops veth_ethtool_ops = { 93 .get_drvinfo = veth_get_drvinfo, 94 .get_link = ethtool_op_get_link, 95 .get_strings = veth_get_strings, 96 .get_sset_count = veth_get_sset_count, 97 .get_ethtool_stats = veth_get_ethtool_stats, 98 .get_link_ksettings = veth_get_link_ksettings, 99 }; 100 101 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 102 { 103 struct veth_priv *priv = netdev_priv(dev); 104 struct net_device *rcv; 105 int length = skb->len; 106 107 rcu_read_lock(); 108 rcv = rcu_dereference(priv->peer); 109 if (unlikely(!rcv)) { 110 kfree_skb(skb); 111 goto drop; 112 } 113 114 if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { 115 struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); 116 117 u64_stats_update_begin(&stats->syncp); 118 stats->bytes += length; 119 stats->packets++; 120 u64_stats_update_end(&stats->syncp); 121 } else { 122 drop: 123 atomic64_inc(&priv->dropped); 124 } 125 rcu_read_unlock(); 126 return NETDEV_TX_OK; 127 } 128 129 /* 130 * general routines 131 */ 132 133 static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) 134 { 135 struct veth_priv *priv = netdev_priv(dev); 136 int cpu; 137 138 result->packets = 0; 139 result->bytes = 0; 140 for_each_possible_cpu(cpu) { 141 struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu); 142 u64 packets, bytes; 143 unsigned int start; 144 145 do { 146 start = u64_stats_fetch_begin_irq(&stats->syncp); 147 packets = stats->packets; 148 bytes = stats->bytes; 149 } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 150 result->packets += packets; 151 result->bytes += bytes; 152 } 153 return atomic64_read(&priv->dropped); 154 } 155 156 static void veth_get_stats64(struct net_device *dev, 157 struct rtnl_link_stats64 *tot) 158 { 159 struct veth_priv *priv = netdev_priv(dev); 160 struct net_device *peer; 161 struct pcpu_vstats one; 162 163 tot->tx_dropped = veth_stats_one(&one, dev); 164 tot->tx_bytes = one.bytes; 165 tot->tx_packets = one.packets; 166 167 rcu_read_lock(); 168 peer = rcu_dereference(priv->peer); 169 if (peer) { 170 tot->rx_dropped = veth_stats_one(&one, peer); 171 tot->rx_bytes = one.bytes; 172 tot->rx_packets = one.packets; 173 } 174 rcu_read_unlock(); 175 } 176 177 /* fake multicast ability */ 178 static void veth_set_multicast_list(struct net_device *dev) 179 { 180 } 181 182 static int veth_open(struct net_device *dev) 183 { 184 struct veth_priv *priv = netdev_priv(dev); 185 struct net_device *peer = rtnl_dereference(priv->peer); 186 187 if (!peer) 188 return -ENOTCONN; 189 190 if (peer->flags & IFF_UP) { 191 netif_carrier_on(dev); 192 netif_carrier_on(peer); 193 } 194 return 0; 195 } 196 197 static int veth_close(struct net_device *dev) 198 { 199 struct veth_priv *priv = netdev_priv(dev); 200 struct net_device *peer = rtnl_dereference(priv->peer); 201 202 netif_carrier_off(dev); 203 if (peer) 204 netif_carrier_off(peer); 205 206 return 0; 207 } 208 209 static int is_valid_veth_mtu(int mtu) 210 { 211 return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 212 } 213 214 static int veth_dev_init(struct net_device *dev) 215 { 216 dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats); 217 if (!dev->vstats) 218 return -ENOMEM; 219 return 0; 220 } 221 222 static void veth_dev_free(struct net_device *dev) 223 { 224 free_percpu(dev->vstats); 225 } 226 227 #ifdef CONFIG_NET_POLL_CONTROLLER 228 static void veth_poll_controller(struct net_device *dev) 229 { 230 /* veth only receives frames when its peer sends one 231 * Since it's a synchronous operation, we are guaranteed 232 * never to have pending data when we poll for it so 233 * there is nothing to do here. 234 * 235 * We need this though so netpoll recognizes us as an interface that 236 * supports polling, which enables bridge devices in virt setups to 237 * still use netconsole 238 */ 239 } 240 #endif /* CONFIG_NET_POLL_CONTROLLER */ 241 242 static int veth_get_iflink(const struct net_device *dev) 243 { 244 struct veth_priv *priv = netdev_priv(dev); 245 struct net_device *peer; 246 int iflink; 247 248 rcu_read_lock(); 249 peer = rcu_dereference(priv->peer); 250 iflink = peer ? peer->ifindex : 0; 251 rcu_read_unlock(); 252 253 return iflink; 254 } 255 256 static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 257 { 258 struct veth_priv *peer_priv, *priv = netdev_priv(dev); 259 struct net_device *peer; 260 261 if (new_hr < 0) 262 new_hr = 0; 263 264 rcu_read_lock(); 265 peer = rcu_dereference(priv->peer); 266 if (unlikely(!peer)) 267 goto out; 268 269 peer_priv = netdev_priv(peer); 270 priv->requested_headroom = new_hr; 271 new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 272 dev->needed_headroom = new_hr; 273 peer->needed_headroom = new_hr; 274 275 out: 276 rcu_read_unlock(); 277 } 278 279 static const struct net_device_ops veth_netdev_ops = { 280 .ndo_init = veth_dev_init, 281 .ndo_open = veth_open, 282 .ndo_stop = veth_close, 283 .ndo_start_xmit = veth_xmit, 284 .ndo_get_stats64 = veth_get_stats64, 285 .ndo_set_rx_mode = veth_set_multicast_list, 286 .ndo_set_mac_address = eth_mac_addr, 287 #ifdef CONFIG_NET_POLL_CONTROLLER 288 .ndo_poll_controller = veth_poll_controller, 289 #endif 290 .ndo_get_iflink = veth_get_iflink, 291 .ndo_features_check = passthru_features_check, 292 .ndo_set_rx_headroom = veth_set_rx_headroom, 293 }; 294 295 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 296 NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 297 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 298 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 299 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 300 301 static void veth_setup(struct net_device *dev) 302 { 303 ether_setup(dev); 304 305 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 306 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 307 dev->priv_flags |= IFF_NO_QUEUE; 308 dev->priv_flags |= IFF_PHONY_HEADROOM; 309 310 dev->netdev_ops = &veth_netdev_ops; 311 dev->ethtool_ops = &veth_ethtool_ops; 312 dev->features |= NETIF_F_LLTX; 313 dev->features |= VETH_FEATURES; 314 dev->vlan_features = dev->features & 315 ~(NETIF_F_HW_VLAN_CTAG_TX | 316 NETIF_F_HW_VLAN_STAG_TX | 317 NETIF_F_HW_VLAN_CTAG_RX | 318 NETIF_F_HW_VLAN_STAG_RX); 319 dev->needs_free_netdev = true; 320 dev->priv_destructor = veth_dev_free; 321 dev->max_mtu = ETH_MAX_MTU; 322 323 dev->hw_features = VETH_FEATURES; 324 dev->hw_enc_features = VETH_FEATURES; 325 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 326 } 327 328 /* 329 * netlink interface 330 */ 331 332 static int veth_validate(struct nlattr *tb[], struct nlattr *data[], 333 struct netlink_ext_ack *extack) 334 { 335 if (tb[IFLA_ADDRESS]) { 336 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 337 return -EINVAL; 338 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 339 return -EADDRNOTAVAIL; 340 } 341 if (tb[IFLA_MTU]) { 342 if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 343 return -EINVAL; 344 } 345 return 0; 346 } 347 348 static struct rtnl_link_ops veth_link_ops; 349 350 static int veth_newlink(struct net *src_net, struct net_device *dev, 351 struct nlattr *tb[], struct nlattr *data[], 352 struct netlink_ext_ack *extack) 353 { 354 int err; 355 struct net_device *peer; 356 struct veth_priv *priv; 357 char ifname[IFNAMSIZ]; 358 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 359 unsigned char name_assign_type; 360 struct ifinfomsg *ifmp; 361 struct net *net; 362 363 /* 364 * create and register peer first 365 */ 366 if (data != NULL && data[VETH_INFO_PEER] != NULL) { 367 struct nlattr *nla_peer; 368 369 nla_peer = data[VETH_INFO_PEER]; 370 ifmp = nla_data(nla_peer); 371 err = rtnl_nla_parse_ifla(peer_tb, 372 nla_data(nla_peer) + sizeof(struct ifinfomsg), 373 nla_len(nla_peer) - sizeof(struct ifinfomsg), 374 NULL); 375 if (err < 0) 376 return err; 377 378 err = veth_validate(peer_tb, NULL, extack); 379 if (err < 0) 380 return err; 381 382 tbp = peer_tb; 383 } else { 384 ifmp = NULL; 385 tbp = tb; 386 } 387 388 if (ifmp && tbp[IFLA_IFNAME]) { 389 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 390 name_assign_type = NET_NAME_USER; 391 } else { 392 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 393 name_assign_type = NET_NAME_ENUM; 394 } 395 396 net = rtnl_link_get_net(src_net, tbp); 397 if (IS_ERR(net)) 398 return PTR_ERR(net); 399 400 peer = rtnl_create_link(net, ifname, name_assign_type, 401 &veth_link_ops, tbp); 402 if (IS_ERR(peer)) { 403 put_net(net); 404 return PTR_ERR(peer); 405 } 406 407 if (!ifmp || !tbp[IFLA_ADDRESS]) 408 eth_hw_addr_random(peer); 409 410 if (ifmp && (dev->ifindex != 0)) 411 peer->ifindex = ifmp->ifi_index; 412 413 peer->gso_max_size = dev->gso_max_size; 414 peer->gso_max_segs = dev->gso_max_segs; 415 416 err = register_netdevice(peer); 417 put_net(net); 418 net = NULL; 419 if (err < 0) 420 goto err_register_peer; 421 422 netif_carrier_off(peer); 423 424 err = rtnl_configure_link(peer, ifmp); 425 if (err < 0) 426 goto err_configure_peer; 427 428 /* 429 * register dev last 430 * 431 * note, that since we've registered new device the dev's name 432 * should be re-allocated 433 */ 434 435 if (tb[IFLA_ADDRESS] == NULL) 436 eth_hw_addr_random(dev); 437 438 if (tb[IFLA_IFNAME]) 439 nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 440 else 441 snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 442 443 err = register_netdevice(dev); 444 if (err < 0) 445 goto err_register_dev; 446 447 netif_carrier_off(dev); 448 449 /* 450 * tie the deviced together 451 */ 452 453 priv = netdev_priv(dev); 454 rcu_assign_pointer(priv->peer, peer); 455 456 priv = netdev_priv(peer); 457 rcu_assign_pointer(priv->peer, dev); 458 return 0; 459 460 err_register_dev: 461 /* nothing to do */ 462 err_configure_peer: 463 unregister_netdevice(peer); 464 return err; 465 466 err_register_peer: 467 free_netdev(peer); 468 return err; 469 } 470 471 static void veth_dellink(struct net_device *dev, struct list_head *head) 472 { 473 struct veth_priv *priv; 474 struct net_device *peer; 475 476 priv = netdev_priv(dev); 477 peer = rtnl_dereference(priv->peer); 478 479 /* Note : dellink() is called from default_device_exit_batch(), 480 * before a rcu_synchronize() point. The devices are guaranteed 481 * not being freed before one RCU grace period. 482 */ 483 RCU_INIT_POINTER(priv->peer, NULL); 484 unregister_netdevice_queue(dev, head); 485 486 if (peer) { 487 priv = netdev_priv(peer); 488 RCU_INIT_POINTER(priv->peer, NULL); 489 unregister_netdevice_queue(peer, head); 490 } 491 } 492 493 static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 494 [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 495 }; 496 497 static struct net *veth_get_link_net(const struct net_device *dev) 498 { 499 struct veth_priv *priv = netdev_priv(dev); 500 struct net_device *peer = rtnl_dereference(priv->peer); 501 502 return peer ? dev_net(peer) : dev_net(dev); 503 } 504 505 static struct rtnl_link_ops veth_link_ops = { 506 .kind = DRV_NAME, 507 .priv_size = sizeof(struct veth_priv), 508 .setup = veth_setup, 509 .validate = veth_validate, 510 .newlink = veth_newlink, 511 .dellink = veth_dellink, 512 .policy = veth_policy, 513 .maxtype = VETH_INFO_MAX, 514 .get_link_net = veth_get_link_net, 515 }; 516 517 /* 518 * init/fini 519 */ 520 521 static __init int veth_init(void) 522 { 523 return rtnl_link_register(&veth_link_ops); 524 } 525 526 static __exit void veth_exit(void) 527 { 528 rtnl_link_unregister(&veth_link_ops); 529 } 530 531 module_init(veth_init); 532 module_exit(veth_exit); 533 534 MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 535 MODULE_LICENSE("GPL v2"); 536 MODULE_ALIAS_RTNL_LINK(DRV_NAME); 537