1 /* 2 * drivers/net/veth.c 3 * 4 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 5 * 6 * Author: Pavel Emelianov <xemul@openvz.org> 7 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 8 * 9 */ 10 11 #include <linux/netdevice.h> 12 #include <linux/slab.h> 13 #include <linux/ethtool.h> 14 #include <linux/etherdevice.h> 15 #include <linux/u64_stats_sync.h> 16 17 #include <net/rtnetlink.h> 18 #include <net/dst.h> 19 #include <net/xfrm.h> 20 #include <linux/veth.h> 21 #include <linux/module.h> 22 23 #define DRV_NAME "veth" 24 #define DRV_VERSION "1.0" 25 26 struct pcpu_vstats { 27 u64 packets; 28 u64 bytes; 29 struct u64_stats_sync syncp; 30 }; 31 32 struct veth_priv { 33 struct net_device __rcu *peer; 34 atomic64_t dropped; 35 unsigned requested_headroom; 36 }; 37 38 /* 39 * ethtool interface 40 */ 41 42 static struct { 43 const char string[ETH_GSTRING_LEN]; 44 } ethtool_stats_keys[] = { 45 { "peer_ifindex" }, 46 }; 47 48 static int veth_get_link_ksettings(struct net_device *dev, 49 struct ethtool_link_ksettings *cmd) 50 { 51 cmd->base.speed = SPEED_10000; 52 cmd->base.duplex = DUPLEX_FULL; 53 cmd->base.port = PORT_TP; 54 cmd->base.autoneg = AUTONEG_DISABLE; 55 return 0; 56 } 57 58 static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 59 { 60 strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); 61 strlcpy(info->version, DRV_VERSION, sizeof(info->version)); 62 } 63 64 static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 65 { 66 switch(stringset) { 67 case ETH_SS_STATS: 68 memcpy(buf, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 69 break; 70 } 71 } 72 73 static int veth_get_sset_count(struct net_device *dev, int sset) 74 { 75 switch (sset) { 76 case ETH_SS_STATS: 77 return ARRAY_SIZE(ethtool_stats_keys); 78 default: 79 return -EOPNOTSUPP; 80 } 81 } 82 83 static void veth_get_ethtool_stats(struct net_device *dev, 84 struct ethtool_stats *stats, u64 *data) 85 { 86 struct veth_priv *priv = netdev_priv(dev); 87 struct net_device *peer = rtnl_dereference(priv->peer); 88 89 data[0] = peer ? peer->ifindex : 0; 90 } 91 92 static const struct ethtool_ops veth_ethtool_ops = { 93 .get_drvinfo = veth_get_drvinfo, 94 .get_link = ethtool_op_get_link, 95 .get_strings = veth_get_strings, 96 .get_sset_count = veth_get_sset_count, 97 .get_ethtool_stats = veth_get_ethtool_stats, 98 .get_link_ksettings = veth_get_link_ksettings, 99 }; 100 101 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 102 { 103 struct veth_priv *priv = netdev_priv(dev); 104 struct net_device *rcv; 105 int length = skb->len; 106 107 rcu_read_lock(); 108 rcv = rcu_dereference(priv->peer); 109 if (unlikely(!rcv)) { 110 kfree_skb(skb); 111 goto drop; 112 } 113 114 if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { 115 struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); 116 117 u64_stats_update_begin(&stats->syncp); 118 stats->bytes += length; 119 stats->packets++; 120 u64_stats_update_end(&stats->syncp); 121 } else { 122 drop: 123 atomic64_inc(&priv->dropped); 124 } 125 rcu_read_unlock(); 126 return NETDEV_TX_OK; 127 } 128 129 /* 130 * general routines 131 */ 132 133 static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) 134 { 135 struct veth_priv *priv = netdev_priv(dev); 136 int cpu; 137 138 result->packets = 0; 139 result->bytes = 0; 140 for_each_possible_cpu(cpu) { 141 struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu); 142 u64 packets, bytes; 143 unsigned int start; 144 145 do { 146 start = u64_stats_fetch_begin_irq(&stats->syncp); 147 packets = stats->packets; 148 bytes = stats->bytes; 149 } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); 150 result->packets += packets; 151 result->bytes += bytes; 152 } 153 return atomic64_read(&priv->dropped); 154 } 155 156 static void veth_get_stats64(struct net_device *dev, 157 struct rtnl_link_stats64 *tot) 158 { 159 struct veth_priv *priv = netdev_priv(dev); 160 struct net_device *peer; 161 struct pcpu_vstats one; 162 163 tot->tx_dropped = veth_stats_one(&one, dev); 164 tot->tx_bytes = one.bytes; 165 tot->tx_packets = one.packets; 166 167 rcu_read_lock(); 168 peer = rcu_dereference(priv->peer); 169 if (peer) { 170 tot->rx_dropped = veth_stats_one(&one, peer); 171 tot->rx_bytes = one.bytes; 172 tot->rx_packets = one.packets; 173 } 174 rcu_read_unlock(); 175 } 176 177 /* fake multicast ability */ 178 static void veth_set_multicast_list(struct net_device *dev) 179 { 180 } 181 182 static int veth_open(struct net_device *dev) 183 { 184 struct veth_priv *priv = netdev_priv(dev); 185 struct net_device *peer = rtnl_dereference(priv->peer); 186 187 if (!peer) 188 return -ENOTCONN; 189 190 if (peer->flags & IFF_UP) { 191 netif_carrier_on(dev); 192 netif_carrier_on(peer); 193 } 194 return 0; 195 } 196 197 static int veth_close(struct net_device *dev) 198 { 199 struct veth_priv *priv = netdev_priv(dev); 200 struct net_device *peer = rtnl_dereference(priv->peer); 201 202 netif_carrier_off(dev); 203 if (peer) 204 netif_carrier_off(peer); 205 206 return 0; 207 } 208 209 static int is_valid_veth_mtu(int mtu) 210 { 211 return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 212 } 213 214 static int veth_dev_init(struct net_device *dev) 215 { 216 dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats); 217 if (!dev->vstats) 218 return -ENOMEM; 219 return 0; 220 } 221 222 static void veth_dev_free(struct net_device *dev) 223 { 224 free_percpu(dev->vstats); 225 free_netdev(dev); 226 } 227 228 #ifdef CONFIG_NET_POLL_CONTROLLER 229 static void veth_poll_controller(struct net_device *dev) 230 { 231 /* veth only receives frames when its peer sends one 232 * Since it's a synchronous operation, we are guaranteed 233 * never to have pending data when we poll for it so 234 * there is nothing to do here. 235 * 236 * We need this though so netpoll recognizes us as an interface that 237 * supports polling, which enables bridge devices in virt setups to 238 * still use netconsole 239 */ 240 } 241 #endif /* CONFIG_NET_POLL_CONTROLLER */ 242 243 static int veth_get_iflink(const struct net_device *dev) 244 { 245 struct veth_priv *priv = netdev_priv(dev); 246 struct net_device *peer; 247 int iflink; 248 249 rcu_read_lock(); 250 peer = rcu_dereference(priv->peer); 251 iflink = peer ? peer->ifindex : 0; 252 rcu_read_unlock(); 253 254 return iflink; 255 } 256 257 static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 258 { 259 struct veth_priv *peer_priv, *priv = netdev_priv(dev); 260 struct net_device *peer; 261 262 if (new_hr < 0) 263 new_hr = 0; 264 265 rcu_read_lock(); 266 peer = rcu_dereference(priv->peer); 267 if (unlikely(!peer)) 268 goto out; 269 270 peer_priv = netdev_priv(peer); 271 priv->requested_headroom = new_hr; 272 new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 273 dev->needed_headroom = new_hr; 274 peer->needed_headroom = new_hr; 275 276 out: 277 rcu_read_unlock(); 278 } 279 280 static const struct net_device_ops veth_netdev_ops = { 281 .ndo_init = veth_dev_init, 282 .ndo_open = veth_open, 283 .ndo_stop = veth_close, 284 .ndo_start_xmit = veth_xmit, 285 .ndo_get_stats64 = veth_get_stats64, 286 .ndo_set_rx_mode = veth_set_multicast_list, 287 .ndo_set_mac_address = eth_mac_addr, 288 #ifdef CONFIG_NET_POLL_CONTROLLER 289 .ndo_poll_controller = veth_poll_controller, 290 #endif 291 .ndo_get_iflink = veth_get_iflink, 292 .ndo_features_check = passthru_features_check, 293 .ndo_set_rx_headroom = veth_set_rx_headroom, 294 }; 295 296 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 297 NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 298 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 299 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 300 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 301 302 static void veth_setup(struct net_device *dev) 303 { 304 ether_setup(dev); 305 306 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 307 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 308 dev->priv_flags |= IFF_NO_QUEUE; 309 dev->priv_flags |= IFF_PHONY_HEADROOM; 310 311 dev->netdev_ops = &veth_netdev_ops; 312 dev->ethtool_ops = &veth_ethtool_ops; 313 dev->features |= NETIF_F_LLTX; 314 dev->features |= VETH_FEATURES; 315 dev->vlan_features = dev->features & 316 ~(NETIF_F_HW_VLAN_CTAG_TX | 317 NETIF_F_HW_VLAN_STAG_TX | 318 NETIF_F_HW_VLAN_CTAG_RX | 319 NETIF_F_HW_VLAN_STAG_RX); 320 dev->destructor = veth_dev_free; 321 dev->max_mtu = ETH_MAX_MTU; 322 323 dev->hw_features = VETH_FEATURES; 324 dev->hw_enc_features = VETH_FEATURES; 325 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 326 } 327 328 /* 329 * netlink interface 330 */ 331 332 static int veth_validate(struct nlattr *tb[], struct nlattr *data[]) 333 { 334 if (tb[IFLA_ADDRESS]) { 335 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 336 return -EINVAL; 337 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 338 return -EADDRNOTAVAIL; 339 } 340 if (tb[IFLA_MTU]) { 341 if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 342 return -EINVAL; 343 } 344 return 0; 345 } 346 347 static struct rtnl_link_ops veth_link_ops; 348 349 static int veth_newlink(struct net *src_net, struct net_device *dev, 350 struct nlattr *tb[], struct nlattr *data[]) 351 { 352 int err; 353 struct net_device *peer; 354 struct veth_priv *priv; 355 char ifname[IFNAMSIZ]; 356 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 357 unsigned char name_assign_type; 358 struct ifinfomsg *ifmp; 359 struct net *net; 360 361 /* 362 * create and register peer first 363 */ 364 if (data != NULL && data[VETH_INFO_PEER] != NULL) { 365 struct nlattr *nla_peer; 366 367 nla_peer = data[VETH_INFO_PEER]; 368 ifmp = nla_data(nla_peer); 369 err = rtnl_nla_parse_ifla(peer_tb, 370 nla_data(nla_peer) + sizeof(struct ifinfomsg), 371 nla_len(nla_peer) - sizeof(struct ifinfomsg)); 372 if (err < 0) 373 return err; 374 375 err = veth_validate(peer_tb, NULL); 376 if (err < 0) 377 return err; 378 379 tbp = peer_tb; 380 } else { 381 ifmp = NULL; 382 tbp = tb; 383 } 384 385 if (tbp[IFLA_IFNAME]) { 386 nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 387 name_assign_type = NET_NAME_USER; 388 } else { 389 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 390 name_assign_type = NET_NAME_ENUM; 391 } 392 393 net = rtnl_link_get_net(src_net, tbp); 394 if (IS_ERR(net)) 395 return PTR_ERR(net); 396 397 peer = rtnl_create_link(net, ifname, name_assign_type, 398 &veth_link_ops, tbp); 399 if (IS_ERR(peer)) { 400 put_net(net); 401 return PTR_ERR(peer); 402 } 403 404 if (tbp[IFLA_ADDRESS] == NULL) 405 eth_hw_addr_random(peer); 406 407 if (ifmp && (dev->ifindex != 0)) 408 peer->ifindex = ifmp->ifi_index; 409 410 err = register_netdevice(peer); 411 put_net(net); 412 net = NULL; 413 if (err < 0) 414 goto err_register_peer; 415 416 netif_carrier_off(peer); 417 418 err = rtnl_configure_link(peer, ifmp); 419 if (err < 0) 420 goto err_configure_peer; 421 422 /* 423 * register dev last 424 * 425 * note, that since we've registered new device the dev's name 426 * should be re-allocated 427 */ 428 429 if (tb[IFLA_ADDRESS] == NULL) 430 eth_hw_addr_random(dev); 431 432 if (tb[IFLA_IFNAME]) 433 nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 434 else 435 snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 436 437 err = register_netdevice(dev); 438 if (err < 0) 439 goto err_register_dev; 440 441 netif_carrier_off(dev); 442 443 /* 444 * tie the deviced together 445 */ 446 447 priv = netdev_priv(dev); 448 rcu_assign_pointer(priv->peer, peer); 449 450 priv = netdev_priv(peer); 451 rcu_assign_pointer(priv->peer, dev); 452 return 0; 453 454 err_register_dev: 455 /* nothing to do */ 456 err_configure_peer: 457 unregister_netdevice(peer); 458 return err; 459 460 err_register_peer: 461 free_netdev(peer); 462 return err; 463 } 464 465 static void veth_dellink(struct net_device *dev, struct list_head *head) 466 { 467 struct veth_priv *priv; 468 struct net_device *peer; 469 470 priv = netdev_priv(dev); 471 peer = rtnl_dereference(priv->peer); 472 473 /* Note : dellink() is called from default_device_exit_batch(), 474 * before a rcu_synchronize() point. The devices are guaranteed 475 * not being freed before one RCU grace period. 476 */ 477 RCU_INIT_POINTER(priv->peer, NULL); 478 unregister_netdevice_queue(dev, head); 479 480 if (peer) { 481 priv = netdev_priv(peer); 482 RCU_INIT_POINTER(priv->peer, NULL); 483 unregister_netdevice_queue(peer, head); 484 } 485 } 486 487 static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 488 [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 489 }; 490 491 static struct net *veth_get_link_net(const struct net_device *dev) 492 { 493 struct veth_priv *priv = netdev_priv(dev); 494 struct net_device *peer = rtnl_dereference(priv->peer); 495 496 return peer ? dev_net(peer) : dev_net(dev); 497 } 498 499 static struct rtnl_link_ops veth_link_ops = { 500 .kind = DRV_NAME, 501 .priv_size = sizeof(struct veth_priv), 502 .setup = veth_setup, 503 .validate = veth_validate, 504 .newlink = veth_newlink, 505 .dellink = veth_dellink, 506 .policy = veth_policy, 507 .maxtype = VETH_INFO_MAX, 508 .get_link_net = veth_get_link_net, 509 }; 510 511 /* 512 * init/fini 513 */ 514 515 static __init int veth_init(void) 516 { 517 return rtnl_link_register(&veth_link_ops); 518 } 519 520 static __exit void veth_exit(void) 521 { 522 rtnl_link_unregister(&veth_link_ops); 523 } 524 525 module_init(veth_init); 526 module_exit(veth_exit); 527 528 MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 529 MODULE_LICENSE("GPL v2"); 530 MODULE_ALIAS_RTNL_LINK(DRV_NAME); 531