1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * drivers/net/veth.c 4 * 5 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 6 * 7 * Author: Pavel Emelianov <xemul@openvz.org> 8 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 9 * 10 */ 11 12 #include <linux/netdevice.h> 13 #include <linux/slab.h> 14 #include <linux/ethtool.h> 15 #include <linux/etherdevice.h> 16 #include <linux/u64_stats_sync.h> 17 18 #include <net/rtnetlink.h> 19 #include <net/dst.h> 20 #include <net/xfrm.h> 21 #include <net/xdp.h> 22 #include <linux/veth.h> 23 #include <linux/module.h> 24 #include <linux/bpf.h> 25 #include <linux/filter.h> 26 #include <linux/ptr_ring.h> 27 #include <linux/bpf_trace.h> 28 #include <linux/net_tstamp.h> 29 #include <net/page_pool/helpers.h> 30 31 #define DRV_NAME "veth" 32 #define DRV_VERSION "1.0" 33 34 #define VETH_XDP_FLAG BIT(0) 35 #define VETH_RING_SIZE 256 36 #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) 37 38 #define VETH_XDP_TX_BULK_SIZE 16 39 #define VETH_XDP_BATCH 16 40 41 struct veth_stats { 42 u64 rx_drops; 43 /* xdp */ 44 u64 xdp_packets; 45 u64 xdp_bytes; 46 u64 xdp_redirect; 47 u64 xdp_drops; 48 u64 xdp_tx; 49 u64 xdp_tx_err; 50 u64 peer_tq_xdp_xmit; 51 u64 peer_tq_xdp_xmit_err; 52 }; 53 54 struct veth_rq_stats { 55 struct veth_stats vs; 56 struct u64_stats_sync syncp; 57 }; 58 59 struct veth_rq { 60 struct napi_struct xdp_napi; 61 struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */ 62 struct net_device *dev; 63 struct bpf_prog __rcu *xdp_prog; 64 struct xdp_mem_info xdp_mem; 65 struct veth_rq_stats stats; 66 bool rx_notify_masked; 67 struct ptr_ring xdp_ring; 68 struct xdp_rxq_info xdp_rxq; 69 struct page_pool *page_pool; 70 }; 71 72 struct veth_priv { 73 struct net_device __rcu *peer; 74 atomic64_t dropped; 75 struct bpf_prog *_xdp_prog; 76 struct veth_rq *rq; 77 unsigned int requested_headroom; 78 }; 79 80 struct veth_xdp_tx_bq { 81 struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE]; 82 unsigned int count; 83 }; 84 85 /* 86 * ethtool interface 87 */ 88 89 struct veth_q_stat_desc { 90 char desc[ETH_GSTRING_LEN]; 91 size_t offset; 92 }; 93 94 #define VETH_RQ_STAT(m) offsetof(struct veth_stats, m) 95 96 static const struct veth_q_stat_desc veth_rq_stats_desc[] = { 97 { "xdp_packets", VETH_RQ_STAT(xdp_packets) }, 98 { "xdp_bytes", VETH_RQ_STAT(xdp_bytes) }, 99 { "drops", VETH_RQ_STAT(rx_drops) }, 100 { "xdp_redirect", VETH_RQ_STAT(xdp_redirect) }, 101 { "xdp_drops", VETH_RQ_STAT(xdp_drops) }, 102 { "xdp_tx", VETH_RQ_STAT(xdp_tx) }, 103 { "xdp_tx_errors", VETH_RQ_STAT(xdp_tx_err) }, 104 }; 105 106 #define VETH_RQ_STATS_LEN ARRAY_SIZE(veth_rq_stats_desc) 107 108 static const struct veth_q_stat_desc veth_tq_stats_desc[] = { 109 { "xdp_xmit", VETH_RQ_STAT(peer_tq_xdp_xmit) }, 110 { "xdp_xmit_errors", VETH_RQ_STAT(peer_tq_xdp_xmit_err) }, 111 }; 112 113 #define VETH_TQ_STATS_LEN ARRAY_SIZE(veth_tq_stats_desc) 114 115 static struct { 116 const char string[ETH_GSTRING_LEN]; 117 } ethtool_stats_keys[] = { 118 { "peer_ifindex" }, 119 }; 120 121 struct veth_xdp_buff { 122 struct xdp_buff xdp; 123 struct sk_buff *skb; 124 }; 125 126 static int veth_get_link_ksettings(struct net_device *dev, 127 struct ethtool_link_ksettings *cmd) 128 { 129 cmd->base.speed = SPEED_10000; 130 cmd->base.duplex = DUPLEX_FULL; 131 cmd->base.port = PORT_TP; 132 cmd->base.autoneg = AUTONEG_DISABLE; 133 return 0; 134 } 135 136 static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 137 { 138 
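	/* veth is purely virtual; only the driver name and version are meaningful here */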
	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
	strscpy(info->version, DRV_VERSION, sizeof(info->version));
}

static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	u8 *p = buf;
	int i, j;

	switch (stringset) {
	case ETH_SS_STATS:
		memcpy(p, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
		p += sizeof(ethtool_stats_keys);
		for (i = 0; i < dev->real_num_rx_queues; i++)
			for (j = 0; j < VETH_RQ_STATS_LEN; j++)
				ethtool_sprintf(&p, "rx_queue_%u_%.18s",
						i, veth_rq_stats_desc[j].desc);

		for (i = 0; i < dev->real_num_tx_queues; i++)
			for (j = 0; j < VETH_TQ_STATS_LEN; j++)
				ethtool_sprintf(&p, "tx_queue_%u_%.18s",
						i, veth_tq_stats_desc[j].desc);

		page_pool_ethtool_stats_get_strings(p);
		break;
	}
}

static int veth_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(ethtool_stats_keys) +
		       VETH_RQ_STATS_LEN * dev->real_num_rx_queues +
		       VETH_TQ_STATS_LEN * dev->real_num_tx_queues +
		       page_pool_ethtool_stats_get_count();
	default:
		return -EOPNOTSUPP;
	}
}

static void veth_get_page_pool_stats(struct net_device *dev, u64 *data)
{
#ifdef CONFIG_PAGE_POOL_STATS
	struct veth_priv *priv = netdev_priv(dev);
	struct page_pool_stats pp_stats = {};
	int i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		if (!priv->rq[i].page_pool)
			continue;
		page_pool_get_stats(priv->rq[i].page_pool, &pp_stats);
	}
	page_pool_ethtool_stats_get(data, &pp_stats);
#endif /* CONFIG_PAGE_POOL_STATS */
}

static void veth_get_ethtool_stats(struct net_device *dev,
				   struct ethtool_stats *stats, u64 *data)
{
	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);
	int i, j, idx, pp_idx;

	data[0] = peer ?
peer->ifindex : 0; 203 idx = 1; 204 for (i = 0; i < dev->real_num_rx_queues; i++) { 205 const struct veth_rq_stats *rq_stats = &priv->rq[i].stats; 206 const void *stats_base = (void *)&rq_stats->vs; 207 unsigned int start; 208 size_t offset; 209 210 do { 211 start = u64_stats_fetch_begin(&rq_stats->syncp); 212 for (j = 0; j < VETH_RQ_STATS_LEN; j++) { 213 offset = veth_rq_stats_desc[j].offset; 214 data[idx + j] = *(u64 *)(stats_base + offset); 215 } 216 } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); 217 idx += VETH_RQ_STATS_LEN; 218 } 219 pp_idx = idx; 220 221 if (!peer) 222 goto page_pool_stats; 223 224 rcv_priv = netdev_priv(peer); 225 for (i = 0; i < peer->real_num_rx_queues; i++) { 226 const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats; 227 const void *base = (void *)&rq_stats->vs; 228 unsigned int start, tx_idx = idx; 229 size_t offset; 230 231 tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN; 232 do { 233 start = u64_stats_fetch_begin(&rq_stats->syncp); 234 for (j = 0; j < VETH_TQ_STATS_LEN; j++) { 235 offset = veth_tq_stats_desc[j].offset; 236 data[tx_idx + j] += *(u64 *)(base + offset); 237 } 238 } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); 239 pp_idx = tx_idx + VETH_TQ_STATS_LEN; 240 } 241 242 page_pool_stats: 243 veth_get_page_pool_stats(dev, &data[pp_idx]); 244 } 245 246 static void veth_get_channels(struct net_device *dev, 247 struct ethtool_channels *channels) 248 { 249 channels->tx_count = dev->real_num_tx_queues; 250 channels->rx_count = dev->real_num_rx_queues; 251 channels->max_tx = dev->num_tx_queues; 252 channels->max_rx = dev->num_rx_queues; 253 } 254 255 static int veth_set_channels(struct net_device *dev, 256 struct ethtool_channels *ch); 257 258 static const struct ethtool_ops veth_ethtool_ops = { 259 .get_drvinfo = veth_get_drvinfo, 260 .get_link = ethtool_op_get_link, 261 .get_strings = veth_get_strings, 262 .get_sset_count = veth_get_sset_count, 263 .get_ethtool_stats = veth_get_ethtool_stats, 264 .get_link_ksettings = veth_get_link_ksettings, 265 .get_ts_info = ethtool_op_get_ts_info, 266 .get_channels = veth_get_channels, 267 .set_channels = veth_set_channels, 268 }; 269 270 /* general routines */ 271 272 static bool veth_is_xdp_frame(void *ptr) 273 { 274 return (unsigned long)ptr & VETH_XDP_FLAG; 275 } 276 277 static struct xdp_frame *veth_ptr_to_xdp(void *ptr) 278 { 279 return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG); 280 } 281 282 static void *veth_xdp_to_ptr(struct xdp_frame *xdp) 283 { 284 return (void *)((unsigned long)xdp | VETH_XDP_FLAG); 285 } 286 287 static void veth_ptr_free(void *ptr) 288 { 289 if (veth_is_xdp_frame(ptr)) 290 xdp_return_frame(veth_ptr_to_xdp(ptr)); 291 else 292 kfree_skb(ptr); 293 } 294 295 static void __veth_xdp_flush(struct veth_rq *rq) 296 { 297 /* Write ptr_ring before reading rx_notify_masked */ 298 smp_mb(); 299 if (!READ_ONCE(rq->rx_notify_masked) && 300 napi_schedule_prep(&rq->xdp_napi)) { 301 WRITE_ONCE(rq->rx_notify_masked, true); 302 __napi_schedule(&rq->xdp_napi); 303 } 304 } 305 306 static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb) 307 { 308 if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) { 309 dev_kfree_skb_any(skb); 310 return NET_RX_DROP; 311 } 312 313 return NET_RX_SUCCESS; 314 } 315 316 static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, 317 struct veth_rq *rq, bool xdp) 318 { 319 return __dev_forward_skb(dev, skb) ?: xdp ? 
320 veth_xdp_rx(rq, skb) : 321 __netif_rx(skb); 322 } 323 324 /* return true if the specified skb has chances of GRO aggregation 325 * Don't strive for accuracy, but try to avoid GRO overhead in the most 326 * common scenarios. 327 * When XDP is enabled, all traffic is considered eligible, as the xmit 328 * device has TSO off. 329 * When TSO is enabled on the xmit device, we are likely interested only 330 * in UDP aggregation, explicitly check for that if the skb is suspected 331 * - the sock_wfree destructor is used by UDP, ICMP and XDP sockets - 332 * to belong to locally generated UDP traffic. 333 */ 334 static bool veth_skb_is_eligible_for_gro(const struct net_device *dev, 335 const struct net_device *rcv, 336 const struct sk_buff *skb) 337 { 338 return !(dev->features & NETIF_F_ALL_TSO) || 339 (skb->destructor == sock_wfree && 340 rcv->features & (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD)); 341 } 342 343 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 344 { 345 struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 346 struct veth_rq *rq = NULL; 347 int ret = NETDEV_TX_OK; 348 struct net_device *rcv; 349 int length = skb->len; 350 bool use_napi = false; 351 int rxq; 352 353 rcu_read_lock(); 354 rcv = rcu_dereference(priv->peer); 355 if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { 356 kfree_skb(skb); 357 goto drop; 358 } 359 360 rcv_priv = netdev_priv(rcv); 361 rxq = skb_get_queue_mapping(skb); 362 if (rxq < rcv->real_num_rx_queues) { 363 rq = &rcv_priv->rq[rxq]; 364 365 /* The napi pointer is available when an XDP program is 366 * attached or when GRO is enabled 367 * Don't bother with napi/GRO if the skb can't be aggregated 368 */ 369 use_napi = rcu_access_pointer(rq->napi) && 370 veth_skb_is_eligible_for_gro(dev, rcv, skb); 371 } 372 373 skb_tx_timestamp(skb); 374 if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { 375 if (!use_napi) 376 dev_lstats_add(dev, length); 377 else 378 __veth_xdp_flush(rq); 379 } else { 380 drop: 381 atomic64_inc(&priv->dropped); 382 ret = NET_XMIT_DROP; 383 } 384 385 rcu_read_unlock(); 386 387 return ret; 388 } 389 390 static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) 391 { 392 struct veth_priv *priv = netdev_priv(dev); 393 394 dev_lstats_read(dev, packets, bytes); 395 return atomic64_read(&priv->dropped); 396 } 397 398 static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) 399 { 400 struct veth_priv *priv = netdev_priv(dev); 401 int i; 402 403 result->peer_tq_xdp_xmit_err = 0; 404 result->xdp_packets = 0; 405 result->xdp_tx_err = 0; 406 result->xdp_bytes = 0; 407 result->rx_drops = 0; 408 for (i = 0; i < dev->num_rx_queues; i++) { 409 u64 packets, bytes, drops, xdp_tx_err, peer_tq_xdp_xmit_err; 410 struct veth_rq_stats *stats = &priv->rq[i].stats; 411 unsigned int start; 412 413 do { 414 start = u64_stats_fetch_begin(&stats->syncp); 415 peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err; 416 xdp_tx_err = stats->vs.xdp_tx_err; 417 packets = stats->vs.xdp_packets; 418 bytes = stats->vs.xdp_bytes; 419 drops = stats->vs.rx_drops; 420 } while (u64_stats_fetch_retry(&stats->syncp, start)); 421 result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err; 422 result->xdp_tx_err += xdp_tx_err; 423 result->xdp_packets += packets; 424 result->xdp_bytes += bytes; 425 result->rx_drops += drops; 426 } 427 } 428 429 static void veth_get_stats64(struct net_device *dev, 430 struct rtnl_link_stats64 *tot) 431 { 432 struct veth_priv *priv = netdev_priv(dev); 433 struct 
net_device *peer; 434 struct veth_stats rx; 435 u64 packets, bytes; 436 437 tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); 438 tot->tx_bytes = bytes; 439 tot->tx_packets = packets; 440 441 veth_stats_rx(&rx, dev); 442 tot->tx_dropped += rx.xdp_tx_err; 443 tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; 444 tot->rx_bytes = rx.xdp_bytes; 445 tot->rx_packets = rx.xdp_packets; 446 447 rcu_read_lock(); 448 peer = rcu_dereference(priv->peer); 449 if (peer) { 450 veth_stats_tx(peer, &packets, &bytes); 451 tot->rx_bytes += bytes; 452 tot->rx_packets += packets; 453 454 veth_stats_rx(&rx, peer); 455 tot->tx_dropped += rx.peer_tq_xdp_xmit_err; 456 tot->rx_dropped += rx.xdp_tx_err; 457 tot->tx_bytes += rx.xdp_bytes; 458 tot->tx_packets += rx.xdp_packets; 459 } 460 rcu_read_unlock(); 461 } 462 463 /* fake multicast ability */ 464 static void veth_set_multicast_list(struct net_device *dev) 465 { 466 } 467 468 static int veth_select_rxq(struct net_device *dev) 469 { 470 return smp_processor_id() % dev->real_num_rx_queues; 471 } 472 473 static struct net_device *veth_peer_dev(struct net_device *dev) 474 { 475 struct veth_priv *priv = netdev_priv(dev); 476 477 /* Callers must be under RCU read side. */ 478 return rcu_dereference(priv->peer); 479 } 480 481 static int veth_xdp_xmit(struct net_device *dev, int n, 482 struct xdp_frame **frames, 483 u32 flags, bool ndo_xmit) 484 { 485 struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 486 int i, ret = -ENXIO, nxmit = 0; 487 struct net_device *rcv; 488 unsigned int max_len; 489 struct veth_rq *rq; 490 491 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 492 return -EINVAL; 493 494 rcu_read_lock(); 495 rcv = rcu_dereference(priv->peer); 496 if (unlikely(!rcv)) 497 goto out; 498 499 rcv_priv = netdev_priv(rcv); 500 rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 501 /* The napi pointer is set if NAPI is enabled, which ensures that 502 * xdp_ring is initialized on receive side and the peer device is up. 
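 * NAPI (and hence the napi pointer) is enabled either when the receiving
 * device has an XDP program attached or when GRO is enabled on it.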
503 */ 504 if (!rcu_access_pointer(rq->napi)) 505 goto out; 506 507 max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN; 508 509 spin_lock(&rq->xdp_ring.producer_lock); 510 for (i = 0; i < n; i++) { 511 struct xdp_frame *frame = frames[i]; 512 void *ptr = veth_xdp_to_ptr(frame); 513 514 if (unlikely(xdp_get_frame_len(frame) > max_len || 515 __ptr_ring_produce(&rq->xdp_ring, ptr))) 516 break; 517 nxmit++; 518 } 519 spin_unlock(&rq->xdp_ring.producer_lock); 520 521 if (flags & XDP_XMIT_FLUSH) 522 __veth_xdp_flush(rq); 523 524 ret = nxmit; 525 if (ndo_xmit) { 526 u64_stats_update_begin(&rq->stats.syncp); 527 rq->stats.vs.peer_tq_xdp_xmit += nxmit; 528 rq->stats.vs.peer_tq_xdp_xmit_err += n - nxmit; 529 u64_stats_update_end(&rq->stats.syncp); 530 } 531 532 out: 533 rcu_read_unlock(); 534 535 return ret; 536 } 537 538 static int veth_ndo_xdp_xmit(struct net_device *dev, int n, 539 struct xdp_frame **frames, u32 flags) 540 { 541 int err; 542 543 err = veth_xdp_xmit(dev, n, frames, flags, true); 544 if (err < 0) { 545 struct veth_priv *priv = netdev_priv(dev); 546 547 atomic64_add(n, &priv->dropped); 548 } 549 550 return err; 551 } 552 553 static void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 554 { 555 int sent, i, err = 0, drops; 556 557 sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false); 558 if (sent < 0) { 559 err = sent; 560 sent = 0; 561 } 562 563 for (i = sent; unlikely(i < bq->count); i++) 564 xdp_return_frame(bq->q[i]); 565 566 drops = bq->count - sent; 567 trace_xdp_bulk_tx(rq->dev, sent, drops, err); 568 569 u64_stats_update_begin(&rq->stats.syncp); 570 rq->stats.vs.xdp_tx += sent; 571 rq->stats.vs.xdp_tx_err += drops; 572 u64_stats_update_end(&rq->stats.syncp); 573 574 bq->count = 0; 575 } 576 577 static void veth_xdp_flush(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 578 { 579 struct veth_priv *rcv_priv, *priv = netdev_priv(rq->dev); 580 struct net_device *rcv; 581 struct veth_rq *rcv_rq; 582 583 rcu_read_lock(); 584 veth_xdp_flush_bq(rq, bq); 585 rcv = rcu_dereference(priv->peer); 586 if (unlikely(!rcv)) 587 goto out; 588 589 rcv_priv = netdev_priv(rcv); 590 rcv_rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 591 /* xdp_ring is initialized on receive side? 
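 * The peer's xdp_prog is only set after its NAPI and ptr_ring are
 * initialized, so checking it here is sufficient.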
*/ 592 if (unlikely(!rcu_access_pointer(rcv_rq->xdp_prog))) 593 goto out; 594 595 __veth_xdp_flush(rcv_rq); 596 out: 597 rcu_read_unlock(); 598 } 599 600 static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp, 601 struct veth_xdp_tx_bq *bq) 602 { 603 struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp); 604 605 if (unlikely(!frame)) 606 return -EOVERFLOW; 607 608 if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE)) 609 veth_xdp_flush_bq(rq, bq); 610 611 bq->q[bq->count++] = frame; 612 613 return 0; 614 } 615 616 static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq, 617 struct xdp_frame *frame, 618 struct veth_xdp_tx_bq *bq, 619 struct veth_stats *stats) 620 { 621 struct xdp_frame orig_frame; 622 struct bpf_prog *xdp_prog; 623 624 rcu_read_lock(); 625 xdp_prog = rcu_dereference(rq->xdp_prog); 626 if (likely(xdp_prog)) { 627 struct veth_xdp_buff vxbuf; 628 struct xdp_buff *xdp = &vxbuf.xdp; 629 u32 act; 630 631 xdp_convert_frame_to_buff(frame, xdp); 632 xdp->rxq = &rq->xdp_rxq; 633 vxbuf.skb = NULL; 634 635 act = bpf_prog_run_xdp(xdp_prog, xdp); 636 637 switch (act) { 638 case XDP_PASS: 639 if (xdp_update_frame_from_buff(xdp, frame)) 640 goto err_xdp; 641 break; 642 case XDP_TX: 643 orig_frame = *frame; 644 xdp->rxq->mem = frame->mem; 645 if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) { 646 trace_xdp_exception(rq->dev, xdp_prog, act); 647 frame = &orig_frame; 648 stats->rx_drops++; 649 goto err_xdp; 650 } 651 stats->xdp_tx++; 652 rcu_read_unlock(); 653 goto xdp_xmit; 654 case XDP_REDIRECT: 655 orig_frame = *frame; 656 xdp->rxq->mem = frame->mem; 657 if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { 658 frame = &orig_frame; 659 stats->rx_drops++; 660 goto err_xdp; 661 } 662 stats->xdp_redirect++; 663 rcu_read_unlock(); 664 goto xdp_xmit; 665 default: 666 bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act); 667 fallthrough; 668 case XDP_ABORTED: 669 trace_xdp_exception(rq->dev, xdp_prog, act); 670 fallthrough; 671 case XDP_DROP: 672 stats->xdp_drops++; 673 goto err_xdp; 674 } 675 } 676 rcu_read_unlock(); 677 678 return frame; 679 err_xdp: 680 rcu_read_unlock(); 681 xdp_return_frame(frame); 682 xdp_xmit: 683 return NULL; 684 } 685 686 /* frames array contains VETH_XDP_BATCH at most */ 687 static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames, 688 int n_xdpf, struct veth_xdp_tx_bq *bq, 689 struct veth_stats *stats) 690 { 691 void *skbs[VETH_XDP_BATCH]; 692 int i; 693 694 if (xdp_alloc_skb_bulk(skbs, n_xdpf, 695 GFP_ATOMIC | __GFP_ZERO) < 0) { 696 for (i = 0; i < n_xdpf; i++) 697 xdp_return_frame(frames[i]); 698 stats->rx_drops += n_xdpf; 699 700 return; 701 } 702 703 for (i = 0; i < n_xdpf; i++) { 704 struct sk_buff *skb = skbs[i]; 705 706 skb = __xdp_build_skb_from_frame(frames[i], skb, 707 rq->dev); 708 if (!skb) { 709 xdp_return_frame(frames[i]); 710 stats->rx_drops++; 711 continue; 712 } 713 napi_gro_receive(&rq->xdp_napi, skb); 714 } 715 } 716 717 static void veth_xdp_get(struct xdp_buff *xdp) 718 { 719 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 720 int i; 721 722 get_page(virt_to_page(xdp->data)); 723 if (likely(!xdp_buff_has_frags(xdp))) 724 return; 725 726 for (i = 0; i < sinfo->nr_frags; i++) 727 __skb_frag_ref(&sinfo->frags[i]); 728 } 729 730 static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, 731 struct xdp_buff *xdp, 732 struct sk_buff **pskb) 733 { 734 struct sk_buff *skb = *pskb; 735 u32 frame_sz; 736 737 if (skb_shared(skb) || skb_head_is_locked(skb) || 738 skb_shinfo(skb)->nr_frags || 739 skb_headroom(skb) < 
XDP_PACKET_HEADROOM) {
		u32 size, len, max_head_size, off, truesize, page_offset;
		struct sk_buff *nskb;
		struct page *page;
		int i, head_off;
		void *va;

		/* We need a private copy of the skb and data buffers since
		 * the ebpf program can modify it. We segment the original skb
		 * into order-0 pages without linearizing it.
		 *
		 * Make sure we have enough space for linear and paged area
		 */
		max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
						  VETH_XDP_HEADROOM);
		if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
			goto drop;

		size = min_t(u32, skb->len, max_head_size);
		truesize = SKB_HEAD_ALIGN(size) + VETH_XDP_HEADROOM;

		/* Allocate skb head */
		va = page_pool_dev_alloc_va(rq->page_pool, &truesize);
		if (!va)
			goto drop;

		nskb = napi_build_skb(va, truesize);
		if (!nskb) {
			page_pool_free_va(rq->page_pool, va, true);
			goto drop;
		}

		skb_reserve(nskb, VETH_XDP_HEADROOM);
		skb_copy_header(nskb, skb);
		skb_mark_for_recycle(nskb);

		if (skb_copy_bits(skb, 0, nskb->data, size)) {
			consume_skb(nskb);
			goto drop;
		}
		skb_put(nskb, size);

		head_off = skb_headroom(nskb) - skb_headroom(skb);
		skb_headers_offset_update(nskb, head_off);

		/* Allocate paged area of new skb */
		off = size;
		len = skb->len - off;

		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
			size = min_t(u32, len, PAGE_SIZE);
			truesize = size;

			page = page_pool_dev_alloc(rq->page_pool, &page_offset,
						   &truesize);
			if (!page) {
				consume_skb(nskb);
				goto drop;
			}

			skb_add_rx_frag(nskb, i, page, page_offset, size,
					truesize);
			if (skb_copy_bits(skb, off, page_address(page),
					  size)) {
				consume_skb(nskb);
				goto drop;
			}

			len -= size;
			off += size;
		}

		consume_skb(skb);
		skb = nskb;
	}

	/* SKB "head" area always has tailroom for skb_shared_info */
	frame_sz = skb_end_pointer(skb) - skb->head;
	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
	xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
			 skb_headlen(skb), true);

	if (skb_is_nonlinear(skb)) {
		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
		xdp_buff_set_frags_flag(xdp);
	} else {
		xdp_buff_clear_frags_flag(xdp);
	}
	*pskb = skb;

	return 0;
drop:
	consume_skb(skb);
	*pskb = NULL;

	return -ENOMEM;
}

static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
					struct sk_buff *skb,
					struct veth_xdp_tx_bq *bq,
					struct veth_stats *stats)
{
	void *orig_data, *orig_data_end;
	struct bpf_prog *xdp_prog;
	struct veth_xdp_buff vxbuf;
	struct xdp_buff *xdp = &vxbuf.xdp;
	u32 act, metalen;
	int off;

	skb_prepare_for_gro(skb);

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (unlikely(!xdp_prog)) {
		rcu_read_unlock();
		goto out;
	}

	__skb_push(skb, skb->data - skb_mac_header(skb));
	if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
		goto drop;
	vxbuf.skb = skb;

	orig_data = xdp->data;
	orig_data_end = xdp->data_end;

	act = bpf_prog_run_xdp(xdp_prog, xdp);

	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		veth_xdp_get(xdp);
		consume_skb(skb);
		xdp->rxq->mem = rq->xdp_mem;
		if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
			trace_xdp_exception(rq->dev, xdp_prog, act);
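			/* queueing to the peer failed; count the drop and
			 * release the buffer via err_xdp below
			 */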
stats->rx_drops++; 879 goto err_xdp; 880 } 881 stats->xdp_tx++; 882 rcu_read_unlock(); 883 goto xdp_xmit; 884 case XDP_REDIRECT: 885 veth_xdp_get(xdp); 886 consume_skb(skb); 887 xdp->rxq->mem = rq->xdp_mem; 888 if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { 889 stats->rx_drops++; 890 goto err_xdp; 891 } 892 stats->xdp_redirect++; 893 rcu_read_unlock(); 894 goto xdp_xmit; 895 default: 896 bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act); 897 fallthrough; 898 case XDP_ABORTED: 899 trace_xdp_exception(rq->dev, xdp_prog, act); 900 fallthrough; 901 case XDP_DROP: 902 stats->xdp_drops++; 903 goto xdp_drop; 904 } 905 rcu_read_unlock(); 906 907 /* check if bpf_xdp_adjust_head was used */ 908 off = orig_data - xdp->data; 909 if (off > 0) 910 __skb_push(skb, off); 911 else if (off < 0) 912 __skb_pull(skb, -off); 913 914 skb_reset_mac_header(skb); 915 916 /* check if bpf_xdp_adjust_tail was used */ 917 off = xdp->data_end - orig_data_end; 918 if (off != 0) 919 __skb_put(skb, off); /* positive on grow, negative on shrink */ 920 921 /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers 922 * (e.g. bpf_xdp_adjust_tail), we need to update data_len here. 923 */ 924 if (xdp_buff_has_frags(xdp)) 925 skb->data_len = skb_shinfo(skb)->xdp_frags_size; 926 else 927 skb->data_len = 0; 928 929 skb->protocol = eth_type_trans(skb, rq->dev); 930 931 metalen = xdp->data - xdp->data_meta; 932 if (metalen) 933 skb_metadata_set(skb, metalen); 934 out: 935 return skb; 936 drop: 937 stats->rx_drops++; 938 xdp_drop: 939 rcu_read_unlock(); 940 kfree_skb(skb); 941 return NULL; 942 err_xdp: 943 rcu_read_unlock(); 944 xdp_return_buff(xdp); 945 xdp_xmit: 946 return NULL; 947 } 948 949 static int veth_xdp_rcv(struct veth_rq *rq, int budget, 950 struct veth_xdp_tx_bq *bq, 951 struct veth_stats *stats) 952 { 953 int i, done = 0, n_xdpf = 0; 954 void *xdpf[VETH_XDP_BATCH]; 955 956 for (i = 0; i < budget; i++) { 957 void *ptr = __ptr_ring_consume(&rq->xdp_ring); 958 959 if (!ptr) 960 break; 961 962 if (veth_is_xdp_frame(ptr)) { 963 /* ndo_xdp_xmit */ 964 struct xdp_frame *frame = veth_ptr_to_xdp(ptr); 965 966 stats->xdp_bytes += xdp_get_frame_len(frame); 967 frame = veth_xdp_rcv_one(rq, frame, bq, stats); 968 if (frame) { 969 /* XDP_PASS */ 970 xdpf[n_xdpf++] = frame; 971 if (n_xdpf == VETH_XDP_BATCH) { 972 veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf, 973 bq, stats); 974 n_xdpf = 0; 975 } 976 } 977 } else { 978 /* ndo_start_xmit */ 979 struct sk_buff *skb = ptr; 980 981 stats->xdp_bytes += skb->len; 982 skb = veth_xdp_rcv_skb(rq, skb, bq, stats); 983 if (skb) { 984 if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC)) 985 netif_receive_skb(skb); 986 else 987 napi_gro_receive(&rq->xdp_napi, skb); 988 } 989 } 990 done++; 991 } 992 993 if (n_xdpf) 994 veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf, bq, stats); 995 996 u64_stats_update_begin(&rq->stats.syncp); 997 rq->stats.vs.xdp_redirect += stats->xdp_redirect; 998 rq->stats.vs.xdp_bytes += stats->xdp_bytes; 999 rq->stats.vs.xdp_drops += stats->xdp_drops; 1000 rq->stats.vs.rx_drops += stats->rx_drops; 1001 rq->stats.vs.xdp_packets += done; 1002 u64_stats_update_end(&rq->stats.syncp); 1003 1004 return done; 1005 } 1006 1007 static int veth_poll(struct napi_struct *napi, int budget) 1008 { 1009 struct veth_rq *rq = 1010 container_of(napi, struct veth_rq, xdp_napi); 1011 struct veth_stats stats = {}; 1012 struct veth_xdp_tx_bq bq; 1013 int done; 1014 1015 bq.count = 0; 1016 1017 xdp_set_return_frame_no_direct(); 1018 done = veth_xdp_rcv(rq, budget, &bq, &stats); 1019 1020 if 
(stats.xdp_redirect > 0) 1021 xdp_do_flush(); 1022 1023 if (done < budget && napi_complete_done(napi, done)) { 1024 /* Write rx_notify_masked before reading ptr_ring */ 1025 smp_store_mb(rq->rx_notify_masked, false); 1026 if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { 1027 if (napi_schedule_prep(&rq->xdp_napi)) { 1028 WRITE_ONCE(rq->rx_notify_masked, true); 1029 __napi_schedule(&rq->xdp_napi); 1030 } 1031 } 1032 } 1033 1034 if (stats.xdp_tx > 0) 1035 veth_xdp_flush(rq, &bq); 1036 xdp_clear_return_frame_no_direct(); 1037 1038 return done; 1039 } 1040 1041 static int veth_create_page_pool(struct veth_rq *rq) 1042 { 1043 struct page_pool_params pp_params = { 1044 .order = 0, 1045 .pool_size = VETH_RING_SIZE, 1046 .nid = NUMA_NO_NODE, 1047 .dev = &rq->dev->dev, 1048 }; 1049 1050 rq->page_pool = page_pool_create(&pp_params); 1051 if (IS_ERR(rq->page_pool)) { 1052 int err = PTR_ERR(rq->page_pool); 1053 1054 rq->page_pool = NULL; 1055 return err; 1056 } 1057 1058 return 0; 1059 } 1060 1061 static int __veth_napi_enable_range(struct net_device *dev, int start, int end) 1062 { 1063 struct veth_priv *priv = netdev_priv(dev); 1064 int err, i; 1065 1066 for (i = start; i < end; i++) { 1067 err = veth_create_page_pool(&priv->rq[i]); 1068 if (err) 1069 goto err_page_pool; 1070 } 1071 1072 for (i = start; i < end; i++) { 1073 struct veth_rq *rq = &priv->rq[i]; 1074 1075 err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL); 1076 if (err) 1077 goto err_xdp_ring; 1078 } 1079 1080 for (i = start; i < end; i++) { 1081 struct veth_rq *rq = &priv->rq[i]; 1082 1083 napi_enable(&rq->xdp_napi); 1084 rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi); 1085 } 1086 1087 return 0; 1088 1089 err_xdp_ring: 1090 for (i--; i >= start; i--) 1091 ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free); 1092 i = end; 1093 err_page_pool: 1094 for (i--; i >= start; i--) { 1095 page_pool_destroy(priv->rq[i].page_pool); 1096 priv->rq[i].page_pool = NULL; 1097 } 1098 1099 return err; 1100 } 1101 1102 static int __veth_napi_enable(struct net_device *dev) 1103 { 1104 return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); 1105 } 1106 1107 static void veth_napi_del_range(struct net_device *dev, int start, int end) 1108 { 1109 struct veth_priv *priv = netdev_priv(dev); 1110 int i; 1111 1112 for (i = start; i < end; i++) { 1113 struct veth_rq *rq = &priv->rq[i]; 1114 1115 rcu_assign_pointer(priv->rq[i].napi, NULL); 1116 napi_disable(&rq->xdp_napi); 1117 __netif_napi_del(&rq->xdp_napi); 1118 } 1119 synchronize_net(); 1120 1121 for (i = start; i < end; i++) { 1122 struct veth_rq *rq = &priv->rq[i]; 1123 1124 rq->rx_notify_masked = false; 1125 ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free); 1126 } 1127 1128 for (i = start; i < end; i++) { 1129 page_pool_destroy(priv->rq[i].page_pool); 1130 priv->rq[i].page_pool = NULL; 1131 } 1132 } 1133 1134 static void veth_napi_del(struct net_device *dev) 1135 { 1136 veth_napi_del_range(dev, 0, dev->real_num_rx_queues); 1137 } 1138 1139 static bool veth_gro_requested(const struct net_device *dev) 1140 { 1141 return !!(dev->wanted_features & NETIF_F_GRO); 1142 } 1143 1144 static int veth_enable_xdp_range(struct net_device *dev, int start, int end, 1145 bool napi_already_on) 1146 { 1147 struct veth_priv *priv = netdev_priv(dev); 1148 int err, i; 1149 1150 for (i = start; i < end; i++) { 1151 struct veth_rq *rq = &priv->rq[i]; 1152 1153 if (!napi_already_on) 1154 netif_napi_add(dev, &rq->xdp_napi, veth_poll); 1155 err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, 
rq->xdp_napi.napi_id); 1156 if (err < 0) 1157 goto err_rxq_reg; 1158 1159 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 1160 MEM_TYPE_PAGE_SHARED, 1161 NULL); 1162 if (err < 0) 1163 goto err_reg_mem; 1164 1165 /* Save original mem info as it can be overwritten */ 1166 rq->xdp_mem = rq->xdp_rxq.mem; 1167 } 1168 return 0; 1169 1170 err_reg_mem: 1171 xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); 1172 err_rxq_reg: 1173 for (i--; i >= start; i--) { 1174 struct veth_rq *rq = &priv->rq[i]; 1175 1176 xdp_rxq_info_unreg(&rq->xdp_rxq); 1177 if (!napi_already_on) 1178 netif_napi_del(&rq->xdp_napi); 1179 } 1180 1181 return err; 1182 } 1183 1184 static void veth_disable_xdp_range(struct net_device *dev, int start, int end, 1185 bool delete_napi) 1186 { 1187 struct veth_priv *priv = netdev_priv(dev); 1188 int i; 1189 1190 for (i = start; i < end; i++) { 1191 struct veth_rq *rq = &priv->rq[i]; 1192 1193 rq->xdp_rxq.mem = rq->xdp_mem; 1194 xdp_rxq_info_unreg(&rq->xdp_rxq); 1195 1196 if (delete_napi) 1197 netif_napi_del(&rq->xdp_napi); 1198 } 1199 } 1200 1201 static int veth_enable_xdp(struct net_device *dev) 1202 { 1203 bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP); 1204 struct veth_priv *priv = netdev_priv(dev); 1205 int err, i; 1206 1207 if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { 1208 err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on); 1209 if (err) 1210 return err; 1211 1212 if (!napi_already_on) { 1213 err = __veth_napi_enable(dev); 1214 if (err) { 1215 veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); 1216 return err; 1217 } 1218 1219 if (!veth_gro_requested(dev)) { 1220 /* user-space did not require GRO, but adding XDP 1221 * is supposed to get GRO working 1222 */ 1223 dev->features |= NETIF_F_GRO; 1224 netdev_features_change(dev); 1225 } 1226 } 1227 } 1228 1229 for (i = 0; i < dev->real_num_rx_queues; i++) { 1230 rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog); 1231 rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi); 1232 } 1233 1234 return 0; 1235 } 1236 1237 static void veth_disable_xdp(struct net_device *dev) 1238 { 1239 struct veth_priv *priv = netdev_priv(dev); 1240 int i; 1241 1242 for (i = 0; i < dev->real_num_rx_queues; i++) 1243 rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); 1244 1245 if (!netif_running(dev) || !veth_gro_requested(dev)) { 1246 veth_napi_del(dev); 1247 1248 /* if user-space did not require GRO, since adding XDP 1249 * enabled it, clear it now 1250 */ 1251 if (!veth_gro_requested(dev) && netif_running(dev)) { 1252 dev->features &= ~NETIF_F_GRO; 1253 netdev_features_change(dev); 1254 } 1255 } 1256 1257 veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); 1258 } 1259 1260 static int veth_napi_enable_range(struct net_device *dev, int start, int end) 1261 { 1262 struct veth_priv *priv = netdev_priv(dev); 1263 int err, i; 1264 1265 for (i = start; i < end; i++) { 1266 struct veth_rq *rq = &priv->rq[i]; 1267 1268 netif_napi_add(dev, &rq->xdp_napi, veth_poll); 1269 } 1270 1271 err = __veth_napi_enable_range(dev, start, end); 1272 if (err) { 1273 for (i = start; i < end; i++) { 1274 struct veth_rq *rq = &priv->rq[i]; 1275 1276 netif_napi_del(&rq->xdp_napi); 1277 } 1278 return err; 1279 } 1280 return err; 1281 } 1282 1283 static int veth_napi_enable(struct net_device *dev) 1284 { 1285 return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); 1286 } 1287 1288 static void veth_disable_range_safe(struct net_device *dev, int start, int end) 1289 { 1290 struct veth_priv *priv = 
netdev_priv(dev);

	if (start >= end)
		return;

	if (priv->_xdp_prog) {
		veth_napi_del_range(dev, start, end);
		veth_disable_xdp_range(dev, start, end, false);
	} else if (veth_gro_requested(dev)) {
		veth_napi_del_range(dev, start, end);
	}
}

static int veth_enable_range_safe(struct net_device *dev, int start, int end)
{
	struct veth_priv *priv = netdev_priv(dev);
	int err;

	if (start >= end)
		return 0;

	if (priv->_xdp_prog) {
		/* these channels are freshly initialized, napi is not on there even
		 * when GRO is requested
		 */
		err = veth_enable_xdp_range(dev, start, end, false);
		if (err)
			return err;

		err = __veth_napi_enable_range(dev, start, end);
		if (err) {
			/* on error always delete the newly added napis */
			veth_disable_xdp_range(dev, start, end, true);
			return err;
		}
	} else if (veth_gro_requested(dev)) {
		return veth_napi_enable_range(dev, start, end);
	}
	return 0;
}

static void veth_set_xdp_features(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;

	peer = rtnl_dereference(priv->peer);
	if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
		struct veth_priv *priv_peer = netdev_priv(peer);
		xdp_features_t val = NETDEV_XDP_ACT_BASIC |
				     NETDEV_XDP_ACT_REDIRECT |
				     NETDEV_XDP_ACT_RX_SG;

		if (priv_peer->_xdp_prog || veth_gro_requested(peer))
			val |= NETDEV_XDP_ACT_NDO_XMIT |
			       NETDEV_XDP_ACT_NDO_XMIT_SG;
		xdp_set_features_flag(dev, val);
	} else {
		xdp_clear_features_flag(dev);
	}
}

static int veth_set_channels(struct net_device *dev,
			     struct ethtool_channels *ch)
{
	struct veth_priv *priv = netdev_priv(dev);
	unsigned int old_rx_count, new_rx_count;
	struct veth_priv *peer_priv;
	struct net_device *peer;
	int err;

	/* sanity check. Upper bounds are already enforced by the caller */
	if (!ch->rx_count || !ch->tx_count)
		return -EINVAL;

	/* avoid breaking XDP, if that is enabled */
	peer = rtnl_dereference(priv->peer);
	peer_priv = peer ? netdev_priv(peer) : NULL;
	if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues)
		return -EINVAL;

	if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues)
		return -EINVAL;

	old_rx_count = dev->real_num_rx_queues;
	new_rx_count = ch->rx_count;
	if (netif_running(dev)) {
		/* turn device off */
		netif_carrier_off(dev);
		if (peer)
			netif_carrier_off(peer);

		/* try to allocate new resources, as needed */
		err = veth_enable_range_safe(dev, old_rx_count, new_rx_count);
		if (err)
			goto out;
	}

	err = netif_set_real_num_rx_queues(dev, ch->rx_count);
	if (err)
		goto revert;

	err = netif_set_real_num_tx_queues(dev, ch->tx_count);
	if (err) {
		int err2 = netif_set_real_num_rx_queues(dev, old_rx_count);

		/* this error condition could happen only if rx and tx change
		 * in opposite directions (e.g.
tx nr raises, rx nr decreases) 1398 * and we can't do anything to fully restore the original 1399 * status 1400 */ 1401 if (err2) 1402 pr_warn("Can't restore rx queues config %d -> %d %d", 1403 new_rx_count, old_rx_count, err2); 1404 else 1405 goto revert; 1406 } 1407 1408 out: 1409 if (netif_running(dev)) { 1410 /* note that we need to swap the arguments WRT the enable part 1411 * to identify the range we have to disable 1412 */ 1413 veth_disable_range_safe(dev, new_rx_count, old_rx_count); 1414 netif_carrier_on(dev); 1415 if (peer) 1416 netif_carrier_on(peer); 1417 } 1418 1419 /* update XDP supported features */ 1420 veth_set_xdp_features(dev); 1421 if (peer) 1422 veth_set_xdp_features(peer); 1423 1424 return err; 1425 1426 revert: 1427 new_rx_count = old_rx_count; 1428 old_rx_count = ch->rx_count; 1429 goto out; 1430 } 1431 1432 static int veth_open(struct net_device *dev) 1433 { 1434 struct veth_priv *priv = netdev_priv(dev); 1435 struct net_device *peer = rtnl_dereference(priv->peer); 1436 int err; 1437 1438 if (!peer) 1439 return -ENOTCONN; 1440 1441 if (priv->_xdp_prog) { 1442 err = veth_enable_xdp(dev); 1443 if (err) 1444 return err; 1445 } else if (veth_gro_requested(dev)) { 1446 err = veth_napi_enable(dev); 1447 if (err) 1448 return err; 1449 } 1450 1451 if (peer->flags & IFF_UP) { 1452 netif_carrier_on(dev); 1453 netif_carrier_on(peer); 1454 } 1455 1456 veth_set_xdp_features(dev); 1457 1458 return 0; 1459 } 1460 1461 static int veth_close(struct net_device *dev) 1462 { 1463 struct veth_priv *priv = netdev_priv(dev); 1464 struct net_device *peer = rtnl_dereference(priv->peer); 1465 1466 netif_carrier_off(dev); 1467 if (peer) 1468 netif_carrier_off(peer); 1469 1470 if (priv->_xdp_prog) 1471 veth_disable_xdp(dev); 1472 else if (veth_gro_requested(dev)) 1473 veth_napi_del(dev); 1474 1475 return 0; 1476 } 1477 1478 static int is_valid_veth_mtu(int mtu) 1479 { 1480 return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 1481 } 1482 1483 static int veth_alloc_queues(struct net_device *dev) 1484 { 1485 struct veth_priv *priv = netdev_priv(dev); 1486 int i; 1487 1488 priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); 1489 if (!priv->rq) 1490 return -ENOMEM; 1491 1492 for (i = 0; i < dev->num_rx_queues; i++) { 1493 priv->rq[i].dev = dev; 1494 u64_stats_init(&priv->rq[i].stats.syncp); 1495 } 1496 1497 return 0; 1498 } 1499 1500 static void veth_free_queues(struct net_device *dev) 1501 { 1502 struct veth_priv *priv = netdev_priv(dev); 1503 1504 kfree(priv->rq); 1505 } 1506 1507 static int veth_dev_init(struct net_device *dev) 1508 { 1509 int err; 1510 1511 dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); 1512 if (!dev->lstats) 1513 return -ENOMEM; 1514 1515 err = veth_alloc_queues(dev); 1516 if (err) { 1517 free_percpu(dev->lstats); 1518 return err; 1519 } 1520 1521 return 0; 1522 } 1523 1524 static void veth_dev_free(struct net_device *dev) 1525 { 1526 veth_free_queues(dev); 1527 free_percpu(dev->lstats); 1528 } 1529 1530 #ifdef CONFIG_NET_POLL_CONTROLLER 1531 static void veth_poll_controller(struct net_device *dev) 1532 { 1533 /* veth only receives frames when its peer sends one 1534 * Since it has nothing to do with disabling irqs, we are guaranteed 1535 * never to have pending data when we poll for it so 1536 * there is nothing to do here. 
1537 * 1538 * We need this though so netpoll recognizes us as an interface that 1539 * supports polling, which enables bridge devices in virt setups to 1540 * still use netconsole 1541 */ 1542 } 1543 #endif /* CONFIG_NET_POLL_CONTROLLER */ 1544 1545 static int veth_get_iflink(const struct net_device *dev) 1546 { 1547 struct veth_priv *priv = netdev_priv(dev); 1548 struct net_device *peer; 1549 int iflink; 1550 1551 rcu_read_lock(); 1552 peer = rcu_dereference(priv->peer); 1553 iflink = peer ? peer->ifindex : 0; 1554 rcu_read_unlock(); 1555 1556 return iflink; 1557 } 1558 1559 static netdev_features_t veth_fix_features(struct net_device *dev, 1560 netdev_features_t features) 1561 { 1562 struct veth_priv *priv = netdev_priv(dev); 1563 struct net_device *peer; 1564 1565 peer = rtnl_dereference(priv->peer); 1566 if (peer) { 1567 struct veth_priv *peer_priv = netdev_priv(peer); 1568 1569 if (peer_priv->_xdp_prog) 1570 features &= ~NETIF_F_GSO_SOFTWARE; 1571 } 1572 if (priv->_xdp_prog) 1573 features |= NETIF_F_GRO; 1574 1575 return features; 1576 } 1577 1578 static int veth_set_features(struct net_device *dev, 1579 netdev_features_t features) 1580 { 1581 netdev_features_t changed = features ^ dev->features; 1582 struct veth_priv *priv = netdev_priv(dev); 1583 struct net_device *peer; 1584 int err; 1585 1586 if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog) 1587 return 0; 1588 1589 peer = rtnl_dereference(priv->peer); 1590 if (features & NETIF_F_GRO) { 1591 err = veth_napi_enable(dev); 1592 if (err) 1593 return err; 1594 1595 if (peer) 1596 xdp_features_set_redirect_target(peer, true); 1597 } else { 1598 if (peer) 1599 xdp_features_clear_redirect_target(peer); 1600 veth_napi_del(dev); 1601 } 1602 return 0; 1603 } 1604 1605 static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 1606 { 1607 struct veth_priv *peer_priv, *priv = netdev_priv(dev); 1608 struct net_device *peer; 1609 1610 if (new_hr < 0) 1611 new_hr = 0; 1612 1613 rcu_read_lock(); 1614 peer = rcu_dereference(priv->peer); 1615 if (unlikely(!peer)) 1616 goto out; 1617 1618 peer_priv = netdev_priv(peer); 1619 priv->requested_headroom = new_hr; 1620 new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 1621 dev->needed_headroom = new_hr; 1622 peer->needed_headroom = new_hr; 1623 1624 out: 1625 rcu_read_unlock(); 1626 } 1627 1628 static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, 1629 struct netlink_ext_ack *extack) 1630 { 1631 struct veth_priv *priv = netdev_priv(dev); 1632 struct bpf_prog *old_prog; 1633 struct net_device *peer; 1634 unsigned int max_mtu; 1635 int err; 1636 1637 old_prog = priv->_xdp_prog; 1638 priv->_xdp_prog = prog; 1639 peer = rtnl_dereference(priv->peer); 1640 1641 if (prog) { 1642 if (!peer) { 1643 NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached"); 1644 err = -ENOTCONN; 1645 goto err; 1646 } 1647 1648 max_mtu = SKB_WITH_OVERHEAD(PAGE_SIZE - VETH_XDP_HEADROOM) - 1649 peer->hard_header_len; 1650 /* Allow increasing the max_mtu if the program supports 1651 * XDP fragments. 
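 * (i.e. it was loaded with the BPF_F_XDP_HAS_FRAGS flag), in which case
 * up to MAX_SKB_FRAGS additional pages of paged data are acceptable.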
1652 */ 1653 if (prog->aux->xdp_has_frags) 1654 max_mtu += PAGE_SIZE * MAX_SKB_FRAGS; 1655 1656 if (peer->mtu > max_mtu) { 1657 NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP"); 1658 err = -ERANGE; 1659 goto err; 1660 } 1661 1662 if (dev->real_num_rx_queues < peer->real_num_tx_queues) { 1663 NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues"); 1664 err = -ENOSPC; 1665 goto err; 1666 } 1667 1668 if (dev->flags & IFF_UP) { 1669 err = veth_enable_xdp(dev); 1670 if (err) { 1671 NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed"); 1672 goto err; 1673 } 1674 } 1675 1676 if (!old_prog) { 1677 peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; 1678 peer->max_mtu = max_mtu; 1679 } 1680 1681 xdp_features_set_redirect_target(peer, true); 1682 } 1683 1684 if (old_prog) { 1685 if (!prog) { 1686 if (peer && !veth_gro_requested(dev)) 1687 xdp_features_clear_redirect_target(peer); 1688 1689 if (dev->flags & IFF_UP) 1690 veth_disable_xdp(dev); 1691 1692 if (peer) { 1693 peer->hw_features |= NETIF_F_GSO_SOFTWARE; 1694 peer->max_mtu = ETH_MAX_MTU; 1695 } 1696 } 1697 bpf_prog_put(old_prog); 1698 } 1699 1700 if ((!!old_prog ^ !!prog) && peer) 1701 netdev_update_features(peer); 1702 1703 return 0; 1704 err: 1705 priv->_xdp_prog = old_prog; 1706 1707 return err; 1708 } 1709 1710 static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1711 { 1712 switch (xdp->command) { 1713 case XDP_SETUP_PROG: 1714 return veth_xdp_set(dev, xdp->prog, xdp->extack); 1715 default: 1716 return -EINVAL; 1717 } 1718 } 1719 1720 static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) 1721 { 1722 struct veth_xdp_buff *_ctx = (void *)ctx; 1723 1724 if (!_ctx->skb) 1725 return -ENODATA; 1726 1727 *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp; 1728 return 0; 1729 } 1730 1731 static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, 1732 enum xdp_rss_hash_type *rss_type) 1733 { 1734 struct veth_xdp_buff *_ctx = (void *)ctx; 1735 struct sk_buff *skb = _ctx->skb; 1736 1737 if (!skb) 1738 return -ENODATA; 1739 1740 *hash = skb_get_hash(skb); 1741 *rss_type = skb->l4_hash ? 
XDP_RSS_TYPE_L4_ANY : XDP_RSS_TYPE_NONE; 1742 1743 return 0; 1744 } 1745 1746 static const struct net_device_ops veth_netdev_ops = { 1747 .ndo_init = veth_dev_init, 1748 .ndo_open = veth_open, 1749 .ndo_stop = veth_close, 1750 .ndo_start_xmit = veth_xmit, 1751 .ndo_get_stats64 = veth_get_stats64, 1752 .ndo_set_rx_mode = veth_set_multicast_list, 1753 .ndo_set_mac_address = eth_mac_addr, 1754 #ifdef CONFIG_NET_POLL_CONTROLLER 1755 .ndo_poll_controller = veth_poll_controller, 1756 #endif 1757 .ndo_get_iflink = veth_get_iflink, 1758 .ndo_fix_features = veth_fix_features, 1759 .ndo_set_features = veth_set_features, 1760 .ndo_features_check = passthru_features_check, 1761 .ndo_set_rx_headroom = veth_set_rx_headroom, 1762 .ndo_bpf = veth_xdp, 1763 .ndo_xdp_xmit = veth_ndo_xdp_xmit, 1764 .ndo_get_peer_dev = veth_peer_dev, 1765 }; 1766 1767 static const struct xdp_metadata_ops veth_xdp_metadata_ops = { 1768 .xmo_rx_timestamp = veth_xdp_rx_timestamp, 1769 .xmo_rx_hash = veth_xdp_rx_hash, 1770 }; 1771 1772 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 1773 NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 1774 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 1775 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 1776 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 1777 1778 static void veth_setup(struct net_device *dev) 1779 { 1780 ether_setup(dev); 1781 1782 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1783 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1784 dev->priv_flags |= IFF_NO_QUEUE; 1785 dev->priv_flags |= IFF_PHONY_HEADROOM; 1786 1787 dev->netdev_ops = &veth_netdev_ops; 1788 dev->xdp_metadata_ops = &veth_xdp_metadata_ops; 1789 dev->ethtool_ops = &veth_ethtool_ops; 1790 dev->features |= NETIF_F_LLTX; 1791 dev->features |= VETH_FEATURES; 1792 dev->vlan_features = dev->features & 1793 ~(NETIF_F_HW_VLAN_CTAG_TX | 1794 NETIF_F_HW_VLAN_STAG_TX | 1795 NETIF_F_HW_VLAN_CTAG_RX | 1796 NETIF_F_HW_VLAN_STAG_RX); 1797 dev->needs_free_netdev = true; 1798 dev->priv_destructor = veth_dev_free; 1799 dev->max_mtu = ETH_MAX_MTU; 1800 1801 dev->hw_features = VETH_FEATURES; 1802 dev->hw_enc_features = VETH_FEATURES; 1803 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 1804 netif_set_tso_max_size(dev, GSO_MAX_SIZE); 1805 } 1806 1807 /* 1808 * netlink interface 1809 */ 1810 1811 static int veth_validate(struct nlattr *tb[], struct nlattr *data[], 1812 struct netlink_ext_ack *extack) 1813 { 1814 if (tb[IFLA_ADDRESS]) { 1815 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1816 return -EINVAL; 1817 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1818 return -EADDRNOTAVAIL; 1819 } 1820 if (tb[IFLA_MTU]) { 1821 if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 1822 return -EINVAL; 1823 } 1824 return 0; 1825 } 1826 1827 static struct rtnl_link_ops veth_link_ops; 1828 1829 static void veth_disable_gro(struct net_device *dev) 1830 { 1831 dev->features &= ~NETIF_F_GRO; 1832 dev->wanted_features &= ~NETIF_F_GRO; 1833 netdev_update_features(dev); 1834 } 1835 1836 static int veth_init_queues(struct net_device *dev, struct nlattr *tb[]) 1837 { 1838 int err; 1839 1840 if (!tb[IFLA_NUM_TX_QUEUES] && dev->num_tx_queues > 1) { 1841 err = netif_set_real_num_tx_queues(dev, 1); 1842 if (err) 1843 return err; 1844 } 1845 if (!tb[IFLA_NUM_RX_QUEUES] && dev->num_rx_queues > 1) { 1846 err = netif_set_real_num_rx_queues(dev, 1); 1847 if (err) 1848 return err; 1849 } 1850 return 0; 1851 } 1852 1853 static int veth_newlink(struct net *src_net, struct net_device *dev, 1854 struct nlattr *tb[], 
struct nlattr *data[], 1855 struct netlink_ext_ack *extack) 1856 { 1857 int err; 1858 struct net_device *peer; 1859 struct veth_priv *priv; 1860 char ifname[IFNAMSIZ]; 1861 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 1862 unsigned char name_assign_type; 1863 struct ifinfomsg *ifmp; 1864 struct net *net; 1865 1866 /* 1867 * create and register peer first 1868 */ 1869 if (data != NULL && data[VETH_INFO_PEER] != NULL) { 1870 struct nlattr *nla_peer; 1871 1872 nla_peer = data[VETH_INFO_PEER]; 1873 ifmp = nla_data(nla_peer); 1874 err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack); 1875 if (err < 0) 1876 return err; 1877 1878 err = veth_validate(peer_tb, NULL, extack); 1879 if (err < 0) 1880 return err; 1881 1882 tbp = peer_tb; 1883 } else { 1884 ifmp = NULL; 1885 tbp = tb; 1886 } 1887 1888 if (ifmp && tbp[IFLA_IFNAME]) { 1889 nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 1890 name_assign_type = NET_NAME_USER; 1891 } else { 1892 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 1893 name_assign_type = NET_NAME_ENUM; 1894 } 1895 1896 net = rtnl_link_get_net(src_net, tbp); 1897 if (IS_ERR(net)) 1898 return PTR_ERR(net); 1899 1900 peer = rtnl_create_link(net, ifname, name_assign_type, 1901 &veth_link_ops, tbp, extack); 1902 if (IS_ERR(peer)) { 1903 put_net(net); 1904 return PTR_ERR(peer); 1905 } 1906 1907 if (!ifmp || !tbp[IFLA_ADDRESS]) 1908 eth_hw_addr_random(peer); 1909 1910 if (ifmp && (dev->ifindex != 0)) 1911 peer->ifindex = ifmp->ifi_index; 1912 1913 netif_inherit_tso_max(peer, dev); 1914 1915 err = register_netdevice(peer); 1916 put_net(net); 1917 net = NULL; 1918 if (err < 0) 1919 goto err_register_peer; 1920 1921 /* keep GRO disabled by default to be consistent with the established 1922 * veth behavior 1923 */ 1924 veth_disable_gro(peer); 1925 netif_carrier_off(peer); 1926 1927 err = rtnl_configure_link(peer, ifmp, 0, NULL); 1928 if (err < 0) 1929 goto err_configure_peer; 1930 1931 /* 1932 * register dev last 1933 * 1934 * note, that since we've registered new device the dev's name 1935 * should be re-allocated 1936 */ 1937 1938 if (tb[IFLA_ADDRESS] == NULL) 1939 eth_hw_addr_random(dev); 1940 1941 if (tb[IFLA_IFNAME]) 1942 nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 1943 else 1944 snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 1945 1946 err = register_netdevice(dev); 1947 if (err < 0) 1948 goto err_register_dev; 1949 1950 netif_carrier_off(dev); 1951 1952 /* 1953 * tie the deviced together 1954 */ 1955 1956 priv = netdev_priv(dev); 1957 rcu_assign_pointer(priv->peer, peer); 1958 err = veth_init_queues(dev, tb); 1959 if (err) 1960 goto err_queues; 1961 1962 priv = netdev_priv(peer); 1963 rcu_assign_pointer(priv->peer, dev); 1964 err = veth_init_queues(peer, tb); 1965 if (err) 1966 goto err_queues; 1967 1968 veth_disable_gro(dev); 1969 /* update XDP supported features */ 1970 veth_set_xdp_features(dev); 1971 veth_set_xdp_features(peer); 1972 1973 return 0; 1974 1975 err_queues: 1976 unregister_netdevice(dev); 1977 err_register_dev: 1978 /* nothing to do */ 1979 err_configure_peer: 1980 unregister_netdevice(peer); 1981 return err; 1982 1983 err_register_peer: 1984 free_netdev(peer); 1985 return err; 1986 } 1987 1988 static void veth_dellink(struct net_device *dev, struct list_head *head) 1989 { 1990 struct veth_priv *priv; 1991 struct net_device *peer; 1992 1993 priv = netdev_priv(dev); 1994 peer = rtnl_dereference(priv->peer); 1995 1996 /* Note : dellink() is called from default_device_exit_batch(), 1997 * before a rcu_synchronize() point. 
The devices are guaranteed
	 * not to be freed before an RCU grace period elapses.
	 */
	RCU_INIT_POINTER(priv->peer, NULL);
	unregister_netdevice_queue(dev, head);

	if (peer) {
		priv = netdev_priv(peer);
		RCU_INIT_POINTER(priv->peer, NULL);
		unregister_netdevice_queue(peer, head);
	}
}

static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
	[VETH_INFO_PEER]	= { .len = sizeof(struct ifinfomsg) },
};

static struct net *veth_get_link_net(const struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);

	return peer ? dev_net(peer) : dev_net(dev);
}

static unsigned int veth_get_num_queues(void)
{
	/* enforce the same queue limit as rtnl_create_link */
	int queues = num_possible_cpus();

	if (queues > 4096)
		queues = 4096;
	return queues;
}

static struct rtnl_link_ops veth_link_ops = {
	.kind			= DRV_NAME,
	.priv_size		= sizeof(struct veth_priv),
	.setup			= veth_setup,
	.validate		= veth_validate,
	.newlink		= veth_newlink,
	.dellink		= veth_dellink,
	.policy			= veth_policy,
	.maxtype		= VETH_INFO_MAX,
	.get_link_net		= veth_get_link_net,
	.get_num_tx_queues	= veth_get_num_queues,
	.get_num_rx_queues	= veth_get_num_queues,
};

/*
 * init/fini
 */

static __init int veth_init(void)
{
	return rtnl_link_register(&veth_link_ops);
}

static __exit void veth_exit(void)
{
	rtnl_link_unregister(&veth_link_ops);
}

module_init(veth_init);
module_exit(veth_exit);

MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);