1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * drivers/net/veth.c 4 * 5 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc 6 * 7 * Author: Pavel Emelianov <xemul@openvz.org> 8 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com> 9 * 10 */ 11 12 #include <linux/netdevice.h> 13 #include <linux/slab.h> 14 #include <linux/ethtool.h> 15 #include <linux/etherdevice.h> 16 #include <linux/u64_stats_sync.h> 17 18 #include <net/rtnetlink.h> 19 #include <net/dst.h> 20 #include <net/xfrm.h> 21 #include <net/xdp.h> 22 #include <linux/veth.h> 23 #include <linux/module.h> 24 #include <linux/bpf.h> 25 #include <linux/filter.h> 26 #include <linux/ptr_ring.h> 27 #include <linux/bpf_trace.h> 28 #include <linux/net_tstamp.h> 29 #include <net/page_pool/helpers.h> 30 31 #define DRV_NAME "veth" 32 #define DRV_VERSION "1.0" 33 34 #define VETH_XDP_FLAG BIT(0) 35 #define VETH_RING_SIZE 256 36 #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) 37 38 #define VETH_XDP_TX_BULK_SIZE 16 39 #define VETH_XDP_BATCH 16 40 41 struct veth_stats { 42 u64 rx_drops; 43 /* xdp */ 44 u64 xdp_packets; 45 u64 xdp_bytes; 46 u64 xdp_redirect; 47 u64 xdp_drops; 48 u64 xdp_tx; 49 u64 xdp_tx_err; 50 u64 peer_tq_xdp_xmit; 51 u64 peer_tq_xdp_xmit_err; 52 }; 53 54 struct veth_rq_stats { 55 struct veth_stats vs; 56 struct u64_stats_sync syncp; 57 }; 58 59 struct veth_rq { 60 struct napi_struct xdp_napi; 61 struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */ 62 struct net_device *dev; 63 struct bpf_prog __rcu *xdp_prog; 64 struct xdp_mem_info xdp_mem; 65 struct veth_rq_stats stats; 66 bool rx_notify_masked; 67 struct ptr_ring xdp_ring; 68 struct xdp_rxq_info xdp_rxq; 69 struct page_pool *page_pool; 70 }; 71 72 struct veth_priv { 73 struct net_device __rcu *peer; 74 atomic64_t dropped; 75 struct bpf_prog *_xdp_prog; 76 struct veth_rq *rq; 77 unsigned int requested_headroom; 78 }; 79 80 struct veth_xdp_tx_bq { 81 struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE]; 82 unsigned int count; 83 }; 84 85 /* 86 * ethtool interface 87 */ 88 89 struct veth_q_stat_desc { 90 char desc[ETH_GSTRING_LEN]; 91 size_t offset; 92 }; 93 94 #define VETH_RQ_STAT(m) offsetof(struct veth_stats, m) 95 96 static const struct veth_q_stat_desc veth_rq_stats_desc[] = { 97 { "xdp_packets", VETH_RQ_STAT(xdp_packets) }, 98 { "xdp_bytes", VETH_RQ_STAT(xdp_bytes) }, 99 { "drops", VETH_RQ_STAT(rx_drops) }, 100 { "xdp_redirect", VETH_RQ_STAT(xdp_redirect) }, 101 { "xdp_drops", VETH_RQ_STAT(xdp_drops) }, 102 { "xdp_tx", VETH_RQ_STAT(xdp_tx) }, 103 { "xdp_tx_errors", VETH_RQ_STAT(xdp_tx_err) }, 104 }; 105 106 #define VETH_RQ_STATS_LEN ARRAY_SIZE(veth_rq_stats_desc) 107 108 static const struct veth_q_stat_desc veth_tq_stats_desc[] = { 109 { "xdp_xmit", VETH_RQ_STAT(peer_tq_xdp_xmit) }, 110 { "xdp_xmit_errors", VETH_RQ_STAT(peer_tq_xdp_xmit_err) }, 111 }; 112 113 #define VETH_TQ_STATS_LEN ARRAY_SIZE(veth_tq_stats_desc) 114 115 static struct { 116 const char string[ETH_GSTRING_LEN]; 117 } ethtool_stats_keys[] = { 118 { "peer_ifindex" }, 119 }; 120 121 struct veth_xdp_buff { 122 struct xdp_buff xdp; 123 struct sk_buff *skb; 124 }; 125 126 static int veth_get_link_ksettings(struct net_device *dev, 127 struct ethtool_link_ksettings *cmd) 128 { 129 cmd->base.speed = SPEED_10000; 130 cmd->base.duplex = DUPLEX_FULL; 131 cmd->base.port = PORT_TP; 132 cmd->base.autoneg = AUTONEG_DISABLE; 133 return 0; 134 } 135 136 static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 137 { 138 strscpy(info->driver, DRV_NAME, sizeof(info->driver)); 139 strscpy(info->version, DRV_VERSION, sizeof(info->version)); 140 } 141 142 static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 143 { 144 u8 *p = buf; 145 int i, j; 146 147 switch(stringset) { 148 case ETH_SS_STATS: 149 memcpy(p, ðtool_stats_keys, sizeof(ethtool_stats_keys)); 150 p += sizeof(ethtool_stats_keys); 151 for (i = 0; i < dev->real_num_rx_queues; i++) 152 for (j = 0; j < VETH_RQ_STATS_LEN; j++) 153 ethtool_sprintf(&p, "rx_queue_%u_%.18s", 154 i, veth_rq_stats_desc[j].desc); 155 156 for (i = 0; i < dev->real_num_tx_queues; i++) 157 for (j = 0; j < VETH_TQ_STATS_LEN; j++) 158 ethtool_sprintf(&p, "tx_queue_%u_%.18s", 159 i, veth_tq_stats_desc[j].desc); 160 161 page_pool_ethtool_stats_get_strings(p); 162 break; 163 } 164 } 165 166 static int veth_get_sset_count(struct net_device *dev, int sset) 167 { 168 switch (sset) { 169 case ETH_SS_STATS: 170 return ARRAY_SIZE(ethtool_stats_keys) + 171 VETH_RQ_STATS_LEN * dev->real_num_rx_queues + 172 VETH_TQ_STATS_LEN * dev->real_num_tx_queues + 173 page_pool_ethtool_stats_get_count(); 174 default: 175 return -EOPNOTSUPP; 176 } 177 } 178 179 static void veth_get_page_pool_stats(struct net_device *dev, u64 *data) 180 { 181 #ifdef CONFIG_PAGE_POOL_STATS 182 struct veth_priv *priv = netdev_priv(dev); 183 struct page_pool_stats pp_stats = {}; 184 int i; 185 186 for (i = 0; i < dev->real_num_rx_queues; i++) { 187 if (!priv->rq[i].page_pool) 188 continue; 189 page_pool_get_stats(priv->rq[i].page_pool, &pp_stats); 190 } 191 page_pool_ethtool_stats_get(data, &pp_stats); 192 #endif /* CONFIG_PAGE_POOL_STATS */ 193 } 194 195 static void veth_get_ethtool_stats(struct net_device *dev, 196 struct ethtool_stats *stats, u64 *data) 197 { 198 struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 199 struct net_device *peer = rtnl_dereference(priv->peer); 200 int i, j, idx, pp_idx; 201 202 data[0] = peer ? peer->ifindex : 0; 203 idx = 1; 204 for (i = 0; i < dev->real_num_rx_queues; i++) { 205 const struct veth_rq_stats *rq_stats = &priv->rq[i].stats; 206 const void *stats_base = (void *)&rq_stats->vs; 207 unsigned int start; 208 size_t offset; 209 210 do { 211 start = u64_stats_fetch_begin(&rq_stats->syncp); 212 for (j = 0; j < VETH_RQ_STATS_LEN; j++) { 213 offset = veth_rq_stats_desc[j].offset; 214 data[idx + j] = *(u64 *)(stats_base + offset); 215 } 216 } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); 217 idx += VETH_RQ_STATS_LEN; 218 } 219 pp_idx = idx; 220 221 if (!peer) 222 goto page_pool_stats; 223 224 rcv_priv = netdev_priv(peer); 225 for (i = 0; i < peer->real_num_rx_queues; i++) { 226 const struct veth_rq_stats *rq_stats = &rcv_priv->rq[i].stats; 227 const void *base = (void *)&rq_stats->vs; 228 unsigned int start, tx_idx = idx; 229 size_t offset; 230 231 tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN; 232 do { 233 start = u64_stats_fetch_begin(&rq_stats->syncp); 234 for (j = 0; j < VETH_TQ_STATS_LEN; j++) { 235 offset = veth_tq_stats_desc[j].offset; 236 data[tx_idx + j] += *(u64 *)(base + offset); 237 } 238 } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); 239 pp_idx = tx_idx + VETH_TQ_STATS_LEN; 240 } 241 242 page_pool_stats: 243 veth_get_page_pool_stats(dev, &data[pp_idx]); 244 } 245 246 static void veth_get_channels(struct net_device *dev, 247 struct ethtool_channels *channels) 248 { 249 channels->tx_count = dev->real_num_tx_queues; 250 channels->rx_count = dev->real_num_rx_queues; 251 channels->max_tx = dev->num_tx_queues; 252 channels->max_rx = dev->num_rx_queues; 253 } 254 255 static int veth_set_channels(struct net_device *dev, 256 struct ethtool_channels *ch); 257 258 static const struct ethtool_ops veth_ethtool_ops = { 259 .get_drvinfo = veth_get_drvinfo, 260 .get_link = ethtool_op_get_link, 261 .get_strings = veth_get_strings, 262 .get_sset_count = veth_get_sset_count, 263 .get_ethtool_stats = veth_get_ethtool_stats, 264 .get_link_ksettings = veth_get_link_ksettings, 265 .get_ts_info = ethtool_op_get_ts_info, 266 .get_channels = veth_get_channels, 267 .set_channels = veth_set_channels, 268 }; 269 270 /* general routines */ 271 272 static bool veth_is_xdp_frame(void *ptr) 273 { 274 return (unsigned long)ptr & VETH_XDP_FLAG; 275 } 276 277 static struct xdp_frame *veth_ptr_to_xdp(void *ptr) 278 { 279 return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG); 280 } 281 282 static void *veth_xdp_to_ptr(struct xdp_frame *xdp) 283 { 284 return (void *)((unsigned long)xdp | VETH_XDP_FLAG); 285 } 286 287 static void veth_ptr_free(void *ptr) 288 { 289 if (veth_is_xdp_frame(ptr)) 290 xdp_return_frame(veth_ptr_to_xdp(ptr)); 291 else 292 kfree_skb(ptr); 293 } 294 295 static void __veth_xdp_flush(struct veth_rq *rq) 296 { 297 /* Write ptr_ring before reading rx_notify_masked */ 298 smp_mb(); 299 if (!READ_ONCE(rq->rx_notify_masked) && 300 napi_schedule_prep(&rq->xdp_napi)) { 301 WRITE_ONCE(rq->rx_notify_masked, true); 302 __napi_schedule(&rq->xdp_napi); 303 } 304 } 305 306 static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb) 307 { 308 if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) { 309 dev_kfree_skb_any(skb); 310 return NET_RX_DROP; 311 } 312 313 return NET_RX_SUCCESS; 314 } 315 316 static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, 317 struct veth_rq *rq, bool xdp) 318 { 319 return __dev_forward_skb(dev, skb) ?: xdp ? 320 veth_xdp_rx(rq, skb) : 321 __netif_rx(skb); 322 } 323 324 /* return true if the specified skb has chances of GRO aggregation 325 * Don't strive for accuracy, but try to avoid GRO overhead in the most 326 * common scenarios. 327 * When XDP is enabled, all traffic is considered eligible, as the xmit 328 * device has TSO off. 329 * When TSO is enabled on the xmit device, we are likely interested only 330 * in UDP aggregation, explicitly check for that if the skb is suspected 331 * - the sock_wfree destructor is used by UDP, ICMP and XDP sockets - 332 * to belong to locally generated UDP traffic. 333 */ 334 static bool veth_skb_is_eligible_for_gro(const struct net_device *dev, 335 const struct net_device *rcv, 336 const struct sk_buff *skb) 337 { 338 return !(dev->features & NETIF_F_ALL_TSO) || 339 (skb->destructor == sock_wfree && 340 rcv->features & (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD)); 341 } 342 343 static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) 344 { 345 struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 346 struct veth_rq *rq = NULL; 347 int ret = NETDEV_TX_OK; 348 struct net_device *rcv; 349 int length = skb->len; 350 bool use_napi = false; 351 int rxq; 352 353 rcu_read_lock(); 354 rcv = rcu_dereference(priv->peer); 355 if (unlikely(!rcv) || !pskb_may_pull(skb, ETH_HLEN)) { 356 kfree_skb(skb); 357 goto drop; 358 } 359 360 rcv_priv = netdev_priv(rcv); 361 rxq = skb_get_queue_mapping(skb); 362 if (rxq < rcv->real_num_rx_queues) { 363 rq = &rcv_priv->rq[rxq]; 364 365 /* The napi pointer is available when an XDP program is 366 * attached or when GRO is enabled 367 * Don't bother with napi/GRO if the skb can't be aggregated 368 */ 369 use_napi = rcu_access_pointer(rq->napi) && 370 veth_skb_is_eligible_for_gro(dev, rcv, skb); 371 } 372 373 skb_tx_timestamp(skb); 374 if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { 375 if (!use_napi) 376 dev_lstats_add(dev, length); 377 else 378 __veth_xdp_flush(rq); 379 } else { 380 drop: 381 atomic64_inc(&priv->dropped); 382 ret = NET_XMIT_DROP; 383 } 384 385 rcu_read_unlock(); 386 387 return ret; 388 } 389 390 static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes) 391 { 392 struct veth_priv *priv = netdev_priv(dev); 393 394 dev_lstats_read(dev, packets, bytes); 395 return atomic64_read(&priv->dropped); 396 } 397 398 static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) 399 { 400 struct veth_priv *priv = netdev_priv(dev); 401 int i; 402 403 result->peer_tq_xdp_xmit_err = 0; 404 result->xdp_packets = 0; 405 result->xdp_tx_err = 0; 406 result->xdp_bytes = 0; 407 result->rx_drops = 0; 408 for (i = 0; i < dev->num_rx_queues; i++) { 409 u64 packets, bytes, drops, xdp_tx_err, peer_tq_xdp_xmit_err; 410 struct veth_rq_stats *stats = &priv->rq[i].stats; 411 unsigned int start; 412 413 do { 414 start = u64_stats_fetch_begin(&stats->syncp); 415 peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err; 416 xdp_tx_err = stats->vs.xdp_tx_err; 417 packets = stats->vs.xdp_packets; 418 bytes = stats->vs.xdp_bytes; 419 drops = stats->vs.rx_drops; 420 } while (u64_stats_fetch_retry(&stats->syncp, start)); 421 result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err; 422 result->xdp_tx_err += xdp_tx_err; 423 result->xdp_packets += packets; 424 result->xdp_bytes += bytes; 425 result->rx_drops += drops; 426 } 427 } 428 429 static void veth_get_stats64(struct net_device *dev, 430 struct rtnl_link_stats64 *tot) 431 { 432 struct veth_priv *priv = netdev_priv(dev); 433 struct net_device *peer; 434 struct veth_stats rx; 435 u64 packets, bytes; 436 437 tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes); 438 tot->tx_bytes = bytes; 439 tot->tx_packets = packets; 440 441 veth_stats_rx(&rx, dev); 442 tot->tx_dropped += rx.xdp_tx_err; 443 tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err; 444 tot->rx_bytes = rx.xdp_bytes; 445 tot->rx_packets = rx.xdp_packets; 446 447 rcu_read_lock(); 448 peer = rcu_dereference(priv->peer); 449 if (peer) { 450 veth_stats_tx(peer, &packets, &bytes); 451 tot->rx_bytes += bytes; 452 tot->rx_packets += packets; 453 454 veth_stats_rx(&rx, peer); 455 tot->tx_dropped += rx.peer_tq_xdp_xmit_err; 456 tot->rx_dropped += rx.xdp_tx_err; 457 tot->tx_bytes += rx.xdp_bytes; 458 tot->tx_packets += rx.xdp_packets; 459 } 460 rcu_read_unlock(); 461 } 462 463 /* fake multicast ability */ 464 static void veth_set_multicast_list(struct net_device *dev) 465 { 466 } 467 468 static int veth_select_rxq(struct net_device *dev) 469 { 470 return smp_processor_id() % dev->real_num_rx_queues; 471 } 472 473 static struct net_device *veth_peer_dev(struct net_device *dev) 474 { 475 struct veth_priv *priv = netdev_priv(dev); 476 477 /* Callers must be under RCU read side. */ 478 return rcu_dereference(priv->peer); 479 } 480 481 static int veth_xdp_xmit(struct net_device *dev, int n, 482 struct xdp_frame **frames, 483 u32 flags, bool ndo_xmit) 484 { 485 struct veth_priv *rcv_priv, *priv = netdev_priv(dev); 486 int i, ret = -ENXIO, nxmit = 0; 487 struct net_device *rcv; 488 unsigned int max_len; 489 struct veth_rq *rq; 490 491 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 492 return -EINVAL; 493 494 rcu_read_lock(); 495 rcv = rcu_dereference(priv->peer); 496 if (unlikely(!rcv)) 497 goto out; 498 499 rcv_priv = netdev_priv(rcv); 500 rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 501 /* The napi pointer is set if NAPI is enabled, which ensures that 502 * xdp_ring is initialized on receive side and the peer device is up. 503 */ 504 if (!rcu_access_pointer(rq->napi)) 505 goto out; 506 507 max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN; 508 509 spin_lock(&rq->xdp_ring.producer_lock); 510 for (i = 0; i < n; i++) { 511 struct xdp_frame *frame = frames[i]; 512 void *ptr = veth_xdp_to_ptr(frame); 513 514 if (unlikely(xdp_get_frame_len(frame) > max_len || 515 __ptr_ring_produce(&rq->xdp_ring, ptr))) 516 break; 517 nxmit++; 518 } 519 spin_unlock(&rq->xdp_ring.producer_lock); 520 521 if (flags & XDP_XMIT_FLUSH) 522 __veth_xdp_flush(rq); 523 524 ret = nxmit; 525 if (ndo_xmit) { 526 u64_stats_update_begin(&rq->stats.syncp); 527 rq->stats.vs.peer_tq_xdp_xmit += nxmit; 528 rq->stats.vs.peer_tq_xdp_xmit_err += n - nxmit; 529 u64_stats_update_end(&rq->stats.syncp); 530 } 531 532 out: 533 rcu_read_unlock(); 534 535 return ret; 536 } 537 538 static int veth_ndo_xdp_xmit(struct net_device *dev, int n, 539 struct xdp_frame **frames, u32 flags) 540 { 541 int err; 542 543 err = veth_xdp_xmit(dev, n, frames, flags, true); 544 if (err < 0) { 545 struct veth_priv *priv = netdev_priv(dev); 546 547 atomic64_add(n, &priv->dropped); 548 } 549 550 return err; 551 } 552 553 static void veth_xdp_flush_bq(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 554 { 555 int sent, i, err = 0, drops; 556 557 sent = veth_xdp_xmit(rq->dev, bq->count, bq->q, 0, false); 558 if (sent < 0) { 559 err = sent; 560 sent = 0; 561 } 562 563 for (i = sent; unlikely(i < bq->count); i++) 564 xdp_return_frame(bq->q[i]); 565 566 drops = bq->count - sent; 567 trace_xdp_bulk_tx(rq->dev, sent, drops, err); 568 569 u64_stats_update_begin(&rq->stats.syncp); 570 rq->stats.vs.xdp_tx += sent; 571 rq->stats.vs.xdp_tx_err += drops; 572 u64_stats_update_end(&rq->stats.syncp); 573 574 bq->count = 0; 575 } 576 577 static void veth_xdp_flush(struct veth_rq *rq, struct veth_xdp_tx_bq *bq) 578 { 579 struct veth_priv *rcv_priv, *priv = netdev_priv(rq->dev); 580 struct net_device *rcv; 581 struct veth_rq *rcv_rq; 582 583 rcu_read_lock(); 584 veth_xdp_flush_bq(rq, bq); 585 rcv = rcu_dereference(priv->peer); 586 if (unlikely(!rcv)) 587 goto out; 588 589 rcv_priv = netdev_priv(rcv); 590 rcv_rq = &rcv_priv->rq[veth_select_rxq(rcv)]; 591 /* xdp_ring is initialized on receive side? */ 592 if (unlikely(!rcu_access_pointer(rcv_rq->xdp_prog))) 593 goto out; 594 595 __veth_xdp_flush(rcv_rq); 596 out: 597 rcu_read_unlock(); 598 } 599 600 static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp, 601 struct veth_xdp_tx_bq *bq) 602 { 603 struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp); 604 605 if (unlikely(!frame)) 606 return -EOVERFLOW; 607 608 if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE)) 609 veth_xdp_flush_bq(rq, bq); 610 611 bq->q[bq->count++] = frame; 612 613 return 0; 614 } 615 616 static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq, 617 struct xdp_frame *frame, 618 struct veth_xdp_tx_bq *bq, 619 struct veth_stats *stats) 620 { 621 struct xdp_frame orig_frame; 622 struct bpf_prog *xdp_prog; 623 624 rcu_read_lock(); 625 xdp_prog = rcu_dereference(rq->xdp_prog); 626 if (likely(xdp_prog)) { 627 struct veth_xdp_buff vxbuf; 628 struct xdp_buff *xdp = &vxbuf.xdp; 629 u32 act; 630 631 xdp_convert_frame_to_buff(frame, xdp); 632 xdp->rxq = &rq->xdp_rxq; 633 vxbuf.skb = NULL; 634 635 act = bpf_prog_run_xdp(xdp_prog, xdp); 636 637 switch (act) { 638 case XDP_PASS: 639 if (xdp_update_frame_from_buff(xdp, frame)) 640 goto err_xdp; 641 break; 642 case XDP_TX: 643 orig_frame = *frame; 644 xdp->rxq->mem = frame->mem; 645 if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) { 646 trace_xdp_exception(rq->dev, xdp_prog, act); 647 frame = &orig_frame; 648 stats->rx_drops++; 649 goto err_xdp; 650 } 651 stats->xdp_tx++; 652 rcu_read_unlock(); 653 goto xdp_xmit; 654 case XDP_REDIRECT: 655 orig_frame = *frame; 656 xdp->rxq->mem = frame->mem; 657 if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { 658 frame = &orig_frame; 659 stats->rx_drops++; 660 goto err_xdp; 661 } 662 stats->xdp_redirect++; 663 rcu_read_unlock(); 664 goto xdp_xmit; 665 default: 666 bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act); 667 fallthrough; 668 case XDP_ABORTED: 669 trace_xdp_exception(rq->dev, xdp_prog, act); 670 fallthrough; 671 case XDP_DROP: 672 stats->xdp_drops++; 673 goto err_xdp; 674 } 675 } 676 rcu_read_unlock(); 677 678 return frame; 679 err_xdp: 680 rcu_read_unlock(); 681 xdp_return_frame(frame); 682 xdp_xmit: 683 return NULL; 684 } 685 686 /* frames array contains VETH_XDP_BATCH at most */ 687 static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames, 688 int n_xdpf, struct veth_xdp_tx_bq *bq, 689 struct veth_stats *stats) 690 { 691 void *skbs[VETH_XDP_BATCH]; 692 int i; 693 694 if (xdp_alloc_skb_bulk(skbs, n_xdpf, 695 GFP_ATOMIC | __GFP_ZERO) < 0) { 696 for (i = 0; i < n_xdpf; i++) 697 xdp_return_frame(frames[i]); 698 stats->rx_drops += n_xdpf; 699 700 return; 701 } 702 703 for (i = 0; i < n_xdpf; i++) { 704 struct sk_buff *skb = skbs[i]; 705 706 skb = __xdp_build_skb_from_frame(frames[i], skb, 707 rq->dev); 708 if (!skb) { 709 xdp_return_frame(frames[i]); 710 stats->rx_drops++; 711 continue; 712 } 713 napi_gro_receive(&rq->xdp_napi, skb); 714 } 715 } 716 717 static void veth_xdp_get(struct xdp_buff *xdp) 718 { 719 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 720 int i; 721 722 get_page(virt_to_page(xdp->data)); 723 if (likely(!xdp_buff_has_frags(xdp))) 724 return; 725 726 for (i = 0; i < sinfo->nr_frags; i++) 727 __skb_frag_ref(&sinfo->frags[i]); 728 } 729 730 static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq, 731 struct xdp_buff *xdp, 732 struct sk_buff **pskb) 733 { 734 struct sk_buff *skb = *pskb; 735 u32 frame_sz; 736 737 if (skb_shared(skb) || skb_head_is_locked(skb) || 738 skb_shinfo(skb)->nr_frags || 739 skb_headroom(skb) < XDP_PACKET_HEADROOM) { 740 u32 size, len, max_head_size, off; 741 struct sk_buff *nskb; 742 struct page *page; 743 int i, head_off; 744 745 /* We need a private copy of the skb and data buffers since 746 * the ebpf program can modify it. We segment the original skb 747 * into order-0 pages without linearize it. 748 * 749 * Make sure we have enough space for linear and paged area 750 */ 751 max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE - 752 VETH_XDP_HEADROOM); 753 if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size) 754 goto drop; 755 756 /* Allocate skb head */ 757 page = page_pool_dev_alloc_pages(rq->page_pool); 758 if (!page) 759 goto drop; 760 761 nskb = napi_build_skb(page_address(page), PAGE_SIZE); 762 if (!nskb) { 763 page_pool_put_full_page(rq->page_pool, page, true); 764 goto drop; 765 } 766 767 skb_reserve(nskb, VETH_XDP_HEADROOM); 768 skb_copy_header(nskb, skb); 769 skb_mark_for_recycle(nskb); 770 771 size = min_t(u32, skb->len, max_head_size); 772 if (skb_copy_bits(skb, 0, nskb->data, size)) { 773 consume_skb(nskb); 774 goto drop; 775 } 776 skb_put(nskb, size); 777 778 head_off = skb_headroom(nskb) - skb_headroom(skb); 779 skb_headers_offset_update(nskb, head_off); 780 781 /* Allocate paged area of new skb */ 782 off = size; 783 len = skb->len - off; 784 785 for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) { 786 page = page_pool_dev_alloc_pages(rq->page_pool); 787 if (!page) { 788 consume_skb(nskb); 789 goto drop; 790 } 791 792 size = min_t(u32, len, PAGE_SIZE); 793 skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE); 794 if (skb_copy_bits(skb, off, page_address(page), 795 size)) { 796 consume_skb(nskb); 797 goto drop; 798 } 799 800 len -= size; 801 off += size; 802 } 803 804 consume_skb(skb); 805 skb = nskb; 806 } 807 808 /* SKB "head" area always have tailroom for skb_shared_info */ 809 frame_sz = skb_end_pointer(skb) - skb->head; 810 frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 811 xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); 812 xdp_prepare_buff(xdp, skb->head, skb_headroom(skb), 813 skb_headlen(skb), true); 814 815 if (skb_is_nonlinear(skb)) { 816 skb_shinfo(skb)->xdp_frags_size = skb->data_len; 817 xdp_buff_set_frags_flag(xdp); 818 } else { 819 xdp_buff_clear_frags_flag(xdp); 820 } 821 *pskb = skb; 822 823 return 0; 824 drop: 825 consume_skb(skb); 826 *pskb = NULL; 827 828 return -ENOMEM; 829 } 830 831 static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, 832 struct sk_buff *skb, 833 struct veth_xdp_tx_bq *bq, 834 struct veth_stats *stats) 835 { 836 void *orig_data, *orig_data_end; 837 struct bpf_prog *xdp_prog; 838 struct veth_xdp_buff vxbuf; 839 struct xdp_buff *xdp = &vxbuf.xdp; 840 u32 act, metalen; 841 int off; 842 843 skb_prepare_for_gro(skb); 844 845 rcu_read_lock(); 846 xdp_prog = rcu_dereference(rq->xdp_prog); 847 if (unlikely(!xdp_prog)) { 848 rcu_read_unlock(); 849 goto out; 850 } 851 852 __skb_push(skb, skb->data - skb_mac_header(skb)); 853 if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb)) 854 goto drop; 855 vxbuf.skb = skb; 856 857 orig_data = xdp->data; 858 orig_data_end = xdp->data_end; 859 860 act = bpf_prog_run_xdp(xdp_prog, xdp); 861 862 switch (act) { 863 case XDP_PASS: 864 break; 865 case XDP_TX: 866 veth_xdp_get(xdp); 867 consume_skb(skb); 868 xdp->rxq->mem = rq->xdp_mem; 869 if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) { 870 trace_xdp_exception(rq->dev, xdp_prog, act); 871 stats->rx_drops++; 872 goto err_xdp; 873 } 874 stats->xdp_tx++; 875 rcu_read_unlock(); 876 goto xdp_xmit; 877 case XDP_REDIRECT: 878 veth_xdp_get(xdp); 879 consume_skb(skb); 880 xdp->rxq->mem = rq->xdp_mem; 881 if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { 882 stats->rx_drops++; 883 goto err_xdp; 884 } 885 stats->xdp_redirect++; 886 rcu_read_unlock(); 887 goto xdp_xmit; 888 default: 889 bpf_warn_invalid_xdp_action(rq->dev, xdp_prog, act); 890 fallthrough; 891 case XDP_ABORTED: 892 trace_xdp_exception(rq->dev, xdp_prog, act); 893 fallthrough; 894 case XDP_DROP: 895 stats->xdp_drops++; 896 goto xdp_drop; 897 } 898 rcu_read_unlock(); 899 900 /* check if bpf_xdp_adjust_head was used */ 901 off = orig_data - xdp->data; 902 if (off > 0) 903 __skb_push(skb, off); 904 else if (off < 0) 905 __skb_pull(skb, -off); 906 907 skb_reset_mac_header(skb); 908 909 /* check if bpf_xdp_adjust_tail was used */ 910 off = xdp->data_end - orig_data_end; 911 if (off != 0) 912 __skb_put(skb, off); /* positive on grow, negative on shrink */ 913 914 /* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers 915 * (e.g. bpf_xdp_adjust_tail), we need to update data_len here. 916 */ 917 if (xdp_buff_has_frags(xdp)) 918 skb->data_len = skb_shinfo(skb)->xdp_frags_size; 919 else 920 skb->data_len = 0; 921 922 skb->protocol = eth_type_trans(skb, rq->dev); 923 924 metalen = xdp->data - xdp->data_meta; 925 if (metalen) 926 skb_metadata_set(skb, metalen); 927 out: 928 return skb; 929 drop: 930 stats->rx_drops++; 931 xdp_drop: 932 rcu_read_unlock(); 933 kfree_skb(skb); 934 return NULL; 935 err_xdp: 936 rcu_read_unlock(); 937 xdp_return_buff(xdp); 938 xdp_xmit: 939 return NULL; 940 } 941 942 static int veth_xdp_rcv(struct veth_rq *rq, int budget, 943 struct veth_xdp_tx_bq *bq, 944 struct veth_stats *stats) 945 { 946 int i, done = 0, n_xdpf = 0; 947 void *xdpf[VETH_XDP_BATCH]; 948 949 for (i = 0; i < budget; i++) { 950 void *ptr = __ptr_ring_consume(&rq->xdp_ring); 951 952 if (!ptr) 953 break; 954 955 if (veth_is_xdp_frame(ptr)) { 956 /* ndo_xdp_xmit */ 957 struct xdp_frame *frame = veth_ptr_to_xdp(ptr); 958 959 stats->xdp_bytes += xdp_get_frame_len(frame); 960 frame = veth_xdp_rcv_one(rq, frame, bq, stats); 961 if (frame) { 962 /* XDP_PASS */ 963 xdpf[n_xdpf++] = frame; 964 if (n_xdpf == VETH_XDP_BATCH) { 965 veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf, 966 bq, stats); 967 n_xdpf = 0; 968 } 969 } 970 } else { 971 /* ndo_start_xmit */ 972 struct sk_buff *skb = ptr; 973 974 stats->xdp_bytes += skb->len; 975 skb = veth_xdp_rcv_skb(rq, skb, bq, stats); 976 if (skb) { 977 if (skb_shared(skb) || skb_unclone(skb, GFP_ATOMIC)) 978 netif_receive_skb(skb); 979 else 980 napi_gro_receive(&rq->xdp_napi, skb); 981 } 982 } 983 done++; 984 } 985 986 if (n_xdpf) 987 veth_xdp_rcv_bulk_skb(rq, xdpf, n_xdpf, bq, stats); 988 989 u64_stats_update_begin(&rq->stats.syncp); 990 rq->stats.vs.xdp_redirect += stats->xdp_redirect; 991 rq->stats.vs.xdp_bytes += stats->xdp_bytes; 992 rq->stats.vs.xdp_drops += stats->xdp_drops; 993 rq->stats.vs.rx_drops += stats->rx_drops; 994 rq->stats.vs.xdp_packets += done; 995 u64_stats_update_end(&rq->stats.syncp); 996 997 return done; 998 } 999 1000 static int veth_poll(struct napi_struct *napi, int budget) 1001 { 1002 struct veth_rq *rq = 1003 container_of(napi, struct veth_rq, xdp_napi); 1004 struct veth_stats stats = {}; 1005 struct veth_xdp_tx_bq bq; 1006 int done; 1007 1008 bq.count = 0; 1009 1010 xdp_set_return_frame_no_direct(); 1011 done = veth_xdp_rcv(rq, budget, &bq, &stats); 1012 1013 if (stats.xdp_redirect > 0) 1014 xdp_do_flush(); 1015 1016 if (done < budget && napi_complete_done(napi, done)) { 1017 /* Write rx_notify_masked before reading ptr_ring */ 1018 smp_store_mb(rq->rx_notify_masked, false); 1019 if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { 1020 if (napi_schedule_prep(&rq->xdp_napi)) { 1021 WRITE_ONCE(rq->rx_notify_masked, true); 1022 __napi_schedule(&rq->xdp_napi); 1023 } 1024 } 1025 } 1026 1027 if (stats.xdp_tx > 0) 1028 veth_xdp_flush(rq, &bq); 1029 xdp_clear_return_frame_no_direct(); 1030 1031 return done; 1032 } 1033 1034 static int veth_create_page_pool(struct veth_rq *rq) 1035 { 1036 struct page_pool_params pp_params = { 1037 .order = 0, 1038 .pool_size = VETH_RING_SIZE, 1039 .nid = NUMA_NO_NODE, 1040 .dev = &rq->dev->dev, 1041 }; 1042 1043 rq->page_pool = page_pool_create(&pp_params); 1044 if (IS_ERR(rq->page_pool)) { 1045 int err = PTR_ERR(rq->page_pool); 1046 1047 rq->page_pool = NULL; 1048 return err; 1049 } 1050 1051 return 0; 1052 } 1053 1054 static int __veth_napi_enable_range(struct net_device *dev, int start, int end) 1055 { 1056 struct veth_priv *priv = netdev_priv(dev); 1057 int err, i; 1058 1059 for (i = start; i < end; i++) { 1060 err = veth_create_page_pool(&priv->rq[i]); 1061 if (err) 1062 goto err_page_pool; 1063 } 1064 1065 for (i = start; i < end; i++) { 1066 struct veth_rq *rq = &priv->rq[i]; 1067 1068 err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL); 1069 if (err) 1070 goto err_xdp_ring; 1071 } 1072 1073 for (i = start; i < end; i++) { 1074 struct veth_rq *rq = &priv->rq[i]; 1075 1076 napi_enable(&rq->xdp_napi); 1077 rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi); 1078 } 1079 1080 return 0; 1081 1082 err_xdp_ring: 1083 for (i--; i >= start; i--) 1084 ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free); 1085 i = end; 1086 err_page_pool: 1087 for (i--; i >= start; i--) { 1088 page_pool_destroy(priv->rq[i].page_pool); 1089 priv->rq[i].page_pool = NULL; 1090 } 1091 1092 return err; 1093 } 1094 1095 static int __veth_napi_enable(struct net_device *dev) 1096 { 1097 return __veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); 1098 } 1099 1100 static void veth_napi_del_range(struct net_device *dev, int start, int end) 1101 { 1102 struct veth_priv *priv = netdev_priv(dev); 1103 int i; 1104 1105 for (i = start; i < end; i++) { 1106 struct veth_rq *rq = &priv->rq[i]; 1107 1108 rcu_assign_pointer(priv->rq[i].napi, NULL); 1109 napi_disable(&rq->xdp_napi); 1110 __netif_napi_del(&rq->xdp_napi); 1111 } 1112 synchronize_net(); 1113 1114 for (i = start; i < end; i++) { 1115 struct veth_rq *rq = &priv->rq[i]; 1116 1117 rq->rx_notify_masked = false; 1118 ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free); 1119 } 1120 1121 for (i = start; i < end; i++) { 1122 page_pool_destroy(priv->rq[i].page_pool); 1123 priv->rq[i].page_pool = NULL; 1124 } 1125 } 1126 1127 static void veth_napi_del(struct net_device *dev) 1128 { 1129 veth_napi_del_range(dev, 0, dev->real_num_rx_queues); 1130 } 1131 1132 static bool veth_gro_requested(const struct net_device *dev) 1133 { 1134 return !!(dev->wanted_features & NETIF_F_GRO); 1135 } 1136 1137 static int veth_enable_xdp_range(struct net_device *dev, int start, int end, 1138 bool napi_already_on) 1139 { 1140 struct veth_priv *priv = netdev_priv(dev); 1141 int err, i; 1142 1143 for (i = start; i < end; i++) { 1144 struct veth_rq *rq = &priv->rq[i]; 1145 1146 if (!napi_already_on) 1147 netif_napi_add(dev, &rq->xdp_napi, veth_poll); 1148 err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id); 1149 if (err < 0) 1150 goto err_rxq_reg; 1151 1152 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, 1153 MEM_TYPE_PAGE_SHARED, 1154 NULL); 1155 if (err < 0) 1156 goto err_reg_mem; 1157 1158 /* Save original mem info as it can be overwritten */ 1159 rq->xdp_mem = rq->xdp_rxq.mem; 1160 } 1161 return 0; 1162 1163 err_reg_mem: 1164 xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq); 1165 err_rxq_reg: 1166 for (i--; i >= start; i--) { 1167 struct veth_rq *rq = &priv->rq[i]; 1168 1169 xdp_rxq_info_unreg(&rq->xdp_rxq); 1170 if (!napi_already_on) 1171 netif_napi_del(&rq->xdp_napi); 1172 } 1173 1174 return err; 1175 } 1176 1177 static void veth_disable_xdp_range(struct net_device *dev, int start, int end, 1178 bool delete_napi) 1179 { 1180 struct veth_priv *priv = netdev_priv(dev); 1181 int i; 1182 1183 for (i = start; i < end; i++) { 1184 struct veth_rq *rq = &priv->rq[i]; 1185 1186 rq->xdp_rxq.mem = rq->xdp_mem; 1187 xdp_rxq_info_unreg(&rq->xdp_rxq); 1188 1189 if (delete_napi) 1190 netif_napi_del(&rq->xdp_napi); 1191 } 1192 } 1193 1194 static int veth_enable_xdp(struct net_device *dev) 1195 { 1196 bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP); 1197 struct veth_priv *priv = netdev_priv(dev); 1198 int err, i; 1199 1200 if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) { 1201 err = veth_enable_xdp_range(dev, 0, dev->real_num_rx_queues, napi_already_on); 1202 if (err) 1203 return err; 1204 1205 if (!napi_already_on) { 1206 err = __veth_napi_enable(dev); 1207 if (err) { 1208 veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, true); 1209 return err; 1210 } 1211 1212 if (!veth_gro_requested(dev)) { 1213 /* user-space did not require GRO, but adding XDP 1214 * is supposed to get GRO working 1215 */ 1216 dev->features |= NETIF_F_GRO; 1217 netdev_features_change(dev); 1218 } 1219 } 1220 } 1221 1222 for (i = 0; i < dev->real_num_rx_queues; i++) { 1223 rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog); 1224 rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi); 1225 } 1226 1227 return 0; 1228 } 1229 1230 static void veth_disable_xdp(struct net_device *dev) 1231 { 1232 struct veth_priv *priv = netdev_priv(dev); 1233 int i; 1234 1235 for (i = 0; i < dev->real_num_rx_queues; i++) 1236 rcu_assign_pointer(priv->rq[i].xdp_prog, NULL); 1237 1238 if (!netif_running(dev) || !veth_gro_requested(dev)) { 1239 veth_napi_del(dev); 1240 1241 /* if user-space did not require GRO, since adding XDP 1242 * enabled it, clear it now 1243 */ 1244 if (!veth_gro_requested(dev) && netif_running(dev)) { 1245 dev->features &= ~NETIF_F_GRO; 1246 netdev_features_change(dev); 1247 } 1248 } 1249 1250 veth_disable_xdp_range(dev, 0, dev->real_num_rx_queues, false); 1251 } 1252 1253 static int veth_napi_enable_range(struct net_device *dev, int start, int end) 1254 { 1255 struct veth_priv *priv = netdev_priv(dev); 1256 int err, i; 1257 1258 for (i = start; i < end; i++) { 1259 struct veth_rq *rq = &priv->rq[i]; 1260 1261 netif_napi_add(dev, &rq->xdp_napi, veth_poll); 1262 } 1263 1264 err = __veth_napi_enable_range(dev, start, end); 1265 if (err) { 1266 for (i = start; i < end; i++) { 1267 struct veth_rq *rq = &priv->rq[i]; 1268 1269 netif_napi_del(&rq->xdp_napi); 1270 } 1271 return err; 1272 } 1273 return err; 1274 } 1275 1276 static int veth_napi_enable(struct net_device *dev) 1277 { 1278 return veth_napi_enable_range(dev, 0, dev->real_num_rx_queues); 1279 } 1280 1281 static void veth_disable_range_safe(struct net_device *dev, int start, int end) 1282 { 1283 struct veth_priv *priv = netdev_priv(dev); 1284 1285 if (start >= end) 1286 return; 1287 1288 if (priv->_xdp_prog) { 1289 veth_napi_del_range(dev, start, end); 1290 veth_disable_xdp_range(dev, start, end, false); 1291 } else if (veth_gro_requested(dev)) { 1292 veth_napi_del_range(dev, start, end); 1293 } 1294 } 1295 1296 static int veth_enable_range_safe(struct net_device *dev, int start, int end) 1297 { 1298 struct veth_priv *priv = netdev_priv(dev); 1299 int err; 1300 1301 if (start >= end) 1302 return 0; 1303 1304 if (priv->_xdp_prog) { 1305 /* these channels are freshly initialized, napi is not on there even 1306 * when GRO is requeste 1307 */ 1308 err = veth_enable_xdp_range(dev, start, end, false); 1309 if (err) 1310 return err; 1311 1312 err = __veth_napi_enable_range(dev, start, end); 1313 if (err) { 1314 /* on error always delete the newly added napis */ 1315 veth_disable_xdp_range(dev, start, end, true); 1316 return err; 1317 } 1318 } else if (veth_gro_requested(dev)) { 1319 return veth_napi_enable_range(dev, start, end); 1320 } 1321 return 0; 1322 } 1323 1324 static void veth_set_xdp_features(struct net_device *dev) 1325 { 1326 struct veth_priv *priv = netdev_priv(dev); 1327 struct net_device *peer; 1328 1329 peer = rtnl_dereference(priv->peer); 1330 if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) { 1331 struct veth_priv *priv_peer = netdev_priv(peer); 1332 xdp_features_t val = NETDEV_XDP_ACT_BASIC | 1333 NETDEV_XDP_ACT_REDIRECT | 1334 NETDEV_XDP_ACT_RX_SG; 1335 1336 if (priv_peer->_xdp_prog || veth_gro_requested(peer)) 1337 val |= NETDEV_XDP_ACT_NDO_XMIT | 1338 NETDEV_XDP_ACT_NDO_XMIT_SG; 1339 xdp_set_features_flag(dev, val); 1340 } else { 1341 xdp_clear_features_flag(dev); 1342 } 1343 } 1344 1345 static int veth_set_channels(struct net_device *dev, 1346 struct ethtool_channels *ch) 1347 { 1348 struct veth_priv *priv = netdev_priv(dev); 1349 unsigned int old_rx_count, new_rx_count; 1350 struct veth_priv *peer_priv; 1351 struct net_device *peer; 1352 int err; 1353 1354 /* sanity check. Upper bounds are already enforced by the caller */ 1355 if (!ch->rx_count || !ch->tx_count) 1356 return -EINVAL; 1357 1358 /* avoid braking XDP, if that is enabled */ 1359 peer = rtnl_dereference(priv->peer); 1360 peer_priv = peer ? netdev_priv(peer) : NULL; 1361 if (priv->_xdp_prog && peer && ch->rx_count < peer->real_num_tx_queues) 1362 return -EINVAL; 1363 1364 if (peer && peer_priv && peer_priv->_xdp_prog && ch->tx_count > peer->real_num_rx_queues) 1365 return -EINVAL; 1366 1367 old_rx_count = dev->real_num_rx_queues; 1368 new_rx_count = ch->rx_count; 1369 if (netif_running(dev)) { 1370 /* turn device off */ 1371 netif_carrier_off(dev); 1372 if (peer) 1373 netif_carrier_off(peer); 1374 1375 /* try to allocate new resurces, as needed*/ 1376 err = veth_enable_range_safe(dev, old_rx_count, new_rx_count); 1377 if (err) 1378 goto out; 1379 } 1380 1381 err = netif_set_real_num_rx_queues(dev, ch->rx_count); 1382 if (err) 1383 goto revert; 1384 1385 err = netif_set_real_num_tx_queues(dev, ch->tx_count); 1386 if (err) { 1387 int err2 = netif_set_real_num_rx_queues(dev, old_rx_count); 1388 1389 /* this error condition could happen only if rx and tx change 1390 * in opposite directions (e.g. tx nr raises, rx nr decreases) 1391 * and we can't do anything to fully restore the original 1392 * status 1393 */ 1394 if (err2) 1395 pr_warn("Can't restore rx queues config %d -> %d %d", 1396 new_rx_count, old_rx_count, err2); 1397 else 1398 goto revert; 1399 } 1400 1401 out: 1402 if (netif_running(dev)) { 1403 /* note that we need to swap the arguments WRT the enable part 1404 * to identify the range we have to disable 1405 */ 1406 veth_disable_range_safe(dev, new_rx_count, old_rx_count); 1407 netif_carrier_on(dev); 1408 if (peer) 1409 netif_carrier_on(peer); 1410 } 1411 1412 /* update XDP supported features */ 1413 veth_set_xdp_features(dev); 1414 if (peer) 1415 veth_set_xdp_features(peer); 1416 1417 return err; 1418 1419 revert: 1420 new_rx_count = old_rx_count; 1421 old_rx_count = ch->rx_count; 1422 goto out; 1423 } 1424 1425 static int veth_open(struct net_device *dev) 1426 { 1427 struct veth_priv *priv = netdev_priv(dev); 1428 struct net_device *peer = rtnl_dereference(priv->peer); 1429 int err; 1430 1431 if (!peer) 1432 return -ENOTCONN; 1433 1434 if (priv->_xdp_prog) { 1435 err = veth_enable_xdp(dev); 1436 if (err) 1437 return err; 1438 } else if (veth_gro_requested(dev)) { 1439 err = veth_napi_enable(dev); 1440 if (err) 1441 return err; 1442 } 1443 1444 if (peer->flags & IFF_UP) { 1445 netif_carrier_on(dev); 1446 netif_carrier_on(peer); 1447 } 1448 1449 veth_set_xdp_features(dev); 1450 1451 return 0; 1452 } 1453 1454 static int veth_close(struct net_device *dev) 1455 { 1456 struct veth_priv *priv = netdev_priv(dev); 1457 struct net_device *peer = rtnl_dereference(priv->peer); 1458 1459 netif_carrier_off(dev); 1460 if (peer) 1461 netif_carrier_off(peer); 1462 1463 if (priv->_xdp_prog) 1464 veth_disable_xdp(dev); 1465 else if (veth_gro_requested(dev)) 1466 veth_napi_del(dev); 1467 1468 return 0; 1469 } 1470 1471 static int is_valid_veth_mtu(int mtu) 1472 { 1473 return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU; 1474 } 1475 1476 static int veth_alloc_queues(struct net_device *dev) 1477 { 1478 struct veth_priv *priv = netdev_priv(dev); 1479 int i; 1480 1481 priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL_ACCOUNT); 1482 if (!priv->rq) 1483 return -ENOMEM; 1484 1485 for (i = 0; i < dev->num_rx_queues; i++) { 1486 priv->rq[i].dev = dev; 1487 u64_stats_init(&priv->rq[i].stats.syncp); 1488 } 1489 1490 return 0; 1491 } 1492 1493 static void veth_free_queues(struct net_device *dev) 1494 { 1495 struct veth_priv *priv = netdev_priv(dev); 1496 1497 kfree(priv->rq); 1498 } 1499 1500 static int veth_dev_init(struct net_device *dev) 1501 { 1502 int err; 1503 1504 dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); 1505 if (!dev->lstats) 1506 return -ENOMEM; 1507 1508 err = veth_alloc_queues(dev); 1509 if (err) { 1510 free_percpu(dev->lstats); 1511 return err; 1512 } 1513 1514 return 0; 1515 } 1516 1517 static void veth_dev_free(struct net_device *dev) 1518 { 1519 veth_free_queues(dev); 1520 free_percpu(dev->lstats); 1521 } 1522 1523 #ifdef CONFIG_NET_POLL_CONTROLLER 1524 static void veth_poll_controller(struct net_device *dev) 1525 { 1526 /* veth only receives frames when its peer sends one 1527 * Since it has nothing to do with disabling irqs, we are guaranteed 1528 * never to have pending data when we poll for it so 1529 * there is nothing to do here. 1530 * 1531 * We need this though so netpoll recognizes us as an interface that 1532 * supports polling, which enables bridge devices in virt setups to 1533 * still use netconsole 1534 */ 1535 } 1536 #endif /* CONFIG_NET_POLL_CONTROLLER */ 1537 1538 static int veth_get_iflink(const struct net_device *dev) 1539 { 1540 struct veth_priv *priv = netdev_priv(dev); 1541 struct net_device *peer; 1542 int iflink; 1543 1544 rcu_read_lock(); 1545 peer = rcu_dereference(priv->peer); 1546 iflink = peer ? peer->ifindex : 0; 1547 rcu_read_unlock(); 1548 1549 return iflink; 1550 } 1551 1552 static netdev_features_t veth_fix_features(struct net_device *dev, 1553 netdev_features_t features) 1554 { 1555 struct veth_priv *priv = netdev_priv(dev); 1556 struct net_device *peer; 1557 1558 peer = rtnl_dereference(priv->peer); 1559 if (peer) { 1560 struct veth_priv *peer_priv = netdev_priv(peer); 1561 1562 if (peer_priv->_xdp_prog) 1563 features &= ~NETIF_F_GSO_SOFTWARE; 1564 } 1565 if (priv->_xdp_prog) 1566 features |= NETIF_F_GRO; 1567 1568 return features; 1569 } 1570 1571 static int veth_set_features(struct net_device *dev, 1572 netdev_features_t features) 1573 { 1574 netdev_features_t changed = features ^ dev->features; 1575 struct veth_priv *priv = netdev_priv(dev); 1576 struct net_device *peer; 1577 int err; 1578 1579 if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog) 1580 return 0; 1581 1582 peer = rtnl_dereference(priv->peer); 1583 if (features & NETIF_F_GRO) { 1584 err = veth_napi_enable(dev); 1585 if (err) 1586 return err; 1587 1588 if (peer) 1589 xdp_features_set_redirect_target(peer, true); 1590 } else { 1591 if (peer) 1592 xdp_features_clear_redirect_target(peer); 1593 veth_napi_del(dev); 1594 } 1595 return 0; 1596 } 1597 1598 static void veth_set_rx_headroom(struct net_device *dev, int new_hr) 1599 { 1600 struct veth_priv *peer_priv, *priv = netdev_priv(dev); 1601 struct net_device *peer; 1602 1603 if (new_hr < 0) 1604 new_hr = 0; 1605 1606 rcu_read_lock(); 1607 peer = rcu_dereference(priv->peer); 1608 if (unlikely(!peer)) 1609 goto out; 1610 1611 peer_priv = netdev_priv(peer); 1612 priv->requested_headroom = new_hr; 1613 new_hr = max(priv->requested_headroom, peer_priv->requested_headroom); 1614 dev->needed_headroom = new_hr; 1615 peer->needed_headroom = new_hr; 1616 1617 out: 1618 rcu_read_unlock(); 1619 } 1620 1621 static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog, 1622 struct netlink_ext_ack *extack) 1623 { 1624 struct veth_priv *priv = netdev_priv(dev); 1625 struct bpf_prog *old_prog; 1626 struct net_device *peer; 1627 unsigned int max_mtu; 1628 int err; 1629 1630 old_prog = priv->_xdp_prog; 1631 priv->_xdp_prog = prog; 1632 peer = rtnl_dereference(priv->peer); 1633 1634 if (prog) { 1635 if (!peer) { 1636 NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached"); 1637 err = -ENOTCONN; 1638 goto err; 1639 } 1640 1641 max_mtu = SKB_WITH_OVERHEAD(PAGE_SIZE - VETH_XDP_HEADROOM) - 1642 peer->hard_header_len; 1643 /* Allow increasing the max_mtu if the program supports 1644 * XDP fragments. 1645 */ 1646 if (prog->aux->xdp_has_frags) 1647 max_mtu += PAGE_SIZE * MAX_SKB_FRAGS; 1648 1649 if (peer->mtu > max_mtu) { 1650 NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP"); 1651 err = -ERANGE; 1652 goto err; 1653 } 1654 1655 if (dev->real_num_rx_queues < peer->real_num_tx_queues) { 1656 NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues"); 1657 err = -ENOSPC; 1658 goto err; 1659 } 1660 1661 if (dev->flags & IFF_UP) { 1662 err = veth_enable_xdp(dev); 1663 if (err) { 1664 NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed"); 1665 goto err; 1666 } 1667 } 1668 1669 if (!old_prog) { 1670 peer->hw_features &= ~NETIF_F_GSO_SOFTWARE; 1671 peer->max_mtu = max_mtu; 1672 } 1673 1674 xdp_features_set_redirect_target(peer, true); 1675 } 1676 1677 if (old_prog) { 1678 if (!prog) { 1679 if (peer && !veth_gro_requested(dev)) 1680 xdp_features_clear_redirect_target(peer); 1681 1682 if (dev->flags & IFF_UP) 1683 veth_disable_xdp(dev); 1684 1685 if (peer) { 1686 peer->hw_features |= NETIF_F_GSO_SOFTWARE; 1687 peer->max_mtu = ETH_MAX_MTU; 1688 } 1689 } 1690 bpf_prog_put(old_prog); 1691 } 1692 1693 if ((!!old_prog ^ !!prog) && peer) 1694 netdev_update_features(peer); 1695 1696 return 0; 1697 err: 1698 priv->_xdp_prog = old_prog; 1699 1700 return err; 1701 } 1702 1703 static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp) 1704 { 1705 switch (xdp->command) { 1706 case XDP_SETUP_PROG: 1707 return veth_xdp_set(dev, xdp->prog, xdp->extack); 1708 default: 1709 return -EINVAL; 1710 } 1711 } 1712 1713 static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) 1714 { 1715 struct veth_xdp_buff *_ctx = (void *)ctx; 1716 1717 if (!_ctx->skb) 1718 return -ENODATA; 1719 1720 *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp; 1721 return 0; 1722 } 1723 1724 static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, 1725 enum xdp_rss_hash_type *rss_type) 1726 { 1727 struct veth_xdp_buff *_ctx = (void *)ctx; 1728 struct sk_buff *skb = _ctx->skb; 1729 1730 if (!skb) 1731 return -ENODATA; 1732 1733 *hash = skb_get_hash(skb); 1734 *rss_type = skb->l4_hash ? XDP_RSS_TYPE_L4_ANY : XDP_RSS_TYPE_NONE; 1735 1736 return 0; 1737 } 1738 1739 static const struct net_device_ops veth_netdev_ops = { 1740 .ndo_init = veth_dev_init, 1741 .ndo_open = veth_open, 1742 .ndo_stop = veth_close, 1743 .ndo_start_xmit = veth_xmit, 1744 .ndo_get_stats64 = veth_get_stats64, 1745 .ndo_set_rx_mode = veth_set_multicast_list, 1746 .ndo_set_mac_address = eth_mac_addr, 1747 #ifdef CONFIG_NET_POLL_CONTROLLER 1748 .ndo_poll_controller = veth_poll_controller, 1749 #endif 1750 .ndo_get_iflink = veth_get_iflink, 1751 .ndo_fix_features = veth_fix_features, 1752 .ndo_set_features = veth_set_features, 1753 .ndo_features_check = passthru_features_check, 1754 .ndo_set_rx_headroom = veth_set_rx_headroom, 1755 .ndo_bpf = veth_xdp, 1756 .ndo_xdp_xmit = veth_ndo_xdp_xmit, 1757 .ndo_get_peer_dev = veth_peer_dev, 1758 }; 1759 1760 static const struct xdp_metadata_ops veth_xdp_metadata_ops = { 1761 .xmo_rx_timestamp = veth_xdp_rx_timestamp, 1762 .xmo_rx_hash = veth_xdp_rx_hash, 1763 }; 1764 1765 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ 1766 NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ 1767 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 1768 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ 1769 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) 1770 1771 static void veth_setup(struct net_device *dev) 1772 { 1773 ether_setup(dev); 1774 1775 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 1776 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; 1777 dev->priv_flags |= IFF_NO_QUEUE; 1778 dev->priv_flags |= IFF_PHONY_HEADROOM; 1779 1780 dev->netdev_ops = &veth_netdev_ops; 1781 dev->xdp_metadata_ops = &veth_xdp_metadata_ops; 1782 dev->ethtool_ops = &veth_ethtool_ops; 1783 dev->features |= NETIF_F_LLTX; 1784 dev->features |= VETH_FEATURES; 1785 dev->vlan_features = dev->features & 1786 ~(NETIF_F_HW_VLAN_CTAG_TX | 1787 NETIF_F_HW_VLAN_STAG_TX | 1788 NETIF_F_HW_VLAN_CTAG_RX | 1789 NETIF_F_HW_VLAN_STAG_RX); 1790 dev->needs_free_netdev = true; 1791 dev->priv_destructor = veth_dev_free; 1792 dev->max_mtu = ETH_MAX_MTU; 1793 1794 dev->hw_features = VETH_FEATURES; 1795 dev->hw_enc_features = VETH_FEATURES; 1796 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 1797 netif_set_tso_max_size(dev, GSO_MAX_SIZE); 1798 } 1799 1800 /* 1801 * netlink interface 1802 */ 1803 1804 static int veth_validate(struct nlattr *tb[], struct nlattr *data[], 1805 struct netlink_ext_ack *extack) 1806 { 1807 if (tb[IFLA_ADDRESS]) { 1808 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 1809 return -EINVAL; 1810 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 1811 return -EADDRNOTAVAIL; 1812 } 1813 if (tb[IFLA_MTU]) { 1814 if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU]))) 1815 return -EINVAL; 1816 } 1817 return 0; 1818 } 1819 1820 static struct rtnl_link_ops veth_link_ops; 1821 1822 static void veth_disable_gro(struct net_device *dev) 1823 { 1824 dev->features &= ~NETIF_F_GRO; 1825 dev->wanted_features &= ~NETIF_F_GRO; 1826 netdev_update_features(dev); 1827 } 1828 1829 static int veth_init_queues(struct net_device *dev, struct nlattr *tb[]) 1830 { 1831 int err; 1832 1833 if (!tb[IFLA_NUM_TX_QUEUES] && dev->num_tx_queues > 1) { 1834 err = netif_set_real_num_tx_queues(dev, 1); 1835 if (err) 1836 return err; 1837 } 1838 if (!tb[IFLA_NUM_RX_QUEUES] && dev->num_rx_queues > 1) { 1839 err = netif_set_real_num_rx_queues(dev, 1); 1840 if (err) 1841 return err; 1842 } 1843 return 0; 1844 } 1845 1846 static int veth_newlink(struct net *src_net, struct net_device *dev, 1847 struct nlattr *tb[], struct nlattr *data[], 1848 struct netlink_ext_ack *extack) 1849 { 1850 int err; 1851 struct net_device *peer; 1852 struct veth_priv *priv; 1853 char ifname[IFNAMSIZ]; 1854 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; 1855 unsigned char name_assign_type; 1856 struct ifinfomsg *ifmp; 1857 struct net *net; 1858 1859 /* 1860 * create and register peer first 1861 */ 1862 if (data != NULL && data[VETH_INFO_PEER] != NULL) { 1863 struct nlattr *nla_peer; 1864 1865 nla_peer = data[VETH_INFO_PEER]; 1866 ifmp = nla_data(nla_peer); 1867 err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack); 1868 if (err < 0) 1869 return err; 1870 1871 err = veth_validate(peer_tb, NULL, extack); 1872 if (err < 0) 1873 return err; 1874 1875 tbp = peer_tb; 1876 } else { 1877 ifmp = NULL; 1878 tbp = tb; 1879 } 1880 1881 if (ifmp && tbp[IFLA_IFNAME]) { 1882 nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); 1883 name_assign_type = NET_NAME_USER; 1884 } else { 1885 snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); 1886 name_assign_type = NET_NAME_ENUM; 1887 } 1888 1889 net = rtnl_link_get_net(src_net, tbp); 1890 if (IS_ERR(net)) 1891 return PTR_ERR(net); 1892 1893 peer = rtnl_create_link(net, ifname, name_assign_type, 1894 &veth_link_ops, tbp, extack); 1895 if (IS_ERR(peer)) { 1896 put_net(net); 1897 return PTR_ERR(peer); 1898 } 1899 1900 if (!ifmp || !tbp[IFLA_ADDRESS]) 1901 eth_hw_addr_random(peer); 1902 1903 if (ifmp && (dev->ifindex != 0)) 1904 peer->ifindex = ifmp->ifi_index; 1905 1906 netif_inherit_tso_max(peer, dev); 1907 1908 err = register_netdevice(peer); 1909 put_net(net); 1910 net = NULL; 1911 if (err < 0) 1912 goto err_register_peer; 1913 1914 /* keep GRO disabled by default to be consistent with the established 1915 * veth behavior 1916 */ 1917 veth_disable_gro(peer); 1918 netif_carrier_off(peer); 1919 1920 err = rtnl_configure_link(peer, ifmp, 0, NULL); 1921 if (err < 0) 1922 goto err_configure_peer; 1923 1924 /* 1925 * register dev last 1926 * 1927 * note, that since we've registered new device the dev's name 1928 * should be re-allocated 1929 */ 1930 1931 if (tb[IFLA_ADDRESS] == NULL) 1932 eth_hw_addr_random(dev); 1933 1934 if (tb[IFLA_IFNAME]) 1935 nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 1936 else 1937 snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); 1938 1939 err = register_netdevice(dev); 1940 if (err < 0) 1941 goto err_register_dev; 1942 1943 netif_carrier_off(dev); 1944 1945 /* 1946 * tie the deviced together 1947 */ 1948 1949 priv = netdev_priv(dev); 1950 rcu_assign_pointer(priv->peer, peer); 1951 err = veth_init_queues(dev, tb); 1952 if (err) 1953 goto err_queues; 1954 1955 priv = netdev_priv(peer); 1956 rcu_assign_pointer(priv->peer, dev); 1957 err = veth_init_queues(peer, tb); 1958 if (err) 1959 goto err_queues; 1960 1961 veth_disable_gro(dev); 1962 /* update XDP supported features */ 1963 veth_set_xdp_features(dev); 1964 veth_set_xdp_features(peer); 1965 1966 return 0; 1967 1968 err_queues: 1969 unregister_netdevice(dev); 1970 err_register_dev: 1971 /* nothing to do */ 1972 err_configure_peer: 1973 unregister_netdevice(peer); 1974 return err; 1975 1976 err_register_peer: 1977 free_netdev(peer); 1978 return err; 1979 } 1980 1981 static void veth_dellink(struct net_device *dev, struct list_head *head) 1982 { 1983 struct veth_priv *priv; 1984 struct net_device *peer; 1985 1986 priv = netdev_priv(dev); 1987 peer = rtnl_dereference(priv->peer); 1988 1989 /* Note : dellink() is called from default_device_exit_batch(), 1990 * before a rcu_synchronize() point. The devices are guaranteed 1991 * not being freed before one RCU grace period. 1992 */ 1993 RCU_INIT_POINTER(priv->peer, NULL); 1994 unregister_netdevice_queue(dev, head); 1995 1996 if (peer) { 1997 priv = netdev_priv(peer); 1998 RCU_INIT_POINTER(priv->peer, NULL); 1999 unregister_netdevice_queue(peer, head); 2000 } 2001 } 2002 2003 static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = { 2004 [VETH_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, 2005 }; 2006 2007 static struct net *veth_get_link_net(const struct net_device *dev) 2008 { 2009 struct veth_priv *priv = netdev_priv(dev); 2010 struct net_device *peer = rtnl_dereference(priv->peer); 2011 2012 return peer ? dev_net(peer) : dev_net(dev); 2013 } 2014 2015 static unsigned int veth_get_num_queues(void) 2016 { 2017 /* enforce the same queue limit as rtnl_create_link */ 2018 int queues = num_possible_cpus(); 2019 2020 if (queues > 4096) 2021 queues = 4096; 2022 return queues; 2023 } 2024 2025 static struct rtnl_link_ops veth_link_ops = { 2026 .kind = DRV_NAME, 2027 .priv_size = sizeof(struct veth_priv), 2028 .setup = veth_setup, 2029 .validate = veth_validate, 2030 .newlink = veth_newlink, 2031 .dellink = veth_dellink, 2032 .policy = veth_policy, 2033 .maxtype = VETH_INFO_MAX, 2034 .get_link_net = veth_get_link_net, 2035 .get_num_tx_queues = veth_get_num_queues, 2036 .get_num_rx_queues = veth_get_num_queues, 2037 }; 2038 2039 /* 2040 * init/fini 2041 */ 2042 2043 static __init int veth_init(void) 2044 { 2045 return rtnl_link_register(&veth_link_ops); 2046 } 2047 2048 static __exit void veth_exit(void) 2049 { 2050 rtnl_link_unregister(&veth_link_ops); 2051 } 2052 2053 module_init(veth_init); 2054 module_exit(veth_exit); 2055 2056 MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); 2057 MODULE_LICENSE("GPL v2"); 2058 MODULE_ALIAS_RTNL_LINK(DRV_NAME); 2059