1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> 3 */ 4 5 #include <linux/ethtool.h> 6 #include <net/netdev_lock.h> 7 8 #include "ipvlan.h" 9 ipvlan_set_port_mode(struct ipvl_port * port,u16 nval,struct netlink_ext_ack * extack)10 static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval, 11 struct netlink_ext_ack *extack) 12 { 13 struct ipvl_dev *ipvlan; 14 unsigned int flags; 15 int err; 16 17 ASSERT_RTNL(); 18 if (port->mode != nval) { 19 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 20 flags = ipvlan->dev->flags; 21 if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) { 22 err = dev_change_flags(ipvlan->dev, 23 flags | IFF_NOARP, 24 extack); 25 } else { 26 err = dev_change_flags(ipvlan->dev, 27 flags & ~IFF_NOARP, 28 extack); 29 } 30 if (unlikely(err)) 31 goto fail; 32 } 33 if (nval == IPVLAN_MODE_L3S) { 34 /* New mode is L3S */ 35 err = ipvlan_l3s_register(port); 36 if (err) 37 goto fail; 38 } else if (port->mode == IPVLAN_MODE_L3S) { 39 /* Old mode was L3S */ 40 ipvlan_l3s_unregister(port); 41 } 42 port->mode = nval; 43 } 44 return 0; 45 46 fail: 47 /* Undo the flags changes that have been done so far. */ 48 list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) { 49 flags = ipvlan->dev->flags; 50 if (port->mode == IPVLAN_MODE_L3 || 51 port->mode == IPVLAN_MODE_L3S) 52 dev_change_flags(ipvlan->dev, flags | IFF_NOARP, 53 NULL); 54 else 55 dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP, 56 NULL); 57 } 58 59 return err; 60 } 61 ipvlan_port_create(struct net_device * dev)62 static int ipvlan_port_create(struct net_device *dev) 63 { 64 struct ipvl_port *port; 65 int err, idx; 66 67 port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL); 68 if (!port) 69 return -ENOMEM; 70 71 write_pnet(&port->pnet, dev_net(dev)); 72 port->dev = dev; 73 port->mode = IPVLAN_MODE_L3; 74 INIT_LIST_HEAD(&port->ipvlans); 75 for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++) 76 INIT_HLIST_HEAD(&port->hlhead[idx]); 77 78 skb_queue_head_init(&port->backlog); 79 INIT_WORK(&port->wq, ipvlan_process_multicast); 80 ida_init(&port->ida); 81 port->dev_id_start = 1; 82 83 err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port); 84 if (err) 85 goto err; 86 87 netdev_hold(dev, &port->dev_tracker, GFP_KERNEL); 88 return 0; 89 90 err: 91 kfree(port); 92 return err; 93 } 94 ipvlan_port_destroy(struct net_device * dev)95 static void ipvlan_port_destroy(struct net_device *dev) 96 { 97 struct ipvl_port *port = ipvlan_port_get_rtnl(dev); 98 struct sk_buff *skb; 99 100 netdev_put(dev, &port->dev_tracker); 101 if (port->mode == IPVLAN_MODE_L3S) 102 ipvlan_l3s_unregister(port); 103 netdev_rx_handler_unregister(dev); 104 cancel_work_sync(&port->wq); 105 while ((skb = __skb_dequeue(&port->backlog)) != NULL) { 106 dev_put(skb->dev); 107 kfree_skb(skb); 108 } 109 ida_destroy(&port->ida); 110 kfree(port); 111 } 112 113 #define IPVLAN_ALWAYS_ON_OFLOADS \ 114 (NETIF_F_SG | NETIF_F_HW_CSUM | \ 115 NETIF_F_GSO_ROBUST | NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL) 116 117 #define IPVLAN_ALWAYS_ON \ 118 (IPVLAN_ALWAYS_ON_OFLOADS | NETIF_F_VLAN_CHALLENGED) 119 120 #define IPVLAN_FEATURES \ 121 (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ 122 NETIF_F_GSO | NETIF_F_ALL_TSO | NETIF_F_GSO_ROBUST | \ 123 NETIF_F_GRO | NETIF_F_RXCSUM | \ 124 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER) 125 126 /* NETIF_F_GSO_ENCAP_ALL NETIF_F_GSO_SOFTWARE Newly added */ 127 128 #define IPVLAN_STATE_MASK \ 129 ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) 130 ipvlan_init(struct net_device * dev)131 static int ipvlan_init(struct net_device *dev) 132 { 133 struct ipvl_dev *ipvlan = netdev_priv(dev); 134 struct net_device *phy_dev = ipvlan->phy_dev; 135 struct ipvl_port *port; 136 int err; 137 138 dev->state = (dev->state & ~IPVLAN_STATE_MASK) | 139 (phy_dev->state & IPVLAN_STATE_MASK); 140 dev->features = phy_dev->features & IPVLAN_FEATURES; 141 dev->features |= IPVLAN_ALWAYS_ON; 142 dev->vlan_features = phy_dev->vlan_features & IPVLAN_FEATURES; 143 dev->vlan_features |= IPVLAN_ALWAYS_ON_OFLOADS; 144 dev->hw_enc_features |= dev->features; 145 dev->lltx = true; 146 netif_inherit_tso_max(dev, phy_dev); 147 dev->hard_header_len = phy_dev->hard_header_len; 148 149 netdev_lockdep_set_classes(dev); 150 151 ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats); 152 if (!ipvlan->pcpu_stats) 153 return -ENOMEM; 154 155 if (!netif_is_ipvlan_port(phy_dev)) { 156 err = ipvlan_port_create(phy_dev); 157 if (err < 0) { 158 free_percpu(ipvlan->pcpu_stats); 159 return err; 160 } 161 } 162 port = ipvlan_port_get_rtnl(phy_dev); 163 port->count += 1; 164 return 0; 165 } 166 ipvlan_uninit(struct net_device * dev)167 static void ipvlan_uninit(struct net_device *dev) 168 { 169 struct ipvl_dev *ipvlan = netdev_priv(dev); 170 struct net_device *phy_dev = ipvlan->phy_dev; 171 struct ipvl_port *port; 172 173 free_percpu(ipvlan->pcpu_stats); 174 175 port = ipvlan_port_get_rtnl(phy_dev); 176 port->count -= 1; 177 if (!port->count) 178 ipvlan_port_destroy(port->dev); 179 } 180 ipvlan_open(struct net_device * dev)181 static int ipvlan_open(struct net_device *dev) 182 { 183 struct ipvl_dev *ipvlan = netdev_priv(dev); 184 struct ipvl_addr *addr; 185 186 if (ipvlan->port->mode == IPVLAN_MODE_L3 || 187 ipvlan->port->mode == IPVLAN_MODE_L3S) 188 dev->flags |= IFF_NOARP; 189 else 190 dev->flags &= ~IFF_NOARP; 191 192 rcu_read_lock(); 193 list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) 194 ipvlan_ht_addr_add(ipvlan, addr); 195 rcu_read_unlock(); 196 197 return 0; 198 } 199 ipvlan_stop(struct net_device * dev)200 static int ipvlan_stop(struct net_device *dev) 201 { 202 struct ipvl_dev *ipvlan = netdev_priv(dev); 203 struct net_device *phy_dev = ipvlan->phy_dev; 204 struct ipvl_addr *addr; 205 206 dev_uc_unsync(phy_dev, dev); 207 dev_mc_unsync(phy_dev, dev); 208 209 rcu_read_lock(); 210 list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) 211 ipvlan_ht_addr_del(addr); 212 rcu_read_unlock(); 213 214 return 0; 215 } 216 ipvlan_start_xmit(struct sk_buff * skb,struct net_device * dev)217 static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb, 218 struct net_device *dev) 219 { 220 const struct ipvl_dev *ipvlan = netdev_priv(dev); 221 int skblen = skb->len; 222 int ret; 223 224 ret = ipvlan_queue_xmit(skb, dev); 225 if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { 226 struct ipvl_pcpu_stats *pcptr; 227 228 pcptr = this_cpu_ptr(ipvlan->pcpu_stats); 229 230 u64_stats_update_begin(&pcptr->syncp); 231 u64_stats_inc(&pcptr->tx_pkts); 232 u64_stats_add(&pcptr->tx_bytes, skblen); 233 u64_stats_update_end(&pcptr->syncp); 234 } else { 235 this_cpu_inc(ipvlan->pcpu_stats->tx_drps); 236 } 237 return ret; 238 } 239 ipvlan_fix_features(struct net_device * dev,netdev_features_t features)240 static netdev_features_t ipvlan_fix_features(struct net_device *dev, 241 netdev_features_t features) 242 { 243 struct ipvl_dev *ipvlan = netdev_priv(dev); 244 245 features |= NETIF_F_ALL_FOR_ALL; 246 features &= (ipvlan->sfeatures | ~IPVLAN_FEATURES); 247 features = netdev_increment_features(ipvlan->phy_dev->features, 248 features, features); 249 features |= IPVLAN_ALWAYS_ON; 250 features &= (IPVLAN_FEATURES | IPVLAN_ALWAYS_ON); 251 252 return features; 253 } 254 ipvlan_change_rx_flags(struct net_device * dev,int change)255 static void ipvlan_change_rx_flags(struct net_device *dev, int change) 256 { 257 struct ipvl_dev *ipvlan = netdev_priv(dev); 258 struct net_device *phy_dev = ipvlan->phy_dev; 259 260 if (change & IFF_ALLMULTI) 261 dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI? 1 : -1); 262 } 263 ipvlan_set_multicast_mac_filter(struct net_device * dev)264 static void ipvlan_set_multicast_mac_filter(struct net_device *dev) 265 { 266 struct ipvl_dev *ipvlan = netdev_priv(dev); 267 268 if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { 269 bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE); 270 } else { 271 struct netdev_hw_addr *ha; 272 DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE); 273 274 bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE); 275 netdev_for_each_mc_addr(ha, dev) 276 __set_bit(ipvlan_mac_hash(ha->addr), mc_filters); 277 278 /* Turn-on broadcast bit irrespective of address family, 279 * since broadcast is deferred to a work-queue, hence no 280 * impact on fast-path processing. 281 */ 282 __set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters); 283 284 bitmap_copy(ipvlan->mac_filters, mc_filters, 285 IPVLAN_MAC_FILTER_SIZE); 286 } 287 dev_uc_sync(ipvlan->phy_dev, dev); 288 dev_mc_sync(ipvlan->phy_dev, dev); 289 } 290 ipvlan_get_stats64(struct net_device * dev,struct rtnl_link_stats64 * s)291 static void ipvlan_get_stats64(struct net_device *dev, 292 struct rtnl_link_stats64 *s) 293 { 294 struct ipvl_dev *ipvlan = netdev_priv(dev); 295 296 if (ipvlan->pcpu_stats) { 297 struct ipvl_pcpu_stats *pcptr; 298 u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes; 299 u32 rx_errs = 0, tx_drps = 0; 300 u32 strt; 301 int idx; 302 303 for_each_possible_cpu(idx) { 304 pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx); 305 do { 306 strt = u64_stats_fetch_begin(&pcptr->syncp); 307 rx_pkts = u64_stats_read(&pcptr->rx_pkts); 308 rx_bytes = u64_stats_read(&pcptr->rx_bytes); 309 rx_mcast = u64_stats_read(&pcptr->rx_mcast); 310 tx_pkts = u64_stats_read(&pcptr->tx_pkts); 311 tx_bytes = u64_stats_read(&pcptr->tx_bytes); 312 } while (u64_stats_fetch_retry(&pcptr->syncp, 313 strt)); 314 315 s->rx_packets += rx_pkts; 316 s->rx_bytes += rx_bytes; 317 s->multicast += rx_mcast; 318 s->tx_packets += tx_pkts; 319 s->tx_bytes += tx_bytes; 320 321 /* u32 values are updated without syncp protection. */ 322 rx_errs += READ_ONCE(pcptr->rx_errs); 323 tx_drps += READ_ONCE(pcptr->tx_drps); 324 } 325 s->rx_errors = rx_errs; 326 s->rx_dropped = rx_errs; 327 s->tx_dropped = tx_drps; 328 } 329 s->tx_errors = DEV_STATS_READ(dev, tx_errors); 330 } 331 ipvlan_vlan_rx_add_vid(struct net_device * dev,__be16 proto,u16 vid)332 static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) 333 { 334 struct ipvl_dev *ipvlan = netdev_priv(dev); 335 struct net_device *phy_dev = ipvlan->phy_dev; 336 337 return vlan_vid_add(phy_dev, proto, vid); 338 } 339 ipvlan_vlan_rx_kill_vid(struct net_device * dev,__be16 proto,u16 vid)340 static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, 341 u16 vid) 342 { 343 struct ipvl_dev *ipvlan = netdev_priv(dev); 344 struct net_device *phy_dev = ipvlan->phy_dev; 345 346 vlan_vid_del(phy_dev, proto, vid); 347 return 0; 348 } 349 ipvlan_get_iflink(const struct net_device * dev)350 static int ipvlan_get_iflink(const struct net_device *dev) 351 { 352 struct ipvl_dev *ipvlan = netdev_priv(dev); 353 354 return READ_ONCE(ipvlan->phy_dev->ifindex); 355 } 356 357 static const struct net_device_ops ipvlan_netdev_ops = { 358 .ndo_init = ipvlan_init, 359 .ndo_uninit = ipvlan_uninit, 360 .ndo_open = ipvlan_open, 361 .ndo_stop = ipvlan_stop, 362 .ndo_start_xmit = ipvlan_start_xmit, 363 .ndo_fix_features = ipvlan_fix_features, 364 .ndo_change_rx_flags = ipvlan_change_rx_flags, 365 .ndo_set_rx_mode = ipvlan_set_multicast_mac_filter, 366 .ndo_get_stats64 = ipvlan_get_stats64, 367 .ndo_vlan_rx_add_vid = ipvlan_vlan_rx_add_vid, 368 .ndo_vlan_rx_kill_vid = ipvlan_vlan_rx_kill_vid, 369 .ndo_get_iflink = ipvlan_get_iflink, 370 }; 371 ipvlan_hard_header(struct sk_buff * skb,struct net_device * dev,unsigned short type,const void * daddr,const void * saddr,unsigned len)372 static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev, 373 unsigned short type, const void *daddr, 374 const void *saddr, unsigned len) 375 { 376 const struct ipvl_dev *ipvlan = netdev_priv(dev); 377 struct net_device *phy_dev = ipvlan->phy_dev; 378 379 /* TODO Probably use a different field than dev_addr so that the 380 * mac-address on the virtual device is portable and can be carried 381 * while the packets use the mac-addr on the physical device. 382 */ 383 return dev_hard_header(skb, phy_dev, type, daddr, 384 saddr ? : phy_dev->dev_addr, len); 385 } 386 387 static const struct header_ops ipvlan_header_ops = { 388 .create = ipvlan_hard_header, 389 .parse = eth_header_parse, 390 .cache = eth_header_cache, 391 .cache_update = eth_header_cache_update, 392 .parse_protocol = eth_header_parse_protocol, 393 }; 394 ipvlan_adjust_mtu(struct ipvl_dev * ipvlan,struct net_device * dev)395 static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) 396 { 397 ipvlan->dev->mtu = dev->mtu; 398 } 399 netif_is_ipvlan(const struct net_device * dev)400 static bool netif_is_ipvlan(const struct net_device *dev) 401 { 402 /* both ipvlan and ipvtap devices use the same netdev_ops */ 403 return dev->netdev_ops == &ipvlan_netdev_ops; 404 } 405 ipvlan_ethtool_get_link_ksettings(struct net_device * dev,struct ethtool_link_ksettings * cmd)406 static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev, 407 struct ethtool_link_ksettings *cmd) 408 { 409 const struct ipvl_dev *ipvlan = netdev_priv(dev); 410 411 return __ethtool_get_link_ksettings(ipvlan->phy_dev, cmd); 412 } 413 ipvlan_ethtool_get_drvinfo(struct net_device * dev,struct ethtool_drvinfo * drvinfo)414 static void ipvlan_ethtool_get_drvinfo(struct net_device *dev, 415 struct ethtool_drvinfo *drvinfo) 416 { 417 strscpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver)); 418 strscpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version)); 419 } 420 ipvlan_ethtool_get_msglevel(struct net_device * dev)421 static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev) 422 { 423 const struct ipvl_dev *ipvlan = netdev_priv(dev); 424 425 return ipvlan->msg_enable; 426 } 427 ipvlan_ethtool_set_msglevel(struct net_device * dev,u32 value)428 static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value) 429 { 430 struct ipvl_dev *ipvlan = netdev_priv(dev); 431 432 ipvlan->msg_enable = value; 433 } 434 435 static const struct ethtool_ops ipvlan_ethtool_ops = { 436 .get_link = ethtool_op_get_link, 437 .get_link_ksettings = ipvlan_ethtool_get_link_ksettings, 438 .get_drvinfo = ipvlan_ethtool_get_drvinfo, 439 .get_msglevel = ipvlan_ethtool_get_msglevel, 440 .set_msglevel = ipvlan_ethtool_set_msglevel, 441 }; 442 ipvlan_nl_changelink(struct net_device * dev,struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)443 static int ipvlan_nl_changelink(struct net_device *dev, 444 struct nlattr *tb[], struct nlattr *data[], 445 struct netlink_ext_ack *extack) 446 { 447 struct ipvl_dev *ipvlan = netdev_priv(dev); 448 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); 449 int err = 0; 450 451 if (!data) 452 return 0; 453 if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN)) 454 return -EPERM; 455 456 if (data[IFLA_IPVLAN_MODE]) { 457 u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 458 459 err = ipvlan_set_port_mode(port, nmode, extack); 460 } 461 462 if (!err && data[IFLA_IPVLAN_FLAGS]) { 463 u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); 464 465 if (flags & IPVLAN_F_PRIVATE) 466 ipvlan_mark_private(port); 467 else 468 ipvlan_clear_private(port); 469 470 if (flags & IPVLAN_F_VEPA) 471 ipvlan_mark_vepa(port); 472 else 473 ipvlan_clear_vepa(port); 474 } 475 476 return err; 477 } 478 ipvlan_nl_getsize(const struct net_device * dev)479 static size_t ipvlan_nl_getsize(const struct net_device *dev) 480 { 481 return (0 482 + nla_total_size(2) /* IFLA_IPVLAN_MODE */ 483 + nla_total_size(2) /* IFLA_IPVLAN_FLAGS */ 484 ); 485 } 486 ipvlan_nl_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)487 static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[], 488 struct netlink_ext_ack *extack) 489 { 490 if (!data) 491 return 0; 492 493 if (data[IFLA_IPVLAN_MODE]) { 494 u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 495 496 if (mode >= IPVLAN_MODE_MAX) 497 return -EINVAL; 498 } 499 if (data[IFLA_IPVLAN_FLAGS]) { 500 u16 flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); 501 502 /* Only two bits are used at this moment. */ 503 if (flags & ~(IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) 504 return -EINVAL; 505 /* Also both flags can't be active at the same time. */ 506 if ((flags & (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) == 507 (IPVLAN_F_PRIVATE | IPVLAN_F_VEPA)) 508 return -EINVAL; 509 } 510 511 return 0; 512 } 513 ipvlan_nl_fillinfo(struct sk_buff * skb,const struct net_device * dev)514 static int ipvlan_nl_fillinfo(struct sk_buff *skb, 515 const struct net_device *dev) 516 { 517 struct ipvl_dev *ipvlan = netdev_priv(dev); 518 struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); 519 int ret = -EINVAL; 520 521 if (!port) 522 goto err; 523 524 ret = -EMSGSIZE; 525 if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode)) 526 goto err; 527 if (nla_put_u16(skb, IFLA_IPVLAN_FLAGS, port->flags)) 528 goto err; 529 530 return 0; 531 532 err: 533 return ret; 534 } 535 ipvlan_link_new(struct net_device * dev,struct rtnl_newlink_params * params,struct netlink_ext_ack * extack)536 int ipvlan_link_new(struct net_device *dev, struct rtnl_newlink_params *params, 537 struct netlink_ext_ack *extack) 538 { 539 struct net *link_net = rtnl_newlink_link_net(params); 540 struct ipvl_dev *ipvlan = netdev_priv(dev); 541 struct nlattr **data = params->data; 542 struct nlattr **tb = params->tb; 543 struct ipvl_port *port; 544 struct net_device *phy_dev; 545 int err; 546 u16 mode = IPVLAN_MODE_L3; 547 548 if (!tb[IFLA_LINK]) 549 return -EINVAL; 550 551 phy_dev = __dev_get_by_index(link_net, nla_get_u32(tb[IFLA_LINK])); 552 if (!phy_dev) 553 return -ENODEV; 554 555 if (netif_is_ipvlan(phy_dev)) { 556 struct ipvl_dev *tmp = netdev_priv(phy_dev); 557 558 phy_dev = tmp->phy_dev; 559 if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN)) 560 return -EPERM; 561 } else if (!netif_is_ipvlan_port(phy_dev)) { 562 /* Exit early if the underlying link is invalid or busy */ 563 if (phy_dev->type != ARPHRD_ETHER || 564 phy_dev->flags & IFF_LOOPBACK) { 565 netdev_err(phy_dev, 566 "Master is either lo or non-ether device\n"); 567 return -EINVAL; 568 } 569 570 if (netdev_is_rx_handler_busy(phy_dev)) { 571 netdev_err(phy_dev, "Device is already in use.\n"); 572 return -EBUSY; 573 } 574 } 575 576 ipvlan->phy_dev = phy_dev; 577 ipvlan->dev = dev; 578 ipvlan->sfeatures = IPVLAN_FEATURES; 579 if (!tb[IFLA_MTU]) 580 ipvlan_adjust_mtu(ipvlan, phy_dev); 581 INIT_LIST_HEAD(&ipvlan->addrs); 582 spin_lock_init(&ipvlan->addrs_lock); 583 584 /* TODO Probably put random address here to be presented to the 585 * world but keep using the physical-dev address for the outgoing 586 * packets. 587 */ 588 eth_hw_addr_set(dev, phy_dev->dev_addr); 589 590 dev->priv_flags |= IFF_NO_RX_HANDLER; 591 592 err = register_netdevice(dev); 593 if (err < 0) 594 return err; 595 596 /* ipvlan_init() would have created the port, if required */ 597 port = ipvlan_port_get_rtnl(phy_dev); 598 ipvlan->port = port; 599 600 /* If the port-id base is at the MAX value, then wrap it around and 601 * begin from 0x1 again. This may be due to a busy system where lots 602 * of slaves are getting created and deleted. 603 */ 604 if (port->dev_id_start == 0xFFFE) 605 port->dev_id_start = 0x1; 606 607 /* Since L2 address is shared among all IPvlan slaves including 608 * master, use unique 16 bit dev-ids to differentiate among them. 609 * Assign IDs between 0x1 and 0xFFFE (used by the master) to each 610 * slave link [see addrconf_ifid_eui48()]. 611 */ 612 err = ida_alloc_range(&port->ida, port->dev_id_start, 0xFFFD, 613 GFP_KERNEL); 614 if (err < 0) 615 err = ida_alloc_range(&port->ida, 0x1, port->dev_id_start - 1, 616 GFP_KERNEL); 617 if (err < 0) 618 goto unregister_netdev; 619 dev->dev_id = err; 620 621 /* Increment id-base to the next slot for the future assignment */ 622 port->dev_id_start = err + 1; 623 624 err = netdev_upper_dev_link(phy_dev, dev, extack); 625 if (err) 626 goto remove_ida; 627 628 /* Flags are per port and latest update overrides. User has 629 * to be consistent in setting it just like the mode attribute. 630 */ 631 if (data && data[IFLA_IPVLAN_FLAGS]) 632 port->flags = nla_get_u16(data[IFLA_IPVLAN_FLAGS]); 633 634 if (data && data[IFLA_IPVLAN_MODE]) 635 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]); 636 637 err = ipvlan_set_port_mode(port, mode, extack); 638 if (err) 639 goto unlink_netdev; 640 641 list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); 642 netif_stacked_transfer_operstate(phy_dev, dev); 643 return 0; 644 645 unlink_netdev: 646 netdev_upper_dev_unlink(phy_dev, dev); 647 remove_ida: 648 ida_free(&port->ida, dev->dev_id); 649 unregister_netdev: 650 unregister_netdevice(dev); 651 return err; 652 } 653 EXPORT_SYMBOL_GPL(ipvlan_link_new); 654 ipvlan_link_delete(struct net_device * dev,struct list_head * head)655 void ipvlan_link_delete(struct net_device *dev, struct list_head *head) 656 { 657 struct ipvl_dev *ipvlan = netdev_priv(dev); 658 struct ipvl_addr *addr, *next; 659 660 spin_lock_bh(&ipvlan->addrs_lock); 661 list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { 662 ipvlan_ht_addr_del(addr); 663 list_del_rcu(&addr->anode); 664 kfree_rcu(addr, rcu); 665 } 666 spin_unlock_bh(&ipvlan->addrs_lock); 667 668 ida_free(&ipvlan->port->ida, dev->dev_id); 669 list_del_rcu(&ipvlan->pnode); 670 unregister_netdevice_queue(dev, head); 671 netdev_upper_dev_unlink(ipvlan->phy_dev, dev); 672 } 673 EXPORT_SYMBOL_GPL(ipvlan_link_delete); 674 ipvlan_link_setup(struct net_device * dev)675 void ipvlan_link_setup(struct net_device *dev) 676 { 677 ether_setup(dev); 678 679 dev->max_mtu = ETH_MAX_MTU; 680 dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); 681 dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; 682 dev->netdev_ops = &ipvlan_netdev_ops; 683 dev->needs_free_netdev = true; 684 dev->header_ops = &ipvlan_header_ops; 685 dev->ethtool_ops = &ipvlan_ethtool_ops; 686 } 687 EXPORT_SYMBOL_GPL(ipvlan_link_setup); 688 689 static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] = 690 { 691 [IFLA_IPVLAN_MODE] = { .type = NLA_U16 }, 692 [IFLA_IPVLAN_FLAGS] = { .type = NLA_U16 }, 693 }; 694 ipvlan_get_link_net(const struct net_device * dev)695 static struct net *ipvlan_get_link_net(const struct net_device *dev) 696 { 697 struct ipvl_dev *ipvlan = netdev_priv(dev); 698 699 return dev_net(ipvlan->phy_dev); 700 } 701 702 static struct rtnl_link_ops ipvlan_link_ops = { 703 .kind = "ipvlan", 704 .priv_size = sizeof(struct ipvl_dev), 705 706 .setup = ipvlan_link_setup, 707 .newlink = ipvlan_link_new, 708 .dellink = ipvlan_link_delete, 709 .get_link_net = ipvlan_get_link_net, 710 }; 711 ipvlan_link_register(struct rtnl_link_ops * ops)712 int ipvlan_link_register(struct rtnl_link_ops *ops) 713 { 714 ops->get_size = ipvlan_nl_getsize; 715 ops->policy = ipvlan_nl_policy; 716 ops->validate = ipvlan_nl_validate; 717 ops->fill_info = ipvlan_nl_fillinfo; 718 ops->changelink = ipvlan_nl_changelink; 719 ops->maxtype = IFLA_IPVLAN_MAX; 720 return rtnl_link_register(ops); 721 } 722 EXPORT_SYMBOL_GPL(ipvlan_link_register); 723 ipvlan_device_event(struct notifier_block * unused,unsigned long event,void * ptr)724 static int ipvlan_device_event(struct notifier_block *unused, 725 unsigned long event, void *ptr) 726 { 727 struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); 728 struct netdev_notifier_pre_changeaddr_info *prechaddr_info; 729 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 730 struct ipvl_dev *ipvlan, *next; 731 struct ipvl_port *port; 732 LIST_HEAD(lst_kill); 733 int err; 734 735 if (!netif_is_ipvlan_port(dev)) 736 return NOTIFY_DONE; 737 738 port = ipvlan_port_get_rtnl(dev); 739 740 switch (event) { 741 case NETDEV_UP: 742 case NETDEV_DOWN: 743 case NETDEV_CHANGE: 744 list_for_each_entry(ipvlan, &port->ipvlans, pnode) 745 netif_stacked_transfer_operstate(ipvlan->phy_dev, 746 ipvlan->dev); 747 break; 748 749 case NETDEV_REGISTER: { 750 struct net *oldnet, *newnet = dev_net(dev); 751 752 oldnet = read_pnet(&port->pnet); 753 if (net_eq(newnet, oldnet)) 754 break; 755 756 write_pnet(&port->pnet, newnet); 757 758 if (port->mode == IPVLAN_MODE_L3S) 759 ipvlan_migrate_l3s_hook(oldnet, newnet); 760 break; 761 } 762 case NETDEV_UNREGISTER: 763 if (dev->reg_state != NETREG_UNREGISTERING) 764 break; 765 766 list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode) 767 ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev, 768 &lst_kill); 769 unregister_netdevice_many(&lst_kill); 770 break; 771 772 case NETDEV_FEAT_CHANGE: 773 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 774 netif_inherit_tso_max(ipvlan->dev, dev); 775 netdev_update_features(ipvlan->dev); 776 } 777 break; 778 779 case NETDEV_CHANGEMTU: 780 list_for_each_entry(ipvlan, &port->ipvlans, pnode) 781 ipvlan_adjust_mtu(ipvlan, dev); 782 break; 783 784 case NETDEV_PRE_CHANGEADDR: 785 prechaddr_info = ptr; 786 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 787 err = dev_pre_changeaddr_notify(ipvlan->dev, 788 prechaddr_info->dev_addr, 789 extack); 790 if (err) 791 return notifier_from_errno(err); 792 } 793 break; 794 795 case NETDEV_CHANGEADDR: 796 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { 797 eth_hw_addr_set(ipvlan->dev, dev->dev_addr); 798 call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev); 799 } 800 break; 801 802 case NETDEV_PRE_TYPE_CHANGE: 803 /* Forbid underlying device to change its type. */ 804 return NOTIFY_BAD; 805 806 case NETDEV_NOTIFY_PEERS: 807 case NETDEV_BONDING_FAILOVER: 808 case NETDEV_RESEND_IGMP: 809 list_for_each_entry(ipvlan, &port->ipvlans, pnode) 810 call_netdevice_notifiers(event, ipvlan->dev); 811 } 812 return NOTIFY_DONE; 813 } 814 815 /* the caller must held the addrs lock */ ipvlan_add_addr(struct ipvl_dev * ipvlan,void * iaddr,bool is_v6)816 static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) 817 { 818 struct ipvl_addr *addr; 819 820 addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC); 821 if (!addr) 822 return -ENOMEM; 823 824 addr->master = ipvlan; 825 if (!is_v6) { 826 memcpy(&addr->ip4addr, iaddr, sizeof(struct in_addr)); 827 addr->atype = IPVL_IPV4; 828 #if IS_ENABLED(CONFIG_IPV6) 829 } else { 830 memcpy(&addr->ip6addr, iaddr, sizeof(struct in6_addr)); 831 addr->atype = IPVL_IPV6; 832 #endif 833 } 834 835 list_add_tail_rcu(&addr->anode, &ipvlan->addrs); 836 837 /* If the interface is not up, the address will be added to the hash 838 * list by ipvlan_open. 839 */ 840 if (netif_running(ipvlan->dev)) 841 ipvlan_ht_addr_add(ipvlan, addr); 842 843 return 0; 844 } 845 ipvlan_del_addr(struct ipvl_dev * ipvlan,void * iaddr,bool is_v6)846 static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) 847 { 848 struct ipvl_addr *addr; 849 850 spin_lock_bh(&ipvlan->addrs_lock); 851 addr = ipvlan_find_addr(ipvlan, iaddr, is_v6); 852 if (!addr) { 853 spin_unlock_bh(&ipvlan->addrs_lock); 854 return; 855 } 856 857 ipvlan_ht_addr_del(addr); 858 list_del_rcu(&addr->anode); 859 spin_unlock_bh(&ipvlan->addrs_lock); 860 kfree_rcu(addr, rcu); 861 } 862 ipvlan_is_valid_dev(const struct net_device * dev)863 static bool ipvlan_is_valid_dev(const struct net_device *dev) 864 { 865 struct ipvl_dev *ipvlan = netdev_priv(dev); 866 867 if (!netif_is_ipvlan(dev)) 868 return false; 869 870 if (!ipvlan || !ipvlan->port) 871 return false; 872 873 return true; 874 } 875 876 #if IS_ENABLED(CONFIG_IPV6) ipvlan_add_addr6(struct ipvl_dev * ipvlan,struct in6_addr * ip6_addr)877 static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) 878 { 879 int ret = -EINVAL; 880 881 spin_lock_bh(&ipvlan->addrs_lock); 882 if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) 883 netif_err(ipvlan, ifup, ipvlan->dev, 884 "Failed to add IPv6=%pI6c addr for %s intf\n", 885 ip6_addr, ipvlan->dev->name); 886 else 887 ret = ipvlan_add_addr(ipvlan, ip6_addr, true); 888 spin_unlock_bh(&ipvlan->addrs_lock); 889 return ret; 890 } 891 ipvlan_del_addr6(struct ipvl_dev * ipvlan,struct in6_addr * ip6_addr)892 static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) 893 { 894 return ipvlan_del_addr(ipvlan, ip6_addr, true); 895 } 896 ipvlan_addr6_event(struct notifier_block * unused,unsigned long event,void * ptr)897 static int ipvlan_addr6_event(struct notifier_block *unused, 898 unsigned long event, void *ptr) 899 { 900 struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr; 901 struct net_device *dev = (struct net_device *)if6->idev->dev; 902 struct ipvl_dev *ipvlan = netdev_priv(dev); 903 904 if (!ipvlan_is_valid_dev(dev)) 905 return NOTIFY_DONE; 906 907 switch (event) { 908 case NETDEV_UP: 909 if (ipvlan_add_addr6(ipvlan, &if6->addr)) 910 return NOTIFY_BAD; 911 break; 912 913 case NETDEV_DOWN: 914 ipvlan_del_addr6(ipvlan, &if6->addr); 915 break; 916 } 917 918 return NOTIFY_OK; 919 } 920 ipvlan_addr6_validator_event(struct notifier_block * unused,unsigned long event,void * ptr)921 static int ipvlan_addr6_validator_event(struct notifier_block *unused, 922 unsigned long event, void *ptr) 923 { 924 struct in6_validator_info *i6vi = (struct in6_validator_info *)ptr; 925 struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev; 926 struct ipvl_dev *ipvlan = netdev_priv(dev); 927 928 if (!ipvlan_is_valid_dev(dev)) 929 return NOTIFY_DONE; 930 931 switch (event) { 932 case NETDEV_UP: 933 if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) { 934 NL_SET_ERR_MSG(i6vi->extack, 935 "Address already assigned to an ipvlan device"); 936 return notifier_from_errno(-EADDRINUSE); 937 } 938 break; 939 } 940 941 return NOTIFY_OK; 942 } 943 #endif 944 ipvlan_add_addr4(struct ipvl_dev * ipvlan,struct in_addr * ip4_addr)945 static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) 946 { 947 int ret = -EINVAL; 948 949 spin_lock_bh(&ipvlan->addrs_lock); 950 if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) 951 netif_err(ipvlan, ifup, ipvlan->dev, 952 "Failed to add IPv4=%pI4 on %s intf.\n", 953 ip4_addr, ipvlan->dev->name); 954 else 955 ret = ipvlan_add_addr(ipvlan, ip4_addr, false); 956 spin_unlock_bh(&ipvlan->addrs_lock); 957 return ret; 958 } 959 ipvlan_del_addr4(struct ipvl_dev * ipvlan,struct in_addr * ip4_addr)960 static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) 961 { 962 return ipvlan_del_addr(ipvlan, ip4_addr, false); 963 } 964 ipvlan_addr4_event(struct notifier_block * unused,unsigned long event,void * ptr)965 static int ipvlan_addr4_event(struct notifier_block *unused, 966 unsigned long event, void *ptr) 967 { 968 struct in_ifaddr *if4 = (struct in_ifaddr *)ptr; 969 struct net_device *dev = (struct net_device *)if4->ifa_dev->dev; 970 struct ipvl_dev *ipvlan = netdev_priv(dev); 971 struct in_addr ip4_addr; 972 973 if (!ipvlan_is_valid_dev(dev)) 974 return NOTIFY_DONE; 975 976 switch (event) { 977 case NETDEV_UP: 978 ip4_addr.s_addr = if4->ifa_address; 979 if (ipvlan_add_addr4(ipvlan, &ip4_addr)) 980 return NOTIFY_BAD; 981 break; 982 983 case NETDEV_DOWN: 984 ip4_addr.s_addr = if4->ifa_address; 985 ipvlan_del_addr4(ipvlan, &ip4_addr); 986 break; 987 } 988 989 return NOTIFY_OK; 990 } 991 ipvlan_addr4_validator_event(struct notifier_block * unused,unsigned long event,void * ptr)992 static int ipvlan_addr4_validator_event(struct notifier_block *unused, 993 unsigned long event, void *ptr) 994 { 995 struct in_validator_info *ivi = (struct in_validator_info *)ptr; 996 struct net_device *dev = (struct net_device *)ivi->ivi_dev->dev; 997 struct ipvl_dev *ipvlan = netdev_priv(dev); 998 999 if (!ipvlan_is_valid_dev(dev)) 1000 return NOTIFY_DONE; 1001 1002 switch (event) { 1003 case NETDEV_UP: 1004 if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) { 1005 NL_SET_ERR_MSG(ivi->extack, 1006 "Address already assigned to an ipvlan device"); 1007 return notifier_from_errno(-EADDRINUSE); 1008 } 1009 break; 1010 } 1011 1012 return NOTIFY_OK; 1013 } 1014 1015 static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = { 1016 .notifier_call = ipvlan_addr4_event, 1017 }; 1018 1019 static struct notifier_block ipvlan_addr4_vtor_notifier_block __read_mostly = { 1020 .notifier_call = ipvlan_addr4_validator_event, 1021 }; 1022 1023 static struct notifier_block ipvlan_notifier_block __read_mostly = { 1024 .notifier_call = ipvlan_device_event, 1025 }; 1026 1027 #if IS_ENABLED(CONFIG_IPV6) 1028 static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = { 1029 .notifier_call = ipvlan_addr6_event, 1030 }; 1031 1032 static struct notifier_block ipvlan_addr6_vtor_notifier_block __read_mostly = { 1033 .notifier_call = ipvlan_addr6_validator_event, 1034 }; 1035 #endif 1036 ipvlan_init_module(void)1037 static int __init ipvlan_init_module(void) 1038 { 1039 int err; 1040 1041 ipvlan_init_secret(); 1042 register_netdevice_notifier(&ipvlan_notifier_block); 1043 #if IS_ENABLED(CONFIG_IPV6) 1044 register_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1045 register_inet6addr_validator_notifier( 1046 &ipvlan_addr6_vtor_notifier_block); 1047 #endif 1048 register_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1049 register_inetaddr_validator_notifier(&ipvlan_addr4_vtor_notifier_block); 1050 1051 err = ipvlan_l3s_init(); 1052 if (err < 0) 1053 goto error; 1054 1055 err = ipvlan_link_register(&ipvlan_link_ops); 1056 if (err < 0) { 1057 ipvlan_l3s_cleanup(); 1058 goto error; 1059 } 1060 1061 return 0; 1062 error: 1063 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1064 unregister_inetaddr_validator_notifier( 1065 &ipvlan_addr4_vtor_notifier_block); 1066 #if IS_ENABLED(CONFIG_IPV6) 1067 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1068 unregister_inet6addr_validator_notifier( 1069 &ipvlan_addr6_vtor_notifier_block); 1070 #endif 1071 unregister_netdevice_notifier(&ipvlan_notifier_block); 1072 return err; 1073 } 1074 ipvlan_cleanup_module(void)1075 static void __exit ipvlan_cleanup_module(void) 1076 { 1077 rtnl_link_unregister(&ipvlan_link_ops); 1078 ipvlan_l3s_cleanup(); 1079 unregister_netdevice_notifier(&ipvlan_notifier_block); 1080 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); 1081 unregister_inetaddr_validator_notifier( 1082 &ipvlan_addr4_vtor_notifier_block); 1083 #if IS_ENABLED(CONFIG_IPV6) 1084 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block); 1085 unregister_inet6addr_validator_notifier( 1086 &ipvlan_addr6_vtor_notifier_block); 1087 #endif 1088 } 1089 1090 module_init(ipvlan_init_module); 1091 module_exit(ipvlan_cleanup_module); 1092 1093 MODULE_LICENSE("GPL"); 1094 MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>"); 1095 MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs"); 1096 MODULE_ALIAS_RTNL_LINK("ipvlan"); 1097