/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 */

#include "ipvlan.h"

static u32 ipvlan_jhash_secret __read_mostly;

void ipvlan_init_secret(void)
{
	net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
}

void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
		     unsigned int len, bool success, bool mcast)
{
	if (likely(success)) {
		struct ipvl_pcpu_stats *pcptr;

		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
		u64_stats_update_begin(&pcptr->syncp);
		pcptr->rx_pkts++;
		pcptr->rx_bytes += len;
		if (mcast)
			pcptr->rx_mcast++;
		u64_stats_update_end(&pcptr->syncp);
	} else {
		this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
	}
}
EXPORT_SYMBOL_GPL(ipvlan_count_rx);

#if IS_ENABLED(CONFIG_IPV6)
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
	const struct in6_addr *ip6_addr = iaddr;

	return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}
#else
static u8 ipvlan_get_v6_hash(const void *iaddr)
{
	return 0;
}
#endif

static u8 ipvlan_get_v4_hash(const void *iaddr)
{
	const struct in_addr *ip4_addr = iaddr;

	return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
	       IPVLAN_HASH_MASK;
}

static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
{
	if (!is_v6 && addr->atype == IPVL_IPV4) {
		struct in_addr *i4addr = (struct in_addr *)iaddr;

		return addr->ip4addr.s_addr == i4addr->s_addr;
#if IS_ENABLED(CONFIG_IPV6)
	} else if (is_v6 && addr->atype == IPVL_IPV6) {
		struct in6_addr *i6addr = (struct in6_addr *)iaddr;

		return ipv6_addr_equal(&addr->ip6addr, i6addr);
#endif
	}

	return false;
}

static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
					       const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;
	u8 hash;

	hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
	       ipvlan_get_v4_hash(iaddr);
	hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
		if (addr_equal(is_v6, addr, iaddr))
			return addr;
	return NULL;
}
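
/* Slave addresses are kept in a small per-port hash table
 * (port->hlhead[]), keyed by a jhash of the IP address salted with
 * ipvlan_jhash_secret. Writers run under RTNL (see ipvlan_addr_busy());
 * lookups walk the chains under RCU, hence the _rcu hlist variants used
 * here and in ipvlan_ht_addr_lookup() above.
 */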
void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
{
	struct ipvl_port *port = ipvlan->port;
	u8 hash;

	hash = (addr->atype == IPVL_IPV6) ?
	       ipvlan_get_v6_hash(&addr->ip6addr) :
	       ipvlan_get_v4_hash(&addr->ip4addr);
	if (hlist_unhashed(&addr->hlnode))
		hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
}

void ipvlan_ht_addr_del(struct ipvl_addr *addr)
{
	hlist_del_init_rcu(&addr->hlnode);
}

struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
				   const void *iaddr, bool is_v6)
{
	struct ipvl_addr *addr;

	list_for_each_entry(addr, &ipvlan->addrs, anode)
		if (addr_equal(is_v6, addr, iaddr))
			return addr;
	return NULL;
}

bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
{
	struct ipvl_dev *ipvlan;

	ASSERT_RTNL();

	list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
		if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
			return true;
	}
	return false;
}

static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
{
	void *lyr3h = NULL;

	switch (skb->protocol) {
	case htons(ETH_P_ARP): {
		struct arphdr *arph;

		if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev))))
			return NULL;

		arph = arp_hdr(skb);
		*type = IPVL_ARP;
		lyr3h = arph;
		break;
	}
	case htons(ETH_P_IP): {
		u32 pktlen;
		struct iphdr *ip4h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
			return NULL;

		ip4h = ip_hdr(skb);
		pktlen = ntohs(ip4h->tot_len);
		if (ip4h->ihl < 5 || ip4h->version != 4)
			return NULL;
		if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
			return NULL;

		*type = IPVL_IPV4;
		lyr3h = ip4h;
		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case htons(ETH_P_IPV6): {
		struct ipv6hdr *ip6h;

		if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
			return NULL;

		ip6h = ipv6_hdr(skb);
		if (ip6h->version != 6)
			return NULL;

		*type = IPVL_IPV6;
		lyr3h = ip6h;
		/* Only Neighbour Solicitation pkts need different treatment */
		if (ipv6_addr_any(&ip6h->saddr) &&
		    ip6h->nexthdr == NEXTHDR_ICMP) {
			struct icmp6hdr *icmph;

			if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph))))
				return NULL;

			ip6h = ipv6_hdr(skb);
			icmph = (struct icmp6hdr *)(ip6h + 1);

			if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
				/* Need to access the ipv6 address in body */
				if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)
							    + sizeof(struct in6_addr))))
					return NULL;

				ip6h = ipv6_hdr(skb);
				icmph = (struct icmp6hdr *)(ip6h + 1);
			}

			*type = IPVL_ICMPV6;
			lyr3h = icmph;
		}
		break;
	}
#endif
	default:
		return NULL;
	}

	return lyr3h;
}

unsigned int ipvlan_mac_hash(const unsigned char *addr)
{
	u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
			       ipvlan_jhash_secret);

	return hash & IPVLAN_MAC_FILTER_MASK;
}
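
/* Work-queue handler that drains the multicast/broadcast backlog.
 * Packets queued by ipvlan_multicast_enqueue() are spliced onto a
 * private list, then every slave whose mac_filters bitmap matches the
 * destination gets its own clone. Packets that originated on a slave
 * (tx_pkt) are additionally transmitted out of the master device, and
 * the device reference taken at enqueue time is dropped here.
 */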
void ipvlan_process_multicast(struct work_struct *work)
{
	struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
	struct ethhdr *ethh;
	struct ipvl_dev *ipvlan;
	struct sk_buff *skb, *nskb;
	struct sk_buff_head list;
	unsigned int len;
	unsigned int mac_hash;
	int ret;
	u8 pkt_type;
	bool tx_pkt;

	__skb_queue_head_init(&list);

	spin_lock_bh(&port->backlog.lock);
	skb_queue_splice_tail_init(&port->backlog, &list);
	spin_unlock_bh(&port->backlog.lock);

	while ((skb = __skb_dequeue(&list)) != NULL) {
		struct net_device *dev = skb->dev;
		bool consumed = false;

		ethh = eth_hdr(skb);
		tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
		mac_hash = ipvlan_mac_hash(ethh->h_dest);

		if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
			pkt_type = PACKET_BROADCAST;
		else
			pkt_type = PACKET_MULTICAST;

		rcu_read_lock();
		list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
			if (tx_pkt && (ipvlan->dev == skb->dev))
				continue;
			if (!test_bit(mac_hash, ipvlan->mac_filters))
				continue;
			if (!(ipvlan->dev->flags & IFF_UP))
				continue;
			ret = NET_RX_DROP;
			len = skb->len + ETH_HLEN;
			nskb = skb_clone(skb, GFP_ATOMIC);
			local_bh_disable();
			if (nskb) {
				consumed = true;
				nskb->pkt_type = pkt_type;
				nskb->dev = ipvlan->dev;
				if (tx_pkt)
					ret = dev_forward_skb(ipvlan->dev, nskb);
				else
					ret = netif_rx(nskb);
			}
			ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
			local_bh_enable();
		}
		rcu_read_unlock();

		if (tx_pkt) {
			/* If the packet originated here, send it out. */
			skb->dev = port->dev;
			skb->pkt_type = pkt_type;
			dev_queue_xmit(skb);
		} else {
			if (consumed)
				consume_skb(skb);
			else
				kfree_skb(skb);
		}
		if (dev)
			dev_put(dev);
	}
}

static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
{
	bool xnet = true;

	if (dev)
		xnet = !net_eq(dev_net(skb->dev), dev_net(dev));

	skb_scrub_packet(skb, xnet);
	if (dev)
		skb->dev = dev;
}

static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
			    bool local)
{
	struct ipvl_dev *ipvlan = addr->master;
	struct net_device *dev = ipvlan->dev;
	unsigned int len;
	rx_handler_result_t ret = RX_HANDLER_CONSUMED;
	bool success = false;
	struct sk_buff *skb = *pskb;

	len = skb->len + ETH_HLEN;
	/* Only packets exchanged between two local slaves need to have
	 * device-up check as well as skb-share check.
	 */
	if (local) {
		if (unlikely(!(dev->flags & IFF_UP))) {
			kfree_skb(skb);
			goto out;
		}

		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			goto out;

		*pskb = skb;
	}

	if (local) {
		skb->pkt_type = PACKET_HOST;
		if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
			success = true;
	} else {
		skb->dev = dev;
		ret = RX_HANDLER_ANOTHER;
		success = true;
	}

out:
	ipvlan_count_rx(ipvlan, len, success, false);
	return ret;
}
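
/* Find the ipvl_addr owning the address carried in a layer-3 header
 * previously classified by ipvlan_get_L3_hdr(). @use_dest selects the
 * destination address (RX/forwarding lookups) versus the source
 * address (external-frame checks). For ARP the sender or target IP is
 * dug out of the variable-length body; for Neighbour Solicitation the
 * target address is taken from the ICMPv6 message itself, since a DAD
 * probe carries an unspecified IPv6 source.
 */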
static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
					    void *lyr3h, int addr_type,
					    bool use_dest)
{
	struct ipvl_addr *addr = NULL;

	switch (addr_type) {
#if IS_ENABLED(CONFIG_IPV6)
	case IPVL_IPV6: {
		struct ipv6hdr *ip6h;
		struct in6_addr *i6addr;

		ip6h = (struct ipv6hdr *)lyr3h;
		i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i6addr, true);
		break;
	}
	case IPVL_ICMPV6: {
		struct nd_msg *ndmh;
		struct in6_addr *i6addr;

		/* Make sure that the Neighbour Solicitation ICMPv6 packets
		 * are handled to avoid DAD issues.
		 */
		ndmh = (struct nd_msg *)lyr3h;
		if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
			i6addr = &ndmh->target;
			addr = ipvlan_ht_addr_lookup(port, i6addr, true);
		}
		break;
	}
#endif
	case IPVL_IPV4: {
		struct iphdr *ip4h;
		__be32 *i4addr;

		ip4h = (struct iphdr *)lyr3h;
		i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
		addr = ipvlan_ht_addr_lookup(port, i4addr, false);
		break;
	}
	case IPVL_ARP: {
		struct arphdr *arph;
		unsigned char *arp_ptr;
		__be32 dip;

		arph = (struct arphdr *)lyr3h;
		arp_ptr = (unsigned char *)(arph + 1);
		if (use_dest)
			arp_ptr += (2 * port->dev->addr_len) + 4;
		else
			arp_ptr += port->dev->addr_len;

		memcpy(&dip, arp_ptr, 4);
		addr = ipvlan_ht_addr_lookup(port, &dip, false);
		break;
	}
	}

	return addr;
}

static int ipvlan_process_v4_outbound(struct sk_buff *skb)
{
	const struct iphdr *ip4h = ip_hdr(skb);
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int err, ret = NET_XMIT_DROP;
	struct flowi4 fl4 = {
		.flowi4_oif = dev->ifindex,
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_flags = FLOWI_FLAG_ANYSRC,
		.flowi4_mark = skb->mark,
		.daddr = ip4h->daddr,
		.saddr = ip4h->saddr,
	};

	rt = ip_route_output_flow(net, &fl4, NULL);
	if (IS_ERR(rt))
		goto err;

	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
		ip_rt_put(rt);
		goto err;
	}
	skb_dst_set(skb, &rt->dst);
	err = ip_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}

#if IS_ENABLED(CONFIG_IPV6)
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);
	struct dst_entry *dst;
	int err, ret = NET_XMIT_DROP;
	struct flowi6 fl6 = {
		.flowi6_oif = dev->ifindex,
		.daddr = ip6h->daddr,
		.saddr = ip6h->saddr,
		.flowi6_flags = FLOWI_FLAG_ANYSRC,
		.flowlabel = ip6_flowinfo(ip6h),
		.flowi6_mark = skb->mark,
		.flowi6_proto = ip6h->nexthdr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		ret = dst->error;
		dst_release(dst);
		goto err;
	}
	skb_dst_set(skb, dst);
	err = ip6_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;
	goto out;
err:
	dev->stats.tx_errors++;
	kfree_skb(skb);
out:
	return ret;
}
#else
static int ipvlan_process_v6_outbound(struct sk_buff *skb)
{
	return NET_XMIT_DROP;
}
#endif
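
/* Outbound path for L3/L3S modes: the slave never hands an L2 header
 * to the lower device itself. Any Ethernet header the stack built on
 * the slave is stripped, and the packet is re-routed and transmitted
 * in the namespace of the master via the v4/v6 helpers above.
 * Multicast and broadcast are unsupported in these modes and dropped.
 */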
static int ipvlan_process_outbound(struct sk_buff *skb)
{
	struct ethhdr *ethh = eth_hdr(skb);
	int ret = NET_XMIT_DROP;

	/* In this mode we don't care about multicast and broadcast traffic */
	if (is_multicast_ether_addr(ethh->h_dest)) {
		pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n",
				    ntohs(skb->protocol));
		kfree_skb(skb);
		goto out;
	}

	/* The ipvlan is a pseudo-L2 device, so the packets that we receive
	 * will have L2; which needs to be discarded and processed further
	 * in the net-ns of the main-device.
	 */
	if (skb_mac_header_was_set(skb)) {
		skb_pull(skb, sizeof(*ethh));
		skb->mac_header = (typeof(skb->mac_header))~0U;
		skb_reset_network_header(skb);
	}

	if (skb->protocol == htons(ETH_P_IPV6))
		ret = ipvlan_process_v6_outbound(skb);
	else if (skb->protocol == htons(ETH_P_IP))
		ret = ipvlan_process_v4_outbound(skb);
	else {
		pr_warn_ratelimited("Dropped outbound packet type=%x\n",
				    ntohs(skb->protocol));
		kfree_skb(skb);
	}
out:
	return ret;
}

static void ipvlan_multicast_enqueue(struct ipvl_port *port,
				     struct sk_buff *skb, bool tx_pkt)
{
	if (skb->protocol == htons(ETH_P_PAUSE)) {
		kfree_skb(skb);
		return;
	}

	/* Record whether the deferred packet is from the TX or the RX path.
	 * Deciding this by looking at the mac-addresses on the packet would
	 * lead to erroneous decisions. (This would be true for a
	 * loopback-mode on the master device or a hair-pin mode of the
	 * switch.)
	 */
	IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;

	spin_lock(&port->backlog.lock);
	if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
		if (skb->dev)
			dev_hold(skb->dev);
		__skb_queue_tail(&port->backlog, skb);
		spin_unlock(&port->backlog.lock);
		schedule_work(&port->wq);
	} else {
		spin_unlock(&port->backlog.lock);
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
	}
}

static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	void *lyr3h;
	struct ipvl_addr *addr;
	int addr_type;

	lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	if (!ipvlan_is_vepa(ipvlan->port)) {
		addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
		if (addr) {
			if (ipvlan_is_private(ipvlan->port)) {
				consume_skb(skb);
				return NET_XMIT_DROP;
			}
			return ipvlan_rcv_frame(addr, &skb, true);
		}
	}
out:
	ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
	return ipvlan_process_outbound(skb);
}
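
/* Transmit path for L2 mode. Since every slave shares the master's
 * MAC address, a unicast frame whose destination equals its source MAC
 * may be meant for a sibling slave: on an address match it is delivered
 * locally via ipvlan_rcv_frame() (unless the port is in private mode),
 * otherwise it is forwarded to the master device for local processing.
 * Multicast/broadcast frames are queued for deferred distribution; all
 * other frames go straight out of the master.
 */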
static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (!ipvlan_is_vepa(ipvlan->port) &&
	    ether_addr_equal(eth->h_dest, eth->h_source)) {
		lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
		if (lyr3h) {
			addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
			if (addr) {
				if (ipvlan_is_private(ipvlan->port)) {
					consume_skb(skb);
					return NET_XMIT_DROP;
				}
				return ipvlan_rcv_frame(addr, &skb, true);
			}
		}
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			return NET_XMIT_DROP;

		/* Packet definitely does not belong to any of the
		 * virtual devices, but the dest is local. So forward
		 * the skb for the main-dev. At the RX side we just return
		 * RX_HANDLER_PASS for it to be processed further on the
		 * stack.
		 */
		return dev_forward_skb(ipvlan->phy_dev, skb);

	} else if (is_multicast_ether_addr(eth->h_dest)) {
		ipvlan_skb_crossing_ns(skb, NULL);
		ipvlan_multicast_enqueue(ipvlan->port, skb, true);
		return NET_XMIT_SUCCESS;
	}

	skb->dev = ipvlan->phy_dev;
	return dev_queue_xmit(skb);
}

int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev);

	if (!port)
		goto out;

	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
		goto out;

	switch (port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_xmit_mode_l2(skb, dev);
	case IPVLAN_MODE_L3:
	case IPVLAN_MODE_L3S:
		return ipvlan_xmit_mode_l3(skb, dev);
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n",
		  port->mode);
out:
	kfree_skb(skb);
	return NET_XMIT_DROP;
}

static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
{
	struct ethhdr *eth = eth_hdr(skb);
	struct ipvl_addr *addr;
	void *lyr3h;
	int addr_type;

	if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
		lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
		if (!lyr3h)
			return true;

		addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
		if (addr)
			return false;
	}

	return true;
}

static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	void *lyr3h;
	int addr_type;
	struct ipvl_addr *addr;
	struct sk_buff *skb = *pskb;
	rx_handler_result_t ret = RX_HANDLER_PASS;

	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
	if (addr)
		ret = ipvlan_rcv_frame(addr, pskb, false);

out:
	return ret;
}

static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
						 struct ipvl_port *port)
{
	struct sk_buff *skb = *pskb;
	struct ethhdr *eth = eth_hdr(skb);
	rx_handler_result_t ret = RX_HANDLER_PASS;

	if (is_multicast_ether_addr(eth->h_dest)) {
		if (ipvlan_external_frame(skb, port)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			/* External frames are queued for device local
			 * distribution, but a copy is given to master
			 * straight away to avoid sending duplicates later
			 * when work-queue processes this frame. This is
			 * achieved by returning RX_HANDLER_PASS.
			 */
			if (nskb) {
				ipvlan_skb_crossing_ns(nskb, NULL);
				ipvlan_multicast_enqueue(port, nskb, false);
			}
		}
	} else {
		/* Perform like l3 mode for non-multicast packet */
		ret = ipvlan_handle_mode_l3(pskb, port);
	}

	return ret;
}

rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev);

	if (!port)
		return RX_HANDLER_PASS;

	switch (port->mode) {
	case IPVLAN_MODE_L2:
		return ipvlan_handle_mode_l2(pskb, port);
	case IPVLAN_MODE_L3:
		return ipvlan_handle_mode_l3(pskb, port);
	case IPVLAN_MODE_L3S:
		return RX_HANDLER_PASS;
	}

	/* Should not reach here */
	WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
		  port->mode);
	kfree_skb(skb);
	return RX_HANDLER_CONSUMED;
}
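
/* L3S mode helpers. In L3S mode the rx-handler above passes frames up
 * unmodified; the work happens later: ipvlan_l3_rcv(), hooked up as the
 * master's l3mdev receive operation (registered in ipvlan_main.c),
 * redoes the input route lookup against the matching slave, while
 * ipvlan_nf_input(), registered as an NF_INET_LOCAL_IN netfilter hook,
 * switches skb->dev to the slave and updates its rx stats so the packet
 * is accounted to the right device.
 */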
static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
					    struct net_device *dev)
{
	struct ipvl_addr *addr = NULL;
	struct ipvl_port *port;
	void *lyr3h;
	int addr_type;

	if (!dev || !netif_is_ipvlan_port(dev))
		goto out;

	port = ipvlan_port_get_rcu(dev);
	if (!port || port->mode != IPVLAN_MODE_L3S)
		goto out;

	lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
	if (!lyr3h)
		goto out;

	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
out:
	return addr;
}

struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
			      u16 proto)
{
	struct ipvl_addr *addr;
	struct net_device *sdev;

	addr = ipvlan_skb_to_addr(skb, dev);
	if (!addr)
		goto out;

	sdev = addr->master->dev;
	switch (proto) {
	case AF_INET:
	{
		int err;
		struct iphdr *ip4h = ip_hdr(skb);

		err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
					   ip4h->tos, sdev);
		if (unlikely(err))
			goto out;
		break;
	}
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
	{
		struct dst_entry *dst;
		struct ipv6hdr *ip6h = ipv6_hdr(skb);
		int flags = RT6_LOOKUP_F_HAS_SADDR;
		struct flowi6 fl6 = {
			.flowi6_iif = sdev->ifindex,
			.daddr = ip6h->daddr,
			.saddr = ip6h->saddr,
			.flowlabel = ip6_flowinfo(ip6h),
			.flowi6_mark = skb->mark,
			.flowi6_proto = ip6h->nexthdr,
		};

		skb_dst_drop(skb);
		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
		skb_dst_set(skb, dst);
		break;
	}
#endif
	default:
		break;
	}

out:
	return skb;
}

unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
			     const struct nf_hook_state *state)
{
	struct ipvl_addr *addr;
	unsigned int len;

	addr = ipvlan_skb_to_addr(skb, skb->dev);
	if (!addr)
		goto out;

	skb->dev = addr->master->dev;
	len = skb->len + ETH_HLEN;
	ipvlan_count_rx(addr->master, len, true, false);
out:
	return NF_ACCEPT;
}