/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>
#include <trace/events/napi.h>

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

static struct sk_buff_head skb_pool;

static atomic_t trapped;

DEFINE_STATIC_SRCU(netpoll_srcu);

#define USEC_PER_POLL	50
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2

#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)

static void zap_completion_queue(void);
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static void netpoll_async_cleanup(struct work_struct *work);

static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

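/*
 * Deferred transmit worker: drain skbs that netpoll_send_skb_on_dev()
 * had to queue because the device was busy. Requeue and retry later if
 * the tx queue is still frozen/stopped or the driver refuses the skb;
 * drop packets whose device has gone away in the meantime.
 */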
static void queue_process(struct work_struct *work)
{
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
	struct sk_buff *skb;
	unsigned long flags;

	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
		const struct net_device_ops *ops = dev->netdev_ops;
		struct netdev_queue *txq;

		if (!netif_device_present(dev) || !netif_running(dev)) {
			__kfree_skb(skb);
			continue;
		}

		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

		local_irq_save(flags);
		__netif_tx_lock(txq, smp_processor_id());
		if (netif_xmit_frozen_or_stopped(txq) ||
		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
			skb_queue_head(&npinfo->txq, skb);
			__netif_tx_unlock(txq);
			local_irq_restore(flags);

			schedule_delayed_work(&npinfo->tx_work, HZ/10);
			return;
		}
		__netif_tx_unlock(txq);
		local_irq_restore(flags);
	}
}

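/*
 * Validate the UDP checksum of an incoming packet. Returns 0 when the
 * checksum is valid or not required, non-zero when it is bad.
 */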
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
			    unsigned short ulen, __be32 saddr, __be32 daddr)
{
	__wsum psum;

	if (uh->check == 0 || skb_csum_unnecessary(skb))
		return 0;

	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

	if (skb->ip_summed == CHECKSUM_COMPLETE &&
	    !csum_fold(csum_add(psum, skb->csum)))
		return 0;

	skb->csum = psum;

	return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communications, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
static int poll_one_napi(struct netpoll_info *npinfo,
			 struct napi_struct *napi, int budget)
{
	int work;

	/* net_rx_action's ->poll() invocations and ours are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

	npinfo->rx_flags |= NETPOLL_RX_DROP;
	atomic_inc(&trapped);
	set_bit(NAPI_STATE_NPSVC, &napi->state);

	work = napi->poll(napi, budget);
	trace_napi_poll(napi);

	clear_bit(NAPI_STATE_NPSVC, &napi->state);
	atomic_dec(&trapped);
	npinfo->rx_flags &= ~NETPOLL_RX_DROP;

	return budget - work;
}

static void poll_napi(struct net_device *dev)
{
	struct napi_struct *napi;
	int budget = 16;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner != smp_processor_id() &&
		    spin_trylock(&napi->poll_lock)) {
			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
					       napi, budget);
			spin_unlock(&napi->poll_lock);

			if (!budget)
				break;
		}
	}
}

static void service_neigh_queue(struct netpoll_info *npi)
{
	if (npi) {
		struct sk_buff *skb;

		while ((skb = skb_dequeue(&npi->neigh_tx)))
			netpoll_neigh_reply(skb, npi);
	}
}

static void netpoll_poll_dev(struct net_device *dev)
{
	const struct net_device_ops *ops;
	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);

	/* Don't do any rx activity if the dev_lock mutex is held
	 * the dev_open/close paths use this to block netpoll activity
	 * while changing device state
	 */
	if (down_trylock(&ni->dev_lock))
		return;

	if (!netif_running(dev)) {
		up(&ni->dev_lock);
		return;
	}

	ops = dev->netdev_ops;
	if (!ops->ndo_poll_controller) {
		up(&ni->dev_lock);
		return;
	}

	/* Process pending work on NIC */
	ops->ndo_poll_controller(dev);

	poll_napi(dev);

	up(&ni->dev_lock);

	if (dev->flags & IFF_SLAVE) {
		if (ni) {
			struct net_device *bond_dev;
			struct sk_buff *skb;
			struct netpoll_info *bond_ni;

			bond_dev = netdev_master_upper_dev_get_rcu(dev);
			bond_ni = rcu_dereference_bh(bond_dev->npinfo);
			while ((skb = skb_dequeue(&ni->neigh_tx))) {
				skb->dev = bond_dev;
				skb_queue_tail(&bond_ni->neigh_tx, skb);
			}
		}
	}

	service_neigh_queue(ni);

	zap_completion_queue();
}

int netpoll_rx_disable(struct net_device *dev)
{
	struct netpoll_info *ni;
	int idx;
	might_sleep();
	idx = srcu_read_lock(&netpoll_srcu);
	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
	if (ni)
		down(&ni->dev_lock);
	srcu_read_unlock(&netpoll_srcu, idx);
	return 0;
}
EXPORT_SYMBOL(netpoll_rx_disable);

void netpoll_rx_enable(struct net_device *dev)
{
	struct netpoll_info *ni;
	rcu_read_lock();
	ni = rcu_dereference(dev->npinfo);
	if (ni)
		up(&ni->dev_lock);
	rcu_read_unlock();
}
EXPORT_SYMBOL(netpoll_rx_enable);

static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

		__skb_queue_tail(&skb_pool, skb);
	}
	spin_unlock_irqrestore(&skb_pool.lock, flags);
}

static void zap_completion_queue(void)
{
	unsigned long flags;
	struct softnet_data *sd = &get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_save(flags);
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_restore(flags);

		while (clist != NULL) {
			struct sk_buff *skb = clist;
			clist = clist->next;
			if (skb->destructor) {
				atomic_inc(&skb->users);
				dev_kfree_skb_any(skb); /* put this one back */
			} else {
				__kfree_skb(skb);
			}
		}
	}

	put_cpu_var(softnet_data);
}

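/*
 * Get an skb for a netpoll message: try a fresh atomic allocation first,
 * fall back to the preallocated pool, and poll the device between
 * retries so pending tx completions can free up memory.
 */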
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
{
	int count = 0;
	struct sk_buff *skb;

	zap_completion_queue();
	refill_skbs();
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
	if (!skb)
		skb = skb_dequeue(&skb_pool);

	if (!skb) {
		if (++count < 10) {
			netpoll_poll_dev(np->dev);
			goto repeat;
		}
		return NULL;
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner == smp_processor_id())
			return 1;
	}
	return 0;
}

/* call with IRQ disabled */
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
{
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
	const struct net_device_ops *ops = dev->netdev_ops;
	/* It is up to the caller to keep npinfo alive. */
	struct netpoll_info *npinfo;

	WARN_ON_ONCE(!irqs_disabled());

	npinfo = rcu_dereference_bh(np->dev->npinfo);
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		__kfree_skb(skb);
		return;
	}

	/* don't get messages out of order, and no recursion */
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
		struct netdev_queue *txq;

		txq = netdev_pick_tx(dev, skb);

		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
			if (__netif_tx_trylock(txq)) {
				if (!netif_xmit_stopped(txq)) {
					if (vlan_tx_tag_present(skb) &&
					    !vlan_hw_offload_capable(netif_skb_features(skb),
								     skb->vlan_proto)) {
						skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
						if (unlikely(!skb))
							break;
						skb->vlan_tci = 0;
					}

					status = ops->ndo_start_xmit(skb, dev);
					if (status == NETDEV_TX_OK)
						txq_trans_update(txq);
				}
				__netif_tx_unlock(txq);

				if (status == NETDEV_TX_OK)
					break;

			}

			/* tickle device maybe there is some cleanup */
			netpoll_poll_dev(np->dev);

			udelay(USEC_PER_POLL);
		}

		WARN_ONCE(!irqs_disabled(),
			  "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
			  dev->name, ops->ndo_start_xmit);

	}

	if (status != NETDEV_TX_OK) {
		skb_queue_tail(&npinfo->txq, skb);
		schedule_delayed_work(&npinfo->tx_work, 0);
	}
}
EXPORT_SYMBOL(netpoll_send_skb_on_dev);

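/*
 * Hand-build a UDP datagram (IPv4 or IPv6) carrying @msg and push it
 * out through netpoll_send_skb(), bypassing the normal protocol stack.
 */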
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
	int total_len, ip_len, udp_len;
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
	static atomic_t ip_ident;
	struct ipv6hdr *ip6h;

	udp_len = len + sizeof(*udph);
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
		ip_len = udp_len + sizeof(*iph);

	total_len = ip_len + LL_RESERVED_SPACE(np->dev);

	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
	if (!skb)
		return;

	skb_copy_to_linear_data(skb, msg, len);
	skb_put(skb, len);

	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);

	if (np->ipv6) {
		udph->check = 0;
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
		skb_reset_network_header(skb);
		ip6h = ipv6_hdr(skb);

		/* ip6h->version = 6; ip6h->priority = 0; */
		put_unaligned(0x60, (unsigned char *)ip6h);
		ip6h->flow_lbl[0] = 0;
		ip6h->flow_lbl[1] = 0;
		ip6h->flow_lbl[2] = 0;

		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
		ip6h->nexthdr = IPPROTO_UDP;
		ip6h->hop_limit = 32;
		ip6h->saddr = np->local_ip.in6;
		ip6h->daddr = np->remote_ip.in6;

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl = 64;
		iph->protocol = IPPROTO_UDP;
		iph->check = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);

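/*
 * Reply to ARP requests and IPv6 neighbour solicitations addressed to a
 * netpoll-managed local address, so the remote end can resolve us while
 * normal rx processing is trapped.
 */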
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
{
	int size, type = ARPOP_REPLY;
	__be32 sip, tip;
	unsigned char *sha;
	struct sk_buff *send_skb;
	struct netpoll *np, *tmp;
	unsigned long flags;
	int hlen, tlen;
	int hits = 0, proto;

	if (list_empty(&npinfo->rx_np))
		return;

	/* Before checking the packet, we do some early
	   inspection whether this is interesting at all */
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->dev == skb->dev)
			hits++;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);

	/* No netpoll struct is using this dev */
	if (!hits)
		return;

	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto == ETH_P_IP) {
		struct arphdr *arp;
		unsigned char *arp_ptr;
		/* No arp on this interface */
		if (skb->dev->flags & IFF_NOARP)
			return;

		if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
			return;

		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		arp = arp_hdr(skb);

		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
		    arp->ar_pro != htons(ETH_P_IP) ||
		    arp->ar_op != htons(ARPOP_REQUEST))
			return;

		arp_ptr = (unsigned char *)(arp+1);
		/* save the location of the src hw addr */
		sha = arp_ptr;
		arp_ptr += skb->dev->addr_len;
		memcpy(&sip, arp_ptr, 4);
		arp_ptr += 4;
		/* If we actually cared about dst hw addr,
		   it would get copied here */
		arp_ptr += skb->dev->addr_len;
		memcpy(&tip, arp_ptr, 4);

		/* Should we ignore arp? */
		if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
			return;

		size = arp_hdr_len(skb->dev);

		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (tip != np->local_ip.ip)
				continue;

			hlen = LL_RESERVED_SPACE(np->dev);
			tlen = np->dev->needed_tailroom;
			send_skb = find_skb(np, size + hlen + tlen, hlen);
			if (!send_skb)
				continue;

			skb_reset_network_header(send_skb);
			arp = (struct arphdr *) skb_put(send_skb, size);
			send_skb->dev = skb->dev;
			send_skb->protocol = htons(ETH_P_ARP);

			/* Fill the device header for the ARP frame */
			if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
					    sha, np->dev->dev_addr,
					    send_skb->len) < 0) {
				kfree_skb(send_skb);
				continue;
			}

			/*
			 * Fill out the arp protocol part.
			 *
			 * we only support ethernet device type,
			 * which (according to RFC 1390) should
			 * always equal 1 (Ethernet).
			 */

			arp->ar_hrd = htons(np->dev->type);
			arp->ar_pro = htons(ETH_P_IP);
			arp->ar_hln = np->dev->addr_len;
			arp->ar_pln = 4;
			arp->ar_op = htons(type);

			arp_ptr = (unsigned char *)(arp + 1);
			memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
			arp_ptr += np->dev->addr_len;
			memcpy(arp_ptr, &tip, 4);
			arp_ptr += 4;
			memcpy(arp_ptr, sha, np->dev->addr_len);
			arp_ptr += np->dev->addr_len;
			memcpy(arp_ptr, &sip, 4);

			netpoll_send_skb(np, send_skb);

			/* If there are several rx_hooks for the same address,
			   we're fine by sending a single reply */
			break;
		}
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	} else if (proto == ETH_P_IPV6) {
#if IS_ENABLED(CONFIG_IPV6)
		struct nd_msg *msg;
		u8 *lladdr = NULL;
		struct ipv6hdr *hdr;
		struct icmp6hdr *icmp6h;
		const struct in6_addr *saddr;
		const struct in6_addr *daddr;
		struct inet6_dev *in6_dev = NULL;
		struct in6_addr *target;

		in6_dev = in6_dev_get(skb->dev);
		if (!in6_dev || !in6_dev->cnf.accept_ra)
			return;

		if (!pskb_may_pull(skb, skb->len))
			return;

		msg = (struct nd_msg *)skb_transport_header(skb);

		__skb_push(skb, skb->data - skb_transport_header(skb));

		if (ipv6_hdr(skb)->hop_limit != 255)
			return;
		if (msg->icmph.icmp6_code != 0)
			return;
		if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
			return;

		saddr = &ipv6_hdr(skb)->saddr;
		daddr = &ipv6_hdr(skb)->daddr;

		size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);

		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
				continue;

			hlen = LL_RESERVED_SPACE(np->dev);
			tlen = np->dev->needed_tailroom;
			send_skb = find_skb(np, size + hlen + tlen, hlen);
			if (!send_skb)
				continue;

			send_skb->protocol = htons(ETH_P_IPV6);
			send_skb->dev = skb->dev;

			skb_reset_network_header(send_skb);
			skb_put(send_skb, sizeof(struct ipv6hdr));
			hdr = ipv6_hdr(send_skb);

			*(__be32*)hdr = htonl(0x60000000);

			hdr->payload_len = htons(size);
			hdr->nexthdr = IPPROTO_ICMPV6;
			hdr->hop_limit = 255;
			hdr->saddr = *saddr;
			hdr->daddr = *daddr;

			send_skb->transport_header = send_skb->tail;
			skb_put(send_skb, size);

			icmp6h = (struct icmp6hdr *)skb_transport_header(send_skb);
			icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
			icmp6h->icmp6_router = 0;
			icmp6h->icmp6_solicited = 1;
			target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr));
			*target = msg->target;
			icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
							      IPPROTO_ICMPV6,
							      csum_partial(icmp6h,
									   size, 0));

			if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
					    lladdr, np->dev->dev_addr,
					    send_skb->len) < 0) {
				kfree_skb(send_skb);
				continue;
			}

			netpoll_send_skb(np, send_skb);

			/* If there are several rx_hooks for the same address,
			   we're fine by sending a single reply */
			break;
		}
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
#endif
	}
}

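/*
 * Quick sanity check that an skb looks like an IPv6 neighbour
 * solicitation, so __netpoll_rx() can divert it to the neigh_tx queue
 * for a reply.
 */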
static bool pkt_is_ns(struct sk_buff *skb)
{
	struct nd_msg *msg;
	struct ipv6hdr *hdr;

	if (skb->protocol != htons(ETH_P_IPV6))
		return false;
	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
		return false;

	msg = (struct nd_msg *)skb_transport_header(skb);
	__skb_push(skb, skb->data - skb_transport_header(skb));
	hdr = ipv6_hdr(skb);

	if (hdr->nexthdr != IPPROTO_ICMPV6)
		return false;
	if (hdr->hop_limit != 255)
		return false;
	if (msg->icmph.icmp6_code != 0)
		return false;
	if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
		return false;

	return true;
}

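/*
 * Rx hook for devices with netpoll attached. Returns 1 if the packet was
 * consumed here (delivered to a matching rx_hook, queued for a neighbour
 * reply, or dropped while trapped), 0 if the normal stack should process
 * it.
 */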
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
{
	int proto, len, ulen;
	int hits = 0;
	const struct iphdr *iph;
	struct udphdr *uh;
	struct netpoll *np, *tmp;

	if (list_empty(&npinfo->rx_np))
		goto out;

	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

	/* check if netpoll clients need ARP */
	if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
		skb_queue_tail(&npinfo->neigh_tx, skb);
		return 1;
	} else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
		skb_queue_tail(&npinfo->neigh_tx, skb);
		return 1;
	}

	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto != ETH_P_IP && proto != ETH_P_IPV6)
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

	if (proto == ETH_P_IP) {
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (iph->ihl < 5 || iph->version != 4)
			goto out;
		if (!pskb_may_pull(skb, iph->ihl*4))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
			goto out;

		len = ntohs(iph->tot_len);
		if (skb->len < len || len < iph->ihl*4)
			goto out;

		/*
		 * Our transport medium may have padded the buffer out.
		 * Now we trim to the true length of the frame.
		 */
		if (pskb_trim_rcsum(skb, len))
			goto out;

		iph = (struct iphdr *)skb->data;
		if (iph->protocol != IPPROTO_UDP)
			goto out;

		len -= iph->ihl*4;
		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
		ulen = ntohs(uh->len);

		if (ulen != len)
			goto out;
		if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
				continue;
			if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

			np->rx_hook(np, ntohs(uh->source),
				    (char *)(uh+1),
				    ulen - sizeof(struct udphdr));
			hits++;
		}
	} else {
#if IS_ENABLED(CONFIG_IPV6)
		const struct ipv6hdr *ip6h;

		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto out;
		ip6h = (struct ipv6hdr *)skb->data;
		if (ip6h->version != 6)
			goto out;
		len = ntohs(ip6h->payload_len);
		if (!len)
			goto out;
		if (len + sizeof(struct ipv6hdr) > skb->len)
			goto out;
		if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
			goto out;
		ip6h = ipv6_hdr(skb);
		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
			goto out;
		uh = udp_hdr(skb);
		ulen = ntohs(uh->len);
		if (ulen != skb->len)
			goto out;
		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
				continue;
			if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

			np->rx_hook(np, ntohs(uh->source),
				    (char *)(uh+1),
				    ulen - sizeof(struct udphdr));
			hits++;
		}
#endif
	}

	if (!hits)
		goto out;

	kfree_skb(skb);
	return 1;

out:
	if (atomic_read(&trapped)) {
		kfree_skb(skb);
		return 1;
	}

	return 0;
}

void netpoll_print_options(struct netpoll *np)
{
	np_info(np, "local port %d\n", np->local_port);
	if (np->ipv6)
		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
	else
		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
	np_info(np, "interface '%s'\n", np->dev_name);
	np_info(np, "remote port %d\n", np->remote_port);
	if (np->ipv6)
		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
	else
		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
EXPORT_SYMBOL(netpoll_print_options);

static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
	const char *end;

	if (!strchr(str, ':') &&
	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
		if (!*end)
			return 0;
	}
	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
		if (!*end)
			return 1;
#else
		return -1;
#endif
	}
	return -1;
}

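/*
 * Parse a netpoll configuration string of the form
 *   [src-port]@[src-ip]/[dev],[dst-port]@<dst-ip>/[dst-mac]
 * filling in @np. Returns 0 on success, -1 on a malformed string.
 */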
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;
	int ipv6;

	if (*cur != '@') {
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim = 0;
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	if (*cur != '/') {
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
		*delim = 0;
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		else
			np->ipv6 = (bool)ipv6;
		cur = delim;
	}
	cur++;

	if (*cur != ',') {
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
		*delim = 0;
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
		cur = delim;
	}
	cur++;

	if (*cur != '@') {
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim = 0;
		if (*cur == ' ' || *cur == '\t')
			np_info(np, "warning: whitespace is not allowed\n");
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
	*delim = 0;
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0)
		goto parse_failed;
	else if (np->ipv6 != (bool)ipv6)
		goto parse_failed;
	else
		np->ipv6 = (bool)ipv6;
	cur = delim + 1;

	if (*cur != 0) {
		/* MAC address */
		if (!mac_pton(cur, np->remote_mac))
			goto parse_failed;
	}

	netpoll_print_options(np);

	return 0;

parse_failed:
	np_info(np, "couldn't parse config at '%s'!\n", cur);
	return -1;
}
EXPORT_SYMBOL(netpoll_parse_options);

int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
	struct netpoll_info *npinfo;
	const struct net_device_ops *ops;
	unsigned long flags;
	int err;

	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);

	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
		npinfo = kmalloc(sizeof(*npinfo), gfp);
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

		npinfo->rx_flags = 0;
		INIT_LIST_HEAD(&npinfo->rx_np);

		spin_lock_init(&npinfo->rx_lock);
		sema_init(&npinfo->dev_lock, 1);
		skb_queue_head_init(&npinfo->neigh_tx);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
			if (err)
				goto free_npinfo;
		}
	} else {
		npinfo = rtnl_dereference(ndev->npinfo);
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	if (np->rx_hook) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
		list_add_tail(&np->rx, &npinfo->rx_np);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	/* last thing to do is link it to the net device structure */
	rcu_assign_pointer(ndev->npinfo, npinfo);

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

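/*
 * Top-level setup: resolve the device by name, bring it up and wait for
 * carrier if necessary, pick a local IP address when none was given,
 * then attach the netpoll structure via __netpoll_setup().
 */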
int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

	rtnl_lock();
	if (np->dev_name) {
		struct net *net = current->nsproxy->net_ns;
		ndev = __dev_get_by_name(net, np->dev_name);
	}
	if (!ndev) {
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
		err = -ENODEV;
		goto unlock;
	}
	dev_hold(ndev);

	if (netdev_master_upper_dev_get(ndev)) {
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
		err = -EBUSY;
		goto put;
	}

	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);

		err = dev_open(ndev);

		if (err) {
			np_err(np, "failed to open %s\n", ndev->name);
			goto put;
		}

		rtnl_unlock();
		atleast = jiffies + HZ/10;
		atmost = jiffies + carrier_timeout * HZ;
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
				np_notice(np, "timeout waiting for carrier\n");
				break;
			}
			msleep(1);
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
			msleep(4000);
		}
		rtnl_lock();
	}

	if (!np->local_ip.ip) {
		if (!np->ipv6) {
			in_dev = __in_dev_get_rtnl(ndev);

			if (!in_dev || !in_dev->ifa_list) {
				np_err(np, "no IP address for %s, aborting\n",
				       np->dev_name);
				err = -EDESTADDRREQ;
				goto put;
			}

			np->local_ip.ip = in_dev->ifa_list->ifa_local;
			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
		} else {
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_dev *idev;

			err = -EDESTADDRREQ;
			idev = __in6_dev_get(ndev);
			if (idev) {
				struct inet6_ifaddr *ifp;

				read_lock_bh(&idev->lock);
				list_for_each_entry(ifp, &idev->addr_list, if_list) {
					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
						continue;
					np->local_ip.in6 = ifp->addr;
					err = 0;
					break;
				}
				read_unlock_bh(&idev->lock);
			}
			if (err) {
				np_err(np, "no IPv6 address for %s, aborting\n",
				       np->dev_name);
				goto put;
			} else
				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
			np_err(np, "IPv6 is not supported %s, aborting\n",
			       np->dev_name);
			err = -EINVAL;
			goto put;
#endif
		}
	}

	/* fill up the skb queue */
	refill_skbs();

	err = __netpoll_setup(np, ndev, GFP_KERNEL);
	if (err)
		goto put;

	rtnl_unlock();
	return 0;

put:
	dev_put(ndev);
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(netpoll_setup);

static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);
	return 0;
}
core_initcall(netpoll_init);

static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *npinfo =
			container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&npinfo->neigh_tx);
	skb_queue_purge(&npinfo->txq);

	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
	cancel_delayed_work(&npinfo->tx_work);

	/* clean after last, unfinished work */
	__skb_queue_purge(&npinfo->txq);
	/* now cancel it again */
	cancel_delayed_work(&npinfo->tx_work);
	kfree(npinfo);
}

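/*
 * Detach netpoll state from np->dev: unhook the rx entry, synchronize
 * with SRCU readers, and drop the shared npinfo reference, freeing it
 * via call_rcu_bh() when this was the last user.
 */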
void __netpoll_cleanup(struct netpoll *np)
{
	struct netpoll_info *npinfo;
	unsigned long flags;

	/* rtnl_dereference would be preferable here but
	 * rcu_cleanup_netpoll path can put us in here safely without
	 * holding the rtnl, so plain rcu_dereference it is
	 */
	npinfo = rtnl_dereference(np->dev->npinfo);
	if (!npinfo)
		return;

	if (!list_empty(&npinfo->rx_np)) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_del(&np->rx);
		if (list_empty(&npinfo->rx_np))
			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	synchronize_srcu(&netpoll_srcu);

	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);

		rcu_assign_pointer(np->dev->npinfo, NULL);
		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
	}
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);

static void netpoll_async_cleanup(struct work_struct *work)
{
	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);

	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();
	kfree(np);
}

void __netpoll_free_async(struct netpoll *np)
{
	schedule_work(&np->cleanup_work);
}
EXPORT_SYMBOL_GPL(__netpoll_free_async);

void netpoll_cleanup(struct netpoll *np)
{
	if (!np->dev)
		return;

	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();

	dev_put(np->dev);
	np->dev = NULL;
}
EXPORT_SYMBOL(netpoll_cleanup);

int netpoll_trap(void)
{
	return atomic_read(&trapped);
}
EXPORT_SYMBOL(netpoll_trap);

void netpoll_set_trap(int trap)
{
	if (trap)
		atomic_inc(&trapped);
	else
		atomic_dec(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);