1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Common framework for low-level network console, dump, and debugger code
4 *
5 * Sep 8 2003 Matt Mackall <mpm@selenic.com>
6 *
7 * based on the netconsole code from:
8 *
9 * Copyright (C) 2001 Ingo Molnar <mingo@redhat.com>
10 * Copyright (C) 2002 Red Hat, Inc.
11 */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/moduleparam.h>
16 #include <linux/kernel.h>
17 #include <linux/netdevice.h>
18 #include <linux/etherdevice.h>
19 #include <linux/string.h>
20 #include <linux/if_arp.h>
21 #include <linux/inetdevice.h>
22 #include <linux/inet.h>
23 #include <linux/interrupt.h>
24 #include <linux/netpoll.h>
25 #include <linux/sched.h>
26 #include <linux/delay.h>
27 #include <linux/rcupdate.h>
28 #include <linux/workqueue.h>
29 #include <linux/slab.h>
30 #include <linux/export.h>
31 #include <linux/if_vlan.h>
32 #include <net/tcp.h>
33 #include <net/udp.h>
34 #include <net/addrconf.h>
35 #include <net/ndisc.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/unaligned.h>
38 #include <trace/events/napi.h>
39 #include <linux/kconfig.h>
40
41 /*
42 * We maintain a small pool of fully-sized skbs, to make sure the
43 * message gets out even in extreme OOM situations.
44 */
45
46 #define MAX_UDP_CHUNK 1460
47 #define MAX_SKBS 32
48 #define USEC_PER_POLL 50
49
50 #define MAX_SKB_SIZE \
51 (sizeof(struct ethhdr) + \
52 sizeof(struct iphdr) + \
53 sizeof(struct udphdr) + \
54 MAX_UDP_CHUNK)
55
56 static void zap_completion_queue(void);
57
58 static unsigned int carrier_timeout = 4;
59 module_param(carrier_timeout, uint, 0644);
60
netpoll_start_xmit(struct sk_buff * skb,struct net_device * dev,struct netdev_queue * txq)61 static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
62 struct net_device *dev,
63 struct netdev_queue *txq)
64 {
65 netdev_tx_t status = NETDEV_TX_OK;
66 netdev_features_t features;
67
68 features = netif_skb_features(skb);
69
70 if (skb_vlan_tag_present(skb) &&
71 !vlan_hw_offload_capable(features, skb->vlan_proto)) {
72 skb = __vlan_hwaccel_push_inside(skb);
73 if (unlikely(!skb)) {
74 /* This is actually a packet drop, but we
75 * don't want the code that calls this
76 * function to try and operate on a NULL skb.
77 */
78 goto out;
79 }
80 }
81
82 status = netdev_start_xmit(skb, dev, txq, false);
83
84 out:
85 return status;
86 }
87
queue_process(struct work_struct * work)88 static void queue_process(struct work_struct *work)
89 {
90 struct netpoll_info *npinfo =
91 container_of(work, struct netpoll_info, tx_work.work);
92 struct sk_buff *skb;
93 unsigned long flags;
94
95 while ((skb = skb_dequeue(&npinfo->txq))) {
96 struct net_device *dev = skb->dev;
97 struct netdev_queue *txq;
98 unsigned int q_index;
99
100 if (!netif_device_present(dev) || !netif_running(dev)) {
101 kfree_skb(skb);
102 continue;
103 }
104
105 local_irq_save(flags);
106 /* check if skb->queue_mapping is still valid */
107 q_index = skb_get_queue_mapping(skb);
108 if (unlikely(q_index >= dev->real_num_tx_queues)) {
109 q_index = q_index % dev->real_num_tx_queues;
110 skb_set_queue_mapping(skb, q_index);
111 }
112 txq = netdev_get_tx_queue(dev, q_index);
113 HARD_TX_LOCK(dev, txq, smp_processor_id());
114 if (netif_xmit_frozen_or_stopped(txq) ||
115 !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
116 skb_queue_head(&npinfo->txq, skb);
117 HARD_TX_UNLOCK(dev, txq);
118 local_irq_restore(flags);
119
120 schedule_delayed_work(&npinfo->tx_work, HZ/10);
121 return;
122 }
123 HARD_TX_UNLOCK(dev, txq);
124 local_irq_restore(flags);
125 }
126 }
127
netif_local_xmit_active(struct net_device * dev)128 static int netif_local_xmit_active(struct net_device *dev)
129 {
130 int i;
131
132 for (i = 0; i < dev->num_tx_queues; i++) {
133 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
134
135 if (netif_tx_owned(txq, smp_processor_id()))
136 return 1;
137 }
138
139 return 0;
140 }
141
poll_one_napi(struct napi_struct * napi)142 static void poll_one_napi(struct napi_struct *napi)
143 {
144 int work;
145
146 /* If we set this bit but see that it has already been set,
147 * that indicates that napi has been disabled and we need
148 * to abort this operation
149 */
150 if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
151 return;
152
153 /* We explicitly pass the polling call a budget of 0 to
154 * indicate that we are clearing the Tx path only.
155 */
156 work = napi->poll(napi, 0);
157 WARN_ONCE(work, "%pS exceeded budget in poll\n", napi->poll);
158 trace_napi_poll(napi, work, 0);
159
160 clear_bit(NAPI_STATE_NPSVC, &napi->state);
161 }
162
poll_napi(struct net_device * dev)163 static void poll_napi(struct net_device *dev)
164 {
165 struct napi_struct *napi;
166 int cpu = smp_processor_id();
167
168 list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
169 if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) {
170 poll_one_napi(napi);
171 smp_store_release(&napi->poll_owner, -1);
172 }
173 }
174 }
175
netpoll_poll_dev(struct net_device * dev)176 void netpoll_poll_dev(struct net_device *dev)
177 {
178 struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
179 const struct net_device_ops *ops;
180
181 /* Don't do any rx activity if the dev_lock mutex is held
182 * the dev_open/close paths use this to block netpoll activity
183 * while changing device state
184 */
185 if (!ni || down_trylock(&ni->dev_lock))
186 return;
187
188 /* Some drivers will take the same locks in poll and xmit,
189 * we can't poll if local CPU is already in xmit.
190 */
191 if (!netif_running(dev) || netif_local_xmit_active(dev)) {
192 up(&ni->dev_lock);
193 return;
194 }
195
196 ops = dev->netdev_ops;
197 if (ops->ndo_poll_controller)
198 ops->ndo_poll_controller(dev);
199
200 poll_napi(dev);
201
202 up(&ni->dev_lock);
203
204 zap_completion_queue();
205 }
206 EXPORT_SYMBOL(netpoll_poll_dev);
207
netpoll_poll_disable(struct net_device * dev)208 void netpoll_poll_disable(struct net_device *dev)
209 {
210 struct netpoll_info *ni;
211
212 might_sleep();
213 ni = rtnl_dereference(dev->npinfo);
214 if (ni)
215 down(&ni->dev_lock);
216 }
217
netpoll_poll_enable(struct net_device * dev)218 void netpoll_poll_enable(struct net_device *dev)
219 {
220 struct netpoll_info *ni;
221
222 ni = rtnl_dereference(dev->npinfo);
223 if (ni)
224 up(&ni->dev_lock);
225 }
226
refill_skbs(struct netpoll * np)227 static void refill_skbs(struct netpoll *np)
228 {
229 struct sk_buff_head *skb_pool;
230 struct sk_buff *skb;
231
232 skb_pool = &np->skb_pool;
233
234 while (READ_ONCE(skb_pool->qlen) < MAX_SKBS) {
235 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
236 if (!skb)
237 break;
238
239 skb_queue_tail(skb_pool, skb);
240 }
241 }
242
zap_completion_queue(void)243 static void zap_completion_queue(void)
244 {
245 unsigned long flags;
246 struct softnet_data *sd = &get_cpu_var(softnet_data);
247
248 if (sd->completion_queue) {
249 struct sk_buff *clist;
250
251 local_irq_save(flags);
252 clist = sd->completion_queue;
253 sd->completion_queue = NULL;
254 local_irq_restore(flags);
255
256 while (clist != NULL) {
257 struct sk_buff *skb = clist;
258 clist = clist->next;
259 if (!skb_irq_freeable(skb)) {
260 refcount_set(&skb->users, 1);
261 dev_kfree_skb_any(skb); /* put this one back */
262 } else {
263 __kfree_skb(skb);
264 }
265 }
266 }
267
268 put_cpu_var(softnet_data);
269 }
270
find_skb(struct netpoll * np,int len,int reserve)271 static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
272 {
273 int count = 0;
274 struct sk_buff *skb;
275
276 zap_completion_queue();
277 repeat:
278
279 skb = alloc_skb(len, GFP_ATOMIC);
280 if (!skb) {
281 skb = skb_dequeue(&np->skb_pool);
282 schedule_work(&np->refill_wq);
283 }
284
285 if (!skb) {
286 if (++count < 10) {
287 netpoll_poll_dev(np->dev);
288 goto repeat;
289 }
290 return NULL;
291 }
292
293 refcount_set(&skb->users, 1);
294 skb_reserve(skb, reserve);
295 return skb;
296 }
297
netpoll_owner_active(struct net_device * dev)298 static int netpoll_owner_active(struct net_device *dev)
299 {
300 struct napi_struct *napi;
301
302 list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
303 if (READ_ONCE(napi->poll_owner) == smp_processor_id())
304 return 1;
305 }
306 return 0;
307 }
308
309 /* call with IRQ disabled */
__netpoll_send_skb(struct netpoll * np,struct sk_buff * skb)310 static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
311 {
312 netdev_tx_t status = NETDEV_TX_BUSY;
313 netdev_tx_t ret = NET_XMIT_DROP;
314 struct net_device *dev;
315 unsigned long tries;
316 /* It is up to the caller to keep npinfo alive. */
317 struct netpoll_info *npinfo;
318
319 lockdep_assert_irqs_disabled();
320
321 dev = np->dev;
322 rcu_read_lock();
323 npinfo = rcu_dereference_bh(dev->npinfo);
324
325 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
326 dev_kfree_skb_irq(skb);
327 goto out;
328 }
329
330 /* don't get messages out of order, and no recursion */
331 if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
332 struct netdev_queue *txq;
333
334 txq = netdev_core_pick_tx(dev, skb, NULL);
335
336 /* try until next clock tick */
337 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
338 tries > 0; --tries) {
339 if (HARD_TX_TRYLOCK(dev, txq)) {
340 if (!netif_xmit_stopped(txq))
341 status = netpoll_start_xmit(skb, dev, txq);
342
343 HARD_TX_UNLOCK(dev, txq);
344
345 if (dev_xmit_complete(status))
346 break;
347
348 }
349
350 /* tickle device maybe there is some cleanup */
351 netpoll_poll_dev(np->dev);
352
353 udelay(USEC_PER_POLL);
354 }
355
356 WARN_ONCE(!irqs_disabled(),
357 "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pS)\n",
358 dev->name, dev->netdev_ops->ndo_start_xmit);
359
360 }
361
362 if (!dev_xmit_complete(status)) {
363 skb_queue_tail(&npinfo->txq, skb);
364 schedule_delayed_work(&npinfo->tx_work,0);
365 }
366 ret = NETDEV_TX_OK;
367 out:
368 rcu_read_unlock();
369 return ret;
370 }
371
netpoll_udp_checksum(struct netpoll * np,struct sk_buff * skb,int len)372 static void netpoll_udp_checksum(struct netpoll *np, struct sk_buff *skb,
373 int len)
374 {
375 struct udphdr *udph;
376 int udp_len;
377
378 udp_len = len + sizeof(struct udphdr);
379 udph = udp_hdr(skb);
380
381 /* check needs to be set, since it will be consumed in csum_partial */
382 udph->check = 0;
383 if (np->ipv6)
384 udph->check = csum_ipv6_magic(&np->local_ip.in6,
385 &np->remote_ip.in6,
386 udp_len, IPPROTO_UDP,
387 csum_partial(udph, udp_len, 0));
388 else
389 udph->check = csum_tcpudp_magic(np->local_ip.ip,
390 np->remote_ip.ip,
391 udp_len, IPPROTO_UDP,
392 csum_partial(udph, udp_len, 0));
393 if (udph->check == 0)
394 udph->check = CSUM_MANGLED_0;
395 }
396
netpoll_send_skb(struct netpoll * np,struct sk_buff * skb)397 netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
398 {
399 unsigned long flags;
400 netdev_tx_t ret;
401
402 if (unlikely(!np)) {
403 dev_kfree_skb_irq(skb);
404 ret = NET_XMIT_DROP;
405 } else {
406 local_irq_save(flags);
407 ret = __netpoll_send_skb(np, skb);
408 local_irq_restore(flags);
409 }
410 return ret;
411 }
412 EXPORT_SYMBOL(netpoll_send_skb);
413
push_ipv6(struct netpoll * np,struct sk_buff * skb,int len)414 static void push_ipv6(struct netpoll *np, struct sk_buff *skb, int len)
415 {
416 struct ipv6hdr *ip6h;
417
418 skb_push(skb, sizeof(struct ipv6hdr));
419 skb_reset_network_header(skb);
420 ip6h = ipv6_hdr(skb);
421
422 /* ip6h->version = 6; ip6h->priority = 0; */
423 *(unsigned char *)ip6h = 0x60;
424 ip6h->flow_lbl[0] = 0;
425 ip6h->flow_lbl[1] = 0;
426 ip6h->flow_lbl[2] = 0;
427
428 ip6h->payload_len = htons(sizeof(struct udphdr) + len);
429 ip6h->nexthdr = IPPROTO_UDP;
430 ip6h->hop_limit = 32;
431 ip6h->saddr = np->local_ip.in6;
432 ip6h->daddr = np->remote_ip.in6;
433
434 skb->protocol = htons(ETH_P_IPV6);
435 }
436
push_ipv4(struct netpoll * np,struct sk_buff * skb,int len)437 static void push_ipv4(struct netpoll *np, struct sk_buff *skb, int len)
438 {
439 static atomic_t ip_ident;
440 struct iphdr *iph;
441 int ip_len;
442
443 ip_len = len + sizeof(struct udphdr) + sizeof(struct iphdr);
444
445 skb_push(skb, sizeof(struct iphdr));
446 skb_reset_network_header(skb);
447 iph = ip_hdr(skb);
448
449 /* iph->version = 4; iph->ihl = 5; */
450 *(unsigned char *)iph = 0x45;
451 iph->tos = 0;
452 put_unaligned(htons(ip_len), &iph->tot_len);
453 iph->id = htons(atomic_inc_return(&ip_ident));
454 iph->frag_off = 0;
455 iph->ttl = 64;
456 iph->protocol = IPPROTO_UDP;
457 iph->check = 0;
458 put_unaligned(np->local_ip.ip, &iph->saddr);
459 put_unaligned(np->remote_ip.ip, &iph->daddr);
460 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
461 skb->protocol = htons(ETH_P_IP);
462 }
463
push_udp(struct netpoll * np,struct sk_buff * skb,int len)464 static void push_udp(struct netpoll *np, struct sk_buff *skb, int len)
465 {
466 struct udphdr *udph;
467 int udp_len;
468
469 udp_len = len + sizeof(struct udphdr);
470
471 skb_push(skb, sizeof(struct udphdr));
472 skb_reset_transport_header(skb);
473
474 udph = udp_hdr(skb);
475 udph->source = htons(np->local_port);
476 udph->dest = htons(np->remote_port);
477 udph->len = htons(udp_len);
478
479 netpoll_udp_checksum(np, skb, len);
480 }
481
push_eth(struct netpoll * np,struct sk_buff * skb)482 static void push_eth(struct netpoll *np, struct sk_buff *skb)
483 {
484 struct ethhdr *eth;
485
486 eth = skb_push(skb, ETH_HLEN);
487 skb_reset_mac_header(skb);
488 ether_addr_copy(eth->h_source, np->dev->dev_addr);
489 ether_addr_copy(eth->h_dest, np->remote_mac);
490 if (np->ipv6)
491 eth->h_proto = htons(ETH_P_IPV6);
492 else
493 eth->h_proto = htons(ETH_P_IP);
494 }
495
netpoll_send_udp(struct netpoll * np,const char * msg,int len)496 int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
497 {
498 int total_len, ip_len, udp_len;
499 struct sk_buff *skb;
500
501 if (!IS_ENABLED(CONFIG_PREEMPT_RT))
502 WARN_ON_ONCE(!irqs_disabled());
503
504 udp_len = len + sizeof(struct udphdr);
505 if (np->ipv6)
506 ip_len = udp_len + sizeof(struct ipv6hdr);
507 else
508 ip_len = udp_len + sizeof(struct iphdr);
509
510 total_len = ip_len + LL_RESERVED_SPACE(np->dev);
511
512 skb = find_skb(np, total_len + np->dev->needed_tailroom,
513 total_len - len);
514 if (!skb)
515 return -ENOMEM;
516
517 skb_copy_to_linear_data(skb, msg, len);
518 skb_put(skb, len);
519
520 push_udp(np, skb, len);
521 if (np->ipv6)
522 push_ipv6(np, skb, len);
523 else
524 push_ipv4(np, skb, len);
525 push_eth(np, skb);
526 skb->dev = np->dev;
527
528 return (int)netpoll_send_skb(np, skb);
529 }
530 EXPORT_SYMBOL(netpoll_send_udp);
531
532
skb_pool_flush(struct netpoll * np)533 static void skb_pool_flush(struct netpoll *np)
534 {
535 struct sk_buff_head *skb_pool;
536
537 cancel_work_sync(&np->refill_wq);
538 skb_pool = &np->skb_pool;
539 skb_queue_purge_reason(skb_pool, SKB_CONSUMED);
540 }
541
refill_skbs_work_handler(struct work_struct * work)542 static void refill_skbs_work_handler(struct work_struct *work)
543 {
544 struct netpoll *np =
545 container_of(work, struct netpoll, refill_wq);
546
547 refill_skbs(np);
548 }
549
__netpoll_setup(struct netpoll * np,struct net_device * ndev)550 int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
551 {
552 struct netpoll_info *npinfo;
553 const struct net_device_ops *ops;
554 int err;
555
556 skb_queue_head_init(&np->skb_pool);
557 INIT_WORK(&np->refill_wq, refill_skbs_work_handler);
558
559 if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
560 np_err(np, "%s doesn't support polling, aborting\n",
561 ndev->name);
562 err = -ENOTSUPP;
563 goto out;
564 }
565
566 npinfo = rtnl_dereference(ndev->npinfo);
567 if (!npinfo) {
568 npinfo = kmalloc_obj(*npinfo);
569 if (!npinfo) {
570 err = -ENOMEM;
571 goto out;
572 }
573
574 sema_init(&npinfo->dev_lock, 1);
575 skb_queue_head_init(&npinfo->txq);
576 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
577
578 refcount_set(&npinfo->refcnt, 1);
579
580 ops = ndev->netdev_ops;
581 if (ops->ndo_netpoll_setup) {
582 err = ops->ndo_netpoll_setup(ndev);
583 if (err)
584 goto free_npinfo;
585 }
586 } else {
587 refcount_inc(&npinfo->refcnt);
588 }
589
590 np->dev = ndev;
591 strscpy(np->dev_name, ndev->name, IFNAMSIZ);
592
593 /* fill up the skb queue */
594 refill_skbs(np);
595
596 /* last thing to do is link it to the net device structure */
597 rcu_assign_pointer(ndev->npinfo, npinfo);
598
599 return 0;
600
601 free_npinfo:
602 kfree(npinfo);
603 out:
604 return err;
605 }
606 EXPORT_SYMBOL_GPL(__netpoll_setup);
607
608 /*
609 * Returns a pointer to a string representation of the identifier used
610 * to select the egress interface for the given netpoll instance. buf
611 * is used to format np->dev_mac when np->dev_name is empty; bufsz must
612 * be at least MAC_ADDR_STR_LEN + 1 to fit the formatted MAC address
613 * and its NUL terminator.
614 */
egress_dev(struct netpoll * np,char * buf,size_t bufsz)615 static char *egress_dev(struct netpoll *np, char *buf, size_t bufsz)
616 {
617 if (np->dev_name[0])
618 return np->dev_name;
619
620 snprintf(buf, bufsz, "%pM", np->dev_mac);
621 return buf;
622 }
623
netpoll_wait_carrier(struct netpoll * np,struct net_device * ndev,unsigned int timeout)624 static void netpoll_wait_carrier(struct netpoll *np, struct net_device *ndev,
625 unsigned int timeout)
626 {
627 unsigned long atmost;
628
629 atmost = jiffies + timeout * HZ;
630 while (!netif_carrier_ok(ndev)) {
631 if (time_after(jiffies, atmost)) {
632 np_notice(np, "timeout waiting for carrier\n");
633 break;
634 }
635 msleep(1);
636 }
637 }
638
639 /*
640 * Take the IPv6 from ndev and populate local_ip structure in netpoll
641 */
netpoll_take_ipv6(struct netpoll * np,struct net_device * ndev)642 static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev)
643 {
644 char buf[MAC_ADDR_STR_LEN + 1];
645 int err = -EDESTADDRREQ;
646 struct inet6_dev *idev;
647
648 if (!IS_ENABLED(CONFIG_IPV6)) {
649 np_err(np, "IPv6 is not supported %s, aborting\n",
650 egress_dev(np, buf, sizeof(buf)));
651 return -EINVAL;
652 }
653
654 idev = __in6_dev_get(ndev);
655 if (idev) {
656 struct inet6_ifaddr *ifp;
657
658 read_lock_bh(&idev->lock);
659 list_for_each_entry(ifp, &idev->addr_list, if_list) {
660 if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
661 !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
662 continue;
663 /* Got the IP, let's return */
664 np->local_ip.in6 = ifp->addr;
665 err = 0;
666 break;
667 }
668 read_unlock_bh(&idev->lock);
669 }
670 if (err) {
671 np_err(np, "no IPv6 address for %s, aborting\n",
672 egress_dev(np, buf, sizeof(buf)));
673 return err;
674 }
675
676 np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
677 return 0;
678 }
679
680 /*
681 * Take the IPv4 from ndev and populate local_ip structure in netpoll
682 */
netpoll_take_ipv4(struct netpoll * np,struct net_device * ndev)683 static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev)
684 {
685 char buf[MAC_ADDR_STR_LEN + 1];
686 const struct in_ifaddr *ifa;
687 struct in_device *in_dev;
688
689 in_dev = __in_dev_get_rtnl(ndev);
690 if (!in_dev) {
691 np_err(np, "no IP address for %s, aborting\n",
692 egress_dev(np, buf, sizeof(buf)));
693 return -EDESTADDRREQ;
694 }
695
696 ifa = rtnl_dereference(in_dev->ifa_list);
697 if (!ifa) {
698 np_err(np, "no IP address for %s, aborting\n",
699 egress_dev(np, buf, sizeof(buf)));
700 return -EDESTADDRREQ;
701 }
702
703 np->local_ip.ip = ifa->ifa_local;
704 np_info(np, "local IP %pI4\n", &np->local_ip.ip);
705
706 return 0;
707 }
708
709 /*
710 * Test whether the caller left np->local_ip unset, so that
711 * netpoll_setup() should auto-populate it from the egress device.
712 *
713 * np->local_ip is a union of __be32 (IPv4) and struct in6_addr (IPv6),
714 * so an IPv6 address whose first 4 bytes are zero (e.g. ::1, ::2,
715 * IPv4-mapped ::ffff:a.b.c.d) must not be tested via the IPv4 arm —
716 * doing so would misclassify a caller-supplied address as unset and
717 * silently overwrite it with whatever address the device exposes.
718 */
netpoll_local_ip_unset(const struct netpoll * np)719 static bool netpoll_local_ip_unset(const struct netpoll *np)
720 {
721 if (np->ipv6)
722 return ipv6_addr_any(&np->local_ip.in6);
723 return !np->local_ip.ip;
724 }
725
netpoll_setup(struct netpoll * np)726 int netpoll_setup(struct netpoll *np)
727 {
728 struct net *net = current->nsproxy->net_ns;
729 char buf[MAC_ADDR_STR_LEN + 1];
730 struct net_device *ndev = NULL;
731 bool ip_overwritten = false;
732 int err;
733
734 rtnl_lock();
735 if (np->dev_name[0])
736 ndev = __dev_get_by_name(net, np->dev_name);
737 else if (is_valid_ether_addr(np->dev_mac))
738 ndev = dev_getbyhwaddr(net, ARPHRD_ETHER, np->dev_mac);
739
740 if (!ndev) {
741 np_err(np, "%s doesn't exist, aborting\n",
742 egress_dev(np, buf, sizeof(buf)));
743 err = -ENODEV;
744 goto unlock;
745 }
746 netdev_hold(ndev, &np->dev_tracker, GFP_KERNEL);
747
748 if (netdev_master_upper_dev_get(ndev)) {
749 np_err(np, "%s is a slave device, aborting\n",
750 egress_dev(np, buf, sizeof(buf)));
751 err = -EBUSY;
752 goto put;
753 }
754
755 if (!netif_running(ndev)) {
756 np_info(np, "device %s not up yet, forcing it\n",
757 egress_dev(np, buf, sizeof(buf)));
758
759 err = dev_open(ndev, NULL);
760 if (err) {
761 np_err(np, "failed to open %s\n", ndev->name);
762 goto put;
763 }
764
765 rtnl_unlock();
766 netpoll_wait_carrier(np, ndev, carrier_timeout);
767 rtnl_lock();
768 }
769
770 if (netpoll_local_ip_unset(np)) {
771 if (!np->ipv6) {
772 err = netpoll_take_ipv4(np, ndev);
773 if (err)
774 goto put;
775 } else {
776 err = netpoll_take_ipv6(np, ndev);
777 if (err)
778 goto put;
779 }
780 ip_overwritten = true;
781 }
782
783 err = __netpoll_setup(np, ndev);
784 if (err)
785 goto flush;
786 rtnl_unlock();
787
788 /* Make sure all NAPI polls which started before dev->npinfo
789 * was visible have exited before we start calling NAPI poll.
790 * NAPI skips locking if dev->npinfo is NULL.
791 */
792 synchronize_rcu();
793
794 return 0;
795
796 flush:
797 skb_pool_flush(np);
798 put:
799 DEBUG_NET_WARN_ON_ONCE(np->dev);
800 if (ip_overwritten)
801 memset(&np->local_ip, 0, sizeof(np->local_ip));
802 netdev_put(ndev, &np->dev_tracker);
803 unlock:
804 rtnl_unlock();
805 return err;
806 }
807 EXPORT_SYMBOL(netpoll_setup);
808
rcu_cleanup_netpoll_info(struct rcu_head * rcu_head)809 static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
810 {
811 struct netpoll_info *npinfo =
812 container_of(rcu_head, struct netpoll_info, rcu);
813
814 skb_queue_purge(&npinfo->txq);
815
816 /* we can't call cancel_delayed_work_sync here, as we are in softirq */
817 cancel_delayed_work(&npinfo->tx_work);
818
819 /* clean after last, unfinished work */
820 __skb_queue_purge(&npinfo->txq);
821 /* now cancel it again */
822 cancel_delayed_work(&npinfo->tx_work);
823 kfree(npinfo);
824 }
825
__netpoll_cleanup(struct netpoll * np)826 static void __netpoll_cleanup(struct netpoll *np)
827 {
828 struct netpoll_info *npinfo;
829
830 npinfo = rtnl_dereference(np->dev->npinfo);
831 if (!npinfo)
832 return;
833
834 /* At this point, there is a single npinfo instance per netdevice, and
835 * its refcnt tracks how many netpoll structures are linked to it. We
836 * only perform npinfo cleanup when the refcnt decrements to zero.
837 */
838 if (refcount_dec_and_test(&npinfo->refcnt)) {
839 const struct net_device_ops *ops;
840
841 ops = np->dev->netdev_ops;
842 if (ops->ndo_netpoll_cleanup)
843 ops->ndo_netpoll_cleanup(np->dev);
844
845 RCU_INIT_POINTER(np->dev->npinfo, NULL);
846 call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info);
847 }
848
849 skb_pool_flush(np);
850 }
851
/*
 * Clean up @np and free it.  The caller must hold RTNL (asserted).
 */
void __netpoll_free(struct netpoll *np)
{
	ASSERT_RTNL();

	/* Wait for transmitting packets to finish before freeing. */
	synchronize_net();

	__netpoll_cleanup(np);
	kfree(np);
}
EXPORT_SYMBOL_GPL(__netpoll_free);
862
do_netpoll_cleanup(struct netpoll * np)863 void do_netpoll_cleanup(struct netpoll *np)
864 {
865 __netpoll_cleanup(np);
866 netdev_put(np->dev, &np->dev_tracker);
867 np->dev = NULL;
868 }
869 EXPORT_SYMBOL(do_netpoll_cleanup);
870
netpoll_cleanup(struct netpoll * np)871 void netpoll_cleanup(struct netpoll *np)
872 {
873 rtnl_lock();
874 if (!np->dev)
875 goto out;
876 do_netpoll_cleanup(np);
877 out:
878 rtnl_unlock();
879 }
880 EXPORT_SYMBOL(netpoll_cleanup);
881