1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Common framework for low-level network console, dump, and debugger code
4 *
5 * Sep 8 2003 Matt Mackall <mpm@selenic.com>
6 *
7 * based on the netconsole code from:
8 *
9 * Copyright (C) 2001 Ingo Molnar <mingo@redhat.com>
10 * Copyright (C) 2002 Red Hat, Inc.
11 */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/moduleparam.h>
16 #include <linux/kernel.h>
17 #include <linux/netdevice.h>
18 #include <linux/etherdevice.h>
19 #include <linux/string.h>
20 #include <linux/if_arp.h>
21 #include <linux/inetdevice.h>
22 #include <linux/inet.h>
23 #include <linux/interrupt.h>
24 #include <linux/netpoll.h>
25 #include <linux/sched.h>
26 #include <linux/delay.h>
27 #include <linux/rcupdate.h>
28 #include <linux/workqueue.h>
29 #include <linux/slab.h>
30 #include <linux/export.h>
31 #include <linux/if_vlan.h>
32 #include <net/tcp.h>
33 #include <net/udp.h>
34 #include <net/addrconf.h>
35 #include <net/ndisc.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/unaligned.h>
38 #include <trace/events/napi.h>
39 #include <linux/kconfig.h>
40
/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

/* Largest UDP payload, in bytes, that a pool skb is sized to carry. */
#define MAX_UDP_CHUNK 1460
/* refill_skbs() stops adding buffers once the pool holds this many. */
#define MAX_SKBS 32
/* Microseconds of udelay() between direct transmit attempts. */
#define USEC_PER_POLL 50

/* Allocation size of one pool skb: Ethernet + IPv4 + UDP headers + payload. */
#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)

/* Defined further down; netpoll_poll_dev() needs it before that point. */
static void zap_completion_queue(void);

/*
 * Seconds netpoll_setup() waits for carrier after it had to bring the
 * device up itself.
 */
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);
60
netpoll_start_xmit(struct sk_buff * skb,struct net_device * dev,struct netdev_queue * txq)61 static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
62 struct net_device *dev,
63 struct netdev_queue *txq)
64 {
65 netdev_tx_t status = NETDEV_TX_OK;
66 netdev_features_t features;
67
68 features = netif_skb_features(skb);
69
70 if (skb_vlan_tag_present(skb) &&
71 !vlan_hw_offload_capable(features, skb->vlan_proto)) {
72 skb = __vlan_hwaccel_push_inside(skb);
73 if (unlikely(!skb)) {
74 /* This is actually a packet drop, but we
75 * don't want the code that calls this
76 * function to try and operate on a NULL skb.
77 */
78 goto out;
79 }
80 }
81
82 status = netdev_start_xmit(skb, dev, txq, false);
83
84 out:
85 return status;
86 }
87
queue_process(struct work_struct * work)88 static void queue_process(struct work_struct *work)
89 {
90 struct netpoll_info *npinfo =
91 container_of(work, struct netpoll_info, tx_work.work);
92 struct sk_buff *skb;
93 unsigned long flags;
94
95 while ((skb = skb_dequeue(&npinfo->txq))) {
96 struct net_device *dev = skb->dev;
97 struct netdev_queue *txq;
98 unsigned int q_index;
99
100 if (!netif_device_present(dev) || !netif_running(dev)) {
101 kfree_skb(skb);
102 continue;
103 }
104
105 local_irq_save(flags);
106 /* check if skb->queue_mapping is still valid */
107 q_index = skb_get_queue_mapping(skb);
108 if (unlikely(q_index >= dev->real_num_tx_queues)) {
109 q_index = q_index % dev->real_num_tx_queues;
110 skb_set_queue_mapping(skb, q_index);
111 }
112 txq = netdev_get_tx_queue(dev, q_index);
113 HARD_TX_LOCK(dev, txq, smp_processor_id());
114 if (netif_xmit_frozen_or_stopped(txq) ||
115 !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
116 skb_queue_head(&npinfo->txq, skb);
117 HARD_TX_UNLOCK(dev, txq);
118 local_irq_restore(flags);
119
120 schedule_delayed_work(&npinfo->tx_work, HZ/10);
121 return;
122 }
123 HARD_TX_UNLOCK(dev, txq);
124 local_irq_restore(flags);
125 }
126 }
127
netif_local_xmit_active(struct net_device * dev)128 static int netif_local_xmit_active(struct net_device *dev)
129 {
130 int i;
131
132 for (i = 0; i < dev->num_tx_queues; i++) {
133 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
134
135 if (netif_tx_owned(txq, smp_processor_id()))
136 return 1;
137 }
138
139 return 0;
140 }
141
poll_one_napi(struct napi_struct * napi)142 static void poll_one_napi(struct napi_struct *napi)
143 {
144 int work;
145
146 /* If we set this bit but see that it has already been set,
147 * that indicates that napi has been disabled and we need
148 * to abort this operation
149 */
150 if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
151 return;
152
153 /* We explicitly pass the polling call a budget of 0 to
154 * indicate that we are clearing the Tx path only.
155 */
156 work = napi->poll(napi, 0);
157 WARN_ONCE(work, "%pS exceeded budget in poll\n", napi->poll);
158 trace_napi_poll(napi, work, 0);
159
160 clear_bit(NAPI_STATE_NPSVC, &napi->state);
161 }
162
poll_napi(struct net_device * dev)163 static void poll_napi(struct net_device *dev)
164 {
165 struct napi_struct *napi;
166 int cpu = smp_processor_id();
167
168 list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
169 if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) {
170 poll_one_napi(napi);
171 smp_store_release(&napi->poll_owner, -1);
172 }
173 }
174 }
175
netpoll_poll_dev(struct net_device * dev)176 void netpoll_poll_dev(struct net_device *dev)
177 {
178 struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
179 const struct net_device_ops *ops;
180
181 /* Don't do any rx activity if the dev_lock mutex is held
182 * the dev_open/close paths use this to block netpoll activity
183 * while changing device state
184 */
185 if (!ni || down_trylock(&ni->dev_lock))
186 return;
187
188 /* Some drivers will take the same locks in poll and xmit,
189 * we can't poll if local CPU is already in xmit.
190 */
191 if (!netif_running(dev) || netif_local_xmit_active(dev)) {
192 up(&ni->dev_lock);
193 return;
194 }
195
196 ops = dev->netdev_ops;
197 if (ops->ndo_poll_controller)
198 ops->ndo_poll_controller(dev);
199
200 poll_napi(dev);
201
202 up(&ni->dev_lock);
203
204 zap_completion_queue();
205 }
206 EXPORT_SYMBOL(netpoll_poll_dev);
207
netpoll_poll_disable(struct net_device * dev)208 void netpoll_poll_disable(struct net_device *dev)
209 {
210 struct netpoll_info *ni;
211
212 might_sleep();
213 ni = rtnl_dereference(dev->npinfo);
214 if (ni)
215 down(&ni->dev_lock);
216 }
217
netpoll_poll_enable(struct net_device * dev)218 void netpoll_poll_enable(struct net_device *dev)
219 {
220 struct netpoll_info *ni;
221
222 ni = rtnl_dereference(dev->npinfo);
223 if (ni)
224 up(&ni->dev_lock);
225 }
226
refill_skbs(struct netpoll * np)227 static void refill_skbs(struct netpoll *np)
228 {
229 struct sk_buff_head *skb_pool;
230 struct sk_buff *skb;
231
232 skb_pool = &np->skb_pool;
233
234 while (READ_ONCE(skb_pool->qlen) < MAX_SKBS) {
235 skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
236 if (!skb)
237 break;
238
239 skb_queue_tail(skb_pool, skb);
240 }
241 }
242
zap_completion_queue(void)243 static void zap_completion_queue(void)
244 {
245 unsigned long flags;
246 struct softnet_data *sd = &get_cpu_var(softnet_data);
247
248 if (sd->completion_queue) {
249 struct sk_buff *clist;
250
251 local_irq_save(flags);
252 clist = sd->completion_queue;
253 sd->completion_queue = NULL;
254 local_irq_restore(flags);
255
256 while (clist != NULL) {
257 struct sk_buff *skb = clist;
258 clist = clist->next;
259 if (!skb_irq_freeable(skb)) {
260 refcount_set(&skb->users, 1);
261 dev_kfree_skb_any(skb); /* put this one back */
262 } else {
263 __kfree_skb(skb);
264 }
265 }
266 }
267
268 put_cpu_var(softnet_data);
269 }
270
find_skb(struct netpoll * np,int len,int reserve)271 static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
272 {
273 int count = 0;
274 struct sk_buff *skb;
275
276 zap_completion_queue();
277 repeat:
278
279 skb = alloc_skb(len, GFP_ATOMIC);
280 if (!skb) {
281 skb = skb_dequeue(&np->skb_pool);
282 schedule_work(&np->refill_wq);
283 }
284
285 if (!skb) {
286 if (++count < 10) {
287 netpoll_poll_dev(np->dev);
288 goto repeat;
289 }
290 return NULL;
291 }
292
293 refcount_set(&skb->users, 1);
294 skb_reserve(skb, reserve);
295 return skb;
296 }
297
netpoll_owner_active(struct net_device * dev)298 static int netpoll_owner_active(struct net_device *dev)
299 {
300 struct napi_struct *napi;
301
302 list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) {
303 if (READ_ONCE(napi->poll_owner) == smp_processor_id())
304 return 1;
305 }
306 return 0;
307 }
308
309 /* call with IRQ disabled */
__netpoll_send_skb(struct netpoll * np,struct sk_buff * skb)310 static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
311 {
312 netdev_tx_t status = NETDEV_TX_BUSY;
313 netdev_tx_t ret = NET_XMIT_DROP;
314 struct net_device *dev;
315 unsigned long tries;
316 /* It is up to the caller to keep npinfo alive. */
317 struct netpoll_info *npinfo;
318
319 lockdep_assert_irqs_disabled();
320
321 dev = np->dev;
322 /* npinfo->txq belongs to np->dev, so retries must stay bound to it. */
323 skb->dev = dev;
324 rcu_read_lock();
325 npinfo = rcu_dereference_bh(dev->npinfo);
326
327 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
328 dev_kfree_skb_irq(skb);
329 goto out;
330 }
331
332 /* don't get messages out of order, and no recursion */
333 if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
334 struct netdev_queue *txq;
335
336 txq = netdev_core_pick_tx(dev, skb, NULL);
337
338 /* try until next clock tick */
339 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
340 tries > 0; --tries) {
341 if (HARD_TX_TRYLOCK(dev, txq)) {
342 if (!netif_xmit_stopped(txq))
343 status = netpoll_start_xmit(skb, dev, txq);
344
345 HARD_TX_UNLOCK(dev, txq);
346
347 if (dev_xmit_complete(status))
348 break;
349
350 }
351
352 /* tickle device maybe there is some cleanup */
353 netpoll_poll_dev(np->dev);
354
355 udelay(USEC_PER_POLL);
356 }
357
358 WARN_ONCE(!irqs_disabled(),
359 "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pS)\n",
360 dev->name, dev->netdev_ops->ndo_start_xmit);
361
362 }
363
364 if (!dev_xmit_complete(status)) {
365 skb_queue_tail(&npinfo->txq, skb);
366 schedule_delayed_work(&npinfo->tx_work,0);
367 }
368 ret = NETDEV_TX_OK;
369 out:
370 rcu_read_unlock();
371 return ret;
372 }
373
netpoll_udp_checksum(struct netpoll * np,struct sk_buff * skb,int len)374 static void netpoll_udp_checksum(struct netpoll *np, struct sk_buff *skb,
375 int len)
376 {
377 struct udphdr *udph;
378 int udp_len;
379
380 udp_len = len + sizeof(struct udphdr);
381 udph = udp_hdr(skb);
382
383 /* check needs to be set, since it will be consumed in csum_partial */
384 udph->check = 0;
385 if (np->ipv6)
386 udph->check = csum_ipv6_magic(&np->local_ip.in6,
387 &np->remote_ip.in6,
388 udp_len, IPPROTO_UDP,
389 csum_partial(udph, udp_len, 0));
390 else
391 udph->check = csum_tcpudp_magic(np->local_ip.ip,
392 np->remote_ip.ip,
393 udp_len, IPPROTO_UDP,
394 csum_partial(udph, udp_len, 0));
395 if (udph->check == 0)
396 udph->check = CSUM_MANGLED_0;
397 }
398
netpoll_send_skb(struct netpoll * np,struct sk_buff * skb)399 netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
400 {
401 unsigned long flags;
402 netdev_tx_t ret;
403
404 if (unlikely(!np)) {
405 dev_kfree_skb_irq(skb);
406 ret = NET_XMIT_DROP;
407 } else {
408 local_irq_save(flags);
409 ret = __netpoll_send_skb(np, skb);
410 local_irq_restore(flags);
411 }
412 return ret;
413 }
414 EXPORT_SYMBOL(netpoll_send_skb);
415
push_ipv6(struct netpoll * np,struct sk_buff * skb,int len)416 static void push_ipv6(struct netpoll *np, struct sk_buff *skb, int len)
417 {
418 struct ipv6hdr *ip6h;
419
420 skb_push(skb, sizeof(struct ipv6hdr));
421 skb_reset_network_header(skb);
422 ip6h = ipv6_hdr(skb);
423
424 /* ip6h->version = 6; ip6h->priority = 0; */
425 *(unsigned char *)ip6h = 0x60;
426 ip6h->flow_lbl[0] = 0;
427 ip6h->flow_lbl[1] = 0;
428 ip6h->flow_lbl[2] = 0;
429
430 ip6h->payload_len = htons(sizeof(struct udphdr) + len);
431 ip6h->nexthdr = IPPROTO_UDP;
432 ip6h->hop_limit = 32;
433 ip6h->saddr = np->local_ip.in6;
434 ip6h->daddr = np->remote_ip.in6;
435
436 skb->protocol = htons(ETH_P_IPV6);
437 }
438
push_ipv4(struct netpoll * np,struct sk_buff * skb,int len)439 static void push_ipv4(struct netpoll *np, struct sk_buff *skb, int len)
440 {
441 static atomic_t ip_ident;
442 struct iphdr *iph;
443 int ip_len;
444
445 ip_len = len + sizeof(struct udphdr) + sizeof(struct iphdr);
446
447 skb_push(skb, sizeof(struct iphdr));
448 skb_reset_network_header(skb);
449 iph = ip_hdr(skb);
450
451 /* iph->version = 4; iph->ihl = 5; */
452 *(unsigned char *)iph = 0x45;
453 iph->tos = 0;
454 put_unaligned(htons(ip_len), &iph->tot_len);
455 iph->id = htons(atomic_inc_return(&ip_ident));
456 iph->frag_off = 0;
457 iph->ttl = 64;
458 iph->protocol = IPPROTO_UDP;
459 iph->check = 0;
460 put_unaligned(np->local_ip.ip, &iph->saddr);
461 put_unaligned(np->remote_ip.ip, &iph->daddr);
462 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
463 skb->protocol = htons(ETH_P_IP);
464 }
465
push_udp(struct netpoll * np,struct sk_buff * skb,int len)466 static void push_udp(struct netpoll *np, struct sk_buff *skb, int len)
467 {
468 struct udphdr *udph;
469 int udp_len;
470
471 udp_len = len + sizeof(struct udphdr);
472
473 skb_push(skb, sizeof(struct udphdr));
474 skb_reset_transport_header(skb);
475
476 udph = udp_hdr(skb);
477 udph->source = htons(np->local_port);
478 udph->dest = htons(np->remote_port);
479 udph->len = htons(udp_len);
480
481 netpoll_udp_checksum(np, skb, len);
482 }
483
push_eth(struct netpoll * np,struct sk_buff * skb)484 static void push_eth(struct netpoll *np, struct sk_buff *skb)
485 {
486 struct ethhdr *eth;
487
488 eth = skb_push(skb, ETH_HLEN);
489 skb_reset_mac_header(skb);
490 ether_addr_copy(eth->h_source, np->dev->dev_addr);
491 ether_addr_copy(eth->h_dest, np->remote_mac);
492 if (np->ipv6)
493 eth->h_proto = htons(ETH_P_IPV6);
494 else
495 eth->h_proto = htons(ETH_P_IP);
496 }
497
netpoll_send_udp(struct netpoll * np,const char * msg,int len)498 int netpoll_send_udp(struct netpoll *np, const char *msg, int len)
499 {
500 int total_len, ip_len, udp_len;
501 struct sk_buff *skb;
502
503 if (!IS_ENABLED(CONFIG_PREEMPT_RT))
504 WARN_ON_ONCE(!irqs_disabled());
505
506 udp_len = len + sizeof(struct udphdr);
507 if (np->ipv6)
508 ip_len = udp_len + sizeof(struct ipv6hdr);
509 else
510 ip_len = udp_len + sizeof(struct iphdr);
511
512 total_len = ip_len + LL_RESERVED_SPACE(np->dev);
513
514 skb = find_skb(np, total_len + np->dev->needed_tailroom,
515 total_len - len);
516 if (!skb)
517 return -ENOMEM;
518
519 skb_copy_to_linear_data(skb, msg, len);
520 skb_put(skb, len);
521
522 push_udp(np, skb, len);
523 if (np->ipv6)
524 push_ipv6(np, skb, len);
525 else
526 push_ipv4(np, skb, len);
527 push_eth(np, skb);
528 skb->dev = np->dev;
529
530 return (int)netpoll_send_skb(np, skb);
531 }
532 EXPORT_SYMBOL(netpoll_send_udp);
533
534
skb_pool_flush(struct netpoll * np)535 static void skb_pool_flush(struct netpoll *np)
536 {
537 struct sk_buff_head *skb_pool;
538
539 cancel_work_sync(&np->refill_wq);
540 skb_pool = &np->skb_pool;
541 skb_queue_purge_reason(skb_pool, SKB_CONSUMED);
542 }
543
refill_skbs_work_handler(struct work_struct * work)544 static void refill_skbs_work_handler(struct work_struct *work)
545 {
546 struct netpoll *np =
547 container_of(work, struct netpoll, refill_wq);
548
549 refill_skbs(np);
550 }
551
__netpoll_setup(struct netpoll * np,struct net_device * ndev)552 int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
553 {
554 struct netpoll_info *npinfo;
555 const struct net_device_ops *ops;
556 int err;
557
558 skb_queue_head_init(&np->skb_pool);
559 INIT_WORK(&np->refill_wq, refill_skbs_work_handler);
560
561 if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
562 np_err(np, "%s doesn't support polling, aborting\n",
563 ndev->name);
564 err = -ENOTSUPP;
565 goto out;
566 }
567
568 npinfo = rtnl_dereference(ndev->npinfo);
569 if (!npinfo) {
570 npinfo = kmalloc_obj(*npinfo);
571 if (!npinfo) {
572 err = -ENOMEM;
573 goto out;
574 }
575
576 sema_init(&npinfo->dev_lock, 1);
577 skb_queue_head_init(&npinfo->txq);
578 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
579
580 refcount_set(&npinfo->refcnt, 1);
581
582 ops = ndev->netdev_ops;
583 if (ops->ndo_netpoll_setup) {
584 err = ops->ndo_netpoll_setup(ndev);
585 if (err)
586 goto free_npinfo;
587 }
588 } else {
589 refcount_inc(&npinfo->refcnt);
590 }
591
592 np->dev = ndev;
593 strscpy(np->dev_name, ndev->name, IFNAMSIZ);
594
595 /* fill up the skb queue */
596 refill_skbs(np);
597
598 /* last thing to do is link it to the net device structure */
599 rcu_assign_pointer(ndev->npinfo, npinfo);
600
601 return 0;
602
603 free_npinfo:
604 kfree(npinfo);
605 out:
606 return err;
607 }
608 EXPORT_SYMBOL_GPL(__netpoll_setup);
609
610 /*
611 * Returns a pointer to a string representation of the identifier used
612 * to select the egress interface for the given netpoll instance. buf
613 * is used to format np->dev_mac when np->dev_name is empty; bufsz must
614 * be at least MAC_ADDR_STR_LEN + 1 to fit the formatted MAC address
615 * and its NUL terminator.
616 */
egress_dev(struct netpoll * np,char * buf,size_t bufsz)617 static char *egress_dev(struct netpoll *np, char *buf, size_t bufsz)
618 {
619 if (np->dev_name[0])
620 return np->dev_name;
621
622 snprintf(buf, bufsz, "%pM", np->dev_mac);
623 return buf;
624 }
625
netpoll_wait_carrier(struct netpoll * np,struct net_device * ndev,unsigned int timeout)626 static void netpoll_wait_carrier(struct netpoll *np, struct net_device *ndev,
627 unsigned int timeout)
628 {
629 unsigned long atmost;
630
631 atmost = jiffies + timeout * HZ;
632 while (!netif_carrier_ok(ndev)) {
633 if (time_after(jiffies, atmost)) {
634 np_notice(np, "timeout waiting for carrier\n");
635 break;
636 }
637 msleep(1);
638 }
639 }
640
641 /*
642 * Take the IPv6 from ndev and populate local_ip structure in netpoll
643 */
netpoll_take_ipv6(struct netpoll * np,struct net_device * ndev)644 static int netpoll_take_ipv6(struct netpoll *np, struct net_device *ndev)
645 {
646 char buf[MAC_ADDR_STR_LEN + 1];
647 int err = -EDESTADDRREQ;
648 struct inet6_dev *idev;
649
650 if (!IS_ENABLED(CONFIG_IPV6)) {
651 np_err(np, "IPv6 is not supported %s, aborting\n",
652 egress_dev(np, buf, sizeof(buf)));
653 return -EINVAL;
654 }
655
656 idev = __in6_dev_get(ndev);
657 if (idev) {
658 struct inet6_ifaddr *ifp;
659
660 read_lock_bh(&idev->lock);
661 list_for_each_entry(ifp, &idev->addr_list, if_list) {
662 if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
663 !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
664 continue;
665 /* Got the IP, let's return */
666 np->local_ip.in6 = ifp->addr;
667 err = 0;
668 break;
669 }
670 read_unlock_bh(&idev->lock);
671 }
672 if (err) {
673 np_err(np, "no IPv6 address for %s, aborting\n",
674 egress_dev(np, buf, sizeof(buf)));
675 return err;
676 }
677
678 np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
679 return 0;
680 }
681
682 /*
683 * Take the IPv4 from ndev and populate local_ip structure in netpoll
684 */
netpoll_take_ipv4(struct netpoll * np,struct net_device * ndev)685 static int netpoll_take_ipv4(struct netpoll *np, struct net_device *ndev)
686 {
687 char buf[MAC_ADDR_STR_LEN + 1];
688 const struct in_ifaddr *ifa;
689 struct in_device *in_dev;
690
691 in_dev = __in_dev_get_rtnl(ndev);
692 if (!in_dev) {
693 np_err(np, "no IP address for %s, aborting\n",
694 egress_dev(np, buf, sizeof(buf)));
695 return -EDESTADDRREQ;
696 }
697
698 ifa = rtnl_dereference(in_dev->ifa_list);
699 if (!ifa) {
700 np_err(np, "no IP address for %s, aborting\n",
701 egress_dev(np, buf, sizeof(buf)));
702 return -EDESTADDRREQ;
703 }
704
705 np->local_ip.ip = ifa->ifa_local;
706 np_info(np, "local IP %pI4\n", &np->local_ip.ip);
707
708 return 0;
709 }
710
711 /*
712 * Test whether the caller left np->local_ip unset, so that
713 * netpoll_setup() should auto-populate it from the egress device.
714 *
715 * np->local_ip is a union of __be32 (IPv4) and struct in6_addr (IPv6),
716 * so an IPv6 address whose first 4 bytes are zero (e.g. ::1, ::2,
717 * IPv4-mapped ::ffff:a.b.c.d) must not be tested via the IPv4 arm —
718 * doing so would misclassify a caller-supplied address as unset and
719 * silently overwrite it with whatever address the device exposes.
720 */
netpoll_local_ip_unset(const struct netpoll * np)721 static bool netpoll_local_ip_unset(const struct netpoll *np)
722 {
723 if (np->ipv6)
724 return ipv6_addr_any(&np->local_ip.in6);
725 return !np->local_ip.ip;
726 }
727
netpoll_setup(struct netpoll * np)728 int netpoll_setup(struct netpoll *np)
729 {
730 struct net *net = current->nsproxy->net_ns;
731 char buf[MAC_ADDR_STR_LEN + 1];
732 struct net_device *ndev = NULL;
733 bool ip_overwritten = false;
734 int err;
735
736 rtnl_lock();
737 if (np->dev_name[0])
738 ndev = __dev_get_by_name(net, np->dev_name);
739 else if (is_valid_ether_addr(np->dev_mac))
740 ndev = dev_getbyhwaddr(net, ARPHRD_ETHER, np->dev_mac);
741
742 if (!ndev) {
743 np_err(np, "%s doesn't exist, aborting\n",
744 egress_dev(np, buf, sizeof(buf)));
745 err = -ENODEV;
746 goto unlock;
747 }
748 netdev_hold(ndev, &np->dev_tracker, GFP_KERNEL);
749
750 if (netdev_master_upper_dev_get(ndev)) {
751 np_err(np, "%s is a slave device, aborting\n",
752 egress_dev(np, buf, sizeof(buf)));
753 err = -EBUSY;
754 goto put;
755 }
756
757 if (!netif_running(ndev)) {
758 np_info(np, "device %s not up yet, forcing it\n",
759 egress_dev(np, buf, sizeof(buf)));
760
761 err = dev_open(ndev, NULL);
762 if (err) {
763 np_err(np, "failed to open %s\n", ndev->name);
764 goto put;
765 }
766
767 rtnl_unlock();
768 netpoll_wait_carrier(np, ndev, carrier_timeout);
769 rtnl_lock();
770 }
771
772 if (netpoll_local_ip_unset(np)) {
773 if (!np->ipv6) {
774 err = netpoll_take_ipv4(np, ndev);
775 if (err)
776 goto put;
777 } else {
778 err = netpoll_take_ipv6(np, ndev);
779 if (err)
780 goto put;
781 }
782 ip_overwritten = true;
783 }
784
785 err = __netpoll_setup(np, ndev);
786 if (err)
787 goto flush;
788 rtnl_unlock();
789
790 /* Make sure all NAPI polls which started before dev->npinfo
791 * was visible have exited before we start calling NAPI poll.
792 * NAPI skips locking if dev->npinfo is NULL.
793 */
794 synchronize_rcu();
795
796 return 0;
797
798 flush:
799 skb_pool_flush(np);
800 put:
801 DEBUG_NET_WARN_ON_ONCE(np->dev);
802 if (ip_overwritten)
803 memset(&np->local_ip, 0, sizeof(np->local_ip));
804 netdev_put(ndev, &np->dev_tracker);
805 unlock:
806 rtnl_unlock();
807 return err;
808 }
809 EXPORT_SYMBOL(netpoll_setup);
810
rcu_cleanup_netpoll_info(struct rcu_head * rcu_head)811 static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
812 {
813 struct netpoll_info *npinfo =
814 container_of(rcu_head, struct netpoll_info, rcu);
815
816 skb_queue_purge(&npinfo->txq);
817
818 /* we can't call cancel_delayed_work_sync here, as we are in softirq */
819 cancel_delayed_work(&npinfo->tx_work);
820
821 /* clean after last, unfinished work */
822 __skb_queue_purge(&npinfo->txq);
823 /* now cancel it again */
824 cancel_delayed_work(&npinfo->tx_work);
825 kfree(npinfo);
826 }
827
__netpoll_cleanup(struct netpoll * np)828 static void __netpoll_cleanup(struct netpoll *np)
829 {
830 struct netpoll_info *npinfo;
831
832 npinfo = rtnl_dereference(np->dev->npinfo);
833 if (!npinfo)
834 return;
835
836 /* At this point, there is a single npinfo instance per netdevice, and
837 * its refcnt tracks how many netpoll structures are linked to it. We
838 * only perform npinfo cleanup when the refcnt decrements to zero.
839 */
840 if (refcount_dec_and_test(&npinfo->refcnt)) {
841 const struct net_device_ops *ops;
842
843 ops = np->dev->netdev_ops;
844 if (ops->ndo_netpoll_cleanup)
845 ops->ndo_netpoll_cleanup(np->dev);
846
847 RCU_INIT_POINTER(np->dev->npinfo, NULL);
848 call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info);
849 }
850
851 skb_pool_flush(np);
852 }
853
/*
 * Detach @np from its device and free the netpoll structure itself.
 *
 * Asserts that RTNL is held.  Unlike do_netpoll_cleanup(), the device
 * reference is not dropped here and np->dev is not cleared.
 */
void __netpoll_free(struct netpoll *np)
{
	ASSERT_RTNL();

	/* Wait for transmitting packets to finish before freeing. */
	synchronize_net();
	__netpoll_cleanup(np);
	kfree(np);
}
EXPORT_SYMBOL_GPL(__netpoll_free);
864
do_netpoll_cleanup(struct netpoll * np)865 void do_netpoll_cleanup(struct netpoll *np)
866 {
867 __netpoll_cleanup(np);
868 netdev_put(np->dev, &np->dev_tracker);
869 np->dev = NULL;
870 }
871 EXPORT_SYMBOL(do_netpoll_cleanup);
872
netpoll_cleanup(struct netpoll * np)873 void netpoll_cleanup(struct netpoll *np)
874 {
875 rtnl_lock();
876 if (!np->dev)
877 goto out;
878 do_netpoll_cleanup(np);
879 out:
880 rtnl_unlock();
881 }
882 EXPORT_SYMBOL(netpoll_cleanup);
883