xref: /linux/net/core/neighbour.c (revision 1bf20cc62a54f95db32529871534751fb6b1b73c)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Generic address resolution entity
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
8  *
9  *	Fixes:
10  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
11  *	Harald Welte		Add neighbour cache statistics like rtstat
12  */
13 
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 
16 #include <linux/slab.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/socket.h>
21 #include <linux/netdevice.h>
22 #include <linux/proc_fs.h>
23 #ifdef CONFIG_SYSCTL
24 #include <linux/sysctl.h>
25 #endif
26 #include <linux/times.h>
27 #include <net/net_namespace.h>
28 #include <net/neighbour.h>
29 #include <net/arp.h>
30 #include <net/dst.h>
31 #include <net/ip.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
41 
42 #include <trace/events/neigh.h>
43 
/* Verbosity ceiling for neigh_dbg(): a message is only handed to pr_debug()
 * when its level does not exceed this value.
 */
#define NEIGH_DEBUG 1

/* Emit a pr_debug() message if @level <= NEIGH_DEBUG.
 *
 * @level is parenthesized so that an expression argument (a ternary, a
 * bitwise operation, ...) is compared as a whole instead of being re-parsed
 * around the "<=" operator.
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if ((level) <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
50 
/* Mask applied to the proxy-neighbour hash value; proxy bucket indices
 * therefore range over 0..PNEIGH_HASHMASK.
 */
#define PNEIGH_HASHMASK		0xF

/* Forward declarations of helpers that are used before they are defined. */
static void neigh_timer_handler(struct timer_list *t);
static void neigh_notify(struct neighbour *n, int type, int flags, u32 pid);
static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid);
static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif
62 
63 static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
64 {
65 	int i;
66 
67 	switch (family) {
68 	default:
69 		DEBUG_NET_WARN_ON_ONCE(1);
70 		fallthrough; /* to avoid panic by null-ptr-deref */
71 	case AF_INET:
72 		i = NEIGH_ARP_TABLE;
73 		break;
74 	case AF_INET6:
75 		i = NEIGH_ND_TABLE;
76 		break;
77 	}
78 
79 	return &dev->neighbours[i];
80 }
81 
82 /*
   Neighbour hash table buckets are protected by tbl->lock.

   - All scans of and updates to the hash buckets MUST be done under this
     lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send anything to the network.
     Doing so results in deadlocks if a backend/driver wants to use the
     neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock.
92 
93    Neighbour entries are protected:
94    - with reference count.
95    - with rwlock neigh->lock
96 
97    Reference count prevents destruction.
98 
   neigh->lock mainly serializes the link-layer address data and its
   validity state. However, the same lock also protects other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   dev->hard_header is assumed to be simple and not to
   make callbacks into the neighbour tables.
108  */
109 
110 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
111 {
112 	kfree_skb(skb);
113 	return -ENETDOWN;
114 }
115 
/* Announce the removal of @neigh and drop one reference to it.
 *
 * Fires the cleanup tracepoint, sends an RTM_DELNEIGH notification, runs the
 * NETEVENT_NEIGH_UPDATE notifier chain and finally calls neigh_release().
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	/* Released last: every call above still takes @neigh as an argument. */
	neigh_release(neigh);
}
123 
/*
 * Pick a random reachable time in the interval (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 *
 * A @base of zero yields zero.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return get_random_u32_below(base) + (base >> 1);
}
134 EXPORT_SYMBOL(neigh_rand_reach_time);
135 
136 static void neigh_mark_dead(struct neighbour *n)
137 {
138 	n->dead = 1;
139 	if (!list_empty(&n->gc_list)) {
140 		list_del_init(&n->gc_list);
141 		atomic_dec(&n->tbl->gc_entries);
142 	}
143 	if (!list_empty(&n->managed_list))
144 		list_del_init(&n->managed_list);
145 }
146 
147 static void neigh_update_gc_list(struct neighbour *n)
148 {
149 	bool on_gc_list, exempt_from_gc;
150 
151 	spin_lock_bh(&n->tbl->lock);
152 	write_lock(&n->lock);
153 	if (n->dead)
154 		goto out;
155 
156 	/* remove from the gc list if new state is permanent or if neighbor is
157 	 * externally learned / validated; otherwise entry should be on the gc
158 	 * list
159 	 */
160 	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
161 			 n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED);
162 	on_gc_list = !list_empty(&n->gc_list);
163 
164 	if (exempt_from_gc && on_gc_list) {
165 		list_del_init(&n->gc_list);
166 		atomic_dec(&n->tbl->gc_entries);
167 	} else if (!exempt_from_gc && !on_gc_list) {
168 		/* add entries to the tail; cleaning removes from the front */
169 		list_add_tail(&n->gc_list, &n->tbl->gc_list);
170 		atomic_inc(&n->tbl->gc_entries);
171 	}
172 out:
173 	write_unlock(&n->lock);
174 	spin_unlock_bh(&n->tbl->lock);
175 }
176 
177 static void neigh_update_managed_list(struct neighbour *n)
178 {
179 	bool on_managed_list, add_to_managed;
180 
181 	spin_lock_bh(&n->tbl->lock);
182 	write_lock(&n->lock);
183 	if (n->dead)
184 		goto out;
185 
186 	add_to_managed = n->flags & NTF_MANAGED;
187 	on_managed_list = !list_empty(&n->managed_list);
188 
189 	if (!add_to_managed && on_managed_list)
190 		list_del_init(&n->managed_list);
191 	else if (add_to_managed && !on_managed_list)
192 		list_add_tail(&n->managed_list, &n->tbl->managed_list);
193 out:
194 	write_unlock(&n->lock);
195 	spin_unlock_bh(&n->tbl->lock);
196 }
197 
198 static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
199 			       bool *gc_update, bool *managed_update)
200 {
201 	u32 ndm_flags, old_flags = neigh->flags;
202 
203 	if (!(flags & NEIGH_UPDATE_F_ADMIN))
204 		return;
205 
206 	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
207 	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
208 	ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_VALIDATED) ? NTF_EXT_VALIDATED : 0;
209 
210 	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
211 		if (ndm_flags & NTF_EXT_LEARNED)
212 			neigh->flags |= NTF_EXT_LEARNED;
213 		else
214 			neigh->flags &= ~NTF_EXT_LEARNED;
215 		*notify = 1;
216 		*gc_update = true;
217 	}
218 	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
219 		if (ndm_flags & NTF_MANAGED)
220 			neigh->flags |= NTF_MANAGED;
221 		else
222 			neigh->flags &= ~NTF_MANAGED;
223 		*notify = 1;
224 		*managed_update = true;
225 	}
226 	if ((old_flags ^ ndm_flags) & NTF_EXT_VALIDATED) {
227 		if (ndm_flags & NTF_EXT_VALIDATED)
228 			neigh->flags |= NTF_EXT_VALIDATED;
229 		else
230 			neigh->flags &= ~NTF_EXT_VALIDATED;
231 		*notify = 1;
232 		*gc_update = true;
233 	}
234 }
235 
236 bool neigh_remove_one(struct neighbour *n)
237 {
238 	bool retval = false;
239 
240 	write_lock(&n->lock);
241 	if (refcount_read(&n->refcnt) == 1) {
242 		hlist_del_rcu(&n->hash);
243 		hlist_del_rcu(&n->dev_list);
244 		neigh_mark_dead(n);
245 		retval = true;
246 	}
247 	write_unlock(&n->lock);
248 	if (retval)
249 		neigh_cleanup_and_release(n);
250 	return retval;
251 }
252 
/* Forced, synchronous garbage collection of @tbl's gc list.
 *
 * Walks tbl->gc_list under tbl->lock.  An entry is a candidate only while
 * its reference count is 1; a candidate is removed when it is NUD_FAILED or
 * NUD_NOARP, when tbl->is_multicast() reports its key as multicast, or when
 * its 'updated' stamp lies outside the last five seconds.
 *
 * The walk ends when enough entries were removed to bring gc_entries (as
 * sampled on entry) down to gc_thresh2, or when the scan has run for more
 * than roughly one millisecond.
 *
 * Returns the number of entries removed.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	/* Number of removals needed to get back down to gc_thresh2. */
	int max_clean = atomic_read(&tbl->gc_entries) -
			READ_ONCE(tbl->gc_thresh2);
	/* Deadline for the scan: one millisecond from now. */
	u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
	/* Start of the "recently updated" window: five seconds ago. */
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;
	int loop = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	spin_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (n->nud_state == NUD_NOARP) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    !time_in_range(n->updated, tref, jiffies))
				remove = true;
			write_unlock(&n->lock);

			/* neigh_remove_one() re-checks the refcount itself. */
			if (remove && neigh_remove_one(n))
				shrunk++;
			if (shrunk >= max_clean)
				break;
			/* Only look at the clock once per 16 candidates. */
			if (++loop == 16) {
				/* Out of time: leave without updating last_flush. */
				if (ktime_get_ns() > tmax)
					goto unlock;
				loop = 0;
			}
		}
	}

	WRITE_ONCE(tbl->last_flush, jiffies);
unlock:
	spin_unlock_bh(&tbl->lock);

	return shrunk;
}
298 
299 static void neigh_add_timer(struct neighbour *n, unsigned long when)
300 {
301 	/* Use safe distance from the jiffies - LONG_MAX point while timer
302 	 * is running in DELAY/PROBE state but still show to user space
303 	 * large times in the past.
304 	 */
305 	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
306 
307 	neigh_hold(n);
308 	if (!time_in_range(n->confirmed, mint, jiffies))
309 		n->confirmed = mint;
310 	if (time_before(n->used, n->confirmed))
311 		n->used = n->confirmed;
312 	if (unlikely(mod_timer(&n->timer, when))) {
313 		printk("NEIGH: BUG, double timer add, state is %x\n",
314 		       n->nud_state);
315 		dump_stack();
316 	}
317 }
318 
319 static int neigh_del_timer(struct neighbour *n)
320 {
321 	if ((n->nud_state & NUD_IN_TIMER) &&
322 	    timer_delete(&n->timer)) {
323 		neigh_release(n);
324 		return 1;
325 	}
326 	return 0;
327 }
328 
329 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
330 						   int family)
331 {
332 	switch (family) {
333 	case AF_INET:
334 		return __in_dev_arp_parms_get_rcu(dev);
335 	case AF_INET6:
336 		return __in6_dev_nd_parms_get_rcu(dev);
337 	}
338 	return NULL;
339 }
340 
341 static void neigh_parms_qlen_dec(struct net_device *dev, int family)
342 {
343 	struct neigh_parms *p;
344 
345 	rcu_read_lock();
346 	p = neigh_get_dev_parms_rcu(dev, family);
347 	if (p)
348 		p->qlen--;
349 	rcu_read_unlock();
350 }
351 
352 static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
353 			       int family)
354 {
355 	struct sk_buff_head tmp;
356 	unsigned long flags;
357 	struct sk_buff *skb;
358 
359 	skb_queue_head_init(&tmp);
360 	spin_lock_irqsave(&list->lock, flags);
361 	skb = skb_peek(list);
362 	while (skb != NULL) {
363 		struct sk_buff *skb_next = skb_peek_next(skb, list);
364 		struct net_device *dev = skb->dev;
365 
366 		if (net == NULL || net_eq(dev_net(dev), net)) {
367 			neigh_parms_qlen_dec(dev, family);
368 			__skb_unlink(skb, list);
369 			__skb_queue_tail(&tmp, skb);
370 		}
371 		skb = skb_next;
372 	}
373 	spin_unlock_irqrestore(&list->lock, flags);
374 
375 	while ((skb = __skb_dequeue(&tmp))) {
376 		dev_put(skb->dev);
377 		kfree_skb(skb);
378 	}
379 }
380 
/* Unconditionally take @n out of service.
 *
 * Unlinks @n from its hash chain and per-device list, cancels its timer and
 * marks it dead.  If other references remain, the entry is additionally
 * moved to a safe state (queue purged, output blackholed, state reduced to
 * NUD_NOARP or NUD_NONE).  Finally one reference is dropped through
 * neigh_cleanup_and_release().
 *
 * The callers in this file (neigh_flush_dev(), neigh_flush_table()) run
 * with tbl->lock held.
 */
static void neigh_flush_one(struct neighbour *n)
{
	hlist_del_rcu(&n->hash);
	hlist_del_rcu(&n->dev_list);

	write_lock(&n->lock);

	neigh_del_timer(n);
	neigh_mark_dead(n);

	if (refcount_read(&n->refcnt) != 1) {
		/* The most unpleasant situation.
		 * We must destroy neighbour entry,
		 * but someone still uses it.
		 *
		 * The destroy will be delayed until
		 * the last user releases us, but
		 * we must kill timers etc. and move
		 * it to safe state.
		 */
		__skb_queue_purge(&n->arp_queue);
		n->arp_queue_len_bytes = 0;
		/* From now on every transmit through this entry is dropped. */
		WRITE_ONCE(n->output, neigh_blackhole);

		if (n->nud_state & NUD_VALID)
			n->nud_state = NUD_NOARP;
		else
			n->nud_state = NUD_NONE;

		neigh_dbg(2, "neigh %p is stray\n", n);
	}

	write_unlock(&n->lock);

	neigh_cleanup_and_release(n);
}
417 
418 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
419 			    bool skip_perm)
420 {
421 	struct hlist_head *dev_head;
422 	struct hlist_node *tmp;
423 	struct neighbour *n;
424 
425 	dev_head = neigh_get_dev_table(dev, tbl->family);
426 
427 	hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
428 		if (skip_perm &&
429 		    (n->nud_state & NUD_PERMANENT ||
430 		     n->flags & NTF_EXT_VALIDATED))
431 			continue;
432 
433 		neigh_flush_one(n);
434 	}
435 }
436 
437 static void neigh_flush_table(struct neigh_table *tbl)
438 {
439 	struct neigh_hash_table *nht;
440 	int i;
441 
442 	nht = rcu_dereference_protected(tbl->nht,
443 					lockdep_is_held(&tbl->lock));
444 
445 	for (i = 0; i < (1 << nht->hash_shift); i++) {
446 		struct hlist_node *tmp;
447 		struct neighbour *n;
448 
449 		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i])
450 			neigh_flush_one(n);
451 	}
452 }
453 
454 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
455 {
456 	spin_lock_bh(&tbl->lock);
457 	neigh_flush_dev(tbl, dev, false);
458 	spin_unlock_bh(&tbl->lock);
459 }
460 
461 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
462 			  bool skip_perm)
463 {
464 	spin_lock_bh(&tbl->lock);
465 	if (likely(dev)) {
466 		neigh_flush_dev(tbl, dev, skip_perm);
467 	} else {
468 		DEBUG_NET_WARN_ON_ONCE(skip_perm);
469 		neigh_flush_table(tbl);
470 	}
471 	spin_unlock_bh(&tbl->lock);
472 
473 	pneigh_ifdown(tbl, dev, skip_perm);
474 	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
475 			   tbl->family);
476 	if (skb_queue_empty_lockless(&tbl->proxy_queue))
477 		timer_delete_sync(&tbl->proxy_timer);
478 	return 0;
479 }
480 
481 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
482 {
483 	__neigh_ifdown(tbl, dev, true);
484 	return 0;
485 }
486 
487 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
488 {
489 	__neigh_ifdown(tbl, dev, false);
490 	return 0;
491 }
492 
/* Allocate and initialise a neighbour entry for @tbl on @dev.
 *
 * Unless @exempt_from_gc is set, the entry is accounted in tbl->gc_entries
 * up front.  If that count has reached gc_thresh3, or has reached gc_thresh2
 * while the last flush is more than five seconds old, a forced gc is run;
 * the allocation is refused when the gc freed nothing and gc_thresh3 is
 * still exceeded.
 *
 * The returned entry has a reference count of 1, state NUD_NONE, the
 * blackhole output handler, @flags as its flags and dead set to 1.  It is
 * not linked into any list yet.
 *
 * Returns the new entry, or NULL on table overflow or allocation failure
 * (in which case the gc_entries accounting is undone).
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u32 flags, bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries, gc_thresh3;

	if (exempt_from_gc)
		goto do_alloc;

	/* Reserve a gc slot; 'entries' is the count before this entry. */
	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
	if (entries >= gc_thresh3 ||
	    (entries >= READ_ONCE(tbl->gc_thresh2) &&
	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	/* Room for the protocol part plus the device's private area. */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	n->flags	  = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	/* Stays "dead" until the caller links the entry into the table. */
	n->dead		  = 1;
	INIT_LIST_HEAD(&n->gc_list);
	INIT_LIST_HEAD(&n->managed_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	/* Give back the gc slot reserved above. */
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}
549 
550 static void neigh_get_hash_rnd(u32 *x)
551 {
552 	*x = get_random_u32() | 1;
553 }
554 
555 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
556 {
557 	size_t size = (1 << shift) * sizeof(struct hlist_head);
558 	struct hlist_head *hash_heads;
559 	struct neigh_hash_table *ret;
560 	int i;
561 
562 	ret = kmalloc_obj(*ret, GFP_ATOMIC);
563 	if (!ret)
564 		return NULL;
565 
566 	hash_heads = kzalloc(size, GFP_ATOMIC);
567 	if (!hash_heads) {
568 		kfree(ret);
569 		return NULL;
570 	}
571 	ret->hash_heads = hash_heads;
572 	ret->hash_shift = shift;
573 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
574 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
575 	return ret;
576 }
577 
578 static void neigh_hash_free_rcu(struct rcu_head *head)
579 {
580 	struct neigh_hash_table *nht = container_of(head,
581 						    struct neigh_hash_table,
582 						    rcu);
583 
584 	kfree(nht->hash_heads);
585 	kfree(nht);
586 }
587 
588 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
589 						unsigned long new_shift)
590 {
591 	unsigned int i, hash;
592 	struct neigh_hash_table *new_nht, *old_nht;
593 
594 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
595 
596 	old_nht = rcu_dereference_protected(tbl->nht,
597 					    lockdep_is_held(&tbl->lock));
598 	new_nht = neigh_hash_alloc(new_shift);
599 	if (!new_nht)
600 		return old_nht;
601 
602 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
603 		struct hlist_node *tmp;
604 		struct neighbour *n;
605 
606 		neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
607 			hash = tbl->hash(n->primary_key, n->dev,
608 					 new_nht->hash_rnd);
609 
610 			hash >>= (32 - new_nht->hash_shift);
611 
612 			hlist_del_rcu(&n->hash);
613 			hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
614 		}
615 	}
616 
617 	rcu_assign_pointer(tbl->nht, new_nht);
618 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
619 	return new_nht;
620 }
621 
622 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
623 			       struct net_device *dev)
624 {
625 	struct neighbour *n;
626 
627 	NEIGH_CACHE_STAT_INC(tbl, lookups);
628 
629 	rcu_read_lock();
630 	n = __neigh_lookup_noref(tbl, pkey, dev);
631 	if (n) {
632 		if (!refcount_inc_not_zero(&n->refcnt))
633 			n = NULL;
634 		NEIGH_CACHE_STAT_INC(tbl, hits);
635 	}
636 
637 	rcu_read_unlock();
638 	return n;
639 }
640 EXPORT_SYMBOL(neigh_lookup);
641 
/* Create a neighbour entry for (@pkey, @dev) and insert it into @tbl.
 *
 * @flags:          initial NTF_* flags of the entry
 * @exempt_from_gc: keep the entry off the gc list and out of gc_entries
 * @want_ref:       take an extra reference on the returned entry
 *
 * The entry is allocated and run through the table constructor, the
 * device's ndo_neigh_construct and the parms' neigh_setup hook before
 * tbl->lock is taken.  Under the lock the hash table is grown if needed.
 * If an entry with the same device and key already exists, it is returned
 * instead and the freshly built one is released.
 *
 * Returns the entry (new or pre-existing), or an ERR_PTR(): -ENOBUFS if the
 * allocation failed, -EINVAL if the parms are dead, or the error of a
 * failing setup hook.
 */
static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
		struct net_device *dev, u32 flags,
		bool exempt_from_gc, bool want_ref)
{
	u32 hash_val, key_len = tbl->key_len;
	struct neighbour *n1, *rc, *n;
	struct neigh_hash_table *nht;
	int error;

	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);

	/* Protocol specific setup. */
	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Start out with a confirmation stamp two base reachable times old. */
	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	spin_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/* Double the table once there are more entries than buckets. */
	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* An entry for this device and key already exists: hand that one
	 * back and drop the new entry on the way out.
	 */
	neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	/* The entry goes live: clear the dead mark set by neigh_alloc(). */
	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
	if (n->flags & NTF_MANAGED)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
	if (want_ref)
		neigh_hold(n);
	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);

	hlist_add_head_rcu(&n->dev_list,
			   neigh_get_dev_table(dev, tbl->family));

	spin_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	spin_unlock_bh(&tbl->lock);
out_neigh_release:
	/* Undo the gc accounting done by neigh_alloc(), then drop the entry. */
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}
734 
735 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
736 				 struct net_device *dev, bool want_ref)
737 {
738 	bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);
739 
740 	return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
741 }
742 EXPORT_SYMBOL(__neigh_create);
743 
744 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
745 {
746 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
747 	hash_val ^= (hash_val >> 16);
748 	hash_val ^= hash_val >> 8;
749 	hash_val ^= hash_val >> 4;
750 	hash_val &= PNEIGH_HASHMASK;
751 	return hash_val;
752 }
753 
754 struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
755 				   struct net *net, const void *pkey,
756 				   struct net_device *dev)
757 {
758 	struct pneigh_entry *n;
759 	unsigned int key_len;
760 	u32 hash_val;
761 
762 	key_len = tbl->key_len;
763 	hash_val = pneigh_hash(pkey, key_len);
764 	n = rcu_dereference_check(tbl->phash_buckets[hash_val],
765 				  lockdep_is_held(&tbl->phash_lock));
766 
767 	while (n) {
768 		if (!memcmp(n->key, pkey, key_len) &&
769 		    net_eq(pneigh_net(n), net) &&
770 		    (n->dev == dev || !n->dev))
771 			return n;
772 
773 		n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
774 	}
775 
776 	return NULL;
777 }
778 
/* Create or update the proxy entry for (@net, @pkey, @dev) in @tbl.
 *
 * Everything runs under tbl->phash_lock.  If pneigh_lookup() finds an
 * entry, only its attributes are updated.  Otherwise a new entry is
 * allocated, takes a tracked reference on @dev, is passed to
 * tbl->pconstructor (if any) and is inserted at the head of its bucket.
 *
 * In both cases the entry's flags and permanent attribute are overwritten;
 * its protocol is only overwritten when @protocol is non-zero.
 *
 * Returns 0 on success, or -ENOBUFS if the allocation or the constructor
 * fails.
 */
int pneigh_create(struct neigh_table *tbl, struct net *net,
		  const void *pkey, struct net_device *dev,
		  u32 flags, u8 protocol, bool permanent)
{
	struct pneigh_entry *n;
	unsigned int key_len;
	u32 hash_val;
	int err = 0;

	mutex_lock(&tbl->phash_lock);

	n = pneigh_lookup(tbl, net, pkey, dev);
	if (n)
		goto update;

	key_len = tbl->key_len;
	/* The key is stored right behind the entry itself. */
	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n) {
		err = -ENOBUFS;
		goto out;
	}

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Undo the device reference before discarding the entry. */
		netdev_put(dev, &n->dev_tracker);
		kfree(n);
		err = -ENOBUFS;
		goto out;
	}

	/* Link the entry first, then publish it to RCU readers. */
	hash_val = pneigh_hash(pkey, key_len);
	n->next = tbl->phash_buckets[hash_val];
	rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
update:
	WRITE_ONCE(n->flags, flags);
	n->permanent = permanent;
	if (protocol)
		WRITE_ONCE(n->protocol, protocol);
out:
	mutex_unlock(&tbl->phash_lock);
	return err;
}
825 
826 static void pneigh_destroy(struct rcu_head *rcu)
827 {
828 	struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
829 
830 	netdev_put(n->dev, &n->dev_tracker);
831 	kfree(n);
832 }
833 
834 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
835 		  struct net_device *dev)
836 {
837 	struct pneigh_entry *n, __rcu **np;
838 	unsigned int key_len;
839 	u32 hash_val;
840 
841 	key_len = tbl->key_len;
842 	hash_val = pneigh_hash(pkey, key_len);
843 
844 	mutex_lock(&tbl->phash_lock);
845 
846 	for (np = &tbl->phash_buckets[hash_val];
847 	     (n = rcu_dereference_protected(*np, 1)) != NULL;
848 	     np = &n->next) {
849 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
850 		    net_eq(pneigh_net(n), net)) {
851 			rcu_assign_pointer(*np, n->next);
852 
853 			mutex_unlock(&tbl->phash_lock);
854 
855 			if (tbl->pdestructor)
856 				tbl->pdestructor(n);
857 
858 			call_rcu(&n->rcu, pneigh_destroy);
859 			return 0;
860 		}
861 	}
862 
863 	mutex_unlock(&tbl->phash_lock);
864 	return -ENOENT;
865 }
866 
867 static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
868 			  bool skip_perm)
869 {
870 	struct pneigh_entry *n, __rcu **np;
871 	LIST_HEAD(head);
872 	u32 h;
873 
874 	mutex_lock(&tbl->phash_lock);
875 
876 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
877 		np = &tbl->phash_buckets[h];
878 		while ((n = rcu_dereference_protected(*np, 1)) != NULL) {
879 			if (skip_perm && n->permanent)
880 				goto skip;
881 			if (!dev || n->dev == dev) {
882 				rcu_assign_pointer(*np, n->next);
883 				list_add(&n->free_node, &head);
884 				continue;
885 			}
886 skip:
887 			np = &n->next;
888 		}
889 	}
890 
891 	mutex_unlock(&tbl->phash_lock);
892 
893 	while (!list_empty(&head)) {
894 		n = list_first_entry(&head, typeof(*n), free_node);
895 		list_del(&n->free_node);
896 
897 		if (tbl->pdestructor)
898 			tbl->pdestructor(n);
899 
900 		call_rcu(&n->rcu, pneigh_destroy);
901 	}
902 }
903 
904 static inline void neigh_parms_put(struct neigh_parms *parms)
905 {
906 	if (refcount_dec_and_test(&parms->refcnt))
907 		kfree(parms);
908 }
909 
/*
 *	Final teardown of @neigh.
 *
 *	The neighbour must already be out of the table (neigh->dead set);
 *	otherwise a warning and a stack dump are emitted and nothing is
 *	freed.
 *
 *	Purges the pending queue, calls the device's ndo_neigh_destroy hook
 *	if there is one, drops the device and parms references, decrements
 *	the table's entry count and frees the entry after an RCU grace
 *	period.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A timer that is still pending at this point is reported. */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	netdev_put(dev, &neigh->dev_tracker);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
945 EXPORT_SYMBOL(neigh_destroy);
946 
/* The neighbour's state is suspicious: disable the fast path by switching
 * the output handler to neigh->ops->output.
 *
 * Called with neigh write-locked.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->output);
}
958 
/* The neighbour's state is OK: enable the fast path by switching the output
 * handler to neigh->ops->connected_output.
 *
 * Called with neigh write-locked.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}
970 
/* Periodic garbage collection work item of a neighbour table.
 *
 * Runs under tbl->lock, which is dropped briefly after each bucket:
 *  - every 300 seconds, neigh_set_reach_time() is called for each parms on
 *    tbl->parms_list;
 *  - if the table holds at least gc_thresh1 entries, all hash buckets are
 *    scanned and unreferenced entries that are NUD_FAILED or were not used
 *    within GC_STALETIME are removed.  Entries that are permanent, in a
 *    timer-driven state, externally learned or externally validated are
 *    never touched.
 *
 * The work re-queues itself to run again after BASE_REACHABLE_TIME / 2.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neigh_hash_table *nht;
	struct hlist_node *tmp;
	struct neighbour *n;
	unsigned int i;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	spin_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;

		WRITE_ONCE(tbl->last_rand, jiffies);
		list_for_each_entry(p, &tbl->parms_list, list)
			neigh_set_reach_time(p);
	}

	/* Below gc_thresh1 no entry is collected at all. */
	if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags &
			     (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) {
				write_unlock(&n->lock);
				continue;
			}

			/* A confirmation that is not in the future also
			 * counts as use.
			 */
			if (time_before(n->used, n->confirmed) &&
			    time_is_before_eq_jiffies(n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     !time_in_range_open(jiffies, n->used,
						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				hlist_del_rcu(&n->hash);
				hlist_del_rcu(&n->dev_list);
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		spin_unlock_bh(&tbl->lock);
		cond_resched();
		spin_lock_bh(&tbl->lock);
		/* Re-read the table pointer; it may have been replaced. */
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	spin_unlock_bh(&tbl->lock);
}
1050 
1051 static __inline__ int neigh_max_probes(struct neighbour *n)
1052 {
1053 	struct neigh_parms *p = n->parms;
1054 	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
1055 	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
1056 	        NEIGH_VAR(p, MCAST_PROBES));
1057 }
1058 
/* Report resolution failure for everything queued on @neigh.
 *
 * Entered and left with neigh->lock write-held; the lock is dropped around
 * each error_report() call.  Packets are reported one by one for as long as
 * the entry remains in NUD_FAILED state; whatever is still queued
 * afterwards is purged without a report.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		/* The state is re-checked after every report because the
		 * lock was dropped in between.
		 */
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
1083 
1084 static void neigh_probe(struct neighbour *neigh)
1085 	__releases(neigh->lock)
1086 {
1087 	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1088 	/* keep skb alive even if arp_queue overflows */
1089 	if (skb)
1090 		skb = skb_clone(skb, GFP_ATOMIC);
1091 	write_unlock(&neigh->lock);
1092 	if (neigh->ops->solicit)
1093 		neigh->ops->solicit(neigh, skb);
1094 	atomic_inc(&neigh->probes);
1095 	consume_skb(skb);
1096 }
1097 
1098 /* Called when a timer expires for a neighbour entry. */
1099 
1100 static void neigh_timer_handler(struct timer_list *t)
1101 {
1102 	unsigned long now, next;
1103 	struct neighbour *neigh = timer_container_of(neigh, t, timer);
1104 	bool skip_probe = false;
1105 	unsigned int state;
1106 	int notify = 0;
1107 
1108 	write_lock(&neigh->lock);
1109 
1110 	state = neigh->nud_state;
1111 	now = jiffies;
1112 	next = now + HZ;
1113 
1114 	if (!(state & NUD_IN_TIMER))
1115 		goto out;
1116 
1117 	if (state & NUD_REACHABLE) {
1118 		if (time_before_eq(now,
1119 				   neigh->confirmed + neigh->parms->reachable_time)) {
1120 			neigh_dbg(2, "neigh %p is still alive\n", neigh);
1121 			next = neigh->confirmed + neigh->parms->reachable_time;
1122 		} else if (time_before_eq(now,
1123 					  neigh->used +
1124 					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1125 			neigh_dbg(2, "neigh %p is delayed\n", neigh);
1126 			WRITE_ONCE(neigh->nud_state, NUD_DELAY);
1127 			neigh->updated = jiffies;
1128 			neigh_suspect(neigh);
1129 			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1130 		} else {
1131 			neigh_dbg(2, "neigh %p is suspected\n", neigh);
1132 			WRITE_ONCE(neigh->nud_state, NUD_STALE);
1133 			neigh->updated = jiffies;
1134 			neigh_suspect(neigh);
1135 			notify = 1;
1136 		}
1137 	} else if (state & NUD_DELAY) {
1138 		if (time_before_eq(now,
1139 				   neigh->confirmed +
1140 				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1141 			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1142 			WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
1143 			neigh->updated = jiffies;
1144 			neigh_connect(neigh);
1145 			notify = 1;
1146 			next = neigh->confirmed + neigh->parms->reachable_time;
1147 		} else {
1148 			neigh_dbg(2, "neigh %p is probed\n", neigh);
1149 			WRITE_ONCE(neigh->nud_state, NUD_PROBE);
1150 			neigh->updated = jiffies;
1151 			atomic_set(&neigh->probes, 0);
1152 			notify = 1;
1153 			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1154 					 HZ/100);
1155 		}
1156 	} else {
1157 		/* NUD_PROBE|NUD_INCOMPLETE */
1158 		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
1159 	}
1160 
1161 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1162 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1163 		if (neigh->nud_state == NUD_PROBE &&
1164 		    neigh->flags & NTF_EXT_VALIDATED) {
1165 			WRITE_ONCE(neigh->nud_state, NUD_STALE);
1166 			neigh->updated = jiffies;
1167 		} else {
1168 			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
1169 			neigh_invalidate(neigh);
1170 		}
1171 		notify = 1;
1172 		skip_probe = true;
1173 	}
1174 
1175 	if (notify)
1176 		__neigh_notify(neigh, RTM_NEWNEIGH, 0, 0);
1177 
1178 	if (skip_probe)
1179 		goto out;
1180 
1181 	if (neigh->nud_state & NUD_IN_TIMER) {
1182 		if (time_before(next, jiffies + HZ/100))
1183 			next = jiffies + HZ/100;
1184 		if (!mod_timer(&neigh->timer, next))
1185 			neigh_hold(neigh);
1186 	}
1187 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1188 		neigh_probe(neigh);
1189 	} else {
1190 out:
1191 		write_unlock(&neigh->lock);
1192 	}
1193 
1194 	if (notify)
1195 		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
1196 
1197 	trace_neigh_timer_handler(neigh, 0);
1198 
1199 	neigh_release(neigh);
1200 }
1201 
1202 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
1203 		       const bool immediate_ok)
1204 {
1205 	int rc;
1206 	bool immediate_probe = false;
1207 
1208 	write_lock_bh(&neigh->lock);
1209 
1210 	rc = 0;
1211 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1212 		goto out_unlock_bh;
1213 	if (neigh->dead)
1214 		goto out_dead;
1215 
1216 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1217 		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1218 		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
1219 			unsigned long next, now = jiffies;
1220 
1221 			atomic_set(&neigh->probes,
1222 				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
1223 			neigh_del_timer(neigh);
1224 			WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
1225 			neigh->updated = now;
1226 			if (!immediate_ok) {
1227 				next = now + 1;
1228 			} else {
1229 				immediate_probe = true;
1230 				next = now + max(NEIGH_VAR(neigh->parms,
1231 							   RETRANS_TIME),
1232 						 HZ / 100);
1233 			}
1234 			neigh_add_timer(neigh, next);
1235 		} else {
1236 			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
1237 			neigh->updated = jiffies;
1238 			write_unlock_bh(&neigh->lock);
1239 
1240 			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
1241 			return 1;
1242 		}
1243 	} else if (neigh->nud_state & NUD_STALE) {
1244 		neigh_dbg(2, "neigh %p is delayed\n", neigh);
1245 		neigh_del_timer(neigh);
1246 		WRITE_ONCE(neigh->nud_state, NUD_DELAY);
1247 		neigh->updated = jiffies;
1248 		neigh_add_timer(neigh, jiffies +
1249 				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1250 	}
1251 
1252 	if (neigh->nud_state == NUD_INCOMPLETE) {
1253 		if (skb) {
1254 			while (neigh->arp_queue_len_bytes + skb->truesize >
1255 			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1256 				struct sk_buff *buff;
1257 
1258 				buff = __skb_dequeue(&neigh->arp_queue);
1259 				if (!buff)
1260 					break;
1261 				neigh->arp_queue_len_bytes -= buff->truesize;
1262 				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
1263 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1264 			}
1265 			skb_dst_force(skb);
1266 			__skb_queue_tail(&neigh->arp_queue, skb);
1267 			neigh->arp_queue_len_bytes += skb->truesize;
1268 		}
1269 		rc = 1;
1270 	}
1271 out_unlock_bh:
1272 	if (immediate_probe)
1273 		neigh_probe(neigh);
1274 	else
1275 		write_unlock(&neigh->lock);
1276 	local_bh_enable();
1277 	trace_neigh_event_send_done(neigh, rc);
1278 	return rc;
1279 
1280 out_dead:
1281 	if (neigh->nud_state & NUD_STALE)
1282 		goto out_unlock_bh;
1283 	write_unlock_bh(&neigh->lock);
1284 	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
1285 	trace_neigh_event_send_dead(neigh, 1);
1286 	return 1;
1287 }
1288 EXPORT_SYMBOL(__neigh_event_send);
1289 
1290 static void neigh_update_hhs(struct neighbour *neigh)
1291 {
1292 	struct hh_cache *hh;
1293 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1294 		= NULL;
1295 
1296 	if (neigh->dev->header_ops)
1297 		update = neigh->dev->header_ops->cache_update;
1298 
1299 	if (update) {
1300 		hh = &neigh->hh;
1301 		if (READ_ONCE(hh->hh_len)) {
1302 			write_seqlock_bh(&hh->hh_lock);
1303 			update(hh, neigh->dev, neigh->ha);
1304 			write_sequnlock_bh(&hh->hh_lock);
1305 		}
1306 	}
1307 }
1308 
1309 static void neigh_update_process_arp_queue(struct neighbour *neigh)
1310 	__releases(neigh->lock)
1311 	__acquires(neigh->lock)
1312 {
1313 	struct sk_buff *skb;
1314 
1315 	/* Again: avoid deadlock if something went wrong. */
1316 	while (neigh->nud_state & NUD_VALID &&
1317 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1318 		struct dst_entry *dst = skb_dst(skb);
1319 		struct neighbour *n2, *n1 = neigh;
1320 
1321 		write_unlock_bh(&neigh->lock);
1322 
1323 		rcu_read_lock();
1324 
1325 		/* Why not just use 'neigh' as-is?  The problem is that
1326 		 * things such as shaper, eql, and sch_teql can end up
1327 		 * using alternative, different, neigh objects to output
1328 		 * the packet in the output path.  So what we need to do
1329 		 * here is re-lookup the top-level neigh in the path so
1330 		 * we can reinject the packet there.
1331 		 */
1332 		n2 = NULL;
1333 		if (dst &&
1334 		    READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) {
1335 			n2 = dst_neigh_lookup_skb(dst, skb);
1336 			if (n2)
1337 				n1 = n2;
1338 		}
1339 		READ_ONCE(n1->output)(n1, skb);
1340 		if (n2)
1341 			neigh_release(n2);
1342 		rcu_read_unlock();
1343 
1344 		write_lock_bh(&neigh->lock);
1345 	}
1346 	__skb_queue_purge(&neigh->arp_queue);
1347 	neigh->arp_queue_len_bytes = 0;
1348 }
1349 
1350 /* Generic update routine.
1351    -- lladdr is new lladdr or NULL, if it is not supplied.
1352    -- new    is new state.
1353    -- flags
1354 	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1355 				if it is different.
1356 	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1357 				lladdr instead of overriding it
1358 				if it is different.
1359 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1360 	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
1361 	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
1362 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1363 				NTF_ROUTER flag.
1364 	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
1365 				a router.
1366 	NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed
1367 				or invalidated.
1368 
1369    Caller MUST hold reference count on the entry.
1370  */
1371 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1372 			  u8 new, u32 flags, u32 nlmsg_pid,
1373 			  struct netlink_ext_ack *extack)
1374 {
1375 	bool gc_update = false, managed_update = false;
1376 	bool process_arp_queue = false;
1377 	int update_isrouter = 0;
1378 	struct net_device *dev;
1379 	int err, notify = 0;
1380 	u8 old;
1381 
1382 	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1383 
1384 	write_lock_bh(&neigh->lock);
1385 
1386 	dev    = neigh->dev;
1387 	old    = neigh->nud_state;
1388 	err    = -EPERM;
1389 
1390 	if (neigh->dead) {
1391 		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1392 		new = old;
1393 		goto out;
1394 	}
1395 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1396 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1397 		goto out;
1398 
1399 	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
1400 	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
1401 		new = old & ~NUD_PERMANENT;
1402 		WRITE_ONCE(neigh->nud_state, new);
1403 		err = 0;
1404 		goto out;
1405 	}
1406 
1407 	if (!(new & NUD_VALID)) {
1408 		neigh_del_timer(neigh);
1409 		if (old & NUD_CONNECTED)
1410 			neigh_suspect(neigh);
1411 		WRITE_ONCE(neigh->nud_state, new);
1412 		err = 0;
1413 		notify = old & NUD_VALID;
1414 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1415 		    (new & NUD_FAILED)) {
1416 			neigh_invalidate(neigh);
1417 			notify = 1;
1418 		}
1419 		goto out;
1420 	}
1421 
1422 	/* Compare new lladdr with cached one */
1423 	if (!dev->addr_len) {
1424 		/* First case: device needs no address. */
1425 		lladdr = neigh->ha;
1426 	} else if (lladdr) {
1427 		/* The second case: if something is already cached
1428 		   and a new address is proposed:
1429 		   - compare new & old
1430 		   - if they are different, check override flag
1431 		 */
1432 		if ((old & NUD_VALID) &&
1433 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1434 			lladdr = neigh->ha;
1435 	} else {
1436 		/* No address is supplied; if we know something,
1437 		   use it, otherwise discard the request.
1438 		 */
1439 		err = -EINVAL;
1440 		if (!(old & NUD_VALID)) {
1441 			NL_SET_ERR_MSG(extack, "No link layer address given");
1442 			goto out;
1443 		}
1444 		lladdr = neigh->ha;
1445 	}
1446 
1447 	/* Update confirmed timestamp for neighbour entry after we
1448 	 * received ARP packet even if it doesn't change IP to MAC binding.
1449 	 */
1450 	if (new & NUD_CONNECTED)
1451 		neigh->confirmed = jiffies;
1452 
1453 	/* If entry was valid and address is not changed,
1454 	   do not change entry state, if new one is STALE.
1455 	 */
1456 	err = 0;
1457 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1458 	if (old & NUD_VALID) {
1459 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1460 			update_isrouter = 0;
1461 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1462 			    (old & NUD_CONNECTED)) {
1463 				lladdr = neigh->ha;
1464 				new = NUD_STALE;
1465 			} else
1466 				goto out;
1467 		} else {
1468 			if (lladdr == neigh->ha && new == NUD_STALE &&
1469 			    !(flags & NEIGH_UPDATE_F_ADMIN))
1470 				new = old;
1471 		}
1472 	}
1473 
1474 	/* Update timestamp only once we know we will make a change to the
1475 	 * neighbour entry. Otherwise we risk to move the locktime window with
1476 	 * noop updates and ignore relevant ARP updates.
1477 	 */
1478 	if (new != old || lladdr != neigh->ha)
1479 		neigh->updated = jiffies;
1480 
1481 	if (new != old) {
1482 		neigh_del_timer(neigh);
1483 		if (new & NUD_PROBE)
1484 			atomic_set(&neigh->probes, 0);
1485 		if (new & NUD_IN_TIMER)
1486 			neigh_add_timer(neigh, (jiffies +
1487 						((new & NUD_REACHABLE) ?
1488 						 neigh->parms->reachable_time :
1489 						 0)));
1490 		WRITE_ONCE(neigh->nud_state, new);
1491 		notify = 1;
1492 	}
1493 
1494 	if (lladdr != neigh->ha) {
1495 		write_seqlock(&neigh->ha_lock);
1496 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1497 		write_sequnlock(&neigh->ha_lock);
1498 		neigh_update_hhs(neigh);
1499 		if (!(new & NUD_CONNECTED))
1500 			neigh->confirmed = jiffies -
1501 				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1502 		notify = 1;
1503 	}
1504 	if (new == old)
1505 		goto out;
1506 	if (new & NUD_CONNECTED)
1507 		neigh_connect(neigh);
1508 	else
1509 		neigh_suspect(neigh);
1510 
1511 	if (!(old & NUD_VALID))
1512 		process_arp_queue = true;
1513 
1514 out:
1515 	if (update_isrouter)
1516 		neigh_update_is_router(neigh, flags, &notify);
1517 
1518 	if (notify)
1519 		__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
1520 
1521 	if (process_arp_queue)
1522 		neigh_update_process_arp_queue(neigh);
1523 
1524 	write_unlock_bh(&neigh->lock);
1525 
1526 	if (((new ^ old) & NUD_PERMANENT) || gc_update)
1527 		neigh_update_gc_list(neigh);
1528 	if (managed_update)
1529 		neigh_update_managed_list(neigh);
1530 
1531 	if (notify)
1532 		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
1533 
1534 	trace_neigh_update_done(neigh, err);
1535 	return err;
1536 }
1537 
1538 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1539 		 u32 flags, u32 nlmsg_pid)
1540 {
1541 	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1542 }
1543 EXPORT_SYMBOL(neigh_update);
1544 
1545 /* Update the neigh to listen temporarily for probe responses, even if it is
1546  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1547  */
1548 void __neigh_set_probe_once(struct neighbour *neigh)
1549 {
1550 	if (neigh->dead)
1551 		return;
1552 	neigh->updated = jiffies;
1553 	if (!(neigh->nud_state & NUD_FAILED))
1554 		return;
1555 	WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
1556 	atomic_set(&neigh->probes, neigh_max_probes(neigh));
1557 	neigh_add_timer(neigh,
1558 			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1559 				      HZ/100));
1560 }
1561 EXPORT_SYMBOL(__neigh_set_probe_once);
1562 
1563 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1564 				 u8 *lladdr, void *saddr,
1565 				 struct net_device *dev)
1566 {
1567 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1568 						 lladdr || !dev->addr_len);
1569 	if (neigh)
1570 		neigh_update(neigh, lladdr, NUD_STALE,
1571 			     NEIGH_UPDATE_F_OVERRIDE, 0);
1572 	return neigh;
1573 }
1574 EXPORT_SYMBOL(neigh_event_ns);
1575 
1576 /* called with read_lock_bh(&n->lock); */
1577 static void neigh_hh_init(struct neighbour *n)
1578 {
1579 	struct net_device *dev = n->dev;
1580 	__be16 prot = n->tbl->protocol;
1581 	struct hh_cache	*hh = &n->hh;
1582 
1583 	write_lock_bh(&n->lock);
1584 
1585 	/* Only one thread can come in here and initialize the
1586 	 * hh_cache entry.
1587 	 */
1588 	if (!hh->hh_len)
1589 		dev->header_ops->cache(n, hh, prot);
1590 
1591 	write_unlock_bh(&n->lock);
1592 }
1593 
1594 /* Slow and careful. */
1595 
1596 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1597 {
1598 	int rc = 0;
1599 
1600 	if (!neigh_event_send(neigh, skb)) {
1601 		int err;
1602 		struct net_device *dev = neigh->dev;
1603 		unsigned int seq;
1604 
1605 		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1606 			neigh_hh_init(neigh);
1607 
1608 		do {
1609 			__skb_pull(skb, skb_network_offset(skb));
1610 			seq = read_seqbegin(&neigh->ha_lock);
1611 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1612 					      neigh->ha, NULL, skb->len);
1613 		} while (read_seqretry(&neigh->ha_lock, seq));
1614 
1615 		if (err >= 0)
1616 			rc = dev_queue_xmit(skb);
1617 		else
1618 			goto out_kfree_skb;
1619 	}
1620 out:
1621 	return rc;
1622 out_kfree_skb:
1623 	rc = -EINVAL;
1624 	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
1625 	goto out;
1626 }
1627 EXPORT_SYMBOL(neigh_resolve_output);
1628 
1629 /* As fast as possible without hh cache */
1630 
1631 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1632 {
1633 	struct net_device *dev = neigh->dev;
1634 	unsigned int seq;
1635 	int err;
1636 
1637 	do {
1638 		__skb_pull(skb, skb_network_offset(skb));
1639 		seq = read_seqbegin(&neigh->ha_lock);
1640 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1641 				      neigh->ha, NULL, skb->len);
1642 	} while (read_seqretry(&neigh->ha_lock, seq));
1643 
1644 	if (err >= 0)
1645 		err = dev_queue_xmit(skb);
1646 	else {
1647 		err = -EINVAL;
1648 		kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
1649 	}
1650 	return err;
1651 }
1652 
/* Output path that hands @skb straight to dev_queue_xmit(); @neigh is
 * not used.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = dev_queue_xmit(skb);

	return rc;
}
1657 
1658 static void neigh_managed_work(struct work_struct *work)
1659 {
1660 	struct neigh_table *tbl = container_of(work, struct neigh_table,
1661 					       managed_work.work);
1662 	struct neighbour *neigh;
1663 
1664 	spin_lock_bh(&tbl->lock);
1665 	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1666 		neigh_event_send_probe(neigh, NULL, false);
1667 	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1668 			   NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
1669 	spin_unlock_bh(&tbl->lock);
1670 }
1671 
1672 static void neigh_proxy_process(struct timer_list *t)
1673 {
1674 	struct neigh_table *tbl = timer_container_of(tbl, t, proxy_timer);
1675 	long sched_next = 0;
1676 	unsigned long now = jiffies;
1677 	struct sk_buff *skb, *n;
1678 
1679 	spin_lock(&tbl->proxy_queue.lock);
1680 
1681 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1682 		long tdif = NEIGH_CB(skb)->sched_next - now;
1683 
1684 		if (tdif <= 0) {
1685 			struct net_device *dev = skb->dev;
1686 
1687 			neigh_parms_qlen_dec(dev, tbl->family);
1688 			__skb_unlink(skb, &tbl->proxy_queue);
1689 
1690 			if (tbl->proxy_redo && netif_running(dev)) {
1691 				rcu_read_lock();
1692 				tbl->proxy_redo(skb);
1693 				rcu_read_unlock();
1694 			} else {
1695 				kfree_skb(skb);
1696 			}
1697 
1698 			dev_put(dev);
1699 		} else if (!sched_next || tdif < sched_next)
1700 			sched_next = tdif;
1701 	}
1702 	timer_delete(&tbl->proxy_timer);
1703 	if (sched_next)
1704 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1705 	spin_unlock(&tbl->proxy_queue.lock);
1706 }
1707 
1708 static unsigned long neigh_proxy_delay(struct neigh_parms *p)
1709 {
1710 	/* If proxy_delay is zero, do not call get_random_u32_below()
1711 	 * as it is undefined behavior.
1712 	 */
1713 	unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);
1714 
1715 	return proxy_delay ?
1716 	       jiffies + get_random_u32_below(proxy_delay) : jiffies;
1717 }
1718 
1719 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1720 		    struct sk_buff *skb)
1721 {
1722 	unsigned long sched_next = neigh_proxy_delay(p);
1723 
1724 	if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1725 		kfree_skb(skb);
1726 		return;
1727 	}
1728 
1729 	NEIGH_CB(skb)->sched_next = sched_next;
1730 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1731 
1732 	spin_lock(&tbl->proxy_queue.lock);
1733 	if (timer_delete(&tbl->proxy_timer)) {
1734 		if (time_before(tbl->proxy_timer.expires, sched_next))
1735 			sched_next = tbl->proxy_timer.expires;
1736 	}
1737 	skb_dst_drop(skb);
1738 	dev_hold(skb->dev);
1739 	__skb_queue_tail(&tbl->proxy_queue, skb);
1740 	p->qlen++;
1741 	mod_timer(&tbl->proxy_timer, sched_next);
1742 	spin_unlock(&tbl->proxy_queue.lock);
1743 }
1744 EXPORT_SYMBOL(pneigh_enqueue);
1745 
1746 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1747 						      struct net *net, int ifindex)
1748 {
1749 	struct neigh_parms *p;
1750 
1751 	list_for_each_entry(p, &tbl->parms_list, list) {
1752 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1753 		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1754 			return p;
1755 	}
1756 
1757 	return NULL;
1758 }
1759 
1760 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1761 				      struct neigh_table *tbl)
1762 {
1763 	struct neigh_parms *p;
1764 	struct net *net = dev_net(dev);
1765 	const struct net_device_ops *ops = dev->netdev_ops;
1766 
1767 	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1768 	if (p) {
1769 		p->tbl		  = tbl;
1770 		refcount_set(&p->refcnt, 1);
1771 		neigh_set_reach_time(p);
1772 		p->qlen = 0;
1773 		netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
1774 		p->dev = dev;
1775 		write_pnet(&p->net, net);
1776 		p->sysctl_table = NULL;
1777 
1778 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1779 			netdev_put(dev, &p->dev_tracker);
1780 			kfree(p);
1781 			return NULL;
1782 		}
1783 
1784 		spin_lock_bh(&tbl->lock);
1785 		list_add_rcu(&p->list, &tbl->parms.list);
1786 		spin_unlock_bh(&tbl->lock);
1787 
1788 		neigh_parms_data_state_cleanall(p);
1789 	}
1790 	return p;
1791 }
1792 EXPORT_SYMBOL(neigh_parms_alloc);
1793 
1794 static void neigh_rcu_free_parms(struct rcu_head *head)
1795 {
1796 	struct neigh_parms *parms =
1797 		container_of(head, struct neigh_parms, rcu_head);
1798 
1799 	neigh_parms_put(parms);
1800 }
1801 
1802 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1803 {
1804 	if (!parms || parms == &tbl->parms)
1805 		return;
1806 
1807 	spin_lock_bh(&tbl->lock);
1808 	list_del_rcu(&parms->list);
1809 	parms->dead = 1;
1810 	spin_unlock_bh(&tbl->lock);
1811 
1812 	netdev_put(parms->dev, &parms->dev_tracker);
1813 	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1814 }
1815 EXPORT_SYMBOL(neigh_parms_release);
1816 
/* Lock class passed to skb_queue_head_init_class() when neigh_table_init()
 * sets up a table's proxy_queue.
 */
static struct lock_class_key neigh_table_proxy_queue_class;

/* Registered tables, one slot per table index: set by neigh_table_init(),
 * reset to NULL by neigh_table_clear(), read by neigh_find_table().
 */
static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1820 
1821 void neigh_table_init(int index, struct neigh_table *tbl)
1822 {
1823 	unsigned long now = jiffies;
1824 	unsigned long phsize;
1825 
1826 	INIT_LIST_HEAD(&tbl->parms_list);
1827 	INIT_LIST_HEAD(&tbl->gc_list);
1828 	INIT_LIST_HEAD(&tbl->managed_list);
1829 
1830 	list_add(&tbl->parms.list, &tbl->parms_list);
1831 	write_pnet(&tbl->parms.net, &init_net);
1832 	refcount_set(&tbl->parms.refcnt, 1);
1833 	neigh_set_reach_time(&tbl->parms);
1834 	tbl->parms.qlen = 0;
1835 
1836 	tbl->stats = alloc_percpu(struct neigh_statistics);
1837 	if (!tbl->stats)
1838 		panic("cannot create neighbour cache statistics");
1839 
1840 #ifdef CONFIG_PROC_FS
1841 	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1842 			      &neigh_stat_seq_ops, tbl))
1843 		panic("cannot create neighbour proc dir entry");
1844 #endif
1845 
1846 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1847 
1848 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1849 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1850 
1851 	if (!tbl->nht || !tbl->phash_buckets)
1852 		panic("cannot allocate neighbour cache hashes");
1853 
1854 	if (!tbl->entry_size)
1855 		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1856 					tbl->key_len, NEIGH_PRIV_ALIGN);
1857 	else
1858 		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1859 
1860 	spin_lock_init(&tbl->lock);
1861 	mutex_init(&tbl->phash_lock);
1862 
1863 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1864 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1865 			tbl->parms.reachable_time);
1866 	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
1867 	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
1868 
1869 	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1870 	skb_queue_head_init_class(&tbl->proxy_queue,
1871 			&neigh_table_proxy_queue_class);
1872 
1873 	tbl->last_flush = now;
1874 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1875 
1876 	rcu_assign_pointer(neigh_tables[index], tbl);
1877 }
1878 
1879 /*
1880  * Only called from ndisc_cleanup(), which means this is dead code
1881  * because we no longer can unload IPv6 module.
1882  */
1883 int neigh_table_clear(int index, struct neigh_table *tbl)
1884 {
1885 	RCU_INIT_POINTER(neigh_tables[index], NULL);
1886 	synchronize_rcu();
1887 
1888 	/* It is not clean... Fix it to unload IPv6 module safely */
1889 	cancel_delayed_work_sync(&tbl->managed_work);
1890 	cancel_delayed_work_sync(&tbl->gc_work);
1891 	timer_delete_sync(&tbl->proxy_timer);
1892 	pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
1893 	neigh_ifdown(tbl, NULL);
1894 	if (atomic_read(&tbl->entries))
1895 		pr_crit("neighbour leakage\n");
1896 
1897 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1898 		 neigh_hash_free_rcu);
1899 	tbl->nht = NULL;
1900 
1901 	kfree(tbl->phash_buckets);
1902 	tbl->phash_buckets = NULL;
1903 
1904 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1905 
1906 	free_percpu(tbl->stats);
1907 	tbl->stats = NULL;
1908 
1909 	return 0;
1910 }
1911 
1912 static struct neigh_table *neigh_find_table(int family)
1913 {
1914 	struct neigh_table *tbl = NULL;
1915 
1916 	switch (family) {
1917 	case AF_INET:
1918 		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
1919 		break;
1920 	case AF_INET6:
1921 		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
1922 		break;
1923 	}
1924 
1925 	return tbl;
1926 }
1927 
1928 const struct nla_policy nda_policy[NDA_MAX+1] = {
1929 	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
1930 	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1931 	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1932 	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
1933 	[NDA_PROBES]		= { .type = NLA_U32 },
1934 	[NDA_VLAN]		= { .type = NLA_U16 },
1935 	[NDA_PORT]		= { .type = NLA_U16 },
1936 	[NDA_VNI]		= { .type = NLA_U32 },
1937 	[NDA_IFINDEX]		= { .type = NLA_U32 },
1938 	[NDA_MASTER]		= { .type = NLA_U32 },
1939 	[NDA_PROTOCOL]		= { .type = NLA_U8 },
1940 	[NDA_NH_ID]		= { .type = NLA_U32 },
1941 	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
1942 	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
1943 };
1944 
1945 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1946 			struct netlink_ext_ack *extack)
1947 {
1948 	struct net *net = sock_net(skb->sk);
1949 	struct ndmsg *ndm;
1950 	struct nlattr *dst_attr;
1951 	struct neigh_table *tbl;
1952 	struct neighbour *neigh;
1953 	struct net_device *dev = NULL;
1954 	int err = -EINVAL;
1955 
1956 	ASSERT_RTNL();
1957 	if (nlmsg_len(nlh) < sizeof(*ndm))
1958 		goto out;
1959 
1960 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1961 	if (!dst_attr) {
1962 		NL_SET_ERR_MSG(extack, "Network address not specified");
1963 		goto out;
1964 	}
1965 
1966 	ndm = nlmsg_data(nlh);
1967 	if (ndm->ndm_ifindex) {
1968 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1969 		if (dev == NULL) {
1970 			err = -ENODEV;
1971 			goto out;
1972 		}
1973 	}
1974 
1975 	tbl = neigh_find_table(ndm->ndm_family);
1976 	if (tbl == NULL)
1977 		return -EAFNOSUPPORT;
1978 
1979 	if (nla_len(dst_attr) < (int)tbl->key_len) {
1980 		NL_SET_ERR_MSG(extack, "Invalid network address");
1981 		goto out;
1982 	}
1983 
1984 	if (ndm->ndm_flags & NTF_PROXY) {
1985 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1986 		goto out;
1987 	}
1988 
1989 	if (dev == NULL)
1990 		goto out;
1991 
1992 	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1993 	if (neigh == NULL) {
1994 		err = -ENOENT;
1995 		goto out;
1996 	}
1997 
1998 	err = __neigh_update(neigh, NULL, NUD_FAILED,
1999 			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
2000 			     NETLINK_CB(skb).portid, extack);
2001 	spin_lock_bh(&tbl->lock);
2002 	neigh_release(neigh);
2003 	neigh_remove_one(neigh);
2004 	spin_unlock_bh(&tbl->lock);
2005 
2006 out:
2007 	return err;
2008 }
2009 
2010 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
2011 		     struct netlink_ext_ack *extack)
2012 {
2013 	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
2014 		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
2015 	struct net *net = sock_net(skb->sk);
2016 	struct ndmsg *ndm;
2017 	struct nlattr *tb[NDA_MAX+1];
2018 	struct neigh_table *tbl;
2019 	struct net_device *dev = NULL;
2020 	struct neighbour *neigh;
2021 	void *dst, *lladdr;
2022 	u8 protocol = 0;
2023 	u32 ndm_flags;
2024 	int err;
2025 
2026 	ASSERT_RTNL();
2027 	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
2028 				     nda_policy, extack);
2029 	if (err < 0)
2030 		goto out;
2031 
2032 	err = -EINVAL;
2033 	if (!tb[NDA_DST]) {
2034 		NL_SET_ERR_MSG(extack, "Network address not specified");
2035 		goto out;
2036 	}
2037 
2038 	ndm = nlmsg_data(nlh);
2039 	ndm_flags = ndm->ndm_flags;
2040 	if (tb[NDA_FLAGS_EXT]) {
2041 		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
2042 
2043 		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
2044 			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
2045 			      hweight32(NTF_EXT_MASK)));
2046 		ndm_flags |= (ext << NTF_EXT_SHIFT);
2047 	}
2048 	if (ndm->ndm_ifindex) {
2049 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
2050 		if (dev == NULL) {
2051 			err = -ENODEV;
2052 			goto out;
2053 		}
2054 
2055 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
2056 			NL_SET_ERR_MSG(extack, "Invalid link address");
2057 			goto out;
2058 		}
2059 	}
2060 
2061 	tbl = neigh_find_table(ndm->ndm_family);
2062 	if (tbl == NULL)
2063 		return -EAFNOSUPPORT;
2064 
2065 	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
2066 		NL_SET_ERR_MSG(extack, "Invalid network address");
2067 		goto out;
2068 	}
2069 
2070 	dst = nla_data(tb[NDA_DST]);
2071 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
2072 
2073 	if (tb[NDA_PROTOCOL])
2074 		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
2075 	if (ndm_flags & NTF_PROXY) {
2076 		if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) {
2077 			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
2078 			goto out;
2079 		}
2080 
2081 		err = pneigh_create(tbl, net, dst, dev, ndm_flags, protocol,
2082 				    !!(ndm->ndm_state & NUD_PERMANENT));
2083 		goto out;
2084 	}
2085 
2086 	if (!dev) {
2087 		NL_SET_ERR_MSG(extack, "Device not specified");
2088 		goto out;
2089 	}
2090 
2091 	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
2092 		err = -EINVAL;
2093 		goto out;
2094 	}
2095 
2096 	neigh = neigh_lookup(tbl, dst, dev);
2097 	if (neigh == NULL) {
2098 		bool ndm_permanent  = ndm->ndm_state & NUD_PERMANENT;
2099 		bool exempt_from_gc = ndm_permanent ||
2100 				      ndm_flags & (NTF_EXT_LEARNED |
2101 						   NTF_EXT_VALIDATED);
2102 
2103 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
2104 			err = -ENOENT;
2105 			goto out;
2106 		}
2107 		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
2108 			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
2109 			err = -EINVAL;
2110 			goto out;
2111 		}
2112 		if (ndm_flags & NTF_EXT_VALIDATED) {
2113 			u8 state = ndm->ndm_state;
2114 
2115 			/* NTF_USE and NTF_MANAGED will result in the neighbor
2116 			 * being created with an invalid state (NUD_NONE).
2117 			 */
2118 			if (ndm_flags & (NTF_USE | NTF_MANAGED))
2119 				state = NUD_NONE;
2120 
2121 			if (!(state & NUD_VALID)) {
2122 				NL_SET_ERR_MSG(extack,
2123 					       "Cannot create externally validated neighbor with an invalid state");
2124 				err = -EINVAL;
2125 				goto out;
2126 			}
2127 		}
2128 
2129 		neigh = ___neigh_create(tbl, dst, dev,
2130 					ndm_flags &
2131 					(NTF_EXT_LEARNED | NTF_MANAGED |
2132 					 NTF_EXT_VALIDATED),
2133 					exempt_from_gc, true);
2134 		if (IS_ERR(neigh)) {
2135 			err = PTR_ERR(neigh);
2136 			goto out;
2137 		}
2138 	} else {
2139 		if (nlh->nlmsg_flags & NLM_F_EXCL) {
2140 			err = -EEXIST;
2141 			neigh_release(neigh);
2142 			goto out;
2143 		}
2144 		if (ndm_flags & NTF_EXT_VALIDATED) {
2145 			u8 state = ndm->ndm_state;
2146 
2147 			/* NTF_USE and NTF_MANAGED do not update the existing
2148 			 * state other than clearing it if it was
2149 			 * NUD_PERMANENT.
2150 			 */
2151 			if (ndm_flags & (NTF_USE | NTF_MANAGED))
2152 				state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT;
2153 
2154 			if (!(state & NUD_VALID)) {
2155 				NL_SET_ERR_MSG(extack,
2156 					       "Cannot mark neighbor as externally validated with an invalid state");
2157 				err = -EINVAL;
2158 				neigh_release(neigh);
2159 				goto out;
2160 			}
2161 		}
2162 
2163 		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
2164 			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
2165 				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
2166 	}
2167 
2168 	if (protocol)
2169 		neigh->protocol = protocol;
2170 	if (ndm_flags & NTF_EXT_LEARNED)
2171 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
2172 	if (ndm_flags & NTF_ROUTER)
2173 		flags |= NEIGH_UPDATE_F_ISROUTER;
2174 	if (ndm_flags & NTF_MANAGED)
2175 		flags |= NEIGH_UPDATE_F_MANAGED;
2176 	if (ndm_flags & NTF_USE)
2177 		flags |= NEIGH_UPDATE_F_USE;
2178 	if (ndm_flags & NTF_EXT_VALIDATED)
2179 		flags |= NEIGH_UPDATE_F_EXT_VALIDATED;
2180 
2181 	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
2182 			     NETLINK_CB(skb).portid, extack);
2183 	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED))
2184 		neigh_event_send(neigh, NULL);
2185 	neigh_release(neigh);
2186 out:
2187 	return err;
2188 }
2189 
2190 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2191 {
2192 	struct nlattr *nest;
2193 
2194 	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2195 	if (nest == NULL)
2196 		return -ENOBUFS;
2197 
2198 	if ((parms->dev &&
2199 	     nla_put_u32(skb, NDTPA_IFINDEX, READ_ONCE(parms->dev->ifindex))) ||
2200 	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2201 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2202 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2203 	    /* approximative value for deprecated QUEUE_LEN (in packets) */
2204 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
2205 			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2206 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2207 	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2208 	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
2209 			NEIGH_VAR(parms, UCAST_PROBES)) ||
2210 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
2211 			NEIGH_VAR(parms, MCAST_PROBES)) ||
2212 	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2213 			NEIGH_VAR(parms, MCAST_REPROBES)) ||
2214 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, READ_ONCE(parms->reachable_time),
2215 			  NDTPA_PAD) ||
2216 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2217 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2218 	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
2219 			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2220 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2221 			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2222 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2223 			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2224 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2225 			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2226 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2227 			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2228 	    nla_put_msecs(skb, NDTPA_LOCKTIME,
2229 			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2230 	    nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2231 			  NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2232 		goto nla_put_failure;
2233 	return nla_nest_end(skb, nest);
2234 
2235 nla_put_failure:
2236 	nla_nest_cancel(skb, nest);
2237 	return -EMSGSIZE;
2238 }
2239 
/*
 * Build one netlink message of @type in @skb describing the table-wide
 * state of @tbl: its name, GC interval and thresholds, an NDTA_CONFIG
 * snapshot, NDTA_STATS summed over all possible CPUs, and finally the
 * table's own (device-less) parms via neightbl_fill_parms().
 *
 * @pid, @seq and @flags are passed straight to nlmsg_put().
 *
 * Returns 0 on success. On any failure the partially built message is
 * cancelled and -EMSGSIZE is returned.
 *
 * tbl->nht is read with rcu_dereference(); the caller visible in this file,
 * neightbl_dump_info(), invokes this function under rcu_read_lock().
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
			  NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
	    nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
	    nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
		goto nla_put_failure;
	{
		/* NDTA_CONFIG: last_flush/last_rand are reported as the time
		 * elapsed since those events, converted to milliseconds.
		 */
		unsigned long now = jiffies;
		long flush_delta = now - READ_ONCE(tbl->last_flush);
		long rand_delta = now - READ_ONCE(tbl->last_rand);
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
		};

		/* Hash parameters come from the current hash table. */
		nht = rcu_dereference(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		/* NDTA_STATS: sum the per-CPU counters into one structure. */
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= READ_ONCE(st->allocs);
			ndst.ndts_destroys		+= READ_ONCE(st->destroys);
			ndst.ndts_hash_grows		+= READ_ONCE(st->hash_grows);
			ndst.ndts_res_failed		+= READ_ONCE(st->res_failed);
			ndst.ndts_lookups		+= READ_ONCE(st->lookups);
			ndst.ndts_hits			+= READ_ONCE(st->hits);
			ndst.ndts_rcv_probes_mcast	+= READ_ONCE(st->rcv_probes_mcast);
			ndst.ndts_rcv_probes_ucast	+= READ_ONCE(st->rcv_probes_ucast);
			ndst.ndts_periodic_gc_runs	+= READ_ONCE(st->periodic_gc_runs);
			ndst.ndts_forced_gc_runs	+= READ_ONCE(st->forced_gc_runs);
			ndst.ndts_table_fulls		+= READ_ONCE(st->table_fulls);
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* The table's own parms must not be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2323 
2324 static int neightbl_fill_param_info(struct sk_buff *skb,
2325 				    struct neigh_table *tbl,
2326 				    struct neigh_parms *parms,
2327 				    u32 pid, u32 seq, int type,
2328 				    unsigned int flags)
2329 {
2330 	struct ndtmsg *ndtmsg;
2331 	struct nlmsghdr *nlh;
2332 
2333 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2334 	if (nlh == NULL)
2335 		return -EMSGSIZE;
2336 
2337 	ndtmsg = nlmsg_data(nlh);
2338 	ndtmsg->ndtm_family = tbl->family;
2339 	ndtmsg->ndtm_pad1   = 0;
2340 	ndtmsg->ndtm_pad2   = 0;
2341 
2342 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2343 	    neightbl_fill_parms(skb, parms) < 0)
2344 		goto errout;
2345 
2346 	nlmsg_end(skb, nlh);
2347 	return 0;
2348 errout:
2349 	nlmsg_cancel(skb, nlh);
2350 	return -EMSGSIZE;
2351 }
2352 
/*
 * Attribute policy for the top-level NDTA_* attributes of a neighbour
 * table message; used by neightbl_set() when parsing the request.
 * Attributes without an entry here have no type declared in this table.
 */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
2361 
/*
 * Attribute policy for the NDTPA_* attributes nested inside NDTA_PARMS;
 * used by neightbl_set() when parsing the nested attribute.
 * Counters and queue lengths are u32; the time values are u64.
 *
 * NOTE(review): NDTPA_INTERVAL_PROBE_TIME_MS sets .min = 1 without a
 * validation type visible here - confirm the minimum is actually enforced.
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_QUEUE_LENBYTES]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
	[NDTPA_INTERVAL_PROBE_TIME_MS]	= { .type = NLA_U64, .min = 1 },
};
2380 
/*
 * Netlink handler that updates a neighbour table's tunables.
 *
 * The target table is selected by the mandatory NDTA_NAME attribute and,
 * when ndtm_family is non-zero, by address family as well. An optional
 * NDTA_PARMS nest updates one neigh_parms instance (selected by
 * NDTPA_IFINDEX, defaulting to 0); NDTA_THRESH1/2/3 and NDTA_GC_INTERVAL
 * update table-wide GC settings.
 *
 * Returns 0 on success, a negative parse error, -EINVAL when NDTA_NAME is
 * missing, or -ENOENT when the table or parms cannot be found, or when the
 * table-wide GC attributes are supplied from a namespace other than
 * init_net.
 *
 * Note that NDTA_PARMS values are applied before the init_net check, so a
 * request that fails that check may already have modified the parms.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NDTA_MAX + 1];
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* Held until the function returns on every path past this point. */
	rcu_read_lock();

	/* Find the table whose id matches NDTA_NAME (and family, if given). */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = rcu_dereference(neigh_tables[tidx]);
		if (!tbl)
			continue;

		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found) {
		rcu_read_unlock();
		err = -ENOENT;
		goto errout;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	spin_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every supplied parameter; attributes with no case
		 * below (e.g. NDTPA_IFINDEX) are skipped by the switch.
		 */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet count, stored as bytes. */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				neigh_set_reach_time(p);
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				/* Notify netevent listeners of the new value. */
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_INTERVAL_PROBE_TIME_MS:
				NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* Table-wide GC settings may only be changed from init_net. */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));

	if (tb[NDTA_THRESH2])
		WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));

	if (tb[NDTA_THRESH3])
		WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));

	if (tb[NDTA_GC_INTERVAL])
		WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));

	err = 0;

errout_tbl_lock:
	/* Release in reverse order of acquisition. */
	spin_unlock_bh(&tbl->lock);
	rcu_read_unlock();
errout:
	return err;
}
2553 
2554 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2555 				    struct netlink_ext_ack *extack)
2556 {
2557 	struct ndtmsg *ndtm;
2558 
2559 	ndtm = nlmsg_payload(nlh, sizeof(*ndtm));
2560 	if (!ndtm) {
2561 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2562 		return -EINVAL;
2563 	}
2564 
2565 	if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
2566 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2567 		return -EINVAL;
2568 	}
2569 
2570 	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2571 		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2572 		return -EINVAL;
2573 	}
2574 
2575 	return 0;
2576 }
2577 
/*
 * Netlink dump callback for neighbour tables.
 *
 * For every registered table matching the requested family (0 matches
 * all), emits one table-wide message followed by one message per
 * neigh_parms entry on the table's parms_list that belongs to the
 * requester's network namespace.
 *
 * Resume state lives in @cb: args[0] is the table index to restart from
 * and args[1] the number of parms entries of that table to skip.
 *
 * Returns a negative error if strict validation of the request fails,
 * otherwise skb->len.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	rcu_read_lock();

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = rcu_dereference(neigh_tables[tidx]);
		if (!tbl)
			continue;

		/* Skip tables already dumped and those of another family. */
		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		/* Out of room: stop here so this table is retried. */
		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		/* Walk the parms list starting after the table's own parms. */
		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from_rcu(p, &tbl->parms_list, list) {
			/* Entries of other namespaces do not count in nidx. */
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		/* The skip count only applies to the table we resumed in. */
		neigh_skip = 0;
	}
out:
	rcu_read_unlock();

	/* Record where the next invocation should pick up. */
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2642 
2643 static int __neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2644 			     u32 pid, u32 seq, int type, unsigned int flags)
2645 {
2646 	u32 neigh_flags, neigh_flags_ext;
2647 	unsigned long now = jiffies;
2648 	struct nda_cacheinfo ci;
2649 	struct nlmsghdr *nlh;
2650 	struct ndmsg *ndm;
2651 
2652 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2653 	if (nlh == NULL)
2654 		return -EMSGSIZE;
2655 
2656 	neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2657 	neigh_flags     = neigh->flags & NTF_OLD_MASK;
2658 
2659 	ndm = nlmsg_data(nlh);
2660 	ndm->ndm_family	 = neigh->ops->family;
2661 	ndm->ndm_pad1    = 0;
2662 	ndm->ndm_pad2    = 0;
2663 	ndm->ndm_flags	 = neigh_flags;
2664 	ndm->ndm_type	 = neigh->type;
2665 	ndm->ndm_ifindex = neigh->dev->ifindex;
2666 
2667 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2668 		goto nla_put_failure;
2669 
2670 	ndm->ndm_state	 = neigh->nud_state;
2671 	if (neigh->nud_state & NUD_VALID) {
2672 		char haddr[MAX_ADDR_LEN];
2673 
2674 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2675 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0)
2676 			goto nla_put_failure;
2677 	}
2678 
2679 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2680 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2681 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2682 	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
2683 
2684 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2685 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2686 		goto nla_put_failure;
2687 
2688 	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2689 		goto nla_put_failure;
2690 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2691 		goto nla_put_failure;
2692 
2693 	nlmsg_end(skb, nlh);
2694 	return 0;
2695 
2696 nla_put_failure:
2697 	nlmsg_cancel(skb, nlh);
2698 	return -EMSGSIZE;
2699 }
2700 
2701 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2702 			   u32 pid, u32 seq, int type, unsigned int flags)
2703 	__releases(neigh->lock)
2704 	__acquires(neigh->lock)
2705 {
2706 	int err;
2707 
2708 	read_lock_bh(&neigh->lock);
2709 	err = __neigh_fill_info(skb, neigh, pid, seq, type, flags);
2710 	read_unlock_bh(&neigh->lock);
2711 
2712 	return err;
2713 }
2714 
2715 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2716 			    u32 pid, u32 seq, int type, unsigned int flags,
2717 			    struct neigh_table *tbl)
2718 {
2719 	u32 neigh_flags, neigh_flags_ext;
2720 	struct nlmsghdr *nlh;
2721 	struct ndmsg *ndm;
2722 	u8 protocol;
2723 
2724 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2725 	if (nlh == NULL)
2726 		return -EMSGSIZE;
2727 
2728 	neigh_flags = READ_ONCE(pn->flags);
2729 	neigh_flags_ext = neigh_flags >> NTF_EXT_SHIFT;
2730 	neigh_flags &= NTF_OLD_MASK;
2731 
2732 	ndm = nlmsg_data(nlh);
2733 	ndm->ndm_family	 = tbl->family;
2734 	ndm->ndm_pad1    = 0;
2735 	ndm->ndm_pad2    = 0;
2736 	ndm->ndm_flags	 = neigh_flags | NTF_PROXY;
2737 	ndm->ndm_type	 = RTN_UNICAST;
2738 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2739 	ndm->ndm_state	 = NUD_NONE;
2740 
2741 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2742 		goto nla_put_failure;
2743 
2744 	protocol = READ_ONCE(pn->protocol);
2745 	if (protocol && nla_put_u8(skb, NDA_PROTOCOL, protocol))
2746 		goto nla_put_failure;
2747 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2748 		goto nla_put_failure;
2749 
2750 	nlmsg_end(skb, nlh);
2751 	return 0;
2752 
2753 nla_put_failure:
2754 	nlmsg_cancel(skb, nlh);
2755 	return -EMSGSIZE;
2756 }
2757 
2758 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2759 {
2760 	struct net_device *master;
2761 
2762 	if (!master_idx)
2763 		return false;
2764 
2765 	master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
2766 
2767 	/* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
2768 	 * invalid value for ifindex to denote "no master".
2769 	 */
2770 	if (master_idx == -1)
2771 		return !!master;
2772 
2773 	if (!master || master->ifindex != master_idx)
2774 		return true;
2775 
2776 	return false;
2777 }
2778 
2779 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2780 {
2781 	if (filter_idx && (!dev || dev->ifindex != filter_idx))
2782 		return true;
2783 
2784 	return false;
2785 }
2786 
/*
 * Optional filters for a neighbour dump, filled in by
 * neigh_valid_dump_req() from the request attributes.
 */
struct neigh_dump_filter {
	/* From NDA_MASTER: 0 = no filter, -1 = only devices without a
	 * master, otherwise the required master ifindex.
	 */
	int master_idx;
	/* From NDA_IFINDEX: 0 = no filter, otherwise the required ifindex. */
	int dev_idx;
};
2791 
/*
 * Dump the neighbour entries of @tbl that belong to the requester's
 * network namespace and pass @filter into @skb.
 *
 * Resume state lives in @cb: args[1] is the hash bucket to restart from
 * and args[2] the number of entries in that bucket to skip. Both are
 * updated before returning.
 *
 * tbl->nht is read with rcu_dereference(); the caller visible in this file,
 * neigh_dump_info(), invokes this function under rcu_read_lock().
 *
 * Returns 0 when the whole table was walked, or the negative error from
 * neigh_fill_info() when an entry could not be added.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int err = 0, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	/* Tell userspace that the result set was narrowed by a filter. */
	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	nht = rcu_dereference(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* The in-bucket skip only applies to the resumed bucket. */
		if (h > s_h)
			s_idx = 0;
		idx = 0;
		neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
			/* Skipped entries still advance idx via 'next'. */
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNEIGH, flags);
			if (err < 0)
				goto out;
next:
			idx++;
		}
	}
out:
	/* Record the position for the next invocation. */
	cb->args[1] = h;
	cb->args[2] = idx;
	return err;
}
2832 
/*
 * Dump the proxy neighbour entries of @tbl that belong to the requester's
 * network namespace and pass @filter into @skb.
 *
 * Resume state lives in @cb: args[3] is the proxy hash bucket to restart
 * from and args[4] the number of entries in that bucket to skip. Both are
 * updated before returning.
 *
 * The bucket chains are read with rcu_dereference(); the caller visible in
 * this file, neigh_dump_info(), invokes this function under
 * rcu_read_lock().
 *
 * Returns 0 when all buckets were walked, or the negative error from
 * pneigh_fill_info() when an entry could not be added.
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int err = 0, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	/* Tell userspace that the result set was narrowed by a filter. */
	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		/* The in-bucket skip only applies to the resumed bucket. */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference(tbl->phash_buckets[h]), idx = 0;
		     n;
		     n = rcu_dereference(n->next)) {
			/* Skipped entries still advance idx via 'next'. */
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					       cb->nlh->nlmsg_seq,
					       RTM_NEWNEIGH, flags, tbl);
			if (err < 0)
				goto out;
		next:
			idx++;
		}
	}

out:
	/* Record the position for the next invocation. */
	cb->args[3] = h;
	cb->args[4] = idx;
	return err;
}
2872 
2873 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2874 				bool strict_check,
2875 				struct neigh_dump_filter *filter,
2876 				struct netlink_ext_ack *extack)
2877 {
2878 	struct nlattr *tb[NDA_MAX + 1];
2879 	int err, i;
2880 
2881 	if (strict_check) {
2882 		struct ndmsg *ndm;
2883 
2884 		ndm = nlmsg_payload(nlh, sizeof(*ndm));
2885 		if (!ndm) {
2886 			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2887 			return -EINVAL;
2888 		}
2889 
2890 		if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
2891 		    ndm->ndm_state || ndm->ndm_type) {
2892 			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2893 			return -EINVAL;
2894 		}
2895 
2896 		if (ndm->ndm_flags & ~NTF_PROXY) {
2897 			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2898 			return -EINVAL;
2899 		}
2900 
2901 		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2902 						    tb, NDA_MAX, nda_policy,
2903 						    extack);
2904 	} else {
2905 		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2906 					     NDA_MAX, nda_policy, extack);
2907 	}
2908 	if (err < 0)
2909 		return err;
2910 
2911 	for (i = 0; i <= NDA_MAX; ++i) {
2912 		if (!tb[i])
2913 			continue;
2914 
2915 		/* all new attributes should require strict_check */
2916 		switch (i) {
2917 		case NDA_IFINDEX:
2918 			filter->dev_idx = nla_get_u32(tb[i]);
2919 			break;
2920 		case NDA_MASTER:
2921 			filter->master_idx = nla_get_u32(tb[i]);
2922 			break;
2923 		default:
2924 			if (strict_check) {
2925 				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2926 				return -EINVAL;
2927 			}
2928 		}
2929 	}
2930 
2931 	return 0;
2932 }
2933 
/*
 * Netlink dump callback for neighbour entries.
 *
 * Walks every registered table matching the requested family (0 matches
 * all) and dumps either its proxy entries or its regular entries. Proxy
 * mode is selected when the request carries a full ndmsg header whose
 * ndm_flags is exactly NTF_PROXY.
 *
 * Resume state lives in @cb: args[0] is the table index to restart from;
 * the remaining args hold the per-table cursors, which are zeroed whenever
 * the walk moves past the table it resumed in.
 *
 * A validation error is only fatal when cb->strict_check is set; otherwise
 * it is discarded and the dump proceeds.
 *
 * Returns 0, or the negative error that stopped the per-table dump.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;
	/* Non-strict requests ignore validation errors. */
	err = 0;

	s_t = cb->args[0];

	rcu_read_lock();
	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = rcu_dereference(neigh_tables[t]);

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* New table: forget the per-table cursors in args[1..]. */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}
	rcu_read_unlock();

	/* Record the table to resume from on the next invocation. */
	cb->args[0] = t;
	return err;
}
2982 
2983 static struct ndmsg *neigh_valid_get_req(const struct nlmsghdr *nlh,
2984 					 struct nlattr **tb,
2985 					 struct netlink_ext_ack *extack)
2986 {
2987 	struct ndmsg *ndm;
2988 	int err, i;
2989 
2990 	ndm = nlmsg_payload(nlh, sizeof(*ndm));
2991 	if (!ndm) {
2992 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2993 		return ERR_PTR(-EINVAL);
2994 	}
2995 
2996 	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
2997 	    ndm->ndm_type) {
2998 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2999 		return ERR_PTR(-EINVAL);
3000 	}
3001 
3002 	if (ndm->ndm_flags & ~NTF_PROXY) {
3003 		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
3004 		return ERR_PTR(-EINVAL);
3005 	}
3006 
3007 	if (!(ndm->ndm_flags & NTF_PROXY) && !ndm->ndm_ifindex) {
3008 		NL_SET_ERR_MSG(extack, "No device specified");
3009 		return ERR_PTR(-EINVAL);
3010 	}
3011 
3012 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
3013 					    NDA_MAX, nda_policy, extack);
3014 	if (err < 0)
3015 		return ERR_PTR(err);
3016 
3017 	for (i = 0; i <= NDA_MAX; ++i) {
3018 		switch (i) {
3019 		case NDA_DST:
3020 			if (!tb[i]) {
3021 				NL_SET_ERR_ATTR_MISS(extack, NULL, NDA_DST);
3022 				return ERR_PTR(-EINVAL);
3023 			}
3024 			break;
3025 		default:
3026 			if (!tb[i])
3027 				continue;
3028 
3029 			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
3030 			return ERR_PTR(-EINVAL);
3031 		}
3032 	}
3033 
3034 	return ndm;
3035 }
3036 
3037 static inline size_t neigh_nlmsg_size(void)
3038 {
3039 	return NLMSG_ALIGN(sizeof(struct ndmsg))
3040 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3041 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
3042 	       + nla_total_size(sizeof(struct nda_cacheinfo))
3043 	       + nla_total_size(4)  /* NDA_PROBES */
3044 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
3045 	       + nla_total_size(1); /* NDA_PROTOCOL */
3046 }
3047 
3048 static inline size_t pneigh_nlmsg_size(void)
3049 {
3050 	return NLMSG_ALIGN(sizeof(struct ndmsg))
3051 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3052 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
3053 	       + nla_total_size(1); /* NDA_PROTOCOL */
3054 }
3055 
3056 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3057 		     struct netlink_ext_ack *extack)
3058 {
3059 	struct net *net = sock_net(in_skb->sk);
3060 	u32 pid = NETLINK_CB(in_skb).portid;
3061 	struct nlattr *tb[NDA_MAX + 1];
3062 	struct net_device *dev = NULL;
3063 	u32 seq = nlh->nlmsg_seq;
3064 	struct neigh_table *tbl;
3065 	struct neighbour *neigh;
3066 	struct sk_buff *skb;
3067 	struct ndmsg *ndm;
3068 	void *dst;
3069 	int err;
3070 
3071 	ndm = neigh_valid_get_req(nlh, tb, extack);
3072 	if (IS_ERR(ndm))
3073 		return PTR_ERR(ndm);
3074 
3075 	if (ndm->ndm_flags & NTF_PROXY)
3076 		skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
3077 	else
3078 		skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
3079 	if (!skb)
3080 		return -ENOBUFS;
3081 
3082 	rcu_read_lock();
3083 
3084 	tbl = neigh_find_table(ndm->ndm_family);
3085 	if (!tbl) {
3086 		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
3087 		err = -EAFNOSUPPORT;
3088 		goto err_unlock;
3089 	}
3090 
3091 	if (nla_len(tb[NDA_DST]) != (int)tbl->key_len) {
3092 		NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
3093 		err = -EINVAL;
3094 		goto err_unlock;
3095 	}
3096 
3097 	dst = nla_data(tb[NDA_DST]);
3098 
3099 	if (ndm->ndm_ifindex) {
3100 		dev = dev_get_by_index_rcu(net, ndm->ndm_ifindex);
3101 		if (!dev) {
3102 			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
3103 			err = -ENODEV;
3104 			goto err_unlock;
3105 		}
3106 	}
3107 
3108 	if (ndm->ndm_flags & NTF_PROXY) {
3109 		struct pneigh_entry *pn;
3110 
3111 		pn = pneigh_lookup(tbl, net, dst, dev);
3112 		if (!pn) {
3113 			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3114 			err = -ENOENT;
3115 			goto err_unlock;
3116 		}
3117 
3118 		err = pneigh_fill_info(skb, pn, pid, seq, RTM_NEWNEIGH, 0, tbl);
3119 		if (err)
3120 			goto err_unlock;
3121 	} else {
3122 		neigh = neigh_lookup(tbl, dst, dev);
3123 		if (!neigh) {
3124 			NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3125 			err = -ENOENT;
3126 			goto err_unlock;
3127 		}
3128 
3129 		err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
3130 		neigh_release(neigh);
3131 		if (err)
3132 			goto err_unlock;
3133 	}
3134 
3135 	rcu_read_unlock();
3136 
3137 	return rtnl_unicast(skb, net, pid);
3138 err_unlock:
3139 	rcu_read_unlock();
3140 	kfree_skb(skb);
3141 	return err;
3142 }
3143 
3144 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3145 {
3146 	int chain;
3147 	struct neigh_hash_table *nht;
3148 
3149 	rcu_read_lock();
3150 	nht = rcu_dereference(tbl->nht);
3151 
3152 	spin_lock_bh(&tbl->lock); /* avoid resizes */
3153 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3154 		struct neighbour *n;
3155 
3156 		neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
3157 			cb(n, cookie);
3158 	}
3159 	spin_unlock_bh(&tbl->lock);
3160 	rcu_read_unlock();
3161 }
3162 EXPORT_SYMBOL(neigh_for_each);
3163 
/*
 * Walk every neighbour entry in @tbl and call @cb on it with the entry's
 * lock held for writing. When @cb returns non-zero the entry is unlinked
 * from its hash bucket and device list, marked dead and, once its lock has
 * been dropped, handed to neigh_cleanup_and_release().
 *
 * The tbl->lock must be held as a writer and BH disabled.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	struct neigh_hash_table *nht;
	int chain;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct hlist_node *tmp;
		struct neighbour *n;

		/* The _safe iterator is used because entries may be
		 * unlinked during the walk.
		 */
		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				hlist_del_rcu(&n->hash);
				hlist_del_rcu(&n->dev_list);
				neigh_mark_dead(n);
			}
			write_unlock(&n->lock);
			/* Final cleanup happens outside the entry lock. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
3194 
3195 int neigh_xmit(int index, struct net_device *dev,
3196 	       const void *addr, struct sk_buff *skb)
3197 {
3198 	int err = -EAFNOSUPPORT;
3199 
3200 	if (likely(index < NEIGH_NR_TABLES)) {
3201 		struct neigh_table *tbl;
3202 		struct neighbour *neigh;
3203 
3204 		rcu_read_lock();
3205 		tbl = rcu_dereference(neigh_tables[index]);
3206 		if (!tbl) {
3207 			rcu_read_unlock();
3208 			goto out_kfree_skb;
3209 		}
3210 		if (index == NEIGH_ARP_TABLE) {
3211 			u32 key = *((u32 *)addr);
3212 
3213 			neigh = __ipv4_neigh_lookup_noref(dev, key);
3214 		} else {
3215 			neigh = __neigh_lookup_noref(tbl, addr, dev);
3216 		}
3217 		if (!neigh)
3218 			neigh = __neigh_create(tbl, addr, dev, false);
3219 		err = PTR_ERR(neigh);
3220 		if (IS_ERR(neigh)) {
3221 			rcu_read_unlock();
3222 			goto out_kfree_skb;
3223 		}
3224 		err = READ_ONCE(neigh->output)(neigh, skb);
3225 		rcu_read_unlock();
3226 	}
3227 	else if (index == NEIGH_LINK_TABLE) {
3228 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3229 				      addr, NULL, skb->len);
3230 		if (err < 0)
3231 			goto out_kfree_skb;
3232 		err = dev_queue_xmit(skb);
3233 	}
3234 	return err;
3235 out_kfree_skb:
3236 	kfree_skb(skb);
3237 	return err;
3238 }
3239 EXPORT_SYMBOL(neigh_xmit);
3240 
3241 #ifdef CONFIG_PROC_FS
3242 
3243 static struct neighbour *neigh_get_valid(struct seq_file *seq,
3244 					 struct neighbour *n,
3245 					 loff_t *pos)
3246 {
3247 	struct neigh_seq_state *state = seq->private;
3248 	struct net *net = seq_file_net(seq);
3249 
3250 	if (!net_eq(dev_net(n->dev), net))
3251 		return NULL;
3252 
3253 	if (state->neigh_sub_iter) {
3254 		loff_t fakep = 0;
3255 		void *v;
3256 
3257 		v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
3258 		if (!v)
3259 			return NULL;
3260 		if (pos)
3261 			return v;
3262 	}
3263 
3264 	if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3265 		return n;
3266 
3267 	if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
3268 		return n;
3269 
3270 	return NULL;
3271 }
3272 
3273 static struct neighbour *neigh_get_first(struct seq_file *seq)
3274 {
3275 	struct neigh_seq_state *state = seq->private;
3276 	struct neigh_hash_table *nht = state->nht;
3277 	struct neighbour *n, *tmp;
3278 
3279 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3280 
3281 	while (++state->bucket < (1 << nht->hash_shift)) {
3282 		neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
3283 			tmp = neigh_get_valid(seq, n, NULL);
3284 			if (tmp)
3285 				return tmp;
3286 		}
3287 	}
3288 
3289 	return NULL;
3290 }
3291 
3292 static struct neighbour *neigh_get_next(struct seq_file *seq,
3293 					struct neighbour *n,
3294 					loff_t *pos)
3295 {
3296 	struct neigh_seq_state *state = seq->private;
3297 	struct neighbour *tmp;
3298 
3299 	if (state->neigh_sub_iter) {
3300 		void *v = state->neigh_sub_iter(state, n, pos);
3301 
3302 		if (v)
3303 			return n;
3304 	}
3305 
3306 	hlist_for_each_entry_continue(n, hash) {
3307 		tmp = neigh_get_valid(seq, n, pos);
3308 		if (tmp) {
3309 			n = tmp;
3310 			goto out;
3311 		}
3312 	}
3313 
3314 	n = neigh_get_first(seq);
3315 out:
3316 	if (n && pos)
3317 		--(*pos);
3318 
3319 	return n;
3320 }
3321 
3322 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3323 {
3324 	struct neighbour *n = neigh_get_first(seq);
3325 
3326 	if (n) {
3327 		--(*pos);
3328 		while (*pos) {
3329 			n = neigh_get_next(seq, n, pos);
3330 			if (!n)
3331 				break;
3332 		}
3333 	}
3334 	return *pos ? NULL : n;
3335 }
3336 
3337 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3338 {
3339 	struct neigh_seq_state *state = seq->private;
3340 	struct net *net = seq_file_net(seq);
3341 	struct neigh_table *tbl = state->tbl;
3342 	struct pneigh_entry *pn = NULL;
3343 	int bucket;
3344 
3345 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
3346 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3347 		pn = rcu_dereference(tbl->phash_buckets[bucket]);
3348 
3349 		while (pn && !net_eq(pneigh_net(pn), net))
3350 			pn = rcu_dereference(pn->next);
3351 		if (pn)
3352 			break;
3353 	}
3354 	state->bucket = bucket;
3355 
3356 	return pn;
3357 }
3358 
3359 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3360 					    struct pneigh_entry *pn,
3361 					    loff_t *pos)
3362 {
3363 	struct neigh_seq_state *state = seq->private;
3364 	struct net *net = seq_file_net(seq);
3365 	struct neigh_table *tbl = state->tbl;
3366 
3367 	do {
3368 		pn = rcu_dereference(pn->next);
3369 	} while (pn && !net_eq(pneigh_net(pn), net));
3370 
3371 	while (!pn) {
3372 		if (++state->bucket > PNEIGH_HASHMASK)
3373 			break;
3374 
3375 		pn = rcu_dereference(tbl->phash_buckets[state->bucket]);
3376 
3377 		while (pn && !net_eq(pneigh_net(pn), net))
3378 			pn = rcu_dereference(pn->next);
3379 		if (pn)
3380 			break;
3381 	}
3382 
3383 	if (pn && pos)
3384 		--(*pos);
3385 
3386 	return pn;
3387 }
3388 
3389 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3390 {
3391 	struct pneigh_entry *pn = pneigh_get_first(seq);
3392 
3393 	if (pn) {
3394 		--(*pos);
3395 		while (*pos) {
3396 			pn = pneigh_get_next(seq, pn, pos);
3397 			if (!pn)
3398 				break;
3399 		}
3400 	}
3401 	return *pos ? NULL : pn;
3402 }
3403 
3404 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3405 {
3406 	struct neigh_seq_state *state = seq->private;
3407 	void *rc;
3408 	loff_t idxpos = *pos;
3409 
3410 	rc = neigh_get_idx(seq, &idxpos);
3411 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3412 		rc = pneigh_get_idx(seq, &idxpos);
3413 
3414 	return rc;
3415 }
3416 
3417 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3418 	__acquires(tbl->lock)
3419 	__acquires(rcu)
3420 {
3421 	struct neigh_seq_state *state = seq->private;
3422 
3423 	state->tbl = tbl;
3424 	state->bucket = -1;
3425 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3426 
3427 	rcu_read_lock();
3428 	state->nht = rcu_dereference(tbl->nht);
3429 	spin_lock_bh(&tbl->lock);
3430 
3431 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3432 }
3433 EXPORT_SYMBOL(neigh_seq_start);
3434 
3435 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3436 {
3437 	struct neigh_seq_state *state;
3438 	void *rc;
3439 
3440 	if (v == SEQ_START_TOKEN) {
3441 		rc = neigh_get_first(seq);
3442 		goto out;
3443 	}
3444 
3445 	state = seq->private;
3446 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3447 		rc = neigh_get_next(seq, v, NULL);
3448 		if (rc)
3449 			goto out;
3450 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3451 			rc = pneigh_get_first(seq);
3452 	} else {
3453 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3454 		rc = pneigh_get_next(seq, v, NULL);
3455 	}
3456 out:
3457 	++(*pos);
3458 	return rc;
3459 }
3460 EXPORT_SYMBOL(neigh_seq_next);
3461 
3462 void neigh_seq_stop(struct seq_file *seq, void *v)
3463 	__releases(tbl->lock)
3464 	__releases(rcu)
3465 {
3466 	struct neigh_seq_state *state = seq->private;
3467 	struct neigh_table *tbl = state->tbl;
3468 
3469 	spin_unlock_bh(&tbl->lock);
3470 	rcu_read_unlock();
3471 }
3472 EXPORT_SYMBOL(neigh_seq_stop);
3473 
3474 /* statistics via seq_file */
3475 
3476 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3477 {
3478 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3479 	int cpu;
3480 
3481 	if (*pos == 0)
3482 		return SEQ_START_TOKEN;
3483 
3484 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3485 		if (!cpu_possible(cpu))
3486 			continue;
3487 		*pos = cpu+1;
3488 		return per_cpu_ptr(tbl->stats, cpu);
3489 	}
3490 	return NULL;
3491 }
3492 
3493 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3494 {
3495 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3496 	int cpu;
3497 
3498 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3499 		if (!cpu_possible(cpu))
3500 			continue;
3501 		*pos = cpu+1;
3502 		return per_cpu_ptr(tbl->stats, cpu);
3503 	}
3504 	(*pos)++;
3505 	return NULL;
3506 }
3507 
/* seq_file ->stop for the statistics file: nothing to undo. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
3512 
3513 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3514 {
3515 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3516 	struct neigh_statistics *st = v;
3517 
3518 	if (v == SEQ_START_TOKEN) {
3519 		seq_puts(seq, "entries  allocs   destroys hash_grows lookups  hits     res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3520 		return 0;
3521 	}
3522 
3523 	seq_printf(seq, "%08x %08lx %08lx %08lx   %08lx %08lx %08lx   "
3524 			"%08lx         %08lx         %08lx         "
3525 			"%08lx       %08lx            %08lx\n",
3526 		   atomic_read(&tbl->entries),
3527 
3528 		   st->allocs,
3529 		   st->destroys,
3530 		   st->hash_grows,
3531 
3532 		   st->lookups,
3533 		   st->hits,
3534 
3535 		   st->res_failed,
3536 
3537 		   st->rcv_probes_mcast,
3538 		   st->rcv_probes_ucast,
3539 
3540 		   st->periodic_gc_runs,
3541 		   st->forced_gc_runs,
3542 		   st->unres_discards,
3543 		   st->table_fulls
3544 		   );
3545 
3546 	return 0;
3547 }
3548 
3549 static const struct seq_operations neigh_stat_seq_ops = {
3550 	.start	= neigh_stat_seq_start,
3551 	.next	= neigh_stat_seq_next,
3552 	.stop	= neigh_stat_seq_stop,
3553 	.show	= neigh_stat_seq_show,
3554 };
3555 #endif /* CONFIG_PROC_FS */
3556 
3557 static void __neigh_notify(struct neighbour *n, int type, int flags,
3558 			   u32 pid)
3559 {
3560 	struct sk_buff *skb;
3561 	int err = -ENOBUFS;
3562 	struct net *net;
3563 
3564 	rcu_read_lock();
3565 	net = dev_net_rcu(n->dev);
3566 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3567 	if (skb == NULL)
3568 		goto errout;
3569 
3570 	err = __neigh_fill_info(skb, n, pid, 0, type, flags);
3571 	if (err < 0) {
3572 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3573 		WARN_ON(err == -EMSGSIZE);
3574 		kfree_skb(skb);
3575 		goto errout;
3576 	}
3577 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3578 	goto out;
3579 errout:
3580 	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3581 out:
3582 	rcu_read_unlock();
3583 }
3584 
3585 static void neigh_notify(struct neighbour *neigh, int type, int flags, u32 pid)
3586 {
3587 	read_lock_bh(&neigh->lock);
3588 	__neigh_notify(neigh, type, flags, pid);
3589 	read_unlock_bh(&neigh->lock);
3590 }
3591 
3592 void neigh_app_ns(struct neighbour *n)
3593 {
3594 	neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3595 }
3596 EXPORT_SYMBOL(neigh_app_ns);
3597 
3598 #ifdef CONFIG_SYSCTL
3599 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3600 
3601 static int proc_unres_qlen(const struct ctl_table *ctl, int write,
3602 			   void *buffer, size_t *lenp, loff_t *ppos)
3603 {
3604 	int size, ret;
3605 	struct ctl_table tmp = *ctl;
3606 
3607 	tmp.extra1 = SYSCTL_ZERO;
3608 	tmp.extra2 = &unres_qlen_max;
3609 	tmp.data = &size;
3610 
3611 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3612 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3613 
3614 	if (write && !ret)
3615 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3616 	return ret;
3617 }
3618 
3619 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3620 				  int index)
3621 {
3622 	struct net_device *dev;
3623 	int family = neigh_parms_family(p);
3624 
3625 	rcu_read_lock();
3626 	for_each_netdev_rcu(net, dev) {
3627 		struct neigh_parms *dst_p =
3628 				neigh_get_dev_parms_rcu(dev, family);
3629 
3630 		if (dst_p && !test_bit(index, dst_p->data_state))
3631 			dst_p->data[index] = p->data[index];
3632 	}
3633 	rcu_read_unlock();
3634 }
3635 
3636 static void neigh_proc_update(const struct ctl_table *ctl, int write)
3637 {
3638 	struct net_device *dev = ctl->extra1;
3639 	struct neigh_parms *p = ctl->extra2;
3640 	struct net *net = neigh_parms_net(p);
3641 	int index = (int *) ctl->data - p->data;
3642 
3643 	if (!write)
3644 		return;
3645 
3646 	set_bit(index, p->data_state);
3647 	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3648 		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3649 	if (!dev) /* NULL dev means this is default value */
3650 		neigh_copy_dflt_parms(net, p, index);
3651 }
3652 
3653 static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
3654 					   void *buffer, size_t *lenp,
3655 					   loff_t *ppos)
3656 {
3657 	struct ctl_table tmp = *ctl;
3658 	int ret;
3659 
3660 	tmp.extra1 = SYSCTL_ZERO;
3661 	tmp.extra2 = SYSCTL_INT_MAX;
3662 
3663 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3664 	neigh_proc_update(ctl, write);
3665 	return ret;
3666 }
3667 
3668 static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
3669 						   void *buffer, size_t *lenp, loff_t *ppos)
3670 {
3671 	struct ctl_table tmp = *ctl;
3672 	int ret;
3673 
3674 	int min = msecs_to_jiffies(1);
3675 
3676 	tmp.extra1 = &min;
3677 	tmp.extra2 = NULL;
3678 
3679 	ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
3680 	neigh_proc_update(ctl, write);
3681 	return ret;
3682 }
3683 
3684 int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
3685 			size_t *lenp, loff_t *ppos)
3686 {
3687 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3688 
3689 	neigh_proc_update(ctl, write);
3690 	return ret;
3691 }
3692 EXPORT_SYMBOL(neigh_proc_dointvec);
3693 
3694 int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
3695 				size_t *lenp, loff_t *ppos)
3696 {
3697 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3698 
3699 	neigh_proc_update(ctl, write);
3700 	return ret;
3701 }
3702 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3703 
3704 static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
3705 					      void *buffer, size_t *lenp,
3706 					      loff_t *ppos)
3707 {
3708 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3709 
3710 	neigh_proc_update(ctl, write);
3711 	return ret;
3712 }
3713 
3714 int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
3715 				   void *buffer, size_t *lenp, loff_t *ppos)
3716 {
3717 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3718 
3719 	neigh_proc_update(ctl, write);
3720 	return ret;
3721 }
3722 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3723 
3724 static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
3725 					  void *buffer, size_t *lenp,
3726 					  loff_t *ppos)
3727 {
3728 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3729 
3730 	neigh_proc_update(ctl, write);
3731 	return ret;
3732 }
3733 
3734 static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
3735 					  void *buffer, size_t *lenp,
3736 					  loff_t *ppos)
3737 {
3738 	struct neigh_parms *p = ctl->extra2;
3739 	int ret;
3740 
3741 	if (strcmp(ctl->procname, "base_reachable_time") == 0)
3742 		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3743 	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3744 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3745 	else
3746 		ret = -1;
3747 
3748 	if (write && ret == 0) {
3749 		/* update reachable_time as well, otherwise, the change will
3750 		 * only be effective after the next time neigh_periodic_work
3751 		 * decides to recompute it
3752 		 */
3753 		neigh_set_reach_time(p);
3754 	}
3755 	return ret;
3756 }
3757 
/*
 * Address of neigh_parms::data[idx] computed from a NULL base, i.e. the
 * member's offset expressed as a pointer.  neigh_sysctl_register() adds the
 * address of the real neigh_parms to it.
 */
#define NEIGH_PARMS_DATA_OFFSET(idx) \
	(&((struct neigh_parms *) 0)->data[idx])

/*
 * Template slot NEIGH_VAR_<var>, backed by data slot NEIGH_VAR_<src>, shown
 * as @name with permissions @perm and handled by @fn.
 */
#define NEIGH_SYSCTL_ENTRY(var, src, name, perm, fn) \
	[NEIGH_VAR_ ## var] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## src), \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= fn, \
	}

/* Mode 0644 shorthands, one per handler; slot and data index coincide. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(var, name) \
	NEIGH_SYSCTL_ENTRY(var, var, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(var, name) \
	NEIGH_SYSCTL_ENTRY(var, var, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(var, name) \
	NEIGH_SYSCTL_ENTRY(var, var, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(var, name) \
	NEIGH_SYSCTL_ENTRY(var, var, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)

/* "Reused" shorthands: the slot exposes another slot's data. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(var, src, name) \
	NEIGH_SYSCTL_ENTRY(var, src, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(var, src, name) \
	NEIGH_SYSCTL_ENTRY(var, src, name, 0644, neigh_proc_dointvec_unres_qlen)
3787 
3788 static struct neigh_sysctl_table {
3789 	struct ctl_table_header *sysctl_header;
3790 	struct ctl_table neigh_vars[NEIGH_VAR_MAX];
3791 } neigh_sysctl_template __read_mostly = {
3792 	.neigh_vars = {
3793 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3794 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3795 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3796 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3797 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3798 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3799 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3800 		NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
3801 						       "interval_probe_time_ms"),
3802 		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3803 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3804 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3805 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3806 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3807 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3808 		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3809 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3810 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3811 		[NEIGH_VAR_GC_INTERVAL] = {
3812 			.procname	= "gc_interval",
3813 			.maxlen		= sizeof(int),
3814 			.mode		= 0644,
3815 			.proc_handler	= proc_dointvec_jiffies,
3816 		},
3817 		[NEIGH_VAR_GC_THRESH1] = {
3818 			.procname	= "gc_thresh1",
3819 			.maxlen		= sizeof(int),
3820 			.mode		= 0644,
3821 			.extra1		= SYSCTL_ZERO,
3822 			.extra2		= SYSCTL_INT_MAX,
3823 			.proc_handler	= proc_dointvec_minmax,
3824 		},
3825 		[NEIGH_VAR_GC_THRESH2] = {
3826 			.procname	= "gc_thresh2",
3827 			.maxlen		= sizeof(int),
3828 			.mode		= 0644,
3829 			.extra1		= SYSCTL_ZERO,
3830 			.extra2		= SYSCTL_INT_MAX,
3831 			.proc_handler	= proc_dointvec_minmax,
3832 		},
3833 		[NEIGH_VAR_GC_THRESH3] = {
3834 			.procname	= "gc_thresh3",
3835 			.maxlen		= sizeof(int),
3836 			.mode		= 0644,
3837 			.extra1		= SYSCTL_ZERO,
3838 			.extra2		= SYSCTL_INT_MAX,
3839 			.proc_handler	= proc_dointvec_minmax,
3840 		},
3841 	},
3842 };
3843 
3844 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3845 			  proc_handler *handler)
3846 {
3847 	int i;
3848 	struct neigh_sysctl_table *t;
3849 	const char *dev_name_source;
3850 	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3851 	char *p_name;
3852 	size_t neigh_vars_size;
3853 
3854 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
3855 	if (!t)
3856 		goto err;
3857 
3858 	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3859 		t->neigh_vars[i].data += (long) p;
3860 		t->neigh_vars[i].extra1 = dev;
3861 		t->neigh_vars[i].extra2 = p;
3862 	}
3863 
3864 	neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
3865 	if (dev) {
3866 		dev_name_source = dev->name;
3867 		/* Terminate the table early */
3868 		neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
3869 	} else {
3870 		struct neigh_table *tbl = p->tbl;
3871 		dev_name_source = "default";
3872 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3873 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3874 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3875 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3876 	}
3877 
3878 	if (handler) {
3879 		/* RetransTime */
3880 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3881 		/* ReachableTime */
3882 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3883 		/* RetransTime (in milliseconds)*/
3884 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3885 		/* ReachableTime (in milliseconds) */
3886 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3887 	} else {
3888 		/* Those handlers will update p->reachable_time after
3889 		 * base_reachable_time(_ms) is set to ensure the new timer starts being
3890 		 * applied after the next neighbour update instead of waiting for
3891 		 * neigh_periodic_work to update its value (can be multiple minutes)
3892 		 * So any handler that replaces them should do this as well
3893 		 */
3894 		/* ReachableTime */
3895 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3896 			neigh_proc_base_reachable_time;
3897 		/* ReachableTime (in milliseconds) */
3898 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3899 			neigh_proc_base_reachable_time;
3900 	}
3901 
3902 	switch (neigh_parms_family(p)) {
3903 	case AF_INET:
3904 	      p_name = "ipv4";
3905 	      break;
3906 	case AF_INET6:
3907 	      p_name = "ipv6";
3908 	      break;
3909 	default:
3910 	      BUG();
3911 	}
3912 
3913 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3914 		p_name, dev_name_source);
3915 	t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
3916 						  neigh_path, t->neigh_vars,
3917 						  neigh_vars_size);
3918 	if (!t->sysctl_header)
3919 		goto free;
3920 
3921 	p->sysctl_table = t;
3922 	return 0;
3923 
3924 free:
3925 	kfree(t);
3926 err:
3927 	return -ENOBUFS;
3928 }
3929 EXPORT_SYMBOL(neigh_sysctl_register);
3930 
3931 void neigh_sysctl_unregister(struct neigh_parms *p)
3932 {
3933 	if (p->sysctl_table) {
3934 		struct neigh_sysctl_table *t = p->sysctl_table;
3935 		p->sysctl_table = NULL;
3936 		unregister_net_sysctl_table(t->sysctl_header);
3937 		kfree(t);
3938 	}
3939 }
3940 EXPORT_SYMBOL(neigh_sysctl_unregister);
3941 
3942 #endif	/* CONFIG_SYSCTL */
3943 
3944 static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
3945 	{.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
3946 	{.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
3947 	{.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
3948 	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
3949 	{.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info,
3950 	 .flags = RTNL_FLAG_DUMP_UNLOCKED},
3951 	{.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set,
3952 	 .flags = RTNL_FLAG_DOIT_UNLOCKED},
3953 };
3954 
3955 static int __init neigh_init(void)
3956 {
3957 	rtnl_register_many(neigh_rtnl_msg_handlers);
3958 	return 0;
3959 }
3960 
3961 subsys_initcall(neigh_init);
3962