1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Generic address resolution entity
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
8  *
9  *	Fixes:
10  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
11  *	Harald Welte		Add neighbour cache statistics like rtstat
12  */
13 
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 
16 #include <linux/slab.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/socket.h>
21 #include <linux/netdevice.h>
22 #include <linux/proc_fs.h>
23 #ifdef CONFIG_SYSCTL
24 #include <linux/sysctl.h>
25 #endif
26 #include <linux/times.h>
27 #include <net/net_namespace.h>
28 #include <net/neighbour.h>
29 #include <net/arp.h>
30 #include <net/dst.h>
31 #include <net/ip.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
41 
42 #include <trace/events/neigh.h>
43 
44 #define NEIGH_DEBUG 1
45 #define neigh_dbg(level, fmt, ...)		\
46 do {						\
47 	if (level <= NEIGH_DEBUG)		\
48 		pr_debug(fmt, ##__VA_ARGS__);	\
49 } while (0)
50 
51 #define PNEIGH_HASHMASK		0xF
52 
53 static void neigh_timer_handler(struct timer_list *t);
54 static void __neigh_notify(struct neighbour *n, int type, int flags,
55 			   u32 pid);
56 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
57 static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
58 			  bool skip_perm);
59 
60 #ifdef CONFIG_PROC_FS
61 static const struct seq_operations neigh_stat_seq_ops;
62 #endif
63 
64 static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
65 {
66 	int i;
67 
68 	switch (family) {
69 	default:
70 		DEBUG_NET_WARN_ON_ONCE(1);
71 		fallthrough; /* to avoid panic by null-ptr-deref */
72 	case AF_INET:
73 		i = NEIGH_ARP_TABLE;
74 		break;
75 	case AF_INET6:
76 		i = NEIGH_ND_TABLE;
77 		break;
78 	}
79 
80 	return &dev->neighbours[i];
81 }
82 
/*
   Neighbour hash table buckets are protected by the rwlock tbl->lock.

   - All scans of and updates to the hash buckets MUST be made under
     this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if the backend/driver wants to
     use the neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - by their reference count.
   - by the rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the link-layer address data and its
   validity state. However, the same lock is used to protect other
   entry fields as well:
    - the timer
    - the resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow there is dev->hard_header.
   dev->hard_header is assumed to be simple and not to call back
   into the neighbour tables.
 */
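/*
 * A minimal sketch of the rule above (illustrative only, not a helper
 * in this file): pin the entry under the table lock, drop the lock,
 * and only then do anything non-trivial with it.
 *
 *	static struct neighbour *pin_first(struct neigh_table *tbl,
 *					   struct hlist_head *bucket)
 *	{
 *		struct neighbour *n;
 *
 *		read_lock_bh(&tbl->lock);
 *		neigh_for_each_in_bucket(n, bucket) {
 *			neigh_hold(n);
 *			read_unlock_bh(&tbl->lock);
 *			return n;	// caller drops with neigh_release()
 *		}
 *		read_unlock_bh(&tbl->lock);
 *		return NULL;
 *	}
 */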
110 
111 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
112 {
113 	kfree_skb(skb);
114 	return -ENETDOWN;
115 }
116 
117 static void neigh_cleanup_and_release(struct neighbour *neigh)
118 {
119 	trace_neigh_cleanup_and_release(neigh, 0);
120 	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
121 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
122 	neigh_release(neigh);
123 }
124 
/*
 * Returns a value drawn uniformly at random from the interval
 * [base/2, 3*base/2). This corresponds to the default IPv6 settings
 * and is not overridable, because it is a really reasonable choice.
 */
130 
131 unsigned long neigh_rand_reach_time(unsigned long base)
132 {
133 	return base ? get_random_u32_below(base) + (base >> 1) : 0;
134 }
135 EXPORT_SYMBOL(neigh_rand_reach_time);
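/*
 * For example, with the common default base_reachable_time of 30
 * seconds (base = 30 * HZ), each call returns a value in
 * [15 * HZ, 45 * HZ), i.e. reachable_time is re-drawn somewhere
 * between 15 and 45 seconds.
 */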
136 
137 static void neigh_mark_dead(struct neighbour *n)
138 {
139 	n->dead = 1;
140 	if (!list_empty(&n->gc_list)) {
141 		list_del_init(&n->gc_list);
142 		atomic_dec(&n->tbl->gc_entries);
143 	}
144 	if (!list_empty(&n->managed_list))
145 		list_del_init(&n->managed_list);
146 }
147 
148 static void neigh_update_gc_list(struct neighbour *n)
149 {
150 	bool on_gc_list, exempt_from_gc;
151 
152 	write_lock_bh(&n->tbl->lock);
153 	write_lock(&n->lock);
154 	if (n->dead)
155 		goto out;
156 
157 	/* remove from the gc list if new state is permanent or if neighbor is
158 	 * externally learned / validated; otherwise entry should be on the gc
159 	 * list
160 	 */
161 	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
162 			 n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED);
163 	on_gc_list = !list_empty(&n->gc_list);
164 
165 	if (exempt_from_gc && on_gc_list) {
166 		list_del_init(&n->gc_list);
167 		atomic_dec(&n->tbl->gc_entries);
168 	} else if (!exempt_from_gc && !on_gc_list) {
169 		/* add entries to the tail; cleaning removes from the front */
170 		list_add_tail(&n->gc_list, &n->tbl->gc_list);
171 		atomic_inc(&n->tbl->gc_entries);
172 	}
173 out:
174 	write_unlock(&n->lock);
175 	write_unlock_bh(&n->tbl->lock);
176 }
177 
178 static void neigh_update_managed_list(struct neighbour *n)
179 {
180 	bool on_managed_list, add_to_managed;
181 
182 	write_lock_bh(&n->tbl->lock);
183 	write_lock(&n->lock);
184 	if (n->dead)
185 		goto out;
186 
187 	add_to_managed = n->flags & NTF_MANAGED;
188 	on_managed_list = !list_empty(&n->managed_list);
189 
190 	if (!add_to_managed && on_managed_list)
191 		list_del_init(&n->managed_list);
192 	else if (add_to_managed && !on_managed_list)
193 		list_add_tail(&n->managed_list, &n->tbl->managed_list);
194 out:
195 	write_unlock(&n->lock);
196 	write_unlock_bh(&n->tbl->lock);
197 }
198 
199 static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
200 			       bool *gc_update, bool *managed_update)
201 {
202 	u32 ndm_flags, old_flags = neigh->flags;
203 
204 	if (!(flags & NEIGH_UPDATE_F_ADMIN))
205 		return;
206 
207 	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
208 	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
209 	ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_VALIDATED) ? NTF_EXT_VALIDATED : 0;
210 
211 	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
212 		if (ndm_flags & NTF_EXT_LEARNED)
213 			neigh->flags |= NTF_EXT_LEARNED;
214 		else
215 			neigh->flags &= ~NTF_EXT_LEARNED;
216 		*notify = 1;
217 		*gc_update = true;
218 	}
219 	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
220 		if (ndm_flags & NTF_MANAGED)
221 			neigh->flags |= NTF_MANAGED;
222 		else
223 			neigh->flags &= ~NTF_MANAGED;
224 		*notify = 1;
225 		*managed_update = true;
226 	}
227 	if ((old_flags ^ ndm_flags) & NTF_EXT_VALIDATED) {
228 		if (ndm_flags & NTF_EXT_VALIDATED)
229 			neigh->flags |= NTF_EXT_VALIDATED;
230 		else
231 			neigh->flags &= ~NTF_EXT_VALIDATED;
232 		*notify = 1;
233 		*gc_update = true;
234 	}
235 }
236 
237 bool neigh_remove_one(struct neighbour *n)
238 {
239 	bool retval = false;
240 
241 	write_lock(&n->lock);
242 	if (refcount_read(&n->refcnt) == 1) {
243 		hlist_del_rcu(&n->hash);
244 		hlist_del_rcu(&n->dev_list);
245 		neigh_mark_dead(n);
246 		retval = true;
247 	}
248 	write_unlock(&n->lock);
249 	if (retval)
250 		neigh_cleanup_and_release(n);
251 	return retval;
252 }
253 
254 static int neigh_forced_gc(struct neigh_table *tbl)
255 {
256 	int max_clean = atomic_read(&tbl->gc_entries) -
257 			READ_ONCE(tbl->gc_thresh2);
258 	u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
259 	unsigned long tref = jiffies - 5 * HZ;
260 	struct neighbour *n, *tmp;
261 	int shrunk = 0;
262 	int loop = 0;
263 
264 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
265 
266 	write_lock_bh(&tbl->lock);
267 
268 	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
269 		if (refcount_read(&n->refcnt) == 1) {
270 			bool remove = false;
271 
272 			write_lock(&n->lock);
273 			if ((n->nud_state == NUD_FAILED) ||
274 			    (n->nud_state == NUD_NOARP) ||
275 			    (tbl->is_multicast &&
276 			     tbl->is_multicast(n->primary_key)) ||
277 			    !time_in_range(n->updated, tref, jiffies))
278 				remove = true;
279 			write_unlock(&n->lock);
280 
281 			if (remove && neigh_remove_one(n))
282 				shrunk++;
283 			if (shrunk >= max_clean)
284 				break;
285 			if (++loop == 16) {
286 				if (ktime_get_ns() > tmax)
287 					goto unlock;
288 				loop = 0;
289 			}
290 		}
291 	}
292 
293 	WRITE_ONCE(tbl->last_flush, jiffies);
294 unlock:
295 	write_unlock_bh(&tbl->lock);
296 
297 	return shrunk;
298 }
299 
300 static void neigh_add_timer(struct neighbour *n, unsigned long when)
301 {
	/* Keep a safe distance from the jiffies - LONG_MAX wrap-around
	 * point while the timer is running in DELAY/PROBE state, but
	 * still report times far in the past to user space.
	 */
306 	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
307 
308 	neigh_hold(n);
309 	if (!time_in_range(n->confirmed, mint, jiffies))
310 		n->confirmed = mint;
311 	if (time_before(n->used, n->confirmed))
312 		n->used = n->confirmed;
313 	if (unlikely(mod_timer(&n->timer, when))) {
314 		printk("NEIGH: BUG, double timer add, state is %x\n",
315 		       n->nud_state);
316 		dump_stack();
317 	}
318 }
319 
320 static int neigh_del_timer(struct neighbour *n)
321 {
322 	if ((n->nud_state & NUD_IN_TIMER) &&
323 	    timer_delete(&n->timer)) {
324 		neigh_release(n);
325 		return 1;
326 	}
327 	return 0;
328 }
329 
330 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
331 						   int family)
332 {
333 	switch (family) {
334 	case AF_INET:
335 		return __in_dev_arp_parms_get_rcu(dev);
336 	case AF_INET6:
337 		return __in6_dev_nd_parms_get_rcu(dev);
338 	}
339 	return NULL;
340 }
341 
342 static void neigh_parms_qlen_dec(struct net_device *dev, int family)
343 {
344 	struct neigh_parms *p;
345 
346 	rcu_read_lock();
347 	p = neigh_get_dev_parms_rcu(dev, family);
348 	if (p)
349 		p->qlen--;
350 	rcu_read_unlock();
351 }
352 
353 static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
354 			       int family)
355 {
356 	struct sk_buff_head tmp;
357 	unsigned long flags;
358 	struct sk_buff *skb;
359 
360 	skb_queue_head_init(&tmp);
361 	spin_lock_irqsave(&list->lock, flags);
362 	skb = skb_peek(list);
363 	while (skb != NULL) {
364 		struct sk_buff *skb_next = skb_peek_next(skb, list);
365 		struct net_device *dev = skb->dev;
366 
367 		if (net == NULL || net_eq(dev_net(dev), net)) {
368 			neigh_parms_qlen_dec(dev, family);
369 			__skb_unlink(skb, list);
370 			__skb_queue_tail(&tmp, skb);
371 		}
372 		skb = skb_next;
373 	}
374 	spin_unlock_irqrestore(&list->lock, flags);
375 
376 	while ((skb = __skb_dequeue(&tmp))) {
377 		dev_put(skb->dev);
378 		kfree_skb(skb);
379 	}
380 }
381 
382 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
383 			    bool skip_perm)
384 {
385 	struct hlist_head *dev_head;
386 	struct hlist_node *tmp;
387 	struct neighbour *n;
388 
389 	dev_head = neigh_get_dev_table(dev, tbl->family);
390 
391 	hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
392 		if (skip_perm &&
393 		    (n->nud_state & NUD_PERMANENT ||
394 		     n->flags & NTF_EXT_VALIDATED))
395 			continue;
396 
397 		hlist_del_rcu(&n->hash);
398 		hlist_del_rcu(&n->dev_list);
399 		write_lock(&n->lock);
400 		neigh_del_timer(n);
401 		neigh_mark_dead(n);
402 		if (refcount_read(&n->refcnt) != 1) {
			/* The most unpleasant situation:
			 * we must destroy the neighbour entry,
			 * but someone still uses it.
			 *
			 * Destruction will be delayed until
			 * the last user releases the entry, but
			 * we must kill the timers etc. and move
			 * it to a safe state.
			 */
412 			__skb_queue_purge(&n->arp_queue);
413 			n->arp_queue_len_bytes = 0;
414 			WRITE_ONCE(n->output, neigh_blackhole);
415 			if (n->nud_state & NUD_VALID)
416 				n->nud_state = NUD_NOARP;
417 			else
418 				n->nud_state = NUD_NONE;
419 			neigh_dbg(2, "neigh %p is stray\n", n);
420 		}
421 		write_unlock(&n->lock);
422 		neigh_cleanup_and_release(n);
423 	}
424 }
425 
426 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
427 {
428 	write_lock_bh(&tbl->lock);
429 	neigh_flush_dev(tbl, dev, false);
430 	write_unlock_bh(&tbl->lock);
431 }
432 EXPORT_SYMBOL(neigh_changeaddr);
433 
434 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
435 			  bool skip_perm)
436 {
437 	write_lock_bh(&tbl->lock);
438 	neigh_flush_dev(tbl, dev, skip_perm);
439 	write_unlock_bh(&tbl->lock);
440 
441 	pneigh_ifdown(tbl, dev, skip_perm);
442 	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
443 			   tbl->family);
444 	if (skb_queue_empty_lockless(&tbl->proxy_queue))
445 		timer_delete_sync(&tbl->proxy_timer);
446 	return 0;
447 }
448 
449 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
450 {
451 	__neigh_ifdown(tbl, dev, true);
452 	return 0;
453 }
454 EXPORT_SYMBOL(neigh_carrier_down);
455 
456 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
457 {
458 	__neigh_ifdown(tbl, dev, false);
459 	return 0;
460 }
461 EXPORT_SYMBOL(neigh_ifdown);
462 
463 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
464 				     struct net_device *dev,
465 				     u32 flags, bool exempt_from_gc)
466 {
467 	struct neighbour *n = NULL;
468 	unsigned long now = jiffies;
469 	int entries, gc_thresh3;
470 
471 	if (exempt_from_gc)
472 		goto do_alloc;
473 
474 	entries = atomic_inc_return(&tbl->gc_entries) - 1;
475 	gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
476 	if (entries >= gc_thresh3 ||
477 	    (entries >= READ_ONCE(tbl->gc_thresh2) &&
478 	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
479 		if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
480 			net_info_ratelimited("%s: neighbor table overflow!\n",
481 					     tbl->id);
482 			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
483 			goto out_entries;
484 		}
485 	}
486 
487 do_alloc:
488 	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
489 	if (!n)
490 		goto out_entries;
491 
492 	__skb_queue_head_init(&n->arp_queue);
493 	rwlock_init(&n->lock);
494 	seqlock_init(&n->ha_lock);
495 	n->updated	  = n->used = now;
496 	n->nud_state	  = NUD_NONE;
497 	n->output	  = neigh_blackhole;
498 	n->flags	  = flags;
499 	seqlock_init(&n->hh.hh_lock);
500 	n->parms	  = neigh_parms_clone(&tbl->parms);
501 	timer_setup(&n->timer, neigh_timer_handler, 0);
502 
503 	NEIGH_CACHE_STAT_INC(tbl, allocs);
504 	n->tbl		  = tbl;
505 	refcount_set(&n->refcnt, 1);
506 	n->dead		  = 1;
507 	INIT_LIST_HEAD(&n->gc_list);
508 	INIT_LIST_HEAD(&n->managed_list);
509 
510 	atomic_inc(&tbl->entries);
511 out:
512 	return n;
513 
514 out_entries:
515 	if (!exempt_from_gc)
516 		atomic_dec(&tbl->gc_entries);
517 	goto out;
518 }
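/*
 * The gc_thresh2/gc_thresh3 values consulted above are the per-table
 * soft and hard limits exposed via sysctl, e.g. for IPv4 under
 * /proc/sys/net/ipv4/neigh/default/. Once gc_thresh3 is reached and a
 * forced GC pass cannot reclaim anything, the allocation fails with
 * the "neighbor table overflow!" message above.
 */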
519 
520 static void neigh_get_hash_rnd(u32 *x)
521 {
522 	*x = get_random_u32() | 1;
523 }
524 
525 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
526 {
527 	size_t size = (1 << shift) * sizeof(struct hlist_head);
528 	struct hlist_head *hash_heads;
529 	struct neigh_hash_table *ret;
530 	int i;
531 
532 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
533 	if (!ret)
534 		return NULL;
535 
536 	hash_heads = kzalloc(size, GFP_ATOMIC);
537 	if (!hash_heads) {
538 		kfree(ret);
539 		return NULL;
540 	}
541 	ret->hash_heads = hash_heads;
542 	ret->hash_shift = shift;
543 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
544 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
545 	return ret;
546 }
547 
548 static void neigh_hash_free_rcu(struct rcu_head *head)
549 {
550 	struct neigh_hash_table *nht = container_of(head,
551 						    struct neigh_hash_table,
552 						    rcu);
553 
554 	kfree(nht->hash_heads);
555 	kfree(nht);
556 }
557 
558 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
559 						unsigned long new_shift)
560 {
561 	unsigned int i, hash;
562 	struct neigh_hash_table *new_nht, *old_nht;
563 
564 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
565 
566 	old_nht = rcu_dereference_protected(tbl->nht,
567 					    lockdep_is_held(&tbl->lock));
568 	new_nht = neigh_hash_alloc(new_shift);
569 	if (!new_nht)
570 		return old_nht;
571 
572 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
573 		struct hlist_node *tmp;
574 		struct neighbour *n;
575 
576 		neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
577 			hash = tbl->hash(n->primary_key, n->dev,
578 					 new_nht->hash_rnd);
579 
580 			hash >>= (32 - new_nht->hash_shift);
581 
582 			hlist_del_rcu(&n->hash);
583 			hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
584 		}
585 	}
586 
587 	rcu_assign_pointer(tbl->nht, new_nht);
588 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
589 	return new_nht;
590 }
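/*
 * Tables start out with 2^3 = 8 buckets (see the neigh_hash_alloc(3)
 * call in neigh_table_init() below) and the bucket count doubles
 * whenever the number of entries exceeds it, keeping the average
 * chain length around one.
 */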
591 
592 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
593 			       struct net_device *dev)
594 {
595 	struct neighbour *n;
596 
597 	NEIGH_CACHE_STAT_INC(tbl, lookups);
598 
599 	rcu_read_lock();
600 	n = __neigh_lookup_noref(tbl, pkey, dev);
601 	if (n) {
602 		if (!refcount_inc_not_zero(&n->refcnt))
603 			n = NULL;
604 		NEIGH_CACHE_STAT_INC(tbl, hits);
605 	}
606 
607 	rcu_read_unlock();
608 	return n;
609 }
610 EXPORT_SYMBOL(neigh_lookup);
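/*
 * Hot-path callers typically skip even the refcount bump and use the
 * lockless variant under RCU, along these lines (illustrative sketch):
 *
 *	rcu_read_lock();
 *	n = __neigh_lookup_noref(tbl, pkey, dev);
 *	if (n) {
 *		// memory stays valid until rcu_read_unlock(), though
 *		// the entry may be unlinked concurrently
 *	}
 *	rcu_read_unlock();
 */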
611 
612 static struct neighbour *
613 ___neigh_create(struct neigh_table *tbl, const void *pkey,
614 		struct net_device *dev, u32 flags,
615 		bool exempt_from_gc, bool want_ref)
616 {
617 	u32 hash_val, key_len = tbl->key_len;
618 	struct neighbour *n1, *rc, *n;
619 	struct neigh_hash_table *nht;
620 	int error;
621 
622 	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
623 	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
624 	if (!n) {
625 		rc = ERR_PTR(-ENOBUFS);
626 		goto out;
627 	}
628 
629 	memcpy(n->primary_key, pkey, key_len);
630 	n->dev = dev;
631 	netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
632 
633 	/* Protocol specific setup. */
634 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
635 		rc = ERR_PTR(error);
636 		goto out_neigh_release;
637 	}
638 
639 	if (dev->netdev_ops->ndo_neigh_construct) {
640 		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
641 		if (error < 0) {
642 			rc = ERR_PTR(error);
643 			goto out_neigh_release;
644 		}
645 	}
646 
647 	/* Device specific setup. */
648 	if (n->parms->neigh_setup &&
649 	    (error = n->parms->neigh_setup(n)) < 0) {
650 		rc = ERR_PTR(error);
651 		goto out_neigh_release;
652 	}
653 
654 	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
655 
656 	write_lock_bh(&tbl->lock);
657 	nht = rcu_dereference_protected(tbl->nht,
658 					lockdep_is_held(&tbl->lock));
659 
660 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
661 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
662 
663 	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
664 
665 	if (n->parms->dead) {
666 		rc = ERR_PTR(-EINVAL);
667 		goto out_tbl_unlock;
668 	}
669 
670 	neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
671 		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
672 			if (want_ref)
673 				neigh_hold(n1);
674 			rc = n1;
675 			goto out_tbl_unlock;
676 		}
677 	}
678 
679 	n->dead = 0;
680 	if (!exempt_from_gc)
681 		list_add_tail(&n->gc_list, &n->tbl->gc_list);
682 	if (n->flags & NTF_MANAGED)
683 		list_add_tail(&n->managed_list, &n->tbl->managed_list);
684 	if (want_ref)
685 		neigh_hold(n);
686 	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
687 
688 	hlist_add_head_rcu(&n->dev_list,
689 			   neigh_get_dev_table(dev, tbl->family));
690 
691 	write_unlock_bh(&tbl->lock);
692 	neigh_dbg(2, "neigh %p is created\n", n);
693 	rc = n;
694 out:
695 	return rc;
696 out_tbl_unlock:
697 	write_unlock_bh(&tbl->lock);
698 out_neigh_release:
699 	if (!exempt_from_gc)
700 		atomic_dec(&tbl->gc_entries);
701 	neigh_release(n);
702 	goto out;
703 }
704 
705 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
706 				 struct net_device *dev, bool want_ref)
707 {
708 	bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);
709 
710 	return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
711 }
712 EXPORT_SYMBOL(__neigh_create);
713 
714 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
715 {
716 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
717 	hash_val ^= (hash_val >> 16);
718 	hash_val ^= hash_val >> 8;
719 	hash_val ^= hash_val >> 4;
720 	hash_val &= PNEIGH_HASHMASK;
721 	return hash_val;
722 }
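/*
 * Note that only the last four bytes of the key contribute to the
 * hash: for IPv4 that is the whole address, for IPv6 the low-order
 * 32 bits. The value is then folded down to one of
 * PNEIGH_HASHMASK + 1 = 16 buckets.
 */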
723 
724 struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
725 				   struct net *net, const void *pkey,
726 				   struct net_device *dev)
727 {
728 	struct pneigh_entry *n;
729 	unsigned int key_len;
730 	u32 hash_val;
731 
732 	key_len = tbl->key_len;
733 	hash_val = pneigh_hash(pkey, key_len);
734 	n = rcu_dereference_check(tbl->phash_buckets[hash_val],
735 				  lockdep_is_held(&tbl->phash_lock));
736 
737 	while (n) {
738 		if (!memcmp(n->key, pkey, key_len) &&
739 		    net_eq(pneigh_net(n), net) &&
740 		    (n->dev == dev || !n->dev))
741 			return n;
742 
743 		n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
744 	}
745 
746 	return NULL;
747 }
748 EXPORT_IPV6_MOD(pneigh_lookup);
749 
750 int pneigh_create(struct neigh_table *tbl, struct net *net,
751 		  const void *pkey, struct net_device *dev,
752 		  u32 flags, u8 protocol, bool permanent)
753 {
754 	struct pneigh_entry *n;
755 	unsigned int key_len;
756 	u32 hash_val;
757 	int err = 0;
758 
759 	mutex_lock(&tbl->phash_lock);
760 
761 	n = pneigh_lookup(tbl, net, pkey, dev);
762 	if (n)
763 		goto update;
764 
765 	key_len = tbl->key_len;
766 	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
767 	if (!n) {
768 		err = -ENOBUFS;
769 		goto out;
770 	}
771 
772 	write_pnet(&n->net, net);
773 	memcpy(n->key, pkey, key_len);
774 	n->dev = dev;
775 	netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);
776 
777 	if (tbl->pconstructor && tbl->pconstructor(n)) {
778 		netdev_put(dev, &n->dev_tracker);
779 		kfree(n);
780 		err = -ENOBUFS;
781 		goto out;
782 	}
783 
784 	hash_val = pneigh_hash(pkey, key_len);
785 	n->next = tbl->phash_buckets[hash_val];
786 	rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
787 update:
788 	WRITE_ONCE(n->flags, flags);
789 	n->permanent = permanent;
790 	WRITE_ONCE(n->protocol, protocol);
791 out:
792 	mutex_unlock(&tbl->phash_lock);
793 	return err;
794 }
795 
796 static void pneigh_destroy(struct rcu_head *rcu)
797 {
798 	struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
799 
800 	netdev_put(n->dev, &n->dev_tracker);
801 	kfree(n);
802 }
803 
804 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
805 		  struct net_device *dev)
806 {
807 	struct pneigh_entry *n, __rcu **np;
808 	unsigned int key_len;
809 	u32 hash_val;
810 
811 	key_len = tbl->key_len;
812 	hash_val = pneigh_hash(pkey, key_len);
813 
814 	mutex_lock(&tbl->phash_lock);
815 
816 	for (np = &tbl->phash_buckets[hash_val];
817 	     (n = rcu_dereference_protected(*np, 1)) != NULL;
818 	     np = &n->next) {
819 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
820 		    net_eq(pneigh_net(n), net)) {
821 			rcu_assign_pointer(*np, n->next);
822 
823 			mutex_unlock(&tbl->phash_lock);
824 
825 			if (tbl->pdestructor)
826 				tbl->pdestructor(n);
827 
828 			call_rcu(&n->rcu, pneigh_destroy);
829 			return 0;
830 		}
831 	}
832 
833 	mutex_unlock(&tbl->phash_lock);
834 	return -ENOENT;
835 }
836 
837 static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
838 			  bool skip_perm)
839 {
840 	struct pneigh_entry *n, __rcu **np;
841 	LIST_HEAD(head);
842 	u32 h;
843 
844 	mutex_lock(&tbl->phash_lock);
845 
846 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
847 		np = &tbl->phash_buckets[h];
848 		while ((n = rcu_dereference_protected(*np, 1)) != NULL) {
849 			if (skip_perm && n->permanent)
850 				goto skip;
851 			if (!dev || n->dev == dev) {
852 				rcu_assign_pointer(*np, n->next);
853 				list_add(&n->free_node, &head);
854 				continue;
855 			}
856 skip:
857 			np = &n->next;
858 		}
859 	}
860 
861 	mutex_unlock(&tbl->phash_lock);
862 
863 	while (!list_empty(&head)) {
864 		n = list_first_entry(&head, typeof(*n), free_node);
865 		list_del(&n->free_node);
866 
867 		if (tbl->pdestructor)
868 			tbl->pdestructor(n);
869 
870 		call_rcu(&n->rcu, pneigh_destroy);
871 	}
872 }
873 
874 static inline void neigh_parms_put(struct neigh_parms *parms)
875 {
876 	if (refcount_dec_and_test(&parms->refcnt))
877 		kfree(parms);
878 }
879 
/*
 *	The neighbour must already be out of the table.
 */
884 void neigh_destroy(struct neighbour *neigh)
885 {
886 	struct net_device *dev = neigh->dev;
887 
888 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
889 
890 	if (!neigh->dead) {
891 		pr_warn("Destroying alive neighbour %p\n", neigh);
892 		dump_stack();
893 		return;
894 	}
895 
896 	if (neigh_del_timer(neigh))
897 		pr_warn("Impossible event\n");
898 
899 	write_lock_bh(&neigh->lock);
900 	__skb_queue_purge(&neigh->arp_queue);
901 	write_unlock_bh(&neigh->lock);
902 	neigh->arp_queue_len_bytes = 0;
903 
904 	if (dev->netdev_ops->ndo_neigh_destroy)
905 		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
906 
907 	netdev_put(dev, &neigh->dev_tracker);
908 	neigh_parms_put(neigh->parms);
909 
910 	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
911 
912 	atomic_dec(&neigh->tbl->entries);
913 	kfree_rcu(neigh, rcu);
914 }
915 EXPORT_SYMBOL(neigh_destroy);
916 
/* Neighbour state is suspicious;
   disable the fast path.

   Called with neigh write-locked.
 */
922 static void neigh_suspect(struct neighbour *neigh)
923 {
924 	neigh_dbg(2, "neigh %p is suspected\n", neigh);
925 
926 	WRITE_ONCE(neigh->output, neigh->ops->output);
927 }
928 
/* Neighbour state is OK;
   enable the fast path.

   Called with neigh write-locked.
 */
934 static void neigh_connect(struct neighbour *neigh)
935 {
936 	neigh_dbg(2, "neigh %p is connected\n", neigh);
937 
938 	WRITE_ONCE(neigh->output, neigh->ops->connected_output);
939 }
940 
941 static void neigh_periodic_work(struct work_struct *work)
942 {
943 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
944 	struct neigh_hash_table *nht;
945 	struct hlist_node *tmp;
946 	struct neighbour *n;
947 	unsigned int i;
948 
949 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
950 
951 	write_lock_bh(&tbl->lock);
952 	nht = rcu_dereference_protected(tbl->nht,
953 					lockdep_is_held(&tbl->lock));
954 
955 	/*
956 	 *	periodically recompute ReachableTime from random function
957 	 */
958 
959 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
960 		struct neigh_parms *p;
961 
962 		WRITE_ONCE(tbl->last_rand, jiffies);
963 		list_for_each_entry(p, &tbl->parms_list, list)
964 			p->reachable_time =
965 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
966 	}
967 
968 	if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
969 		goto out;
970 
971 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
972 		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
973 			unsigned int state;
974 
975 			write_lock(&n->lock);
976 
977 			state = n->nud_state;
978 			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
979 			    (n->flags &
980 			     (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) {
981 				write_unlock(&n->lock);
982 				continue;
983 			}
984 
985 			if (time_before(n->used, n->confirmed) &&
986 			    time_is_before_eq_jiffies(n->confirmed))
987 				n->used = n->confirmed;
988 
989 			if (refcount_read(&n->refcnt) == 1 &&
990 			    (state == NUD_FAILED ||
991 			     !time_in_range_open(jiffies, n->used,
992 						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
993 				hlist_del_rcu(&n->hash);
994 				hlist_del_rcu(&n->dev_list);
995 				neigh_mark_dead(n);
996 				write_unlock(&n->lock);
997 				neigh_cleanup_and_release(n);
998 				continue;
999 			}
1000 			write_unlock(&n->lock);
1001 		}
1002 		/*
1003 		 * It's fine to release lock here, even if hash table
1004 		 * grows while we are preempted.
1005 		 */
1006 		write_unlock_bh(&tbl->lock);
1007 		cond_resched();
1008 		write_lock_bh(&tbl->lock);
1009 		nht = rcu_dereference_protected(tbl->nht,
1010 						lockdep_is_held(&tbl->lock));
1011 	}
1012 out:
1013 	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
1014 	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
1015 	 * BASE_REACHABLE_TIME.
1016 	 */
1017 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1018 			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
1019 	write_unlock_bh(&tbl->lock);
1020 }
1021 
1022 static __inline__ int neigh_max_probes(struct neighbour *n)
1023 {
1024 	struct neigh_parms *p = n->parms;
1025 	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
1026 	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
1027 	        NEIGH_VAR(p, MCAST_PROBES));
1028 }
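/*
 * With the usual defaults (ucast_solicit = 3, app_solicit = 0,
 * mcast_solicit = 3, mcast_resolicit = 0) this comes to 6 probes
 * while resolving (NUD_INCOMPLETE) and 3 while re-validating
 * (NUD_PROBE).
 */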
1029 
1030 static void neigh_invalidate(struct neighbour *neigh)
1031 	__releases(neigh->lock)
1032 	__acquires(neigh->lock)
1033 {
1034 	struct sk_buff *skb;
1035 
1036 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
1037 	neigh_dbg(2, "neigh %p is failed\n", neigh);
1038 	neigh->updated = jiffies;
1039 
	/* This is a delicate place. The error_report() callback is a
	   complicated routine; in particular, it can hit the same
	   neighbour entry!

	   So we try to be careful and avoid a dead loop. --ANK
	 */
1045 	while (neigh->nud_state == NUD_FAILED &&
1046 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1047 		write_unlock(&neigh->lock);
1048 		neigh->ops->error_report(neigh, skb);
1049 		write_lock(&neigh->lock);
1050 	}
1051 	__skb_queue_purge(&neigh->arp_queue);
1052 	neigh->arp_queue_len_bytes = 0;
1053 }
1054 
1055 static void neigh_probe(struct neighbour *neigh)
1056 	__releases(neigh->lock)
1057 {
1058 	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1059 	/* keep skb alive even if arp_queue overflows */
1060 	if (skb)
1061 		skb = skb_clone(skb, GFP_ATOMIC);
1062 	write_unlock(&neigh->lock);
1063 	if (neigh->ops->solicit)
1064 		neigh->ops->solicit(neigh, skb);
1065 	atomic_inc(&neigh->probes);
1066 	consume_skb(skb);
1067 }
1068 
1069 /* Called when a timer expires for a neighbour entry. */
1070 
1071 static void neigh_timer_handler(struct timer_list *t)
1072 {
1073 	unsigned long now, next;
1074 	struct neighbour *neigh = timer_container_of(neigh, t, timer);
1075 	unsigned int state;
1076 	int notify = 0;
1077 
1078 	write_lock(&neigh->lock);
1079 
1080 	state = neigh->nud_state;
1081 	now = jiffies;
1082 	next = now + HZ;
1083 
1084 	if (!(state & NUD_IN_TIMER))
1085 		goto out;
1086 
1087 	if (state & NUD_REACHABLE) {
1088 		if (time_before_eq(now,
1089 				   neigh->confirmed + neigh->parms->reachable_time)) {
1090 			neigh_dbg(2, "neigh %p is still alive\n", neigh);
1091 			next = neigh->confirmed + neigh->parms->reachable_time;
1092 		} else if (time_before_eq(now,
1093 					  neigh->used +
1094 					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1095 			neigh_dbg(2, "neigh %p is delayed\n", neigh);
1096 			WRITE_ONCE(neigh->nud_state, NUD_DELAY);
1097 			neigh->updated = jiffies;
1098 			neigh_suspect(neigh);
1099 			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1100 		} else {
1101 			neigh_dbg(2, "neigh %p is suspected\n", neigh);
1102 			WRITE_ONCE(neigh->nud_state, NUD_STALE);
1103 			neigh->updated = jiffies;
1104 			neigh_suspect(neigh);
1105 			notify = 1;
1106 		}
1107 	} else if (state & NUD_DELAY) {
1108 		if (time_before_eq(now,
1109 				   neigh->confirmed +
1110 				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1111 			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1112 			WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
1113 			neigh->updated = jiffies;
1114 			neigh_connect(neigh);
1115 			notify = 1;
1116 			next = neigh->confirmed + neigh->parms->reachable_time;
1117 		} else {
1118 			neigh_dbg(2, "neigh %p is probed\n", neigh);
1119 			WRITE_ONCE(neigh->nud_state, NUD_PROBE);
1120 			neigh->updated = jiffies;
1121 			atomic_set(&neigh->probes, 0);
1122 			notify = 1;
1123 			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1124 					 HZ/100);
1125 		}
1126 	} else {
1127 		/* NUD_PROBE|NUD_INCOMPLETE */
1128 		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
1129 	}
1130 
1131 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1132 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1133 		if (neigh->nud_state == NUD_PROBE &&
1134 		    neigh->flags & NTF_EXT_VALIDATED) {
1135 			WRITE_ONCE(neigh->nud_state, NUD_STALE);
1136 			neigh->updated = jiffies;
1137 		} else {
1138 			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
1139 			neigh_invalidate(neigh);
1140 		}
1141 		notify = 1;
1142 		goto out;
1143 	}
1144 
1145 	if (neigh->nud_state & NUD_IN_TIMER) {
1146 		if (time_before(next, jiffies + HZ/100))
1147 			next = jiffies + HZ/100;
1148 		if (!mod_timer(&neigh->timer, next))
1149 			neigh_hold(neigh);
1150 	}
1151 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1152 		neigh_probe(neigh);
1153 	} else {
1154 out:
1155 		write_unlock(&neigh->lock);
1156 	}
1157 
1158 	if (notify)
1159 		neigh_update_notify(neigh, 0);
1160 
1161 	trace_neigh_timer_handler(neigh, 0);
1162 
1163 	neigh_release(neigh);
1164 }
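/*
 * Summary of the state transitions driven by the timer above:
 *
 *	NUD_REACHABLE -> NUD_DELAY	used recently, confirmation expired
 *	NUD_REACHABLE -> NUD_STALE	idle past reachable_time
 *	NUD_DELAY     -> NUD_REACHABLE	confirmed within DELAY_PROBE_TIME
 *	NUD_DELAY     -> NUD_PROBE	no confirmation; start probing
 *	NUD_PROBE / NUD_INCOMPLETE -> NUD_FAILED
 *					probe budget exhausted (entries
 *					with NTF_EXT_VALIDATED fall back
 *					to NUD_STALE instead)
 */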
1165 
1166 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
1167 		       const bool immediate_ok)
1168 {
1169 	int rc;
1170 	bool immediate_probe = false;
1171 
1172 	write_lock_bh(&neigh->lock);
1173 
1174 	rc = 0;
1175 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1176 		goto out_unlock_bh;
1177 	if (neigh->dead)
1178 		goto out_dead;
1179 
1180 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1181 		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1182 		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
1183 			unsigned long next, now = jiffies;
1184 
1185 			atomic_set(&neigh->probes,
1186 				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
1187 			neigh_del_timer(neigh);
1188 			WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
1189 			neigh->updated = now;
1190 			if (!immediate_ok) {
1191 				next = now + 1;
1192 			} else {
1193 				immediate_probe = true;
1194 				next = now + max(NEIGH_VAR(neigh->parms,
1195 							   RETRANS_TIME),
1196 						 HZ / 100);
1197 			}
1198 			neigh_add_timer(neigh, next);
1199 		} else {
1200 			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
1201 			neigh->updated = jiffies;
1202 			write_unlock_bh(&neigh->lock);
1203 
1204 			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
1205 			return 1;
1206 		}
1207 	} else if (neigh->nud_state & NUD_STALE) {
1208 		neigh_dbg(2, "neigh %p is delayed\n", neigh);
1209 		neigh_del_timer(neigh);
1210 		WRITE_ONCE(neigh->nud_state, NUD_DELAY);
1211 		neigh->updated = jiffies;
1212 		neigh_add_timer(neigh, jiffies +
1213 				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1214 	}
1215 
1216 	if (neigh->nud_state == NUD_INCOMPLETE) {
1217 		if (skb) {
1218 			while (neigh->arp_queue_len_bytes + skb->truesize >
1219 			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1220 				struct sk_buff *buff;
1221 
1222 				buff = __skb_dequeue(&neigh->arp_queue);
1223 				if (!buff)
1224 					break;
1225 				neigh->arp_queue_len_bytes -= buff->truesize;
1226 				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
1227 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1228 			}
1229 			skb_dst_force(skb);
1230 			__skb_queue_tail(&neigh->arp_queue, skb);
1231 			neigh->arp_queue_len_bytes += skb->truesize;
1232 		}
1233 		rc = 1;
1234 	}
1235 out_unlock_bh:
1236 	if (immediate_probe)
1237 		neigh_probe(neigh);
1238 	else
1239 		write_unlock(&neigh->lock);
1240 	local_bh_enable();
1241 	trace_neigh_event_send_done(neigh, rc);
1242 	return rc;
1243 
1244 out_dead:
1245 	if (neigh->nud_state & NUD_STALE)
1246 		goto out_unlock_bh;
1247 	write_unlock_bh(&neigh->lock);
1248 	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
1249 	trace_neigh_event_send_dead(neigh, 1);
1250 	return 1;
1251 }
1252 EXPORT_SYMBOL(__neigh_event_send);
1253 
1254 static void neigh_update_hhs(struct neighbour *neigh)
1255 {
1256 	struct hh_cache *hh;
1257 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1258 		= NULL;
1259 
1260 	if (neigh->dev->header_ops)
1261 		update = neigh->dev->header_ops->cache_update;
1262 
1263 	if (update) {
1264 		hh = &neigh->hh;
1265 		if (READ_ONCE(hh->hh_len)) {
1266 			write_seqlock_bh(&hh->hh_lock);
1267 			update(hh, neigh->dev, neigh->ha);
1268 			write_sequnlock_bh(&hh->hh_lock);
1269 		}
1270 	}
1271 }
1272 
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE marks an existing "connected"
				lladdr as suspect instead of overriding
				it if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates that the neighbour is known to
				be a router.
	NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed
				or invalidated.

   The caller MUST hold a reference count on the entry.
 */
1294 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1295 			  u8 new, u32 flags, u32 nlmsg_pid,
1296 			  struct netlink_ext_ack *extack)
1297 {
1298 	bool gc_update = false, managed_update = false;
1299 	int update_isrouter = 0;
1300 	struct net_device *dev;
1301 	int err, notify = 0;
1302 	u8 old;
1303 
1304 	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1305 
1306 	write_lock_bh(&neigh->lock);
1307 
1308 	dev    = neigh->dev;
1309 	old    = neigh->nud_state;
1310 	err    = -EPERM;
1311 
1312 	if (neigh->dead) {
1313 		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1314 		new = old;
1315 		goto out;
1316 	}
1317 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1318 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1319 		goto out;
1320 
1321 	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
1322 	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
1323 		new = old & ~NUD_PERMANENT;
1324 		WRITE_ONCE(neigh->nud_state, new);
1325 		err = 0;
1326 		goto out;
1327 	}
1328 
1329 	if (!(new & NUD_VALID)) {
1330 		neigh_del_timer(neigh);
1331 		if (old & NUD_CONNECTED)
1332 			neigh_suspect(neigh);
1333 		WRITE_ONCE(neigh->nud_state, new);
1334 		err = 0;
1335 		notify = old & NUD_VALID;
1336 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1337 		    (new & NUD_FAILED)) {
1338 			neigh_invalidate(neigh);
1339 			notify = 1;
1340 		}
1341 		goto out;
1342 	}
1343 
1344 	/* Compare new lladdr with cached one */
1345 	if (!dev->addr_len) {
1346 		/* First case: device needs no address. */
1347 		lladdr = neigh->ha;
1348 	} else if (lladdr) {
1349 		/* The second case: if something is already cached
1350 		   and a new address is proposed:
1351 		   - compare new & old
1352 		   - if they are different, check override flag
1353 		 */
1354 		if ((old & NUD_VALID) &&
1355 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1356 			lladdr = neigh->ha;
1357 	} else {
1358 		/* No address is supplied; if we know something,
1359 		   use it, otherwise discard the request.
1360 		 */
1361 		err = -EINVAL;
1362 		if (!(old & NUD_VALID)) {
1363 			NL_SET_ERR_MSG(extack, "No link layer address given");
1364 			goto out;
1365 		}
1366 		lladdr = neigh->ha;
1367 	}
1368 
	/* Update the confirmed timestamp of the neighbour entry after we
	 * receive an ARP packet, even if it doesn't change the IP-to-MAC
	 * binding.
	 */
1372 	if (new & NUD_CONNECTED)
1373 		neigh->confirmed = jiffies;
1374 
	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
1378 	err = 0;
1379 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1380 	if (old & NUD_VALID) {
1381 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1382 			update_isrouter = 0;
1383 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1384 			    (old & NUD_CONNECTED)) {
1385 				lladdr = neigh->ha;
1386 				new = NUD_STALE;
1387 			} else
1388 				goto out;
1389 		} else {
1390 			if (lladdr == neigh->ha && new == NUD_STALE &&
1391 			    !(flags & NEIGH_UPDATE_F_ADMIN))
1392 				new = old;
1393 		}
1394 	}
1395 
	/* Update the timestamp only once we know we will make a change to
	 * the neighbour entry. Otherwise we risk moving the locktime window
	 * with no-op updates and ignoring relevant ARP updates.
	 */
1400 	if (new != old || lladdr != neigh->ha)
1401 		neigh->updated = jiffies;
1402 
1403 	if (new != old) {
1404 		neigh_del_timer(neigh);
1405 		if (new & NUD_PROBE)
1406 			atomic_set(&neigh->probes, 0);
1407 		if (new & NUD_IN_TIMER)
1408 			neigh_add_timer(neigh, (jiffies +
1409 						((new & NUD_REACHABLE) ?
1410 						 neigh->parms->reachable_time :
1411 						 0)));
1412 		WRITE_ONCE(neigh->nud_state, new);
1413 		notify = 1;
1414 	}
1415 
1416 	if (lladdr != neigh->ha) {
1417 		write_seqlock(&neigh->ha_lock);
1418 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1419 		write_sequnlock(&neigh->ha_lock);
1420 		neigh_update_hhs(neigh);
1421 		if (!(new & NUD_CONNECTED))
1422 			neigh->confirmed = jiffies -
1423 				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1424 		notify = 1;
1425 	}
1426 	if (new == old)
1427 		goto out;
1428 	if (new & NUD_CONNECTED)
1429 		neigh_connect(neigh);
1430 	else
1431 		neigh_suspect(neigh);
1432 	if (!(old & NUD_VALID)) {
1433 		struct sk_buff *skb;
1434 
1435 		/* Again: avoid dead loop if something went wrong */
1436 
1437 		while (neigh->nud_state & NUD_VALID &&
1438 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1439 			struct dst_entry *dst = skb_dst(skb);
1440 			struct neighbour *n2, *n1 = neigh;
1441 			write_unlock_bh(&neigh->lock);
1442 
1443 			rcu_read_lock();
1444 
1445 			/* Why not just use 'neigh' as-is?  The problem is that
1446 			 * things such as shaper, eql, and sch_teql can end up
1447 			 * using alternative, different, neigh objects to output
1448 			 * the packet in the output path.  So what we need to do
1449 			 * here is re-lookup the top-level neigh in the path so
1450 			 * we can reinject the packet there.
1451 			 */
1452 			n2 = NULL;
1453 			if (dst &&
1454 			    READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) {
1455 				n2 = dst_neigh_lookup_skb(dst, skb);
1456 				if (n2)
1457 					n1 = n2;
1458 			}
1459 			READ_ONCE(n1->output)(n1, skb);
1460 			if (n2)
1461 				neigh_release(n2);
1462 			rcu_read_unlock();
1463 
1464 			write_lock_bh(&neigh->lock);
1465 		}
1466 		__skb_queue_purge(&neigh->arp_queue);
1467 		neigh->arp_queue_len_bytes = 0;
1468 	}
1469 out:
1470 	if (update_isrouter)
1471 		neigh_update_is_router(neigh, flags, &notify);
1472 	write_unlock_bh(&neigh->lock);
1473 	if (((new ^ old) & NUD_PERMANENT) || gc_update)
1474 		neigh_update_gc_list(neigh);
1475 	if (managed_update)
1476 		neigh_update_managed_list(neigh);
1477 	if (notify)
1478 		neigh_update_notify(neigh, nlmsg_pid);
1479 	trace_neigh_update_done(neigh, err);
1480 	return err;
1481 }
1482 
1483 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1484 		 u32 flags, u32 nlmsg_pid)
1485 {
1486 	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1487 }
1488 EXPORT_SYMBOL(neigh_update);
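/*
 * Typical use (an illustrative sketch, not a call site in this file):
 * a protocol that has just confirmed a link-layer address for an entry
 * it holds a reference on would do something like
 *
 *	neigh_update(n, ha, NUD_REACHABLE,
 *		     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_WEAK_OVERRIDE,
 *		     0);
 *
 * while administrative changes from netlink additionally pass
 * NEIGH_UPDATE_F_ADMIN (see neigh_delete() and neigh_add() below).
 */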
1489 
1490 /* Update the neigh to listen temporarily for probe responses, even if it is
1491  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1492  */
1493 void __neigh_set_probe_once(struct neighbour *neigh)
1494 {
1495 	if (neigh->dead)
1496 		return;
1497 	neigh->updated = jiffies;
1498 	if (!(neigh->nud_state & NUD_FAILED))
1499 		return;
1500 	WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
1501 	atomic_set(&neigh->probes, neigh_max_probes(neigh));
1502 	neigh_add_timer(neigh,
1503 			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1504 				      HZ/100));
1505 }
1506 EXPORT_SYMBOL(__neigh_set_probe_once);
1507 
1508 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1509 				 u8 *lladdr, void *saddr,
1510 				 struct net_device *dev)
1511 {
1512 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1513 						 lladdr || !dev->addr_len);
1514 	if (neigh)
1515 		neigh_update(neigh, lladdr, NUD_STALE,
1516 			     NEIGH_UPDATE_F_OVERRIDE, 0);
1517 	return neigh;
1518 }
1519 EXPORT_SYMBOL(neigh_event_ns);
1520 
/* Initializes the hh_cache entry; takes n->lock itself. */
1522 static void neigh_hh_init(struct neighbour *n)
1523 {
1524 	struct net_device *dev = n->dev;
1525 	__be16 prot = n->tbl->protocol;
1526 	struct hh_cache	*hh = &n->hh;
1527 
1528 	write_lock_bh(&n->lock);
1529 
1530 	/* Only one thread can come in here and initialize the
1531 	 * hh_cache entry.
1532 	 */
1533 	if (!hh->hh_len)
1534 		dev->header_ops->cache(n, hh, prot);
1535 
1536 	write_unlock_bh(&n->lock);
1537 }
1538 
1539 /* Slow and careful. */
1540 
1541 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1542 {
1543 	int rc = 0;
1544 
1545 	if (!neigh_event_send(neigh, skb)) {
1546 		int err;
1547 		struct net_device *dev = neigh->dev;
1548 		unsigned int seq;
1549 
1550 		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1551 			neigh_hh_init(neigh);
1552 
1553 		do {
1554 			__skb_pull(skb, skb_network_offset(skb));
1555 			seq = read_seqbegin(&neigh->ha_lock);
1556 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1557 					      neigh->ha, NULL, skb->len);
1558 		} while (read_seqretry(&neigh->ha_lock, seq));
1559 
1560 		if (err >= 0)
1561 			rc = dev_queue_xmit(skb);
1562 		else
1563 			goto out_kfree_skb;
1564 	}
1565 out:
1566 	return rc;
1567 out_kfree_skb:
1568 	rc = -EINVAL;
1569 	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
1570 	goto out;
1571 }
1572 EXPORT_SYMBOL(neigh_resolve_output);
1573 
1574 /* As fast as possible without hh cache */
1575 
1576 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1577 {
1578 	struct net_device *dev = neigh->dev;
1579 	unsigned int seq;
1580 	int err;
1581 
1582 	do {
1583 		__skb_pull(skb, skb_network_offset(skb));
1584 		seq = read_seqbegin(&neigh->ha_lock);
1585 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1586 				      neigh->ha, NULL, skb->len);
1587 	} while (read_seqretry(&neigh->ha_lock, seq));
1588 
1589 	if (err >= 0)
1590 		err = dev_queue_xmit(skb);
1591 	else {
1592 		err = -EINVAL;
1593 		kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
1594 	}
1595 	return err;
1596 }
1597 EXPORT_SYMBOL(neigh_connected_output);
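/*
 * The read_seqbegin()/read_seqretry() loops above are the standard way
 * to read neigh->ha consistently without taking neigh->lock; callers
 * that just need a stable copy of the address can use
 * neigh_ha_snapshot() from <net/neighbour.h>, which wraps the same
 * pattern.
 */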
1598 
1599 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1600 {
1601 	return dev_queue_xmit(skb);
1602 }
1603 EXPORT_SYMBOL(neigh_direct_output);
1604 
1605 static void neigh_managed_work(struct work_struct *work)
1606 {
1607 	struct neigh_table *tbl = container_of(work, struct neigh_table,
1608 					       managed_work.work);
1609 	struct neighbour *neigh;
1610 
1611 	write_lock_bh(&tbl->lock);
1612 	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1613 		neigh_event_send_probe(neigh, NULL, false);
1614 	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1615 			   NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
1616 	write_unlock_bh(&tbl->lock);
1617 }
1618 
1619 static void neigh_proxy_process(struct timer_list *t)
1620 {
1621 	struct neigh_table *tbl = timer_container_of(tbl, t, proxy_timer);
1622 	long sched_next = 0;
1623 	unsigned long now = jiffies;
1624 	struct sk_buff *skb, *n;
1625 
1626 	spin_lock(&tbl->proxy_queue.lock);
1627 
1628 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1629 		long tdif = NEIGH_CB(skb)->sched_next - now;
1630 
1631 		if (tdif <= 0) {
1632 			struct net_device *dev = skb->dev;
1633 
1634 			neigh_parms_qlen_dec(dev, tbl->family);
1635 			__skb_unlink(skb, &tbl->proxy_queue);
1636 
1637 			if (tbl->proxy_redo && netif_running(dev)) {
1638 				rcu_read_lock();
1639 				tbl->proxy_redo(skb);
1640 				rcu_read_unlock();
1641 			} else {
1642 				kfree_skb(skb);
1643 			}
1644 
1645 			dev_put(dev);
1646 		} else if (!sched_next || tdif < sched_next)
1647 			sched_next = tdif;
1648 	}
1649 	timer_delete(&tbl->proxy_timer);
1650 	if (sched_next)
1651 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1652 	spin_unlock(&tbl->proxy_queue.lock);
1653 }
1654 
1655 static unsigned long neigh_proxy_delay(struct neigh_parms *p)
1656 {
1657 	/* If proxy_delay is zero, do not call get_random_u32_below()
1658 	 * as it is undefined behavior.
1659 	 */
1660 	unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);
1661 
1662 	return proxy_delay ?
1663 	       jiffies + get_random_u32_below(proxy_delay) : jiffies;
1664 }
1665 
1666 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1667 		    struct sk_buff *skb)
1668 {
1669 	unsigned long sched_next = neigh_proxy_delay(p);
1670 
1671 	if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1672 		kfree_skb(skb);
1673 		return;
1674 	}
1675 
1676 	NEIGH_CB(skb)->sched_next = sched_next;
1677 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1678 
1679 	spin_lock(&tbl->proxy_queue.lock);
1680 	if (timer_delete(&tbl->proxy_timer)) {
1681 		if (time_before(tbl->proxy_timer.expires, sched_next))
1682 			sched_next = tbl->proxy_timer.expires;
1683 	}
1684 	skb_dst_drop(skb);
1685 	dev_hold(skb->dev);
1686 	__skb_queue_tail(&tbl->proxy_queue, skb);
1687 	p->qlen++;
1688 	mod_timer(&tbl->proxy_timer, sched_next);
1689 	spin_unlock(&tbl->proxy_queue.lock);
1690 }
1691 EXPORT_SYMBOL(pneigh_enqueue);
1692 
1693 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1694 						      struct net *net, int ifindex)
1695 {
1696 	struct neigh_parms *p;
1697 
1698 	list_for_each_entry(p, &tbl->parms_list, list) {
1699 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1700 		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1701 			return p;
1702 	}
1703 
1704 	return NULL;
1705 }
1706 
1707 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1708 				      struct neigh_table *tbl)
1709 {
1710 	struct neigh_parms *p;
1711 	struct net *net = dev_net(dev);
1712 	const struct net_device_ops *ops = dev->netdev_ops;
1713 
1714 	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1715 	if (p) {
1716 		p->tbl		  = tbl;
1717 		refcount_set(&p->refcnt, 1);
1718 		p->reachable_time =
1719 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1720 		p->qlen = 0;
1721 		netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
1722 		p->dev = dev;
1723 		write_pnet(&p->net, net);
1724 		p->sysctl_table = NULL;
1725 
1726 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1727 			netdev_put(dev, &p->dev_tracker);
1728 			kfree(p);
1729 			return NULL;
1730 		}
1731 
1732 		write_lock_bh(&tbl->lock);
1733 		list_add(&p->list, &tbl->parms.list);
1734 		write_unlock_bh(&tbl->lock);
1735 
1736 		neigh_parms_data_state_cleanall(p);
1737 	}
1738 	return p;
1739 }
1740 EXPORT_SYMBOL(neigh_parms_alloc);
1741 
1742 static void neigh_rcu_free_parms(struct rcu_head *head)
1743 {
1744 	struct neigh_parms *parms =
1745 		container_of(head, struct neigh_parms, rcu_head);
1746 
1747 	neigh_parms_put(parms);
1748 }
1749 
1750 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1751 {
1752 	if (!parms || parms == &tbl->parms)
1753 		return;
1754 	write_lock_bh(&tbl->lock);
1755 	list_del(&parms->list);
1756 	parms->dead = 1;
1757 	write_unlock_bh(&tbl->lock);
1758 	netdev_put(parms->dev, &parms->dev_tracker);
1759 	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1760 }
1761 EXPORT_SYMBOL(neigh_parms_release);
1762 
1763 static struct lock_class_key neigh_table_proxy_queue_class;
1764 
1765 static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1766 
1767 void neigh_table_init(int index, struct neigh_table *tbl)
1768 {
1769 	unsigned long now = jiffies;
1770 	unsigned long phsize;
1771 
1772 	INIT_LIST_HEAD(&tbl->parms_list);
1773 	INIT_LIST_HEAD(&tbl->gc_list);
1774 	INIT_LIST_HEAD(&tbl->managed_list);
1775 
1776 	list_add(&tbl->parms.list, &tbl->parms_list);
1777 	write_pnet(&tbl->parms.net, &init_net);
1778 	refcount_set(&tbl->parms.refcnt, 1);
1779 	tbl->parms.reachable_time =
1780 			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1781 	tbl->parms.qlen = 0;
1782 
1783 	tbl->stats = alloc_percpu(struct neigh_statistics);
1784 	if (!tbl->stats)
1785 		panic("cannot create neighbour cache statistics");
1786 
1787 #ifdef CONFIG_PROC_FS
1788 	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1789 			      &neigh_stat_seq_ops, tbl))
1790 		panic("cannot create neighbour proc dir entry");
1791 #endif
1792 
1793 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1794 
1795 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1796 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1797 
1798 	if (!tbl->nht || !tbl->phash_buckets)
1799 		panic("cannot allocate neighbour cache hashes");
1800 
1801 	if (!tbl->entry_size)
1802 		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1803 					tbl->key_len, NEIGH_PRIV_ALIGN);
1804 	else
1805 		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1806 
1807 	rwlock_init(&tbl->lock);
1808 	mutex_init(&tbl->phash_lock);
1809 
1810 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1811 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1812 			tbl->parms.reachable_time);
1813 	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
1814 	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
1815 
1816 	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1817 	skb_queue_head_init_class(&tbl->proxy_queue,
1818 			&neigh_table_proxy_queue_class);
1819 
1820 	tbl->last_flush = now;
1821 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1822 
1823 	rcu_assign_pointer(neigh_tables[index], tbl);
1824 }
1825 EXPORT_SYMBOL(neigh_table_init);
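/*
 * Each address family registers its table once at boot, e.g.
 * neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl) from arp_init() and
 * neigh_table_init(NEIGH_ND_TABLE, &nd_tbl) from ndisc_init().
 */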
1826 
/*
 * Only called from ndisc_cleanup(), which means this is dead code,
 * because the IPv6 module can no longer be unloaded.
 */
1831 int neigh_table_clear(int index, struct neigh_table *tbl)
1832 {
1833 	RCU_INIT_POINTER(neigh_tables[index], NULL);
1834 	synchronize_rcu();
1835 
	/* This is not clean... fix it so the IPv6 module can be unloaded safely */
1837 	cancel_delayed_work_sync(&tbl->managed_work);
1838 	cancel_delayed_work_sync(&tbl->gc_work);
1839 	timer_delete_sync(&tbl->proxy_timer);
1840 	pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
1841 	neigh_ifdown(tbl, NULL);
1842 	if (atomic_read(&tbl->entries))
1843 		pr_crit("neighbour leakage\n");
1844 
1845 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1846 		 neigh_hash_free_rcu);
1847 	tbl->nht = NULL;
1848 
1849 	kfree(tbl->phash_buckets);
1850 	tbl->phash_buckets = NULL;
1851 
1852 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1853 
1854 	free_percpu(tbl->stats);
1855 	tbl->stats = NULL;
1856 
1857 	return 0;
1858 }
1859 EXPORT_SYMBOL(neigh_table_clear);
1860 
1861 static struct neigh_table *neigh_find_table(int family)
1862 {
1863 	struct neigh_table *tbl = NULL;
1864 
1865 	switch (family) {
1866 	case AF_INET:
1867 		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
1868 		break;
1869 	case AF_INET6:
1870 		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
1871 		break;
1872 	}
1873 
1874 	return tbl;
1875 }
1876 
1877 const struct nla_policy nda_policy[NDA_MAX+1] = {
1878 	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
1879 	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1880 	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1881 	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
1882 	[NDA_PROBES]		= { .type = NLA_U32 },
1883 	[NDA_VLAN]		= { .type = NLA_U16 },
1884 	[NDA_PORT]		= { .type = NLA_U16 },
1885 	[NDA_VNI]		= { .type = NLA_U32 },
1886 	[NDA_IFINDEX]		= { .type = NLA_U32 },
1887 	[NDA_MASTER]		= { .type = NLA_U32 },
1888 	[NDA_PROTOCOL]		= { .type = NLA_U8 },
1889 	[NDA_NH_ID]		= { .type = NLA_U32 },
1890 	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
1891 	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
1892 };
1893 
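/* RTM_DELNEIGH: remove a neighbour or proxy entry. A regular entry is
 * first driven to NUD_FAILED via __neigh_update() so listeners see the
 * state change, then unlinked from its hash bucket under tbl->lock.
 */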
1894 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1895 			struct netlink_ext_ack *extack)
1896 {
1897 	struct net *net = sock_net(skb->sk);
1898 	struct ndmsg *ndm;
1899 	struct nlattr *dst_attr;
1900 	struct neigh_table *tbl;
1901 	struct neighbour *neigh;
1902 	struct net_device *dev = NULL;
1903 	int err = -EINVAL;
1904 
1905 	ASSERT_RTNL();
1906 	if (nlmsg_len(nlh) < sizeof(*ndm))
1907 		goto out;
1908 
1909 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1910 	if (!dst_attr) {
1911 		NL_SET_ERR_MSG(extack, "Network address not specified");
1912 		goto out;
1913 	}
1914 
1915 	ndm = nlmsg_data(nlh);
1916 	if (ndm->ndm_ifindex) {
1917 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1918 		if (dev == NULL) {
1919 			err = -ENODEV;
1920 			goto out;
1921 		}
1922 	}
1923 
1924 	tbl = neigh_find_table(ndm->ndm_family);
1925 	if (tbl == NULL)
1926 		return -EAFNOSUPPORT;
1927 
1928 	if (nla_len(dst_attr) < (int)tbl->key_len) {
1929 		NL_SET_ERR_MSG(extack, "Invalid network address");
1930 		goto out;
1931 	}
1932 
1933 	if (ndm->ndm_flags & NTF_PROXY) {
1934 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1935 		goto out;
1936 	}
1937 
1938 	if (dev == NULL)
1939 		goto out;
1940 
1941 	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1942 	if (neigh == NULL) {
1943 		err = -ENOENT;
1944 		goto out;
1945 	}
1946 
1947 	err = __neigh_update(neigh, NULL, NUD_FAILED,
1948 			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1949 			     NETLINK_CB(skb).portid, extack);
1950 	write_lock_bh(&tbl->lock);
1951 	neigh_release(neigh);
1952 	neigh_remove_one(neigh);
1953 	write_unlock_bh(&tbl->lock);
1954 
1955 out:
1956 	return err;
1957 }
1958 
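/* RTM_NEWNEIGH: create or update a neighbour or proxy entry. Creating
 * a new entry requires NLM_F_CREATE, NLM_F_EXCL rejects updates of an
 * existing one, and an existing entry's valid lladdr is only overridden
 * when NLM_F_REPLACE is given.
 */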
1959 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1960 		     struct netlink_ext_ack *extack)
1961 {
1962 	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1963 		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1964 	struct net *net = sock_net(skb->sk);
1965 	struct ndmsg *ndm;
1966 	struct nlattr *tb[NDA_MAX+1];
1967 	struct neigh_table *tbl;
1968 	struct net_device *dev = NULL;
1969 	struct neighbour *neigh;
1970 	void *dst, *lladdr;
1971 	u8 protocol = 0;
1972 	u32 ndm_flags;
1973 	int err;
1974 
1975 	ASSERT_RTNL();
1976 	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
1977 				     nda_policy, extack);
1978 	if (err < 0)
1979 		goto out;
1980 
1981 	err = -EINVAL;
1982 	if (!tb[NDA_DST]) {
1983 		NL_SET_ERR_MSG(extack, "Network address not specified");
1984 		goto out;
1985 	}
1986 
1987 	ndm = nlmsg_data(nlh);
1988 	ndm_flags = ndm->ndm_flags;
1989 	if (tb[NDA_FLAGS_EXT]) {
1990 		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
1991 
1992 		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
1993 			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
1994 			      hweight32(NTF_EXT_MASK)));
1995 		ndm_flags |= (ext << NTF_EXT_SHIFT);
1996 	}
1997 	if (ndm->ndm_ifindex) {
1998 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1999 		if (dev == NULL) {
2000 			err = -ENODEV;
2001 			goto out;
2002 		}
2003 
2004 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
2005 			NL_SET_ERR_MSG(extack, "Invalid link address");
2006 			goto out;
2007 		}
2008 	}
2009 
2010 	tbl = neigh_find_table(ndm->ndm_family);
2011 	if (tbl == NULL)
2012 		return -EAFNOSUPPORT;
2013 
2014 	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
2015 		NL_SET_ERR_MSG(extack, "Invalid network address");
2016 		goto out;
2017 	}
2018 
2019 	dst = nla_data(tb[NDA_DST]);
2020 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
2021 
2022 	if (tb[NDA_PROTOCOL])
2023 		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
2024 	if (ndm_flags & NTF_PROXY) {
2025 		if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) {
2026 			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
2027 			goto out;
2028 		}
2029 
2030 		err = pneigh_create(tbl, net, dst, dev, ndm_flags, protocol,
2031 				    !!(ndm->ndm_state & NUD_PERMANENT));
2032 		goto out;
2033 	}
2034 
2035 	if (!dev) {
2036 		NL_SET_ERR_MSG(extack, "Device not specified");
2037 		goto out;
2038 	}
2039 
2040 	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
2041 		err = -EINVAL;
2042 		goto out;
2043 	}
2044 
2045 	neigh = neigh_lookup(tbl, dst, dev);
2046 	if (neigh == NULL) {
2047 		bool ndm_permanent  = ndm->ndm_state & NUD_PERMANENT;
2048 		bool exempt_from_gc = ndm_permanent ||
2049 				      ndm_flags & (NTF_EXT_LEARNED |
2050 						   NTF_EXT_VALIDATED);
2051 
2052 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
2053 			err = -ENOENT;
2054 			goto out;
2055 		}
2056 		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
2057 			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
2058 			err = -EINVAL;
2059 			goto out;
2060 		}
2061 		if (ndm_flags & NTF_EXT_VALIDATED) {
2062 			u8 state = ndm->ndm_state;
2063 
2064 			/* NTF_USE and NTF_MANAGED will result in the neighbor
2065 			 * being created with an invalid state (NUD_NONE).
2066 			 */
2067 			if (ndm_flags & (NTF_USE | NTF_MANAGED))
2068 				state = NUD_NONE;
2069 
2070 			if (!(state & NUD_VALID)) {
2071 				NL_SET_ERR_MSG(extack,
2072 					       "Cannot create externally validated neighbor with an invalid state");
2073 				err = -EINVAL;
2074 				goto out;
2075 			}
2076 		}
2077 
2078 		neigh = ___neigh_create(tbl, dst, dev,
2079 					ndm_flags &
2080 					(NTF_EXT_LEARNED | NTF_MANAGED |
2081 					 NTF_EXT_VALIDATED),
2082 					exempt_from_gc, true);
2083 		if (IS_ERR(neigh)) {
2084 			err = PTR_ERR(neigh);
2085 			goto out;
2086 		}
2087 	} else {
2088 		if (nlh->nlmsg_flags & NLM_F_EXCL) {
2089 			err = -EEXIST;
2090 			neigh_release(neigh);
2091 			goto out;
2092 		}
2093 		if (ndm_flags & NTF_EXT_VALIDATED) {
2094 			u8 state = ndm->ndm_state;
2095 
2096 			/* NTF_USE and NTF_MANAGED do not update the existing
2097 			 * state other than clearing it if it was
2098 			 * NUD_PERMANENT.
2099 			 */
2100 			if (ndm_flags & (NTF_USE | NTF_MANAGED))
2101 				state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT;
2102 
2103 			if (!(state & NUD_VALID)) {
2104 				NL_SET_ERR_MSG(extack,
2105 					       "Cannot mark neighbor as externally validated with an invalid state");
2106 				err = -EINVAL;
2107 				neigh_release(neigh);
2108 				goto out;
2109 			}
2110 		}
2111 
2112 		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
2113 			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
2114 				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
2115 	}
2116 
2117 	if (protocol)
2118 		neigh->protocol = protocol;
2119 	if (ndm_flags & NTF_EXT_LEARNED)
2120 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
2121 	if (ndm_flags & NTF_ROUTER)
2122 		flags |= NEIGH_UPDATE_F_ISROUTER;
2123 	if (ndm_flags & NTF_MANAGED)
2124 		flags |= NEIGH_UPDATE_F_MANAGED;
2125 	if (ndm_flags & NTF_USE)
2126 		flags |= NEIGH_UPDATE_F_USE;
2127 	if (ndm_flags & NTF_EXT_VALIDATED)
2128 		flags |= NEIGH_UPDATE_F_EXT_VALIDATED;
2129 
2130 	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
2131 			     NETLINK_CB(skb).portid, extack);
2132 	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED))
2133 		neigh_event_send(neigh, NULL);
2134 	neigh_release(neigh);
2135 out:
2136 	return err;
2137 }
2138 
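/* Dump one neigh_parms instance as a nested NDTA_PARMS attribute. */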
2139 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2140 {
2141 	struct nlattr *nest;
2142 
2143 	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2144 	if (nest == NULL)
2145 		return -ENOBUFS;
2146 
2147 	if ((parms->dev &&
2148 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
2149 	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2150 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2151 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2152 	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
2153 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
2154 			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2155 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2156 	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2157 	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
2158 			NEIGH_VAR(parms, UCAST_PROBES)) ||
2159 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
2160 			NEIGH_VAR(parms, MCAST_PROBES)) ||
2161 	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2162 			NEIGH_VAR(parms, MCAST_REPROBES)) ||
2163 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2164 			  NDTPA_PAD) ||
2165 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2166 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2167 	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
2168 			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2169 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2170 			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2171 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2172 			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2173 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2174 			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2175 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2176 			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2177 	    nla_put_msecs(skb, NDTPA_LOCKTIME,
2178 			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2179 	    nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2180 			  NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2181 		goto nla_put_failure;
2182 	return nla_nest_end(skb, nest);
2183 
2184 nla_put_failure:
2185 	nla_nest_cancel(skb, nest);
2186 	return -EMSGSIZE;
2187 }
2188 
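/* Fill an RTM_NEWNEIGHTBL message with the table-wide settings, the
 * ndt_config snapshot, aggregated per-CPU statistics and the table's
 * default parms.
 */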
2189 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2190 			      u32 pid, u32 seq, int type, int flags)
2191 {
2192 	struct nlmsghdr *nlh;
2193 	struct ndtmsg *ndtmsg;
2194 
2195 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2196 	if (nlh == NULL)
2197 		return -EMSGSIZE;
2198 
2199 	ndtmsg = nlmsg_data(nlh);
2200 
2201 	read_lock_bh(&tbl->lock);
2202 	ndtmsg->ndtm_family = tbl->family;
2203 	ndtmsg->ndtm_pad1   = 0;
2204 	ndtmsg->ndtm_pad2   = 0;
2205 
2206 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2207 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
2208 			  NDTA_PAD) ||
2209 	    nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
2210 	    nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
2211 	    nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
2212 		goto nla_put_failure;
2213 	{
2214 		unsigned long now = jiffies;
2215 		long flush_delta = now - READ_ONCE(tbl->last_flush);
2216 		long rand_delta = now - READ_ONCE(tbl->last_rand);
2217 		struct neigh_hash_table *nht;
2218 		struct ndt_config ndc = {
2219 			.ndtc_key_len		= tbl->key_len,
2220 			.ndtc_entry_size	= tbl->entry_size,
2221 			.ndtc_entries		= atomic_read(&tbl->entries),
2222 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
2223 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
2224 			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
2225 		};
2226 
2227 		rcu_read_lock();
2228 		nht = rcu_dereference(tbl->nht);
2229 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2230 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2231 		rcu_read_unlock();
2232 
2233 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2234 			goto nla_put_failure;
2235 	}
2236 
2237 	{
2238 		int cpu;
2239 		struct ndt_stats ndst;
2240 
2241 		memset(&ndst, 0, sizeof(ndst));
2242 
2243 		for_each_possible_cpu(cpu) {
2244 			struct neigh_statistics	*st;
2245 
2246 			st = per_cpu_ptr(tbl->stats, cpu);
2247 			ndst.ndts_allocs		+= READ_ONCE(st->allocs);
2248 			ndst.ndts_destroys		+= READ_ONCE(st->destroys);
2249 			ndst.ndts_hash_grows		+= READ_ONCE(st->hash_grows);
2250 			ndst.ndts_res_failed		+= READ_ONCE(st->res_failed);
2251 			ndst.ndts_lookups		+= READ_ONCE(st->lookups);
2252 			ndst.ndts_hits			+= READ_ONCE(st->hits);
2253 			ndst.ndts_rcv_probes_mcast	+= READ_ONCE(st->rcv_probes_mcast);
2254 			ndst.ndts_rcv_probes_ucast	+= READ_ONCE(st->rcv_probes_ucast);
2255 			ndst.ndts_periodic_gc_runs	+= READ_ONCE(st->periodic_gc_runs);
2256 			ndst.ndts_forced_gc_runs	+= READ_ONCE(st->forced_gc_runs);
2257 			ndst.ndts_table_fulls		+= READ_ONCE(st->table_fulls);
2258 		}
2259 
2260 		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2261 				  NDTA_PAD))
2262 			goto nla_put_failure;
2263 	}
2264 
2265 	BUG_ON(tbl->parms.dev);
2266 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2267 		goto nla_put_failure;
2268 
2269 	read_unlock_bh(&tbl->lock);
2270 	nlmsg_end(skb, nlh);
2271 	return 0;
2272 
2273 nla_put_failure:
2274 	read_unlock_bh(&tbl->lock);
2275 	nlmsg_cancel(skb, nlh);
2276 	return -EMSGSIZE;
2277 }
2278 
2279 static int neightbl_fill_param_info(struct sk_buff *skb,
2280 				    struct neigh_table *tbl,
2281 				    struct neigh_parms *parms,
2282 				    u32 pid, u32 seq, int type,
2283 				    unsigned int flags)
2284 {
2285 	struct ndtmsg *ndtmsg;
2286 	struct nlmsghdr *nlh;
2287 
2288 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2289 	if (nlh == NULL)
2290 		return -EMSGSIZE;
2291 
2292 	ndtmsg = nlmsg_data(nlh);
2293 
2294 	read_lock_bh(&tbl->lock);
2295 	ndtmsg->ndtm_family = tbl->family;
2296 	ndtmsg->ndtm_pad1   = 0;
2297 	ndtmsg->ndtm_pad2   = 0;
2298 
2299 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2300 	    neightbl_fill_parms(skb, parms) < 0)
2301 		goto errout;
2302 
2303 	read_unlock_bh(&tbl->lock);
2304 	nlmsg_end(skb, nlh);
2305 	return 0;
2306 errout:
2307 	read_unlock_bh(&tbl->lock);
2308 	nlmsg_cancel(skb, nlh);
2309 	return -EMSGSIZE;
2310 }
2311 
2312 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2313 	[NDTA_NAME]		= { .type = NLA_STRING },
2314 	[NDTA_THRESH1]		= { .type = NLA_U32 },
2315 	[NDTA_THRESH2]		= { .type = NLA_U32 },
2316 	[NDTA_THRESH3]		= { .type = NLA_U32 },
2317 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
2318 	[NDTA_PARMS]		= { .type = NLA_NESTED },
2319 };
2320 
2321 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2322 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
2323 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
2324 	[NDTPA_QUEUE_LENBYTES]		= { .type = NLA_U32 },
2325 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
2326 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
2327 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
2328 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
2329 	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
2330 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
2331 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
2332 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
2333 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
2334 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
2335 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
2336 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
2337 	[NDTPA_INTERVAL_PROBE_TIME_MS]	= { .type = NLA_U64, .min = 1 },
2338 };
2339 
2340 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
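/* RTM_SETNEIGHTBL: update per-table and per-device neighbour
 * parameters. The gc_thresh*/gc_interval knobs are global and are
 * therefore only writable from init_net.
 */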
2341 			struct netlink_ext_ack *extack)
2342 {
2343 	struct net *net = sock_net(skb->sk);
2344 	struct neigh_table *tbl;
2345 	struct ndtmsg *ndtmsg;
2346 	struct nlattr *tb[NDTA_MAX+1];
2347 	bool found = false;
2348 	int err, tidx;
2349 
2350 	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2351 				     nl_neightbl_policy, extack);
2352 	if (err < 0)
2353 		goto errout;
2354 
2355 	if (tb[NDTA_NAME] == NULL) {
2356 		err = -EINVAL;
2357 		goto errout;
2358 	}
2359 
2360 	ndtmsg = nlmsg_data(nlh);
2361 
2362 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2363 		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
2364 		if (!tbl)
2365 			continue;
2366 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2367 			continue;
2368 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2369 			found = true;
2370 			break;
2371 		}
2372 	}
2373 
2374 	if (!found)
2375 		return -ENOENT;
2376 
2377 	/*
2378 	 * We acquire tbl->lock to be nice to the periodic timers and
2379 	 * make sure they always see a consistent set of values.
2380 	 */
2381 	write_lock_bh(&tbl->lock);
2382 
2383 	if (tb[NDTA_PARMS]) {
2384 		struct nlattr *tbp[NDTPA_MAX+1];
2385 		struct neigh_parms *p;
2386 		int i, ifindex = 0;
2387 
2388 		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2389 						  tb[NDTA_PARMS],
2390 						  nl_ntbl_parm_policy, extack);
2391 		if (err < 0)
2392 			goto errout_tbl_lock;
2393 
2394 		if (tbp[NDTPA_IFINDEX])
2395 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2396 
2397 		p = lookup_neigh_parms(tbl, net, ifindex);
2398 		if (p == NULL) {
2399 			err = -ENOENT;
2400 			goto errout_tbl_lock;
2401 		}
2402 
2403 		for (i = 1; i <= NDTPA_MAX; i++) {
2404 			if (tbp[i] == NULL)
2405 				continue;
2406 
2407 			switch (i) {
2408 			case NDTPA_QUEUE_LEN:
2409 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2410 					      nla_get_u32(tbp[i]) *
2411 					      SKB_TRUESIZE(ETH_FRAME_LEN));
2412 				break;
2413 			case NDTPA_QUEUE_LENBYTES:
2414 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2415 					      nla_get_u32(tbp[i]));
2416 				break;
2417 			case NDTPA_PROXY_QLEN:
2418 				NEIGH_VAR_SET(p, PROXY_QLEN,
2419 					      nla_get_u32(tbp[i]));
2420 				break;
2421 			case NDTPA_APP_PROBES:
2422 				NEIGH_VAR_SET(p, APP_PROBES,
2423 					      nla_get_u32(tbp[i]));
2424 				break;
2425 			case NDTPA_UCAST_PROBES:
2426 				NEIGH_VAR_SET(p, UCAST_PROBES,
2427 					      nla_get_u32(tbp[i]));
2428 				break;
2429 			case NDTPA_MCAST_PROBES:
2430 				NEIGH_VAR_SET(p, MCAST_PROBES,
2431 					      nla_get_u32(tbp[i]));
2432 				break;
2433 			case NDTPA_MCAST_REPROBES:
2434 				NEIGH_VAR_SET(p, MCAST_REPROBES,
2435 					      nla_get_u32(tbp[i]));
2436 				break;
2437 			case NDTPA_BASE_REACHABLE_TIME:
2438 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2439 					      nla_get_msecs(tbp[i]));
2440 				/* Update reachable_time as well; otherwise the change
2441 				 * only takes effect the next time neigh_periodic_work
2442 				 * recomputes it (which can take several minutes).
2443 				 */
2444 				p->reachable_time =
2445 					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2446 				break;
2447 			case NDTPA_GC_STALETIME:
2448 				NEIGH_VAR_SET(p, GC_STALETIME,
2449 					      nla_get_msecs(tbp[i]));
2450 				break;
2451 			case NDTPA_DELAY_PROBE_TIME:
2452 				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2453 					      nla_get_msecs(tbp[i]));
2454 				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2455 				break;
2456 			case NDTPA_INTERVAL_PROBE_TIME_MS:
2457 				NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
2458 					      nla_get_msecs(tbp[i]));
2459 				break;
2460 			case NDTPA_RETRANS_TIME:
2461 				NEIGH_VAR_SET(p, RETRANS_TIME,
2462 					      nla_get_msecs(tbp[i]));
2463 				break;
2464 			case NDTPA_ANYCAST_DELAY:
2465 				NEIGH_VAR_SET(p, ANYCAST_DELAY,
2466 					      nla_get_msecs(tbp[i]));
2467 				break;
2468 			case NDTPA_PROXY_DELAY:
2469 				NEIGH_VAR_SET(p, PROXY_DELAY,
2470 					      nla_get_msecs(tbp[i]));
2471 				break;
2472 			case NDTPA_LOCKTIME:
2473 				NEIGH_VAR_SET(p, LOCKTIME,
2474 					      nla_get_msecs(tbp[i]));
2475 				break;
2476 			}
2477 		}
2478 	}
2479 
2480 	err = -ENOENT;
2481 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2482 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2483 	    !net_eq(net, &init_net))
2484 		goto errout_tbl_lock;
2485 
2486 	if (tb[NDTA_THRESH1])
2487 		WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));
2488 
2489 	if (tb[NDTA_THRESH2])
2490 		WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));
2491 
2492 	if (tb[NDTA_THRESH3])
2493 		WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));
2494 
2495 	if (tb[NDTA_GC_INTERVAL])
2496 		WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));
2497 
2498 	err = 0;
2499 
2500 errout_tbl_lock:
2501 	write_unlock_bh(&tbl->lock);
2502 errout:
2503 	return err;
2504 }
2505 
2506 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2507 				    struct netlink_ext_ack *extack)
2508 {
2509 	struct ndtmsg *ndtm;
2510 
2511 	ndtm = nlmsg_payload(nlh, sizeof(*ndtm));
2512 	if (!ndtm) {
2513 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2514 		return -EINVAL;
2515 	}
2516 
2517 	if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
2518 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2519 		return -EINVAL;
2520 	}
2521 
2522 	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2523 		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2524 		return -EINVAL;
2525 	}
2526 
2527 	return 0;
2528 }
2529 
2530 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2531 {
2532 	const struct nlmsghdr *nlh = cb->nlh;
2533 	struct net *net = sock_net(skb->sk);
2534 	int family, tidx, nidx = 0;
2535 	int tbl_skip = cb->args[0];
2536 	int neigh_skip = cb->args[1];
2537 	struct neigh_table *tbl;
2538 
2539 	if (cb->strict_check) {
2540 		int err = neightbl_valid_dump_info(nlh, cb->extack);
2541 
2542 		if (err < 0)
2543 			return err;
2544 	}
2545 
2546 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2547 
2548 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2549 		struct neigh_parms *p;
2550 
2551 		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
2552 		if (!tbl)
2553 			continue;
2554 
2555 		if (tidx < tbl_skip || (family && tbl->family != family))
2556 			continue;
2557 
2558 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2559 				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2560 				       NLM_F_MULTI) < 0)
2561 			break;
2562 
2563 		nidx = 0;
2564 		p = list_next_entry(&tbl->parms, list);
2565 		list_for_each_entry_from(p, &tbl->parms_list, list) {
2566 			if (!net_eq(neigh_parms_net(p), net))
2567 				continue;
2568 
2569 			if (nidx < neigh_skip)
2570 				goto next;
2571 
2572 			if (neightbl_fill_param_info(skb, tbl, p,
2573 						     NETLINK_CB(cb->skb).portid,
2574 						     nlh->nlmsg_seq,
2575 						     RTM_NEWNEIGHTBL,
2576 						     NLM_F_MULTI) < 0)
2577 				goto out;
2578 		next:
2579 			nidx++;
2580 		}
2581 
2582 		neigh_skip = 0;
2583 	}
2584 out:
2585 	cb->args[0] = tidx;
2586 	cb->args[1] = nidx;
2587 
2588 	return skb->len;
2589 }
2590 
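/* Translate one neighbour entry into an RTM_NEWNEIGH netlink message. */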
2591 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2592 			   u32 pid, u32 seq, int type, unsigned int flags)
2593 {
2594 	u32 neigh_flags, neigh_flags_ext;
2595 	unsigned long now = jiffies;
2596 	struct nda_cacheinfo ci;
2597 	struct nlmsghdr *nlh;
2598 	struct ndmsg *ndm;
2599 
2600 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2601 	if (nlh == NULL)
2602 		return -EMSGSIZE;
2603 
2604 	neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2605 	neigh_flags     = neigh->flags & NTF_OLD_MASK;
2606 
2607 	ndm = nlmsg_data(nlh);
2608 	ndm->ndm_family	 = neigh->ops->family;
2609 	ndm->ndm_pad1    = 0;
2610 	ndm->ndm_pad2    = 0;
2611 	ndm->ndm_flags	 = neigh_flags;
2612 	ndm->ndm_type	 = neigh->type;
2613 	ndm->ndm_ifindex = neigh->dev->ifindex;
2614 
2615 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2616 		goto nla_put_failure;
2617 
2618 	read_lock_bh(&neigh->lock);
2619 	ndm->ndm_state	 = neigh->nud_state;
2620 	if (neigh->nud_state & NUD_VALID) {
2621 		char haddr[MAX_ADDR_LEN];
2622 
2623 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2624 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2625 			read_unlock_bh(&neigh->lock);
2626 			goto nla_put_failure;
2627 		}
2628 	}
2629 
2630 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2631 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2632 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2633 	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
2634 	read_unlock_bh(&neigh->lock);
2635 
2636 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2637 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2638 		goto nla_put_failure;
2639 
2640 	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2641 		goto nla_put_failure;
2642 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2643 		goto nla_put_failure;
2644 
2645 	nlmsg_end(skb, nlh);
2646 	return 0;
2647 
2648 nla_put_failure:
2649 	nlmsg_cancel(skb, nlh);
2650 	return -EMSGSIZE;
2651 }
2652 
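/* As above, but for proxy entries: these are reported with NTF_PROXY
 * set and a NUD_NONE state, since pneigh entries carry no link-layer
 * address.
 */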
2653 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2654 			    u32 pid, u32 seq, int type, unsigned int flags,
2655 			    struct neigh_table *tbl)
2656 {
2657 	u32 neigh_flags, neigh_flags_ext;
2658 	struct nlmsghdr *nlh;
2659 	struct ndmsg *ndm;
2660 	u8 protocol;
2661 
2662 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2663 	if (nlh == NULL)
2664 		return -EMSGSIZE;
2665 
2666 	neigh_flags = READ_ONCE(pn->flags);
2667 	neigh_flags_ext = neigh_flags >> NTF_EXT_SHIFT;
2668 	neigh_flags &= NTF_OLD_MASK;
2669 
2670 	ndm = nlmsg_data(nlh);
2671 	ndm->ndm_family	 = tbl->family;
2672 	ndm->ndm_pad1    = 0;
2673 	ndm->ndm_pad2    = 0;
2674 	ndm->ndm_flags	 = neigh_flags | NTF_PROXY;
2675 	ndm->ndm_type	 = RTN_UNICAST;
2676 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2677 	ndm->ndm_state	 = NUD_NONE;
2678 
2679 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2680 		goto nla_put_failure;
2681 
2682 	protocol = READ_ONCE(pn->protocol);
2683 	if (protocol && nla_put_u8(skb, NDA_PROTOCOL, protocol))
2684 		goto nla_put_failure;
2685 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2686 		goto nla_put_failure;
2687 
2688 	nlmsg_end(skb, nlh);
2689 	return 0;
2690 
2691 nla_put_failure:
2692 	nlmsg_cancel(skb, nlh);
2693 	return -EMSGSIZE;
2694 }
2695 
2696 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2697 {
2698 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2699 	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2700 }
2701 
2702 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2703 {
2704 	struct net_device *master;
2705 
2706 	if (!master_idx)
2707 		return false;
2708 
2709 	master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
2710 
2711 	/* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
2712 	 * invalid value for ifindex to denote "no master".
2713 	 */
2714 	if (master_idx == -1)
2715 		return !!master;
2716 
2717 	if (!master || master->ifindex != master_idx)
2718 		return true;
2719 
2720 	return false;
2721 }
2722 
2723 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2724 {
2725 	if (filter_idx && (!dev || dev->ifindex != filter_idx))
2726 		return true;
2727 
2728 	return false;
2729 }
2730 
2731 struct neigh_dump_filter {
2732 	int master_idx;
2733 	int dev_idx;
2734 };
2735 
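/* Dump the main hash table, resuming from the bucket/index saved in
 * cb->args[1]/[2] so a multi-part dump continues where the previous
 * skb filled up.
 */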
2736 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2737 			    struct netlink_callback *cb,
2738 			    struct neigh_dump_filter *filter)
2739 {
2740 	struct net *net = sock_net(skb->sk);
2741 	struct neighbour *n;
2742 	int err = 0, h, s_h = cb->args[1];
2743 	int idx, s_idx = idx = cb->args[2];
2744 	struct neigh_hash_table *nht;
2745 	unsigned int flags = NLM_F_MULTI;
2746 
2747 	if (filter->dev_idx || filter->master_idx)
2748 		flags |= NLM_F_DUMP_FILTERED;
2749 
2750 	nht = rcu_dereference(tbl->nht);
2751 
2752 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2753 		if (h > s_h)
2754 			s_idx = 0;
2755 		idx = 0;
2756 		neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
2757 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2758 				goto next;
2759 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2760 			    neigh_master_filtered(n->dev, filter->master_idx))
2761 				goto next;
2762 			err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2763 					      cb->nlh->nlmsg_seq,
2764 					      RTM_NEWNEIGH, flags);
2765 			if (err < 0)
2766 				goto out;
2767 next:
2768 			idx++;
2769 		}
2770 	}
2771 out:
2772 	cb->args[1] = h;
2773 	cb->args[2] = idx;
2774 	return err;
2775 }
2776 
2777 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2778 			     struct netlink_callback *cb,
2779 			     struct neigh_dump_filter *filter)
2780 {
2781 	struct pneigh_entry *n;
2782 	struct net *net = sock_net(skb->sk);
2783 	int err = 0, h, s_h = cb->args[3];
2784 	int idx, s_idx = idx = cb->args[4];
2785 	unsigned int flags = NLM_F_MULTI;
2786 
2787 	if (filter->dev_idx || filter->master_idx)
2788 		flags |= NLM_F_DUMP_FILTERED;
2789 
2790 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2791 		if (h > s_h)
2792 			s_idx = 0;
2793 		for (n = rcu_dereference(tbl->phash_buckets[h]), idx = 0;
2794 		     n;
2795 		     n = rcu_dereference(n->next)) {
2796 			if (idx < s_idx || pneigh_net(n) != net)
2797 				goto next;
2798 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2799 			    neigh_master_filtered(n->dev, filter->master_idx))
2800 				goto next;
2801 			err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2802 					       cb->nlh->nlmsg_seq,
2803 					       RTM_NEWNEIGH, flags, tbl);
2804 			if (err < 0)
2805 				goto out;
2806 		next:
2807 			idx++;
2808 		}
2809 	}
2810 
2811 out:
2812 	cb->args[3] = h;
2813 	cb->args[4] = idx;
2814 	return err;
2815 }
2816 
2817 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2818 				bool strict_check,
2819 				struct neigh_dump_filter *filter,
2820 				struct netlink_ext_ack *extack)
2821 {
2822 	struct nlattr *tb[NDA_MAX + 1];
2823 	int err, i;
2824 
2825 	if (strict_check) {
2826 		struct ndmsg *ndm;
2827 
2828 		ndm = nlmsg_payload(nlh, sizeof(*ndm));
2829 		if (!ndm) {
2830 			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2831 			return -EINVAL;
2832 		}
2833 
2834 		if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
2835 		    ndm->ndm_state || ndm->ndm_type) {
2836 			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2837 			return -EINVAL;
2838 		}
2839 
2840 		if (ndm->ndm_flags & ~NTF_PROXY) {
2841 			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2842 			return -EINVAL;
2843 		}
2844 
2845 		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2846 						    tb, NDA_MAX, nda_policy,
2847 						    extack);
2848 	} else {
2849 		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2850 					     NDA_MAX, nda_policy, extack);
2851 	}
2852 	if (err < 0)
2853 		return err;
2854 
2855 	for (i = 0; i <= NDA_MAX; ++i) {
2856 		if (!tb[i])
2857 			continue;
2858 
2859 		/* all new attributes should require strict_check */
2860 		switch (i) {
2861 		case NDA_IFINDEX:
2862 			filter->dev_idx = nla_get_u32(tb[i]);
2863 			break;
2864 		case NDA_MASTER:
2865 			filter->master_idx = nla_get_u32(tb[i]);
2866 			break;
2867 		default:
2868 			if (strict_check) {
2869 				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2870 				return -EINVAL;
2871 			}
2872 		}
2873 	}
2874 
2875 	return 0;
2876 }
2877 
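/* RTM_GETNEIGH dump: walk every table matching the requested family,
 * dumping either proxy entries (when the ndmsg header carries exactly
 * NTF_PROXY) or regular neighbours, with optional ifindex/master
 * filtering.
 */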
2878 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2879 {
2880 	const struct nlmsghdr *nlh = cb->nlh;
2881 	struct neigh_dump_filter filter = {};
2882 	struct neigh_table *tbl;
2883 	int t, family, s_t;
2884 	int proxy = 0;
2885 	int err;
2886 
2887 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2888 
2889 	/* Check whether a full ndmsg structure is present; the family
2890 	 * member sits at the same offset in both structures.
2891 	 */
2892 	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2893 	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2894 		proxy = 1;
2895 
2896 	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2897 	if (err < 0 && cb->strict_check)
2898 		return err;
2899 	err = 0;
2900 
2901 	s_t = cb->args[0];
2902 
2903 	rcu_read_lock();
2904 	for (t = 0; t < NEIGH_NR_TABLES; t++) {
2905 		tbl = rcu_dereference(neigh_tables[t]);
2906 
2907 		if (!tbl)
2908 			continue;
2909 		if (t < s_t || (family && tbl->family != family))
2910 			continue;
2911 		if (t > s_t)
2912 			memset(&cb->args[1], 0, sizeof(cb->args) -
2913 						sizeof(cb->args[0]));
2914 		if (proxy)
2915 			err = pneigh_dump_table(tbl, skb, cb, &filter);
2916 		else
2917 			err = neigh_dump_table(tbl, skb, cb, &filter);
2918 		if (err < 0)
2919 			break;
2920 	}
2921 	rcu_read_unlock();
2922 
2923 	cb->args[0] = t;
2924 	return err;
2925 }
2926 
2927 static struct ndmsg *neigh_valid_get_req(const struct nlmsghdr *nlh,
2928 					 struct nlattr **tb,
2929 					 struct netlink_ext_ack *extack)
2930 {
2931 	struct ndmsg *ndm;
2932 	int err, i;
2933 
2934 	ndm = nlmsg_payload(nlh, sizeof(*ndm));
2935 	if (!ndm) {
2936 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2937 		return ERR_PTR(-EINVAL);
2938 	}
2939 
2940 	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
2941 	    ndm->ndm_type) {
2942 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2943 		return ERR_PTR(-EINVAL);
2944 	}
2945 
2946 	if (ndm->ndm_flags & ~NTF_PROXY) {
2947 		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2948 		return ERR_PTR(-EINVAL);
2949 	}
2950 
2951 	if (!(ndm->ndm_flags & NTF_PROXY) && !ndm->ndm_ifindex) {
2952 		NL_SET_ERR_MSG(extack, "No device specified");
2953 		return ERR_PTR(-EINVAL);
2954 	}
2955 
2956 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2957 					    NDA_MAX, nda_policy, extack);
2958 	if (err < 0)
2959 		return ERR_PTR(err);
2960 
2961 	for (i = 0; i <= NDA_MAX; ++i) {
2962 		switch (i) {
2963 		case NDA_DST:
2964 			if (!tb[i]) {
2965 				NL_SET_ERR_ATTR_MISS(extack, NULL, NDA_DST);
2966 				return ERR_PTR(-EINVAL);
2967 			}
2968 			break;
2969 		default:
2970 			if (!tb[i])
2971 				continue;
2972 
2973 			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2974 			return ERR_PTR(-EINVAL);
2975 		}
2976 	}
2977 
2978 	return ndm;
2979 }
2980 
2981 static inline size_t neigh_nlmsg_size(void)
2982 {
2983 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2984 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2985 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2986 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2987 	       + nla_total_size(4)  /* NDA_PROBES */
2988 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
2989 	       + nla_total_size(1); /* NDA_PROTOCOL */
2990 }
2991 
2992 static inline size_t pneigh_nlmsg_size(void)
2993 {
2994 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2995 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2996 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
2997 	       + nla_total_size(1); /* NDA_PROTOCOL */
2998 }
2999 
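/* RTM_GETNEIGH with a specific destination: look up one neighbour (or
 * proxy entry) and unicast the reply to the requesting socket.
 */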
3000 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3001 		     struct netlink_ext_ack *extack)
3002 {
3003 	struct net *net = sock_net(in_skb->sk);
3004 	u32 pid = NETLINK_CB(in_skb).portid;
3005 	struct nlattr *tb[NDA_MAX + 1];
3006 	struct net_device *dev = NULL;
3007 	u32 seq = nlh->nlmsg_seq;
3008 	struct neigh_table *tbl;
3009 	struct neighbour *neigh;
3010 	struct sk_buff *skb;
3011 	struct ndmsg *ndm;
3012 	void *dst;
3013 	int err;
3014 
3015 	ndm = neigh_valid_get_req(nlh, tb, extack);
3016 	if (IS_ERR(ndm))
3017 		return PTR_ERR(ndm);
3018 
3019 	if (ndm->ndm_flags & NTF_PROXY)
3020 		skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
3021 	else
3022 		skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
3023 	if (!skb)
3024 		return -ENOBUFS;
3025 
3026 	rcu_read_lock();
3027 
3028 	tbl = neigh_find_table(ndm->ndm_family);
3029 	if (!tbl) {
3030 		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
3031 		err = -EAFNOSUPPORT;
3032 		goto err_unlock;
3033 	}
3034 
3035 	if (nla_len(tb[NDA_DST]) != (int)tbl->key_len) {
3036 		NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
3037 		err = -EINVAL;
3038 		goto err_unlock;
3039 	}
3040 
3041 	dst = nla_data(tb[NDA_DST]);
3042 
3043 	if (ndm->ndm_ifindex) {
3044 		dev = dev_get_by_index_rcu(net, ndm->ndm_ifindex);
3045 		if (!dev) {
3046 			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
3047 			err = -ENODEV;
3048 			goto err_unlock;
3049 		}
3050 	}
3051 
3052 	if (ndm->ndm_flags & NTF_PROXY) {
3053 		struct pneigh_entry *pn;
3054 
3055 		pn = pneigh_lookup(tbl, net, dst, dev);
3056 		if (!pn) {
3057 			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3058 			err = -ENOENT;
3059 			goto err_unlock;
3060 		}
3061 
3062 		err = pneigh_fill_info(skb, pn, pid, seq, RTM_NEWNEIGH, 0, tbl);
3063 		if (err)
3064 			goto err_unlock;
3065 	} else {
3066 		neigh = neigh_lookup(tbl, dst, dev);
3067 		if (!neigh) {
3068 			NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3069 			err = -ENOENT;
3070 			goto err_unlock;
3071 		}
3072 
3073 		err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
3074 		neigh_release(neigh);
3075 		if (err)
3076 			goto err_unlock;
3077 	}
3078 
3079 	rcu_read_unlock();
3080 
3081 	return rtnl_unicast(skb, net, pid);
3082 err_unlock:
3083 	rcu_read_unlock();
3084 	kfree_skb(skb);
3085 	return err;
3086 }
3087 
3088 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3089 {
3090 	int chain;
3091 	struct neigh_hash_table *nht;
3092 
3093 	rcu_read_lock();
3094 	nht = rcu_dereference(tbl->nht);
3095 
3096 	read_lock_bh(&tbl->lock); /* avoid resizes */
3097 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3098 		struct neighbour *n;
3099 
3100 		neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
3101 			cb(n, cookie);
3102 	}
3103 	read_unlock_bh(&tbl->lock);
3104 	rcu_read_unlock();
3105 }
3106 EXPORT_SYMBOL(neigh_for_each);
3107 
3108 /* The tbl->lock must be held as a writer and BH disabled. */
3109 void __neigh_for_each_release(struct neigh_table *tbl,
3110 			      int (*cb)(struct neighbour *))
3111 {
3112 	struct neigh_hash_table *nht;
3113 	int chain;
3114 
3115 	nht = rcu_dereference_protected(tbl->nht,
3116 					lockdep_is_held(&tbl->lock));
3117 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3118 		struct hlist_node *tmp;
3119 		struct neighbour *n;
3120 
3121 		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
3122 			int release;
3123 
3124 			write_lock(&n->lock);
3125 			release = cb(n);
3126 			if (release) {
3127 				hlist_del_rcu(&n->hash);
3128 				hlist_del_rcu(&n->dev_list);
3129 				neigh_mark_dead(n);
3130 			}
3131 			write_unlock(&n->lock);
3132 			if (release)
3133 				neigh_cleanup_and_release(n);
3134 		}
3135 	}
3136 }
3137 EXPORT_SYMBOL(__neigh_for_each_release);
3138 
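/* Resolve addr via the neighbour table selected by index and transmit
 * skb through it; NEIGH_LINK_TABLE bypasses resolution and only builds
 * the hard header. The skb is freed on resolution or header errors.
 */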
3139 int neigh_xmit(int index, struct net_device *dev,
3140 	       const void *addr, struct sk_buff *skb)
3141 {
3142 	int err = -EAFNOSUPPORT;
3143 
3144 	if (likely(index < NEIGH_NR_TABLES)) {
3145 		struct neigh_table *tbl;
3146 		struct neighbour *neigh;
3147 
3148 		rcu_read_lock();
3149 		tbl = rcu_dereference(neigh_tables[index]);
3150 		if (!tbl)
3151 			goto out_unlock;
3152 		if (index == NEIGH_ARP_TABLE) {
3153 			u32 key = *((u32 *)addr);
3154 
3155 			neigh = __ipv4_neigh_lookup_noref(dev, key);
3156 		} else {
3157 			neigh = __neigh_lookup_noref(tbl, addr, dev);
3158 		}
3159 		if (!neigh)
3160 			neigh = __neigh_create(tbl, addr, dev, false);
3161 		err = PTR_ERR(neigh);
3162 		if (IS_ERR(neigh)) {
3163 			rcu_read_unlock();
3164 			goto out_kfree_skb;
3165 		}
3166 		err = READ_ONCE(neigh->output)(neigh, skb);
3167 out_unlock:
3168 		rcu_read_unlock();
3169 	} else if (index == NEIGH_LINK_TABLE) {
3171 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3172 				      addr, NULL, skb->len);
3173 		if (err < 0)
3174 			goto out_kfree_skb;
3175 		err = dev_queue_xmit(skb);
3176 	}
3177 out:
3178 	return err;
3179 out_kfree_skb:
3180 	kfree_skb(skb);
3181 	goto out;
3182 }
3183 EXPORT_SYMBOL(neigh_xmit);
3184 
3185 #ifdef CONFIG_PROC_FS
3186 
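/* seq_file iteration helpers used by per-protocol /proc dumps such as
 * /proc/net/arp: walk the main hash table first, then the proxy
 * buckets, tracking progress in neigh_seq_state so reads can resume.
 */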
3187 static struct neighbour *neigh_get_valid(struct seq_file *seq,
3188 					 struct neighbour *n,
3189 					 loff_t *pos)
3190 {
3191 	struct neigh_seq_state *state = seq->private;
3192 	struct net *net = seq_file_net(seq);
3193 
3194 	if (!net_eq(dev_net(n->dev), net))
3195 		return NULL;
3196 
3197 	if (state->neigh_sub_iter) {
3198 		loff_t fakep = 0;
3199 		void *v;
3200 
3201 		v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
3202 		if (!v)
3203 			return NULL;
3204 		if (pos)
3205 			return v;
3206 	}
3207 
3208 	if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3209 		return n;
3210 
3211 	if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
3212 		return n;
3213 
3214 	return NULL;
3215 }
3216 
3217 static struct neighbour *neigh_get_first(struct seq_file *seq)
3218 {
3219 	struct neigh_seq_state *state = seq->private;
3220 	struct neigh_hash_table *nht = state->nht;
3221 	struct neighbour *n, *tmp;
3222 
3223 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3224 
3225 	while (++state->bucket < (1 << nht->hash_shift)) {
3226 		neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
3227 			tmp = neigh_get_valid(seq, n, NULL);
3228 			if (tmp)
3229 				return tmp;
3230 		}
3231 	}
3232 
3233 	return NULL;
3234 }
3235 
3236 static struct neighbour *neigh_get_next(struct seq_file *seq,
3237 					struct neighbour *n,
3238 					loff_t *pos)
3239 {
3240 	struct neigh_seq_state *state = seq->private;
3241 	struct neighbour *tmp;
3242 
3243 	if (state->neigh_sub_iter) {
3244 		void *v = state->neigh_sub_iter(state, n, pos);
3245 
3246 		if (v)
3247 			return n;
3248 	}
3249 
3250 	hlist_for_each_entry_continue(n, hash) {
3251 		tmp = neigh_get_valid(seq, n, pos);
3252 		if (tmp) {
3253 			n = tmp;
3254 			goto out;
3255 		}
3256 	}
3257 
3258 	n = neigh_get_first(seq);
3259 out:
3260 	if (n && pos)
3261 		--(*pos);
3262 
3263 	return n;
3264 }
3265 
3266 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3267 {
3268 	struct neighbour *n = neigh_get_first(seq);
3269 
3270 	if (n) {
3271 		--(*pos);
3272 		while (*pos) {
3273 			n = neigh_get_next(seq, n, pos);
3274 			if (!n)
3275 				break;
3276 		}
3277 	}
3278 	return *pos ? NULL : n;
3279 }
3280 
3281 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3282 {
3283 	struct neigh_seq_state *state = seq->private;
3284 	struct net *net = seq_file_net(seq);
3285 	struct neigh_table *tbl = state->tbl;
3286 	struct pneigh_entry *pn = NULL;
3287 	int bucket;
3288 
3289 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
3290 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3291 		pn = rcu_dereference(tbl->phash_buckets[bucket]);
3292 
3293 		while (pn && !net_eq(pneigh_net(pn), net))
3294 			pn = rcu_dereference(pn->next);
3295 		if (pn)
3296 			break;
3297 	}
3298 	state->bucket = bucket;
3299 
3300 	return pn;
3301 }
3302 
3303 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3304 					    struct pneigh_entry *pn,
3305 					    loff_t *pos)
3306 {
3307 	struct neigh_seq_state *state = seq->private;
3308 	struct net *net = seq_file_net(seq);
3309 	struct neigh_table *tbl = state->tbl;
3310 
3311 	do {
3312 		pn = rcu_dereference(pn->next);
3313 	} while (pn && !net_eq(pneigh_net(pn), net));
3314 
3315 	while (!pn) {
3316 		if (++state->bucket > PNEIGH_HASHMASK)
3317 			break;
3318 
3319 		pn = rcu_dereference(tbl->phash_buckets[state->bucket]);
3320 
3321 		while (pn && !net_eq(pneigh_net(pn), net))
3322 			pn = rcu_dereference(pn->next);
3323 		if (pn)
3324 			break;
3325 	}
3326 
3327 	if (pn && pos)
3328 		--(*pos);
3329 
3330 	return pn;
3331 }
3332 
3333 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3334 {
3335 	struct pneigh_entry *pn = pneigh_get_first(seq);
3336 
3337 	if (pn) {
3338 		--(*pos);
3339 		while (*pos) {
3340 			pn = pneigh_get_next(seq, pn, pos);
3341 			if (!pn)
3342 				break;
3343 		}
3344 	}
3345 	return *pos ? NULL : pn;
3346 }
3347 
3348 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3349 {
3350 	struct neigh_seq_state *state = seq->private;
3351 	void *rc;
3352 	loff_t idxpos = *pos;
3353 
3354 	rc = neigh_get_idx(seq, &idxpos);
3355 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3356 		rc = pneigh_get_idx(seq, &idxpos);
3357 
3358 	return rc;
3359 }
3360 
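/* Entry points for the helpers above; RCU plus tbl->lock (taken as a
 * reader, to keep the hash table from resizing) are held between
 * neigh_seq_start() and neigh_seq_stop().
 */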
3361 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3362 	__acquires(tbl->lock)
3363 	__acquires(rcu)
3364 {
3365 	struct neigh_seq_state *state = seq->private;
3366 
3367 	state->tbl = tbl;
3368 	state->bucket = -1;
3369 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3370 
3371 	rcu_read_lock();
3372 	state->nht = rcu_dereference(tbl->nht);
3373 	read_lock_bh(&tbl->lock);
3374 
3375 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3376 }
3377 EXPORT_SYMBOL(neigh_seq_start);
3378 
3379 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3380 {
3381 	struct neigh_seq_state *state;
3382 	void *rc;
3383 
3384 	if (v == SEQ_START_TOKEN) {
3385 		rc = neigh_get_first(seq);
3386 		goto out;
3387 	}
3388 
3389 	state = seq->private;
3390 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3391 		rc = neigh_get_next(seq, v, NULL);
3392 		if (rc)
3393 			goto out;
3394 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3395 			rc = pneigh_get_first(seq);
3396 	} else {
3397 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3398 		rc = pneigh_get_next(seq, v, NULL);
3399 	}
3400 out:
3401 	++(*pos);
3402 	return rc;
3403 }
3404 EXPORT_SYMBOL(neigh_seq_next);
3405 
3406 void neigh_seq_stop(struct seq_file *seq, void *v)
3407 	__releases(tbl->lock)
3408 	__releases(rcu)
3409 {
3410 	struct neigh_seq_state *state = seq->private;
3411 	struct neigh_table *tbl = state->tbl;
3412 
3413 	read_unlock_bh(&tbl->lock);
3414 	rcu_read_unlock();
3415 }
3416 EXPORT_SYMBOL(neigh_seq_stop);
3417 
3418 /* statistics via seq_file */
3419 
3420 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3421 {
3422 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3423 	int cpu;
3424 
3425 	if (*pos == 0)
3426 		return SEQ_START_TOKEN;
3427 
3428 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3429 		if (!cpu_possible(cpu))
3430 			continue;
3431 		*pos = cpu+1;
3432 		return per_cpu_ptr(tbl->stats, cpu);
3433 	}
3434 	return NULL;
3435 }
3436 
3437 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3438 {
3439 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3440 	int cpu;
3441 
3442 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3443 		if (!cpu_possible(cpu))
3444 			continue;
3445 		*pos = cpu+1;
3446 		return per_cpu_ptr(tbl->stats, cpu);
3447 	}
3448 	(*pos)++;
3449 	return NULL;
3450 }
3451 
3452 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3453 {
3454 
3455 }
3456 
3457 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3458 {
3459 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3460 	struct neigh_statistics *st = v;
3461 
3462 	if (v == SEQ_START_TOKEN) {
3463 		seq_puts(seq, "entries  allocs   destroys hash_grows lookups  hits     res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3464 		return 0;
3465 	}
3466 
3467 	seq_printf(seq, "%08x %08lx %08lx %08lx   %08lx %08lx %08lx   "
3468 			"%08lx         %08lx         %08lx         "
3469 			"%08lx       %08lx            %08lx\n",
3470 		   atomic_read(&tbl->entries),
3471 
3472 		   st->allocs,
3473 		   st->destroys,
3474 		   st->hash_grows,
3475 
3476 		   st->lookups,
3477 		   st->hits,
3478 
3479 		   st->res_failed,
3480 
3481 		   st->rcv_probes_mcast,
3482 		   st->rcv_probes_ucast,
3483 
3484 		   st->periodic_gc_runs,
3485 		   st->forced_gc_runs,
3486 		   st->unres_discards,
3487 		   st->table_fulls
3488 		   );
3489 
3490 	return 0;
3491 }
3492 
3493 static const struct seq_operations neigh_stat_seq_ops = {
3494 	.start	= neigh_stat_seq_start,
3495 	.next	= neigh_stat_seq_next,
3496 	.stop	= neigh_stat_seq_stop,
3497 	.show	= neigh_stat_seq_show,
3498 };
3499 #endif /* CONFIG_PROC_FS */
3500 
3501 static void __neigh_notify(struct neighbour *n, int type, int flags,
3502 			   u32 pid)
3503 {
3504 	struct sk_buff *skb;
3505 	int err = -ENOBUFS;
3506 	struct net *net;
3507 
3508 	rcu_read_lock();
3509 	net = dev_net_rcu(n->dev);
3510 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3511 	if (skb == NULL)
3512 		goto errout;
3513 
3514 	err = neigh_fill_info(skb, n, pid, 0, type, flags);
3515 	if (err < 0) {
3516 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3517 		WARN_ON(err == -EMSGSIZE);
3518 		kfree_skb(skb);
3519 		goto errout;
3520 	}
3521 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3522 	goto out;
3523 errout:
3524 	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3525 out:
3526 	rcu_read_unlock();
3527 }
3528 
3529 void neigh_app_ns(struct neighbour *n)
3530 {
3531 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3532 }
3533 EXPORT_SYMBOL(neigh_app_ns);
3534 
3535 #ifdef CONFIG_SYSCTL
3536 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3537 
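/* The legacy unres_qlen sysctl is expressed in packets while the value
 * is stored in bytes; convert using the truesize of one ETH_FRAME_LEN
 * skb in each direction.
 */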
3538 static int proc_unres_qlen(const struct ctl_table *ctl, int write,
3539 			   void *buffer, size_t *lenp, loff_t *ppos)
3540 {
3541 	int size, ret;
3542 	struct ctl_table tmp = *ctl;
3543 
3544 	tmp.extra1 = SYSCTL_ZERO;
3545 	tmp.extra2 = &unres_qlen_max;
3546 	tmp.data = &size;
3547 
3548 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3549 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3550 
3551 	if (write && !ret)
3552 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3553 	return ret;
3554 }
3555 
3556 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3557 				  int index)
3558 {
3559 	struct net_device *dev;
3560 	int family = neigh_parms_family(p);
3561 
3562 	rcu_read_lock();
3563 	for_each_netdev_rcu(net, dev) {
3564 		struct neigh_parms *dst_p =
3565 				neigh_get_dev_parms_rcu(dev, family);
3566 
3567 		if (dst_p && !test_bit(index, dst_p->data_state))
3568 			dst_p->data[index] = p->data[index];
3569 	}
3570 	rcu_read_unlock();
3571 }
3572 
3573 static void neigh_proc_update(const struct ctl_table *ctl, int write)
3574 {
3575 	struct net_device *dev = ctl->extra1;
3576 	struct neigh_parms *p = ctl->extra2;
3577 	struct net *net = neigh_parms_net(p);
3578 	int index = (int *) ctl->data - p->data;
3579 
3580 	if (!write)
3581 		return;
3582 
3583 	set_bit(index, p->data_state);
3584 	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3585 		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3586 	if (!dev) /* NULL dev means this is default value */
3587 		neigh_copy_dflt_parms(net, p, index);
3588 }
3589 
3590 static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
3591 					   void *buffer, size_t *lenp,
3592 					   loff_t *ppos)
3593 {
3594 	struct ctl_table tmp = *ctl;
3595 	int ret;
3596 
3597 	tmp.extra1 = SYSCTL_ZERO;
3598 	tmp.extra2 = SYSCTL_INT_MAX;
3599 
3600 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3601 	neigh_proc_update(ctl, write);
3602 	return ret;
3603 }
3604 
3605 static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
3606 						   void *buffer, size_t *lenp, loff_t *ppos)
3607 {
3608 	struct ctl_table tmp = *ctl;
3609 	int ret;
3610 
3611 	int min = msecs_to_jiffies(1);
3612 
3613 	tmp.extra1 = &min;
3614 	tmp.extra2 = NULL;
3615 
3616 	ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
3617 	neigh_proc_update(ctl, write);
3618 	return ret;
3619 }
3620 
3621 int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
3622 			size_t *lenp, loff_t *ppos)
3623 {
3624 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3625 
3626 	neigh_proc_update(ctl, write);
3627 	return ret;
3628 }
3629 EXPORT_SYMBOL(neigh_proc_dointvec);
3630 
3631 int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
3632 				size_t *lenp, loff_t *ppos)
3633 {
3634 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3635 
3636 	neigh_proc_update(ctl, write);
3637 	return ret;
3638 }
3639 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3640 
3641 static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
3642 					      void *buffer, size_t *lenp,
3643 					      loff_t *ppos)
3644 {
3645 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3646 
3647 	neigh_proc_update(ctl, write);
3648 	return ret;
3649 }
3650 
3651 int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
3652 				   void *buffer, size_t *lenp, loff_t *ppos)
3653 {
3654 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3655 
3656 	neigh_proc_update(ctl, write);
3657 	return ret;
3658 }
3659 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3660 
3661 static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
3662 					  void *buffer, size_t *lenp,
3663 					  loff_t *ppos)
3664 {
3665 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3666 
3667 	neigh_proc_update(ctl, write);
3668 	return ret;
3669 }
3670 
3671 static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
3672 					  void *buffer, size_t *lenp,
3673 					  loff_t *ppos)
3674 {
3675 	struct neigh_parms *p = ctl->extra2;
3676 	int ret;
3677 
3678 	if (strcmp(ctl->procname, "base_reachable_time") == 0)
3679 		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3680 	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3681 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3682 	else
3683 		ret = -1;
3684 
3685 	if (write && ret == 0) {
3686 		/* update reachable_time as well, otherwise, the change will
3687 		 * only be effective after the next time neigh_periodic_work
3688 		 * decides to recompute it
3689 		 */
3690 		p->reachable_time =
3691 			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3692 	}
3693 	return ret;
3694 }
3695 
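/* The template below stores each knob's offset within struct
 * neigh_parms in .data; neigh_sysctl_register() later rebases it by
 * adding the address of the actual parms instance.
 */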
3696 #define NEIGH_PARMS_DATA_OFFSET(index)	\
3697 	(&((struct neigh_parms *) 0)->data[index])
3698 
3699 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3700 	[NEIGH_VAR_ ## attr] = { \
3701 		.procname	= name, \
3702 		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3703 		.maxlen		= sizeof(int), \
3704 		.mode		= mval, \
3705 		.proc_handler	= proc, \
3706 	}
3707 
3708 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3709 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3710 
3711 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3712 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3713 
3714 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3715 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3716 
3717 #define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
3718 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
3719 
3720 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3721 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3722 
3723 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3724 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3725 
3726 static struct neigh_sysctl_table {
3727 	struct ctl_table_header *sysctl_header;
3728 	struct ctl_table neigh_vars[NEIGH_VAR_MAX];
3729 } neigh_sysctl_template __read_mostly = {
3730 	.neigh_vars = {
3731 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3732 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3733 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3734 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3735 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3736 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3737 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3738 		NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
3739 						       "interval_probe_time_ms"),
3740 		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3741 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3742 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3743 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3744 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3745 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3746 		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3747 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3748 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3749 		[NEIGH_VAR_GC_INTERVAL] = {
3750 			.procname	= "gc_interval",
3751 			.maxlen		= sizeof(int),
3752 			.mode		= 0644,
3753 			.proc_handler	= proc_dointvec_jiffies,
3754 		},
3755 		[NEIGH_VAR_GC_THRESH1] = {
3756 			.procname	= "gc_thresh1",
3757 			.maxlen		= sizeof(int),
3758 			.mode		= 0644,
3759 			.extra1		= SYSCTL_ZERO,
3760 			.extra2		= SYSCTL_INT_MAX,
3761 			.proc_handler	= proc_dointvec_minmax,
3762 		},
3763 		[NEIGH_VAR_GC_THRESH2] = {
3764 			.procname	= "gc_thresh2",
3765 			.maxlen		= sizeof(int),
3766 			.mode		= 0644,
3767 			.extra1		= SYSCTL_ZERO,
3768 			.extra2		= SYSCTL_INT_MAX,
3769 			.proc_handler	= proc_dointvec_minmax,
3770 		},
3771 		[NEIGH_VAR_GC_THRESH3] = {
3772 			.procname	= "gc_thresh3",
3773 			.maxlen		= sizeof(int),
3774 			.mode		= 0644,
3775 			.extra1		= SYSCTL_ZERO,
3776 			.extra2		= SYSCTL_INT_MAX,
3777 			.proc_handler	= proc_dointvec_minmax,
3778 		},
3779 	},
3780 };
3781 
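/* The template above surfaces, per family and per device, as files such as
 * /proc/sys/net/ipv4/neigh/default/gc_thresh1 or
 * /proc/sys/net/ipv6/neigh/eth0/retrans_time_ms ("eth0" is only an
 * illustrative name; the directory component comes from dev->name below).
 */
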
3782 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3783 			  proc_handler *handler)
3784 {
3785 	int i;
3786 	struct neigh_sysctl_table *t;
3787 	const char *dev_name_source;
3788 	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
3789 	char *p_name;
3790 	size_t neigh_vars_size;
3791 
3792 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
3793 	if (!t)
3794 		goto err;
3795 
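	/* Rebase each compile-time offset stored in .data onto this parms
	 * instance, and stash dev/parms in extra1/extra2, where the
	 * neigh_proc_*() handlers expect to find them.
	 */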
3796 	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3797 		t->neigh_vars[i].data += (long) p;
3798 		t->neigh_vars[i].extra1 = dev;
3799 		t->neigh_vars[i].extra2 = p;
3800 	}
3801 
3802 	neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
3803 	if (dev) {
3804 		dev_name_source = dev->name;
3805 		/* Terminate the table early: the gc_* entries below are default-only */
3806 		neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
3807 	} else {
3808 		struct neigh_table *tbl = p->tbl;
3809 		dev_name_source = "default";
3810 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3811 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3812 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3813 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3814 	}
3815 
3816 	if (handler) {
3817 		/* RetransTime */
3818 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3819 		/* ReachableTime */
3820 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3821 		/* RetransTime (in milliseconds) */
3822 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3823 		/* ReachableTime (in milliseconds) */
3824 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3825 	} else {
3826 		/* These handlers update p->reachable_time after
3827 		 * base_reachable_time(_ms) is written, so the new value takes
3828 		 * effect on the next neighbour update instead of waiting for
3829 		 * neigh_periodic_work to recompute it (which can take several
3830 		 * minutes).  Any handler that replaces them should do the same.
3831 		 */
3832 		/* ReachableTime */
3833 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3834 			neigh_proc_base_reachable_time;
3835 		/* ReachableTime (in milliseconds) */
3836 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3837 			neigh_proc_base_reachable_time;
3838 	}
3839 
3840 	switch (neigh_parms_family(p)) {
3841 	case AF_INET:
3842 		p_name = "ipv4";
3843 		break;
3844 	case AF_INET6:
3845 		p_name = "ipv6";
3846 		break;
3847 	default:
3848 		BUG();
3849 	}
3850 
3851 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3852 		p_name, dev_name_source);
3853 	t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
3854 						  neigh_path, t->neigh_vars,
3855 						  neigh_vars_size);
3856 	if (!t->sysctl_header)
3857 		goto free;
3858 
3859 	p->sysctl_table = t;
3860 	return 0;
3861 
3862 free:
3863 	kfree(t);
3864 err:
3865 	return -ENOBUFS;
3866 }
3867 EXPORT_SYMBOL(neigh_sysctl_register);
3868 
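/* A hedged usage sketch, modelled on the IPv6 caller in addrconf: the knobs
 * are registered when an interface joins the family and torn down
 * symmetrically (error handling elided here):
 *
 *	err = neigh_sysctl_register(dev, idev->nd_parms,
 *				    &ndisc_ifinfo_sysctl_change);
 *	...
 *	neigh_sysctl_unregister(idev->nd_parms);
 */
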
3869 void neigh_sysctl_unregister(struct neigh_parms *p)
3870 {
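	/* The NULL check below makes this safe to call even if registration
	 * failed or was never attempted for this parms instance.
	 */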
3871 	if (p->sysctl_table) {
3872 		struct neigh_sysctl_table *t = p->sysctl_table;
3873 		p->sysctl_table = NULL;
3874 		unregister_net_sysctl_table(t->sysctl_header);
3875 		kfree(t);
3876 	}
3877 }
3878 EXPORT_SYMBOL(neigh_sysctl_unregister);
3879 
3880 #endif	/* CONFIG_SYSCTL */
3881 
3882 static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
3883 	{.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
3884 	{.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
3885 	{.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
3886 	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
3887 	{.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info},
3888 	{.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set},
3889 };
3890 
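/* For orientation: these handlers back the iproute2 "ip neigh" and
 * "ip ntable" commands, e.g. RTM_NEWNEIGH for "ip neigh add" and the
 * RTM_GETNEIGH dumpit for "ip neigh show".
 */
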
3891 static int __init neigh_init(void)
3892 {
3893 	rtnl_register_many(neigh_rtnl_msg_handlers);
3894 	return 0;
3895 }
3896 
3897 subsys_initcall(neigh_init);
3898