xref: /linux/net/core/neighbour.c (revision a00266969c8ecaa15d8170490e407131287d7a71)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void neigh_notify(struct neighbour *n, int type, int flags, u32 pid);
static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid);
static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
{
	int i;

	switch (family) {
	default:
		DEBUG_NET_WARN_ON_ONCE(1);
		fallthrough; /* to avoid panic by null-ptr-deref */
	case AF_INET:
		i = NEIGH_ARP_TABLE;
		break;
	case AF_INET6:
		i = NEIGH_ND_TABLE;
		break;
	}

	return &dev->neighbours[i];
}

/*
   Neighbour hash table buckets are protected with tbl->lock.

   - All scans of and updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if the backend/driver wants to use
     the neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with the rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simple and does
   not make callbacks into neighbour tables.
 */
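
/*
 * Example (an illustrative sketch added to this copy, not part of the
 * original source): the safe pattern for doing non-trivial work on an
 * entry found during a bucket scan is to pin it with a reference under
 * tbl->lock, drop the lock, and only then call out:
 *
 *	spin_lock_bh(&tbl->lock);
 *	...				// find n in a hash bucket
 *	neigh_hold(n);			// pin the entry
 *	spin_unlock_bh(&tbl->lock);
 *	...				// non-trivial work, backend callbacks
 *	neigh_release(n);		// drop the reference when done
 */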

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * Returns a value randomly distributed in the interval
 * [base/2, 3*base/2); e.g. for base = 30*HZ the result lies between
 * 15*HZ and 45*HZ. This corresponds to the default IPv6 settings and
 * is not overridable, because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? get_random_u32_below(base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);

static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
	if (!list_empty(&n->managed_list))
		list_del_init(&n->managed_list);
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	spin_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	/* remove from the gc list if new state is permanent or if neighbor is
	 * externally learned / validated; otherwise entry should be on the gc
	 * list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED);
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}
out:
	write_unlock(&n->lock);
	spin_unlock_bh(&n->tbl->lock);
}

static void neigh_update_managed_list(struct neighbour *n)
{
	bool on_managed_list, add_to_managed;

	spin_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	add_to_managed = n->flags & NTF_MANAGED;
	on_managed_list = !list_empty(&n->managed_list);

	if (!add_to_managed && on_managed_list)
		list_del_init(&n->managed_list);
	else if (add_to_managed && !on_managed_list)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
out:
	write_unlock(&n->lock);
	spin_unlock_bh(&n->tbl->lock);
}

static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
			       bool *gc_update, bool *managed_update)
{
	u32 ndm_flags, old_flags = neigh->flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return;

	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
	ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_VALIDATED) ? NTF_EXT_VALIDATED : 0;

	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		*notify = 1;
		*gc_update = true;
	}
	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
		if (ndm_flags & NTF_MANAGED)
			neigh->flags |= NTF_MANAGED;
		else
			neigh->flags &= ~NTF_MANAGED;
		*notify = 1;
		*managed_update = true;
	}
	if ((old_flags ^ ndm_flags) & NTF_EXT_VALIDATED) {
		if (ndm_flags & NTF_EXT_VALIDATED)
			neigh->flags |= NTF_EXT_VALIDATED;
		else
			neigh->flags &= ~NTF_EXT_VALIDATED;
		*notify = 1;
		*gc_update = true;
	}
}

bool neigh_remove_one(struct neighbour *n)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		hlist_del_rcu(&n->hash);
		hlist_del_rcu(&n->dev_list);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

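/*
 * Forced garbage collection (added summary): called from neigh_alloc()
 * when the number of GC-eligible entries crosses gc_thresh2/gc_thresh3.
 * Walks tbl->gc_list from the front (entries are added at the tail, so
 * the front holds the oldest ones) and drops unreferenced entries that
 * are FAILED, NOARP, multicast, or have not been updated in the last
 * 5 seconds. The scan is budgeted to roughly 1 ms (checked every 16
 * entries) so it cannot stall softirq processing for too long.
 */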
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) -
			READ_ONCE(tbl->gc_thresh2);
	u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;
	int loop = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	spin_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (n->nud_state == NUD_NOARP) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    !time_in_range(n->updated, tref, jiffies))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n))
				shrunk++;
			if (shrunk >= max_clean)
				break;
			if (++loop == 16) {
				if (ktime_get_ns() > tmax)
					goto unlock;
				loop = 0;
			}
		}
	}

	WRITE_ONCE(tbl->last_flush, jiffies);
unlock:
	spin_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	/* Keep a safe distance from the "jiffies - LONG_MAX" wraparound
	 * point while the timer is running in DELAY/PROBE state, but still
	 * report large times in the past to user space.
	 */
	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);

	neigh_hold(n);
	if (!time_in_range(n->confirmed, mint, jiffies))
		n->confirmed = mint;
	if (time_before(n->used, n->confirmed))
		n->used = n->confirmed;
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    timer_delete(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_parms_qlen_dec(struct net_device *dev, int family)
{
	struct neigh_parms *p;

	rcu_read_lock();
	p = neigh_get_dev_parms_rcu(dev, family);
	if (p)
		p->qlen--;
	rcu_read_unlock();
}

static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
			       int family)
{
	struct sk_buff_head tmp;
	unsigned long flags;
	struct sk_buff *skb;

	skb_queue_head_init(&tmp);
	spin_lock_irqsave(&list->lock, flags);
	skb = skb_peek(list);
	while (skb != NULL) {
		struct sk_buff *skb_next = skb_peek_next(skb, list);
		struct net_device *dev = skb->dev;

		if (net == NULL || net_eq(dev_net(dev), net)) {
			neigh_parms_qlen_dec(dev, family);
			__skb_unlink(skb, list);
			__skb_queue_tail(&tmp, skb);
		}
		skb = skb_next;
	}
	spin_unlock_irqrestore(&list->lock, flags);

	while ((skb = __skb_dequeue(&tmp))) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_one(struct neighbour *n)
{
	hlist_del_rcu(&n->hash);
	hlist_del_rcu(&n->dev_list);

	write_lock(&n->lock);

	neigh_del_timer(n);
	neigh_mark_dead(n);

	if (refcount_read(&n->refcnt) != 1) {
		/* The most unpleasant situation.
		 * We must destroy the neighbour entry,
		 * but someone still uses it.
		 *
		 * Its destruction will be delayed until
		 * the last user releases it, but
		 * we must kill timers etc. and move
		 * it to a safe state.
		 */
		__skb_queue_purge(&n->arp_queue);
		n->arp_queue_len_bytes = 0;
		WRITE_ONCE(n->output, neigh_blackhole);

		if (n->nud_state & NUD_VALID)
			n->nud_state = NUD_NOARP;
		else
			n->nud_state = NUD_NONE;

		neigh_dbg(2, "neigh %p is stray\n", n);
	}

	write_unlock(&n->lock);

	neigh_cleanup_and_release(n);
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	struct hlist_head *dev_head;
	struct hlist_node *tmp;
	struct neighbour *n;

	dev_head = neigh_get_dev_table(dev, tbl->family);

	hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
		if (skip_perm &&
		    (n->nud_state & NUD_PERMANENT ||
		     n->flags & NTF_EXT_VALIDATED))
			continue;

		neigh_flush_one(n);
	}
}

static void neigh_flush_table(struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	int i;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct hlist_node *tmp;
		struct neighbour *n;

		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i])
			neigh_flush_one(n);
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	spin_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	spin_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	spin_lock_bh(&tbl->lock);
	if (likely(dev)) {
		neigh_flush_dev(tbl, dev, skip_perm);
	} else {
		DEBUG_NET_WARN_ON_ONCE(skip_perm);
		neigh_flush_table(tbl);
	}
	spin_unlock_bh(&tbl->lock);

	pneigh_ifdown(tbl, dev, skip_perm);
	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
			   tbl->family);
	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		timer_delete_sync(&tbl->proxy_timer);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u32 flags, bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries, gc_thresh3;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
	if (entries >= gc_thresh3 ||
	    (entries >= READ_ONCE(tbl->gc_thresh2) &&
	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	n->flags	  = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
	INIT_LIST_HEAD(&n->gc_list);
	INIT_LIST_HEAD(&n->managed_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct hlist_head);
	struct hlist_head *hash_heads;
	struct neigh_hash_table *ret;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;

	hash_heads = kzalloc(size, GFP_ATOMIC);
	if (!hash_heads) {
		kfree(ret);
		return NULL;
	}
	ret->hash_heads = hash_heads;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);

	kfree(nht->hash_heads);
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct hlist_node *tmp;
		struct neighbour *n;

		neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);

			hlist_del_rcu(&n->hash);
			hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
		struct net_device *dev, u32 flags,
		bool exempt_from_gc, bool want_ref)
{
	u32 hash_val, key_len = tbl->key_len;
	struct neighbour *n1, *rc, *n;
	struct neigh_hash_table *nht;
	int error;

	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	spin_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
	if (n->flags & NTF_MANAGED)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
	if (want_ref)
		neigh_hold(n);
	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);

	hlist_add_head_rcu(&n->dev_list,
			   neigh_get_dev_table(dev, tbl->family));

	spin_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	spin_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);

	return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);

	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

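/*
 * Note on pneigh locking (added summary): the proxy hash table uses
 * tbl->phash_lock as a mutex serializing all writers (pneigh_create(),
 * pneigh_delete(), pneigh_ifdown()), while readers walk the buckets
 * under RCU; entries are freed through call_rcu(..., pneigh_destroy)
 * once unlinked.
 */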
struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev)
{
	struct pneigh_entry *n;
	unsigned int key_len;
	u32 hash_val;

	key_len = tbl->key_len;
	hash_val = pneigh_hash(pkey, key_len);
	n = rcu_dereference_check(tbl->phash_buckets[hash_val],
				  lockdep_is_held(&tbl->phash_lock));

	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;

		n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
	}

	return NULL;
}
EXPORT_IPV6_MOD(pneigh_lookup);

int pneigh_create(struct neigh_table *tbl, struct net *net,
		  const void *pkey, struct net_device *dev,
		  u32 flags, u8 protocol, bool permanent)
{
	struct pneigh_entry *n;
	unsigned int key_len;
	u32 hash_val;
	int err = 0;

	mutex_lock(&tbl->phash_lock);

	n = pneigh_lookup(tbl, net, pkey, dev);
	if (n)
		goto update;

	key_len = tbl->key_len;
	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n) {
		err = -ENOBUFS;
		goto out;
	}

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		netdev_put(dev, &n->dev_tracker);
		kfree(n);
		err = -ENOBUFS;
		goto out;
	}

	hash_val = pneigh_hash(pkey, key_len);
	n->next = tbl->phash_buckets[hash_val];
	rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
update:
	WRITE_ONCE(n->flags, flags);
	n->permanent = permanent;
	WRITE_ONCE(n->protocol, protocol);
out:
	mutex_unlock(&tbl->phash_lock);
	return err;
}

static void pneigh_destroy(struct rcu_head *rcu)
{
	struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);

	netdev_put(n->dev, &n->dev_tracker);
	kfree(n);
}

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, __rcu **np;
	unsigned int key_len;
	u32 hash_val;

	key_len = tbl->key_len;
	hash_val = pneigh_hash(pkey, key_len);

	mutex_lock(&tbl->phash_lock);

	for (np = &tbl->phash_buckets[hash_val];
	     (n = rcu_dereference_protected(*np, 1)) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			rcu_assign_pointer(*np, n->next);

			mutex_unlock(&tbl->phash_lock);

			if (tbl->pdestructor)
				tbl->pdestructor(n);

			call_rcu(&n->rcu, pneigh_destroy);
			return 0;
		}
	}

	mutex_unlock(&tbl->phash_lock);
	return -ENOENT;
}

static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	struct pneigh_entry *n, __rcu **np;
	LIST_HEAD(head);
	u32 h;

	mutex_lock(&tbl->phash_lock);

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = rcu_dereference_protected(*np, 1)) != NULL) {
			if (skip_perm && n->permanent)
				goto skip;
			if (!dev || n->dev == dev) {
				rcu_assign_pointer(*np, n->next);
				list_add(&n->free_node, &head);
				continue;
			}
skip:
			np = &n->next;
		}
	}

	mutex_unlock(&tbl->phash_lock);

	while (!list_empty(&head)) {
		n = list_first_entry(&head, typeof(*n), free_node);
		list_del(&n->free_node);

		if (tbl->pdestructor)
			tbl->pdestructor(n);

		call_rcu(&n->rcu, pneigh_destroy);
	}
}

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		kfree(parms);
}

/*
 *	The neighbour must already be out of the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	netdev_put(dev, &neigh->dev_tracker);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with the neigh write-locked.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->output);
}

/* Neighbour state is OK;
   enable fast path.

   Called with the neigh write-locked.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}

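/*
 * Periodic garbage collection (added summary): roughly every 300 seconds
 * the per-parms reachable_time is re-randomized; then, unless the table
 * is below gc_thresh1, all hash buckets are scanned and unreferenced
 * entries that are FAILED or stale for longer than GC_STALETIME are
 * removed. The work requeues itself every BASE_REACHABLE_TIME/2.
 */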
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neigh_hash_table *nht;
	struct hlist_node *tmp;
	struct neighbour *n;
	unsigned int i;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	spin_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	Periodically recompute ReachableTime from a random function.
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;

		WRITE_ONCE(tbl->last_rand, jiffies);
		list_for_each_entry(p, &tbl->parms_list, list)
			neigh_set_reach_time(p);
	}

	if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags &
			     (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) {
				write_unlock(&n->lock);
				continue;
			}

			if (time_before(n->used, n->confirmed) &&
			    time_is_before_eq_jiffies(n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     !time_in_range_open(jiffies, n->used,
						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				hlist_del_rcu(&n->hash);
				hlist_del_rcu(&n->dev_list);
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
		}
		/*
		 * It's fine to release the lock here, even if the hash table
		 * grows while we are preempted.
		 */
		spin_unlock_bh(&tbl->lock);
		cond_resched();
		spin_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	spin_unlock_bh(&tbl->lock);
}

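/*
 * Worked example (illustrative, assuming the common defaults of 3 unicast
 * probes, 0 application probes, 3 multicast probes and 0 reprobes): an
 * INCOMPLETE entry gets up to 3 + 0 + 3 = 6 probes before it is declared
 * FAILED, while a PROBE entry gets 3 + 0 + 0 = 3.
 */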
static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;

	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very delicate spot. error_report is a very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So we try to be careful and avoid an endless loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = timer_container_of(neigh, t, timer);
	bool skip_probe = false;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_DELAY);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_STALE);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_PROBE);
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		if (neigh->nud_state == NUD_PROBE &&
		    neigh->flags & NTF_EXT_VALIDATED) {
			WRITE_ONCE(neigh->nud_state, NUD_STALE);
			neigh->updated = jiffies;
		} else {
			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
			neigh_invalidate(neigh);
		}
		notify = 1;
		skip_probe = true;
	}

	if (notify)
		__neigh_notify(neigh, RTM_NEWNEIGH, 0, 0);

	if (skip_probe)
		goto out;

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
		       const bool immediate_ok)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
			neigh->updated = now;
			if (!immediate_ok) {
				next = now + 1;
			} else {
				immediate_probe = true;
				next = now + max(NEIGH_VAR(neigh->parms,
							   RETRANS_TIME),
						 HZ / 100);
			}
			neigh_add_timer(neigh, next);
		} else {
			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		WRITE_ONCE(neigh->nud_state, NUD_DELAY);
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

static void neigh_update_process_arp_queue(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	/* Again: avoid deadlock if something went wrong. */
	while (neigh->nud_state & NUD_VALID &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		struct dst_entry *dst = skb_dst(skb);
		struct neighbour *n2, *n1 = neigh;

		write_unlock_bh(&neigh->lock);

		rcu_read_lock();

		/* Why not just use 'neigh' as-is?  The problem is that
		 * things such as shaper, eql, and sch_teql can end up
		 * using alternative neigh objects to output
		 * the packet in the output path.  So what we need to do
		 * here is re-lookup the top-level neigh in the path so
		 * we can reinject the packet there.
		 */
		n2 = NULL;
		if (dst &&
		    READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) {
			n2 = dst_neigh_lookup_skb(dst, skb);
			if (n2)
				n1 = n2;
		}
		READ_ONCE(n1->output)(n1, skb);
		if (n2)
			neigh_release(n2);
		rcu_read_unlock();

		write_lock_bh(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known as
				a router.
	NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed
				or invalidated.

   Caller MUST hold a reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool gc_update = false, managed_update = false;
	bool process_arp_queue = false;
	int update_isrouter = 0;
	struct net_device *dev;
	int err, notify = 0;
	u8 old;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
		new = old & ~NUD_PERMANENT;
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		goto out;
	}

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update the confirmed timestamp for the neighbour entry after we
	 * receive an ARP packet, even if it doesn't change the IP to MAC
	 * binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update the timestamp only once we know we will make a change to
	 * the neighbour entry. Otherwise we risk moving the locktime window
	 * with no-op updates and ignoring relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		WRITE_ONCE(neigh->nud_state, new);
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);

	if (!(old & NUD_VALID))
		process_arp_queue = true;

out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);

	if (notify)
		__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);

	if (process_arp_queue)
		neigh_update_process_arp_queue(neigh);

	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || gc_update)
		neigh_update_gc_list(neigh);
	if (managed_update)
		neigh_update_managed_list(neigh);

	if (notify)
		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);

	trace_neigh_update_done(neigh, err);
	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
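
/*
 * Example usage (a minimal sketch, not from the original source): a
 * receive path that has just validated an ARP reply for a looked-up
 * entry would typically confirm it with something like
 *
 *	neigh_update(n, sha, NUD_REACHABLE,
 *		     NEIGH_UPDATE_F_OVERRIDE, 0);
 *
 * where sha is the sender hardware address from the packet; see the
 * flag descriptions above __neigh_update() for the override semantics.
 */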

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
				      HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);
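
/*
 * Example usage (a minimal sketch, not from the original source): since
 * the caller must hold neigh->lock for writing, the expected pattern is
 *
 *	write_lock_bh(&neigh->lock);
 *	__neigh_set_probe_once(neigh);
 *	write_unlock_bh(&neigh->lock);
 */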

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* Called without n->lock held; takes write_lock_bh(&n->lock) itself. */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_managed_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table,
					       managed_work.work);
	struct neighbour *neigh;

	spin_lock_bh(&tbl->lock);
	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
		neigh_event_send_probe(neigh, NULL, false);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
			   NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
	spin_unlock_bh(&tbl->lock);
}

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = timer_container_of(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			neigh_parms_qlen_dec(dev, tbl->family);
			__skb_unlink(skb, &tbl->proxy_queue);

			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	timer_delete(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

static unsigned long neigh_proxy_delay(struct neigh_parms *p)
{
	/* If proxy_delay is zero, do not call get_random_u32_below(),
	 * as calling it with a bound of zero is undefined behavior.
	 */
	unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);

	return proxy_delay ?
	       jiffies + get_random_u32_below(proxy_delay) : jiffies;
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long sched_next = neigh_proxy_delay(p);

	if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (timer_delete(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	p->qlen++;
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		refcount_set(&p->refcnt, 1);
		neigh_set_reach_time(p);
		p->qlen = 0;
		netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			netdev_put(dev, &p->dev_tracker);
			kfree(p);
			return NULL;
		}

		spin_lock_bh(&tbl->lock);
		list_add_rcu(&p->list, &tbl->parms.list);
		spin_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;

	spin_lock_bh(&tbl->lock);
	list_del_rcu(&parms->list);
	parms->dead = 1;
	spin_unlock_bh(&tbl->lock);

	netdev_put(parms->dev, &parms->dev_tracker);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	INIT_LIST_HEAD(&tbl->managed_list);

	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	neigh_set_reach_time(&tbl->parms);
	tbl->parms.qlen = 0;

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	spin_lock_init(&tbl->lock);
	mutex_init(&tbl->phash_lock);

	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);

	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	rcu_assign_pointer(neigh_tables[index], tbl);
}
EXPORT_SYMBOL(neigh_table_init);

1885 /*
1886  * Only called from ndisc_cleanup(), which means this is dead code
1887  * because we no longer can unload IPv6 module.
1888  */
1889 int neigh_table_clear(int index, struct neigh_table *tbl)
1890 {
1891 	RCU_INIT_POINTER(neigh_tables[index], NULL);
1892 	synchronize_rcu();
1893 
1894 	/* It is not clean... Fix it to unload IPv6 module safely */
1895 	cancel_delayed_work_sync(&tbl->managed_work);
1896 	cancel_delayed_work_sync(&tbl->gc_work);
1897 	timer_delete_sync(&tbl->proxy_timer);
1898 	pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
1899 	neigh_ifdown(tbl, NULL);
1900 	if (atomic_read(&tbl->entries))
1901 		pr_crit("neighbour leakage\n");
1902 
1903 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1904 		 neigh_hash_free_rcu);
1905 	tbl->nht = NULL;
1906 
1907 	kfree(tbl->phash_buckets);
1908 	tbl->phash_buckets = NULL;
1909 
1910 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1911 
1912 	free_percpu(tbl->stats);
1913 	tbl->stats = NULL;
1914 
1915 	return 0;
1916 }
1917 EXPORT_SYMBOL(neigh_table_clear);
1918 
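/* Map an address family to its neighbour table.  The caller must hold
 * RTNL or rcu_read_lock(), matching rcu_dereference_rtnl() below.
 */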
1919 static struct neigh_table *neigh_find_table(int family)
1920 {
1921 	struct neigh_table *tbl = NULL;
1922 
1923 	switch (family) {
1924 	case AF_INET:
1925 		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
1926 		break;
1927 	case AF_INET6:
1928 		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
1929 		break;
1930 	}
1931 
1932 	return tbl;
1933 }
1934 
1935 const struct nla_policy nda_policy[NDA_MAX+1] = {
1936 	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
1937 	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1938 	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1939 	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
1940 	[NDA_PROBES]		= { .type = NLA_U32 },
1941 	[NDA_VLAN]		= { .type = NLA_U16 },
1942 	[NDA_PORT]		= { .type = NLA_U16 },
1943 	[NDA_VNI]		= { .type = NLA_U32 },
1944 	[NDA_IFINDEX]		= { .type = NLA_U32 },
1945 	[NDA_MASTER]		= { .type = NLA_U32 },
1946 	[NDA_PROTOCOL]		= { .type = NLA_U8 },
1947 	[NDA_NH_ID]		= { .type = NLA_U32 },
1948 	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
1949 	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
1950 };
1951 
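/* RTM_DELNEIGH handler.  NTF_PROXY requests remove a proxy entry from
 * the pneigh hash; ordinary entries are forced to NUD_FAILED via
 * __neigh_update() and then unlinked from the table.
 */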
1952 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1953 			struct netlink_ext_ack *extack)
1954 {
1955 	struct net *net = sock_net(skb->sk);
1956 	struct ndmsg *ndm;
1957 	struct nlattr *dst_attr;
1958 	struct neigh_table *tbl;
1959 	struct neighbour *neigh;
1960 	struct net_device *dev = NULL;
1961 	int err = -EINVAL;
1962 
1963 	ASSERT_RTNL();
1964 	if (nlmsg_len(nlh) < sizeof(*ndm))
1965 		goto out;
1966 
1967 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1968 	if (!dst_attr) {
1969 		NL_SET_ERR_MSG(extack, "Network address not specified");
1970 		goto out;
1971 	}
1972 
1973 	ndm = nlmsg_data(nlh);
1974 	if (ndm->ndm_ifindex) {
1975 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1976 		if (dev == NULL) {
1977 			err = -ENODEV;
1978 			goto out;
1979 		}
1980 	}
1981 
1982 	tbl = neigh_find_table(ndm->ndm_family);
1983 	if (tbl == NULL)
1984 		return -EAFNOSUPPORT;
1985 
1986 	if (nla_len(dst_attr) < (int)tbl->key_len) {
1987 		NL_SET_ERR_MSG(extack, "Invalid network address");
1988 		goto out;
1989 	}
1990 
1991 	if (ndm->ndm_flags & NTF_PROXY) {
1992 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1993 		goto out;
1994 	}
1995 
1996 	if (dev == NULL)
1997 		goto out;
1998 
1999 	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
2000 	if (neigh == NULL) {
2001 		err = -ENOENT;
2002 		goto out;
2003 	}
2004 
2005 	err = __neigh_update(neigh, NULL, NUD_FAILED,
2006 			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
2007 			     NETLINK_CB(skb).portid, extack);
2008 	spin_lock_bh(&tbl->lock);
2009 	neigh_release(neigh);
2010 	neigh_remove_one(neigh);
2011 	spin_unlock_bh(&tbl->lock);
2012 
2013 out:
2014 	return err;
2015 }
2016 
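/* RTM_NEWNEIGH handler.  Creates a new entry (NLM_F_CREATE required) or
 * updates an existing one, translating the request's NTF_* flags into
 * NEIGH_UPDATE_F_* flags for __neigh_update().  NTF_PROXY requests are
 * diverted to the pneigh hash instead.
 */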
2017 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
2018 		     struct netlink_ext_ack *extack)
2019 {
2020 	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
2021 		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
2022 	struct net *net = sock_net(skb->sk);
2023 	struct ndmsg *ndm;
2024 	struct nlattr *tb[NDA_MAX+1];
2025 	struct neigh_table *tbl;
2026 	struct net_device *dev = NULL;
2027 	struct neighbour *neigh;
2028 	void *dst, *lladdr;
2029 	u8 protocol = 0;
2030 	u32 ndm_flags;
2031 	int err;
2032 
2033 	ASSERT_RTNL();
2034 	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
2035 				     nda_policy, extack);
2036 	if (err < 0)
2037 		goto out;
2038 
2039 	err = -EINVAL;
2040 	if (!tb[NDA_DST]) {
2041 		NL_SET_ERR_MSG(extack, "Network address not specified");
2042 		goto out;
2043 	}
2044 
2045 	ndm = nlmsg_data(nlh);
2046 	ndm_flags = ndm->ndm_flags;
2047 	if (tb[NDA_FLAGS_EXT]) {
2048 		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
2049 
2050 		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
2051 			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
2052 			      hweight32(NTF_EXT_MASK)));
2053 		ndm_flags |= (ext << NTF_EXT_SHIFT);
2054 	}
2055 	if (ndm->ndm_ifindex) {
2056 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
2057 		if (dev == NULL) {
2058 			err = -ENODEV;
2059 			goto out;
2060 		}
2061 
2062 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
2063 			NL_SET_ERR_MSG(extack, "Invalid link address");
2064 			goto out;
2065 		}
2066 	}
2067 
2068 	tbl = neigh_find_table(ndm->ndm_family);
2069 	if (tbl == NULL)
2070 		return -EAFNOSUPPORT;
2071 
2072 	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
2073 		NL_SET_ERR_MSG(extack, "Invalid network address");
2074 		goto out;
2075 	}
2076 
2077 	dst = nla_data(tb[NDA_DST]);
2078 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
2079 
2080 	if (tb[NDA_PROTOCOL])
2081 		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
2082 	if (ndm_flags & NTF_PROXY) {
2083 		if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) {
2084 			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
2085 			goto out;
2086 		}
2087 
2088 		err = pneigh_create(tbl, net, dst, dev, ndm_flags, protocol,
2089 				    !!(ndm->ndm_state & NUD_PERMANENT));
2090 		goto out;
2091 	}
2092 
2093 	if (!dev) {
2094 		NL_SET_ERR_MSG(extack, "Device not specified");
2095 		goto out;
2096 	}
2097 
2098 	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
2099 		err = -EINVAL;
2100 		goto out;
2101 	}
2102 
2103 	neigh = neigh_lookup(tbl, dst, dev);
2104 	if (neigh == NULL) {
2105 		bool ndm_permanent  = ndm->ndm_state & NUD_PERMANENT;
2106 		bool exempt_from_gc = ndm_permanent ||
2107 				      ndm_flags & (NTF_EXT_LEARNED |
2108 						   NTF_EXT_VALIDATED);
2109 
2110 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
2111 			err = -ENOENT;
2112 			goto out;
2113 		}
2114 		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
2115 			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
2116 			err = -EINVAL;
2117 			goto out;
2118 		}
2119 		if (ndm_flags & NTF_EXT_VALIDATED) {
2120 			u8 state = ndm->ndm_state;
2121 
2122 			/* NTF_USE and NTF_MANAGED will result in the neighbor
2123 			 * being created with an invalid state (NUD_NONE).
2124 			 */
2125 			if (ndm_flags & (NTF_USE | NTF_MANAGED))
2126 				state = NUD_NONE;
2127 
2128 			if (!(state & NUD_VALID)) {
2129 				NL_SET_ERR_MSG(extack,
2130 					       "Cannot create externally validated neighbor with an invalid state");
2131 				err = -EINVAL;
2132 				goto out;
2133 			}
2134 		}
2135 
2136 		neigh = ___neigh_create(tbl, dst, dev,
2137 					ndm_flags &
2138 					(NTF_EXT_LEARNED | NTF_MANAGED |
2139 					 NTF_EXT_VALIDATED),
2140 					exempt_from_gc, true);
2141 		if (IS_ERR(neigh)) {
2142 			err = PTR_ERR(neigh);
2143 			goto out;
2144 		}
2145 	} else {
2146 		if (nlh->nlmsg_flags & NLM_F_EXCL) {
2147 			err = -EEXIST;
2148 			neigh_release(neigh);
2149 			goto out;
2150 		}
2151 		if (ndm_flags & NTF_EXT_VALIDATED) {
2152 			u8 state = ndm->ndm_state;
2153 
2154 			/* NTF_USE and NTF_MANAGED do not update the existing
2155 			 * state other than clearing it if it was
2156 			 * NUD_PERMANENT.
2157 			 */
2158 			if (ndm_flags & (NTF_USE | NTF_MANAGED))
2159 				state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT;
2160 
2161 			if (!(state & NUD_VALID)) {
2162 				NL_SET_ERR_MSG(extack,
2163 					       "Cannot mark neighbor as externally validated with an invalid state");
2164 				err = -EINVAL;
2165 				neigh_release(neigh);
2166 				goto out;
2167 			}
2168 		}
2169 
2170 		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
2171 			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
2172 				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
2173 	}
2174 
2175 	if (protocol)
2176 		neigh->protocol = protocol;
2177 	if (ndm_flags & NTF_EXT_LEARNED)
2178 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
2179 	if (ndm_flags & NTF_ROUTER)
2180 		flags |= NEIGH_UPDATE_F_ISROUTER;
2181 	if (ndm_flags & NTF_MANAGED)
2182 		flags |= NEIGH_UPDATE_F_MANAGED;
2183 	if (ndm_flags & NTF_USE)
2184 		flags |= NEIGH_UPDATE_F_USE;
2185 	if (ndm_flags & NTF_EXT_VALIDATED)
2186 		flags |= NEIGH_UPDATE_F_EXT_VALIDATED;
2187 
2188 	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
2189 			     NETLINK_CB(skb).portid, extack);
2190 	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED))
2191 		neigh_event_send(neigh, NULL);
2192 	neigh_release(neigh);
2193 out:
2194 	return err;
2195 }
2196 
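/* Dump one neigh_parms instance as a nested NDTA_PARMS attribute,
 * including the deprecated NDTPA_QUEUE_LEN approximation derived from
 * QUEUE_LEN_BYTES.
 */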
2197 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2198 {
2199 	struct nlattr *nest;
2200 
2201 	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2202 	if (nest == NULL)
2203 		return -ENOBUFS;
2204 
2205 	if ((parms->dev &&
2206 	     nla_put_u32(skb, NDTPA_IFINDEX, READ_ONCE(parms->dev->ifindex))) ||
2207 	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2208 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2209 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2210 	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
2211 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
2212 			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2213 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2214 	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2215 	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
2216 			NEIGH_VAR(parms, UCAST_PROBES)) ||
2217 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
2218 			NEIGH_VAR(parms, MCAST_PROBES)) ||
2219 	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2220 			NEIGH_VAR(parms, MCAST_REPROBES)) ||
2221 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, READ_ONCE(parms->reachable_time),
2222 			  NDTPA_PAD) ||
2223 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2224 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2225 	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
2226 			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2227 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2228 			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2229 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2230 			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2231 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2232 			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2233 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2234 			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2235 	    nla_put_msecs(skb, NDTPA_LOCKTIME,
2236 			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2237 	    nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2238 			  NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2239 		goto nla_put_failure;
2240 	return nla_nest_end(skb, nest);
2241 
2242 nla_put_failure:
2243 	nla_nest_cancel(skb, nest);
2244 	return -EMSGSIZE;
2245 }
2246 
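/* Fill an RTM_NEWNEIGHTBL message with the table-wide configuration,
 * statistics aggregated over all possible CPUs, and the parms of the
 * table itself (device-specific parms are dumped separately).
 */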
2247 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2248 			      u32 pid, u32 seq, int type, int flags)
2249 {
2250 	struct nlmsghdr *nlh;
2251 	struct ndtmsg *ndtmsg;
2252 
2253 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2254 	if (nlh == NULL)
2255 		return -EMSGSIZE;
2256 
2257 	ndtmsg = nlmsg_data(nlh);
2258 	ndtmsg->ndtm_family = tbl->family;
2259 	ndtmsg->ndtm_pad1   = 0;
2260 	ndtmsg->ndtm_pad2   = 0;
2261 
2262 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2263 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
2264 			  NDTA_PAD) ||
2265 	    nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
2266 	    nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
2267 	    nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
2268 		goto nla_put_failure;
2269 	{
2270 		unsigned long now = jiffies;
2271 		long flush_delta = now - READ_ONCE(tbl->last_flush);
2272 		long rand_delta = now - READ_ONCE(tbl->last_rand);
2273 		struct neigh_hash_table *nht;
2274 		struct ndt_config ndc = {
2275 			.ndtc_key_len		= tbl->key_len,
2276 			.ndtc_entry_size	= tbl->entry_size,
2277 			.ndtc_entries		= atomic_read(&tbl->entries),
2278 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
2279 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
2280 			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
2281 		};
2282 
2283 		nht = rcu_dereference(tbl->nht);
2284 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2285 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2286 
2287 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2288 			goto nla_put_failure;
2289 	}
2290 
2291 	{
2292 		int cpu;
2293 		struct ndt_stats ndst;
2294 
2295 		memset(&ndst, 0, sizeof(ndst));
2296 
2297 		for_each_possible_cpu(cpu) {
2298 			struct neigh_statistics	*st;
2299 
2300 			st = per_cpu_ptr(tbl->stats, cpu);
2301 			ndst.ndts_allocs		+= READ_ONCE(st->allocs);
2302 			ndst.ndts_destroys		+= READ_ONCE(st->destroys);
2303 			ndst.ndts_hash_grows		+= READ_ONCE(st->hash_grows);
2304 			ndst.ndts_res_failed		+= READ_ONCE(st->res_failed);
2305 			ndst.ndts_lookups		+= READ_ONCE(st->lookups);
2306 			ndst.ndts_hits			+= READ_ONCE(st->hits);
2307 			ndst.ndts_rcv_probes_mcast	+= READ_ONCE(st->rcv_probes_mcast);
2308 			ndst.ndts_rcv_probes_ucast	+= READ_ONCE(st->rcv_probes_ucast);
2309 			ndst.ndts_periodic_gc_runs	+= READ_ONCE(st->periodic_gc_runs);
2310 			ndst.ndts_forced_gc_runs	+= READ_ONCE(st->forced_gc_runs);
2311 			ndst.ndts_table_fulls		+= READ_ONCE(st->table_fulls);
2312 		}
2313 
2314 		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2315 				  NDTA_PAD))
2316 			goto nla_put_failure;
2317 	}
2318 
2319 	BUG_ON(tbl->parms.dev);
2320 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2321 		goto nla_put_failure;
2322 
2323 	nlmsg_end(skb, nlh);
2324 	return 0;
2325 
2326 nla_put_failure:
2327 	nlmsg_cancel(skb, nlh);
2328 	return -EMSGSIZE;
2329 }
2330 
2331 static int neightbl_fill_param_info(struct sk_buff *skb,
2332 				    struct neigh_table *tbl,
2333 				    struct neigh_parms *parms,
2334 				    u32 pid, u32 seq, int type,
2335 				    unsigned int flags)
2336 {
2337 	struct ndtmsg *ndtmsg;
2338 	struct nlmsghdr *nlh;
2339 
2340 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2341 	if (nlh == NULL)
2342 		return -EMSGSIZE;
2343 
2344 	ndtmsg = nlmsg_data(nlh);
2345 	ndtmsg->ndtm_family = tbl->family;
2346 	ndtmsg->ndtm_pad1   = 0;
2347 	ndtmsg->ndtm_pad2   = 0;
2348 
2349 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2350 	    neightbl_fill_parms(skb, parms) < 0)
2351 		goto errout;
2352 
2353 	nlmsg_end(skb, nlh);
2354 	return 0;
2355 errout:
2356 	nlmsg_cancel(skb, nlh);
2357 	return -EMSGSIZE;
2358 }
2359 
2360 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2361 	[NDTA_NAME]		= { .type = NLA_STRING },
2362 	[NDTA_THRESH1]		= { .type = NLA_U32 },
2363 	[NDTA_THRESH2]		= { .type = NLA_U32 },
2364 	[NDTA_THRESH3]		= { .type = NLA_U32 },
2365 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
2366 	[NDTA_PARMS]		= { .type = NLA_NESTED },
2367 };
2368 
2369 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2370 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
2371 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
2372 	[NDTPA_QUEUE_LENBYTES]		= { .type = NLA_U32 },
2373 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
2374 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
2375 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
2376 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
2377 	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
2378 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
2379 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
2380 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
2381 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
2382 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
2383 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
2384 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
2385 	[NDTPA_INTERVAL_PROBE_TIME_MS]	= { .type = NLA_U64, .min = 1 },
2386 };
2387 
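/* RTM_SETNEIGHTBL handler.  Per-device parms may be tuned from any
 * namespace, but the table-wide gc knobs (thresholds and interval) are
 * restricted to init_net.
 */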
2388 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2389 			struct netlink_ext_ack *extack)
2390 {
2391 	struct net *net = sock_net(skb->sk);
2392 	struct nlattr *tb[NDTA_MAX + 1];
2393 	struct neigh_table *tbl;
2394 	struct ndtmsg *ndtmsg;
2395 	bool found = false;
2396 	int err, tidx;
2397 
2398 	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2399 				     nl_neightbl_policy, extack);
2400 	if (err < 0)
2401 		goto errout;
2402 
2403 	if (tb[NDTA_NAME] == NULL) {
2404 		err = -EINVAL;
2405 		goto errout;
2406 	}
2407 
2408 	ndtmsg = nlmsg_data(nlh);
2409 
2410 	rcu_read_lock();
2411 
2412 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2413 		tbl = rcu_dereference(neigh_tables[tidx]);
2414 		if (!tbl)
2415 			continue;
2416 
2417 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2418 			continue;
2419 
2420 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2421 			found = true;
2422 			break;
2423 		}
2424 	}
2425 
2426 	if (!found) {
2427 		rcu_read_unlock();
2428 		err = -ENOENT;
2429 		goto errout;
2430 	}
2431 
2432 	/*
2433 	 * We acquire tbl->lock to be nice to the periodic workers and
2434 	 * timers, and make sure they always see a consistent set of values.
2435 	 */
2436 	spin_lock_bh(&tbl->lock);
2437 
2438 	if (tb[NDTA_PARMS]) {
2439 		struct nlattr *tbp[NDTPA_MAX+1];
2440 		struct neigh_parms *p;
2441 		int i, ifindex = 0;
2442 
2443 		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2444 						  tb[NDTA_PARMS],
2445 						  nl_ntbl_parm_policy, extack);
2446 		if (err < 0)
2447 			goto errout_tbl_lock;
2448 
2449 		if (tbp[NDTPA_IFINDEX])
2450 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2451 
2452 		p = lookup_neigh_parms(tbl, net, ifindex);
2453 		if (p == NULL) {
2454 			err = -ENOENT;
2455 			goto errout_tbl_lock;
2456 		}
2457 
2458 		for (i = 1; i <= NDTPA_MAX; i++) {
2459 			if (tbp[i] == NULL)
2460 				continue;
2461 
2462 			switch (i) {
2463 			case NDTPA_QUEUE_LEN:
2464 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2465 					      nla_get_u32(tbp[i]) *
2466 					      SKB_TRUESIZE(ETH_FRAME_LEN));
2467 				break;
2468 			case NDTPA_QUEUE_LENBYTES:
2469 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2470 					      nla_get_u32(tbp[i]));
2471 				break;
2472 			case NDTPA_PROXY_QLEN:
2473 				NEIGH_VAR_SET(p, PROXY_QLEN,
2474 					      nla_get_u32(tbp[i]));
2475 				break;
2476 			case NDTPA_APP_PROBES:
2477 				NEIGH_VAR_SET(p, APP_PROBES,
2478 					      nla_get_u32(tbp[i]));
2479 				break;
2480 			case NDTPA_UCAST_PROBES:
2481 				NEIGH_VAR_SET(p, UCAST_PROBES,
2482 					      nla_get_u32(tbp[i]));
2483 				break;
2484 			case NDTPA_MCAST_PROBES:
2485 				NEIGH_VAR_SET(p, MCAST_PROBES,
2486 					      nla_get_u32(tbp[i]));
2487 				break;
2488 			case NDTPA_MCAST_REPROBES:
2489 				NEIGH_VAR_SET(p, MCAST_REPROBES,
2490 					      nla_get_u32(tbp[i]));
2491 				break;
2492 			case NDTPA_BASE_REACHABLE_TIME:
2493 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2494 					      nla_get_msecs(tbp[i]));
2495 				/* update reachable_time as well, otherwise, the change will
2496 				 * only be effective after the next time neigh_periodic_work
2497 				 * decides to recompute it (can be multiple minutes)
2498 				 */
2499 				neigh_set_reach_time(p);
2500 				break;
2501 			case NDTPA_GC_STALETIME:
2502 				NEIGH_VAR_SET(p, GC_STALETIME,
2503 					      nla_get_msecs(tbp[i]));
2504 				break;
2505 			case NDTPA_DELAY_PROBE_TIME:
2506 				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2507 					      nla_get_msecs(tbp[i]));
2508 				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2509 				break;
2510 			case NDTPA_INTERVAL_PROBE_TIME_MS:
2511 				NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
2512 					      nla_get_msecs(tbp[i]));
2513 				break;
2514 			case NDTPA_RETRANS_TIME:
2515 				NEIGH_VAR_SET(p, RETRANS_TIME,
2516 					      nla_get_msecs(tbp[i]));
2517 				break;
2518 			case NDTPA_ANYCAST_DELAY:
2519 				NEIGH_VAR_SET(p, ANYCAST_DELAY,
2520 					      nla_get_msecs(tbp[i]));
2521 				break;
2522 			case NDTPA_PROXY_DELAY:
2523 				NEIGH_VAR_SET(p, PROXY_DELAY,
2524 					      nla_get_msecs(tbp[i]));
2525 				break;
2526 			case NDTPA_LOCKTIME:
2527 				NEIGH_VAR_SET(p, LOCKTIME,
2528 					      nla_get_msecs(tbp[i]));
2529 				break;
2530 			}
2531 		}
2532 	}
2533 
2534 	err = -ENOENT;
2535 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2536 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2537 	    !net_eq(net, &init_net))
2538 		goto errout_tbl_lock;
2539 
2540 	if (tb[NDTA_THRESH1])
2541 		WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));
2542 
2543 	if (tb[NDTA_THRESH2])
2544 		WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));
2545 
2546 	if (tb[NDTA_THRESH3])
2547 		WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));
2548 
2549 	if (tb[NDTA_GC_INTERVAL])
2550 		WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));
2551 
2552 	err = 0;
2553 
2554 errout_tbl_lock:
2555 	spin_unlock_bh(&tbl->lock);
2556 	rcu_read_unlock();
2557 errout:
2558 	return err;
2559 }
2560 
2561 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2562 				    struct netlink_ext_ack *extack)
2563 {
2564 	struct ndtmsg *ndtm;
2565 
2566 	ndtm = nlmsg_payload(nlh, sizeof(*ndtm));
2567 	if (!ndtm) {
2568 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2569 		return -EINVAL;
2570 	}
2571 
2572 	if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
2573 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2574 		return -EINVAL;
2575 	}
2576 
2577 	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2578 		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2579 		return -EINVAL;
2580 	}
2581 
2582 	return 0;
2583 }
2584 
2585 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2586 {
2587 	const struct nlmsghdr *nlh = cb->nlh;
2588 	struct net *net = sock_net(skb->sk);
2589 	int family, tidx, nidx = 0;
2590 	int tbl_skip = cb->args[0];
2591 	int neigh_skip = cb->args[1];
2592 	struct neigh_table *tbl;
2593 
2594 	if (cb->strict_check) {
2595 		int err = neightbl_valid_dump_info(nlh, cb->extack);
2596 
2597 		if (err < 0)
2598 			return err;
2599 	}
2600 
2601 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2602 
2603 	rcu_read_lock();
2604 
2605 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2606 		struct neigh_parms *p;
2607 
2608 		tbl = rcu_dereference(neigh_tables[tidx]);
2609 		if (!tbl)
2610 			continue;
2611 
2612 		if (tidx < tbl_skip || (family && tbl->family != family))
2613 			continue;
2614 
2615 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2616 				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2617 				       NLM_F_MULTI) < 0)
2618 			break;
2619 
2620 		nidx = 0;
2621 		p = list_next_entry(&tbl->parms, list);
2622 		list_for_each_entry_from_rcu(p, &tbl->parms_list, list) {
2623 			if (!net_eq(neigh_parms_net(p), net))
2624 				continue;
2625 
2626 			if (nidx < neigh_skip)
2627 				goto next;
2628 
2629 			if (neightbl_fill_param_info(skb, tbl, p,
2630 						     NETLINK_CB(cb->skb).portid,
2631 						     nlh->nlmsg_seq,
2632 						     RTM_NEWNEIGHTBL,
2633 						     NLM_F_MULTI) < 0)
2634 				goto out;
2635 		next:
2636 			nidx++;
2637 		}
2638 
2639 		neigh_skip = 0;
2640 	}
2641 out:
2642 	rcu_read_unlock();
2643 
2644 	cb->args[0] = tidx;
2645 	cb->args[1] = nidx;
2646 
2647 	return skb->len;
2648 }
2649 
2650 static int __neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2651 			     u32 pid, u32 seq, int type, unsigned int flags)
2652 {
2653 	u32 neigh_flags, neigh_flags_ext;
2654 	unsigned long now = jiffies;
2655 	struct nda_cacheinfo ci;
2656 	struct nlmsghdr *nlh;
2657 	struct ndmsg *ndm;
2658 
2659 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2660 	if (nlh == NULL)
2661 		return -EMSGSIZE;
2662 
2663 	neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2664 	neigh_flags     = neigh->flags & NTF_OLD_MASK;
2665 
2666 	ndm = nlmsg_data(nlh);
2667 	ndm->ndm_family	 = neigh->ops->family;
2668 	ndm->ndm_pad1    = 0;
2669 	ndm->ndm_pad2    = 0;
2670 	ndm->ndm_flags	 = neigh_flags;
2671 	ndm->ndm_type	 = neigh->type;
2672 	ndm->ndm_ifindex = neigh->dev->ifindex;
2673 
2674 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2675 		goto nla_put_failure;
2676 
2677 	ndm->ndm_state	 = neigh->nud_state;
2678 	if (neigh->nud_state & NUD_VALID) {
2679 		char haddr[MAX_ADDR_LEN];
2680 
2681 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2682 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0)
2683 			goto nla_put_failure;
2684 	}
2685 
2686 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2687 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2688 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2689 	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
2690 
2691 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2692 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2693 		goto nla_put_failure;
2694 
2695 	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2696 		goto nla_put_failure;
2697 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2698 		goto nla_put_failure;
2699 
2700 	nlmsg_end(skb, nlh);
2701 	return 0;
2702 
2703 nla_put_failure:
2704 	nlmsg_cancel(skb, nlh);
2705 	return -EMSGSIZE;
2706 }
2707 
2708 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2709 			   u32 pid, u32 seq, int type, unsigned int flags)
2710 	__releases(neigh->lock)
2711 	__acquires(neigh->lock)
2712 {
2713 	int err;
2714 
2715 	read_lock_bh(&neigh->lock);
2716 	err = __neigh_fill_info(skb, neigh, pid, seq, type, flags);
2717 	read_unlock_bh(&neigh->lock);
2718 
2719 	return err;
2720 }
2721 
2722 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2723 			    u32 pid, u32 seq, int type, unsigned int flags,
2724 			    struct neigh_table *tbl)
2725 {
2726 	u32 neigh_flags, neigh_flags_ext;
2727 	struct nlmsghdr *nlh;
2728 	struct ndmsg *ndm;
2729 	u8 protocol;
2730 
2731 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2732 	if (nlh == NULL)
2733 		return -EMSGSIZE;
2734 
2735 	neigh_flags = READ_ONCE(pn->flags);
2736 	neigh_flags_ext = neigh_flags >> NTF_EXT_SHIFT;
2737 	neigh_flags &= NTF_OLD_MASK;
2738 
2739 	ndm = nlmsg_data(nlh);
2740 	ndm->ndm_family	 = tbl->family;
2741 	ndm->ndm_pad1    = 0;
2742 	ndm->ndm_pad2    = 0;
2743 	ndm->ndm_flags	 = neigh_flags | NTF_PROXY;
2744 	ndm->ndm_type	 = RTN_UNICAST;
2745 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2746 	ndm->ndm_state	 = NUD_NONE;
2747 
2748 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2749 		goto nla_put_failure;
2750 
2751 	protocol = READ_ONCE(pn->protocol);
2752 	if (protocol && nla_put_u8(skb, NDA_PROTOCOL, protocol))
2753 		goto nla_put_failure;
2754 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2755 		goto nla_put_failure;
2756 
2757 	nlmsg_end(skb, nlh);
2758 	return 0;
2759 
2760 nla_put_failure:
2761 	nlmsg_cancel(skb, nlh);
2762 	return -EMSGSIZE;
2763 }
2764 
2765 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2766 {
2767 	struct net_device *master;
2768 
2769 	if (!master_idx)
2770 		return false;
2771 
2772 	master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
2773 
2774 	/* 0 already denotes that NDA_MASTER wasn't passed, so we need another
2775 	 * invalid ifindex value to mean "no master".
2776 	 */
2777 	if (master_idx == -1)
2778 		return !!master;
2779 
2780 	if (!master || master->ifindex != master_idx)
2781 		return true;
2782 
2783 	return false;
2784 }
2785 
2786 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2787 {
2788 	if (filter_idx && (!dev || dev->ifindex != filter_idx))
2789 		return true;
2790 
2791 	return false;
2792 }
2793 
2794 struct neigh_dump_filter {
2795 	int master_idx;
2796 	int dev_idx;
2797 };
2798 
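/* Dump one table's entries as RTM_NEWNEIGH messages, resuming from the
 * bucket/index cursor stashed in cb->args and honouring the optional
 * device and master filters.
 */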
2799 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2800 			    struct netlink_callback *cb,
2801 			    struct neigh_dump_filter *filter)
2802 {
2803 	struct net *net = sock_net(skb->sk);
2804 	struct neighbour *n;
2805 	int err = 0, h, s_h = cb->args[1];
2806 	int idx, s_idx = idx = cb->args[2];
2807 	struct neigh_hash_table *nht;
2808 	unsigned int flags = NLM_F_MULTI;
2809 
2810 	if (filter->dev_idx || filter->master_idx)
2811 		flags |= NLM_F_DUMP_FILTERED;
2812 
2813 	nht = rcu_dereference(tbl->nht);
2814 
2815 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2816 		if (h > s_h)
2817 			s_idx = 0;
2818 		idx = 0;
2819 		neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
2820 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2821 				goto next;
2822 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2823 			    neigh_master_filtered(n->dev, filter->master_idx))
2824 				goto next;
2825 			err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2826 					      cb->nlh->nlmsg_seq,
2827 					      RTM_NEWNEIGH, flags);
2828 			if (err < 0)
2829 				goto out;
2830 next:
2831 			idx++;
2832 		}
2833 	}
2834 out:
2835 	cb->args[1] = h;
2836 	cb->args[2] = idx;
2837 	return err;
2838 }
2839 
2840 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2841 			     struct netlink_callback *cb,
2842 			     struct neigh_dump_filter *filter)
2843 {
2844 	struct pneigh_entry *n;
2845 	struct net *net = sock_net(skb->sk);
2846 	int err = 0, h, s_h = cb->args[3];
2847 	int idx, s_idx = idx = cb->args[4];
2848 	unsigned int flags = NLM_F_MULTI;
2849 
2850 	if (filter->dev_idx || filter->master_idx)
2851 		flags |= NLM_F_DUMP_FILTERED;
2852 
2853 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2854 		if (h > s_h)
2855 			s_idx = 0;
2856 		for (n = rcu_dereference(tbl->phash_buckets[h]), idx = 0;
2857 		     n;
2858 		     n = rcu_dereference(n->next)) {
2859 			if (idx < s_idx || pneigh_net(n) != net)
2860 				goto next;
2861 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2862 			    neigh_master_filtered(n->dev, filter->master_idx))
2863 				goto next;
2864 			err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2865 					       cb->nlh->nlmsg_seq,
2866 					       RTM_NEWNEIGH, flags, tbl);
2867 			if (err < 0)
2868 				goto out;
2869 		next:
2870 			idx++;
2871 		}
2872 	}
2873 
2874 out:
2875 	cb->args[3] = h;
2876 	cb->args[4] = idx;
2877 	return err;
2878 }
2879 
2880 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2881 				bool strict_check,
2882 				struct neigh_dump_filter *filter,
2883 				struct netlink_ext_ack *extack)
2884 {
2885 	struct nlattr *tb[NDA_MAX + 1];
2886 	int err, i;
2887 
2888 	if (strict_check) {
2889 		struct ndmsg *ndm;
2890 
2891 		ndm = nlmsg_payload(nlh, sizeof(*ndm));
2892 		if (!ndm) {
2893 			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2894 			return -EINVAL;
2895 		}
2896 
2897 		if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
2898 		    ndm->ndm_state || ndm->ndm_type) {
2899 			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2900 			return -EINVAL;
2901 		}
2902 
2903 		if (ndm->ndm_flags & ~NTF_PROXY) {
2904 			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2905 			return -EINVAL;
2906 		}
2907 
2908 		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2909 						    tb, NDA_MAX, nda_policy,
2910 						    extack);
2911 	} else {
2912 		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2913 					     NDA_MAX, nda_policy, extack);
2914 	}
2915 	if (err < 0)
2916 		return err;
2917 
2918 	for (i = 0; i <= NDA_MAX; ++i) {
2919 		if (!tb[i])
2920 			continue;
2921 
2922 		/* all new attributes should require strict_check */
2923 		switch (i) {
2924 		case NDA_IFINDEX:
2925 			filter->dev_idx = nla_get_u32(tb[i]);
2926 			break;
2927 		case NDA_MASTER:
2928 			filter->master_idx = nla_get_u32(tb[i]);
2929 			break;
2930 		default:
2931 			if (strict_check) {
2932 				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2933 				return -EINVAL;
2934 			}
2935 		}
2936 	}
2937 
2938 	return 0;
2939 }
2940 
2941 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2942 {
2943 	const struct nlmsghdr *nlh = cb->nlh;
2944 	struct neigh_dump_filter filter = {};
2945 	struct neigh_table *tbl;
2946 	int t, family, s_t;
2947 	int proxy = 0;
2948 	int err;
2949 
2950 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2951 
2952 	/* Check that a full ndmsg structure is present before reading
2953 	 * ndm_flags; the family member is at the same offset in both
2954 	 * ndmsg and rtgenmsg.
	 */
2955 	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2956 	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2957 		proxy = 1;
2958 
2959 	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2960 	if (err < 0 && cb->strict_check)
2961 		return err;
2962 	err = 0;
2963 
2964 	s_t = cb->args[0];
2965 
2966 	rcu_read_lock();
2967 	for (t = 0; t < NEIGH_NR_TABLES; t++) {
2968 		tbl = rcu_dereference(neigh_tables[t]);
2969 
2970 		if (!tbl)
2971 			continue;
2972 		if (t < s_t || (family && tbl->family != family))
2973 			continue;
2974 		if (t > s_t)
2975 			memset(&cb->args[1], 0, sizeof(cb->args) -
2976 						sizeof(cb->args[0]));
2977 		if (proxy)
2978 			err = pneigh_dump_table(tbl, skb, cb, &filter);
2979 		else
2980 			err = neigh_dump_table(tbl, skb, cb, &filter);
2981 		if (err < 0)
2982 			break;
2983 	}
2984 	rcu_read_unlock();
2985 
2986 	cb->args[0] = t;
2987 	return err;
2988 }
2989 
2990 static struct ndmsg *neigh_valid_get_req(const struct nlmsghdr *nlh,
2991 					 struct nlattr **tb,
2992 					 struct netlink_ext_ack *extack)
2993 {
2994 	struct ndmsg *ndm;
2995 	int err, i;
2996 
2997 	ndm = nlmsg_payload(nlh, sizeof(*ndm));
2998 	if (!ndm) {
2999 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
3000 		return ERR_PTR(-EINVAL);
3001 	}
3002 
3003 	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
3004 	    ndm->ndm_type) {
3005 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
3006 		return ERR_PTR(-EINVAL);
3007 	}
3008 
3009 	if (ndm->ndm_flags & ~NTF_PROXY) {
3010 		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
3011 		return ERR_PTR(-EINVAL);
3012 	}
3013 
3014 	if (!(ndm->ndm_flags & NTF_PROXY) && !ndm->ndm_ifindex) {
3015 		NL_SET_ERR_MSG(extack, "No device specified");
3016 		return ERR_PTR(-EINVAL);
3017 	}
3018 
3019 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
3020 					    NDA_MAX, nda_policy, extack);
3021 	if (err < 0)
3022 		return ERR_PTR(err);
3023 
3024 	for (i = 0; i <= NDA_MAX; ++i) {
3025 		switch (i) {
3026 		case NDA_DST:
3027 			if (!tb[i]) {
3028 				NL_SET_ERR_ATTR_MISS(extack, NULL, NDA_DST);
3029 				return ERR_PTR(-EINVAL);
3030 			}
3031 			break;
3032 		default:
3033 			if (!tb[i])
3034 				continue;
3035 
3036 			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
3037 			return ERR_PTR(-EINVAL);
3038 		}
3039 	}
3040 
3041 	return ndm;
3042 }
3043 
3044 static inline size_t neigh_nlmsg_size(void)
3045 {
3046 	return NLMSG_ALIGN(sizeof(struct ndmsg))
3047 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3048 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
3049 	       + nla_total_size(sizeof(struct nda_cacheinfo))
3050 	       + nla_total_size(4)  /* NDA_PROBES */
3051 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
3052 	       + nla_total_size(1); /* NDA_PROTOCOL */
3053 }
3054 
3055 static inline size_t pneigh_nlmsg_size(void)
3056 {
3057 	return NLMSG_ALIGN(sizeof(struct ndmsg))
3058 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3059 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
3060 	       + nla_total_size(1); /* NDA_PROTOCOL */
3061 }
3062 
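/* RTM_GETNEIGH doit handler: look up a single neighbour or proxy entry
 * and unicast the result back to the requesting socket.
 */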
3063 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3064 		     struct netlink_ext_ack *extack)
3065 {
3066 	struct net *net = sock_net(in_skb->sk);
3067 	u32 pid = NETLINK_CB(in_skb).portid;
3068 	struct nlattr *tb[NDA_MAX + 1];
3069 	struct net_device *dev = NULL;
3070 	u32 seq = nlh->nlmsg_seq;
3071 	struct neigh_table *tbl;
3072 	struct neighbour *neigh;
3073 	struct sk_buff *skb;
3074 	struct ndmsg *ndm;
3075 	void *dst;
3076 	int err;
3077 
3078 	ndm = neigh_valid_get_req(nlh, tb, extack);
3079 	if (IS_ERR(ndm))
3080 		return PTR_ERR(ndm);
3081 
3082 	if (ndm->ndm_flags & NTF_PROXY)
3083 		skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
3084 	else
3085 		skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
3086 	if (!skb)
3087 		return -ENOBUFS;
3088 
3089 	rcu_read_lock();
3090 
3091 	tbl = neigh_find_table(ndm->ndm_family);
3092 	if (!tbl) {
3093 		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
3094 		err = -EAFNOSUPPORT;
3095 		goto err_unlock;
3096 	}
3097 
3098 	if (nla_len(tb[NDA_DST]) != (int)tbl->key_len) {
3099 		NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
3100 		err = -EINVAL;
3101 		goto err_unlock;
3102 	}
3103 
3104 	dst = nla_data(tb[NDA_DST]);
3105 
3106 	if (ndm->ndm_ifindex) {
3107 		dev = dev_get_by_index_rcu(net, ndm->ndm_ifindex);
3108 		if (!dev) {
3109 			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
3110 			err = -ENODEV;
3111 			goto err_unlock;
3112 		}
3113 	}
3114 
3115 	if (ndm->ndm_flags & NTF_PROXY) {
3116 		struct pneigh_entry *pn;
3117 
3118 		pn = pneigh_lookup(tbl, net, dst, dev);
3119 		if (!pn) {
3120 			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3121 			err = -ENOENT;
3122 			goto err_unlock;
3123 		}
3124 
3125 		err = pneigh_fill_info(skb, pn, pid, seq, RTM_NEWNEIGH, 0, tbl);
3126 		if (err)
3127 			goto err_unlock;
3128 	} else {
3129 		neigh = neigh_lookup(tbl, dst, dev);
3130 		if (!neigh) {
3131 			NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3132 			err = -ENOENT;
3133 			goto err_unlock;
3134 		}
3135 
3136 		err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
3137 		neigh_release(neigh);
3138 		if (err)
3139 			goto err_unlock;
3140 	}
3141 
3142 	rcu_read_unlock();
3143 
3144 	return rtnl_unicast(skb, net, pid);
3145 err_unlock:
3146 	rcu_read_unlock();
3147 	kfree_skb(skb);
3148 	return err;
3149 }
3150 
3151 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3152 {
3153 	int chain;
3154 	struct neigh_hash_table *nht;
3155 
3156 	rcu_read_lock();
3157 	nht = rcu_dereference(tbl->nht);
3158 
3159 	spin_lock_bh(&tbl->lock); /* avoid resizes */
3160 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3161 		struct neighbour *n;
3162 
3163 		neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
3164 			cb(n, cookie);
3165 	}
3166 	spin_unlock_bh(&tbl->lock);
3167 	rcu_read_unlock();
3168 }
3169 EXPORT_SYMBOL(neigh_for_each);
3170 
3171 /* The tbl->lock must be held and BH disabled. */
3172 void __neigh_for_each_release(struct neigh_table *tbl,
3173 			      int (*cb)(struct neighbour *))
3174 {
3175 	struct neigh_hash_table *nht;
3176 	int chain;
3177 
3178 	nht = rcu_dereference_protected(tbl->nht,
3179 					lockdep_is_held(&tbl->lock));
3180 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3181 		struct hlist_node *tmp;
3182 		struct neighbour *n;
3183 
3184 		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
3185 			int release;
3186 
3187 			write_lock(&n->lock);
3188 			release = cb(n);
3189 			if (release) {
3190 				hlist_del_rcu(&n->hash);
3191 				hlist_del_rcu(&n->dev_list);
3192 				neigh_mark_dead(n);
3193 			}
3194 			write_unlock(&n->lock);
3195 			if (release)
3196 				neigh_cleanup_and_release(n);
3197 		}
3198 	}
3199 }
3200 EXPORT_SYMBOL(__neigh_for_each_release);
3201 
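/* Transmit @skb to @addr via the neighbour table selected by @index,
 * creating an entry if none exists.  NEIGH_LINK_TABLE bypasses
 * resolution and builds the hard header directly.
 */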
3202 int neigh_xmit(int index, struct net_device *dev,
3203 	       const void *addr, struct sk_buff *skb)
3204 {
3205 	int err = -EAFNOSUPPORT;
3206 
3207 	if (likely(index < NEIGH_NR_TABLES)) {
3208 		struct neigh_table *tbl;
3209 		struct neighbour *neigh;
3210 
3211 		rcu_read_lock();
3212 		tbl = rcu_dereference(neigh_tables[index]);
3213 		if (!tbl)
3214 			goto out_unlock;
3215 		if (index == NEIGH_ARP_TABLE) {
3216 			u32 key = *((u32 *)addr);
3217 
3218 			neigh = __ipv4_neigh_lookup_noref(dev, key);
3219 		} else {
3220 			neigh = __neigh_lookup_noref(tbl, addr, dev);
3221 		}
3222 		if (!neigh)
3223 			neigh = __neigh_create(tbl, addr, dev, false);
3224 		err = PTR_ERR(neigh);
3225 		if (IS_ERR(neigh)) {
3226 			rcu_read_unlock();
3227 			goto out_kfree_skb;
3228 		}
3229 		err = READ_ONCE(neigh->output)(neigh, skb);
3230 out_unlock:
3231 		rcu_read_unlock();
3232 	} else if (index == NEIGH_LINK_TABLE) {
3234 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3235 				      addr, NULL, skb->len);
3236 		if (err < 0)
3237 			goto out_kfree_skb;
3238 		err = dev_queue_xmit(skb);
3239 	}
3240 out:
3241 	return err;
3242 out_kfree_skb:
3243 	kfree_skb(skb);
3244 	goto out;
3245 }
3246 EXPORT_SYMBOL(neigh_xmit);
3247 
3248 #ifdef CONFIG_PROC_FS
3249 
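/* /proc seq_file helpers: walk the main hash first and then, unless
 * NEIGH_SEQ_NEIGH_ONLY is set, the proxy hash, skipping entries that
 * belong to other network namespaces.
 */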
3250 static struct neighbour *neigh_get_valid(struct seq_file *seq,
3251 					 struct neighbour *n,
3252 					 loff_t *pos)
3253 {
3254 	struct neigh_seq_state *state = seq->private;
3255 	struct net *net = seq_file_net(seq);
3256 
3257 	if (!net_eq(dev_net(n->dev), net))
3258 		return NULL;
3259 
3260 	if (state->neigh_sub_iter) {
3261 		loff_t fakep = 0;
3262 		void *v;
3263 
3264 		v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
3265 		if (!v)
3266 			return NULL;
3267 		if (pos)
3268 			return v;
3269 	}
3270 
3271 	if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3272 		return n;
3273 
3274 	if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
3275 		return n;
3276 
3277 	return NULL;
3278 }
3279 
3280 static struct neighbour *neigh_get_first(struct seq_file *seq)
3281 {
3282 	struct neigh_seq_state *state = seq->private;
3283 	struct neigh_hash_table *nht = state->nht;
3284 	struct neighbour *n, *tmp;
3285 
3286 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3287 
3288 	while (++state->bucket < (1 << nht->hash_shift)) {
3289 		neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
3290 			tmp = neigh_get_valid(seq, n, NULL);
3291 			if (tmp)
3292 				return tmp;
3293 		}
3294 	}
3295 
3296 	return NULL;
3297 }
3298 
3299 static struct neighbour *neigh_get_next(struct seq_file *seq,
3300 					struct neighbour *n,
3301 					loff_t *pos)
3302 {
3303 	struct neigh_seq_state *state = seq->private;
3304 	struct neighbour *tmp;
3305 
3306 	if (state->neigh_sub_iter) {
3307 		void *v = state->neigh_sub_iter(state, n, pos);
3308 
3309 		if (v)
3310 			return n;
3311 	}
3312 
3313 	hlist_for_each_entry_continue(n, hash) {
3314 		tmp = neigh_get_valid(seq, n, pos);
3315 		if (tmp) {
3316 			n = tmp;
3317 			goto out;
3318 		}
3319 	}
3320 
3321 	n = neigh_get_first(seq);
3322 out:
3323 	if (n && pos)
3324 		--(*pos);
3325 
3326 	return n;
3327 }
3328 
3329 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3330 {
3331 	struct neighbour *n = neigh_get_first(seq);
3332 
3333 	if (n) {
3334 		--(*pos);
3335 		while (*pos) {
3336 			n = neigh_get_next(seq, n, pos);
3337 			if (!n)
3338 				break;
3339 		}
3340 	}
3341 	return *pos ? NULL : n;
3342 }
3343 
3344 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3345 {
3346 	struct neigh_seq_state *state = seq->private;
3347 	struct net *net = seq_file_net(seq);
3348 	struct neigh_table *tbl = state->tbl;
3349 	struct pneigh_entry *pn = NULL;
3350 	int bucket;
3351 
3352 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
3353 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3354 		pn = rcu_dereference(tbl->phash_buckets[bucket]);
3355 
3356 		while (pn && !net_eq(pneigh_net(pn), net))
3357 			pn = rcu_dereference(pn->next);
3358 		if (pn)
3359 			break;
3360 	}
3361 	state->bucket = bucket;
3362 
3363 	return pn;
3364 }
3365 
3366 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3367 					    struct pneigh_entry *pn,
3368 					    loff_t *pos)
3369 {
3370 	struct neigh_seq_state *state = seq->private;
3371 	struct net *net = seq_file_net(seq);
3372 	struct neigh_table *tbl = state->tbl;
3373 
3374 	do {
3375 		pn = rcu_dereference(pn->next);
3376 	} while (pn && !net_eq(pneigh_net(pn), net));
3377 
3378 	while (!pn) {
3379 		if (++state->bucket > PNEIGH_HASHMASK)
3380 			break;
3381 
3382 		pn = rcu_dereference(tbl->phash_buckets[state->bucket]);
3383 
3384 		while (pn && !net_eq(pneigh_net(pn), net))
3385 			pn = rcu_dereference(pn->next);
3386 		if (pn)
3387 			break;
3388 	}
3389 
3390 	if (pn && pos)
3391 		--(*pos);
3392 
3393 	return pn;
3394 }
3395 
3396 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3397 {
3398 	struct pneigh_entry *pn = pneigh_get_first(seq);
3399 
3400 	if (pn) {
3401 		--(*pos);
3402 		while (*pos) {
3403 			pn = pneigh_get_next(seq, pn, pos);
3404 			if (!pn)
3405 				break;
3406 		}
3407 	}
3408 	return *pos ? NULL : pn;
3409 }
3410 
3411 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3412 {
3413 	struct neigh_seq_state *state = seq->private;
3414 	void *rc;
3415 	loff_t idxpos = *pos;
3416 
3417 	rc = neigh_get_idx(seq, &idxpos);
3418 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3419 		rc = pneigh_get_idx(seq, &idxpos);
3420 
3421 	return rc;
3422 }
3423 
3424 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3425 	__acquires(tbl->lock)
3426 	__acquires(rcu)
3427 {
3428 	struct neigh_seq_state *state = seq->private;
3429 
3430 	state->tbl = tbl;
3431 	state->bucket = -1;
3432 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3433 
3434 	rcu_read_lock();
3435 	state->nht = rcu_dereference(tbl->nht);
3436 	spin_lock_bh(&tbl->lock);
3437 
3438 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3439 }
3440 EXPORT_SYMBOL(neigh_seq_start);
3441 
3442 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3443 {
3444 	struct neigh_seq_state *state;
3445 	void *rc;
3446 
3447 	if (v == SEQ_START_TOKEN) {
3448 		rc = neigh_get_first(seq);
3449 		goto out;
3450 	}
3451 
3452 	state = seq->private;
3453 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3454 		rc = neigh_get_next(seq, v, NULL);
3455 		if (rc)
3456 			goto out;
3457 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3458 			rc = pneigh_get_first(seq);
3459 	} else {
3460 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3461 		rc = pneigh_get_next(seq, v, NULL);
3462 	}
3463 out:
3464 	++(*pos);
3465 	return rc;
3466 }
3467 EXPORT_SYMBOL(neigh_seq_next);
3468 
3469 void neigh_seq_stop(struct seq_file *seq, void *v)
3470 	__releases(tbl->lock)
3471 	__releases(rcu)
3472 {
3473 	struct neigh_seq_state *state = seq->private;
3474 	struct neigh_table *tbl = state->tbl;
3475 
3476 	spin_unlock_bh(&tbl->lock);
3477 	rcu_read_unlock();
3478 }
3479 EXPORT_SYMBOL(neigh_seq_stop);
3480 
3481 /* statistics via seq_file */
3482 
3483 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3484 {
3485 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3486 	int cpu;
3487 
3488 	if (*pos == 0)
3489 		return SEQ_START_TOKEN;
3490 
3491 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3492 		if (!cpu_possible(cpu))
3493 			continue;
3494 		*pos = cpu+1;
3495 		return per_cpu_ptr(tbl->stats, cpu);
3496 	}
3497 	return NULL;
3498 }
3499 
3500 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3501 {
3502 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3503 	int cpu;
3504 
3505 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3506 		if (!cpu_possible(cpu))
3507 			continue;
3508 		*pos = cpu+1;
3509 		return per_cpu_ptr(tbl->stats, cpu);
3510 	}
3511 	(*pos)++;
3512 	return NULL;
3513 }
3514 
3515 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3516 {
3517 }
3519 
3520 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3521 {
3522 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3523 	struct neigh_statistics *st = v;
3524 
3525 	if (v == SEQ_START_TOKEN) {
3526 		seq_puts(seq, "entries  allocs   destroys hash_grows lookups  hits     res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3527 		return 0;
3528 	}
3529 
3530 	seq_printf(seq, "%08x %08lx %08lx %08lx   %08lx %08lx %08lx   "
3531 			"%08lx         %08lx         %08lx         "
3532 			"%08lx       %08lx            %08lx\n",
3533 		   atomic_read(&tbl->entries),
3534 
3535 		   st->allocs,
3536 		   st->destroys,
3537 		   st->hash_grows,
3538 
3539 		   st->lookups,
3540 		   st->hits,
3541 
3542 		   st->res_failed,
3543 
3544 		   st->rcv_probes_mcast,
3545 		   st->rcv_probes_ucast,
3546 
3547 		   st->periodic_gc_runs,
3548 		   st->forced_gc_runs,
3549 		   st->unres_discards,
3550 		   st->table_fulls
3551 		   );
3552 
3553 	return 0;
3554 }
3555 
3556 static const struct seq_operations neigh_stat_seq_ops = {
3557 	.start	= neigh_stat_seq_start,
3558 	.next	= neigh_stat_seq_next,
3559 	.stop	= neigh_stat_seq_stop,
3560 	.show	= neigh_stat_seq_show,
3561 };
3562 #endif /* CONFIG_PROC_FS */
3563 
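/* Multicast a neighbour event of @type to RTNLGRP_NEIGH listeners in
 * the namespace of n->dev.  Uses GFP_ATOMIC so it is safe to call from
 * timer and BH context.
 */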
3564 static void __neigh_notify(struct neighbour *n, int type, int flags,
3565 			   u32 pid)
3566 {
3567 	struct sk_buff *skb;
3568 	int err = -ENOBUFS;
3569 	struct net *net;
3570 
3571 	rcu_read_lock();
3572 	net = dev_net_rcu(n->dev);
3573 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3574 	if (skb == NULL)
3575 		goto errout;
3576 
3577 	err = __neigh_fill_info(skb, n, pid, 0, type, flags);
3578 	if (err < 0) {
3579 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3580 		WARN_ON(err == -EMSGSIZE);
3581 		kfree_skb(skb);
3582 		goto errout;
3583 	}
3584 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3585 	goto out;
3586 errout:
3587 	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3588 out:
3589 	rcu_read_unlock();
3590 }
3591 
3592 static void neigh_notify(struct neighbour *neigh, int type, int flags, u32 pid)
3593 {
3594 	read_lock_bh(&neigh->lock);
3595 	__neigh_notify(neigh, type, flags, pid);
3596 	read_unlock_bh(&neigh->lock);
3597 }
3598 
3599 void neigh_app_ns(struct neighbour *n)
3600 {
3601 	neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3602 }
3603 EXPORT_SYMBOL(neigh_app_ns);
3604 
3605 #ifdef CONFIG_SYSCTL
3606 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3607 
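/* unres_qlen is exposed in packets but stored in bytes; convert with
 * SKB_TRUESIZE(ETH_FRAME_LEN) in both directions, clamping writes to
 * [0, unres_qlen_max].
 */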
3608 static int proc_unres_qlen(const struct ctl_table *ctl, int write,
3609 			   void *buffer, size_t *lenp, loff_t *ppos)
3610 {
3611 	int size, ret;
3612 	struct ctl_table tmp = *ctl;
3613 
3614 	tmp.extra1 = SYSCTL_ZERO;
3615 	tmp.extra2 = &unres_qlen_max;
3616 	tmp.data = &size;
3617 
3618 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3619 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3620 
3621 	if (write && !ret)
3622 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3623 	return ret;
3624 }
3625 
3626 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3627 				  int index)
3628 {
3629 	struct net_device *dev;
3630 	int family = neigh_parms_family(p);
3631 
3632 	rcu_read_lock();
3633 	for_each_netdev_rcu(net, dev) {
3634 		struct neigh_parms *dst_p =
3635 				neigh_get_dev_parms_rcu(dev, family);
3636 
3637 		if (dst_p && !test_bit(index, dst_p->data_state))
3638 			dst_p->data[index] = p->data[index];
3639 	}
3640 	rcu_read_unlock();
3641 }
3642 
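/* Post-write hook shared by the neigh sysctl handlers: mark the value
 * as explicitly set, fire the netevent for delay_probe_time changes,
 * and copy writes to the "default" parms to every device that has not
 * overridden that value itself.
 */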
3643 static void neigh_proc_update(const struct ctl_table *ctl, int write)
3644 {
3645 	struct net_device *dev = ctl->extra1;
3646 	struct neigh_parms *p = ctl->extra2;
3647 	struct net *net = neigh_parms_net(p);
3648 	int index = (int *) ctl->data - p->data;
3649 
3650 	if (!write)
3651 		return;
3652 
3653 	set_bit(index, p->data_state);
3654 	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3655 		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3656 	if (!dev) /* NULL dev means this is default value */
3657 		neigh_copy_dflt_parms(net, p, index);
3658 }
3659 
3660 static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
3661 					   void *buffer, size_t *lenp,
3662 					   loff_t *ppos)
3663 {
3664 	struct ctl_table tmp = *ctl;
3665 	int ret;
3666 
3667 	tmp.extra1 = SYSCTL_ZERO;
3668 	tmp.extra2 = SYSCTL_INT_MAX;
3669 
3670 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3671 	neigh_proc_update(ctl, write);
3672 	return ret;
3673 }
3674 
3675 static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
3676 						   void *buffer, size_t *lenp, loff_t *ppos)
3677 {
3678 	struct ctl_table tmp = *ctl;
3679 	int ret;
3680 	int min = msecs_to_jiffies(1);
3682 
3683 	tmp.extra1 = &min;
3684 	tmp.extra2 = NULL;
3685 
3686 	ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
3687 	neigh_proc_update(ctl, write);
3688 	return ret;
3689 }
3690 
3691 int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
3692 			size_t *lenp, loff_t *ppos)
3693 {
3694 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3695 
3696 	neigh_proc_update(ctl, write);
3697 	return ret;
3698 }
3699 EXPORT_SYMBOL(neigh_proc_dointvec);
3700 
3701 int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
3702 				size_t *lenp, loff_t *ppos)
3703 {
3704 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3705 
3706 	neigh_proc_update(ctl, write);
3707 	return ret;
3708 }
3709 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3710 
3711 static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
3712 					      void *buffer, size_t *lenp,
3713 					      loff_t *ppos)
3714 {
3715 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3716 
3717 	neigh_proc_update(ctl, write);
3718 	return ret;
3719 }
3720 
3721 int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
3722 				   void *buffer, size_t *lenp, loff_t *ppos)
3723 {
3724 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3725 
3726 	neigh_proc_update(ctl, write);
3727 	return ret;
3728 }
3729 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3730 
3731 static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
3732 					  void *buffer, size_t *lenp,
3733 					  loff_t *ppos)
3734 {
3735 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3736 
3737 	neigh_proc_update(ctl, write);
3738 	return ret;
3739 }
3740 
3741 static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
3742 					  void *buffer, size_t *lenp,
3743 					  loff_t *ppos)
3744 {
3745 	struct neigh_parms *p = ctl->extra2;
3746 	int ret;
3747 
3748 	if (strcmp(ctl->procname, "base_reachable_time") == 0)
3749 		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3750 	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3751 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3752 	else
3753 		ret = -1;
3754 
3755 	if (write && ret == 0) {
3756 		/* update reachable_time as well, otherwise, the change will
3757 		 * only be effective after the next time neigh_periodic_work
3758 		 * decides to recompute it
3759 		 */
3760 		neigh_set_reach_time(p);
3761 	}
3762 	return ret;
3763 }
3764 
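/* The template stores offsets into struct neigh_parms as .data
 * placeholders; neigh_sysctl_register() rebases them by adding the
 * actual parms pointer for the device being registered.
 */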
3765 #define NEIGH_PARMS_DATA_OFFSET(index)	\
3766 	(&((struct neigh_parms *) 0)->data[index])
3767 
3768 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3769 	[NEIGH_VAR_ ## attr] = { \
3770 		.procname	= name, \
3771 		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3772 		.maxlen		= sizeof(int), \
3773 		.mode		= mval, \
3774 		.proc_handler	= proc, \
3775 	}
3776 
3777 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3778 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3779 
3780 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3781 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3782 
3783 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3784 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3785 
3786 #define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
3787 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
3788 
3789 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3790 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3791 
3792 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3793 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3794 
3795 static struct neigh_sysctl_table {
3796 	struct ctl_table_header *sysctl_header;
3797 	struct ctl_table neigh_vars[NEIGH_VAR_MAX];
3798 } neigh_sysctl_template __read_mostly = {
3799 	.neigh_vars = {
3800 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3801 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3802 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3803 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3804 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3805 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3806 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3807 		NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
3808 						       "interval_probe_time_ms"),
3809 		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3810 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3811 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3812 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3813 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3814 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3815 		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3816 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3817 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3818 		[NEIGH_VAR_GC_INTERVAL] = {
3819 			.procname	= "gc_interval",
3820 			.maxlen		= sizeof(int),
3821 			.mode		= 0644,
3822 			.proc_handler	= proc_dointvec_jiffies,
3823 		},
3824 		[NEIGH_VAR_GC_THRESH1] = {
3825 			.procname	= "gc_thresh1",
3826 			.maxlen		= sizeof(int),
3827 			.mode		= 0644,
3828 			.extra1		= SYSCTL_ZERO,
3829 			.extra2		= SYSCTL_INT_MAX,
3830 			.proc_handler	= proc_dointvec_minmax,
3831 		},
3832 		[NEIGH_VAR_GC_THRESH2] = {
3833 			.procname	= "gc_thresh2",
3834 			.maxlen		= sizeof(int),
3835 			.mode		= 0644,
3836 			.extra1		= SYSCTL_ZERO,
3837 			.extra2		= SYSCTL_INT_MAX,
3838 			.proc_handler	= proc_dointvec_minmax,
3839 		},
3840 		[NEIGH_VAR_GC_THRESH3] = {
3841 			.procname	= "gc_thresh3",
3842 			.maxlen		= sizeof(int),
3843 			.mode		= 0644,
3844 			.extra1		= SYSCTL_ZERO,
3845 			.extra2		= SYSCTL_INT_MAX,
3846 			.proc_handler	= proc_dointvec_minmax,
3847 		},
3848 	},
3849 };
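
/* Each entry in this template surfaces as a file under
 * /proc/sys/net/<ipv4|ipv6>/neigh/<default|ifname>/ once
 * neigh_sysctl_register() below builds the path and registers the table.
 * Per-device tables are cut off after base_reachable_time_ms, so the
 * gc_interval and gc_thresh* knobs only exist in the "default" directory.
 */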
3850 
3851 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3852 			  proc_handler *handler)
3853 {
3854 	int i;
3855 	struct neigh_sysctl_table *t;
3856 	const char *dev_name_source;
3857 	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];	/* "net/<proto>/neigh/<ifname>" */
3858 	char *p_name;
3859 	size_t neigh_vars_size;
3860 
3861 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
3862 	if (!t)
3863 		goto err;
3864 
3865 	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3866 		t->neigh_vars[i].data += (long) p;
3867 		t->neigh_vars[i].extra1 = dev;
3868 		t->neigh_vars[i].extra2 = p;
3869 	}
3870 
3871 	neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
3872 	if (dev) {
3873 		dev_name_source = dev->name;
3874 		/* Terminate the table early */
3875 		neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
3876 	} else {
3877 		struct neigh_table *tbl = p->tbl;
3878 		dev_name_source = "default";
3879 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3880 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3881 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3882 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3883 	}
3884 
3885 	if (handler) {
3886 		/* RetransTime */
3887 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3888 		/* ReachableTime */
3889 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3890 		/* RetransTime (in milliseconds) */
3891 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3892 		/* ReachableTime (in milliseconds) */
3893 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3894 	} else {
3895 		/* These handlers update p->reachable_time as soon as
3896 		 * base_reachable_time(_ms) is set, so the new value takes effect
3897 		 * on the next neighbour update instead of waiting for
3898 		 * neigh_periodic_work to recompute it (which can take several
3899 		 * minutes).  Any handler that replaces them must do the same.
3900 		 */
3901 		/* ReachableTime */
3902 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3903 			neigh_proc_base_reachable_time;
3904 		/* ReachableTime (in milliseconds) */
3905 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3906 			neigh_proc_base_reachable_time;
3907 	}
3908 
3909 	switch (neigh_parms_family(p)) {
3910 	case AF_INET:
3911 		p_name = "ipv4";
3912 		break;
3913 	case AF_INET6:
3914 		p_name = "ipv6";
3915 		break;
3916 	default:
3917 		BUG();
3918 	}
3919 
3920 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3921 		 p_name, dev_name_source);
3922 	t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
3923 						  neigh_path, t->neigh_vars,
3924 						  neigh_vars_size);
3925 	if (!t->sysctl_header)
3926 		goto free;
3927 
3928 	p->sysctl_table = t;
3929 	return 0;
3930 
3931 free:
3932 	kfree(t);
3933 err:
3934 	return -ENOBUFS;
3935 }
3936 EXPORT_SYMBOL(neigh_sysctl_register);
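
/* Typical call sites, modeled on the IPv4/IPv6 neighbour code (a sketch,
 * not copied verbatim):
 *
 *	neigh_sysctl_register(NULL, &arp_tbl.parms, NULL);
 *		table-wide defaults, no custom handler (as in arp_init())
 *
 *	neigh_sysctl_register(dev, idev->nd_parms,
 *			      &ndisc_ifinfo_sysctl_change);
 *		per-device parms with a protocol handler (as in ndisc);
 *		such a handler must update p->reachable_time itself, per
 *		the comment above.
 */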
3937 
3938 void neigh_sysctl_unregister(struct neigh_parms *p)
3939 {
3940 	if (p->sysctl_table) {
3941 		struct neigh_sysctl_table *t = p->sysctl_table;
3942 		p->sysctl_table = NULL;
3943 		unregister_net_sysctl_table(t->sysctl_header);
3944 		kfree(t);
3945 	}
3946 }
3947 EXPORT_SYMBOL(neigh_sysctl_unregister);
3948 
3949 #endif	/* CONFIG_SYSCTL */
3950 
3951 static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
3952 	{.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
3953 	{.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
3954 	{.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
3955 	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
3956 	{.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info,
3957 	 .flags = RTNL_FLAG_DUMP_UNLOCKED},
3958 	{.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set,
3959 	 .flags = RTNL_FLAG_DOIT_UNLOCKED},
3960 };
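
/* The RTNL_FLAG_DOIT_UNLOCKED / RTNL_FLAG_DUMP_UNLOCKED flags make
 * rtnetlink invoke these handlers without taking the RTNL mutex; the
 * neighbour code relies on its own table/entry locking and RCU instead,
 * which keeps large neighbour dumps off the contended global lock.
 */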
3961 
3962 static int __init neigh_init(void)
3963 {
3964 	rtnl_register_many(neigh_rtnl_msg_handlers);
3965 	return 0;
3966 }
3967 
3968 subsys_initcall(neigh_init);
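
/* subsys_initcall() runs before the fs and device initcall levels, so
 * these RTM_*NEIGH handlers are presumably in place before the protocols
 * (e.g. ARP via inet_init()) set up their neighbour tables.
 */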
3969