xref: /linux/net/core/neighbour.c (revision 300a0cfe9f375b2843bcb331bcfa7503475ef5dd)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Generic address resolution entity
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
8  *
9  *	Fixes:
10  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
11  *	Harald Welte		Add neighbour cache statistics like rtstat
12  */
13 
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 
16 #include <linux/slab.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/socket.h>
21 #include <linux/netdevice.h>
22 #include <linux/proc_fs.h>
23 #ifdef CONFIG_SYSCTL
24 #include <linux/sysctl.h>
25 #endif
26 #include <linux/times.h>
27 #include <net/net_namespace.h>
28 #include <net/neighbour.h>
29 #include <net/arp.h>
30 #include <net/dst.h>
31 #include <net/ip.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
41 
42 #include <trace/events/neigh.h>
43 
/* Compile-time verbosity threshold used by neigh_dbg(). */
#define NEIGH_DEBUG 1

/*
 * neigh_dbg - emit a pr_debug() message when @level <= NEIGH_DEBUG.
 *
 * @level is parenthesized so that an expression argument (for example
 * "a ? 1 : 2") is compared as a whole instead of being re-associated
 * with the "<=" operator.
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if ((level) <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
50 
51 #define PNEIGH_HASHMASK		0xF
52 
53 static void neigh_timer_handler(struct timer_list *t);
54 static void __neigh_notify(struct neighbour *n, int type, int flags,
55 			   u32 pid);
56 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
57 static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
58 			  bool skip_perm);
59 
60 #ifdef CONFIG_PROC_FS
61 static const struct seq_operations neigh_stat_seq_ops;
62 #endif
63 
64 static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
65 {
66 	int i;
67 
68 	switch (family) {
69 	default:
70 		DEBUG_NET_WARN_ON_ONCE(1);
71 		fallthrough; /* to avoid panic by null-ptr-deref */
72 	case AF_INET:
73 		i = NEIGH_ARP_TABLE;
74 		break;
75 	case AF_INET6:
76 		i = NEIGH_ND_TABLE;
77 		break;
78 	}
79 
80 	return &dev->neighbours[i];
81 }
82 
/*
   Neighbour hash table buckets are protected by the rwlock tbl->lock.

   - All scans of and updates to the hash buckets MUST be done under
     this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if the backend/driver wants to
     use the neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - by a reference count.
   - by the rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the link-layer address data and its
   validity state. However, the same lock is also used to protect other
   entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks into the neighbour tables.
 */
110 
111 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
112 {
113 	kfree_skb(skb);
114 	return -ENETDOWN;
115 }
116 
117 static void neigh_cleanup_and_release(struct neighbour *neigh)
118 {
119 	trace_neigh_cleanup_and_release(neigh, 0);
120 	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
121 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
122 	neigh_release(neigh);
123 }
124 
/*
 * Pick a random value from the interval (1/2)*base ... (3/2)*base.
 * This corresponds to the default IPv6 settings and is deliberately
 * not overridable, because it is a really reasonable choice.
 *
 * A @base of zero yields zero.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return get_random_u32_below(base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
136 
137 static void neigh_mark_dead(struct neighbour *n)
138 {
139 	n->dead = 1;
140 	if (!list_empty(&n->gc_list)) {
141 		list_del_init(&n->gc_list);
142 		atomic_dec(&n->tbl->gc_entries);
143 	}
144 	if (!list_empty(&n->managed_list))
145 		list_del_init(&n->managed_list);
146 }
147 
148 static void neigh_update_gc_list(struct neighbour *n)
149 {
150 	bool on_gc_list, exempt_from_gc;
151 
152 	write_lock_bh(&n->tbl->lock);
153 	write_lock(&n->lock);
154 	if (n->dead)
155 		goto out;
156 
157 	/* remove from the gc list if new state is permanent or if neighbor is
158 	 * externally learned / validated; otherwise entry should be on the gc
159 	 * list
160 	 */
161 	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
162 			 n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED);
163 	on_gc_list = !list_empty(&n->gc_list);
164 
165 	if (exempt_from_gc && on_gc_list) {
166 		list_del_init(&n->gc_list);
167 		atomic_dec(&n->tbl->gc_entries);
168 	} else if (!exempt_from_gc && !on_gc_list) {
169 		/* add entries to the tail; cleaning removes from the front */
170 		list_add_tail(&n->gc_list, &n->tbl->gc_list);
171 		atomic_inc(&n->tbl->gc_entries);
172 	}
173 out:
174 	write_unlock(&n->lock);
175 	write_unlock_bh(&n->tbl->lock);
176 }
177 
178 static void neigh_update_managed_list(struct neighbour *n)
179 {
180 	bool on_managed_list, add_to_managed;
181 
182 	write_lock_bh(&n->tbl->lock);
183 	write_lock(&n->lock);
184 	if (n->dead)
185 		goto out;
186 
187 	add_to_managed = n->flags & NTF_MANAGED;
188 	on_managed_list = !list_empty(&n->managed_list);
189 
190 	if (!add_to_managed && on_managed_list)
191 		list_del_init(&n->managed_list);
192 	else if (add_to_managed && !on_managed_list)
193 		list_add_tail(&n->managed_list, &n->tbl->managed_list);
194 out:
195 	write_unlock(&n->lock);
196 	write_unlock_bh(&n->tbl->lock);
197 }
198 
199 static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
200 			       bool *gc_update, bool *managed_update)
201 {
202 	u32 ndm_flags, old_flags = neigh->flags;
203 
204 	if (!(flags & NEIGH_UPDATE_F_ADMIN))
205 		return;
206 
207 	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
208 	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
209 	ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_VALIDATED) ? NTF_EXT_VALIDATED : 0;
210 
211 	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
212 		if (ndm_flags & NTF_EXT_LEARNED)
213 			neigh->flags |= NTF_EXT_LEARNED;
214 		else
215 			neigh->flags &= ~NTF_EXT_LEARNED;
216 		*notify = 1;
217 		*gc_update = true;
218 	}
219 	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
220 		if (ndm_flags & NTF_MANAGED)
221 			neigh->flags |= NTF_MANAGED;
222 		else
223 			neigh->flags &= ~NTF_MANAGED;
224 		*notify = 1;
225 		*managed_update = true;
226 	}
227 	if ((old_flags ^ ndm_flags) & NTF_EXT_VALIDATED) {
228 		if (ndm_flags & NTF_EXT_VALIDATED)
229 			neigh->flags |= NTF_EXT_VALIDATED;
230 		else
231 			neigh->flags &= ~NTF_EXT_VALIDATED;
232 		*notify = 1;
233 		*gc_update = true;
234 	}
235 }
236 
237 bool neigh_remove_one(struct neighbour *n)
238 {
239 	bool retval = false;
240 
241 	write_lock(&n->lock);
242 	if (refcount_read(&n->refcnt) == 1) {
243 		hlist_del_rcu(&n->hash);
244 		hlist_del_rcu(&n->dev_list);
245 		neigh_mark_dead(n);
246 		retval = true;
247 	}
248 	write_unlock(&n->lock);
249 	if (retval)
250 		neigh_cleanup_and_release(n);
251 	return retval;
252 }
253 
254 static int neigh_forced_gc(struct neigh_table *tbl)
255 {
256 	int max_clean = atomic_read(&tbl->gc_entries) -
257 			READ_ONCE(tbl->gc_thresh2);
258 	u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
259 	unsigned long tref = jiffies - 5 * HZ;
260 	struct neighbour *n, *tmp;
261 	int shrunk = 0;
262 	int loop = 0;
263 
264 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
265 
266 	write_lock_bh(&tbl->lock);
267 
268 	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
269 		if (refcount_read(&n->refcnt) == 1) {
270 			bool remove = false;
271 
272 			write_lock(&n->lock);
273 			if ((n->nud_state == NUD_FAILED) ||
274 			    (n->nud_state == NUD_NOARP) ||
275 			    (tbl->is_multicast &&
276 			     tbl->is_multicast(n->primary_key)) ||
277 			    !time_in_range(n->updated, tref, jiffies))
278 				remove = true;
279 			write_unlock(&n->lock);
280 
281 			if (remove && neigh_remove_one(n))
282 				shrunk++;
283 			if (shrunk >= max_clean)
284 				break;
285 			if (++loop == 16) {
286 				if (ktime_get_ns() > tmax)
287 					goto unlock;
288 				loop = 0;
289 			}
290 		}
291 	}
292 
293 	WRITE_ONCE(tbl->last_flush, jiffies);
294 unlock:
295 	write_unlock_bh(&tbl->lock);
296 
297 	return shrunk;
298 }
299 
300 static void neigh_add_timer(struct neighbour *n, unsigned long when)
301 {
302 	/* Use safe distance from the jiffies - LONG_MAX point while timer
303 	 * is running in DELAY/PROBE state but still show to user space
304 	 * large times in the past.
305 	 */
306 	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
307 
308 	neigh_hold(n);
309 	if (!time_in_range(n->confirmed, mint, jiffies))
310 		n->confirmed = mint;
311 	if (time_before(n->used, n->confirmed))
312 		n->used = n->confirmed;
313 	if (unlikely(mod_timer(&n->timer, when))) {
314 		printk("NEIGH: BUG, double timer add, state is %x\n",
315 		       n->nud_state);
316 		dump_stack();
317 	}
318 }
319 
320 static int neigh_del_timer(struct neighbour *n)
321 {
322 	if ((n->nud_state & NUD_IN_TIMER) &&
323 	    timer_delete(&n->timer)) {
324 		neigh_release(n);
325 		return 1;
326 	}
327 	return 0;
328 }
329 
330 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
331 						   int family)
332 {
333 	switch (family) {
334 	case AF_INET:
335 		return __in_dev_arp_parms_get_rcu(dev);
336 	case AF_INET6:
337 		return __in6_dev_nd_parms_get_rcu(dev);
338 	}
339 	return NULL;
340 }
341 
342 static void neigh_parms_qlen_dec(struct net_device *dev, int family)
343 {
344 	struct neigh_parms *p;
345 
346 	rcu_read_lock();
347 	p = neigh_get_dev_parms_rcu(dev, family);
348 	if (p)
349 		p->qlen--;
350 	rcu_read_unlock();
351 }
352 
353 static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
354 			       int family)
355 {
356 	struct sk_buff_head tmp;
357 	unsigned long flags;
358 	struct sk_buff *skb;
359 
360 	skb_queue_head_init(&tmp);
361 	spin_lock_irqsave(&list->lock, flags);
362 	skb = skb_peek(list);
363 	while (skb != NULL) {
364 		struct sk_buff *skb_next = skb_peek_next(skb, list);
365 		struct net_device *dev = skb->dev;
366 
367 		if (net == NULL || net_eq(dev_net(dev), net)) {
368 			neigh_parms_qlen_dec(dev, family);
369 			__skb_unlink(skb, list);
370 			__skb_queue_tail(&tmp, skb);
371 		}
372 		skb = skb_next;
373 	}
374 	spin_unlock_irqrestore(&list->lock, flags);
375 
376 	while ((skb = __skb_dequeue(&tmp))) {
377 		dev_put(skb->dev);
378 		kfree_skb(skb);
379 	}
380 }
381 
382 static void neigh_flush_one(struct neighbour *n)
383 {
384 	hlist_del_rcu(&n->hash);
385 	hlist_del_rcu(&n->dev_list);
386 
387 	write_lock(&n->lock);
388 
389 	neigh_del_timer(n);
390 	neigh_mark_dead(n);
391 
392 	if (refcount_read(&n->refcnt) != 1) {
393 		/* The most unpleasant situation.
394 		 * We must destroy neighbour entry,
395 		 * but someone still uses it.
396 		 *
397 		 * The destroy will be delayed until
398 		 * the last user releases us, but
399 		 * we must kill timers etc. and move
400 		 * it to safe state.
401 		 */
402 		__skb_queue_purge(&n->arp_queue);
403 		n->arp_queue_len_bytes = 0;
404 		WRITE_ONCE(n->output, neigh_blackhole);
405 
406 		if (n->nud_state & NUD_VALID)
407 			n->nud_state = NUD_NOARP;
408 		else
409 			n->nud_state = NUD_NONE;
410 
411 		neigh_dbg(2, "neigh %p is stray\n", n);
412 	}
413 
414 	write_unlock(&n->lock);
415 
416 	neigh_cleanup_and_release(n);
417 }
418 
419 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
420 			    bool skip_perm)
421 {
422 	struct hlist_head *dev_head;
423 	struct hlist_node *tmp;
424 	struct neighbour *n;
425 
426 	dev_head = neigh_get_dev_table(dev, tbl->family);
427 
428 	hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
429 		if (skip_perm &&
430 		    (n->nud_state & NUD_PERMANENT ||
431 		     n->flags & NTF_EXT_VALIDATED))
432 			continue;
433 
434 		neigh_flush_one(n);
435 	}
436 }
437 
438 static void neigh_flush_table(struct neigh_table *tbl)
439 {
440 	struct neigh_hash_table *nht;
441 	int i;
442 
443 	nht = rcu_dereference_protected(tbl->nht,
444 					lockdep_is_held(&tbl->lock));
445 
446 	for (i = 0; i < (1 << nht->hash_shift); i++) {
447 		struct hlist_node *tmp;
448 		struct neighbour *n;
449 
450 		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i])
451 			neigh_flush_one(n);
452 	}
453 }
454 
455 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
456 {
457 	write_lock_bh(&tbl->lock);
458 	neigh_flush_dev(tbl, dev, false);
459 	write_unlock_bh(&tbl->lock);
460 }
461 EXPORT_SYMBOL(neigh_changeaddr);
462 
463 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
464 			  bool skip_perm)
465 {
466 	write_lock_bh(&tbl->lock);
467 	if (likely(dev)) {
468 		neigh_flush_dev(tbl, dev, skip_perm);
469 	} else {
470 		DEBUG_NET_WARN_ON_ONCE(skip_perm);
471 		neigh_flush_table(tbl);
472 	}
473 	write_unlock_bh(&tbl->lock);
474 
475 	pneigh_ifdown(tbl, dev, skip_perm);
476 	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
477 			   tbl->family);
478 	if (skb_queue_empty_lockless(&tbl->proxy_queue))
479 		timer_delete_sync(&tbl->proxy_timer);
480 	return 0;
481 }
482 
483 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
484 {
485 	__neigh_ifdown(tbl, dev, true);
486 	return 0;
487 }
488 EXPORT_SYMBOL(neigh_carrier_down);
489 
490 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
491 {
492 	__neigh_ifdown(tbl, dev, false);
493 	return 0;
494 }
495 EXPORT_SYMBOL(neigh_ifdown);
496 
497 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
498 				     struct net_device *dev,
499 				     u32 flags, bool exempt_from_gc)
500 {
501 	struct neighbour *n = NULL;
502 	unsigned long now = jiffies;
503 	int entries, gc_thresh3;
504 
505 	if (exempt_from_gc)
506 		goto do_alloc;
507 
508 	entries = atomic_inc_return(&tbl->gc_entries) - 1;
509 	gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
510 	if (entries >= gc_thresh3 ||
511 	    (entries >= READ_ONCE(tbl->gc_thresh2) &&
512 	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
513 		if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
514 			net_info_ratelimited("%s: neighbor table overflow!\n",
515 					     tbl->id);
516 			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
517 			goto out_entries;
518 		}
519 	}
520 
521 do_alloc:
522 	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
523 	if (!n)
524 		goto out_entries;
525 
526 	__skb_queue_head_init(&n->arp_queue);
527 	rwlock_init(&n->lock);
528 	seqlock_init(&n->ha_lock);
529 	n->updated	  = n->used = now;
530 	n->nud_state	  = NUD_NONE;
531 	n->output	  = neigh_blackhole;
532 	n->flags	  = flags;
533 	seqlock_init(&n->hh.hh_lock);
534 	n->parms	  = neigh_parms_clone(&tbl->parms);
535 	timer_setup(&n->timer, neigh_timer_handler, 0);
536 
537 	NEIGH_CACHE_STAT_INC(tbl, allocs);
538 	n->tbl		  = tbl;
539 	refcount_set(&n->refcnt, 1);
540 	n->dead		  = 1;
541 	INIT_LIST_HEAD(&n->gc_list);
542 	INIT_LIST_HEAD(&n->managed_list);
543 
544 	atomic_inc(&tbl->entries);
545 out:
546 	return n;
547 
548 out_entries:
549 	if (!exempt_from_gc)
550 		atomic_dec(&tbl->gc_entries);
551 	goto out;
552 }
553 
554 static void neigh_get_hash_rnd(u32 *x)
555 {
556 	*x = get_random_u32() | 1;
557 }
558 
559 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
560 {
561 	size_t size = (1 << shift) * sizeof(struct hlist_head);
562 	struct hlist_head *hash_heads;
563 	struct neigh_hash_table *ret;
564 	int i;
565 
566 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
567 	if (!ret)
568 		return NULL;
569 
570 	hash_heads = kzalloc(size, GFP_ATOMIC);
571 	if (!hash_heads) {
572 		kfree(ret);
573 		return NULL;
574 	}
575 	ret->hash_heads = hash_heads;
576 	ret->hash_shift = shift;
577 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
578 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
579 	return ret;
580 }
581 
582 static void neigh_hash_free_rcu(struct rcu_head *head)
583 {
584 	struct neigh_hash_table *nht = container_of(head,
585 						    struct neigh_hash_table,
586 						    rcu);
587 
588 	kfree(nht->hash_heads);
589 	kfree(nht);
590 }
591 
592 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
593 						unsigned long new_shift)
594 {
595 	unsigned int i, hash;
596 	struct neigh_hash_table *new_nht, *old_nht;
597 
598 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
599 
600 	old_nht = rcu_dereference_protected(tbl->nht,
601 					    lockdep_is_held(&tbl->lock));
602 	new_nht = neigh_hash_alloc(new_shift);
603 	if (!new_nht)
604 		return old_nht;
605 
606 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
607 		struct hlist_node *tmp;
608 		struct neighbour *n;
609 
610 		neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
611 			hash = tbl->hash(n->primary_key, n->dev,
612 					 new_nht->hash_rnd);
613 
614 			hash >>= (32 - new_nht->hash_shift);
615 
616 			hlist_del_rcu(&n->hash);
617 			hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
618 		}
619 	}
620 
621 	rcu_assign_pointer(tbl->nht, new_nht);
622 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
623 	return new_nht;
624 }
625 
626 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
627 			       struct net_device *dev)
628 {
629 	struct neighbour *n;
630 
631 	NEIGH_CACHE_STAT_INC(tbl, lookups);
632 
633 	rcu_read_lock();
634 	n = __neigh_lookup_noref(tbl, pkey, dev);
635 	if (n) {
636 		if (!refcount_inc_not_zero(&n->refcnt))
637 			n = NULL;
638 		NEIGH_CACHE_STAT_INC(tbl, hits);
639 	}
640 
641 	rcu_read_unlock();
642 	return n;
643 }
644 EXPORT_SYMBOL(neigh_lookup);
645 
646 static struct neighbour *
647 ___neigh_create(struct neigh_table *tbl, const void *pkey,
648 		struct net_device *dev, u32 flags,
649 		bool exempt_from_gc, bool want_ref)
650 {
651 	u32 hash_val, key_len = tbl->key_len;
652 	struct neighbour *n1, *rc, *n;
653 	struct neigh_hash_table *nht;
654 	int error;
655 
656 	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
657 	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
658 	if (!n) {
659 		rc = ERR_PTR(-ENOBUFS);
660 		goto out;
661 	}
662 
663 	memcpy(n->primary_key, pkey, key_len);
664 	n->dev = dev;
665 	netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
666 
667 	/* Protocol specific setup. */
668 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
669 		rc = ERR_PTR(error);
670 		goto out_neigh_release;
671 	}
672 
673 	if (dev->netdev_ops->ndo_neigh_construct) {
674 		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
675 		if (error < 0) {
676 			rc = ERR_PTR(error);
677 			goto out_neigh_release;
678 		}
679 	}
680 
681 	/* Device specific setup. */
682 	if (n->parms->neigh_setup &&
683 	    (error = n->parms->neigh_setup(n)) < 0) {
684 		rc = ERR_PTR(error);
685 		goto out_neigh_release;
686 	}
687 
688 	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
689 
690 	write_lock_bh(&tbl->lock);
691 	nht = rcu_dereference_protected(tbl->nht,
692 					lockdep_is_held(&tbl->lock));
693 
694 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
695 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
696 
697 	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
698 
699 	if (n->parms->dead) {
700 		rc = ERR_PTR(-EINVAL);
701 		goto out_tbl_unlock;
702 	}
703 
704 	neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
705 		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
706 			if (want_ref)
707 				neigh_hold(n1);
708 			rc = n1;
709 			goto out_tbl_unlock;
710 		}
711 	}
712 
713 	n->dead = 0;
714 	if (!exempt_from_gc)
715 		list_add_tail(&n->gc_list, &n->tbl->gc_list);
716 	if (n->flags & NTF_MANAGED)
717 		list_add_tail(&n->managed_list, &n->tbl->managed_list);
718 	if (want_ref)
719 		neigh_hold(n);
720 	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
721 
722 	hlist_add_head_rcu(&n->dev_list,
723 			   neigh_get_dev_table(dev, tbl->family));
724 
725 	write_unlock_bh(&tbl->lock);
726 	neigh_dbg(2, "neigh %p is created\n", n);
727 	rc = n;
728 out:
729 	return rc;
730 out_tbl_unlock:
731 	write_unlock_bh(&tbl->lock);
732 out_neigh_release:
733 	if (!exempt_from_gc)
734 		atomic_dec(&tbl->gc_entries);
735 	neigh_release(n);
736 	goto out;
737 }
738 
739 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
740 				 struct net_device *dev, bool want_ref)
741 {
742 	bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);
743 
744 	return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
745 }
746 EXPORT_SYMBOL(__neigh_create);
747 
748 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
749 {
750 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
751 	hash_val ^= (hash_val >> 16);
752 	hash_val ^= hash_val >> 8;
753 	hash_val ^= hash_val >> 4;
754 	hash_val &= PNEIGH_HASHMASK;
755 	return hash_val;
756 }
757 
758 struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
759 				   struct net *net, const void *pkey,
760 				   struct net_device *dev)
761 {
762 	struct pneigh_entry *n;
763 	unsigned int key_len;
764 	u32 hash_val;
765 
766 	key_len = tbl->key_len;
767 	hash_val = pneigh_hash(pkey, key_len);
768 	n = rcu_dereference_check(tbl->phash_buckets[hash_val],
769 				  lockdep_is_held(&tbl->phash_lock));
770 
771 	while (n) {
772 		if (!memcmp(n->key, pkey, key_len) &&
773 		    net_eq(pneigh_net(n), net) &&
774 		    (n->dev == dev || !n->dev))
775 			return n;
776 
777 		n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
778 	}
779 
780 	return NULL;
781 }
782 EXPORT_IPV6_MOD(pneigh_lookup);
783 
784 int pneigh_create(struct neigh_table *tbl, struct net *net,
785 		  const void *pkey, struct net_device *dev,
786 		  u32 flags, u8 protocol, bool permanent)
787 {
788 	struct pneigh_entry *n;
789 	unsigned int key_len;
790 	u32 hash_val;
791 	int err = 0;
792 
793 	mutex_lock(&tbl->phash_lock);
794 
795 	n = pneigh_lookup(tbl, net, pkey, dev);
796 	if (n)
797 		goto update;
798 
799 	key_len = tbl->key_len;
800 	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
801 	if (!n) {
802 		err = -ENOBUFS;
803 		goto out;
804 	}
805 
806 	write_pnet(&n->net, net);
807 	memcpy(n->key, pkey, key_len);
808 	n->dev = dev;
809 	netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);
810 
811 	if (tbl->pconstructor && tbl->pconstructor(n)) {
812 		netdev_put(dev, &n->dev_tracker);
813 		kfree(n);
814 		err = -ENOBUFS;
815 		goto out;
816 	}
817 
818 	hash_val = pneigh_hash(pkey, key_len);
819 	n->next = tbl->phash_buckets[hash_val];
820 	rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
821 update:
822 	WRITE_ONCE(n->flags, flags);
823 	n->permanent = permanent;
824 	WRITE_ONCE(n->protocol, protocol);
825 out:
826 	mutex_unlock(&tbl->phash_lock);
827 	return err;
828 }
829 
830 static void pneigh_destroy(struct rcu_head *rcu)
831 {
832 	struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
833 
834 	netdev_put(n->dev, &n->dev_tracker);
835 	kfree(n);
836 }
837 
838 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
839 		  struct net_device *dev)
840 {
841 	struct pneigh_entry *n, __rcu **np;
842 	unsigned int key_len;
843 	u32 hash_val;
844 
845 	key_len = tbl->key_len;
846 	hash_val = pneigh_hash(pkey, key_len);
847 
848 	mutex_lock(&tbl->phash_lock);
849 
850 	for (np = &tbl->phash_buckets[hash_val];
851 	     (n = rcu_dereference_protected(*np, 1)) != NULL;
852 	     np = &n->next) {
853 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
854 		    net_eq(pneigh_net(n), net)) {
855 			rcu_assign_pointer(*np, n->next);
856 
857 			mutex_unlock(&tbl->phash_lock);
858 
859 			if (tbl->pdestructor)
860 				tbl->pdestructor(n);
861 
862 			call_rcu(&n->rcu, pneigh_destroy);
863 			return 0;
864 		}
865 	}
866 
867 	mutex_unlock(&tbl->phash_lock);
868 	return -ENOENT;
869 }
870 
871 static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
872 			  bool skip_perm)
873 {
874 	struct pneigh_entry *n, __rcu **np;
875 	LIST_HEAD(head);
876 	u32 h;
877 
878 	mutex_lock(&tbl->phash_lock);
879 
880 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
881 		np = &tbl->phash_buckets[h];
882 		while ((n = rcu_dereference_protected(*np, 1)) != NULL) {
883 			if (skip_perm && n->permanent)
884 				goto skip;
885 			if (!dev || n->dev == dev) {
886 				rcu_assign_pointer(*np, n->next);
887 				list_add(&n->free_node, &head);
888 				continue;
889 			}
890 skip:
891 			np = &n->next;
892 		}
893 	}
894 
895 	mutex_unlock(&tbl->phash_lock);
896 
897 	while (!list_empty(&head)) {
898 		n = list_first_entry(&head, typeof(*n), free_node);
899 		list_del(&n->free_node);
900 
901 		if (tbl->pdestructor)
902 			tbl->pdestructor(n);
903 
904 		call_rcu(&n->rcu, pneigh_destroy);
905 	}
906 }
907 
908 static inline void neigh_parms_put(struct neigh_parms *parms)
909 {
910 	if (refcount_dec_and_test(&parms->refcnt))
911 		kfree(parms);
912 }
913 
914 /*
915  *	neighbour must already be out of the table;
916  *
917  */
918 void neigh_destroy(struct neighbour *neigh)
919 {
920 	struct net_device *dev = neigh->dev;
921 
922 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
923 
924 	if (!neigh->dead) {
925 		pr_warn("Destroying alive neighbour %p\n", neigh);
926 		dump_stack();
927 		return;
928 	}
929 
930 	if (neigh_del_timer(neigh))
931 		pr_warn("Impossible event\n");
932 
933 	write_lock_bh(&neigh->lock);
934 	__skb_queue_purge(&neigh->arp_queue);
935 	write_unlock_bh(&neigh->lock);
936 	neigh->arp_queue_len_bytes = 0;
937 
938 	if (dev->netdev_ops->ndo_neigh_destroy)
939 		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
940 
941 	netdev_put(dev, &neigh->dev_tracker);
942 	neigh_parms_put(neigh->parms);
943 
944 	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
945 
946 	atomic_dec(&neigh->tbl->entries);
947 	kfree_rcu(neigh, rcu);
948 }
949 EXPORT_SYMBOL(neigh_destroy);
950 
951 /* Neighbour state is suspicious;
952    disable fast path.
953 
954    Called with write_locked neigh.
955  */
956 static void neigh_suspect(struct neighbour *neigh)
957 {
958 	neigh_dbg(2, "neigh %p is suspected\n", neigh);
959 
960 	WRITE_ONCE(neigh->output, neigh->ops->output);
961 }
962 
963 /* Neighbour state is OK;
964    enable fast path.
965 
966    Called with write_locked neigh.
967  */
968 static void neigh_connect(struct neighbour *neigh)
969 {
970 	neigh_dbg(2, "neigh %p is connected\n", neigh);
971 
972 	WRITE_ONCE(neigh->output, neigh->ops->connected_output);
973 }
974 
975 static void neigh_periodic_work(struct work_struct *work)
976 {
977 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
978 	struct neigh_hash_table *nht;
979 	struct hlist_node *tmp;
980 	struct neighbour *n;
981 	unsigned int i;
982 
983 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
984 
985 	write_lock_bh(&tbl->lock);
986 	nht = rcu_dereference_protected(tbl->nht,
987 					lockdep_is_held(&tbl->lock));
988 
989 	/*
990 	 *	periodically recompute ReachableTime from random function
991 	 */
992 
993 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
994 		struct neigh_parms *p;
995 
996 		WRITE_ONCE(tbl->last_rand, jiffies);
997 		list_for_each_entry(p, &tbl->parms_list, list)
998 			p->reachable_time =
999 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1000 	}
1001 
1002 	if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
1003 		goto out;
1004 
1005 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
1006 		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
1007 			unsigned int state;
1008 
1009 			write_lock(&n->lock);
1010 
1011 			state = n->nud_state;
1012 			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
1013 			    (n->flags &
1014 			     (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) {
1015 				write_unlock(&n->lock);
1016 				continue;
1017 			}
1018 
1019 			if (time_before(n->used, n->confirmed) &&
1020 			    time_is_before_eq_jiffies(n->confirmed))
1021 				n->used = n->confirmed;
1022 
1023 			if (refcount_read(&n->refcnt) == 1 &&
1024 			    (state == NUD_FAILED ||
1025 			     !time_in_range_open(jiffies, n->used,
1026 						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
1027 				hlist_del_rcu(&n->hash);
1028 				hlist_del_rcu(&n->dev_list);
1029 				neigh_mark_dead(n);
1030 				write_unlock(&n->lock);
1031 				neigh_cleanup_and_release(n);
1032 				continue;
1033 			}
1034 			write_unlock(&n->lock);
1035 		}
1036 		/*
1037 		 * It's fine to release lock here, even if hash table
1038 		 * grows while we are preempted.
1039 		 */
1040 		write_unlock_bh(&tbl->lock);
1041 		cond_resched();
1042 		write_lock_bh(&tbl->lock);
1043 		nht = rcu_dereference_protected(tbl->nht,
1044 						lockdep_is_held(&tbl->lock));
1045 	}
1046 out:
1047 	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
1048 	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
1049 	 * BASE_REACHABLE_TIME.
1050 	 */
1051 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1052 			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
1053 	write_unlock_bh(&tbl->lock);
1054 }
1055 
1056 static __inline__ int neigh_max_probes(struct neighbour *n)
1057 {
1058 	struct neigh_parms *p = n->parms;
1059 	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
1060 	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
1061 	        NEIGH_VAR(p, MCAST_PROBES));
1062 }
1063 
1064 static void neigh_invalidate(struct neighbour *neigh)
1065 	__releases(neigh->lock)
1066 	__acquires(neigh->lock)
1067 {
1068 	struct sk_buff *skb;
1069 
1070 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
1071 	neigh_dbg(2, "neigh %p is failed\n", neigh);
1072 	neigh->updated = jiffies;
1073 
1074 	/* It is very thin place. report_unreachable is very complicated
1075 	   routine. Particularly, it can hit the same neighbour entry!
1076 
1077 	   So that, we try to be accurate and avoid dead loop. --ANK
1078 	 */
1079 	while (neigh->nud_state == NUD_FAILED &&
1080 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1081 		write_unlock(&neigh->lock);
1082 		neigh->ops->error_report(neigh, skb);
1083 		write_lock(&neigh->lock);
1084 	}
1085 	__skb_queue_purge(&neigh->arp_queue);
1086 	neigh->arp_queue_len_bytes = 0;
1087 }
1088 
1089 static void neigh_probe(struct neighbour *neigh)
1090 	__releases(neigh->lock)
1091 {
1092 	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1093 	/* keep skb alive even if arp_queue overflows */
1094 	if (skb)
1095 		skb = skb_clone(skb, GFP_ATOMIC);
1096 	write_unlock(&neigh->lock);
1097 	if (neigh->ops->solicit)
1098 		neigh->ops->solicit(neigh, skb);
1099 	atomic_inc(&neigh->probes);
1100 	consume_skb(skb);
1101 }
1102 
/* Called when a timer expires for a neighbour entry.
 *
 * Runs the timed part of the NUD state machine under neigh->lock:
 *   REACHABLE -> stays REACHABLE, or moves to DELAY or STALE
 *   DELAY     -> REACHABLE or PROBE
 *   INCOMPLETE/PROBE -> FAILED (or STALE for NTF_EXT_VALIDATED entries in
 *                       PROBE) once the probe budget is used up, otherwise
 *                       another probe is sent.
 * One reference on the entry is dropped before returning.
 */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = timer_container_of(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	/* Default re-arm time; most branches below overwrite it. */
	next = now + HZ;

	/* The state no longer asks for a timer: just unlock and release. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Confirmed recently enough: re-check when that expires. */
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Not confirmed, but used within DELAY_PROBE_TIME. */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_DELAY);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			/* Neither confirmed nor recently used. */
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_STALE);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* A confirmation arrived while we were delaying. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			/* No confirmation: start probing with a fresh counter. */
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_PROBE);
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	/* Probe budget exhausted: give up on this resolution attempt. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		if (neigh->nud_state == NUD_PROBE &&
		    neigh->flags & NTF_EXT_VALIDATED) {
			/* Externally validated entries fall back to STALE
			 * instead of being failed.
			 */
			WRITE_ONCE(neigh->nud_state, NUD_STALE);
			neigh->updated = jiffies;
		} else {
			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
			neigh_invalidate(neigh);
		}
		notify = 1;
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Never re-arm closer than HZ/100 from now. */
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		/* A zero return from mod_timer() is paired with an extra
		 * reference on the entry for the newly armed timer.
		 */
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() releases neigh->lock itself. */
		neigh_probe(neigh);
	} else {
		/* The early exits above jump here so they unlock as well. */
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
1199 
/* Kick address resolution for @neigh on behalf of an outgoing packet.
 *
 * @skb may be NULL (the callers in this file pass NULL when only a probe is
 * wanted).  @immediate_ok allows the first probe to be sent from here
 * instead of waiting for the timer.
 *
 * Returns 0 when the entry is CONNECTED, DELAY or PROBE, or was STALE and
 * has just been moved to DELAY; @skb is left untouched in that case.
 * Returns 1 when the entry is INCOMPLETE (a non-NULL @skb has been queued
 * on arp_queue) or when @skb has been freed here.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
		       const bool immediate_ok)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		/* No resolution in progress: start one if any multicast or
		 * application probes are configured, otherwise fail at once.
		 */
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* NOTE(review): the counter starts at UCAST_PROBES,
			 * presumably so a fresh entry skips the unicast part
			 * of the budget - confirm against neigh_max_probes().
			 */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
			neigh->updated = now;
			if (!immediate_ok) {
				/* Let the timer send the first probe. */
				next = now + 1;
			} else {
				/* First probe is sent below; the timer only
				 * handles retransmissions.
				 */
				immediate_probe = true;
				next = now + max(NEIGH_VAR(neigh->parms,
							   RETRANS_TIME),
						 HZ / 100);
			}
			neigh_add_timer(neigh, next);
		} else {
			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* STALE entry is being used: move to DELAY and arm the
		 * DELAY_PROBE_TIME timer.
		 */
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		WRITE_ONCE(neigh->nud_state, NUD_DELAY);
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Drop the oldest queued packets until the new one
			 * fits within QUEUE_LEN_BYTES (or the queue is empty).
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() releases neigh->lock itself; either way the lock is
	 * dropped without touching BH, which is re-enabled just below.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	/* A dead entry that is STALE takes the normal exit with rc == 0;
	 * for any other state the packet is dropped.
	 */
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1287 
1288 static void neigh_update_hhs(struct neighbour *neigh)
1289 {
1290 	struct hh_cache *hh;
1291 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1292 		= NULL;
1293 
1294 	if (neigh->dev->header_ops)
1295 		update = neigh->dev->header_ops->cache_update;
1296 
1297 	if (update) {
1298 		hh = &neigh->hh;
1299 		if (READ_ONCE(hh->hh_len)) {
1300 			write_seqlock_bh(&hh->hh_lock);
1301 			update(hh, neigh->dev, neigh->ha);
1302 			write_sequnlock_bh(&hh->hh_lock);
1303 		}
1304 	}
1305 }
1306 
/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.
	NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed
				or invalidated.
   -- nlmsg_pid is passed on to neigh_update_notify().
   -- extack receives an error message on some failure paths.

   Returns 0 on success, -EPERM if the entry is dead or is NOARP/PERMANENT
   and the update is not administrative, -EINVAL if no lladdr was supplied
   and the entry holds no valid one.

   Caller MUST hold reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool gc_update = false, managed_update = false;
	int update_isrouter = 0;
	struct net_device *dev;
	int err, notify = 0;
	u8 old;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		/* Make the NUD_PERMANENT comparison after "out" a no-op. */
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
		/* Keep the current state, only clearing NUD_PERMANENT. */
		new = old & ~NUD_PERMANENT;
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		goto out;
	}

	if (!(new & NUD_VALID)) {
		/* Moving to a non-valid state: stop the timer and, when an
		 * in-progress resolution is being failed, report the queue.
		 */
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one.  From here on,
	 * "lladdr == neigh->ha" (pointer equality) means "address unchanged".
	 */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			/* Address differs but may not be overridden. */
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		/* REACHABLE waits reachable_time; other timed states are
		 * armed with a zero offset.
		 */
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		WRITE_ONCE(neigh->nud_state, new);
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		/* Store the new address under ha_lock and refresh the cached
		 * hardware header.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Back-date "confirmed" by twice BASE_REACHABLE_TIME when the
		 * new state is not a connected one.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		/* The entry just became valid: send out what was queued while
		 * it was unresolved.  The lock is dropped around each output
		 * call, hence the state re-check in the loop condition.
		 */
		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst &&
			    READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			READ_ONCE(n1->output)(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);
	/* List maintenance and notification run after the lock is dropped. */
	if (((new ^ old) & NUD_PERMANENT) || gc_update)
		neigh_update_gc_list(neigh);
	if (managed_update)
		neigh_update_managed_list(neigh);
	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);
	trace_neigh_update_done(neigh, err);
	return err;
}
1516 
1517 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1518 		 u32 flags, u32 nlmsg_pid)
1519 {
1520 	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1521 }
1522 EXPORT_SYMBOL(neigh_update);
1523 
1524 /* Update the neigh to listen temporarily for probe responses, even if it is
1525  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1526  */
1527 void __neigh_set_probe_once(struct neighbour *neigh)
1528 {
1529 	if (neigh->dead)
1530 		return;
1531 	neigh->updated = jiffies;
1532 	if (!(neigh->nud_state & NUD_FAILED))
1533 		return;
1534 	WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
1535 	atomic_set(&neigh->probes, neigh_max_probes(neigh));
1536 	neigh_add_timer(neigh,
1537 			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1538 				      HZ/100));
1539 }
1540 EXPORT_SYMBOL(__neigh_set_probe_once);
1541 
1542 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1543 				 u8 *lladdr, void *saddr,
1544 				 struct net_device *dev)
1545 {
1546 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1547 						 lladdr || !dev->addr_len);
1548 	if (neigh)
1549 		neigh_update(neigh, lladdr, NUD_STALE,
1550 			     NEIGH_UPDATE_F_OVERRIDE, 0);
1551 	return neigh;
1552 }
1553 EXPORT_SYMBOL(neigh_event_ns);
1554 
1555 /* called with read_lock_bh(&n->lock); */
1556 static void neigh_hh_init(struct neighbour *n)
1557 {
1558 	struct net_device *dev = n->dev;
1559 	__be16 prot = n->tbl->protocol;
1560 	struct hh_cache	*hh = &n->hh;
1561 
1562 	write_lock_bh(&n->lock);
1563 
1564 	/* Only one thread can come in here and initialize the
1565 	 * hh_cache entry.
1566 	 */
1567 	if (!hh->hh_len)
1568 		dev->header_ops->cache(n, hh, prot);
1569 
1570 	write_unlock_bh(&n->lock);
1571 }
1572 
1573 /* Slow and careful. */
1574 
1575 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1576 {
1577 	int rc = 0;
1578 
1579 	if (!neigh_event_send(neigh, skb)) {
1580 		int err;
1581 		struct net_device *dev = neigh->dev;
1582 		unsigned int seq;
1583 
1584 		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1585 			neigh_hh_init(neigh);
1586 
1587 		do {
1588 			__skb_pull(skb, skb_network_offset(skb));
1589 			seq = read_seqbegin(&neigh->ha_lock);
1590 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1591 					      neigh->ha, NULL, skb->len);
1592 		} while (read_seqretry(&neigh->ha_lock, seq));
1593 
1594 		if (err >= 0)
1595 			rc = dev_queue_xmit(skb);
1596 		else
1597 			goto out_kfree_skb;
1598 	}
1599 out:
1600 	return rc;
1601 out_kfree_skb:
1602 	rc = -EINVAL;
1603 	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
1604 	goto out;
1605 }
1606 EXPORT_SYMBOL(neigh_resolve_output);
1607 
1608 /* As fast as possible without hh cache */
1609 
1610 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1611 {
1612 	struct net_device *dev = neigh->dev;
1613 	unsigned int seq;
1614 	int err;
1615 
1616 	do {
1617 		__skb_pull(skb, skb_network_offset(skb));
1618 		seq = read_seqbegin(&neigh->ha_lock);
1619 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1620 				      neigh->ha, NULL, skb->len);
1621 	} while (read_seqretry(&neigh->ha_lock, seq));
1622 
1623 	if (err >= 0)
1624 		err = dev_queue_xmit(skb);
1625 	else {
1626 		err = -EINVAL;
1627 		kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
1628 	}
1629 	return err;
1630 }
1631 EXPORT_SYMBOL(neigh_connected_output);
1632 
/* Output path that transmits the packet as-is: @neigh is not consulted and
 * no link-layer header is added.  Returns the dev_queue_xmit() result.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = dev_queue_xmit(skb);

	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1638 
1639 static void neigh_managed_work(struct work_struct *work)
1640 {
1641 	struct neigh_table *tbl = container_of(work, struct neigh_table,
1642 					       managed_work.work);
1643 	struct neighbour *neigh;
1644 
1645 	write_lock_bh(&tbl->lock);
1646 	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1647 		neigh_event_send_probe(neigh, NULL, false);
1648 	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1649 			   NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
1650 	write_unlock_bh(&tbl->lock);
1651 }
1652 
1653 static void neigh_proxy_process(struct timer_list *t)
1654 {
1655 	struct neigh_table *tbl = timer_container_of(tbl, t, proxy_timer);
1656 	long sched_next = 0;
1657 	unsigned long now = jiffies;
1658 	struct sk_buff *skb, *n;
1659 
1660 	spin_lock(&tbl->proxy_queue.lock);
1661 
1662 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1663 		long tdif = NEIGH_CB(skb)->sched_next - now;
1664 
1665 		if (tdif <= 0) {
1666 			struct net_device *dev = skb->dev;
1667 
1668 			neigh_parms_qlen_dec(dev, tbl->family);
1669 			__skb_unlink(skb, &tbl->proxy_queue);
1670 
1671 			if (tbl->proxy_redo && netif_running(dev)) {
1672 				rcu_read_lock();
1673 				tbl->proxy_redo(skb);
1674 				rcu_read_unlock();
1675 			} else {
1676 				kfree_skb(skb);
1677 			}
1678 
1679 			dev_put(dev);
1680 		} else if (!sched_next || tdif < sched_next)
1681 			sched_next = tdif;
1682 	}
1683 	timer_delete(&tbl->proxy_timer);
1684 	if (sched_next)
1685 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1686 	spin_unlock(&tbl->proxy_queue.lock);
1687 }
1688 
1689 static unsigned long neigh_proxy_delay(struct neigh_parms *p)
1690 {
1691 	/* If proxy_delay is zero, do not call get_random_u32_below()
1692 	 * as it is undefined behavior.
1693 	 */
1694 	unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);
1695 
1696 	return proxy_delay ?
1697 	       jiffies + get_random_u32_below(proxy_delay) : jiffies;
1698 }
1699 
1700 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1701 		    struct sk_buff *skb)
1702 {
1703 	unsigned long sched_next = neigh_proxy_delay(p);
1704 
1705 	if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1706 		kfree_skb(skb);
1707 		return;
1708 	}
1709 
1710 	NEIGH_CB(skb)->sched_next = sched_next;
1711 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1712 
1713 	spin_lock(&tbl->proxy_queue.lock);
1714 	if (timer_delete(&tbl->proxy_timer)) {
1715 		if (time_before(tbl->proxy_timer.expires, sched_next))
1716 			sched_next = tbl->proxy_timer.expires;
1717 	}
1718 	skb_dst_drop(skb);
1719 	dev_hold(skb->dev);
1720 	__skb_queue_tail(&tbl->proxy_queue, skb);
1721 	p->qlen++;
1722 	mod_timer(&tbl->proxy_timer, sched_next);
1723 	spin_unlock(&tbl->proxy_queue.lock);
1724 }
1725 EXPORT_SYMBOL(pneigh_enqueue);
1726 
1727 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1728 						      struct net *net, int ifindex)
1729 {
1730 	struct neigh_parms *p;
1731 
1732 	list_for_each_entry(p, &tbl->parms_list, list) {
1733 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1734 		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1735 			return p;
1736 	}
1737 
1738 	return NULL;
1739 }
1740 
1741 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1742 				      struct neigh_table *tbl)
1743 {
1744 	struct neigh_parms *p;
1745 	struct net *net = dev_net(dev);
1746 	const struct net_device_ops *ops = dev->netdev_ops;
1747 
1748 	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1749 	if (p) {
1750 		p->tbl		  = tbl;
1751 		refcount_set(&p->refcnt, 1);
1752 		p->reachable_time =
1753 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1754 		p->qlen = 0;
1755 		netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
1756 		p->dev = dev;
1757 		write_pnet(&p->net, net);
1758 		p->sysctl_table = NULL;
1759 
1760 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1761 			netdev_put(dev, &p->dev_tracker);
1762 			kfree(p);
1763 			return NULL;
1764 		}
1765 
1766 		write_lock_bh(&tbl->lock);
1767 		list_add(&p->list, &tbl->parms.list);
1768 		write_unlock_bh(&tbl->lock);
1769 
1770 		neigh_parms_data_state_cleanall(p);
1771 	}
1772 	return p;
1773 }
1774 EXPORT_SYMBOL(neigh_parms_alloc);
1775 
1776 static void neigh_rcu_free_parms(struct rcu_head *head)
1777 {
1778 	struct neigh_parms *parms =
1779 		container_of(head, struct neigh_parms, rcu_head);
1780 
1781 	neigh_parms_put(parms);
1782 }
1783 
1784 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1785 {
1786 	if (!parms || parms == &tbl->parms)
1787 		return;
1788 	write_lock_bh(&tbl->lock);
1789 	list_del(&parms->list);
1790 	parms->dead = 1;
1791 	write_unlock_bh(&tbl->lock);
1792 	netdev_put(parms->dev, &parms->dev_tracker);
1793 	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1794 }
1795 EXPORT_SYMBOL(neigh_parms_release);
1796 
/* Lock class handed to skb_queue_head_init_class() for each table's
 * proxy_queue in neigh_table_init().
 */
static struct lock_class_key neigh_table_proxy_queue_class;

/* Registered tables, indexed by NEIGH_*_TABLE; slots are published by
 * neigh_table_init() and cleared by neigh_table_clear().
 */
static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1800 
1801 void neigh_table_init(int index, struct neigh_table *tbl)
1802 {
1803 	unsigned long now = jiffies;
1804 	unsigned long phsize;
1805 
1806 	INIT_LIST_HEAD(&tbl->parms_list);
1807 	INIT_LIST_HEAD(&tbl->gc_list);
1808 	INIT_LIST_HEAD(&tbl->managed_list);
1809 
1810 	list_add(&tbl->parms.list, &tbl->parms_list);
1811 	write_pnet(&tbl->parms.net, &init_net);
1812 	refcount_set(&tbl->parms.refcnt, 1);
1813 	tbl->parms.reachable_time =
1814 			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1815 	tbl->parms.qlen = 0;
1816 
1817 	tbl->stats = alloc_percpu(struct neigh_statistics);
1818 	if (!tbl->stats)
1819 		panic("cannot create neighbour cache statistics");
1820 
1821 #ifdef CONFIG_PROC_FS
1822 	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1823 			      &neigh_stat_seq_ops, tbl))
1824 		panic("cannot create neighbour proc dir entry");
1825 #endif
1826 
1827 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1828 
1829 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1830 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1831 
1832 	if (!tbl->nht || !tbl->phash_buckets)
1833 		panic("cannot allocate neighbour cache hashes");
1834 
1835 	if (!tbl->entry_size)
1836 		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1837 					tbl->key_len, NEIGH_PRIV_ALIGN);
1838 	else
1839 		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1840 
1841 	rwlock_init(&tbl->lock);
1842 	mutex_init(&tbl->phash_lock);
1843 
1844 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1845 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1846 			tbl->parms.reachable_time);
1847 	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
1848 	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
1849 
1850 	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1851 	skb_queue_head_init_class(&tbl->proxy_queue,
1852 			&neigh_table_proxy_queue_class);
1853 
1854 	tbl->last_flush = now;
1855 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1856 
1857 	rcu_assign_pointer(neigh_tables[index], tbl);
1858 }
1859 EXPORT_SYMBOL(neigh_table_init);
1860 
/*
 * Only called from ndisc_cleanup(), which means this is dead code
 * because we no longer can unload IPv6 module.
 *
 * Unpublishes @tbl from neigh_tables[@index], waits for an RCU grace
 * period, stops its works and timer, flushes its entries and frees the
 * hash tables, proc entry and statistics.  Always returns 0.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	/* Unpublish first, then wait out an RCU grace period. */
	RCU_INIT_POINTER(neigh_tables[index], NULL);
	synchronize_rcu();

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->managed_work);
	cancel_delayed_work_sync(&tbl->gc_work);
	timer_delete_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
	neigh_ifdown(tbl, NULL);
	/* Entries still counted after the flush are only reported. */
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	/* The hash table is freed from an RCU callback. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1894 
1895 static struct neigh_table *neigh_find_table(int family)
1896 {
1897 	struct neigh_table *tbl = NULL;
1898 
1899 	switch (family) {
1900 	case AF_INET:
1901 		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
1902 		break;
1903 	case AF_INET6:
1904 		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
1905 		break;
1906 	}
1907 
1908 	return tbl;
1909 }
1910 
/* Netlink attribute policy for the NDA_* attributes of neighbour messages;
 * neigh_add() parses its request with it.
 */
const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
	/* Addresses are binary blobs of at most MAX_ADDR_LEN bytes; the
	 * handlers check the lower bound against the table/device.
	 */
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
	[NDA_NH_ID]		= { .type = NLA_U32 },
	/* Only bits inside NTF_EXT_MASK are accepted. */
	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
};
1927 
1928 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1929 			struct netlink_ext_ack *extack)
1930 {
1931 	struct net *net = sock_net(skb->sk);
1932 	struct ndmsg *ndm;
1933 	struct nlattr *dst_attr;
1934 	struct neigh_table *tbl;
1935 	struct neighbour *neigh;
1936 	struct net_device *dev = NULL;
1937 	int err = -EINVAL;
1938 
1939 	ASSERT_RTNL();
1940 	if (nlmsg_len(nlh) < sizeof(*ndm))
1941 		goto out;
1942 
1943 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1944 	if (!dst_attr) {
1945 		NL_SET_ERR_MSG(extack, "Network address not specified");
1946 		goto out;
1947 	}
1948 
1949 	ndm = nlmsg_data(nlh);
1950 	if (ndm->ndm_ifindex) {
1951 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1952 		if (dev == NULL) {
1953 			err = -ENODEV;
1954 			goto out;
1955 		}
1956 	}
1957 
1958 	tbl = neigh_find_table(ndm->ndm_family);
1959 	if (tbl == NULL)
1960 		return -EAFNOSUPPORT;
1961 
1962 	if (nla_len(dst_attr) < (int)tbl->key_len) {
1963 		NL_SET_ERR_MSG(extack, "Invalid network address");
1964 		goto out;
1965 	}
1966 
1967 	if (ndm->ndm_flags & NTF_PROXY) {
1968 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1969 		goto out;
1970 	}
1971 
1972 	if (dev == NULL)
1973 		goto out;
1974 
1975 	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1976 	if (neigh == NULL) {
1977 		err = -ENOENT;
1978 		goto out;
1979 	}
1980 
1981 	err = __neigh_update(neigh, NULL, NUD_FAILED,
1982 			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1983 			     NETLINK_CB(skb).portid, extack);
1984 	write_lock_bh(&tbl->lock);
1985 	neigh_release(neigh);
1986 	neigh_remove_one(neigh);
1987 	write_unlock_bh(&tbl->lock);
1988 
1989 out:
1990 	return err;
1991 }
1992 
/* Netlink handler that creates or updates a neighbour entry, or creates a
 * proxy entry when NTF_PROXY is set.  Must run under RTNL.
 * NOTE(review): presumably registered for RTM_NEWNEIGH - the registration
 * is outside this part of the file.
 *
 * Returns 0 on success or a negative errno: the parse error, -EINVAL for
 * malformed or inconsistent requests, -ENODEV for an unknown ifindex,
 * -EAFNOSUPPORT for an unknown family, -ENOENT when the entry is missing
 * and NLM_F_CREATE is not set, -EEXIST when it exists and NLM_F_EXCL is
 * set, or the result of pneigh_create() / ___neigh_create() /
 * __neigh_update().
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	/* Default update flags; the override bits are dropped below when an
	 * existing entry is updated without NLM_F_REPLACE.
	 */
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	u32 ndm_flags;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	/* Default error for the validation failures that follow. */
	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	ndm_flags = ndm->ndm_flags;
	if (tb[NDA_FLAGS_EXT]) {
		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);

		/* neigh->flags must be wide enough for the header flags
		 * plus the extended flag bits.
		 */
		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
			      hweight32(NTF_EXT_MASK)));
		ndm_flags |= (ext << NTF_EXT_SHIFT);
	}
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
	if (ndm_flags & NTF_PROXY) {
		/* Proxy entries: no device required, but MANAGED and
		 * EXT_VALIDATED make no sense for them.
		 */
		if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
			goto out;
		}

		err = pneigh_create(tbl, net, dst, dev, ndm_flags, protocol,
				    !!(ndm->ndm_state & NUD_PERMANENT));
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	/* Optional per-table veto. */
	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		/* No such entry yet: create one if the request allows it. */
		bool ndm_permanent  = ndm->ndm_state & NUD_PERMANENT;
		bool exempt_from_gc = ndm_permanent ||
				      ndm_flags & (NTF_EXT_LEARNED |
						   NTF_EXT_VALIDATED);

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}
		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
			err = -EINVAL;
			goto out;
		}
		if (ndm_flags & NTF_EXT_VALIDATED) {
			u8 state = ndm->ndm_state;

			/* NTF_USE and NTF_MANAGED will result in the neighbor
			 * being created with an invalid state (NUD_NONE).
			 */
			if (ndm_flags & (NTF_USE | NTF_MANAGED))
				state = NUD_NONE;

			if (!(state & NUD_VALID)) {
				NL_SET_ERR_MSG(extack,
					       "Cannot create externally validated neighbor with an invalid state");
				err = -EINVAL;
				goto out;
			}
		}

		neigh = ___neigh_create(tbl, dst, dev,
					ndm_flags &
					(NTF_EXT_LEARNED | NTF_MANAGED |
					 NTF_EXT_VALIDATED),
					exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		/* Entry exists: every error exit in this branch must drop
		 * the reference taken by neigh_lookup().
		 */
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}
		if (ndm_flags & NTF_EXT_VALIDATED) {
			u8 state = ndm->ndm_state;

			/* NTF_USE and NTF_MANAGED do not update the existing
			 * state other than clearing it if it was
			 * NUD_PERMANENT.
			 */
			if (ndm_flags & (NTF_USE | NTF_MANAGED))
				state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT;

			if (!(state & NUD_VALID)) {
				NL_SET_ERR_MSG(extack,
					       "Cannot mark neighbor as externally validated with an invalid state");
				err = -EINVAL;
				neigh_release(neigh);
				goto out;
			}
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	/* Translate the request's NTF_* flags into NEIGH_UPDATE_F_* flags. */
	if (protocol)
		neigh->protocol = protocol;
	if (ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
	if (ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;
	if (ndm_flags & NTF_MANAGED)
		flags |= NEIGH_UPDATE_F_MANAGED;
	if (ndm_flags & NTF_USE)
		flags |= NEIGH_UPDATE_F_USE;
	if (ndm_flags & NTF_EXT_VALIDATED)
		flags |= NEIGH_UPDATE_F_EXT_VALIDATED;

	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
			     NETLINK_CB(skb).portid, extack);
	/* USE/MANAGED entries additionally get a resolution kicked off. */
	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED))
		neigh_event_send(neigh, NULL);
	neigh_release(neigh);
out:
	return err;
}
2172 
2173 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2174 {
2175 	struct nlattr *nest;
2176 
2177 	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2178 	if (nest == NULL)
2179 		return -ENOBUFS;
2180 
2181 	if ((parms->dev &&
2182 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
2183 	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2184 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2185 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2186 	    /* approximative value for deprecated QUEUE_LEN (in packets) */
2187 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
2188 			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2189 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2190 	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2191 	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
2192 			NEIGH_VAR(parms, UCAST_PROBES)) ||
2193 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
2194 			NEIGH_VAR(parms, MCAST_PROBES)) ||
2195 	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2196 			NEIGH_VAR(parms, MCAST_REPROBES)) ||
2197 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2198 			  NDTPA_PAD) ||
2199 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2200 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2201 	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
2202 			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2203 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2204 			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2205 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2206 			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2207 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2208 			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2209 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2210 			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2211 	    nla_put_msecs(skb, NDTPA_LOCKTIME,
2212 			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2213 	    nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2214 			  NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2215 		goto nla_put_failure;
2216 	return nla_nest_end(skb, nest);
2217 
2218 nla_put_failure:
2219 	nla_nest_cancel(skb, nest);
2220 	return -EMSGSIZE;
2221 }
2222 
2223 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2224 			      u32 pid, u32 seq, int type, int flags)
2225 {
2226 	struct nlmsghdr *nlh;
2227 	struct ndtmsg *ndtmsg;
2228 
2229 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2230 	if (nlh == NULL)
2231 		return -EMSGSIZE;
2232 
2233 	ndtmsg = nlmsg_data(nlh);
2234 
2235 	read_lock_bh(&tbl->lock);
2236 	ndtmsg->ndtm_family = tbl->family;
2237 	ndtmsg->ndtm_pad1   = 0;
2238 	ndtmsg->ndtm_pad2   = 0;
2239 
2240 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2241 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
2242 			  NDTA_PAD) ||
2243 	    nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
2244 	    nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
2245 	    nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
2246 		goto nla_put_failure;
2247 	{
2248 		unsigned long now = jiffies;
2249 		long flush_delta = now - READ_ONCE(tbl->last_flush);
2250 		long rand_delta = now - READ_ONCE(tbl->last_rand);
2251 		struct neigh_hash_table *nht;
2252 		struct ndt_config ndc = {
2253 			.ndtc_key_len		= tbl->key_len,
2254 			.ndtc_entry_size	= tbl->entry_size,
2255 			.ndtc_entries		= atomic_read(&tbl->entries),
2256 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
2257 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
2258 			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
2259 		};
2260 
2261 		rcu_read_lock();
2262 		nht = rcu_dereference(tbl->nht);
2263 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2264 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2265 		rcu_read_unlock();
2266 
2267 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2268 			goto nla_put_failure;
2269 	}
2270 
2271 	{
2272 		int cpu;
2273 		struct ndt_stats ndst;
2274 
2275 		memset(&ndst, 0, sizeof(ndst));
2276 
2277 		for_each_possible_cpu(cpu) {
2278 			struct neigh_statistics	*st;
2279 
2280 			st = per_cpu_ptr(tbl->stats, cpu);
2281 			ndst.ndts_allocs		+= READ_ONCE(st->allocs);
2282 			ndst.ndts_destroys		+= READ_ONCE(st->destroys);
2283 			ndst.ndts_hash_grows		+= READ_ONCE(st->hash_grows);
2284 			ndst.ndts_res_failed		+= READ_ONCE(st->res_failed);
2285 			ndst.ndts_lookups		+= READ_ONCE(st->lookups);
2286 			ndst.ndts_hits			+= READ_ONCE(st->hits);
2287 			ndst.ndts_rcv_probes_mcast	+= READ_ONCE(st->rcv_probes_mcast);
2288 			ndst.ndts_rcv_probes_ucast	+= READ_ONCE(st->rcv_probes_ucast);
2289 			ndst.ndts_periodic_gc_runs	+= READ_ONCE(st->periodic_gc_runs);
2290 			ndst.ndts_forced_gc_runs	+= READ_ONCE(st->forced_gc_runs);
2291 			ndst.ndts_table_fulls		+= READ_ONCE(st->table_fulls);
2292 		}
2293 
2294 		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2295 				  NDTA_PAD))
2296 			goto nla_put_failure;
2297 	}
2298 
2299 	BUG_ON(tbl->parms.dev);
2300 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2301 		goto nla_put_failure;
2302 
2303 	read_unlock_bh(&tbl->lock);
2304 	nlmsg_end(skb, nlh);
2305 	return 0;
2306 
2307 nla_put_failure:
2308 	read_unlock_bh(&tbl->lock);
2309 	nlmsg_cancel(skb, nlh);
2310 	return -EMSGSIZE;
2311 }
2312 
2313 static int neightbl_fill_param_info(struct sk_buff *skb,
2314 				    struct neigh_table *tbl,
2315 				    struct neigh_parms *parms,
2316 				    u32 pid, u32 seq, int type,
2317 				    unsigned int flags)
2318 {
2319 	struct ndtmsg *ndtmsg;
2320 	struct nlmsghdr *nlh;
2321 
2322 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2323 	if (nlh == NULL)
2324 		return -EMSGSIZE;
2325 
2326 	ndtmsg = nlmsg_data(nlh);
2327 
2328 	read_lock_bh(&tbl->lock);
2329 	ndtmsg->ndtm_family = tbl->family;
2330 	ndtmsg->ndtm_pad1   = 0;
2331 	ndtmsg->ndtm_pad2   = 0;
2332 
2333 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2334 	    neightbl_fill_parms(skb, parms) < 0)
2335 		goto errout;
2336 
2337 	read_unlock_bh(&tbl->lock);
2338 	nlmsg_end(skb, nlh);
2339 	return 0;
2340 errout:
2341 	read_unlock_bh(&tbl->lock);
2342 	nlmsg_cancel(skb, nlh);
2343 	return -EMSGSIZE;
2344 }
2345 
/* Attribute policy for the top-level NDTA_* attributes; neightbl_set()
 * passes it to nlmsg_parse_deprecated().
 */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
2354 
/* Attribute policy for the NDTPA_* attributes nested inside NDTA_PARMS;
 * neightbl_set() passes it to nla_parse_nested_deprecated().
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_QUEUE_LENBYTES]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
	/* NOTE(review): .min is set without an explicit validation type;
	 * presumably the lower bound is only enforced when one is given
	 * (e.g. via NLA_POLICY_MIN) - confirm against struct nla_policy.
	 */
	[NDTPA_INTERVAL_PROBE_TIME_MS]	= { .type = NLA_U64, .min = 1 },
};
2373 
2374 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2375 			struct netlink_ext_ack *extack)
2376 {
2377 	struct net *net = sock_net(skb->sk);
2378 	struct neigh_table *tbl;
2379 	struct ndtmsg *ndtmsg;
2380 	struct nlattr *tb[NDTA_MAX+1];
2381 	bool found = false;
2382 	int err, tidx;
2383 
2384 	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2385 				     nl_neightbl_policy, extack);
2386 	if (err < 0)
2387 		goto errout;
2388 
2389 	if (tb[NDTA_NAME] == NULL) {
2390 		err = -EINVAL;
2391 		goto errout;
2392 	}
2393 
2394 	ndtmsg = nlmsg_data(nlh);
2395 
2396 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2397 		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
2398 		if (!tbl)
2399 			continue;
2400 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2401 			continue;
2402 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2403 			found = true;
2404 			break;
2405 		}
2406 	}
2407 
2408 	if (!found)
2409 		return -ENOENT;
2410 
2411 	/*
2412 	 * We acquire tbl->lock to be nice to the periodic timers and
2413 	 * make sure they always see a consistent set of values.
2414 	 */
2415 	write_lock_bh(&tbl->lock);
2416 
2417 	if (tb[NDTA_PARMS]) {
2418 		struct nlattr *tbp[NDTPA_MAX+1];
2419 		struct neigh_parms *p;
2420 		int i, ifindex = 0;
2421 
2422 		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2423 						  tb[NDTA_PARMS],
2424 						  nl_ntbl_parm_policy, extack);
2425 		if (err < 0)
2426 			goto errout_tbl_lock;
2427 
2428 		if (tbp[NDTPA_IFINDEX])
2429 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2430 
2431 		p = lookup_neigh_parms(tbl, net, ifindex);
2432 		if (p == NULL) {
2433 			err = -ENOENT;
2434 			goto errout_tbl_lock;
2435 		}
2436 
2437 		for (i = 1; i <= NDTPA_MAX; i++) {
2438 			if (tbp[i] == NULL)
2439 				continue;
2440 
2441 			switch (i) {
2442 			case NDTPA_QUEUE_LEN:
2443 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2444 					      nla_get_u32(tbp[i]) *
2445 					      SKB_TRUESIZE(ETH_FRAME_LEN));
2446 				break;
2447 			case NDTPA_QUEUE_LENBYTES:
2448 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2449 					      nla_get_u32(tbp[i]));
2450 				break;
2451 			case NDTPA_PROXY_QLEN:
2452 				NEIGH_VAR_SET(p, PROXY_QLEN,
2453 					      nla_get_u32(tbp[i]));
2454 				break;
2455 			case NDTPA_APP_PROBES:
2456 				NEIGH_VAR_SET(p, APP_PROBES,
2457 					      nla_get_u32(tbp[i]));
2458 				break;
2459 			case NDTPA_UCAST_PROBES:
2460 				NEIGH_VAR_SET(p, UCAST_PROBES,
2461 					      nla_get_u32(tbp[i]));
2462 				break;
2463 			case NDTPA_MCAST_PROBES:
2464 				NEIGH_VAR_SET(p, MCAST_PROBES,
2465 					      nla_get_u32(tbp[i]));
2466 				break;
2467 			case NDTPA_MCAST_REPROBES:
2468 				NEIGH_VAR_SET(p, MCAST_REPROBES,
2469 					      nla_get_u32(tbp[i]));
2470 				break;
2471 			case NDTPA_BASE_REACHABLE_TIME:
2472 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2473 					      nla_get_msecs(tbp[i]));
2474 				/* update reachable_time as well, otherwise, the change will
2475 				 * only be effective after the next time neigh_periodic_work
2476 				 * decides to recompute it (can be multiple minutes)
2477 				 */
2478 				p->reachable_time =
2479 					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2480 				break;
2481 			case NDTPA_GC_STALETIME:
2482 				NEIGH_VAR_SET(p, GC_STALETIME,
2483 					      nla_get_msecs(tbp[i]));
2484 				break;
2485 			case NDTPA_DELAY_PROBE_TIME:
2486 				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2487 					      nla_get_msecs(tbp[i]));
2488 				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2489 				break;
2490 			case NDTPA_INTERVAL_PROBE_TIME_MS:
2491 				NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
2492 					      nla_get_msecs(tbp[i]));
2493 				break;
2494 			case NDTPA_RETRANS_TIME:
2495 				NEIGH_VAR_SET(p, RETRANS_TIME,
2496 					      nla_get_msecs(tbp[i]));
2497 				break;
2498 			case NDTPA_ANYCAST_DELAY:
2499 				NEIGH_VAR_SET(p, ANYCAST_DELAY,
2500 					      nla_get_msecs(tbp[i]));
2501 				break;
2502 			case NDTPA_PROXY_DELAY:
2503 				NEIGH_VAR_SET(p, PROXY_DELAY,
2504 					      nla_get_msecs(tbp[i]));
2505 				break;
2506 			case NDTPA_LOCKTIME:
2507 				NEIGH_VAR_SET(p, LOCKTIME,
2508 					      nla_get_msecs(tbp[i]));
2509 				break;
2510 			}
2511 		}
2512 	}
2513 
2514 	err = -ENOENT;
2515 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2516 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2517 	    !net_eq(net, &init_net))
2518 		goto errout_tbl_lock;
2519 
2520 	if (tb[NDTA_THRESH1])
2521 		WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));
2522 
2523 	if (tb[NDTA_THRESH2])
2524 		WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));
2525 
2526 	if (tb[NDTA_THRESH3])
2527 		WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));
2528 
2529 	if (tb[NDTA_GC_INTERVAL])
2530 		WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));
2531 
2532 	err = 0;
2533 
2534 errout_tbl_lock:
2535 	write_unlock_bh(&tbl->lock);
2536 errout:
2537 	return err;
2538 }
2539 
2540 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2541 				    struct netlink_ext_ack *extack)
2542 {
2543 	struct ndtmsg *ndtm;
2544 
2545 	ndtm = nlmsg_payload(nlh, sizeof(*ndtm));
2546 	if (!ndtm) {
2547 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2548 		return -EINVAL;
2549 	}
2550 
2551 	if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
2552 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2553 		return -EINVAL;
2554 	}
2555 
2556 	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2557 		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2558 		return -EINVAL;
2559 	}
2560 
2561 	return 0;
2562 }
2563 
2564 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2565 {
2566 	const struct nlmsghdr *nlh = cb->nlh;
2567 	struct net *net = sock_net(skb->sk);
2568 	int family, tidx, nidx = 0;
2569 	int tbl_skip = cb->args[0];
2570 	int neigh_skip = cb->args[1];
2571 	struct neigh_table *tbl;
2572 
2573 	if (cb->strict_check) {
2574 		int err = neightbl_valid_dump_info(nlh, cb->extack);
2575 
2576 		if (err < 0)
2577 			return err;
2578 	}
2579 
2580 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2581 
2582 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2583 		struct neigh_parms *p;
2584 
2585 		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
2586 		if (!tbl)
2587 			continue;
2588 
2589 		if (tidx < tbl_skip || (family && tbl->family != family))
2590 			continue;
2591 
2592 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2593 				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2594 				       NLM_F_MULTI) < 0)
2595 			break;
2596 
2597 		nidx = 0;
2598 		p = list_next_entry(&tbl->parms, list);
2599 		list_for_each_entry_from(p, &tbl->parms_list, list) {
2600 			if (!net_eq(neigh_parms_net(p), net))
2601 				continue;
2602 
2603 			if (nidx < neigh_skip)
2604 				goto next;
2605 
2606 			if (neightbl_fill_param_info(skb, tbl, p,
2607 						     NETLINK_CB(cb->skb).portid,
2608 						     nlh->nlmsg_seq,
2609 						     RTM_NEWNEIGHTBL,
2610 						     NLM_F_MULTI) < 0)
2611 				goto out;
2612 		next:
2613 			nidx++;
2614 		}
2615 
2616 		neigh_skip = 0;
2617 	}
2618 out:
2619 	cb->args[0] = tidx;
2620 	cb->args[1] = nidx;
2621 
2622 	return skb->len;
2623 }
2624 
2625 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2626 			   u32 pid, u32 seq, int type, unsigned int flags)
2627 {
2628 	u32 neigh_flags, neigh_flags_ext;
2629 	unsigned long now = jiffies;
2630 	struct nda_cacheinfo ci;
2631 	struct nlmsghdr *nlh;
2632 	struct ndmsg *ndm;
2633 
2634 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2635 	if (nlh == NULL)
2636 		return -EMSGSIZE;
2637 
2638 	neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2639 	neigh_flags     = neigh->flags & NTF_OLD_MASK;
2640 
2641 	ndm = nlmsg_data(nlh);
2642 	ndm->ndm_family	 = neigh->ops->family;
2643 	ndm->ndm_pad1    = 0;
2644 	ndm->ndm_pad2    = 0;
2645 	ndm->ndm_flags	 = neigh_flags;
2646 	ndm->ndm_type	 = neigh->type;
2647 	ndm->ndm_ifindex = neigh->dev->ifindex;
2648 
2649 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2650 		goto nla_put_failure;
2651 
2652 	read_lock_bh(&neigh->lock);
2653 	ndm->ndm_state	 = neigh->nud_state;
2654 	if (neigh->nud_state & NUD_VALID) {
2655 		char haddr[MAX_ADDR_LEN];
2656 
2657 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2658 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2659 			read_unlock_bh(&neigh->lock);
2660 			goto nla_put_failure;
2661 		}
2662 	}
2663 
2664 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2665 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2666 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2667 	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
2668 	read_unlock_bh(&neigh->lock);
2669 
2670 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2671 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2672 		goto nla_put_failure;
2673 
2674 	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2675 		goto nla_put_failure;
2676 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2677 		goto nla_put_failure;
2678 
2679 	nlmsg_end(skb, nlh);
2680 	return 0;
2681 
2682 nla_put_failure:
2683 	nlmsg_cancel(skb, nlh);
2684 	return -EMSGSIZE;
2685 }
2686 
2687 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2688 			    u32 pid, u32 seq, int type, unsigned int flags,
2689 			    struct neigh_table *tbl)
2690 {
2691 	u32 neigh_flags, neigh_flags_ext;
2692 	struct nlmsghdr *nlh;
2693 	struct ndmsg *ndm;
2694 	u8 protocol;
2695 
2696 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2697 	if (nlh == NULL)
2698 		return -EMSGSIZE;
2699 
2700 	neigh_flags = READ_ONCE(pn->flags);
2701 	neigh_flags_ext = neigh_flags >> NTF_EXT_SHIFT;
2702 	neigh_flags &= NTF_OLD_MASK;
2703 
2704 	ndm = nlmsg_data(nlh);
2705 	ndm->ndm_family	 = tbl->family;
2706 	ndm->ndm_pad1    = 0;
2707 	ndm->ndm_pad2    = 0;
2708 	ndm->ndm_flags	 = neigh_flags | NTF_PROXY;
2709 	ndm->ndm_type	 = RTN_UNICAST;
2710 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2711 	ndm->ndm_state	 = NUD_NONE;
2712 
2713 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2714 		goto nla_put_failure;
2715 
2716 	protocol = READ_ONCE(pn->protocol);
2717 	if (protocol && nla_put_u8(skb, NDA_PROTOCOL, protocol))
2718 		goto nla_put_failure;
2719 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2720 		goto nla_put_failure;
2721 
2722 	nlmsg_end(skb, nlh);
2723 	return 0;
2724 
2725 nla_put_failure:
2726 	nlmsg_cancel(skb, nlh);
2727 	return -EMSGSIZE;
2728 }
2729 
/* Announce a change of @neigh: first through the netevent notifier chain
 * (NETEVENT_NEIGH_UPDATE), then through __neigh_notify() as an
 * RTM_NEWNEIGH message carrying @nlmsg_pid as the pid argument.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2735 
2736 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2737 {
2738 	struct net_device *master;
2739 
2740 	if (!master_idx)
2741 		return false;
2742 
2743 	master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
2744 
2745 	/* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
2746 	 * invalid value for ifindex to denote "no master".
2747 	 */
2748 	if (master_idx == -1)
2749 		return !!master;
2750 
2751 	if (!master || master->ifindex != master_idx)
2752 		return true;
2753 
2754 	return false;
2755 }
2756 
2757 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2758 {
2759 	if (filter_idx && (!dev || dev->ifindex != filter_idx))
2760 		return true;
2761 
2762 	return false;
2763 }
2764 
/* Optional filters for a neighbour dump, filled in by
 * neigh_valid_dump_req(); a value of 0 leaves the filter disabled.
 */
struct neigh_dump_filter {
	int master_idx;	/* from NDA_MASTER, checked by neigh_master_filtered() */
	int dev_idx;	/* from NDA_IFINDEX, checked by neigh_ifindex_filtered() */
};
2769 
2770 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2771 			    struct netlink_callback *cb,
2772 			    struct neigh_dump_filter *filter)
2773 {
2774 	struct net *net = sock_net(skb->sk);
2775 	struct neighbour *n;
2776 	int err = 0, h, s_h = cb->args[1];
2777 	int idx, s_idx = idx = cb->args[2];
2778 	struct neigh_hash_table *nht;
2779 	unsigned int flags = NLM_F_MULTI;
2780 
2781 	if (filter->dev_idx || filter->master_idx)
2782 		flags |= NLM_F_DUMP_FILTERED;
2783 
2784 	nht = rcu_dereference(tbl->nht);
2785 
2786 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2787 		if (h > s_h)
2788 			s_idx = 0;
2789 		idx = 0;
2790 		neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
2791 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2792 				goto next;
2793 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2794 			    neigh_master_filtered(n->dev, filter->master_idx))
2795 				goto next;
2796 			err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2797 					      cb->nlh->nlmsg_seq,
2798 					      RTM_NEWNEIGH, flags);
2799 			if (err < 0)
2800 				goto out;
2801 next:
2802 			idx++;
2803 		}
2804 	}
2805 out:
2806 	cb->args[1] = h;
2807 	cb->args[2] = idx;
2808 	return err;
2809 }
2810 
2811 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2812 			     struct netlink_callback *cb,
2813 			     struct neigh_dump_filter *filter)
2814 {
2815 	struct pneigh_entry *n;
2816 	struct net *net = sock_net(skb->sk);
2817 	int err = 0, h, s_h = cb->args[3];
2818 	int idx, s_idx = idx = cb->args[4];
2819 	unsigned int flags = NLM_F_MULTI;
2820 
2821 	if (filter->dev_idx || filter->master_idx)
2822 		flags |= NLM_F_DUMP_FILTERED;
2823 
2824 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2825 		if (h > s_h)
2826 			s_idx = 0;
2827 		for (n = rcu_dereference(tbl->phash_buckets[h]), idx = 0;
2828 		     n;
2829 		     n = rcu_dereference(n->next)) {
2830 			if (idx < s_idx || pneigh_net(n) != net)
2831 				goto next;
2832 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2833 			    neigh_master_filtered(n->dev, filter->master_idx))
2834 				goto next;
2835 			err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2836 					       cb->nlh->nlmsg_seq,
2837 					       RTM_NEWNEIGH, flags, tbl);
2838 			if (err < 0)
2839 				goto out;
2840 		next:
2841 			idx++;
2842 		}
2843 	}
2844 
2845 out:
2846 	cb->args[3] = h;
2847 	cb->args[4] = idx;
2848 	return err;
2849 }
2850 
2851 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2852 				bool strict_check,
2853 				struct neigh_dump_filter *filter,
2854 				struct netlink_ext_ack *extack)
2855 {
2856 	struct nlattr *tb[NDA_MAX + 1];
2857 	int err, i;
2858 
2859 	if (strict_check) {
2860 		struct ndmsg *ndm;
2861 
2862 		ndm = nlmsg_payload(nlh, sizeof(*ndm));
2863 		if (!ndm) {
2864 			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2865 			return -EINVAL;
2866 		}
2867 
2868 		if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
2869 		    ndm->ndm_state || ndm->ndm_type) {
2870 			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2871 			return -EINVAL;
2872 		}
2873 
2874 		if (ndm->ndm_flags & ~NTF_PROXY) {
2875 			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2876 			return -EINVAL;
2877 		}
2878 
2879 		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2880 						    tb, NDA_MAX, nda_policy,
2881 						    extack);
2882 	} else {
2883 		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2884 					     NDA_MAX, nda_policy, extack);
2885 	}
2886 	if (err < 0)
2887 		return err;
2888 
2889 	for (i = 0; i <= NDA_MAX; ++i) {
2890 		if (!tb[i])
2891 			continue;
2892 
2893 		/* all new attributes should require strict_check */
2894 		switch (i) {
2895 		case NDA_IFINDEX:
2896 			filter->dev_idx = nla_get_u32(tb[i]);
2897 			break;
2898 		case NDA_MASTER:
2899 			filter->master_idx = nla_get_u32(tb[i]);
2900 			break;
2901 		default:
2902 			if (strict_check) {
2903 				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2904 				return -EINVAL;
2905 			}
2906 		}
2907 	}
2908 
2909 	return 0;
2910 }
2911 
2912 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2913 {
2914 	const struct nlmsghdr *nlh = cb->nlh;
2915 	struct neigh_dump_filter filter = {};
2916 	struct neigh_table *tbl;
2917 	int t, family, s_t;
2918 	int proxy = 0;
2919 	int err;
2920 
2921 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2922 
2923 	/* check for full ndmsg structure presence, family member is
2924 	 * the same for both structures
2925 	 */
2926 	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2927 	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2928 		proxy = 1;
2929 
2930 	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2931 	if (err < 0 && cb->strict_check)
2932 		return err;
2933 	err = 0;
2934 
2935 	s_t = cb->args[0];
2936 
2937 	rcu_read_lock();
2938 	for (t = 0; t < NEIGH_NR_TABLES; t++) {
2939 		tbl = rcu_dereference(neigh_tables[t]);
2940 
2941 		if (!tbl)
2942 			continue;
2943 		if (t < s_t || (family && tbl->family != family))
2944 			continue;
2945 		if (t > s_t)
2946 			memset(&cb->args[1], 0, sizeof(cb->args) -
2947 						sizeof(cb->args[0]));
2948 		if (proxy)
2949 			err = pneigh_dump_table(tbl, skb, cb, &filter);
2950 		else
2951 			err = neigh_dump_table(tbl, skb, cb, &filter);
2952 		if (err < 0)
2953 			break;
2954 	}
2955 	rcu_read_unlock();
2956 
2957 	cb->args[0] = t;
2958 	return err;
2959 }
2960 
2961 static struct ndmsg *neigh_valid_get_req(const struct nlmsghdr *nlh,
2962 					 struct nlattr **tb,
2963 					 struct netlink_ext_ack *extack)
2964 {
2965 	struct ndmsg *ndm;
2966 	int err, i;
2967 
2968 	ndm = nlmsg_payload(nlh, sizeof(*ndm));
2969 	if (!ndm) {
2970 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2971 		return ERR_PTR(-EINVAL);
2972 	}
2973 
2974 	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
2975 	    ndm->ndm_type) {
2976 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2977 		return ERR_PTR(-EINVAL);
2978 	}
2979 
2980 	if (ndm->ndm_flags & ~NTF_PROXY) {
2981 		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2982 		return ERR_PTR(-EINVAL);
2983 	}
2984 
2985 	if (!(ndm->ndm_flags & NTF_PROXY) && !ndm->ndm_ifindex) {
2986 		NL_SET_ERR_MSG(extack, "No device specified");
2987 		return ERR_PTR(-EINVAL);
2988 	}
2989 
2990 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2991 					    NDA_MAX, nda_policy, extack);
2992 	if (err < 0)
2993 		return ERR_PTR(err);
2994 
2995 	for (i = 0; i <= NDA_MAX; ++i) {
2996 		switch (i) {
2997 		case NDA_DST:
2998 			if (!tb[i]) {
2999 				NL_SET_ERR_ATTR_MISS(extack, NULL, NDA_DST);
3000 				return ERR_PTR(-EINVAL);
3001 			}
3002 			break;
3003 		default:
3004 			if (!tb[i])
3005 				continue;
3006 
3007 			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
3008 			return ERR_PTR(-EINVAL);
3009 		}
3010 	}
3011 
3012 	return ndm;
3013 }
3014 
3015 static inline size_t neigh_nlmsg_size(void)
3016 {
3017 	return NLMSG_ALIGN(sizeof(struct ndmsg))
3018 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3019 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
3020 	       + nla_total_size(sizeof(struct nda_cacheinfo))
3021 	       + nla_total_size(4)  /* NDA_PROBES */
3022 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
3023 	       + nla_total_size(1); /* NDA_PROTOCOL */
3024 }
3025 
3026 static inline size_t pneigh_nlmsg_size(void)
3027 {
3028 	return NLMSG_ALIGN(sizeof(struct ndmsg))
3029 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3030 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
3031 	       + nla_total_size(1); /* NDA_PROTOCOL */
3032 }
3033 
3034 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3035 		     struct netlink_ext_ack *extack)
3036 {
3037 	struct net *net = sock_net(in_skb->sk);
3038 	u32 pid = NETLINK_CB(in_skb).portid;
3039 	struct nlattr *tb[NDA_MAX + 1];
3040 	struct net_device *dev = NULL;
3041 	u32 seq = nlh->nlmsg_seq;
3042 	struct neigh_table *tbl;
3043 	struct neighbour *neigh;
3044 	struct sk_buff *skb;
3045 	struct ndmsg *ndm;
3046 	void *dst;
3047 	int err;
3048 
3049 	ndm = neigh_valid_get_req(nlh, tb, extack);
3050 	if (IS_ERR(ndm))
3051 		return PTR_ERR(ndm);
3052 
3053 	if (ndm->ndm_flags & NTF_PROXY)
3054 		skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
3055 	else
3056 		skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
3057 	if (!skb)
3058 		return -ENOBUFS;
3059 
3060 	rcu_read_lock();
3061 
3062 	tbl = neigh_find_table(ndm->ndm_family);
3063 	if (!tbl) {
3064 		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
3065 		err = -EAFNOSUPPORT;
3066 		goto err_unlock;
3067 	}
3068 
3069 	if (nla_len(tb[NDA_DST]) != (int)tbl->key_len) {
3070 		NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
3071 		err = -EINVAL;
3072 		goto err_unlock;
3073 	}
3074 
3075 	dst = nla_data(tb[NDA_DST]);
3076 
3077 	if (ndm->ndm_ifindex) {
3078 		dev = dev_get_by_index_rcu(net, ndm->ndm_ifindex);
3079 		if (!dev) {
3080 			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
3081 			err = -ENODEV;
3082 			goto err_unlock;
3083 		}
3084 	}
3085 
3086 	if (ndm->ndm_flags & NTF_PROXY) {
3087 		struct pneigh_entry *pn;
3088 
3089 		pn = pneigh_lookup(tbl, net, dst, dev);
3090 		if (!pn) {
3091 			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3092 			err = -ENOENT;
3093 			goto err_unlock;
3094 		}
3095 
3096 		err = pneigh_fill_info(skb, pn, pid, seq, RTM_NEWNEIGH, 0, tbl);
3097 		if (err)
3098 			goto err_unlock;
3099 	} else {
3100 		neigh = neigh_lookup(tbl, dst, dev);
3101 		if (!neigh) {
3102 			NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3103 			err = -ENOENT;
3104 			goto err_unlock;
3105 		}
3106 
3107 		err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
3108 		neigh_release(neigh);
3109 		if (err)
3110 			goto err_unlock;
3111 	}
3112 
3113 	rcu_read_unlock();
3114 
3115 	return rtnl_unicast(skb, net, pid);
3116 err_unlock:
3117 	rcu_read_unlock();
3118 	kfree_skb(skb);
3119 	return err;
3120 }
3121 
3122 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3123 {
3124 	int chain;
3125 	struct neigh_hash_table *nht;
3126 
3127 	rcu_read_lock();
3128 	nht = rcu_dereference(tbl->nht);
3129 
3130 	read_lock_bh(&tbl->lock); /* avoid resizes */
3131 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3132 		struct neighbour *n;
3133 
3134 		neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
3135 			cb(n, cookie);
3136 	}
3137 	read_unlock_bh(&tbl->lock);
3138 	rcu_read_unlock();
3139 }
3140 EXPORT_SYMBOL(neigh_for_each);
3141 
3142 /* The tbl->lock must be held as a writer and BH disabled. */
3143 void __neigh_for_each_release(struct neigh_table *tbl,
3144 			      int (*cb)(struct neighbour *))
3145 {
3146 	struct neigh_hash_table *nht;
3147 	int chain;
3148 
3149 	nht = rcu_dereference_protected(tbl->nht,
3150 					lockdep_is_held(&tbl->lock));
3151 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3152 		struct hlist_node *tmp;
3153 		struct neighbour *n;
3154 
3155 		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
3156 			int release;
3157 
3158 			write_lock(&n->lock);
3159 			release = cb(n);
3160 			if (release) {
3161 				hlist_del_rcu(&n->hash);
3162 				hlist_del_rcu(&n->dev_list);
3163 				neigh_mark_dead(n);
3164 			}
3165 			write_unlock(&n->lock);
3166 			if (release)
3167 				neigh_cleanup_and_release(n);
3168 		}
3169 	}
3170 }
3171 EXPORT_SYMBOL(__neigh_for_each_release);
3172 
3173 int neigh_xmit(int index, struct net_device *dev,
3174 	       const void *addr, struct sk_buff *skb)
3175 {
3176 	int err = -EAFNOSUPPORT;
3177 
3178 	if (likely(index < NEIGH_NR_TABLES)) {
3179 		struct neigh_table *tbl;
3180 		struct neighbour *neigh;
3181 
3182 		rcu_read_lock();
3183 		tbl = rcu_dereference(neigh_tables[index]);
3184 		if (!tbl)
3185 			goto out_unlock;
3186 		if (index == NEIGH_ARP_TABLE) {
3187 			u32 key = *((u32 *)addr);
3188 
3189 			neigh = __ipv4_neigh_lookup_noref(dev, key);
3190 		} else {
3191 			neigh = __neigh_lookup_noref(tbl, addr, dev);
3192 		}
3193 		if (!neigh)
3194 			neigh = __neigh_create(tbl, addr, dev, false);
3195 		err = PTR_ERR(neigh);
3196 		if (IS_ERR(neigh)) {
3197 			rcu_read_unlock();
3198 			goto out_kfree_skb;
3199 		}
3200 		err = READ_ONCE(neigh->output)(neigh, skb);
3201 out_unlock:
3202 		rcu_read_unlock();
3203 	}
3204 	else if (index == NEIGH_LINK_TABLE) {
3205 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3206 				      addr, NULL, skb->len);
3207 		if (err < 0)
3208 			goto out_kfree_skb;
3209 		err = dev_queue_xmit(skb);
3210 	}
3211 out:
3212 	return err;
3213 out_kfree_skb:
3214 	kfree_skb(skb);
3215 	goto out;
3216 }
3217 EXPORT_SYMBOL(neigh_xmit);
3218 
3219 #ifdef CONFIG_PROC_FS
3220 
3221 static struct neighbour *neigh_get_valid(struct seq_file *seq,
3222 					 struct neighbour *n,
3223 					 loff_t *pos)
3224 {
3225 	struct neigh_seq_state *state = seq->private;
3226 	struct net *net = seq_file_net(seq);
3227 
3228 	if (!net_eq(dev_net(n->dev), net))
3229 		return NULL;
3230 
3231 	if (state->neigh_sub_iter) {
3232 		loff_t fakep = 0;
3233 		void *v;
3234 
3235 		v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
3236 		if (!v)
3237 			return NULL;
3238 		if (pos)
3239 			return v;
3240 	}
3241 
3242 	if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3243 		return n;
3244 
3245 	if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
3246 		return n;
3247 
3248 	return NULL;
3249 }
3250 
3251 static struct neighbour *neigh_get_first(struct seq_file *seq)
3252 {
3253 	struct neigh_seq_state *state = seq->private;
3254 	struct neigh_hash_table *nht = state->nht;
3255 	struct neighbour *n, *tmp;
3256 
3257 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3258 
3259 	while (++state->bucket < (1 << nht->hash_shift)) {
3260 		neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
3261 			tmp = neigh_get_valid(seq, n, NULL);
3262 			if (tmp)
3263 				return tmp;
3264 		}
3265 	}
3266 
3267 	return NULL;
3268 }
3269 
3270 static struct neighbour *neigh_get_next(struct seq_file *seq,
3271 					struct neighbour *n,
3272 					loff_t *pos)
3273 {
3274 	struct neigh_seq_state *state = seq->private;
3275 	struct neighbour *tmp;
3276 
3277 	if (state->neigh_sub_iter) {
3278 		void *v = state->neigh_sub_iter(state, n, pos);
3279 
3280 		if (v)
3281 			return n;
3282 	}
3283 
3284 	hlist_for_each_entry_continue(n, hash) {
3285 		tmp = neigh_get_valid(seq, n, pos);
3286 		if (tmp) {
3287 			n = tmp;
3288 			goto out;
3289 		}
3290 	}
3291 
3292 	n = neigh_get_first(seq);
3293 out:
3294 	if (n && pos)
3295 		--(*pos);
3296 
3297 	return n;
3298 }
3299 
3300 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3301 {
3302 	struct neighbour *n = neigh_get_first(seq);
3303 
3304 	if (n) {
3305 		--(*pos);
3306 		while (*pos) {
3307 			n = neigh_get_next(seq, n, pos);
3308 			if (!n)
3309 				break;
3310 		}
3311 	}
3312 	return *pos ? NULL : n;
3313 }
3314 
3315 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3316 {
3317 	struct neigh_seq_state *state = seq->private;
3318 	struct net *net = seq_file_net(seq);
3319 	struct neigh_table *tbl = state->tbl;
3320 	struct pneigh_entry *pn = NULL;
3321 	int bucket;
3322 
3323 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
3324 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3325 		pn = rcu_dereference(tbl->phash_buckets[bucket]);
3326 
3327 		while (pn && !net_eq(pneigh_net(pn), net))
3328 			pn = rcu_dereference(pn->next);
3329 		if (pn)
3330 			break;
3331 	}
3332 	state->bucket = bucket;
3333 
3334 	return pn;
3335 }
3336 
3337 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3338 					    struct pneigh_entry *pn,
3339 					    loff_t *pos)
3340 {
3341 	struct neigh_seq_state *state = seq->private;
3342 	struct net *net = seq_file_net(seq);
3343 	struct neigh_table *tbl = state->tbl;
3344 
3345 	do {
3346 		pn = rcu_dereference(pn->next);
3347 	} while (pn && !net_eq(pneigh_net(pn), net));
3348 
3349 	while (!pn) {
3350 		if (++state->bucket > PNEIGH_HASHMASK)
3351 			break;
3352 
3353 		pn = rcu_dereference(tbl->phash_buckets[state->bucket]);
3354 
3355 		while (pn && !net_eq(pneigh_net(pn), net))
3356 			pn = rcu_dereference(pn->next);
3357 		if (pn)
3358 			break;
3359 	}
3360 
3361 	if (pn && pos)
3362 		--(*pos);
3363 
3364 	return pn;
3365 }
3366 
3367 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3368 {
3369 	struct pneigh_entry *pn = pneigh_get_first(seq);
3370 
3371 	if (pn) {
3372 		--(*pos);
3373 		while (*pos) {
3374 			pn = pneigh_get_next(seq, pn, pos);
3375 			if (!pn)
3376 				break;
3377 		}
3378 	}
3379 	return *pos ? NULL : pn;
3380 }
3381 
3382 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3383 {
3384 	struct neigh_seq_state *state = seq->private;
3385 	void *rc;
3386 	loff_t idxpos = *pos;
3387 
3388 	rc = neigh_get_idx(seq, &idxpos);
3389 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3390 		rc = pneigh_get_idx(seq, &idxpos);
3391 
3392 	return rc;
3393 }
3394 
3395 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3396 	__acquires(tbl->lock)
3397 	__acquires(rcu)
3398 {
3399 	struct neigh_seq_state *state = seq->private;
3400 
3401 	state->tbl = tbl;
3402 	state->bucket = -1;
3403 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3404 
3405 	rcu_read_lock();
3406 	state->nht = rcu_dereference(tbl->nht);
3407 	read_lock_bh(&tbl->lock);
3408 
3409 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3410 }
3411 EXPORT_SYMBOL(neigh_seq_start);
3412 
3413 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3414 {
3415 	struct neigh_seq_state *state;
3416 	void *rc;
3417 
3418 	if (v == SEQ_START_TOKEN) {
3419 		rc = neigh_get_first(seq);
3420 		goto out;
3421 	}
3422 
3423 	state = seq->private;
3424 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3425 		rc = neigh_get_next(seq, v, NULL);
3426 		if (rc)
3427 			goto out;
3428 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3429 			rc = pneigh_get_first(seq);
3430 	} else {
3431 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3432 		rc = pneigh_get_next(seq, v, NULL);
3433 	}
3434 out:
3435 	++(*pos);
3436 	return rc;
3437 }
3438 EXPORT_SYMBOL(neigh_seq_next);
3439 
3440 void neigh_seq_stop(struct seq_file *seq, void *v)
3441 	__releases(tbl->lock)
3442 	__releases(rcu)
3443 {
3444 	struct neigh_seq_state *state = seq->private;
3445 	struct neigh_table *tbl = state->tbl;
3446 
3447 	read_unlock_bh(&tbl->lock);
3448 	rcu_read_unlock();
3449 }
3450 EXPORT_SYMBOL(neigh_seq_stop);
3451 
3452 /* statistics via seq_file */
3453 
3454 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3455 {
3456 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3457 	int cpu;
3458 
3459 	if (*pos == 0)
3460 		return SEQ_START_TOKEN;
3461 
3462 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3463 		if (!cpu_possible(cpu))
3464 			continue;
3465 		*pos = cpu+1;
3466 		return per_cpu_ptr(tbl->stats, cpu);
3467 	}
3468 	return NULL;
3469 }
3470 
3471 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3472 {
3473 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3474 	int cpu;
3475 
3476 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3477 		if (!cpu_possible(cpu))
3478 			continue;
3479 		*pos = cpu+1;
3480 		return per_cpu_ptr(tbl->stats, cpu);
3481 	}
3482 	(*pos)++;
3483 	return NULL;
3484 }
3485 
/* ->stop for the statistics file: the matching ->start takes no locks */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
	/* nothing to release */
}
3490 
3491 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3492 {
3493 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3494 	struct neigh_statistics *st = v;
3495 
3496 	if (v == SEQ_START_TOKEN) {
3497 		seq_puts(seq, "entries  allocs   destroys hash_grows lookups  hits     res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3498 		return 0;
3499 	}
3500 
3501 	seq_printf(seq, "%08x %08lx %08lx %08lx   %08lx %08lx %08lx   "
3502 			"%08lx         %08lx         %08lx         "
3503 			"%08lx       %08lx            %08lx\n",
3504 		   atomic_read(&tbl->entries),
3505 
3506 		   st->allocs,
3507 		   st->destroys,
3508 		   st->hash_grows,
3509 
3510 		   st->lookups,
3511 		   st->hits,
3512 
3513 		   st->res_failed,
3514 
3515 		   st->rcv_probes_mcast,
3516 		   st->rcv_probes_ucast,
3517 
3518 		   st->periodic_gc_runs,
3519 		   st->forced_gc_runs,
3520 		   st->unres_discards,
3521 		   st->table_fulls
3522 		   );
3523 
3524 	return 0;
3525 }
3526 
3527 static const struct seq_operations neigh_stat_seq_ops = {
3528 	.start	= neigh_stat_seq_start,
3529 	.next	= neigh_stat_seq_next,
3530 	.stop	= neigh_stat_seq_stop,
3531 	.show	= neigh_stat_seq_show,
3532 };
3533 #endif /* CONFIG_PROC_FS */
3534 
3535 static void __neigh_notify(struct neighbour *n, int type, int flags,
3536 			   u32 pid)
3537 {
3538 	struct sk_buff *skb;
3539 	int err = -ENOBUFS;
3540 	struct net *net;
3541 
3542 	rcu_read_lock();
3543 	net = dev_net_rcu(n->dev);
3544 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3545 	if (skb == NULL)
3546 		goto errout;
3547 
3548 	err = neigh_fill_info(skb, n, pid, 0, type, flags);
3549 	if (err < 0) {
3550 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3551 		WARN_ON(err == -EMSGSIZE);
3552 		kfree_skb(skb);
3553 		goto errout;
3554 	}
3555 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3556 	goto out;
3557 errout:
3558 	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3559 out:
3560 	rcu_read_unlock();
3561 }
3562 
3563 void neigh_app_ns(struct neighbour *n)
3564 {
3565 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3566 }
3567 EXPORT_SYMBOL(neigh_app_ns);
3568 
3569 #ifdef CONFIG_SYSCTL
3570 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3571 
3572 static int proc_unres_qlen(const struct ctl_table *ctl, int write,
3573 			   void *buffer, size_t *lenp, loff_t *ppos)
3574 {
3575 	int size, ret;
3576 	struct ctl_table tmp = *ctl;
3577 
3578 	tmp.extra1 = SYSCTL_ZERO;
3579 	tmp.extra2 = &unres_qlen_max;
3580 	tmp.data = &size;
3581 
3582 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3583 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3584 
3585 	if (write && !ret)
3586 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3587 	return ret;
3588 }
3589 
3590 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3591 				  int index)
3592 {
3593 	struct net_device *dev;
3594 	int family = neigh_parms_family(p);
3595 
3596 	rcu_read_lock();
3597 	for_each_netdev_rcu(net, dev) {
3598 		struct neigh_parms *dst_p =
3599 				neigh_get_dev_parms_rcu(dev, family);
3600 
3601 		if (dst_p && !test_bit(index, dst_p->data_state))
3602 			dst_p->data[index] = p->data[index];
3603 	}
3604 	rcu_read_unlock();
3605 }
3606 
3607 static void neigh_proc_update(const struct ctl_table *ctl, int write)
3608 {
3609 	struct net_device *dev = ctl->extra1;
3610 	struct neigh_parms *p = ctl->extra2;
3611 	struct net *net = neigh_parms_net(p);
3612 	int index = (int *) ctl->data - p->data;
3613 
3614 	if (!write)
3615 		return;
3616 
3617 	set_bit(index, p->data_state);
3618 	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3619 		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3620 	if (!dev) /* NULL dev means this is default value */
3621 		neigh_copy_dflt_parms(net, p, index);
3622 }
3623 
3624 static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
3625 					   void *buffer, size_t *lenp,
3626 					   loff_t *ppos)
3627 {
3628 	struct ctl_table tmp = *ctl;
3629 	int ret;
3630 
3631 	tmp.extra1 = SYSCTL_ZERO;
3632 	tmp.extra2 = SYSCTL_INT_MAX;
3633 
3634 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3635 	neigh_proc_update(ctl, write);
3636 	return ret;
3637 }
3638 
3639 static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
3640 						   void *buffer, size_t *lenp, loff_t *ppos)
3641 {
3642 	struct ctl_table tmp = *ctl;
3643 	int ret;
3644 
3645 	int min = msecs_to_jiffies(1);
3646 
3647 	tmp.extra1 = &min;
3648 	tmp.extra2 = NULL;
3649 
3650 	ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
3651 	neigh_proc_update(ctl, write);
3652 	return ret;
3653 }
3654 
3655 int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
3656 			size_t *lenp, loff_t *ppos)
3657 {
3658 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3659 
3660 	neigh_proc_update(ctl, write);
3661 	return ret;
3662 }
3663 EXPORT_SYMBOL(neigh_proc_dointvec);
3664 
3665 int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
3666 				size_t *lenp, loff_t *ppos)
3667 {
3668 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3669 
3670 	neigh_proc_update(ctl, write);
3671 	return ret;
3672 }
3673 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3674 
3675 static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
3676 					      void *buffer, size_t *lenp,
3677 					      loff_t *ppos)
3678 {
3679 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3680 
3681 	neigh_proc_update(ctl, write);
3682 	return ret;
3683 }
3684 
3685 int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
3686 				   void *buffer, size_t *lenp, loff_t *ppos)
3687 {
3688 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3689 
3690 	neigh_proc_update(ctl, write);
3691 	return ret;
3692 }
3693 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3694 
3695 static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
3696 					  void *buffer, size_t *lenp,
3697 					  loff_t *ppos)
3698 {
3699 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3700 
3701 	neigh_proc_update(ctl, write);
3702 	return ret;
3703 }
3704 
3705 static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
3706 					  void *buffer, size_t *lenp,
3707 					  loff_t *ppos)
3708 {
3709 	struct neigh_parms *p = ctl->extra2;
3710 	int ret;
3711 
3712 	if (strcmp(ctl->procname, "base_reachable_time") == 0)
3713 		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3714 	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3715 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3716 	else
3717 		ret = -1;
3718 
3719 	if (write && ret == 0) {
3720 		/* update reachable_time as well, otherwise, the change will
3721 		 * only be effective after the next time neigh_periodic_work
3722 		 * decides to recompute it
3723 		 */
3724 		p->reachable_time =
3725 			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3726 	}
3727 	return ret;
3728 }
3729 
/*
 * Offset of data[index] inside struct neigh_parms, expressed as a pointer
 * computed from a NULL base.  The template stores this offset in .data;
 * neigh_sysctl_register() adds the address of the actual parms to it.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/*
 * Initializer for template slot NEIGH_VAR_<attr>: an int-sized entry called
 * @name with mode @mval and handler @proc, whose storage is the parms data
 * slot NEIGH_VAR_<data_attr> (which may differ from <attr>, see the
 * *_REUSED_ENTRY variants below).
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* 0644 entry holding an int bounded to [0, INT_MAX] */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

/* 0644 entry handled by neigh_proc_dointvec_jiffies */
#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

/* 0644 entry handled by neigh_proc_dointvec_userhz_jiffies */
#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

/* 0644 millisecond entry with a minimum of msecs_to_jiffies(1) */
#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)

/* 0644 millisecond entry backed by another attribute's data slot */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* 0644 packet-count entry backed by another attribute's (byte) data slot */
#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3759 
/*
 * Template for a neigh sysctl directory; neigh_sysctl_register() duplicates
 * it for each registration and patches the copy.
 *
 * Entries built with the NEIGH_SYSCTL_*_ENTRY macros carry an offset into
 * struct neigh_parms in .data.  The gc_* entries have no .data here: it is
 * filled in at registration time, and only for the "default" directory.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
						       "interval_probe_time_ms"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		/* Alternate views: same data slot as the second argument */
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		/* Table-wide GC knobs; registered for the default directory only */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
	},
};
3815 
3816 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3817 			  proc_handler *handler)
3818 {
3819 	int i;
3820 	struct neigh_sysctl_table *t;
3821 	const char *dev_name_source;
3822 	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3823 	char *p_name;
3824 	size_t neigh_vars_size;
3825 
3826 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
3827 	if (!t)
3828 		goto err;
3829 
3830 	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3831 		t->neigh_vars[i].data += (long) p;
3832 		t->neigh_vars[i].extra1 = dev;
3833 		t->neigh_vars[i].extra2 = p;
3834 	}
3835 
3836 	neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
3837 	if (dev) {
3838 		dev_name_source = dev->name;
3839 		/* Terminate the table early */
3840 		neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
3841 	} else {
3842 		struct neigh_table *tbl = p->tbl;
3843 		dev_name_source = "default";
3844 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3845 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3846 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3847 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3848 	}
3849 
3850 	if (handler) {
3851 		/* RetransTime */
3852 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3853 		/* ReachableTime */
3854 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3855 		/* RetransTime (in milliseconds)*/
3856 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3857 		/* ReachableTime (in milliseconds) */
3858 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3859 	} else {
3860 		/* Those handlers will update p->reachable_time after
3861 		 * base_reachable_time(_ms) is set to ensure the new timer starts being
3862 		 * applied after the next neighbour update instead of waiting for
3863 		 * neigh_periodic_work to update its value (can be multiple minutes)
3864 		 * So any handler that replaces them should do this as well
3865 		 */
3866 		/* ReachableTime */
3867 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3868 			neigh_proc_base_reachable_time;
3869 		/* ReachableTime (in milliseconds) */
3870 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3871 			neigh_proc_base_reachable_time;
3872 	}
3873 
3874 	switch (neigh_parms_family(p)) {
3875 	case AF_INET:
3876 	      p_name = "ipv4";
3877 	      break;
3878 	case AF_INET6:
3879 	      p_name = "ipv6";
3880 	      break;
3881 	default:
3882 	      BUG();
3883 	}
3884 
3885 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3886 		p_name, dev_name_source);
3887 	t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
3888 						  neigh_path, t->neigh_vars,
3889 						  neigh_vars_size);
3890 	if (!t->sysctl_header)
3891 		goto free;
3892 
3893 	p->sysctl_table = t;
3894 	return 0;
3895 
3896 free:
3897 	kfree(t);
3898 err:
3899 	return -ENOBUFS;
3900 }
3901 EXPORT_SYMBOL(neigh_sysctl_register);
3902 
3903 void neigh_sysctl_unregister(struct neigh_parms *p)
3904 {
3905 	if (p->sysctl_table) {
3906 		struct neigh_sysctl_table *t = p->sysctl_table;
3907 		p->sysctl_table = NULL;
3908 		unregister_net_sysctl_table(t->sysctl_header);
3909 		kfree(t);
3910 	}
3911 }
3912 EXPORT_SYMBOL(neigh_sysctl_unregister);
3913 
3914 #endif	/* CONFIG_SYSCTL */
3915 
/*
 * rtnetlink message handlers registered by neigh_init().  Only the
 * RTM_GETNEIGH entry sets flags: both its doit and its dumpit are marked
 * *_UNLOCKED.
 */
static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
	{.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
	{.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
	{.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
	{.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info},
	{.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set},
};
3924 
3925 static int __init neigh_init(void)
3926 {
3927 	rtnl_register_many(neigh_rtnl_msg_handlers);
3928 	return 0;
3929 }
3930 
3931 subsys_initcall(neigh_init);
3932