xref: /linux/net/core/neighbour.c (revision 4ab5a5d2a4a2289c2af07accbec7170ca5671f41)
1 /*
2  *	Generic address resolution entity
3  *
4  *	Authors:
5  *	Pedro Roque		<roque@di.fc.ul.pt>
6  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *	Fixes:
14  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
15  *	Harald Welte		Add neighbour cache statistics like rtstat
16  */
17 
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42 #include <net/addrconf.h>
43 
44 #define DEBUG
45 #define NEIGH_DEBUG 1
46 #define neigh_dbg(level, fmt, ...)		\
47 do {						\
48 	if (level <= NEIGH_DEBUG)		\
49 		pr_debug(fmt, ##__VA_ARGS__);	\
50 } while (0)
51 
52 #define PNEIGH_HASHMASK		0xF
53 
54 static void neigh_timer_handler(struct timer_list *t);
55 static void __neigh_notify(struct neighbour *n, int type, int flags,
56 			   u32 pid);
57 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
58 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
59 				    struct net_device *dev);
60 
61 #ifdef CONFIG_PROC_FS
62 static const struct seq_operations neigh_stat_seq_ops;
63 #endif
64 
65 /*
66    Neighbour hash table buckets are protected with the rwlock tbl->lock.
67 
68    - All scans/updates of hash buckets MUST be made under this lock.
69    - NOTHING clever should be done under this lock: no callbacks
70      into protocol backends, no attempts to send anything to the network.
71      Doing so will result in deadlocks if the backend/driver wants to
72      use the neighbour cache.
73    - If the entry requires some non-trivial action, increase
74      its reference count and release the table lock.
75 
76    Neighbour entries are protected:
77    - by the reference count.
78    - by the rwlock neigh->lock.
79 
80    The reference count prevents destruction.
81 
82    neigh->lock mainly serializes the ll address data and its validity state.
83    However, the same lock is also used to protect other entry fields:
84     - the timer
85     - the resolution queue
86 
87    Again, nothing clever shall be done under neigh->lock;
88    the most complicated procedure we allow there is dev->hard_header.
89    dev->hard_header is assumed to be simple and not to call back
90    into the neighbour tables.
91  */
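
/*
   A minimal sketch of the usage pattern the rules above imply
   (illustrative only, not a real caller; error handling elided):

	write_lock_bh(&tbl->lock);
	n = <scan hash bucket>;		- find the entry under the lock
	neigh_hold(n);			- pin it with a reference
	write_unlock_bh(&tbl->lock);
	<non-trivial work: drivers, transmission, may block>
	neigh_release(n);		- drop the pin when done
 */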
92 
93 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
94 {
95 	kfree_skb(skb);
96 	return -ENETDOWN;
97 }
98 
99 static void neigh_cleanup_and_release(struct neighbour *neigh)
100 {
101 	if (neigh->parms->neigh_cleanup)
102 		neigh->parms->neigh_cleanup(neigh);
103 
104 	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
105 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
106 	neigh_release(neigh);
107 }
108 
109 /*
110  * Returns a value randomly distributed in the interval
111  * (1/2)*base...(3/2)*base. It corresponds to the default IPv6 settings
112  * and is not overridable, because it is a really reasonable choice.
113  */
114 
115 unsigned long neigh_rand_reach_time(unsigned long base)
116 {
117 	return base ? (prandom_u32() % base) + (base >> 1) : 0;
118 }
119 EXPORT_SYMBOL(neigh_rand_reach_time);
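
/*
 * For example, with the common default base of 30 * HZ (a 30 second
 * BASE_REACHABLE_TIME), (prandom_u32() % base) lies in [0, 30s) and
 * (base >> 1) adds 15s, so the result lies in [15 * HZ, 45 * HZ).
 */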
120 
121 
122 static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
123 		      struct neighbour __rcu **np, struct neigh_table *tbl)
124 {
125 	bool retval = false;
126 
127 	write_lock(&n->lock);
128 	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
129 	    !(n->flags & flags)) {
130 		struct neighbour *neigh;
131 
132 		neigh = rcu_dereference_protected(n->next,
133 						  lockdep_is_held(&tbl->lock));
134 		rcu_assign_pointer(*np, neigh);
135 		n->dead = 1;
136 		retval = true;
137 	}
138 	write_unlock(&n->lock);
139 	if (retval)
140 		neigh_cleanup_and_release(n);
141 	return retval;
142 }
143 
144 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
145 {
146 	struct neigh_hash_table *nht;
147 	void *pkey = ndel->primary_key;
148 	u32 hash_val;
149 	struct neighbour *n;
150 	struct neighbour __rcu **np;
151 
152 	nht = rcu_dereference_protected(tbl->nht,
153 					lockdep_is_held(&tbl->lock));
154 	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
155 	hash_val = hash_val >> (32 - nht->hash_shift);
156 
157 	np = &nht->hash_buckets[hash_val];
158 	while ((n = rcu_dereference_protected(*np,
159 					      lockdep_is_held(&tbl->lock)))) {
160 		if (n == ndel)
161 			return neigh_del(n, 0, 0, np, tbl);
162 		np = &n->next;
163 	}
164 	return false;
165 }
166 
167 static int neigh_forced_gc(struct neigh_table *tbl)
168 {
169 	int shrunk = 0;
170 	int i;
171 	struct neigh_hash_table *nht;
172 
173 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
174 
175 	write_lock_bh(&tbl->lock);
176 	nht = rcu_dereference_protected(tbl->nht,
177 					lockdep_is_held(&tbl->lock));
178 	for (i = 0; i < (1 << nht->hash_shift); i++) {
179 		struct neighbour *n;
180 		struct neighbour __rcu **np;
181 
182 		np = &nht->hash_buckets[i];
183 		while ((n = rcu_dereference_protected(*np,
184 					lockdep_is_held(&tbl->lock))) != NULL) {
185 			/* Neighbour record may be discarded if:
186 			 * - nobody refers to it.
187 			 * - it is not permanent
188 			 */
189 			if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
190 				      tbl)) {
191 				shrunk = 1;
192 				continue;
193 			}
194 			np = &n->next;
195 		}
196 	}
197 
198 	tbl->last_flush = jiffies;
199 
200 	write_unlock_bh(&tbl->lock);
201 
202 	return shrunk;
203 }
204 
205 static void neigh_add_timer(struct neighbour *n, unsigned long when)
206 {
207 	neigh_hold(n);
208 	if (unlikely(mod_timer(&n->timer, when))) {
209 		printk("NEIGH: BUG, double timer add, state is %x\n",
210 		       n->nud_state);
211 		dump_stack();
212 	}
213 }
214 
215 static int neigh_del_timer(struct neighbour *n)
216 {
217 	if ((n->nud_state & NUD_IN_TIMER) &&
218 	    del_timer(&n->timer)) {
219 		neigh_release(n);
220 		return 1;
221 	}
222 	return 0;
223 }
224 
225 static void pneigh_queue_purge(struct sk_buff_head *list)
226 {
227 	struct sk_buff *skb;
228 
229 	while ((skb = skb_dequeue(list)) != NULL) {
230 		dev_put(skb->dev);
231 		kfree_skb(skb);
232 	}
233 }
234 
235 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
236 			    bool skip_perm)
237 {
238 	int i;
239 	struct neigh_hash_table *nht;
240 
241 	nht = rcu_dereference_protected(tbl->nht,
242 					lockdep_is_held(&tbl->lock));
243 
244 	for (i = 0; i < (1 << nht->hash_shift); i++) {
245 		struct neighbour *n;
246 		struct neighbour __rcu **np = &nht->hash_buckets[i];
247 
248 		while ((n = rcu_dereference_protected(*np,
249 					lockdep_is_held(&tbl->lock))) != NULL) {
250 			if (dev && n->dev != dev) {
251 				np = &n->next;
252 				continue;
253 			}
254 			if (skip_perm && n->nud_state & NUD_PERMANENT) {
255 				np = &n->next;
256 				continue;
257 			}
258 			rcu_assign_pointer(*np,
259 				   rcu_dereference_protected(n->next,
260 						lockdep_is_held(&tbl->lock)));
261 			write_lock(&n->lock);
262 			neigh_del_timer(n);
263 			n->dead = 1;
264 
265 			if (refcount_read(&n->refcnt) != 1) {
266 				/* The most unpleasant situation.
267 				   We must destroy the neighbour entry,
268 				   but someone still uses it.
269 
270 				   Destruction will be delayed until
271 				   the last user releases it, but we
272 				   must kill the timers etc. and move
273 				   the entry to a safe state.
274 				 */
275 				__skb_queue_purge(&n->arp_queue);
276 				n->arp_queue_len_bytes = 0;
277 				n->output = neigh_blackhole;
278 				if (n->nud_state & NUD_VALID)
279 					n->nud_state = NUD_NOARP;
280 				else
281 					n->nud_state = NUD_NONE;
282 				neigh_dbg(2, "neigh %p is stray\n", n);
283 			}
284 			write_unlock(&n->lock);
285 			neigh_cleanup_and_release(n);
286 		}
287 	}
288 }
289 
290 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
291 {
292 	write_lock_bh(&tbl->lock);
293 	neigh_flush_dev(tbl, dev, false);
294 	write_unlock_bh(&tbl->lock);
295 }
296 EXPORT_SYMBOL(neigh_changeaddr);
297 
298 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
299 			  bool skip_perm)
300 {
301 	write_lock_bh(&tbl->lock);
302 	neigh_flush_dev(tbl, dev, skip_perm);
303 	pneigh_ifdown_and_unlock(tbl, dev);
304 
305 	del_timer_sync(&tbl->proxy_timer);
306 	pneigh_queue_purge(&tbl->proxy_queue);
307 	return 0;
308 }
309 
310 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
311 {
312 	__neigh_ifdown(tbl, dev, true);
313 	return 0;
314 }
315 EXPORT_SYMBOL(neigh_carrier_down);
316 
317 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
318 {
319 	__neigh_ifdown(tbl, dev, false);
320 	return 0;
321 }
322 EXPORT_SYMBOL(neigh_ifdown);
323 
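/*
 * Entry allocation is throttled by the gc_thresh* knobs, as checked
 * below: a forced GC runs once the table grows past gc_thresh3, or past
 * gc_thresh2 when the last flush was more than 5 seconds ago. If the
 * forced GC frees nothing and we are still at gc_thresh3 or above, the
 * allocation fails. (gc_thresh1, checked in neigh_periodic_work(), is
 * the size below which the periodic worker leaves the table alone.)
 */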
324 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
325 {
326 	struct neighbour *n = NULL;
327 	unsigned long now = jiffies;
328 	int entries;
329 
330 	entries = atomic_inc_return(&tbl->entries) - 1;
331 	if (entries >= tbl->gc_thresh3 ||
332 	    (entries >= tbl->gc_thresh2 &&
333 	     time_after(now, tbl->last_flush + 5 * HZ))) {
334 		if (!neigh_forced_gc(tbl) &&
335 		    entries >= tbl->gc_thresh3) {
336 			net_info_ratelimited("%s: neighbor table overflow!\n",
337 					     tbl->id);
338 			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
339 			goto out_entries;
340 		}
341 	}
342 
343 	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
344 	if (!n)
345 		goto out_entries;
346 
347 	__skb_queue_head_init(&n->arp_queue);
348 	rwlock_init(&n->lock);
349 	seqlock_init(&n->ha_lock);
350 	n->updated	  = n->used = now;
351 	n->nud_state	  = NUD_NONE;
352 	n->output	  = neigh_blackhole;
353 	seqlock_init(&n->hh.hh_lock);
354 	n->parms	  = neigh_parms_clone(&tbl->parms);
355 	timer_setup(&n->timer, neigh_timer_handler, 0);
356 
357 	NEIGH_CACHE_STAT_INC(tbl, allocs);
358 	n->tbl		  = tbl;
359 	refcount_set(&n->refcnt, 1);
360 	n->dead		  = 1;
361 out:
362 	return n;
363 
364 out_entries:
365 	atomic_dec(&tbl->entries);
366 	goto out;
367 }
368 
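/* Force the low bit on so hash_rnd is always odd: the per-protocol hash
 * functions (e.g. arp_hashfn()) use it as a multiplier, and an odd
 * multiplier keeps the multiplication bijective over u32.
 */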
369 static void neigh_get_hash_rnd(u32 *x)
370 {
371 	*x = get_random_u32() | 1;
372 }
373 
374 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
375 {
376 	size_t size = (1 << shift) * sizeof(struct neighbour *);
377 	struct neigh_hash_table *ret;
378 	struct neighbour __rcu **buckets;
379 	int i;
380 
381 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
382 	if (!ret)
383 		return NULL;
384 	if (size <= PAGE_SIZE)
385 		buckets = kzalloc(size, GFP_ATOMIC);
386 	else
387 		buckets = (struct neighbour __rcu **)
388 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
389 					   get_order(size));
390 	if (!buckets) {
391 		kfree(ret);
392 		return NULL;
393 	}
394 	ret->hash_buckets = buckets;
395 	ret->hash_shift = shift;
396 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
397 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
398 	return ret;
399 }
400 
401 static void neigh_hash_free_rcu(struct rcu_head *head)
402 {
403 	struct neigh_hash_table *nht = container_of(head,
404 						    struct neigh_hash_table,
405 						    rcu);
406 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
407 	struct neighbour __rcu **buckets = nht->hash_buckets;
408 
409 	if (size <= PAGE_SIZE)
410 		kfree(buckets);
411 	else
412 		free_pages((unsigned long)buckets, get_order(size));
413 	kfree(nht);
414 }
415 
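/* Rehash every entry into a table of 1 << new_shift buckets. Called
 * with tbl->lock held for writing; the old table is freed via RCU, so
 * lockless readers see either the old or the new table. Relinking
 * n->next can briefly route a reader from an old-table chain into a
 * new-table chain, which lookups tolerate by always comparing the key.
 */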
416 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
417 						unsigned long new_shift)
418 {
419 	unsigned int i, hash;
420 	struct neigh_hash_table *new_nht, *old_nht;
421 
422 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
423 
424 	old_nht = rcu_dereference_protected(tbl->nht,
425 					    lockdep_is_held(&tbl->lock));
426 	new_nht = neigh_hash_alloc(new_shift);
427 	if (!new_nht)
428 		return old_nht;
429 
430 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
431 		struct neighbour *n, *next;
432 
433 		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
434 						   lockdep_is_held(&tbl->lock));
435 		     n != NULL;
436 		     n = next) {
437 			hash = tbl->hash(n->primary_key, n->dev,
438 					 new_nht->hash_rnd);
439 
440 			hash >>= (32 - new_nht->hash_shift);
441 			next = rcu_dereference_protected(n->next,
442 						lockdep_is_held(&tbl->lock));
443 
444 			rcu_assign_pointer(n->next,
445 					   rcu_dereference_protected(
446 						new_nht->hash_buckets[hash],
447 						lockdep_is_held(&tbl->lock)));
448 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
449 		}
450 	}
451 
452 	rcu_assign_pointer(tbl->nht, new_nht);
453 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
454 	return new_nht;
455 }
456 
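/* Look up an entry and take a reference on it. This runs under
 * rcu_read_lock_bh(), and refcount_inc_not_zero() is what makes it safe
 * against concurrent teardown: an entry whose refcount already hit zero
 * must not be resurrected, so such a hit is reported as a miss.
 */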
457 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
458 			       struct net_device *dev)
459 {
460 	struct neighbour *n;
461 
462 	NEIGH_CACHE_STAT_INC(tbl, lookups);
463 
464 	rcu_read_lock_bh();
465 	n = __neigh_lookup_noref(tbl, pkey, dev);
466 	if (n) {
467 		if (!refcount_inc_not_zero(&n->refcnt))
468 			n = NULL;
469 		NEIGH_CACHE_STAT_INC(tbl, hits);
470 	}
471 
472 	rcu_read_unlock_bh();
473 	return n;
474 }
475 EXPORT_SYMBOL(neigh_lookup);
476 
477 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
478 				     const void *pkey)
479 {
480 	struct neighbour *n;
481 	unsigned int key_len = tbl->key_len;
482 	u32 hash_val;
483 	struct neigh_hash_table *nht;
484 
485 	NEIGH_CACHE_STAT_INC(tbl, lookups);
486 
487 	rcu_read_lock_bh();
488 	nht = rcu_dereference_bh(tbl->nht);
489 	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
490 
491 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
492 	     n != NULL;
493 	     n = rcu_dereference_bh(n->next)) {
494 		if (!memcmp(n->primary_key, pkey, key_len) &&
495 		    net_eq(dev_net(n->dev), net)) {
496 			if (!refcount_inc_not_zero(&n->refcnt))
497 				n = NULL;
498 			NEIGH_CACHE_STAT_INC(tbl, hits);
499 			break;
500 		}
501 	}
502 
503 	rcu_read_unlock_bh();
504 	return n;
505 }
506 EXPORT_SYMBOL(neigh_lookup_nodev);
507 
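/* Create an entry, or return an existing one. The entry is allocated
 * and constructed before tbl->lock is taken; the hash chain is then
 * rechecked under the lock, and if another CPU inserted the same
 * <key, dev> pair first, that entry wins and the fresh one is released.
 */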
508 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
509 				 struct net_device *dev, bool want_ref)
510 {
511 	u32 hash_val;
512 	unsigned int key_len = tbl->key_len;
513 	int error;
514 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
515 	struct neigh_hash_table *nht;
516 
517 	if (!n) {
518 		rc = ERR_PTR(-ENOBUFS);
519 		goto out;
520 	}
521 
522 	memcpy(n->primary_key, pkey, key_len);
523 	n->dev = dev;
524 	dev_hold(dev);
525 
526 	/* Protocol specific setup. */
527 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
528 		rc = ERR_PTR(error);
529 		goto out_neigh_release;
530 	}
531 
532 	if (dev->netdev_ops->ndo_neigh_construct) {
533 		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
534 		if (error < 0) {
535 			rc = ERR_PTR(error);
536 			goto out_neigh_release;
537 		}
538 	}
539 
540 	/* Device specific setup. */
541 	if (n->parms->neigh_setup &&
542 	    (error = n->parms->neigh_setup(n)) < 0) {
543 		rc = ERR_PTR(error);
544 		goto out_neigh_release;
545 	}
546 
547 	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
548 
549 	write_lock_bh(&tbl->lock);
550 	nht = rcu_dereference_protected(tbl->nht,
551 					lockdep_is_held(&tbl->lock));
552 
553 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
554 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
555 
556 	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
557 
558 	if (n->parms->dead) {
559 		rc = ERR_PTR(-EINVAL);
560 		goto out_tbl_unlock;
561 	}
562 
563 	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
564 					    lockdep_is_held(&tbl->lock));
565 	     n1 != NULL;
566 	     n1 = rcu_dereference_protected(n1->next,
567 			lockdep_is_held(&tbl->lock))) {
568 		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
569 			if (want_ref)
570 				neigh_hold(n1);
571 			rc = n1;
572 			goto out_tbl_unlock;
573 		}
574 	}
575 
576 	n->dead = 0;
577 	if (want_ref)
578 		neigh_hold(n);
579 	rcu_assign_pointer(n->next,
580 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
581 						     lockdep_is_held(&tbl->lock)));
582 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
583 	write_unlock_bh(&tbl->lock);
584 	neigh_dbg(2, "neigh %p is created\n", n);
585 	rc = n;
586 out:
587 	return rc;
588 out_tbl_unlock:
589 	write_unlock_bh(&tbl->lock);
590 out_neigh_release:
591 	neigh_release(n);
592 	goto out;
593 }
594 EXPORT_SYMBOL(__neigh_create);
595 
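/* pneigh entries describe addresses we are proxying for (proxy ARP/ND).
 * They live in a separate, fixed-size hash table of PNEIGH_HASHMASK + 1
 * buckets, tbl->phash_buckets, protected by tbl->lock like the main table.
 */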
596 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
597 {
598 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
599 	hash_val ^= (hash_val >> 16);
600 	hash_val ^= hash_val >> 8;
601 	hash_val ^= hash_val >> 4;
602 	hash_val &= PNEIGH_HASHMASK;
603 	return hash_val;
604 }
605 
606 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
607 					      struct net *net,
608 					      const void *pkey,
609 					      unsigned int key_len,
610 					      struct net_device *dev)
611 {
612 	while (n) {
613 		if (!memcmp(n->key, pkey, key_len) &&
614 		    net_eq(pneigh_net(n), net) &&
615 		    (n->dev == dev || !n->dev))
616 			return n;
617 		n = n->next;
618 	}
619 	return NULL;
620 }
621 
622 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
623 		struct net *net, const void *pkey, struct net_device *dev)
624 {
625 	unsigned int key_len = tbl->key_len;
626 	u32 hash_val = pneigh_hash(pkey, key_len);
627 
628 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
629 				 net, pkey, key_len, dev);
630 }
631 EXPORT_SYMBOL_GPL(__pneigh_lookup);
632 
633 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
634 				    struct net *net, const void *pkey,
635 				    struct net_device *dev, int creat)
636 {
637 	struct pneigh_entry *n;
638 	unsigned int key_len = tbl->key_len;
639 	u32 hash_val = pneigh_hash(pkey, key_len);
640 
641 	read_lock_bh(&tbl->lock);
642 	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
643 			      net, pkey, key_len, dev);
644 	read_unlock_bh(&tbl->lock);
645 
646 	if (n || !creat)
647 		goto out;
648 
649 	ASSERT_RTNL();
650 
651 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
652 	if (!n)
653 		goto out;
654 
655 	write_pnet(&n->net, net);
656 	memcpy(n->key, pkey, key_len);
657 	n->dev = dev;
658 	if (dev)
659 		dev_hold(dev);
660 
661 	if (tbl->pconstructor && tbl->pconstructor(n)) {
662 		if (dev)
663 			dev_put(dev);
664 		kfree(n);
665 		n = NULL;
666 		goto out;
667 	}
668 
669 	write_lock_bh(&tbl->lock);
670 	n->next = tbl->phash_buckets[hash_val];
671 	tbl->phash_buckets[hash_val] = n;
672 	write_unlock_bh(&tbl->lock);
673 out:
674 	return n;
675 }
676 EXPORT_SYMBOL(pneigh_lookup);
677 
678 
679 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
680 		  struct net_device *dev)
681 {
682 	struct pneigh_entry *n, **np;
683 	unsigned int key_len = tbl->key_len;
684 	u32 hash_val = pneigh_hash(pkey, key_len);
685 
686 	write_lock_bh(&tbl->lock);
687 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
688 	     np = &n->next) {
689 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
690 		    net_eq(pneigh_net(n), net)) {
691 			*np = n->next;
692 			write_unlock_bh(&tbl->lock);
693 			if (tbl->pdestructor)
694 				tbl->pdestructor(n);
695 			if (n->dev)
696 				dev_put(n->dev);
697 			kfree(n);
698 			return 0;
699 		}
700 	}
701 	write_unlock_bh(&tbl->lock);
702 	return -ENOENT;
703 }
704 
705 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
706 				    struct net_device *dev)
707 {
708 	struct pneigh_entry *n, **np, *freelist = NULL;
709 	u32 h;
710 
711 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
712 		np = &tbl->phash_buckets[h];
713 		while ((n = *np) != NULL) {
714 			if (!dev || n->dev == dev) {
715 				*np = n->next;
716 				n->next = freelist;
717 				freelist = n;
718 				continue;
719 			}
720 			np = &n->next;
721 		}
722 	}
723 	write_unlock_bh(&tbl->lock);
724 	while ((n = freelist)) {
725 		freelist = n->next;
726 		n->next = NULL;
727 		if (tbl->pdestructor)
728 			tbl->pdestructor(n);
729 		if (n->dev)
730 			dev_put(n->dev);
731 		kfree(n);
732 	}
733 	return -ENOENT;
734 }
735 
736 static void neigh_parms_destroy(struct neigh_parms *parms);
737 
738 static inline void neigh_parms_put(struct neigh_parms *parms)
739 {
740 	if (refcount_dec_and_test(&parms->refcnt))
741 		neigh_parms_destroy(parms);
742 }
743 
744 /*
745  *	The neighbour must already be out of the table.
746  */
748 void neigh_destroy(struct neighbour *neigh)
749 {
750 	struct net_device *dev = neigh->dev;
751 
752 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
753 
754 	if (!neigh->dead) {
755 		pr_warn("Destroying alive neighbour %p\n", neigh);
756 		dump_stack();
757 		return;
758 	}
759 
760 	if (neigh_del_timer(neigh))
761 		pr_warn("Impossible event\n");
762 
763 	write_lock_bh(&neigh->lock);
764 	__skb_queue_purge(&neigh->arp_queue);
765 	write_unlock_bh(&neigh->lock);
766 	neigh->arp_queue_len_bytes = 0;
767 
768 	if (dev->netdev_ops->ndo_neigh_destroy)
769 		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
770 
771 	dev_put(dev);
772 	neigh_parms_put(neigh->parms);
773 
774 	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
775 
776 	atomic_dec(&neigh->tbl->entries);
777 	kfree_rcu(neigh, rcu);
778 }
779 EXPORT_SYMBOL(neigh_destroy);
780 
781 /* Neighbour state is suspicious;
782    disable fast path.
783 
784    Called with write_locked neigh.
785  */
786 static void neigh_suspect(struct neighbour *neigh)
787 {
788 	neigh_dbg(2, "neigh %p is suspected\n", neigh);
789 
790 	neigh->output = neigh->ops->output;
791 }
792 
793 /* Neighbour state is OK;
794    enable fast path.
795 
796    Called with write_locked neigh.
797  */
798 static void neigh_connect(struct neighbour *neigh)
799 {
800 	neigh_dbg(2, "neigh %p is connected\n", neigh);
801 
802 	neigh->output = neigh->ops->connected_output;
803 }
804 
805 static void neigh_periodic_work(struct work_struct *work)
806 {
807 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
808 	struct neighbour *n;
809 	struct neighbour __rcu **np;
810 	unsigned int i;
811 	struct neigh_hash_table *nht;
812 
813 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
814 
815 	write_lock_bh(&tbl->lock);
816 	nht = rcu_dereference_protected(tbl->nht,
817 					lockdep_is_held(&tbl->lock));
818 
819 	/*
820 	 *	periodically recompute ReachableTime from random function
821 	 */
822 
823 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
824 		struct neigh_parms *p;
825 		tbl->last_rand = jiffies;
826 		list_for_each_entry(p, &tbl->parms_list, list)
827 			p->reachable_time =
828 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
829 	}
830 
831 	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
832 		goto out;
833 
834 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
835 		np = &nht->hash_buckets[i];
836 
837 		while ((n = rcu_dereference_protected(*np,
838 				lockdep_is_held(&tbl->lock))) != NULL) {
839 			unsigned int state;
840 
841 			write_lock(&n->lock);
842 
843 			state = n->nud_state;
844 			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
845 			    (n->flags & NTF_EXT_LEARNED)) {
846 				write_unlock(&n->lock);
847 				goto next_elt;
848 			}
849 
850 			if (time_before(n->used, n->confirmed))
851 				n->used = n->confirmed;
852 
853 			if (refcount_read(&n->refcnt) == 1 &&
854 			    (state == NUD_FAILED ||
855 			     time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
856 				*np = n->next;
857 				n->dead = 1;
858 				write_unlock(&n->lock);
859 				neigh_cleanup_and_release(n);
860 				continue;
861 			}
862 			write_unlock(&n->lock);
863 
864 next_elt:
865 			np = &n->next;
866 		}
867 		/*
868 		 * It's fine to release the lock here, even if the hash table
869 		 * grows while we are preempted.
870 		 */
871 		write_unlock_bh(&tbl->lock);
872 		cond_resched();
873 		write_lock_bh(&tbl->lock);
874 		nht = rcu_dereference_protected(tbl->nht,
875 						lockdep_is_held(&tbl->lock));
876 	}
877 out:
878 	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
879 	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
880 	 * BASE_REACHABLE_TIME.
881 	 */
882 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
883 			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
884 	write_unlock_bh(&tbl->lock);
885 }
886 
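/* Total number of probes after which resolution gives up. For example,
 * with the usual defaults (ucast_probes = 3, app_probes = 0,
 * mcast_probes = 3) this yields 6 for an INCOMPLETE entry.
 */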
887 static __inline__ int neigh_max_probes(struct neighbour *n)
888 {
889 	struct neigh_parms *p = n->parms;
890 	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
891 	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
892 	        NEIGH_VAR(p, MCAST_PROBES));
893 }
894 
895 static void neigh_invalidate(struct neighbour *neigh)
896 	__releases(neigh->lock)
897 	__acquires(neigh->lock)
898 {
899 	struct sk_buff *skb;
900 
901 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
902 	neigh_dbg(2, "neigh %p is failed\n", neigh);
903 	neigh->updated = jiffies;
904 
905 	/* This is a delicate spot. report_unreachable is a very complicated
906 	   routine. In particular, it can hit the same neighbour entry!
907 
908 	   So we try to be careful and avoid a dead loop. --ANK
909 	 */
910 	while (neigh->nud_state == NUD_FAILED &&
911 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
912 		write_unlock(&neigh->lock);
913 		neigh->ops->error_report(neigh, skb);
914 		write_lock(&neigh->lock);
915 	}
916 	__skb_queue_purge(&neigh->arp_queue);
917 	neigh->arp_queue_len_bytes = 0;
918 }
919 
920 static void neigh_probe(struct neighbour *neigh)
921 	__releases(neigh->lock)
922 {
923 	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
924 	/* keep skb alive even if arp_queue overflows */
925 	if (skb)
926 		skb = skb_clone(skb, GFP_ATOMIC);
927 	write_unlock(&neigh->lock);
928 	if (neigh->ops->solicit)
929 		neigh->ops->solicit(neigh, skb);
930 	atomic_inc(&neigh->probes);
931 	kfree_skb(skb);
932 }
933 
934 /* Called when a timer expires for a neighbour entry. */
935 
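/* A rough map of the transitions driven below (see the code for the
 * exact timing rules):
 *
 *   REACHABLE --(reachable_time expired)-----> DELAY or STALE
 *   DELAY     --(confirmed recently)---------> REACHABLE
 *   DELAY     --(otherwise)------------------> PROBE
 *   PROBE / INCOMPLETE --(probes exhausted)--> FAILED
 */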
936 static void neigh_timer_handler(struct timer_list *t)
937 {
938 	unsigned long now, next;
939 	struct neighbour *neigh = from_timer(neigh, t, timer);
940 	unsigned int state;
941 	int notify = 0;
942 
943 	write_lock(&neigh->lock);
944 
945 	state = neigh->nud_state;
946 	now = jiffies;
947 	next = now + HZ;
948 
949 	if (!(state & NUD_IN_TIMER))
950 		goto out;
951 
952 	if (state & NUD_REACHABLE) {
953 		if (time_before_eq(now,
954 				   neigh->confirmed + neigh->parms->reachable_time)) {
955 			neigh_dbg(2, "neigh %p is still alive\n", neigh);
956 			next = neigh->confirmed + neigh->parms->reachable_time;
957 		} else if (time_before_eq(now,
958 					  neigh->used +
959 					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
960 			neigh_dbg(2, "neigh %p is delayed\n", neigh);
961 			neigh->nud_state = NUD_DELAY;
962 			neigh->updated = jiffies;
963 			neigh_suspect(neigh);
964 			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
965 		} else {
966 			neigh_dbg(2, "neigh %p is suspected\n", neigh);
967 			neigh->nud_state = NUD_STALE;
968 			neigh->updated = jiffies;
969 			neigh_suspect(neigh);
970 			notify = 1;
971 		}
972 	} else if (state & NUD_DELAY) {
973 		if (time_before_eq(now,
974 				   neigh->confirmed +
975 				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
976 			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
977 			neigh->nud_state = NUD_REACHABLE;
978 			neigh->updated = jiffies;
979 			neigh_connect(neigh);
980 			notify = 1;
981 			next = neigh->confirmed + neigh->parms->reachable_time;
982 		} else {
983 			neigh_dbg(2, "neigh %p is probed\n", neigh);
984 			neigh->nud_state = NUD_PROBE;
985 			neigh->updated = jiffies;
986 			atomic_set(&neigh->probes, 0);
987 			notify = 1;
988 			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
989 		}
990 	} else {
991 		/* NUD_PROBE|NUD_INCOMPLETE */
992 		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
993 	}
994 
995 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
996 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
997 		neigh->nud_state = NUD_FAILED;
998 		notify = 1;
999 		neigh_invalidate(neigh);
1000 		goto out;
1001 	}
1002 
1003 	if (neigh->nud_state & NUD_IN_TIMER) {
1004 		if (time_before(next, jiffies + HZ/2))
1005 			next = jiffies + HZ/2;
1006 		if (!mod_timer(&neigh->timer, next))
1007 			neigh_hold(neigh);
1008 	}
1009 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1010 		neigh_probe(neigh);
1011 	} else {
1012 out:
1013 		write_unlock(&neigh->lock);
1014 	}
1015 
1016 	if (notify)
1017 		neigh_update_notify(neigh, 0);
1018 
1019 	neigh_release(neigh);
1020 }
1021 
1022 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
1023 {
1024 	int rc;
1025 	bool immediate_probe = false;
1026 
1027 	write_lock_bh(&neigh->lock);
1028 
1029 	rc = 0;
1030 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1031 		goto out_unlock_bh;
1032 	if (neigh->dead)
1033 		goto out_dead;
1034 
1035 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1036 		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1037 		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
1038 			unsigned long next, now = jiffies;
1039 
1040 			atomic_set(&neigh->probes,
1041 				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
1042 			neigh->nud_state     = NUD_INCOMPLETE;
1043 			neigh->updated = now;
1044 			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1045 					 HZ/2);
1046 			neigh_add_timer(neigh, next);
1047 			immediate_probe = true;
1048 		} else {
1049 			neigh->nud_state = NUD_FAILED;
1050 			neigh->updated = jiffies;
1051 			write_unlock_bh(&neigh->lock);
1052 
1053 			kfree_skb(skb);
1054 			return 1;
1055 		}
1056 	} else if (neigh->nud_state & NUD_STALE) {
1057 		neigh_dbg(2, "neigh %p is delayed\n", neigh);
1058 		neigh->nud_state = NUD_DELAY;
1059 		neigh->updated = jiffies;
1060 		neigh_add_timer(neigh, jiffies +
1061 				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1062 	}
1063 
1064 	if (neigh->nud_state == NUD_INCOMPLETE) {
1065 		if (skb) {
1066 			while (neigh->arp_queue_len_bytes + skb->truesize >
1067 			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1068 				struct sk_buff *buff;
1069 
1070 				buff = __skb_dequeue(&neigh->arp_queue);
1071 				if (!buff)
1072 					break;
1073 				neigh->arp_queue_len_bytes -= buff->truesize;
1074 				kfree_skb(buff);
1075 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1076 			}
1077 			skb_dst_force(skb);
1078 			__skb_queue_tail(&neigh->arp_queue, skb);
1079 			neigh->arp_queue_len_bytes += skb->truesize;
1080 		}
1081 		rc = 1;
1082 	}
1083 out_unlock_bh:
1084 	if (immediate_probe)
1085 		neigh_probe(neigh);
1086 	else
1087 		write_unlock(&neigh->lock);
1088 	local_bh_enable();
1089 	return rc;
1090 
1091 out_dead:
1092 	if (neigh->nud_state & NUD_STALE)
1093 		goto out_unlock_bh;
1094 	write_unlock_bh(&neigh->lock);
1095 	kfree_skb(skb);
1096 	return 1;
1097 }
1098 EXPORT_SYMBOL(__neigh_event_send);
1099 
1100 static void neigh_update_hhs(struct neighbour *neigh)
1101 {
1102 	struct hh_cache *hh;
1103 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1104 		= NULL;
1105 
1106 	if (neigh->dev->header_ops)
1107 		update = neigh->dev->header_ops->cache_update;
1108 
1109 	if (update) {
1110 		hh = &neigh->hh;
1111 		if (hh->hh_len) {
1112 			write_seqlock_bh(&hh->hh_lock);
1113 			update(hh, neigh->dev, neigh->ha);
1114 			write_sequnlock_bh(&hh->hh_lock);
1115 		}
1116 	}
1117 }
1118 
1119 
1120 
1121 /* Generic update routine.
1122    -- lladdr is the new lladdr, or NULL if none is supplied.
1123    -- new    is the new state.
1124    -- flags
1125 	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
1126 				if the new one is different.
1127 	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
1128 				lladdr instead of overriding it
1129 				if the new one is different.
1130 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1131 
1132 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
1133 				NTF_ROUTER flag.
1134 	NEIGH_UPDATE_F_ISROUTER	indicates that the neighbour is known to be
1135 				a router.
1136 
1137    The caller MUST hold a reference count on the entry.
1138  */
1139 
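/* A typical caller, taken from neigh_event_ns() below: a received
 * neighbour solicitation updates the entry with
 *
 *	neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE, 0);
 *
 * i.e. the new lladdr may replace the cached one, the state drops to
 * STALE, and no netlink pid is attributed to the change.
 */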
1140 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1141 		 u32 flags, u32 nlmsg_pid)
1142 {
1143 	u8 old;
1144 	int err;
1145 	int notify = 0;
1146 	struct net_device *dev;
1147 	int update_isrouter = 0;
1148 
1149 	write_lock_bh(&neigh->lock);
1150 
1151 	dev    = neigh->dev;
1152 	old    = neigh->nud_state;
1153 	err    = -EPERM;
1154 
1155 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1156 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1157 		goto out;
1158 	if (neigh->dead)
1159 		goto out;
1160 
1161 	neigh_update_ext_learned(neigh, flags, &notify);
1162 
1163 	if (!(new & NUD_VALID)) {
1164 		neigh_del_timer(neigh);
1165 		if (old & NUD_CONNECTED)
1166 			neigh_suspect(neigh);
1167 		neigh->nud_state = new;
1168 		err = 0;
1169 		notify = old & NUD_VALID;
1170 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1171 		    (new & NUD_FAILED)) {
1172 			neigh_invalidate(neigh);
1173 			notify = 1;
1174 		}
1175 		goto out;
1176 	}
1177 
1178 	/* Compare new lladdr with cached one */
1179 	if (!dev->addr_len) {
1180 		/* First case: device needs no address. */
1181 		lladdr = neigh->ha;
1182 	} else if (lladdr) {
1183 		/* The second case: if something is already cached
1184 		   and a new address is proposed:
1185 		   - compare new & old
1186 		   - if they are different, check override flag
1187 		 */
1188 		if ((old & NUD_VALID) &&
1189 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1190 			lladdr = neigh->ha;
1191 	} else {
1192 		/* No address is supplied; if we know something,
1193 		   use it, otherwise discard the request.
1194 		 */
1195 		err = -EINVAL;
1196 		if (!(old & NUD_VALID))
1197 			goto out;
1198 		lladdr = neigh->ha;
1199 	}
1200 
1201 	/* Update the confirmed timestamp for the neighbour entry after we
1202 	 * receive an ARP packet, even if it doesn't change the IP-to-MAC binding.
1203 	 */
1204 	if (new & NUD_CONNECTED)
1205 		neigh->confirmed = jiffies;
1206 
1207 	/* If the entry was valid and the address has not changed,
1208 	   do not change the entry state if the new state is STALE.
1209 	 */
1210 	err = 0;
1211 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1212 	if (old & NUD_VALID) {
1213 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1214 			update_isrouter = 0;
1215 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1216 			    (old & NUD_CONNECTED)) {
1217 				lladdr = neigh->ha;
1218 				new = NUD_STALE;
1219 			} else
1220 				goto out;
1221 		} else {
1222 			if (lladdr == neigh->ha && new == NUD_STALE &&
1223 			    !(flags & NEIGH_UPDATE_F_ADMIN))
1224 				new = old;
1225 		}
1226 	}
1227 
1228 	/* Update the timestamp only once we know we will make a change to the
1229 	 * neighbour entry. Otherwise we risk moving the locktime window with
1230 	 * no-op updates and ignoring relevant ARP updates.
1231 	 */
1232 	if (new != old || lladdr != neigh->ha)
1233 		neigh->updated = jiffies;
1234 
1235 	if (new != old) {
1236 		neigh_del_timer(neigh);
1237 		if (new & NUD_PROBE)
1238 			atomic_set(&neigh->probes, 0);
1239 		if (new & NUD_IN_TIMER)
1240 			neigh_add_timer(neigh, (jiffies +
1241 						((new & NUD_REACHABLE) ?
1242 						 neigh->parms->reachable_time :
1243 						 0)));
1244 		neigh->nud_state = new;
1245 		notify = 1;
1246 	}
1247 
1248 	if (lladdr != neigh->ha) {
1249 		write_seqlock(&neigh->ha_lock);
1250 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1251 		write_sequnlock(&neigh->ha_lock);
1252 		neigh_update_hhs(neigh);
1253 		if (!(new & NUD_CONNECTED))
1254 			neigh->confirmed = jiffies -
1255 				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1256 		notify = 1;
1257 	}
1258 	if (new == old)
1259 		goto out;
1260 	if (new & NUD_CONNECTED)
1261 		neigh_connect(neigh);
1262 	else
1263 		neigh_suspect(neigh);
1264 	if (!(old & NUD_VALID)) {
1265 		struct sk_buff *skb;
1266 
1267 		/* Again: avoid a dead loop if something went wrong */
1268 
1269 		while (neigh->nud_state & NUD_VALID &&
1270 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1271 			struct dst_entry *dst = skb_dst(skb);
1272 			struct neighbour *n2, *n1 = neigh;
1273 			write_unlock_bh(&neigh->lock);
1274 
1275 			rcu_read_lock();
1276 
1277 			/* Why not just use 'neigh' as-is?  The problem is that
1278 			 * things such as shaper, eql, and sch_teql can end up
1279 			 * using alternative, different, neigh objects to output
1280 			 * the packet in the output path.  So what we need to do
1281 			 * here is re-lookup the top-level neigh in the path so
1282 			 * we can reinject the packet there.
1283 			 */
1284 			n2 = NULL;
1285 			if (dst) {
1286 				n2 = dst_neigh_lookup_skb(dst, skb);
1287 				if (n2)
1288 					n1 = n2;
1289 			}
1290 			n1->output(n1, skb);
1291 			if (n2)
1292 				neigh_release(n2);
1293 			rcu_read_unlock();
1294 
1295 			write_lock_bh(&neigh->lock);
1296 		}
1297 		__skb_queue_purge(&neigh->arp_queue);
1298 		neigh->arp_queue_len_bytes = 0;
1299 	}
1300 out:
1301 	if (update_isrouter)
1302 		neigh_update_is_router(neigh, flags, &notify);
1303 	write_unlock_bh(&neigh->lock);
1304 
1305 	if (notify)
1306 		neigh_update_notify(neigh, nlmsg_pid);
1307 
1308 	return err;
1309 }
1310 EXPORT_SYMBOL(neigh_update);
1311 
1312 /* Update the neigh to listen temporarily for probe responses, even if it is
1313  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1314  */
1315 void __neigh_set_probe_once(struct neighbour *neigh)
1316 {
1317 	if (neigh->dead)
1318 		return;
1319 	neigh->updated = jiffies;
1320 	if (!(neigh->nud_state & NUD_FAILED))
1321 		return;
1322 	neigh->nud_state = NUD_INCOMPLETE;
1323 	atomic_set(&neigh->probes, neigh_max_probes(neigh));
1324 	neigh_add_timer(neigh,
1325 			jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME));
1326 }
1327 EXPORT_SYMBOL(__neigh_set_probe_once);
1328 
1329 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1330 				 u8 *lladdr, void *saddr,
1331 				 struct net_device *dev)
1332 {
1333 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1334 						 lladdr || !dev->addr_len);
1335 	if (neigh)
1336 		neigh_update(neigh, lladdr, NUD_STALE,
1337 			     NEIGH_UPDATE_F_OVERRIDE, 0);
1338 	return neigh;
1339 }
1340 EXPORT_SYMBOL(neigh_event_ns);
1341 
1342 /* called with read_lock_bh(&n->lock); */
1343 static void neigh_hh_init(struct neighbour *n)
1344 {
1345 	struct net_device *dev = n->dev;
1346 	__be16 prot = n->tbl->protocol;
1347 	struct hh_cache	*hh = &n->hh;
1348 
1349 	write_lock_bh(&n->lock);
1350 
1351 	/* Only one thread can come in here and initialize the
1352 	 * hh_cache entry.
1353 	 */
1354 	if (!hh->hh_len)
1355 		dev->header_ops->cache(n, hh, prot);
1356 
1357 	write_unlock_bh(&n->lock);
1358 }
1359 
1360 /* Slow and careful. */
1361 
1362 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1363 {
1364 	int rc = 0;
1365 
1366 	if (!neigh_event_send(neigh, skb)) {
1367 		int err;
1368 		struct net_device *dev = neigh->dev;
1369 		unsigned int seq;
1370 
1371 		if (dev->header_ops->cache && !neigh->hh.hh_len)
1372 			neigh_hh_init(neigh);
1373 
1374 		do {
1375 			__skb_pull(skb, skb_network_offset(skb));
1376 			seq = read_seqbegin(&neigh->ha_lock);
1377 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1378 					      neigh->ha, NULL, skb->len);
1379 		} while (read_seqretry(&neigh->ha_lock, seq));
1380 
1381 		if (err >= 0)
1382 			rc = dev_queue_xmit(skb);
1383 		else
1384 			goto out_kfree_skb;
1385 	}
1386 out:
1387 	return rc;
1388 out_kfree_skb:
1389 	rc = -EINVAL;
1390 	kfree_skb(skb);
1391 	goto out;
1392 }
1393 EXPORT_SYMBOL(neigh_resolve_output);
1394 
1395 /* As fast as possible without hh cache */
1396 
1397 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1398 {
1399 	struct net_device *dev = neigh->dev;
1400 	unsigned int seq;
1401 	int err;
1402 
1403 	do {
1404 		__skb_pull(skb, skb_network_offset(skb));
1405 		seq = read_seqbegin(&neigh->ha_lock);
1406 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1407 				      neigh->ha, NULL, skb->len);
1408 	} while (read_seqretry(&neigh->ha_lock, seq));
1409 
1410 	if (err >= 0)
1411 		err = dev_queue_xmit(skb);
1412 	else {
1413 		err = -EINVAL;
1414 		kfree_skb(skb);
1415 	}
1416 	return err;
1417 }
1418 EXPORT_SYMBOL(neigh_connected_output);
1419 
1420 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1421 {
1422 	return dev_queue_xmit(skb);
1423 }
1424 EXPORT_SYMBOL(neigh_direct_output);
1425 
1426 static void neigh_proxy_process(struct timer_list *t)
1427 {
1428 	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1429 	long sched_next = 0;
1430 	unsigned long now = jiffies;
1431 	struct sk_buff *skb, *n;
1432 
1433 	spin_lock(&tbl->proxy_queue.lock);
1434 
1435 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1436 		long tdif = NEIGH_CB(skb)->sched_next - now;
1437 
1438 		if (tdif <= 0) {
1439 			struct net_device *dev = skb->dev;
1440 
1441 			__skb_unlink(skb, &tbl->proxy_queue);
1442 			if (tbl->proxy_redo && netif_running(dev)) {
1443 				rcu_read_lock();
1444 				tbl->proxy_redo(skb);
1445 				rcu_read_unlock();
1446 			} else {
1447 				kfree_skb(skb);
1448 			}
1449 
1450 			dev_put(dev);
1451 		} else if (!sched_next || tdif < sched_next)
1452 			sched_next = tdif;
1453 	}
1454 	del_timer(&tbl->proxy_timer);
1455 	if (sched_next)
1456 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1457 	spin_unlock(&tbl->proxy_queue.lock);
1458 }
1459 
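/* Queue a packet for delayed proxy processing. The reply is deferred by
 * a random delay in [0, PROXY_DELAY) jiffies, which among other things
 * gives the real owner of the address, if one is present, a chance to
 * answer first.
 */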
1460 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1461 		    struct sk_buff *skb)
1462 {
1463 	unsigned long now = jiffies;
1464 
1465 	unsigned long sched_next = now + (prandom_u32() %
1466 					  NEIGH_VAR(p, PROXY_DELAY));
1467 
1468 	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1469 		kfree_skb(skb);
1470 		return;
1471 	}
1472 
1473 	NEIGH_CB(skb)->sched_next = sched_next;
1474 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1475 
1476 	spin_lock(&tbl->proxy_queue.lock);
1477 	if (del_timer(&tbl->proxy_timer)) {
1478 		if (time_before(tbl->proxy_timer.expires, sched_next))
1479 			sched_next = tbl->proxy_timer.expires;
1480 	}
1481 	skb_dst_drop(skb);
1482 	dev_hold(skb->dev);
1483 	__skb_queue_tail(&tbl->proxy_queue, skb);
1484 	mod_timer(&tbl->proxy_timer, sched_next);
1485 	spin_unlock(&tbl->proxy_queue.lock);
1486 }
1487 EXPORT_SYMBOL(pneigh_enqueue);
1488 
1489 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1490 						      struct net *net, int ifindex)
1491 {
1492 	struct neigh_parms *p;
1493 
1494 	list_for_each_entry(p, &tbl->parms_list, list) {
1495 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1496 		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1497 			return p;
1498 	}
1499 
1500 	return NULL;
1501 }
1502 
1503 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1504 				      struct neigh_table *tbl)
1505 {
1506 	struct neigh_parms *p;
1507 	struct net *net = dev_net(dev);
1508 	const struct net_device_ops *ops = dev->netdev_ops;
1509 
1510 	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1511 	if (p) {
1512 		p->tbl		  = tbl;
1513 		refcount_set(&p->refcnt, 1);
1514 		p->reachable_time =
1515 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1516 		dev_hold(dev);
1517 		p->dev = dev;
1518 		write_pnet(&p->net, net);
1519 		p->sysctl_table = NULL;
1520 
1521 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1522 			dev_put(dev);
1523 			kfree(p);
1524 			return NULL;
1525 		}
1526 
1527 		write_lock_bh(&tbl->lock);
1528 		list_add(&p->list, &tbl->parms.list);
1529 		write_unlock_bh(&tbl->lock);
1530 
1531 		neigh_parms_data_state_cleanall(p);
1532 	}
1533 	return p;
1534 }
1535 EXPORT_SYMBOL(neigh_parms_alloc);
1536 
1537 static void neigh_rcu_free_parms(struct rcu_head *head)
1538 {
1539 	struct neigh_parms *parms =
1540 		container_of(head, struct neigh_parms, rcu_head);
1541 
1542 	neigh_parms_put(parms);
1543 }
1544 
1545 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1546 {
1547 	if (!parms || parms == &tbl->parms)
1548 		return;
1549 	write_lock_bh(&tbl->lock);
1550 	list_del(&parms->list);
1551 	parms->dead = 1;
1552 	write_unlock_bh(&tbl->lock);
1553 	if (parms->dev)
1554 		dev_put(parms->dev);
1555 	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1556 }
1557 EXPORT_SYMBOL(neigh_parms_release);
1558 
1559 static void neigh_parms_destroy(struct neigh_parms *parms)
1560 {
1561 	kfree(parms);
1562 }
1563 
1564 static struct lock_class_key neigh_table_proxy_queue_class;
1565 
1566 static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1567 
1568 void neigh_table_init(int index, struct neigh_table *tbl)
1569 {
1570 	unsigned long now = jiffies;
1571 	unsigned long phsize;
1572 
1573 	INIT_LIST_HEAD(&tbl->parms_list);
1574 	list_add(&tbl->parms.list, &tbl->parms_list);
1575 	write_pnet(&tbl->parms.net, &init_net);
1576 	refcount_set(&tbl->parms.refcnt, 1);
1577 	tbl->parms.reachable_time =
1578 			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1579 
1580 	tbl->stats = alloc_percpu(struct neigh_statistics);
1581 	if (!tbl->stats)
1582 		panic("cannot create neighbour cache statistics");
1583 
1584 #ifdef CONFIG_PROC_FS
1585 	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1586 			      &neigh_stat_seq_ops, tbl))
1587 		panic("cannot create neighbour proc dir entry");
1588 #endif
1589 
1590 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1591 
1592 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1593 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1594 
1595 	if (!tbl->nht || !tbl->phash_buckets)
1596 		panic("cannot allocate neighbour cache hashes");
1597 
1598 	if (!tbl->entry_size)
1599 		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1600 					tbl->key_len, NEIGH_PRIV_ALIGN);
1601 	else
1602 		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1603 
1604 	rwlock_init(&tbl->lock);
1605 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1606 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1607 			tbl->parms.reachable_time);
1608 	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1609 	skb_queue_head_init_class(&tbl->proxy_queue,
1610 			&neigh_table_proxy_queue_class);
1611 
1612 	tbl->last_flush = now;
1613 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1614 
1615 	neigh_tables[index] = tbl;
1616 }
1617 EXPORT_SYMBOL(neigh_table_init);
1618 
1619 int neigh_table_clear(int index, struct neigh_table *tbl)
1620 {
1621 	neigh_tables[index] = NULL;
1622 	/* It is not clean... Fix it so the IPv6 module can be unloaded safely */
1623 	cancel_delayed_work_sync(&tbl->gc_work);
1624 	del_timer_sync(&tbl->proxy_timer);
1625 	pneigh_queue_purge(&tbl->proxy_queue);
1626 	neigh_ifdown(tbl, NULL);
1627 	if (atomic_read(&tbl->entries))
1628 		pr_crit("neighbour leakage\n");
1629 
1630 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1631 		 neigh_hash_free_rcu);
1632 	tbl->nht = NULL;
1633 
1634 	kfree(tbl->phash_buckets);
1635 	tbl->phash_buckets = NULL;
1636 
1637 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1638 
1639 	free_percpu(tbl->stats);
1640 	tbl->stats = NULL;
1641 
1642 	return 0;
1643 }
1644 EXPORT_SYMBOL(neigh_table_clear);
1645 
1646 static struct neigh_table *neigh_find_table(int family)
1647 {
1648 	struct neigh_table *tbl = NULL;
1649 
1650 	switch (family) {
1651 	case AF_INET:
1652 		tbl = neigh_tables[NEIGH_ARP_TABLE];
1653 		break;
1654 	case AF_INET6:
1655 		tbl = neigh_tables[NEIGH_ND_TABLE];
1656 		break;
1657 	case AF_DECnet:
1658 		tbl = neigh_tables[NEIGH_DN_TABLE];
1659 		break;
1660 	}
1661 
1662 	return tbl;
1663 }
1664 
1665 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1666 			struct netlink_ext_ack *extack)
1667 {
1668 	struct net *net = sock_net(skb->sk);
1669 	struct ndmsg *ndm;
1670 	struct nlattr *dst_attr;
1671 	struct neigh_table *tbl;
1672 	struct neighbour *neigh;
1673 	struct net_device *dev = NULL;
1674 	int err = -EINVAL;
1675 
1676 	ASSERT_RTNL();
1677 	if (nlmsg_len(nlh) < sizeof(*ndm))
1678 		goto out;
1679 
1680 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1681 	if (dst_attr == NULL)
1682 		goto out;
1683 
1684 	ndm = nlmsg_data(nlh);
1685 	if (ndm->ndm_ifindex) {
1686 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1687 		if (dev == NULL) {
1688 			err = -ENODEV;
1689 			goto out;
1690 		}
1691 	}
1692 
1693 	tbl = neigh_find_table(ndm->ndm_family);
1694 	if (tbl == NULL)
1695 		return -EAFNOSUPPORT;
1696 
1697 	if (nla_len(dst_attr) < (int)tbl->key_len)
1698 		goto out;
1699 
1700 	if (ndm->ndm_flags & NTF_PROXY) {
1701 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1702 		goto out;
1703 	}
1704 
1705 	if (dev == NULL)
1706 		goto out;
1707 
1708 	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1709 	if (neigh == NULL) {
1710 		err = -ENOENT;
1711 		goto out;
1712 	}
1713 
1714 	err = neigh_update(neigh, NULL, NUD_FAILED,
1715 			   NEIGH_UPDATE_F_OVERRIDE |
1716 			   NEIGH_UPDATE_F_ADMIN,
1717 			   NETLINK_CB(skb).portid);
1718 	write_lock_bh(&tbl->lock);
1719 	neigh_release(neigh);
1720 	neigh_remove_one(neigh, tbl);
1721 	write_unlock_bh(&tbl->lock);
1722 
1723 out:
1724 	return err;
1725 }
1726 
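/* RTM_NEWNEIGH handler. This is where, for example, an
 * "ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 dev eth0" lands:
 * NDA_DST carries the protocol address, NDA_LLADDR the link-layer
 * address, and NLM_F_CREATE/NLM_F_REPLACE steer the flag handling below.
 * (The command is illustrative; addresses are from documentation ranges.)
 */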
1727 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1728 		     struct netlink_ext_ack *extack)
1729 {
1730 	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1731 		NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1732 	struct net *net = sock_net(skb->sk);
1733 	struct ndmsg *ndm;
1734 	struct nlattr *tb[NDA_MAX+1];
1735 	struct neigh_table *tbl;
1736 	struct net_device *dev = NULL;
1737 	struct neighbour *neigh;
1738 	void *dst, *lladdr;
1739 	int err;
1740 
1741 	ASSERT_RTNL();
1742 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
1743 	if (err < 0)
1744 		goto out;
1745 
1746 	err = -EINVAL;
1747 	if (tb[NDA_DST] == NULL)
1748 		goto out;
1749 
1750 	ndm = nlmsg_data(nlh);
1751 	if (ndm->ndm_ifindex) {
1752 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1753 		if (dev == NULL) {
1754 			err = -ENODEV;
1755 			goto out;
1756 		}
1757 
1758 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1759 			goto out;
1760 	}
1761 
1762 	tbl = neigh_find_table(ndm->ndm_family);
1763 	if (tbl == NULL)
1764 		return -EAFNOSUPPORT;
1765 
1766 	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
1767 		goto out;
1768 	dst = nla_data(tb[NDA_DST]);
1769 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1770 
1771 	if (ndm->ndm_flags & NTF_PROXY) {
1772 		struct pneigh_entry *pn;
1773 
1774 		err = -ENOBUFS;
1775 		pn = pneigh_lookup(tbl, net, dst, dev, 1);
1776 		if (pn) {
1777 			pn->flags = ndm->ndm_flags;
1778 			err = 0;
1779 		}
1780 		goto out;
1781 	}
1782 
1783 	if (dev == NULL)
1784 		goto out;
1785 
1786 	neigh = neigh_lookup(tbl, dst, dev);
1787 	if (neigh == NULL) {
1788 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1789 			err = -ENOENT;
1790 			goto out;
1791 		}
1792 
1793 		neigh = __neigh_lookup_errno(tbl, dst, dev);
1794 		if (IS_ERR(neigh)) {
1795 			err = PTR_ERR(neigh);
1796 			goto out;
1797 		}
1798 	} else {
1799 		if (nlh->nlmsg_flags & NLM_F_EXCL) {
1800 			err = -EEXIST;
1801 			neigh_release(neigh);
1802 			goto out;
1803 		}
1804 
1805 		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1806 			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
1807 				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1808 	}
1809 
1810 	if (ndm->ndm_flags & NTF_EXT_LEARNED)
1811 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
1812 
1813 	if (ndm->ndm_flags & NTF_ROUTER)
1814 		flags |= NEIGH_UPDATE_F_ISROUTER;
1815 
1816 	if (ndm->ndm_flags & NTF_USE) {
1817 		neigh_event_send(neigh, NULL);
1818 		err = 0;
1819 	} else
1820 		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
1821 				   NETLINK_CB(skb).portid);
1822 	neigh_release(neigh);
1823 
1824 out:
1825 	return err;
1826 }
1827 
1828 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1829 {
1830 	struct nlattr *nest;
1831 
1832 	nest = nla_nest_start(skb, NDTA_PARMS);
1833 	if (nest == NULL)
1834 		return -ENOBUFS;
1835 
1836 	if ((parms->dev &&
1837 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1838 	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
1839 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1840 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1841 	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
1842 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
1843 			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1844 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1845 	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1846 	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
1847 			NEIGH_VAR(parms, UCAST_PROBES)) ||
1848 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
1849 			NEIGH_VAR(parms, MCAST_PROBES)) ||
1850 	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
1851 			NEIGH_VAR(parms, MCAST_REPROBES)) ||
1852 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
1853 			  NDTPA_PAD) ||
1854 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1855 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
1856 	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
1857 			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
1858 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1859 			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
1860 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1861 			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
1862 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1863 			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
1864 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1865 			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
1866 	    nla_put_msecs(skb, NDTPA_LOCKTIME,
1867 			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
1868 		goto nla_put_failure;
1869 	return nla_nest_end(skb, nest);
1870 
1871 nla_put_failure:
1872 	nla_nest_cancel(skb, nest);
1873 	return -EMSGSIZE;
1874 }
1875 
1876 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1877 			      u32 pid, u32 seq, int type, int flags)
1878 {
1879 	struct nlmsghdr *nlh;
1880 	struct ndtmsg *ndtmsg;
1881 
1882 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1883 	if (nlh == NULL)
1884 		return -EMSGSIZE;
1885 
1886 	ndtmsg = nlmsg_data(nlh);
1887 
1888 	read_lock_bh(&tbl->lock);
1889 	ndtmsg->ndtm_family = tbl->family;
1890 	ndtmsg->ndtm_pad1   = 0;
1891 	ndtmsg->ndtm_pad2   = 0;
1892 
1893 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1894 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval, NDTA_PAD) ||
1895 	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1896 	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1897 	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1898 		goto nla_put_failure;
1899 	{
1900 		unsigned long now = jiffies;
1901 		unsigned int flush_delta = now - tbl->last_flush;
1902 		unsigned int rand_delta = now - tbl->last_rand;
1903 		struct neigh_hash_table *nht;
1904 		struct ndt_config ndc = {
1905 			.ndtc_key_len		= tbl->key_len,
1906 			.ndtc_entry_size	= tbl->entry_size,
1907 			.ndtc_entries		= atomic_read(&tbl->entries),
1908 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1909 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1910 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1911 		};
1912 
1913 		rcu_read_lock_bh();
1914 		nht = rcu_dereference_bh(tbl->nht);
1915 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1916 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1917 		rcu_read_unlock_bh();
1918 
1919 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1920 			goto nla_put_failure;
1921 	}
1922 
1923 	{
1924 		int cpu;
1925 		struct ndt_stats ndst;
1926 
1927 		memset(&ndst, 0, sizeof(ndst));
1928 
1929 		for_each_possible_cpu(cpu) {
1930 			struct neigh_statistics	*st;
1931 
1932 			st = per_cpu_ptr(tbl->stats, cpu);
1933 			ndst.ndts_allocs		+= st->allocs;
1934 			ndst.ndts_destroys		+= st->destroys;
1935 			ndst.ndts_hash_grows		+= st->hash_grows;
1936 			ndst.ndts_res_failed		+= st->res_failed;
1937 			ndst.ndts_lookups		+= st->lookups;
1938 			ndst.ndts_hits			+= st->hits;
1939 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1940 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1941 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1942 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1943 			ndst.ndts_table_fulls		+= st->table_fulls;
1944 		}
1945 
1946 		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
1947 				  NDTA_PAD))
1948 			goto nla_put_failure;
1949 	}
1950 
1951 	BUG_ON(tbl->parms.dev);
1952 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1953 		goto nla_put_failure;
1954 
1955 	read_unlock_bh(&tbl->lock);
1956 	nlmsg_end(skb, nlh);
1957 	return 0;
1958 
1959 nla_put_failure:
1960 	read_unlock_bh(&tbl->lock);
1961 	nlmsg_cancel(skb, nlh);
1962 	return -EMSGSIZE;
1963 }
1964 
1965 static int neightbl_fill_param_info(struct sk_buff *skb,
1966 				    struct neigh_table *tbl,
1967 				    struct neigh_parms *parms,
1968 				    u32 pid, u32 seq, int type,
1969 				    unsigned int flags)
1970 {
1971 	struct ndtmsg *ndtmsg;
1972 	struct nlmsghdr *nlh;
1973 
1974 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1975 	if (nlh == NULL)
1976 		return -EMSGSIZE;
1977 
1978 	ndtmsg = nlmsg_data(nlh);
1979 
1980 	read_lock_bh(&tbl->lock);
1981 	ndtmsg->ndtm_family = tbl->family;
1982 	ndtmsg->ndtm_pad1   = 0;
1983 	ndtmsg->ndtm_pad2   = 0;
1984 
1985 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1986 	    neightbl_fill_parms(skb, parms) < 0)
1987 		goto errout;
1988 
1989 	read_unlock_bh(&tbl->lock);
1990 	nlmsg_end(skb, nlh);
1991 	return 0;
1992 errout:
1993 	read_unlock_bh(&tbl->lock);
1994 	nlmsg_cancel(skb, nlh);
1995 	return -EMSGSIZE;
1996 }
1997 
1998 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1999 	[NDTA_NAME]		= { .type = NLA_STRING },
2000 	[NDTA_THRESH1]		= { .type = NLA_U32 },
2001 	[NDTA_THRESH2]		= { .type = NLA_U32 },
2002 	[NDTA_THRESH3]		= { .type = NLA_U32 },
2003 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
2004 	[NDTA_PARMS]		= { .type = NLA_NESTED },
2005 };
2006 
2007 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2008 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
2009 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
2010 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
2011 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
2012 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
2013 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
2014 	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
2015 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
2016 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
2017 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
2018 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
2019 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
2020 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
2021 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
2022 };
2023 
2024 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2025 			struct netlink_ext_ack *extack)
2026 {
2027 	struct net *net = sock_net(skb->sk);
2028 	struct neigh_table *tbl;
2029 	struct ndtmsg *ndtmsg;
2030 	struct nlattr *tb[NDTA_MAX+1];
2031 	bool found = false;
2032 	int err, tidx;
2033 
2034 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2035 			  nl_neightbl_policy, extack);
2036 	if (err < 0)
2037 		goto errout;
2038 
2039 	if (tb[NDTA_NAME] == NULL) {
2040 		err = -EINVAL;
2041 		goto errout;
2042 	}
2043 
2044 	ndtmsg = nlmsg_data(nlh);
2045 
2046 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2047 		tbl = neigh_tables[tidx];
2048 		if (!tbl)
2049 			continue;
2050 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2051 			continue;
2052 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2053 			found = true;
2054 			break;
2055 		}
2056 	}
2057 
2058 	if (!found)
2059 		return -ENOENT;
2060 
2061 	/*
2062 	 * We acquire tbl->lock so that the periodic timers always
2063 	 * see a consistent set of values.
2064 	 */
2065 	write_lock_bh(&tbl->lock);
2066 
2067 	if (tb[NDTA_PARMS]) {
2068 		struct nlattr *tbp[NDTPA_MAX+1];
2069 		struct neigh_parms *p;
2070 		int i, ifindex = 0;
2071 
2072 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2073 				       nl_ntbl_parm_policy, extack);
2074 		if (err < 0)
2075 			goto errout_tbl_lock;
2076 
2077 		if (tbp[NDTPA_IFINDEX])
2078 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2079 
2080 		p = lookup_neigh_parms(tbl, net, ifindex);
2081 		if (p == NULL) {
2082 			err = -ENOENT;
2083 			goto errout_tbl_lock;
2084 		}
2085 
2086 		for (i = 1; i <= NDTPA_MAX; i++) {
2087 			if (tbp[i] == NULL)
2088 				continue;
2089 
2090 			switch (i) {
2091 			case NDTPA_QUEUE_LEN:
2092 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2093 					      nla_get_u32(tbp[i]) *
2094 					      SKB_TRUESIZE(ETH_FRAME_LEN));
2095 				break;
2096 			case NDTPA_QUEUE_LENBYTES:
2097 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2098 					      nla_get_u32(tbp[i]));
2099 				break;
2100 			case NDTPA_PROXY_QLEN:
2101 				NEIGH_VAR_SET(p, PROXY_QLEN,
2102 					      nla_get_u32(tbp[i]));
2103 				break;
2104 			case NDTPA_APP_PROBES:
2105 				NEIGH_VAR_SET(p, APP_PROBES,
2106 					      nla_get_u32(tbp[i]));
2107 				break;
2108 			case NDTPA_UCAST_PROBES:
2109 				NEIGH_VAR_SET(p, UCAST_PROBES,
2110 					      nla_get_u32(tbp[i]));
2111 				break;
2112 			case NDTPA_MCAST_PROBES:
2113 				NEIGH_VAR_SET(p, MCAST_PROBES,
2114 					      nla_get_u32(tbp[i]));
2115 				break;
2116 			case NDTPA_MCAST_REPROBES:
2117 				NEIGH_VAR_SET(p, MCAST_REPROBES,
2118 					      nla_get_u32(tbp[i]));
2119 				break;
2120 			case NDTPA_BASE_REACHABLE_TIME:
2121 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2122 					      nla_get_msecs(tbp[i]));
2123 				/* update reachable_time as well; otherwise the change
2124 				 * only takes effect the next time neigh_periodic_work
2125 				 * decides to recompute it (which can take minutes)
2126 				 */
2127 				p->reachable_time =
2128 					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2129 				break;
2130 			case NDTPA_GC_STALETIME:
2131 				NEIGH_VAR_SET(p, GC_STALETIME,
2132 					      nla_get_msecs(tbp[i]));
2133 				break;
2134 			case NDTPA_DELAY_PROBE_TIME:
2135 				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2136 					      nla_get_msecs(tbp[i]));
2137 				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2138 				break;
2139 			case NDTPA_RETRANS_TIME:
2140 				NEIGH_VAR_SET(p, RETRANS_TIME,
2141 					      nla_get_msecs(tbp[i]));
2142 				break;
2143 			case NDTPA_ANYCAST_DELAY:
2144 				NEIGH_VAR_SET(p, ANYCAST_DELAY,
2145 					      nla_get_msecs(tbp[i]));
2146 				break;
2147 			case NDTPA_PROXY_DELAY:
2148 				NEIGH_VAR_SET(p, PROXY_DELAY,
2149 					      nla_get_msecs(tbp[i]));
2150 				break;
2151 			case NDTPA_LOCKTIME:
2152 				NEIGH_VAR_SET(p, LOCKTIME,
2153 					      nla_get_msecs(tbp[i]));
2154 				break;
2155 			}
2156 		}
2157 	}
2158 
2159 	err = -ENOENT;
2160 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2161 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2162 	    !net_eq(net, &init_net))
2163 		goto errout_tbl_lock;
2164 
2165 	if (tb[NDTA_THRESH1])
2166 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2167 
2168 	if (tb[NDTA_THRESH2])
2169 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2170 
2171 	if (tb[NDTA_THRESH3])
2172 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2173 
2174 	if (tb[NDTA_GC_INTERVAL])
2175 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2176 
2177 	err = 0;
2178 
2179 errout_tbl_lock:
2180 	write_unlock_bh(&tbl->lock);
2181 errout:
2182 	return err;
2183 }
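
/*
 * Userspace reaches neightbl_set() via RTM_SETNEIGHTBL.  Illustrative
 * example (iproute2 syntax; option names may vary by version):
 *
 *	ip ntable change name arp_cache dev eth0 queue 8
 *
 * which carries NDTA_NAME plus a nested NDTA_PARMS holding
 * NDTPA_IFINDEX and the queue length attribute.
 */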
2184 
2185 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2186 				    struct netlink_ext_ack *extack)
2187 {
2188 	struct ndtmsg *ndtm;
2189 
2190 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2191 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2192 		return -EINVAL;
2193 	}
2194 
2195 	ndtm = nlmsg_data(nlh);
2196 	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2197 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2198 		return -EINVAL;
2199 	}
2200 
2201 	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2202 		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2203 		return -EINVAL;
2204 	}
2205 
2206 	return 0;
2207 }
2208 
2209 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2210 {
2211 	const struct nlmsghdr *nlh = cb->nlh;
2212 	struct net *net = sock_net(skb->sk);
2213 	int family, tidx, nidx = 0;
2214 	int tbl_skip = cb->args[0];
2215 	int neigh_skip = cb->args[1];
2216 	struct neigh_table *tbl;
2217 
2218 	if (cb->strict_check) {
2219 		int err = neightbl_valid_dump_info(nlh, cb->extack);
2220 
2221 		if (err < 0)
2222 			return err;
2223 	}
2224 
2225 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2226 
2227 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2228 		struct neigh_parms *p;
2229 
2230 		tbl = neigh_tables[tidx];
2231 		if (!tbl)
2232 			continue;
2233 
2234 		if (tidx < tbl_skip || (family && tbl->family != family))
2235 			continue;
2236 
2237 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2238 				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2239 				       NLM_F_MULTI) < 0)
2240 			break;
2241 
2242 		nidx = 0;
2243 		p = list_next_entry(&tbl->parms, list);
2244 		list_for_each_entry_from(p, &tbl->parms_list, list) {
2245 			if (!net_eq(neigh_parms_net(p), net))
2246 				continue;
2247 
2248 			if (nidx < neigh_skip)
2249 				goto next;
2250 
2251 			if (neightbl_fill_param_info(skb, tbl, p,
2252 						     NETLINK_CB(cb->skb).portid,
2253 						     nlh->nlmsg_seq,
2254 						     RTM_NEWNEIGHTBL,
2255 						     NLM_F_MULTI) < 0)
2256 				goto out;
2257 		next:
2258 			nidx++;
2259 		}
2260 
2261 		neigh_skip = 0;
2262 	}
2263 out:
2264 	cb->args[0] = tidx;
2265 	cb->args[1] = nidx;
2266 
2267 	return skb->len;
2268 }
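
/*
 * Dump-resume sketch: a netlink dump may invoke this callback several
 * times until everything fits, so cb->args[] serves as the cursor.
 * args[0] is the table index and args[1] the per-table parms index to
 * resume from; both are written back before each return.
 */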
2269 
2270 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2271 			   u32 pid, u32 seq, int type, unsigned int flags)
2272 {
2273 	unsigned long now = jiffies;
2274 	struct nda_cacheinfo ci;
2275 	struct nlmsghdr *nlh;
2276 	struct ndmsg *ndm;
2277 
2278 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2279 	if (nlh == NULL)
2280 		return -EMSGSIZE;
2281 
2282 	ndm = nlmsg_data(nlh);
2283 	ndm->ndm_family	 = neigh->ops->family;
2284 	ndm->ndm_pad1    = 0;
2285 	ndm->ndm_pad2    = 0;
2286 	ndm->ndm_flags	 = neigh->flags;
2287 	ndm->ndm_type	 = neigh->type;
2288 	ndm->ndm_ifindex = neigh->dev->ifindex;
2289 
2290 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2291 		goto nla_put_failure;
2292 
2293 	read_lock_bh(&neigh->lock);
2294 	ndm->ndm_state	 = neigh->nud_state;
2295 	if (neigh->nud_state & NUD_VALID) {
2296 		char haddr[MAX_ADDR_LEN];
2297 
2298 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2299 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2300 			read_unlock_bh(&neigh->lock);
2301 			goto nla_put_failure;
2302 		}
2303 	}
2304 
2305 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2306 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2307 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2308 	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
2309 	read_unlock_bh(&neigh->lock);
2310 
2311 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2312 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2313 		goto nla_put_failure;
2314 
2315 	nlmsg_end(skb, nlh);
2316 	return 0;
2317 
2318 nla_put_failure:
2319 	nlmsg_cancel(skb, nlh);
2320 	return -EMSGSIZE;
2321 }
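
/*
 * The resulting RTM_NEWNEIGH message is what e.g. "ip -s neigh"
 * renders; an illustrative line (addresses made up):
 *
 *	192.0.2.1 dev eth0 lladdr 00:11:22:33:44:55 used 12/7/3 probes 1 REACHABLE
 *
 * NDA_DST, NDA_LLADDR, NDA_CACHEINFO and NDA_PROBES carry the address,
 * link-layer address, used/confirmed/updated triple and probe count.
 */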
2322 
2323 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2324 			    u32 pid, u32 seq, int type, unsigned int flags,
2325 			    struct neigh_table *tbl)
2326 {
2327 	struct nlmsghdr *nlh;
2328 	struct ndmsg *ndm;
2329 
2330 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2331 	if (nlh == NULL)
2332 		return -EMSGSIZE;
2333 
2334 	ndm = nlmsg_data(nlh);
2335 	ndm->ndm_family	 = tbl->family;
2336 	ndm->ndm_pad1    = 0;
2337 	ndm->ndm_pad2    = 0;
2338 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
2339 	ndm->ndm_type	 = RTN_UNICAST;
2340 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2341 	ndm->ndm_state	 = NUD_NONE;
2342 
2343 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2344 		goto nla_put_failure;
2345 
2346 	nlmsg_end(skb, nlh);
2347 	return 0;
2348 
2349 nla_put_failure:
2350 	nlmsg_cancel(skb, nlh);
2351 	return -EMSGSIZE;
2352 }
2353 
2354 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2355 {
2356 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2357 	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2358 }
2359 
2360 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2361 {
2362 	struct net_device *master;
2363 
2364 	if (!master_idx)
2365 		return false;
2366 
2367 	master = dev ? netdev_master_upper_dev_get(dev) : NULL;
2368 	if (!master || master->ifindex != master_idx)
2369 		return true;
2370 
2371 	return false;
2372 }
2373 
2374 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2375 {
2376 	if (filter_idx && (!dev || dev->ifindex != filter_idx))
2377 		return true;
2378 
2379 	return false;
2380 }
2381 
2382 struct neigh_dump_filter {
2383 	int master_idx;
2384 	int dev_idx;
2385 };
2386 
2387 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2388 			    struct netlink_callback *cb,
2389 			    struct neigh_dump_filter *filter)
2390 {
2391 	struct net *net = sock_net(skb->sk);
2392 	struct neighbour *n;
2393 	int rc, h, s_h = cb->args[1];
2394 	int idx, s_idx = idx = cb->args[2];
2395 	struct neigh_hash_table *nht;
2396 	unsigned int flags = NLM_F_MULTI;
2397 
2398 	if (filter->dev_idx || filter->master_idx)
2399 		flags |= NLM_F_DUMP_FILTERED;
2400 
2401 	rcu_read_lock_bh();
2402 	nht = rcu_dereference_bh(tbl->nht);
2403 
2404 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2405 		if (h > s_h)
2406 			s_idx = 0;
2407 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2408 		     n != NULL;
2409 		     n = rcu_dereference_bh(n->next)) {
2410 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2411 				goto next;
2412 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2413 			    neigh_master_filtered(n->dev, filter->master_idx))
2414 				goto next;
2415 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2416 					    cb->nlh->nlmsg_seq,
2417 					    RTM_NEWNEIGH,
2418 					    flags) < 0) {
2419 				rc = -1;
2420 				goto out;
2421 			}
2422 next:
2423 			idx++;
2424 		}
2425 	}
2426 	rc = skb->len;
2427 out:
2428 	rcu_read_unlock_bh();
2429 	cb->args[1] = h;
2430 	cb->args[2] = idx;
2431 	return rc;
2432 }
2433 
2434 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2435 			     struct netlink_callback *cb,
2436 			     struct neigh_dump_filter *filter)
2437 {
2438 	struct pneigh_entry *n;
2439 	struct net *net = sock_net(skb->sk);
2440 	int rc, h, s_h = cb->args[3];
2441 	int idx, s_idx = idx = cb->args[4];
2442 	unsigned int flags = NLM_F_MULTI;
2443 
2444 	if (filter->dev_idx || filter->master_idx)
2445 		flags |= NLM_F_DUMP_FILTERED;
2446 
2447 	read_lock_bh(&tbl->lock);
2448 
2449 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2450 		if (h > s_h)
2451 			s_idx = 0;
2452 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2453 			if (idx < s_idx || pneigh_net(n) != net)
2454 				goto next;
2455 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2456 			    neigh_master_filtered(n->dev, filter->master_idx))
2457 				goto next;
2458 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2459 					    cb->nlh->nlmsg_seq,
2460 					    RTM_NEWNEIGH, flags, tbl) < 0) {
2461 				read_unlock_bh(&tbl->lock);
2462 				rc = -1;
2463 				goto out;
2464 			}
2465 		next:
2466 			idx++;
2467 		}
2468 	}
2469 
2470 	read_unlock_bh(&tbl->lock);
2471 	rc = skb->len;
2472 out:
2473 	cb->args[3] = h;
2474 	cb->args[4] = idx;
2475 	return rc;
2477 }
2478 
2479 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2480 				bool strict_check,
2481 				struct neigh_dump_filter *filter,
2482 				struct netlink_ext_ack *extack)
2483 {
2484 	struct nlattr *tb[NDA_MAX + 1];
2485 	int err, i;
2486 
2487 	if (strict_check) {
2488 		struct ndmsg *ndm;
2489 
2490 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2491 			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2492 			return -EINVAL;
2493 		}
2494 
2495 		ndm = nlmsg_data(nlh);
2496 		if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
2497 		    ndm->ndm_state || ndm->ndm_type) {
2498 			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2499 			return -EINVAL;
2500 		}
2501 
2502 		if (ndm->ndm_flags & ~NTF_PROXY) {
2503 			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2504 			return -EINVAL;
2505 		}
2506 
2507 		err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2508 					 NULL, extack);
2509 	} else {
2510 		err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
2511 				  NULL, extack);
2512 	}
2513 	if (err < 0)
2514 		return err;
2515 
2516 	for (i = 0; i <= NDA_MAX; ++i) {
2517 		if (!tb[i])
2518 			continue;
2519 
2520 		/* all new attributes should require strict_check */
2521 		switch (i) {
2522 		case NDA_IFINDEX:
2523 			if (nla_len(tb[i]) != sizeof(u32)) {
2524 				NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in neighbor dump request");
2525 				return -EINVAL;
2526 			}
2527 			filter->dev_idx = nla_get_u32(tb[i]);
2528 			break;
2529 		case NDA_MASTER:
2530 			if (nla_len(tb[i]) != sizeof(u32)) {
2531 				NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in neighbor dump request");
2532 				return -EINVAL;
2533 			}
2534 			filter->master_idx = nla_get_u32(tb[i]);
2535 			break;
2536 		default:
2537 			if (strict_check) {
2538 				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2539 				return -EINVAL;
2540 			}
2541 		}
2542 	}
2543 
2544 	return 0;
2545 }
2546 
2547 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2548 {
2549 	const struct nlmsghdr *nlh = cb->nlh;
2550 	struct neigh_dump_filter filter = {};
2551 	struct neigh_table *tbl;
2552 	int t, family, s_t;
2553 	int proxy = 0;
2554 	int err;
2555 
2556 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2557 
2558 	/* check for the presence of a full ndmsg structure; the family
2559 	 * member sits at the same offset in ndmsg and rtgenmsg
2560 	 */
2561 	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2562 	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2563 		proxy = 1;
2564 
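	/* Malformed filters abort the dump only for strict-check
	 * requesters; legacy dumpers carry on for backward compatibility.
	 */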
2565 	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2566 	if (err < 0 && cb->strict_check)
2567 		return err;
2568 
2569 	s_t = cb->args[0];
2570 
2571 	for (t = 0; t < NEIGH_NR_TABLES; t++) {
2572 		tbl = neigh_tables[t];
2573 
2574 		if (!tbl)
2575 			continue;
2576 		if (t < s_t || (family && tbl->family != family))
2577 			continue;
2578 		if (t > s_t)
2579 			memset(&cb->args[1], 0, sizeof(cb->args) -
2580 						sizeof(cb->args[0]));
2581 		if (proxy)
2582 			err = pneigh_dump_table(tbl, skb, cb, &filter);
2583 		else
2584 			err = neigh_dump_table(tbl, skb, cb, &filter);
2585 		if (err < 0)
2586 			break;
2587 	}
2588 
2589 	cb->args[0] = t;
2590 	return skb->len;
2591 }
2592 
2593 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2594 {
2595 	int chain;
2596 	struct neigh_hash_table *nht;
2597 
2598 	rcu_read_lock_bh();
2599 	nht = rcu_dereference_bh(tbl->nht);
2600 
2601 	read_lock(&tbl->lock); /* avoid resizes */
2602 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2603 		struct neighbour *n;
2604 
2605 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2606 		     n != NULL;
2607 		     n = rcu_dereference_bh(n->next))
2608 			cb(n, cookie);
2609 	}
2610 	read_unlock(&tbl->lock);
2611 	rcu_read_unlock_bh();
2612 }
2613 EXPORT_SYMBOL(neigh_for_each);
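
/*
 * Example usage of neigh_for_each() (illustrative sketch; the callback
 * and counter names are hypothetical):
 *
 *	static void neigh_count(struct neighbour *n, void *cookie)
 *	{
 *		(*(unsigned int *)cookie)++;
 *	}
 *
 *	unsigned int cnt = 0;
 *	neigh_for_each(&arp_tbl, neigh_count, &cnt);
 *
 * The callback runs under the table read lock with BHs disabled, so it
 * must not sleep or call back into the neighbour subsystem.
 */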
2614 
2615 /* The tbl->lock must be held as a writer and BH disabled. */
2616 void __neigh_for_each_release(struct neigh_table *tbl,
2617 			      int (*cb)(struct neighbour *))
2618 {
2619 	int chain;
2620 	struct neigh_hash_table *nht;
2621 
2622 	nht = rcu_dereference_protected(tbl->nht,
2623 					lockdep_is_held(&tbl->lock));
2624 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2625 		struct neighbour *n;
2626 		struct neighbour __rcu **np;
2627 
2628 		np = &nht->hash_buckets[chain];
2629 		while ((n = rcu_dereference_protected(*np,
2630 					lockdep_is_held(&tbl->lock))) != NULL) {
2631 			int release;
2632 
2633 			write_lock(&n->lock);
2634 			release = cb(n);
2635 			if (release) {
2636 				rcu_assign_pointer(*np,
2637 					rcu_dereference_protected(n->next,
2638 						lockdep_is_held(&tbl->lock)));
2639 				n->dead = 1;
2640 			} else
2641 				np = &n->next;
2642 			write_unlock(&n->lock);
2643 			if (release)
2644 				neigh_cleanup_and_release(n);
2645 		}
2646 	}
2647 }
2648 EXPORT_SYMBOL(__neigh_for_each_release);
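
/*
 * Example (sketch) of the contract above: release every entry with a
 * hypothetical always-release predicate.
 *
 *	static int release_all(struct neighbour *n)
 *	{
 *		return 1;
 *	}
 *
 *	write_lock_bh(&tbl->lock);
 *	__neigh_for_each_release(tbl, release_all);
 *	write_unlock_bh(&tbl->lock);
 */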
2649 
2650 int neigh_xmit(int index, struct net_device *dev,
2651 	       const void *addr, struct sk_buff *skb)
2652 {
2653 	int err = -EAFNOSUPPORT;
2654 	if (likely(index < NEIGH_NR_TABLES)) {
2655 		struct neigh_table *tbl;
2656 		struct neighbour *neigh;
2657 
2658 		tbl = neigh_tables[index];
2659 		if (!tbl)
2660 			goto out;
2661 		rcu_read_lock_bh();
2662 		neigh = __neigh_lookup_noref(tbl, addr, dev);
2663 		if (!neigh)
2664 			neigh = __neigh_create(tbl, addr, dev, false);
2665 		err = PTR_ERR(neigh);
2666 		if (IS_ERR(neigh)) {
2667 			rcu_read_unlock_bh();
2668 			goto out_kfree_skb;
2669 		}
2670 		err = neigh->output(neigh, skb);
2671 		rcu_read_unlock_bh();
2672 	} else if (index == NEIGH_LINK_TABLE) {
2674 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
2675 				      addr, NULL, skb->len);
2676 		if (err < 0)
2677 			goto out_kfree_skb;
2678 		err = dev_queue_xmit(skb);
2679 	}
2680 out:
2681 	return err;
2682 out_kfree_skb:
2683 	kfree_skb(skb);
2684 	goto out;
2685 }
2686 EXPORT_SYMBOL(neigh_xmit);
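
/*
 * Example usage of neigh_xmit() (illustrative; assumes "dev" and a
 * routed IPv4 next hop "gw" as in an MPLS-style caller):
 *
 *	err = neigh_xmit(NEIGH_ARP_TABLE, dev, &gw, skb);
 *
 * NEIGH_ARP_TABLE/NEIGH_ND_TABLE resolve through the respective cache,
 * while NEIGH_LINK_TABLE skips resolution and hands "addr" straight to
 * dev_hard_header().  On resolution or header failure the skb is freed
 * here; an unsupported index returns -EAFNOSUPPORT with the skb intact.
 */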
2687 
2688 #ifdef CONFIG_PROC_FS
2689 
2690 static struct neighbour *neigh_get_first(struct seq_file *seq)
2691 {
2692 	struct neigh_seq_state *state = seq->private;
2693 	struct net *net = seq_file_net(seq);
2694 	struct neigh_hash_table *nht = state->nht;
2695 	struct neighbour *n = NULL;
2696 	int bucket = state->bucket;
2697 
2698 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2699 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2700 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2701 
2702 		while (n) {
2703 			if (!net_eq(dev_net(n->dev), net))
2704 				goto next;
2705 			if (state->neigh_sub_iter) {
2706 				loff_t fakep = 0;
2707 				void *v;
2708 
2709 				v = state->neigh_sub_iter(state, n, &fakep);
2710 				if (!v)
2711 					goto next;
2712 			}
2713 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2714 				break;
2715 			if (n->nud_state & ~NUD_NOARP)
2716 				break;
2717 next:
2718 			n = rcu_dereference_bh(n->next);
2719 		}
2720 
2721 		if (n)
2722 			break;
2723 	}
2724 	state->bucket = bucket;
2725 
2726 	return n;
2727 }
2728 
2729 static struct neighbour *neigh_get_next(struct seq_file *seq,
2730 					struct neighbour *n,
2731 					loff_t *pos)
2732 {
2733 	struct neigh_seq_state *state = seq->private;
2734 	struct net *net = seq_file_net(seq);
2735 	struct neigh_hash_table *nht = state->nht;
2736 
2737 	if (state->neigh_sub_iter) {
2738 		void *v = state->neigh_sub_iter(state, n, pos);
2739 		if (v)
2740 			return n;
2741 	}
2742 	n = rcu_dereference_bh(n->next);
2743 
2744 	while (1) {
2745 		while (n) {
2746 			if (!net_eq(dev_net(n->dev), net))
2747 				goto next;
2748 			if (state->neigh_sub_iter) {
2749 				void *v = state->neigh_sub_iter(state, n, pos);
2750 				if (v)
2751 					return n;
2752 				goto next;
2753 			}
2754 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2755 				break;
2756 
2757 			if (n->nud_state & ~NUD_NOARP)
2758 				break;
2759 next:
2760 			n = rcu_dereference_bh(n->next);
2761 		}
2762 
2763 		if (n)
2764 			break;
2765 
2766 		if (++state->bucket >= (1 << nht->hash_shift))
2767 			break;
2768 
2769 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2770 	}
2771 
2772 	if (n && pos)
2773 		--(*pos);
2774 	return n;
2775 }
2776 
2777 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2778 {
2779 	struct neighbour *n = neigh_get_first(seq);
2780 
2781 	if (n) {
2782 		--(*pos);
2783 		while (*pos) {
2784 			n = neigh_get_next(seq, n, pos);
2785 			if (!n)
2786 				break;
2787 		}
2788 	}
2789 	return *pos ? NULL : n;
2790 }
2791 
2792 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2793 {
2794 	struct neigh_seq_state *state = seq->private;
2795 	struct net *net = seq_file_net(seq);
2796 	struct neigh_table *tbl = state->tbl;
2797 	struct pneigh_entry *pn = NULL;
2798 	int bucket;
2799 
2800 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2801 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2802 		pn = tbl->phash_buckets[bucket];
2803 		while (pn && !net_eq(pneigh_net(pn), net))
2804 			pn = pn->next;
2805 		if (pn)
2806 			break;
2807 	}
2808 	state->bucket = bucket;
2809 
2810 	return pn;
2811 }
2812 
2813 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2814 					    struct pneigh_entry *pn,
2815 					    loff_t *pos)
2816 {
2817 	struct neigh_seq_state *state = seq->private;
2818 	struct net *net = seq_file_net(seq);
2819 	struct neigh_table *tbl = state->tbl;
2820 
2821 	do {
2822 		pn = pn->next;
2823 	} while (pn && !net_eq(pneigh_net(pn), net));
2824 
2825 	while (!pn) {
2826 		if (++state->bucket > PNEIGH_HASHMASK)
2827 			break;
2828 		pn = tbl->phash_buckets[state->bucket];
2829 		while (pn && !net_eq(pneigh_net(pn), net))
2830 			pn = pn->next;
2831 		if (pn)
2832 			break;
2833 	}
2834 
2835 	if (pn && pos)
2836 		--(*pos);
2837 
2838 	return pn;
2839 }
2840 
2841 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2842 {
2843 	struct pneigh_entry *pn = pneigh_get_first(seq);
2844 
2845 	if (pn) {
2846 		--(*pos);
2847 		while (*pos) {
2848 			pn = pneigh_get_next(seq, pn, pos);
2849 			if (!pn)
2850 				break;
2851 		}
2852 	}
2853 	return *pos ? NULL : pn;
2854 }
2855 
2856 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2857 {
2858 	struct neigh_seq_state *state = seq->private;
2859 	void *rc;
2860 	loff_t idxpos = *pos;
2861 
2862 	rc = neigh_get_idx(seq, &idxpos);
2863 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2864 		rc = pneigh_get_idx(seq, &idxpos);
2865 
2866 	return rc;
2867 }
2868 
2869 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2870 	__acquires(rcu_bh)
2871 {
2872 	struct neigh_seq_state *state = seq->private;
2873 
2874 	state->tbl = tbl;
2875 	state->bucket = 0;
2876 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2877 
2878 	rcu_read_lock_bh();
2879 	state->nht = rcu_dereference_bh(tbl->nht);
2880 
2881 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2882 }
2883 EXPORT_SYMBOL(neigh_seq_start);
2884 
2885 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2886 {
2887 	struct neigh_seq_state *state;
2888 	void *rc;
2889 
2890 	if (v == SEQ_START_TOKEN) {
2891 		rc = neigh_get_first(seq);
2892 		goto out;
2893 	}
2894 
2895 	state = seq->private;
2896 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2897 		rc = neigh_get_next(seq, v, NULL);
2898 		if (rc)
2899 			goto out;
2900 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2901 			rc = pneigh_get_first(seq);
2902 	} else {
2903 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2904 		rc = pneigh_get_next(seq, v, NULL);
2905 	}
2906 out:
2907 	++(*pos);
2908 	return rc;
2909 }
2910 EXPORT_SYMBOL(neigh_seq_next);
2911 
2912 void neigh_seq_stop(struct seq_file *seq, void *v)
2913 	__releases(rcu_bh)
2914 {
2915 	rcu_read_unlock_bh();
2916 }
2917 EXPORT_SYMBOL(neigh_seq_stop);
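
/*
 * Sketch of how a protocol wires these iterators into its own seq_file
 * (modeled on IPv4 ARP's /proc/net/arp; the "foo" name is hypothetical):
 *
 *	static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 * with neigh_seq_next() and neigh_seq_stop() used directly as the .next
 * and .stop operations.
 */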
2918 
2919 /* statistics via seq_file */
2920 
2921 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2922 {
2923 	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2924 	int cpu;
2925 
2926 	if (*pos == 0)
2927 		return SEQ_START_TOKEN;
2928 
2929 	for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) {
2930 		if (!cpu_possible(cpu))
2931 			continue;
2932 		*pos = cpu + 1;
2933 		return per_cpu_ptr(tbl->stats, cpu);
2934 	}
2935 	return NULL;
2936 }
2937 
2938 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2939 {
2940 	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2941 	int cpu;
2942 
2943 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2944 		if (!cpu_possible(cpu))
2945 			continue;
2946 		*pos = cpu + 1;
2947 		return per_cpu_ptr(tbl->stats, cpu);
2948 	}
2949 	return NULL;
2950 }
2951 
2952 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2953 {
2954 
2955 }
2956 
2957 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2958 {
2959 	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
2960 	struct neigh_statistics *st = v;
2961 
2962 	if (v == SEQ_START_TOKEN) {
2963 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
2964 		return 0;
2965 	}
2966 
2967 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2968 			"%08lx %08lx  %08lx %08lx %08lx %08lx\n",
2969 		   atomic_read(&tbl->entries),
2970 
2971 		   st->allocs,
2972 		   st->destroys,
2973 		   st->hash_grows,
2974 
2975 		   st->lookups,
2976 		   st->hits,
2977 
2978 		   st->res_failed,
2979 
2980 		   st->rcv_probes_mcast,
2981 		   st->rcv_probes_ucast,
2982 
2983 		   st->periodic_gc_runs,
2984 		   st->forced_gc_runs,
2985 		   st->unres_discards,
2986 		   st->table_fulls
2987 		   );
2988 
2989 	return 0;
2990 }
2991 
2992 static const struct seq_operations neigh_stat_seq_ops = {
2993 	.start	= neigh_stat_seq_start,
2994 	.next	= neigh_stat_seq_next,
2995 	.stop	= neigh_stat_seq_stop,
2996 	.show	= neigh_stat_seq_show,
2997 };
2998 #endif /* CONFIG_PROC_FS */
2999 
3000 static inline size_t neigh_nlmsg_size(void)
3001 {
3002 	return NLMSG_ALIGN(sizeof(struct ndmsg))
3003 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3004 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
3005 	       + nla_total_size(sizeof(struct nda_cacheinfo))
3006 	       + nla_total_size(4); /* NDA_PROBES */
3007 }
3008 
3009 static void __neigh_notify(struct neighbour *n, int type, int flags,
3010 			   u32 pid)
3011 {
3012 	struct net *net = dev_net(n->dev);
3013 	struct sk_buff *skb;
3014 	int err = -ENOBUFS;
3015 
3016 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3017 	if (skb == NULL)
3018 		goto errout;
3019 
3020 	err = neigh_fill_info(skb, n, pid, 0, type, flags);
3021 	if (err < 0) {
3022 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3023 		WARN_ON(err == -EMSGSIZE);
3024 		kfree_skb(skb);
3025 		goto errout;
3026 	}
3027 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3028 	return;
3029 errout:
3030 	if (err < 0)
3031 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3032 }
3033 
3034 void neigh_app_ns(struct neighbour *n)
3035 {
3036 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3037 }
3038 EXPORT_SYMBOL(neigh_app_ns);
3039 
3040 #ifdef CONFIG_SYSCTL
3041 static int zero;
3042 static int int_max = INT_MAX;
3043 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3044 
3045 static int proc_unres_qlen(struct ctl_table *ctl, int write,
3046 			   void __user *buffer, size_t *lenp, loff_t *ppos)
3047 {
3048 	int size, ret;
3049 	struct ctl_table tmp = *ctl;
3050 
3051 	tmp.extra1 = &zero;
3052 	tmp.extra2 = &unres_qlen_max;
3053 	tmp.data = &size;
3054 
3055 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3056 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3057 
3058 	if (write && !ret)
3059 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3060 	return ret;
3061 }
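
/*
 * unres_qlen (packets) is kept as a legacy alias of unres_qlen_bytes:
 * reads divide the byte budget by SKB_TRUESIZE(ETH_FRAME_LEN), writes
 * multiply it back.  Illustrative shell session:
 *
 *	# echo 101 > /proc/sys/net/ipv4/neigh/default/unres_qlen
 *	(unres_qlen_bytes now holds 101 * SKB_TRUESIZE(ETH_FRAME_LEN))
 */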
3062 
3063 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
3064 						   int family)
3065 {
3066 	switch (family) {
3067 	case AF_INET:
3068 		return __in_dev_arp_parms_get_rcu(dev);
3069 	case AF_INET6:
3070 		return __in6_dev_nd_parms_get_rcu(dev);
3071 	}
3072 	return NULL;
3073 }
3074 
3075 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3076 				  int index)
3077 {
3078 	struct net_device *dev;
3079 	int family = neigh_parms_family(p);
3080 
3081 	rcu_read_lock();
3082 	for_each_netdev_rcu(net, dev) {
3083 		struct neigh_parms *dst_p =
3084 				neigh_get_dev_parms_rcu(dev, family);
3085 
3086 		if (dst_p && !test_bit(index, dst_p->data_state))
3087 			dst_p->data[index] = p->data[index];
3088 	}
3089 	rcu_read_unlock();
3090 }
3091 
3092 static void neigh_proc_update(struct ctl_table *ctl, int write)
3093 {
3094 	struct net_device *dev = ctl->extra1;
3095 	struct neigh_parms *p = ctl->extra2;
3096 	struct net *net = neigh_parms_net(p);
3097 	int index = (int *) ctl->data - p->data;
3098 
3099 	if (!write)
3100 		return;
3101 
3102 	set_bit(index, p->data_state);
3103 	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3104 		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3105 	if (!dev) /* NULL dev means this is the default value */
3106 		neigh_copy_dflt_parms(net, p, index);
3107 }
3108 
3109 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
3110 					   void __user *buffer,
3111 					   size_t *lenp, loff_t *ppos)
3112 {
3113 	struct ctl_table tmp = *ctl;
3114 	int ret;
3115 
3116 	tmp.extra1 = &zero;
3117 	tmp.extra2 = &int_max;
3118 
3119 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3120 	neigh_proc_update(ctl, write);
3121 	return ret;
3122 }
3123 
3124 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
3125 			void __user *buffer, size_t *lenp, loff_t *ppos)
3126 {
3127 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3128 
3129 	neigh_proc_update(ctl, write);
3130 	return ret;
3131 }
3132 EXPORT_SYMBOL(neigh_proc_dointvec);
3133 
3134 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
3135 				void __user *buffer,
3136 				size_t *lenp, loff_t *ppos)
3137 {
3138 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3139 
3140 	neigh_proc_update(ctl, write);
3141 	return ret;
3142 }
3143 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3144 
3145 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
3146 					      void __user *buffer,
3147 					      size_t *lenp, loff_t *ppos)
3148 {
3149 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3150 
3151 	neigh_proc_update(ctl, write);
3152 	return ret;
3153 }
3154 
3155 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
3156 				   void __user *buffer,
3157 				   size_t *lenp, loff_t *ppos)
3158 {
3159 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3160 
3161 	neigh_proc_update(ctl, write);
3162 	return ret;
3163 }
3164 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3165 
3166 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
3167 					  void __user *buffer,
3168 					  size_t *lenp, loff_t *ppos)
3169 {
3170 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3171 
3172 	neigh_proc_update(ctl, write);
3173 	return ret;
3174 }
3175 
3176 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
3177 					  void __user *buffer,
3178 					  size_t *lenp, loff_t *ppos)
3179 {
3180 	struct neigh_parms *p = ctl->extra2;
3181 	int ret;
3182 
3183 	if (strcmp(ctl->procname, "base_reachable_time") == 0)
3184 		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3185 	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3186 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3187 	else
3188 		ret = -1;
3189 
3190 	if (write && ret == 0) {
3191 		/* update reachable_time as well, otherwise, the change will
3192 		 * only be effective after the next time neigh_periodic_work
3193 		 * decides to recompute it
3194 		 */
3195 		p->reachable_time =
3196 			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3197 	}
3198 	return ret;
3199 }
3200 
3201 #define NEIGH_PARMS_DATA_OFFSET(index)	\
3202 	(&((struct neigh_parms *) 0)->data[index])
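
/*
 * NEIGH_PARMS_DATA_OFFSET() is the classic offsetof idiom: the offset
 * of data[index] within struct neigh_parms, expressed as a pointer off
 * a NULL base.  neigh_sysctl_register() below adds the address of the
 * real parms instance, turning each stored offset into &p->data[index].
 */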
3203 
3204 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3205 	[NEIGH_VAR_ ## attr] = { \
3206 		.procname	= name, \
3207 		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3208 		.maxlen		= sizeof(int), \
3209 		.mode		= mval, \
3210 		.proc_handler	= proc, \
3211 	}
3212 
3213 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3214 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3215 
3216 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3217 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3218 
3219 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3220 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3221 
3222 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
3223 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3224 
3225 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3226 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3227 
3228 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3229 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3230 
3231 static struct neigh_sysctl_table {
3232 	struct ctl_table_header *sysctl_header;
3233 	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
3234 } neigh_sysctl_template __read_mostly = {
3235 	.neigh_vars = {
3236 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3237 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3238 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3239 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3240 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3241 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3242 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3243 		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3244 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3245 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3246 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3247 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3248 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3249 		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3250 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3251 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3252 		[NEIGH_VAR_GC_INTERVAL] = {
3253 			.procname	= "gc_interval",
3254 			.maxlen		= sizeof(int),
3255 			.mode		= 0644,
3256 			.proc_handler	= proc_dointvec_jiffies,
3257 		},
3258 		[NEIGH_VAR_GC_THRESH1] = {
3259 			.procname	= "gc_thresh1",
3260 			.maxlen		= sizeof(int),
3261 			.mode		= 0644,
3262 			.extra1 	= &zero,
3263 			.extra2		= &int_max,
3264 			.proc_handler	= proc_dointvec_minmax,
3265 		},
3266 		[NEIGH_VAR_GC_THRESH2] = {
3267 			.procname	= "gc_thresh2",
3268 			.maxlen		= sizeof(int),
3269 			.mode		= 0644,
3270 			.extra1 	= &zero,
3271 			.extra2		= &int_max,
3272 			.proc_handler	= proc_dointvec_minmax,
3273 		},
3274 		[NEIGH_VAR_GC_THRESH3] = {
3275 			.procname	= "gc_thresh3",
3276 			.maxlen		= sizeof(int),
3277 			.mode		= 0644,
3278 			.extra1 	= &zero,
3279 			.extra2		= &int_max,
3280 			.proc_handler	= proc_dointvec_minmax,
3281 		},
3282 		{},
3283 	},
3284 };
3285 
3286 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3287 			  proc_handler *handler)
3288 {
3289 	int i;
3290 	struct neigh_sysctl_table *t;
3291 	const char *dev_name_source;
3292 	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
3293 	char *p_name;
3294 
3295 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3296 	if (!t)
3297 		goto err;
3298 
3299 	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3300 		t->neigh_vars[i].data += (long) p;
3301 		t->neigh_vars[i].extra1 = dev;
3302 		t->neigh_vars[i].extra2 = p;
3303 	}
3304 
3305 	if (dev) {
3306 		dev_name_source = dev->name;
3307 		/* Terminate the table early */
3308 		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3309 		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3310 	} else {
3311 		struct neigh_table *tbl = p->tbl;
3312 		dev_name_source = "default";
3313 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3314 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3315 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3316 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3317 	}
3318 
3319 	if (handler) {
3320 		/* RetransTime */
3321 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3322 		/* ReachableTime */
3323 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3324 		/* RetransTime (in milliseconds) */
3325 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3326 		/* ReachableTime (in milliseconds) */
3327 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3328 	} else {
3329 		/* These handlers update p->reachable_time after
3330 		 * base_reachable_time(_ms) is set, so the new interval takes
3331 		 * effect on the next neighbour update instead of waiting for
3332 		 * neigh_periodic_work to recompute it (which can take
3333 		 * minutes).  Any handler that replaces them should do the same.
3334 		 */
3335 		/* ReachableTime */
3336 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3337 			neigh_proc_base_reachable_time;
3338 		/* ReachableTime (in milliseconds) */
3339 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3340 			neigh_proc_base_reachable_time;
3341 	}
3342 
3343 	/* Don't export sysctls to unprivileged users */
3344 	if (neigh_parms_net(p)->user_ns != &init_user_ns)
3345 		t->neigh_vars[0].procname = NULL;
3346 
3347 	switch (neigh_parms_family(p)) {
3348 	case AF_INET:
3349 	      p_name = "ipv4";
3350 	      break;
3351 	case AF_INET6:
3352 	      p_name = "ipv6";
3353 	      break;
3354 	default:
3355 	      BUG();
3356 	}
3357 
3358 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3359 		p_name, dev_name_source);
3360 	t->sysctl_header =
3361 		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3362 	if (!t->sysctl_header)
3363 		goto free;
3364 
3365 	p->sysctl_table = t;
3366 	return 0;
3367 
3368 free:
3369 	kfree(t);
3370 err:
3371 	return -ENOBUFS;
3372 }
3373 EXPORT_SYMBOL(neigh_sysctl_register);
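
/*
 * Example caller (sketch, modeled on IPv4's devinet registering
 * per-device ARP knobs with the default handlers):
 *
 *	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
 *
 * which creates /proc/sys/net/ipv4/neigh/<dev>/...; passing a non-NULL
 * handler instead lets a protocol intercept the retrans/reachable time
 * entries, as IPv6 ndisc does.
 */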
3374 
3375 void neigh_sysctl_unregister(struct neigh_parms *p)
3376 {
3377 	if (p->sysctl_table) {
3378 		struct neigh_sysctl_table *t = p->sysctl_table;
3379 		p->sysctl_table = NULL;
3380 		unregister_net_sysctl_table(t->sysctl_header);
3381 		kfree(t);
3382 	}
3383 }
3384 EXPORT_SYMBOL(neigh_sysctl_unregister);
3385 
3386 #endif	/* CONFIG_SYSCTL */
3387 
3388 static int __init neigh_init(void)
3389 {
3390 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
3391 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
3392 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
3393 
3394 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3395 		      0);
3396 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
3397 
3398 	return 0;
3399 }
3400 
3401 subsys_initcall(neigh_init);
3402