xref: /linux/net/core/neighbour.c (revision 148f9bb87745ed45f7a11b2cbd3bc0f017d5d257)
1 /*
2  *	Generic address resolution entity
3  *
4  *	Authors:
5  *	Pedro Roque		<roque@di.fc.ul.pt>
6  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *	Fixes:
14  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
15  *	Harald Welte		Add neighbour cache statistics like rtstat
16  */
17 
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 
42 #define DEBUG
43 #define NEIGH_DEBUG 1
44 #define neigh_dbg(level, fmt, ...)		\
45 do {						\
46 	if (level <= NEIGH_DEBUG)		\
47 		pr_debug(fmt, ##__VA_ARGS__);	\
48 } while (0)
49 
50 #define PNEIGH_HASHMASK		0xF
51 
52 static void neigh_timer_handler(unsigned long arg);
53 static void __neigh_notify(struct neighbour *n, int type, int flags);
54 static void neigh_update_notify(struct neighbour *neigh);
55 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
56 
57 static struct neigh_table *neigh_tables;
58 #ifdef CONFIG_PROC_FS
59 static const struct file_operations neigh_stat_seq_fops;
60 #endif
61 
62 /*
63    Neighbour hash table buckets are protected by the rwlock tbl->lock.
64 
65    - All scans/updates of the hash buckets MUST be made under this lock.
66    - NOTHING clever should be done under this lock: no callbacks
67      into protocol backends, no attempts to send anything to the
68      network. Doing so will deadlock if the backend/driver wants to
69      use the neighbour cache itself.
70    - If an entry requires some non-trivial action, increase its
71      reference count and release the table lock first.
72 
73    Neighbour entries are protected:
74    - by their reference count.
75    - by the rwlock neigh->lock.
76 
77    The reference count prevents destruction.
78 
79    neigh->lock mainly serializes the link-layer address and its
80    validity state. However, the same lock also protects other fields:
81     - the timer
82     - the resolution queue
83 
84    Again, nothing clever shall be done under neigh->lock; the most
85    complicated operation we allow there is dev->hard_header. It is
86    assumed that dev->hard_header is simple and does not call back
87    into the neighbour tables.
88 
89    The last lock is neigh_tbl_lock. It is a pure SMP lock protecting
90    the list of neighbour tables; the list is used only in process context.
91  */
92 
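/*
 * Editorial sketch (not part of the original file) of the "take a
 * reference, then drop the table lock" rule described above. The
 * helpers find_entry_somehow() and do_nontrivial_work() are
 * hypothetical placeholders:
 *
 *	write_lock_bh(&tbl->lock);
 *	n = find_entry_somehow(tbl);
 *	if (n)
 *		neigh_hold(n);		-- pin the entry under the lock
 *	write_unlock_bh(&tbl->lock);
 *	if (n) {
 *		do_nontrivial_work(n);	-- may call drivers / send packets
 *		neigh_release(n);	-- drop our pin
 *	}
 */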
93 static DEFINE_RWLOCK(neigh_tbl_lock);
94 
95 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
96 {
97 	kfree_skb(skb);
98 	return -ENETDOWN;
99 }
100 
101 static void neigh_cleanup_and_release(struct neighbour *neigh)
102 {
103 	if (neigh->parms->neigh_cleanup)
104 		neigh->parms->neigh_cleanup(neigh);
105 
106 	__neigh_notify(neigh, RTM_DELNEIGH, 0);
107 	neigh_release(neigh);
108 }
109 
110 /*
111  * Returns a value uniformly distributed in [base/2, 3*base/2), i.e.
112  * base/2 + (net_random() % base). Matches the default IPv6 behaviour
113  * and is not overridable, because it is a genuinely reasonable choice.
114  */
115 
116 unsigned long neigh_rand_reach_time(unsigned long base)
117 {
118 	return base ? (net_random() % base) + (base >> 1) : 0;
119 }
120 EXPORT_SYMBOL(neigh_rand_reach_time);
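/* Worked example (editorial): base = 30 * HZ gives a reachable time
 * drawn uniformly from [15 * HZ, 45 * HZ).
 */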
121 
122 
123 static int neigh_forced_gc(struct neigh_table *tbl)
124 {
125 	int shrunk = 0;
126 	int i;
127 	struct neigh_hash_table *nht;
128 
129 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
130 
131 	write_lock_bh(&tbl->lock);
132 	nht = rcu_dereference_protected(tbl->nht,
133 					lockdep_is_held(&tbl->lock));
134 	for (i = 0; i < (1 << nht->hash_shift); i++) {
135 		struct neighbour *n;
136 		struct neighbour __rcu **np;
137 
138 		np = &nht->hash_buckets[i];
139 		while ((n = rcu_dereference_protected(*np,
140 					lockdep_is_held(&tbl->lock))) != NULL) {
141 			/* A neighbour record may be discarded only if:
142 			 * - nobody holds a reference to it, and
143 			 * - it is not permanent.
144 			 */
145 			write_lock(&n->lock);
146 			if (atomic_read(&n->refcnt) == 1 &&
147 			    !(n->nud_state & NUD_PERMANENT)) {
148 				rcu_assign_pointer(*np,
149 					rcu_dereference_protected(n->next,
150 						  lockdep_is_held(&tbl->lock)));
151 				n->dead = 1;
152 				shrunk	= 1;
153 				write_unlock(&n->lock);
154 				neigh_cleanup_and_release(n);
155 				continue;
156 			}
157 			write_unlock(&n->lock);
158 			np = &n->next;
159 		}
160 	}
161 
162 	tbl->last_flush = jiffies;
163 
164 	write_unlock_bh(&tbl->lock);
165 
166 	return shrunk;
167 }
168 
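/* Arming the timer takes one reference on the entry; it is dropped by
 * neigh_del_timer() or by the timer handler's final neigh_release().
 */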
169 static void neigh_add_timer(struct neighbour *n, unsigned long when)
170 {
171 	neigh_hold(n);
172 	if (unlikely(mod_timer(&n->timer, when))) {
173 		printk("NEIGH: BUG, double timer add, state is %x\n",
174 		       n->nud_state);
175 		dump_stack();
176 	}
177 }
178 
179 static int neigh_del_timer(struct neighbour *n)
180 {
181 	if ((n->nud_state & NUD_IN_TIMER) &&
182 	    del_timer(&n->timer)) {
183 		neigh_release(n);
184 		return 1;
185 	}
186 	return 0;
187 }
188 
189 static void pneigh_queue_purge(struct sk_buff_head *list)
190 {
191 	struct sk_buff *skb;
192 
193 	while ((skb = skb_dequeue(list)) != NULL) {
194 		dev_put(skb->dev);
195 		kfree_skb(skb);
196 	}
197 }
198 
199 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
200 {
201 	int i;
202 	struct neigh_hash_table *nht;
203 
204 	nht = rcu_dereference_protected(tbl->nht,
205 					lockdep_is_held(&tbl->lock));
206 
207 	for (i = 0; i < (1 << nht->hash_shift); i++) {
208 		struct neighbour *n;
209 		struct neighbour __rcu **np = &nht->hash_buckets[i];
210 
211 		while ((n = rcu_dereference_protected(*np,
212 					lockdep_is_held(&tbl->lock))) != NULL) {
213 			if (dev && n->dev != dev) {
214 				np = &n->next;
215 				continue;
216 			}
217 			rcu_assign_pointer(*np,
218 				   rcu_dereference_protected(n->next,
219 						lockdep_is_held(&tbl->lock)));
220 			write_lock(&n->lock);
221 			neigh_del_timer(n);
222 			n->dead = 1;
223 
224 			if (atomic_read(&n->refcnt) != 1) {
225 				/* The most unpleasant situation:
226 				   we must destroy the neighbour entry,
227 				   but someone still uses it.
228 
229 				   Destruction will be delayed until
230 				   the last user releases it, but
231 				   we must kill the timers etc. and move
232 				   the entry to a safe state.
233 				 */
234 				__skb_queue_purge(&n->arp_queue);
235 				n->arp_queue_len_bytes = 0;
236 				n->output = neigh_blackhole;
237 				if (n->nud_state & NUD_VALID)
238 					n->nud_state = NUD_NOARP;
239 				else
240 					n->nud_state = NUD_NONE;
241 				neigh_dbg(2, "neigh %p is stray\n", n);
242 			}
243 			write_unlock(&n->lock);
244 			neigh_cleanup_and_release(n);
245 		}
246 	}
247 }
248 
249 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
250 {
251 	write_lock_bh(&tbl->lock);
252 	neigh_flush_dev(tbl, dev);
253 	write_unlock_bh(&tbl->lock);
254 }
255 EXPORT_SYMBOL(neigh_changeaddr);
256 
257 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
258 {
259 	write_lock_bh(&tbl->lock);
260 	neigh_flush_dev(tbl, dev);
261 	pneigh_ifdown(tbl, dev);
262 	write_unlock_bh(&tbl->lock);
263 
264 	del_timer_sync(&tbl->proxy_timer);
265 	pneigh_queue_purge(&tbl->proxy_queue);
266 	return 0;
267 }
268 EXPORT_SYMBOL(neigh_ifdown);
269 
270 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
271 {
272 	struct neighbour *n = NULL;
273 	unsigned long now = jiffies;
274 	int entries;
275 
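	/* Above gc_thresh3, or above gc_thresh2 with no flush within the
	 * last 5 seconds, try a synchronous forced GC first; fail the
	 * allocation only if that freed nothing and we are still past
	 * the hard gc_thresh3 limit.
	 */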
276 	entries = atomic_inc_return(&tbl->entries) - 1;
277 	if (entries >= tbl->gc_thresh3 ||
278 	    (entries >= tbl->gc_thresh2 &&
279 	     time_after(now, tbl->last_flush + 5 * HZ))) {
280 		if (!neigh_forced_gc(tbl) &&
281 		    entries >= tbl->gc_thresh3)
282 			goto out_entries;
283 	}
284 
285 	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
286 	if (!n)
287 		goto out_entries;
288 
289 	__skb_queue_head_init(&n->arp_queue);
290 	rwlock_init(&n->lock);
291 	seqlock_init(&n->ha_lock);
292 	n->updated	  = n->used = now;
293 	n->nud_state	  = NUD_NONE;
294 	n->output	  = neigh_blackhole;
295 	seqlock_init(&n->hh.hh_lock);
296 	n->parms	  = neigh_parms_clone(&tbl->parms);
297 	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
298 
299 	NEIGH_CACHE_STAT_INC(tbl, allocs);
300 	n->tbl		  = tbl;
301 	atomic_set(&n->refcnt, 1);
302 	n->dead		  = 1;
303 out:
304 	return n;
305 
306 out_entries:
307 	atomic_dec(&tbl->entries);
308 	goto out;
309 }
310 
311 static void neigh_get_hash_rnd(u32 *x)
312 {
313 	get_random_bytes(x, sizeof(*x));
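	/* Force the low bit: the seed must never be zero, and an odd
	 * value suits the multiplicative hash functions that consume it.
	 */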
314 	*x |= 1;
315 }
316 
317 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
318 {
319 	size_t size = (1 << shift) * sizeof(struct neighbour *);
320 	struct neigh_hash_table *ret;
321 	struct neighbour __rcu **buckets;
322 	int i;
323 
324 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
325 	if (!ret)
326 		return NULL;
327 	if (size <= PAGE_SIZE)
328 		buckets = kzalloc(size, GFP_ATOMIC);
329 	else
330 		buckets = (struct neighbour __rcu **)
331 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
332 					   get_order(size));
333 	if (!buckets) {
334 		kfree(ret);
335 		return NULL;
336 	}
337 	ret->hash_buckets = buckets;
338 	ret->hash_shift = shift;
339 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
340 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
341 	return ret;
342 }
343 
344 static void neigh_hash_free_rcu(struct rcu_head *head)
345 {
346 	struct neigh_hash_table *nht = container_of(head,
347 						    struct neigh_hash_table,
348 						    rcu);
349 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
350 	struct neighbour __rcu **buckets = nht->hash_buckets;
351 
352 	if (size <= PAGE_SIZE)
353 		kfree(buckets);
354 	else
355 		free_pages((unsigned long)buckets, get_order(size));
356 	kfree(nht);
357 }
358 
359 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
360 						unsigned long new_shift)
361 {
362 	unsigned int i, hash;
363 	struct neigh_hash_table *new_nht, *old_nht;
364 
365 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
366 
367 	old_nht = rcu_dereference_protected(tbl->nht,
368 					    lockdep_is_held(&tbl->lock));
369 	new_nht = neigh_hash_alloc(new_shift);
370 	if (!new_nht)
371 		return old_nht;
372 
373 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
374 		struct neighbour *n, *next;
375 
376 		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
377 						   lockdep_is_held(&tbl->lock));
378 		     n != NULL;
379 		     n = next) {
380 			hash = tbl->hash(n->primary_key, n->dev,
381 					 new_nht->hash_rnd);
382 
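			/* The bucket index is taken from the top
			 * hash_shift bits of the 32-bit hash value.
			 */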
383 			hash >>= (32 - new_nht->hash_shift);
384 			next = rcu_dereference_protected(n->next,
385 						lockdep_is_held(&tbl->lock));
386 
387 			rcu_assign_pointer(n->next,
388 					   rcu_dereference_protected(
389 						new_nht->hash_buckets[hash],
390 						lockdep_is_held(&tbl->lock)));
391 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
392 		}
393 	}
394 
395 	rcu_assign_pointer(tbl->nht, new_nht);
396 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
397 	return new_nht;
398 }
399 
400 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
401 			       struct net_device *dev)
402 {
403 	struct neighbour *n;
404 	int key_len = tbl->key_len;
405 	u32 hash_val;
406 	struct neigh_hash_table *nht;
407 
408 	NEIGH_CACHE_STAT_INC(tbl, lookups);
409 
410 	rcu_read_lock_bh();
411 	nht = rcu_dereference_bh(tbl->nht);
412 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
413 
414 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
415 	     n != NULL;
416 	     n = rcu_dereference_bh(n->next)) {
417 		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
418 			if (!atomic_inc_not_zero(&n->refcnt))
419 				n = NULL;
420 			NEIGH_CACHE_STAT_INC(tbl, hits);
421 			break;
422 		}
423 	}
424 
425 	rcu_read_unlock_bh();
426 	return n;
427 }
428 EXPORT_SYMBOL(neigh_lookup);
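/*
 * Illustrative usage (editorial sketch; arp_tbl is real, but next_hop is
 * an assumed variable, not taken from this file):
 *
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &next_hop, dev);
 *	if (n) {
 *		... inspect n->nud_state / n->ha under n->lock ...
 *		neigh_release(n);	-- pairs with the reference above
 *	}
 */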
429 
430 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
431 				     const void *pkey)
432 {
433 	struct neighbour *n;
434 	int key_len = tbl->key_len;
435 	u32 hash_val;
436 	struct neigh_hash_table *nht;
437 
438 	NEIGH_CACHE_STAT_INC(tbl, lookups);
439 
440 	rcu_read_lock_bh();
441 	nht = rcu_dereference_bh(tbl->nht);
442 	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
443 
444 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
445 	     n != NULL;
446 	     n = rcu_dereference_bh(n->next)) {
447 		if (!memcmp(n->primary_key, pkey, key_len) &&
448 		    net_eq(dev_net(n->dev), net)) {
449 			if (!atomic_inc_not_zero(&n->refcnt))
450 				n = NULL;
451 			NEIGH_CACHE_STAT_INC(tbl, hits);
452 			break;
453 		}
454 	}
455 
456 	rcu_read_unlock_bh();
457 	return n;
458 }
459 EXPORT_SYMBOL(neigh_lookup_nodev);
460 
461 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
462 				 struct net_device *dev, bool want_ref)
463 {
464 	u32 hash_val;
465 	int key_len = tbl->key_len;
466 	int error;
467 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
468 	struct neigh_hash_table *nht;
469 
470 	if (!n) {
471 		rc = ERR_PTR(-ENOBUFS);
472 		goto out;
473 	}
474 
475 	memcpy(n->primary_key, pkey, key_len);
476 	n->dev = dev;
477 	dev_hold(dev);
478 
479 	/* Protocol-specific setup. */
480 	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
481 		rc = ERR_PTR(error);
482 		goto out_neigh_release;
483 	}
484 
485 	if (dev->netdev_ops->ndo_neigh_construct) {
486 		error = dev->netdev_ops->ndo_neigh_construct(n);
487 		if (error < 0) {
488 			rc = ERR_PTR(error);
489 			goto out_neigh_release;
490 		}
491 	}
492 
493 	/* Device-specific setup. */
494 	if (n->parms->neigh_setup &&
495 	    (error = n->parms->neigh_setup(n)) < 0) {
496 		rc = ERR_PTR(error);
497 		goto out_neigh_release;
498 	}
499 
500 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
501 
502 	write_lock_bh(&tbl->lock);
503 	nht = rcu_dereference_protected(tbl->nht,
504 					lockdep_is_held(&tbl->lock));
505 
506 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
507 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
508 
509 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
510 
511 	if (n->parms->dead) {
512 		rc = ERR_PTR(-EINVAL);
513 		goto out_tbl_unlock;
514 	}
515 
516 	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
517 					    lockdep_is_held(&tbl->lock));
518 	     n1 != NULL;
519 	     n1 = rcu_dereference_protected(n1->next,
520 			lockdep_is_held(&tbl->lock))) {
521 		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
522 			if (want_ref)
523 				neigh_hold(n1);
524 			rc = n1;
525 			goto out_tbl_unlock;
526 		}
527 	}
528 
529 	n->dead = 0;
530 	if (want_ref)
531 		neigh_hold(n);
532 	rcu_assign_pointer(n->next,
533 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
534 						     lockdep_is_held(&tbl->lock)));
535 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
536 	write_unlock_bh(&tbl->lock);
537 	neigh_dbg(2, "neigh %p is created\n", n);
538 	rc = n;
539 out:
540 	return rc;
541 out_tbl_unlock:
542 	write_unlock_bh(&tbl->lock);
543 out_neigh_release:
544 	neigh_release(n);
545 	goto out;
546 }
547 EXPORT_SYMBOL(__neigh_create);
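/*
 * Editorial note: most callers use the neigh_create() inline from
 * <net/neighbour.h>, which is essentially __neigh_create(tbl, pkey, dev,
 * true), so the entry they get back always carries a reference.
 */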
548 
549 static u32 pneigh_hash(const void *pkey, int key_len)
550 {
551 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
552 	hash_val ^= (hash_val >> 16);
553 	hash_val ^= hash_val >> 8;
554 	hash_val ^= hash_val >> 4;
555 	hash_val &= PNEIGH_HASHMASK;
556 	return hash_val;
557 }
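/* Editorial note: only the last 4 key bytes are hashed, XOR-folded down
 * to the low 4 bits, so proxy entries land in one of the 16 buckets
 * selected by PNEIGH_HASHMASK.
 */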
558 
559 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
560 					      struct net *net,
561 					      const void *pkey,
562 					      int key_len,
563 					      struct net_device *dev)
564 {
565 	while (n) {
566 		if (!memcmp(n->key, pkey, key_len) &&
567 		    net_eq(pneigh_net(n), net) &&
568 		    (n->dev == dev || !n->dev))
569 			return n;
570 		n = n->next;
571 	}
572 	return NULL;
573 }
574 
575 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
576 		struct net *net, const void *pkey, struct net_device *dev)
577 {
578 	int key_len = tbl->key_len;
579 	u32 hash_val = pneigh_hash(pkey, key_len);
580 
581 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
582 				 net, pkey, key_len, dev);
583 }
584 EXPORT_SYMBOL_GPL(__pneigh_lookup);
585 
586 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
587 				    struct net *net, const void *pkey,
588 				    struct net_device *dev, int creat)
589 {
590 	struct pneigh_entry *n;
591 	int key_len = tbl->key_len;
592 	u32 hash_val = pneigh_hash(pkey, key_len);
593 
594 	read_lock_bh(&tbl->lock);
595 	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
596 			      net, pkey, key_len, dev);
597 	read_unlock_bh(&tbl->lock);
598 
599 	if (n || !creat)
600 		goto out;
601 
602 	ASSERT_RTNL();
603 
604 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
605 	if (!n)
606 		goto out;
607 
608 	write_pnet(&n->net, hold_net(net));
609 	memcpy(n->key, pkey, key_len);
610 	n->dev = dev;
611 	if (dev)
612 		dev_hold(dev);
613 
614 	if (tbl->pconstructor && tbl->pconstructor(n)) {
615 		if (dev)
616 			dev_put(dev);
617 		release_net(net);
618 		kfree(n);
619 		n = NULL;
620 		goto out;
621 	}
622 
623 	write_lock_bh(&tbl->lock);
624 	n->next = tbl->phash_buckets[hash_val];
625 	tbl->phash_buckets[hash_val] = n;
626 	write_unlock_bh(&tbl->lock);
627 out:
628 	return n;
629 }
630 EXPORT_SYMBOL(pneigh_lookup);
631 
632 
633 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
634 		  struct net_device *dev)
635 {
636 	struct pneigh_entry *n, **np;
637 	int key_len = tbl->key_len;
638 	u32 hash_val = pneigh_hash(pkey, key_len);
639 
640 	write_lock_bh(&tbl->lock);
641 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
642 	     np = &n->next) {
643 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
644 		    net_eq(pneigh_net(n), net)) {
645 			*np = n->next;
646 			write_unlock_bh(&tbl->lock);
647 			if (tbl->pdestructor)
648 				tbl->pdestructor(n);
649 			if (n->dev)
650 				dev_put(n->dev);
651 			release_net(pneigh_net(n));
652 			kfree(n);
653 			return 0;
654 		}
655 	}
656 	write_unlock_bh(&tbl->lock);
657 	return -ENOENT;
658 }
659 
660 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
661 {
662 	struct pneigh_entry *n, **np;
663 	u32 h;
664 
665 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
666 		np = &tbl->phash_buckets[h];
667 		while ((n = *np) != NULL) {
668 			if (!dev || n->dev == dev) {
669 				*np = n->next;
670 				if (tbl->pdestructor)
671 					tbl->pdestructor(n);
672 				if (n->dev)
673 					dev_put(n->dev);
674 				release_net(pneigh_net(n));
675 				kfree(n);
676 				continue;
677 			}
678 			np = &n->next;
679 		}
680 	}
681 	return -ENOENT;
682 }
683 
684 static void neigh_parms_destroy(struct neigh_parms *parms);
685 
686 static inline void neigh_parms_put(struct neigh_parms *parms)
687 {
688 	if (atomic_dec_and_test(&parms->refcnt))
689 		neigh_parms_destroy(parms);
690 }
691 
692 /*
693  *	The neighbour must already have been unlinked from the table
694  *	and marked dead; this runs once the last reference is dropped.
695  */
696 void neigh_destroy(struct neighbour *neigh)
697 {
698 	struct net_device *dev = neigh->dev;
699 
700 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
701 
702 	if (!neigh->dead) {
703 		pr_warn("Destroying alive neighbour %p\n", neigh);
704 		dump_stack();
705 		return;
706 	}
707 
708 	if (neigh_del_timer(neigh))
709 		pr_warn("Impossible event\n");
710 
711 	write_lock_bh(&neigh->lock);
712 	__skb_queue_purge(&neigh->arp_queue);
713 	write_unlock_bh(&neigh->lock);
714 	neigh->arp_queue_len_bytes = 0;
715 
716 	if (dev->netdev_ops->ndo_neigh_destroy)
717 		dev->netdev_ops->ndo_neigh_destroy(neigh);
718 
719 	dev_put(dev);
720 	neigh_parms_put(neigh->parms);
721 
722 	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
723 
724 	atomic_dec(&neigh->tbl->entries);
725 	kfree_rcu(neigh, rcu);
726 }
727 EXPORT_SYMBOL(neigh_destroy);
728 
729 /* Neighbour state is suspicious;
730    disable fast path.
731 
732    Called with neigh write-locked.
733  */
734 static void neigh_suspect(struct neighbour *neigh)
735 {
736 	neigh_dbg(2, "neigh %p is suspected\n", neigh);
737 
738 	neigh->output = neigh->ops->output;
739 }
740 
741 /* Neighbour state is OK;
742    enable fast path.
743 
744    Called with neigh write-locked.
745  */
746 static void neigh_connect(struct neighbour *neigh)
747 {
748 	neigh_dbg(2, "neigh %p is connected\n", neigh);
749 
750 	neigh->output = neigh->ops->connected_output;
751 }
752 
753 static void neigh_periodic_work(struct work_struct *work)
754 {
755 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
756 	struct neighbour *n;
757 	struct neighbour __rcu **np;
758 	unsigned int i;
759 	struct neigh_hash_table *nht;
760 
761 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
762 
763 	write_lock_bh(&tbl->lock);
764 	nht = rcu_dereference_protected(tbl->nht,
765 					lockdep_is_held(&tbl->lock));
766 
767 	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
768 		goto out;
769 
770 	/*
771 	 *	Periodically recompute ReachableTime from its random distribution.
772 	 */
773 
774 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
775 		struct neigh_parms *p;
776 		tbl->last_rand = jiffies;
777 		for (p = &tbl->parms; p; p = p->next)
778 			p->reachable_time =
779 				neigh_rand_reach_time(p->base_reachable_time);
780 	}
781 
782 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
783 		np = &nht->hash_buckets[i];
784 
785 		while ((n = rcu_dereference_protected(*np,
786 				lockdep_is_held(&tbl->lock))) != NULL) {
787 			unsigned int state;
788 
789 			write_lock(&n->lock);
790 
791 			state = n->nud_state;
792 			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
793 				write_unlock(&n->lock);
794 				goto next_elt;
795 			}
796 
797 			if (time_before(n->used, n->confirmed))
798 				n->used = n->confirmed;
799 
800 			if (atomic_read(&n->refcnt) == 1 &&
801 			    (state == NUD_FAILED ||
802 			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
803 				*np = n->next;
804 				n->dead = 1;
805 				write_unlock(&n->lock);
806 				neigh_cleanup_and_release(n);
807 				continue;
808 			}
809 			write_unlock(&n->lock);
810 
811 next_elt:
812 			np = &n->next;
813 		}
814 		/*
815 		 * It's fine to release lock here, even if hash table
816 		 * grows while we are preempted.
817 		 */
818 		write_unlock_bh(&tbl->lock);
819 		cond_resched();
820 		write_lock_bh(&tbl->lock);
821 		nht = rcu_dereference_protected(tbl->nht,
822 						lockdep_is_held(&tbl->lock));
823 	}
824 out:
825 	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
826 	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
827 	 * base_reachable_time.
828 	 */
829 	schedule_delayed_work(&tbl->gc_work,
830 			      tbl->parms.base_reachable_time >> 1);
831 	write_unlock_bh(&tbl->lock);
832 }
833 
834 static __inline__ int neigh_max_probes(struct neighbour *n)
835 {
836 	struct neigh_parms *p = n->parms;
837 	return (n->nud_state & NUD_PROBE) ?
838 		p->ucast_probes :
839 		p->ucast_probes + p->app_probes + p->mcast_probes;
840 }
841 
842 static void neigh_invalidate(struct neighbour *neigh)
843 	__releases(neigh->lock)
844 	__acquires(neigh->lock)
845 {
846 	struct sk_buff *skb;
847 
848 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
849 	neigh_dbg(2, "neigh %p is failed\n", neigh);
850 	neigh->updated = jiffies;
851 
852 	/* This is a delicate spot: error_report is a complicated
853 	   routine and can even hit this same neighbour entry!
854 
855 	   So we take care here to avoid an endless loop. --ANK
856 	 */
857 	while (neigh->nud_state == NUD_FAILED &&
858 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
859 		write_unlock(&neigh->lock);
860 		neigh->ops->error_report(neigh, skb);
861 		write_lock(&neigh->lock);
862 	}
863 	__skb_queue_purge(&neigh->arp_queue);
864 	neigh->arp_queue_len_bytes = 0;
865 }
866 
867 static void neigh_probe(struct neighbour *neigh)
868 	__releases(neigh->lock)
869 {
870 	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
871 	/* keep skb alive even if arp_queue overflows */
872 	if (skb)
873 		skb = skb_copy(skb, GFP_ATOMIC);
874 	write_unlock(&neigh->lock);
875 	neigh->ops->solicit(neigh, skb);
876 	atomic_inc(&neigh->probes);
877 	kfree_skb(skb);
878 }
879 
880 /* Called when a timer expires for a neighbour entry. */
881 
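/*
 * Editorial summary of the transitions driven below:
 *	REACHABLE -> DELAY or STALE	when the confirmation ages out,
 *	DELAY -> REACHABLE		if reconfirmed within delay_probe_time,
 *	DELAY -> PROBE			otherwise,
 *	PROBE or INCOMPLETE -> FAILED	after neigh_max_probes() attempts.
 */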
882 static void neigh_timer_handler(unsigned long arg)
883 {
884 	unsigned long now, next;
885 	struct neighbour *neigh = (struct neighbour *)arg;
886 	unsigned int state;
887 	int notify = 0;
888 
889 	write_lock(&neigh->lock);
890 
891 	state = neigh->nud_state;
892 	now = jiffies;
893 	next = now + HZ;
894 
895 	if (!(state & NUD_IN_TIMER))
896 		goto out;
897 
898 	if (state & NUD_REACHABLE) {
899 		if (time_before_eq(now,
900 				   neigh->confirmed + neigh->parms->reachable_time)) {
901 			neigh_dbg(2, "neigh %p is still alive\n", neigh);
902 			next = neigh->confirmed + neigh->parms->reachable_time;
903 		} else if (time_before_eq(now,
904 					  neigh->used + neigh->parms->delay_probe_time)) {
905 			neigh_dbg(2, "neigh %p is delayed\n", neigh);
906 			neigh->nud_state = NUD_DELAY;
907 			neigh->updated = jiffies;
908 			neigh_suspect(neigh);
909 			next = now + neigh->parms->delay_probe_time;
910 		} else {
911 			neigh_dbg(2, "neigh %p is suspected\n", neigh);
912 			neigh->nud_state = NUD_STALE;
913 			neigh->updated = jiffies;
914 			neigh_suspect(neigh);
915 			notify = 1;
916 		}
917 	} else if (state & NUD_DELAY) {
918 		if (time_before_eq(now,
919 				   neigh->confirmed + neigh->parms->delay_probe_time)) {
920 			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
921 			neigh->nud_state = NUD_REACHABLE;
922 			neigh->updated = jiffies;
923 			neigh_connect(neigh);
924 			notify = 1;
925 			next = neigh->confirmed + neigh->parms->reachable_time;
926 		} else {
927 			neigh_dbg(2, "neigh %p is probed\n", neigh);
928 			neigh->nud_state = NUD_PROBE;
929 			neigh->updated = jiffies;
930 			atomic_set(&neigh->probes, 0);
931 			next = now + neigh->parms->retrans_time;
932 		}
933 	} else {
934 		/* NUD_PROBE|NUD_INCOMPLETE */
935 		next = now + neigh->parms->retrans_time;
936 	}
937 
938 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
939 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
940 		neigh->nud_state = NUD_FAILED;
941 		notify = 1;
942 		neigh_invalidate(neigh);
943 	}
944 
945 	if (neigh->nud_state & NUD_IN_TIMER) {
946 		if (time_before(next, jiffies + HZ/2))
947 			next = jiffies + HZ/2;
948 		if (!mod_timer(&neigh->timer, next))
949 			neigh_hold(neigh);
950 	}
951 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
952 		neigh_probe(neigh);
953 	} else {
954 out:
955 		write_unlock(&neigh->lock);
956 	}
957 
958 	if (notify)
959 		neigh_update_notify(neigh);
960 
961 	neigh_release(neigh);
962 }
963 
964 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
965 {
966 	int rc;
967 	bool immediate_probe = false;
968 
969 	write_lock_bh(&neigh->lock);
970 
971 	rc = 0;
972 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
973 		goto out_unlock_bh;
974 
975 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
976 		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
977 			unsigned long next, now = jiffies;
978 
979 			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
980 			neigh->nud_state     = NUD_INCOMPLETE;
981 			neigh->updated = now;
982 			next = now + max(neigh->parms->retrans_time, HZ/2);
983 			neigh_add_timer(neigh, next);
984 			immediate_probe = true;
985 		} else {
986 			neigh->nud_state = NUD_FAILED;
987 			neigh->updated = jiffies;
988 			write_unlock_bh(&neigh->lock);
989 
990 			kfree_skb(skb);
991 			return 1;
992 		}
993 	} else if (neigh->nud_state & NUD_STALE) {
994 		neigh_dbg(2, "neigh %p is delayed\n", neigh);
995 		neigh->nud_state = NUD_DELAY;
996 		neigh->updated = jiffies;
997 		neigh_add_timer(neigh,
998 				jiffies + neigh->parms->delay_probe_time);
999 	}
1000 
1001 	if (neigh->nud_state == NUD_INCOMPLETE) {
1002 		if (skb) {
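			/* Evict the oldest queued skbs until this one fits
			 * within the per-neighbour queue_len_bytes budget.
			 */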
1003 			while (neigh->arp_queue_len_bytes + skb->truesize >
1004 			       neigh->parms->queue_len_bytes) {
1005 				struct sk_buff *buff;
1006 
1007 				buff = __skb_dequeue(&neigh->arp_queue);
1008 				if (!buff)
1009 					break;
1010 				neigh->arp_queue_len_bytes -= buff->truesize;
1011 				kfree_skb(buff);
1012 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1013 			}
1014 			skb_dst_force(skb);
1015 			__skb_queue_tail(&neigh->arp_queue, skb);
1016 			neigh->arp_queue_len_bytes += skb->truesize;
1017 		}
1018 		rc = 1;
1019 	}
1020 out_unlock_bh:
1021 	if (immediate_probe)
1022 		neigh_probe(neigh);
1023 	else
1024 		write_unlock(&neigh->lock);
1025 	local_bh_enable();
1026 	return rc;
1027 }
1028 EXPORT_SYMBOL(__neigh_event_send);
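/*
 * Editorial note: callers normally go through the neigh_event_send()
 * inline from <net/neighbour.h>, which refreshes neigh->used and returns
 * 0 straight away for NUD_CONNECTED | NUD_DELAY | NUD_PROBE states, so
 * the slow path above runs only when resolution work may be needed.
 */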
1029 
1030 static void neigh_update_hhs(struct neighbour *neigh)
1031 {
1032 	struct hh_cache *hh;
1033 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1034 		= NULL;
1035 
1036 	if (neigh->dev->header_ops)
1037 		update = neigh->dev->header_ops->cache_update;
1038 
1039 	if (update) {
1040 		hh = &neigh->hh;
1041 		if (hh->hh_len) {
1042 			write_seqlock_bh(&hh->hh_lock);
1043 			update(hh, neigh->dev, neigh->ha);
1044 			write_sequnlock_bh(&hh->hh_lock);
1045 		}
1046 	}
1047 }
1048 
1049 
1050 
1051 /* Generic update routine.
1052    -- lladdr is the new lladdr, or NULL if none is supplied.
1053    -- new    is the new state.
1054    -- flags
1055 	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
1056 				if it differs.
1057 	NEIGH_UPDATE_F_WEAK_OVERRIDE will mark an existing "connected"
1058 				lladdr as suspect instead of overriding
1059 				it if it differs.
1060 				It also allows retaining the current state
1061 				if lladdr is unchanged.
1062 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1063 
1064 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
1065 				NTF_ROUTER flag.
1066 	NEIGH_UPDATE_F_ISROUTER	indicates that the neighbour is known to
1067 				be a router.
1068 
1069    The caller MUST hold a reference on the entry.
1070  */
1071 
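/*
 * Illustrative call (editorial sketch; new_lladdr is an assumed buffer):
 * pinning an administratively configured entry, much as an ioctl or
 * netlink handler would:
 *
 *	err = neigh_update(neigh, new_lladdr, NUD_PERMANENT,
 *			   NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
 *
 * The caller must already hold a reference on neigh.
 */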
1072 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1073 		 u32 flags)
1074 {
1075 	u8 old;
1076 	int err;
1077 	int notify = 0;
1078 	struct net_device *dev;
1079 	int update_isrouter = 0;
1080 
1081 	write_lock_bh(&neigh->lock);
1082 
1083 	dev    = neigh->dev;
1084 	old    = neigh->nud_state;
1085 	err    = -EPERM;
1086 
1087 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1088 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1089 		goto out;
1090 
1091 	if (!(new & NUD_VALID)) {
1092 		neigh_del_timer(neigh);
1093 		if (old & NUD_CONNECTED)
1094 			neigh_suspect(neigh);
1095 		neigh->nud_state = new;
1096 		err = 0;
1097 		notify = old & NUD_VALID;
1098 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1099 		    (new & NUD_FAILED)) {
1100 			neigh_invalidate(neigh);
1101 			notify = 1;
1102 		}
1103 		goto out;
1104 	}
1105 
1106 	/* Compare the new lladdr with the cached one */
1107 	if (!dev->addr_len) {
1108 		/* First case: device needs no address. */
1109 		lladdr = neigh->ha;
1110 	} else if (lladdr) {
1111 		/* The second case: something is already cached
1112 		   and a new address is proposed:
1113 		   - compare the new and old addresses;
1114 		   - if they differ, honour the override flags.
1115 		 */
1116 		if ((old & NUD_VALID) &&
1117 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1118 			lladdr = neigh->ha;
1119 	} else {
1120 		/* No address is supplied; if we know something,
1121 		   use it, otherwise discard the request.
1122 		 */
1123 		err = -EINVAL;
1124 		if (!(old & NUD_VALID))
1125 			goto out;
1126 		lladdr = neigh->ha;
1127 	}
1128 
1129 	if (new & NUD_CONNECTED)
1130 		neigh->confirmed = jiffies;
1131 	neigh->updated = jiffies;
1132 
1133 	/* If the entry was valid and the address is unchanged,
1134 	   keep the current state rather than moving to STALE.
1135 	 */
1136 	err = 0;
1137 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1138 	if (old & NUD_VALID) {
1139 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1140 			update_isrouter = 0;
1141 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1142 			    (old & NUD_CONNECTED)) {
1143 				lladdr = neigh->ha;
1144 				new = NUD_STALE;
1145 			} else
1146 				goto out;
1147 		} else {
1148 			if (lladdr == neigh->ha && new == NUD_STALE &&
1149 			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1150 			     (old & NUD_CONNECTED))
1151 			    )
1152 				new = old;
1153 		}
1154 	}
1155 
1156 	if (new != old) {
1157 		neigh_del_timer(neigh);
1158 		if (new & NUD_IN_TIMER)
1159 			neigh_add_timer(neigh, (jiffies +
1160 						((new & NUD_REACHABLE) ?
1161 						 neigh->parms->reachable_time :
1162 						 0)));
1163 		neigh->nud_state = new;
1164 	}
1165 
1166 	if (lladdr != neigh->ha) {
1167 		write_seqlock(&neigh->ha_lock);
1168 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1169 		write_sequnlock(&neigh->ha_lock);
1170 		neigh_update_hhs(neigh);
1171 		if (!(new & NUD_CONNECTED))
1172 			neigh->confirmed = jiffies -
1173 				      (neigh->parms->base_reachable_time << 1);
1174 		notify = 1;
1175 	}
1176 	if (new == old)
1177 		goto out;
1178 	if (new & NUD_CONNECTED)
1179 		neigh_connect(neigh);
1180 	else
1181 		neigh_suspect(neigh);
1182 	if (!(old & NUD_VALID)) {
1183 		struct sk_buff *skb;
1184 
1185 		/* Again: avoid an endless loop if something went wrong */
1186 
1187 		while (neigh->nud_state & NUD_VALID &&
1188 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1189 			struct dst_entry *dst = skb_dst(skb);
1190 			struct neighbour *n2, *n1 = neigh;
1191 			write_unlock_bh(&neigh->lock);
1192 
1193 			rcu_read_lock();
1194 
1195 			/* Why not just use 'neigh' as-is?  The problem is that
1196 			 * things such as shaper, eql, and sch_teql can end up
1197 			 * using alternative, different, neigh objects to output
1198 			 * the packet in the output path.  So what we need to do
1199 			 * here is re-lookup the top-level neigh in the path so
1200 			 * we can reinject the packet there.
1201 			 */
1202 			n2 = NULL;
1203 			if (dst) {
1204 				n2 = dst_neigh_lookup_skb(dst, skb);
1205 				if (n2)
1206 					n1 = n2;
1207 			}
1208 			n1->output(n1, skb);
1209 			if (n2)
1210 				neigh_release(n2);
1211 			rcu_read_unlock();
1212 
1213 			write_lock_bh(&neigh->lock);
1214 		}
1215 		__skb_queue_purge(&neigh->arp_queue);
1216 		neigh->arp_queue_len_bytes = 0;
1217 	}
1218 out:
1219 	if (update_isrouter) {
1220 		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1221 			(neigh->flags | NTF_ROUTER) :
1222 			(neigh->flags & ~NTF_ROUTER);
1223 	}
1224 	write_unlock_bh(&neigh->lock);
1225 
1226 	if (notify)
1227 		neigh_update_notify(neigh);
1228 
1229 	return err;
1230 }
1231 EXPORT_SYMBOL(neigh_update);
1232 
1233 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1234 				 u8 *lladdr, void *saddr,
1235 				 struct net_device *dev)
1236 {
1237 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1238 						 lladdr || !dev->addr_len);
1239 	if (neigh)
1240 		neigh_update(neigh, lladdr, NUD_STALE,
1241 			     NEIGH_UPDATE_F_OVERRIDE);
1242 	return neigh;
1243 }
1244 EXPORT_SYMBOL(neigh_event_ns);
1245 
1246 /* Called without n->lock held; takes write_lock_bh(&n->lock) itself. */
1247 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1248 {
1249 	struct net_device *dev = dst->dev;
1250 	__be16 prot = dst->ops->protocol;
1251 	struct hh_cache	*hh = &n->hh;
1252 
1253 	write_lock_bh(&n->lock);
1254 
1255 	/* Only one thread can come in here and initialize the
1256 	 * hh_cache entry.
1257 	 */
1258 	if (!hh->hh_len)
1259 		dev->header_ops->cache(n, hh, prot);
1260 
1261 	write_unlock_bh(&n->lock);
1262 }
1263 
1264 /* This function can be used in contexts where only the old
1265  * dev_queue_xmit worked, e.g. if you want to override the normal
1266  * output path (eql, shaper) but resolution has not been done yet.
1267  */
1268 
1269 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1270 {
1271 	struct net_device *dev = skb->dev;
1272 
1273 	__skb_pull(skb, skb_network_offset(skb));
1274 
1275 	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1276 			    skb->len) < 0 &&
1277 	    dev->header_ops->rebuild(skb))
1278 		return 0;
1279 
1280 	return dev_queue_xmit(skb);
1281 }
1282 EXPORT_SYMBOL(neigh_compat_output);
1283 
1284 /* Slow and careful. */
1285 
1286 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1287 {
1288 	struct dst_entry *dst = skb_dst(skb);
1289 	int rc = 0;
1290 
1291 	if (!dst)
1292 		goto discard;
1293 
1294 	if (!neigh_event_send(neigh, skb)) {
1295 		int err;
1296 		struct net_device *dev = neigh->dev;
1297 		unsigned int seq;
1298 
1299 		if (dev->header_ops->cache && !neigh->hh.hh_len)
1300 			neigh_hh_init(neigh, dst);
1301 
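		/* Re-read the hardware address under the ha_lock seqlock so
		 * a concurrent neigh_update() cannot hand us a torn copy.
		 */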
1302 		do {
1303 			__skb_pull(skb, skb_network_offset(skb));
1304 			seq = read_seqbegin(&neigh->ha_lock);
1305 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1306 					      neigh->ha, NULL, skb->len);
1307 		} while (read_seqretry(&neigh->ha_lock, seq));
1308 
1309 		if (err >= 0)
1310 			rc = dev_queue_xmit(skb);
1311 		else
1312 			goto out_kfree_skb;
1313 	}
1314 out:
1315 	return rc;
1316 discard:
1317 	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
1318 out_kfree_skb:
1319 	rc = -EINVAL;
1320 	kfree_skb(skb);
1321 	goto out;
1322 }
1323 EXPORT_SYMBOL(neigh_resolve_output);
1324 
1325 /* As fast as possible, without the hh cache */
1326 
1327 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1328 {
1329 	struct net_device *dev = neigh->dev;
1330 	unsigned int seq;
1331 	int err;
1332 
1333 	do {
1334 		__skb_pull(skb, skb_network_offset(skb));
1335 		seq = read_seqbegin(&neigh->ha_lock);
1336 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1337 				      neigh->ha, NULL, skb->len);
1338 	} while (read_seqretry(&neigh->ha_lock, seq));
1339 
1340 	if (err >= 0)
1341 		err = dev_queue_xmit(skb);
1342 	else {
1343 		err = -EINVAL;
1344 		kfree_skb(skb);
1345 	}
1346 	return err;
1347 }
1348 EXPORT_SYMBOL(neigh_connected_output);
1349 
1350 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1351 {
1352 	return dev_queue_xmit(skb);
1353 }
1354 EXPORT_SYMBOL(neigh_direct_output);
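/*
 * Editorial summary: for a typical table (e.g. ARP), neigh->output is
 * switched between the helpers above by neigh_connect()/neigh_suspect():
 * neigh_connected_output() once the entry is NUD_CONNECTED,
 * neigh_resolve_output() while it is suspect, and neigh_blackhole()
 * once the entry is dead.
 */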
1355 
1356 static void neigh_proxy_process(unsigned long arg)
1357 {
1358 	struct neigh_table *tbl = (struct neigh_table *)arg;
1359 	long sched_next = 0;
1360 	unsigned long now = jiffies;
1361 	struct sk_buff *skb, *n;
1362 
1363 	spin_lock(&tbl->proxy_queue.lock);
1364 
1365 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1366 		long tdif = NEIGH_CB(skb)->sched_next - now;
1367 
1368 		if (tdif <= 0) {
1369 			struct net_device *dev = skb->dev;
1370 
1371 			__skb_unlink(skb, &tbl->proxy_queue);
1372 			if (tbl->proxy_redo && netif_running(dev)) {
1373 				rcu_read_lock();
1374 				tbl->proxy_redo(skb);
1375 				rcu_read_unlock();
1376 			} else {
1377 				kfree_skb(skb);
1378 			}
1379 
1380 			dev_put(dev);
1381 		} else if (!sched_next || tdif < sched_next)
1382 			sched_next = tdif;
1383 	}
1384 	del_timer(&tbl->proxy_timer);
1385 	if (sched_next)
1386 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1387 	spin_unlock(&tbl->proxy_queue.lock);
1388 }
1389 
1390 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1391 		    struct sk_buff *skb)
1392 {
1393 	unsigned long now = jiffies;
1394 	unsigned long sched_next = now + (net_random() % p->proxy_delay);
1395 
1396 	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1397 		kfree_skb(skb);
1398 		return;
1399 	}
1400 
1401 	NEIGH_CB(skb)->sched_next = sched_next;
1402 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1403 
1404 	spin_lock(&tbl->proxy_queue.lock);
1405 	if (del_timer(&tbl->proxy_timer)) {
1406 		if (time_before(tbl->proxy_timer.expires, sched_next))
1407 			sched_next = tbl->proxy_timer.expires;
1408 	}
1409 	skb_dst_drop(skb);
1410 	dev_hold(skb->dev);
1411 	__skb_queue_tail(&tbl->proxy_queue, skb);
1412 	mod_timer(&tbl->proxy_timer, sched_next);
1413 	spin_unlock(&tbl->proxy_queue.lock);
1414 }
1415 EXPORT_SYMBOL(pneigh_enqueue);
1416 
1417 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1418 						      struct net *net, int ifindex)
1419 {
1420 	struct neigh_parms *p;
1421 
1422 	for (p = &tbl->parms; p; p = p->next) {
1423 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1424 		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1425 			return p;
1426 	}
1427 
1428 	return NULL;
1429 }
1430 
1431 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1432 				      struct neigh_table *tbl)
1433 {
1434 	struct neigh_parms *p;
1435 	struct net *net = dev_net(dev);
1436 	const struct net_device_ops *ops = dev->netdev_ops;
1437 
1438 	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1439 	if (p) {
1440 		p->tbl		  = tbl;
1441 		atomic_set(&p->refcnt, 1);
1442 		p->reachable_time =
1443 				neigh_rand_reach_time(p->base_reachable_time);
1444 
1445 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1446 			kfree(p);
1447 			return NULL;
1448 		}
1449 
1450 		dev_hold(dev);
1451 		p->dev = dev;
1452 		write_pnet(&p->net, hold_net(net));
1453 		p->sysctl_table = NULL;
1454 		write_lock_bh(&tbl->lock);
1455 		p->next		= tbl->parms.next;
1456 		tbl->parms.next = p;
1457 		write_unlock_bh(&tbl->lock);
1458 	}
1459 	return p;
1460 }
1461 EXPORT_SYMBOL(neigh_parms_alloc);
1462 
1463 static void neigh_rcu_free_parms(struct rcu_head *head)
1464 {
1465 	struct neigh_parms *parms =
1466 		container_of(head, struct neigh_parms, rcu_head);
1467 
1468 	neigh_parms_put(parms);
1469 }
1470 
1471 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1472 {
1473 	struct neigh_parms **p;
1474 
1475 	if (!parms || parms == &tbl->parms)
1476 		return;
1477 	write_lock_bh(&tbl->lock);
1478 	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1479 		if (*p == parms) {
1480 			*p = parms->next;
1481 			parms->dead = 1;
1482 			write_unlock_bh(&tbl->lock);
1483 			if (parms->dev)
1484 				dev_put(parms->dev);
1485 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1486 			return;
1487 		}
1488 	}
1489 	write_unlock_bh(&tbl->lock);
1490 	neigh_dbg(1, "%s: not found\n", __func__);
1491 }
1492 EXPORT_SYMBOL(neigh_parms_release);
1493 
1494 static void neigh_parms_destroy(struct neigh_parms *parms)
1495 {
1496 	release_net(neigh_parms_net(parms));
1497 	kfree(parms);
1498 }
1499 
1500 static struct lock_class_key neigh_table_proxy_queue_class;
1501 
1502 static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1503 {
1504 	unsigned long now = jiffies;
1505 	unsigned long phsize;
1506 
1507 	write_pnet(&tbl->parms.net, &init_net);
1508 	atomic_set(&tbl->parms.refcnt, 1);
1509 	tbl->parms.reachable_time =
1510 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
1511 
1512 	tbl->stats = alloc_percpu(struct neigh_statistics);
1513 	if (!tbl->stats)
1514 		panic("cannot create neighbour cache statistics");
1515 
1516 #ifdef CONFIG_PROC_FS
1517 	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1518 			      &neigh_stat_seq_fops, tbl))
1519 		panic("cannot create neighbour proc dir entry");
1520 #endif
1521 
1522 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1523 
1524 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1525 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1526 
1527 	if (!tbl->nht || !tbl->phash_buckets)
1528 		panic("cannot allocate neighbour cache hashes");
1529 
1530 	if (!tbl->entry_size)
1531 		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1532 					tbl->key_len, NEIGH_PRIV_ALIGN);
1533 	else
1534 		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1535 
1536 	rwlock_init(&tbl->lock);
1537 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1538 	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1539 	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1540 	skb_queue_head_init_class(&tbl->proxy_queue,
1541 			&neigh_table_proxy_queue_class);
1542 
1543 	tbl->last_flush = now;
1544 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1545 }
1546 
1547 void neigh_table_init(struct neigh_table *tbl)
1548 {
1549 	struct neigh_table *tmp;
1550 
1551 	neigh_table_init_no_netlink(tbl);
1552 	write_lock(&neigh_tbl_lock);
1553 	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1554 		if (tmp->family == tbl->family)
1555 			break;
1556 	}
1557 	tbl->next	= neigh_tables;
1558 	neigh_tables	= tbl;
1559 	write_unlock(&neigh_tbl_lock);
1560 
1561 	if (unlikely(tmp)) {
1562 		pr_err("Registering multiple tables for family %d\n",
1563 		       tbl->family);
1564 		dump_stack();
1565 	}
1566 }
1567 EXPORT_SYMBOL(neigh_table_init);
1568 
1569 int neigh_table_clear(struct neigh_table *tbl)
1570 {
1571 	struct neigh_table **tp;
1572 
1573 	/* This is not clean... fix it so the IPv6 module can be unloaded safely */
1574 	cancel_delayed_work_sync(&tbl->gc_work);
1575 	del_timer_sync(&tbl->proxy_timer);
1576 	pneigh_queue_purge(&tbl->proxy_queue);
1577 	neigh_ifdown(tbl, NULL);
1578 	if (atomic_read(&tbl->entries))
1579 		pr_crit("neighbour leakage\n");
1580 	write_lock(&neigh_tbl_lock);
1581 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1582 		if (*tp == tbl) {
1583 			*tp = tbl->next;
1584 			break;
1585 		}
1586 	}
1587 	write_unlock(&neigh_tbl_lock);
1588 
1589 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1590 		 neigh_hash_free_rcu);
1591 	tbl->nht = NULL;
1592 
1593 	kfree(tbl->phash_buckets);
1594 	tbl->phash_buckets = NULL;
1595 
1596 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1597 
1598 	free_percpu(tbl->stats);
1599 	tbl->stats = NULL;
1600 
1601 	return 0;
1602 }
1603 EXPORT_SYMBOL(neigh_table_clear);
1604 
1605 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1606 {
1607 	struct net *net = sock_net(skb->sk);
1608 	struct ndmsg *ndm;
1609 	struct nlattr *dst_attr;
1610 	struct neigh_table *tbl;
1611 	struct net_device *dev = NULL;
1612 	int err = -EINVAL;
1613 
1614 	ASSERT_RTNL();
1615 	if (nlmsg_len(nlh) < sizeof(*ndm))
1616 		goto out;
1617 
1618 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1619 	if (dst_attr == NULL)
1620 		goto out;
1621 
1622 	ndm = nlmsg_data(nlh);
1623 	if (ndm->ndm_ifindex) {
1624 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1625 		if (dev == NULL) {
1626 			err = -ENODEV;
1627 			goto out;
1628 		}
1629 	}
1630 
1631 	read_lock(&neigh_tbl_lock);
1632 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1633 		struct neighbour *neigh;
1634 
1635 		if (tbl->family != ndm->ndm_family)
1636 			continue;
1637 		read_unlock(&neigh_tbl_lock);
1638 
1639 		if (nla_len(dst_attr) < tbl->key_len)
1640 			goto out;
1641 
1642 		if (ndm->ndm_flags & NTF_PROXY) {
1643 			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1644 			goto out;
1645 		}
1646 
1647 		if (dev == NULL)
1648 			goto out;
1649 
1650 		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1651 		if (neigh == NULL) {
1652 			err = -ENOENT;
1653 			goto out;
1654 		}
1655 
1656 		err = neigh_update(neigh, NULL, NUD_FAILED,
1657 				   NEIGH_UPDATE_F_OVERRIDE |
1658 				   NEIGH_UPDATE_F_ADMIN);
1659 		neigh_release(neigh);
1660 		goto out;
1661 	}
1662 	read_unlock(&neigh_tbl_lock);
1663 	err = -EAFNOSUPPORT;
1664 
1665 out:
1666 	return err;
1667 }
1668 
1669 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1670 {
1671 	struct net *net = sock_net(skb->sk);
1672 	struct ndmsg *ndm;
1673 	struct nlattr *tb[NDA_MAX+1];
1674 	struct neigh_table *tbl;
1675 	struct net_device *dev = NULL;
1676 	int err;
1677 
1678 	ASSERT_RTNL();
1679 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1680 	if (err < 0)
1681 		goto out;
1682 
1683 	err = -EINVAL;
1684 	if (tb[NDA_DST] == NULL)
1685 		goto out;
1686 
1687 	ndm = nlmsg_data(nlh);
1688 	if (ndm->ndm_ifindex) {
1689 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1690 		if (dev == NULL) {
1691 			err = -ENODEV;
1692 			goto out;
1693 		}
1694 
1695 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1696 			goto out;
1697 	}
1698 
1699 	read_lock(&neigh_tbl_lock);
1700 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1701 		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1702 		struct neighbour *neigh;
1703 		void *dst, *lladdr;
1704 
1705 		if (tbl->family != ndm->ndm_family)
1706 			continue;
1707 		read_unlock(&neigh_tbl_lock);
1708 
1709 		if (nla_len(tb[NDA_DST]) < tbl->key_len)
1710 			goto out;
1711 		dst = nla_data(tb[NDA_DST]);
1712 		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1713 
1714 		if (ndm->ndm_flags & NTF_PROXY) {
1715 			struct pneigh_entry *pn;
1716 
1717 			err = -ENOBUFS;
1718 			pn = pneigh_lookup(tbl, net, dst, dev, 1);
1719 			if (pn) {
1720 				pn->flags = ndm->ndm_flags;
1721 				err = 0;
1722 			}
1723 			goto out;
1724 		}
1725 
1726 		if (dev == NULL)
1727 			goto out;
1728 
1729 		neigh = neigh_lookup(tbl, dst, dev);
1730 		if (neigh == NULL) {
1731 			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1732 				err = -ENOENT;
1733 				goto out;
1734 			}
1735 
1736 			neigh = __neigh_lookup_errno(tbl, dst, dev);
1737 			if (IS_ERR(neigh)) {
1738 				err = PTR_ERR(neigh);
1739 				goto out;
1740 			}
1741 		} else {
1742 			if (nlh->nlmsg_flags & NLM_F_EXCL) {
1743 				err = -EEXIST;
1744 				neigh_release(neigh);
1745 				goto out;
1746 			}
1747 
1748 			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1749 				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1750 		}
1751 
1752 		if (ndm->ndm_flags & NTF_USE) {
1753 			neigh_event_send(neigh, NULL);
1754 			err = 0;
1755 		} else
1756 			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1757 		neigh_release(neigh);
1758 		goto out;
1759 	}
1760 
1761 	read_unlock(&neigh_tbl_lock);
1762 	err = -EAFNOSUPPORT;
1763 out:
1764 	return err;
1765 }
1766 
1767 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1768 {
1769 	struct nlattr *nest;
1770 
1771 	nest = nla_nest_start(skb, NDTA_PARMS);
1772 	if (nest == NULL)
1773 		return -ENOBUFS;
1774 
1775 	if ((parms->dev &&
1776 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1777 	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1778 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1779 	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
1780 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
1781 			parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1782 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1783 	    nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1784 	    nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1785 	    nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1786 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1787 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1788 			  parms->base_reachable_time) ||
1789 	    nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1790 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1791 			  parms->delay_probe_time) ||
1792 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1793 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1794 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1795 	    nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1796 		goto nla_put_failure;
1797 	return nla_nest_end(skb, nest);
1798 
1799 nla_put_failure:
1800 	nla_nest_cancel(skb, nest);
1801 	return -EMSGSIZE;
1802 }
1803 
1804 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1805 			      u32 pid, u32 seq, int type, int flags)
1806 {
1807 	struct nlmsghdr *nlh;
1808 	struct ndtmsg *ndtmsg;
1809 
1810 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1811 	if (nlh == NULL)
1812 		return -EMSGSIZE;
1813 
1814 	ndtmsg = nlmsg_data(nlh);
1815 
1816 	read_lock_bh(&tbl->lock);
1817 	ndtmsg->ndtm_family = tbl->family;
1818 	ndtmsg->ndtm_pad1   = 0;
1819 	ndtmsg->ndtm_pad2   = 0;
1820 
1821 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1822 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1823 	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1824 	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1825 	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1826 		goto nla_put_failure;
1827 	{
1828 		unsigned long now = jiffies;
1829 		unsigned int flush_delta = now - tbl->last_flush;
1830 		unsigned int rand_delta = now - tbl->last_rand;
1831 		struct neigh_hash_table *nht;
1832 		struct ndt_config ndc = {
1833 			.ndtc_key_len		= tbl->key_len,
1834 			.ndtc_entry_size	= tbl->entry_size,
1835 			.ndtc_entries		= atomic_read(&tbl->entries),
1836 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1837 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1838 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1839 		};
1840 
1841 		rcu_read_lock_bh();
1842 		nht = rcu_dereference_bh(tbl->nht);
1843 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1844 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1845 		rcu_read_unlock_bh();
1846 
1847 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1848 			goto nla_put_failure;
1849 	}
1850 
1851 	{
1852 		int cpu;
1853 		struct ndt_stats ndst;
1854 
1855 		memset(&ndst, 0, sizeof(ndst));
1856 
1857 		for_each_possible_cpu(cpu) {
1858 			struct neigh_statistics	*st;
1859 
1860 			st = per_cpu_ptr(tbl->stats, cpu);
1861 			ndst.ndts_allocs		+= st->allocs;
1862 			ndst.ndts_destroys		+= st->destroys;
1863 			ndst.ndts_hash_grows		+= st->hash_grows;
1864 			ndst.ndts_res_failed		+= st->res_failed;
1865 			ndst.ndts_lookups		+= st->lookups;
1866 			ndst.ndts_hits			+= st->hits;
1867 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1868 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1869 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1870 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1871 		}
1872 
1873 		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1874 			goto nla_put_failure;
1875 	}
1876 
1877 	BUG_ON(tbl->parms.dev);
1878 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1879 		goto nla_put_failure;
1880 
1881 	read_unlock_bh(&tbl->lock);
1882 	return nlmsg_end(skb, nlh);
1883 
1884 nla_put_failure:
1885 	read_unlock_bh(&tbl->lock);
1886 	nlmsg_cancel(skb, nlh);
1887 	return -EMSGSIZE;
1888 }
1889 
1890 static int neightbl_fill_param_info(struct sk_buff *skb,
1891 				    struct neigh_table *tbl,
1892 				    struct neigh_parms *parms,
1893 				    u32 pid, u32 seq, int type,
1894 				    unsigned int flags)
1895 {
1896 	struct ndtmsg *ndtmsg;
1897 	struct nlmsghdr *nlh;
1898 
1899 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1900 	if (nlh == NULL)
1901 		return -EMSGSIZE;
1902 
1903 	ndtmsg = nlmsg_data(nlh);
1904 
1905 	read_lock_bh(&tbl->lock);
1906 	ndtmsg->ndtm_family = tbl->family;
1907 	ndtmsg->ndtm_pad1   = 0;
1908 	ndtmsg->ndtm_pad2   = 0;
1909 
1910 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1911 	    neightbl_fill_parms(skb, parms) < 0)
1912 		goto errout;
1913 
1914 	read_unlock_bh(&tbl->lock);
1915 	return nlmsg_end(skb, nlh);
1916 errout:
1917 	read_unlock_bh(&tbl->lock);
1918 	nlmsg_cancel(skb, nlh);
1919 	return -EMSGSIZE;
1920 }
1921 
1922 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1923 	[NDTA_NAME]		= { .type = NLA_STRING },
1924 	[NDTA_THRESH1]		= { .type = NLA_U32 },
1925 	[NDTA_THRESH2]		= { .type = NLA_U32 },
1926 	[NDTA_THRESH3]		= { .type = NLA_U32 },
1927 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1928 	[NDTA_PARMS]		= { .type = NLA_NESTED },
1929 };
1930 
1931 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1932 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1933 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1934 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1935 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1936 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1937 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1938 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1939 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1940 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1941 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1942 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1943 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
1944 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
1945 };
1946 
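/*
 * RTM_SETNEIGHTBL handler.  Looks the table up by NDTA_NAME (optionally
 * narrowed by family), updates per-parms values under tbl->lock, and
 * allows the global gc thresholds/interval to change only from the
 * initial netns.  From user space this is typically reached via
 * something like "ip ntable change name arp_cache thresh1 512"
 * (iproute2).
 */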
1947 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1948 {
1949 	struct net *net = sock_net(skb->sk);
1950 	struct neigh_table *tbl;
1951 	struct ndtmsg *ndtmsg;
1952 	struct nlattr *tb[NDTA_MAX+1];
1953 	int err;
1954 
1955 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1956 			  nl_neightbl_policy);
1957 	if (err < 0)
1958 		goto errout;
1959 
1960 	if (tb[NDTA_NAME] == NULL) {
1961 		err = -EINVAL;
1962 		goto errout;
1963 	}
1964 
1965 	ndtmsg = nlmsg_data(nlh);
1966 	read_lock(&neigh_tbl_lock);
1967 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1968 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1969 			continue;
1970 
1971 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1972 			break;
1973 	}
1974 
1975 	if (tbl == NULL) {
1976 		err = -ENOENT;
1977 		goto errout_locked;
1978 	}
1979 
1980 	/*
1981 	 * We acquire tbl->lock to be nice to the periodic timers and
1982 	 * make sure they always see a consistent set of values.
1983 	 */
1984 	write_lock_bh(&tbl->lock);
1985 
1986 	if (tb[NDTA_PARMS]) {
1987 		struct nlattr *tbp[NDTPA_MAX+1];
1988 		struct neigh_parms *p;
1989 		int i, ifindex = 0;
1990 
1991 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1992 				       nl_ntbl_parm_policy);
1993 		if (err < 0)
1994 			goto errout_tbl_lock;
1995 
1996 		if (tbp[NDTPA_IFINDEX])
1997 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1998 
1999 		p = lookup_neigh_parms(tbl, net, ifindex);
2000 		if (p == NULL) {
2001 			err = -ENOENT;
2002 			goto errout_tbl_lock;
2003 		}
2004 
2005 		for (i = 1; i <= NDTPA_MAX; i++) {
2006 			if (tbp[i] == NULL)
2007 				continue;
2008 
2009 			switch (i) {
2010 			case NDTPA_QUEUE_LEN:
2011 				p->queue_len_bytes = nla_get_u32(tbp[i]) *
2012 						     SKB_TRUESIZE(ETH_FRAME_LEN);
2013 				break;
2014 			case NDTPA_QUEUE_LENBYTES:
2015 				p->queue_len_bytes = nla_get_u32(tbp[i]);
2016 				break;
2017 			case NDTPA_PROXY_QLEN:
2018 				p->proxy_qlen = nla_get_u32(tbp[i]);
2019 				break;
2020 			case NDTPA_APP_PROBES:
2021 				p->app_probes = nla_get_u32(tbp[i]);
2022 				break;
2023 			case NDTPA_UCAST_PROBES:
2024 				p->ucast_probes = nla_get_u32(tbp[i]);
2025 				break;
2026 			case NDTPA_MCAST_PROBES:
2027 				p->mcast_probes = nla_get_u32(tbp[i]);
2028 				break;
2029 			case NDTPA_BASE_REACHABLE_TIME:
2030 				p->base_reachable_time = nla_get_msecs(tbp[i]);
2031 				break;
2032 			case NDTPA_GC_STALETIME:
2033 				p->gc_staletime = nla_get_msecs(tbp[i]);
2034 				break;
2035 			case NDTPA_DELAY_PROBE_TIME:
2036 				p->delay_probe_time = nla_get_msecs(tbp[i]);
2037 				break;
2038 			case NDTPA_RETRANS_TIME:
2039 				p->retrans_time = nla_get_msecs(tbp[i]);
2040 				break;
2041 			case NDTPA_ANYCAST_DELAY:
2042 				p->anycast_delay = nla_get_msecs(tbp[i]);
2043 				break;
2044 			case NDTPA_PROXY_DELAY:
2045 				p->proxy_delay = nla_get_msecs(tbp[i]);
2046 				break;
2047 			case NDTPA_LOCKTIME:
2048 				p->locktime = nla_get_msecs(tbp[i]);
2049 				break;
2050 			}
2051 		}
2052 	}
2053 
2054 	err = -ENOENT;
2055 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2056 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2057 	    !net_eq(net, &init_net))
2058 		goto errout_tbl_lock;
2059 
2060 	if (tb[NDTA_THRESH1])
2061 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2062 
2063 	if (tb[NDTA_THRESH2])
2064 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2065 
2066 	if (tb[NDTA_THRESH3])
2067 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2068 
2069 	if (tb[NDTA_GC_INTERVAL])
2070 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2071 
2072 	err = 0;
2073 
2074 errout_tbl_lock:
2075 	write_unlock_bh(&tbl->lock);
2076 errout_locked:
2077 	read_unlock(&neigh_tbl_lock);
2078 errout:
2079 	return err;
2080 }
2081 
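/*
 * RTM_GETNEIGHTBL dump handler.  cb->args[0] remembers how many tables
 * have been dumped so far and cb->args[1] how many per-device parms
 * within the current table, so a dump interrupted by a full skb resumes
 * where it stopped.
 */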
2082 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2083 {
2084 	struct net *net = sock_net(skb->sk);
2085 	int family, tidx, nidx = 0;
2086 	int tbl_skip = cb->args[0];
2087 	int neigh_skip = cb->args[1];
2088 	struct neigh_table *tbl;
2089 
2090 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2091 
2092 	read_lock(&neigh_tbl_lock);
2093 	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2094 		struct neigh_parms *p;
2095 
2096 		if (tidx < tbl_skip || (family && tbl->family != family))
2097 			continue;
2098 
2099 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2100 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2101 				       NLM_F_MULTI) <= 0)
2102 			break;
2103 
2104 		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2105 			if (!net_eq(neigh_parms_net(p), net))
2106 				continue;
2107 
2108 			if (nidx < neigh_skip)
2109 				goto next;
2110 
2111 			if (neightbl_fill_param_info(skb, tbl, p,
2112 						     NETLINK_CB(cb->skb).portid,
2113 						     cb->nlh->nlmsg_seq,
2114 						     RTM_NEWNEIGHTBL,
2115 						     NLM_F_MULTI) <= 0)
2116 				goto out;
2117 		next:
2118 			nidx++;
2119 		}
2120 
2121 		neigh_skip = 0;
2122 	}
2123 out:
2124 	read_unlock(&neigh_tbl_lock);
2125 	cb->args[0] = tidx;
2126 	cb->args[1] = nidx;
2127 
2128 	return skb->len;
2129 }
2130 
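/*
 * Fill one RTM_NEWNEIGH message for a neighbour entry: ndmsg header,
 * NDA_DST key, NDA_LLADDR (only while the entry is NUD_VALID),
 * NDA_CACHEINFO timestamps and NDA_PROBES.  neigh->lock is held only
 * around the fields it protects (state, ll address, timestamps).
 */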
2131 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2132 			   u32 pid, u32 seq, int type, unsigned int flags)
2133 {
2134 	unsigned long now = jiffies;
2135 	struct nda_cacheinfo ci;
2136 	struct nlmsghdr *nlh;
2137 	struct ndmsg *ndm;
2138 
2139 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2140 	if (nlh == NULL)
2141 		return -EMSGSIZE;
2142 
2143 	ndm = nlmsg_data(nlh);
2144 	ndm->ndm_family	 = neigh->ops->family;
2145 	ndm->ndm_pad1    = 0;
2146 	ndm->ndm_pad2    = 0;
2147 	ndm->ndm_flags	 = neigh->flags;
2148 	ndm->ndm_type	 = neigh->type;
2149 	ndm->ndm_ifindex = neigh->dev->ifindex;
2150 
2151 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2152 		goto nla_put_failure;
2153 
2154 	read_lock_bh(&neigh->lock);
2155 	ndm->ndm_state	 = neigh->nud_state;
2156 	if (neigh->nud_state & NUD_VALID) {
2157 		char haddr[MAX_ADDR_LEN];
2158 
2159 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2160 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2161 			read_unlock_bh(&neigh->lock);
2162 			goto nla_put_failure;
2163 		}
2164 	}
2165 
2166 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2167 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2168 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2169 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
2170 	read_unlock_bh(&neigh->lock);
2171 
2172 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2173 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2174 		goto nla_put_failure;
2175 
2176 	return nlmsg_end(skb, nlh);
2177 
2178 nla_put_failure:
2179 	nlmsg_cancel(skb, nlh);
2180 	return -EMSGSIZE;
2181 }
2182 
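/*
 * Same as above for proxy entries: no link-layer address or cache info
 * exists, so the entry is reported as NUD_NONE with NTF_PROXY set.
 */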
2183 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2184 			    u32 pid, u32 seq, int type, unsigned int flags,
2185 			    struct neigh_table *tbl)
2186 {
2187 	struct nlmsghdr *nlh;
2188 	struct ndmsg *ndm;
2189 
2190 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2191 	if (nlh == NULL)
2192 		return -EMSGSIZE;
2193 
2194 	ndm = nlmsg_data(nlh);
2195 	ndm->ndm_family	 = tbl->family;
2196 	ndm->ndm_pad1    = 0;
2197 	ndm->ndm_pad2    = 0;
2198 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
2199 	ndm->ndm_type	 = NDA_DST;
2200 	ndm->ndm_ifindex = pn->dev->ifindex;
2201 	ndm->ndm_state	 = NUD_NONE;
2202 
2203 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2204 		goto nla_put_failure;
2205 
2206 	return nlmsg_end(skb, nlh);
2207 
2208 nla_put_failure:
2209 	nlmsg_cancel(skb, nlh);
2210 	return -EMSGSIZE;
2211 }
2212 
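/* Fire both the in-kernel netevent chain and the rtnetlink broadcast. */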
2213 static void neigh_update_notify(struct neighbour *neigh)
2214 {
2215 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2216 	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
2217 }
2218 
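/*
 * Walk the hash table under rcu_read_lock_bh() and emit one message per
 * entry belonging to the requesting netns.  cb->args[1]/[2] store the
 * bucket and in-bucket index reached so far, mirroring the table-level
 * resume logic in neightbl_dump_info().
 */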
2219 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2220 			    struct netlink_callback *cb)
2221 {
2222 	struct net *net = sock_net(skb->sk);
2223 	struct neighbour *n;
2224 	int rc, h, s_h = cb->args[1];
2225 	int idx = cb->args[2], s_idx = idx;
2226 	struct neigh_hash_table *nht;
2227 
2228 	rcu_read_lock_bh();
2229 	nht = rcu_dereference_bh(tbl->nht);
2230 
2231 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2232 		if (h > s_h)
2233 			s_idx = 0;
2234 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2235 		     n != NULL;
2236 		     n = rcu_dereference_bh(n->next)) {
2237 			if (!net_eq(dev_net(n->dev), net))
2238 				continue;
2239 			if (idx < s_idx)
2240 				goto next;
2241 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2242 					    cb->nlh->nlmsg_seq,
2243 					    RTM_NEWNEIGH,
2244 					    NLM_F_MULTI) <= 0) {
2245 				rc = -1;
2246 				goto out;
2247 			}
2248 next:
2249 			idx++;
2250 		}
2251 	}
2252 	rc = skb->len;
2253 out:
2254 	rcu_read_unlock_bh();
2255 	cb->args[1] = h;
2256 	cb->args[2] = idx;
2257 	return rc;
2258 }
2259 
2260 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2261 			     struct netlink_callback *cb)
2262 {
2263 	struct pneigh_entry *n;
2264 	struct net *net = sock_net(skb->sk);
2265 	int rc, h, s_h = cb->args[3];
2266 	int idx = cb->args[4], s_idx = idx;
2267 
2268 	read_lock_bh(&tbl->lock);
2269 
2270 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2271 		if (h > s_h)
2272 			s_idx = 0;
2273 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2274 			if (!net_eq(dev_net(n->dev), net))
2275 				continue;
2276 			if (idx < s_idx)
2277 				goto next;
2278 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2279 					    cb->nlh->nlmsg_seq,
2280 					    RTM_NEWNEIGH,
2281 					    NLM_F_MULTI, tbl) <= 0) {
2282 				read_unlock_bh(&tbl->lock);
2283 				rc = -1;
2284 				goto out;
2285 			}
2286 		next:
2287 			idx++;
2288 		}
2289 	}
2290 
2291 	read_unlock_bh(&tbl->lock);
2292 	rc = skb->len;
2293 out:
2294 	cb->args[3] = h;
2295 	cb->args[4] = idx;
2296 	return rc;
2298 }
2299 
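/*
 * RTM_GETNEIGH dump entry point: selects proxy vs. normal dumps based
 * on NTF_PROXY in the (optional) full ndmsg header and iterates over
 * all registered tables matching the requested family.
 */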
2300 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2301 {
2302 	struct neigh_table *tbl;
2303 	int t, family, s_t;
2304 	int proxy = 0;
2305 	int err;
2306 
2307 	read_lock(&neigh_tbl_lock);
2308 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2309 
2310 	/* Check whether the full ndmsg structure is present; the family
2311 	 * member sits at the same offset in both rtgenmsg and ndmsg.
2312 	 */
2313 	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2314 	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2315 		proxy = 1;
2316 
2317 	s_t = cb->args[0];
2318 
2319 	for (tbl = neigh_tables, t = 0; tbl;
2320 	     tbl = tbl->next, t++) {
2321 		if (t < s_t || (family && tbl->family != family))
2322 			continue;
2323 		if (t > s_t)
2324 			memset(&cb->args[1], 0, sizeof(cb->args) -
2325 						sizeof(cb->args[0]));
2326 		if (proxy)
2327 			err = pneigh_dump_table(tbl, skb, cb);
2328 		else
2329 			err = neigh_dump_table(tbl, skb, cb);
2330 		if (err < 0)
2331 			break;
2332 	}
2333 	read_unlock(&neigh_tbl_lock);
2334 
2335 	cb->args[0] = t;
2336 	return skb->len;
2337 }
2338 
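/*
 * Invoke cb for every entry in the table.  The read lock only blocks
 * resizes; the chains themselves are traversed under RCU.
 */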
2339 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2340 {
2341 	int chain;
2342 	struct neigh_hash_table *nht;
2343 
2344 	rcu_read_lock_bh();
2345 	nht = rcu_dereference_bh(tbl->nht);
2346 
2347 	read_lock(&tbl->lock); /* avoid resizes */
2348 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2349 		struct neighbour *n;
2350 
2351 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2352 		     n != NULL;
2353 		     n = rcu_dereference_bh(n->next))
2354 			cb(n, cookie);
2355 	}
2356 	read_unlock(&tbl->lock);
2357 	rcu_read_unlock_bh();
2358 }
2359 EXPORT_SYMBOL(neigh_for_each);
2360 
2361 /* The tbl->lock must be held as a writer and BH disabled. */
2362 void __neigh_for_each_release(struct neigh_table *tbl,
2363 			      int (*cb)(struct neighbour *))
2364 {
2365 	int chain;
2366 	struct neigh_hash_table *nht;
2367 
2368 	nht = rcu_dereference_protected(tbl->nht,
2369 					lockdep_is_held(&tbl->lock));
2370 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2371 		struct neighbour *n;
2372 		struct neighbour __rcu **np;
2373 
2374 		np = &nht->hash_buckets[chain];
2375 		while ((n = rcu_dereference_protected(*np,
2376 					lockdep_is_held(&tbl->lock))) != NULL) {
2377 			int release;
2378 
2379 			write_lock(&n->lock);
2380 			release = cb(n);
2381 			if (release) {
2382 				rcu_assign_pointer(*np,
2383 					rcu_dereference_protected(n->next,
2384 						lockdep_is_held(&tbl->lock)));
2385 				n->dead = 1;
2386 			} else
2387 				np = &n->next;
2388 			write_unlock(&n->lock);
2389 			if (release)
2390 				neigh_cleanup_and_release(n);
2391 		}
2392 	}
2393 }
2394 EXPORT_SYMBOL(__neigh_for_each_release);
2395 
2396 #ifdef CONFIG_PROC_FS
2397 
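/*
 * /proc seq_file iteration: positions are mapped onto (bucket, index)
 * pairs, first across the neighbour hash table and then, unless
 * NEIGH_SEQ_NEIGH_ONLY is set, across the proxy hash table.
 */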
2398 static struct neighbour *neigh_get_first(struct seq_file *seq)
2399 {
2400 	struct neigh_seq_state *state = seq->private;
2401 	struct net *net = seq_file_net(seq);
2402 	struct neigh_hash_table *nht = state->nht;
2403 	struct neighbour *n = NULL;
2404 	int bucket = state->bucket;
2405 
2406 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2407 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2408 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2409 
2410 		while (n) {
2411 			if (!net_eq(dev_net(n->dev), net))
2412 				goto next;
2413 			if (state->neigh_sub_iter) {
2414 				loff_t fakep = 0;
2415 				void *v;
2416 
2417 				v = state->neigh_sub_iter(state, n, &fakep);
2418 				if (!v)
2419 					goto next;
2420 			}
2421 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2422 				break;
2423 			if (n->nud_state & ~NUD_NOARP)
2424 				break;
2425 next:
2426 			n = rcu_dereference_bh(n->next);
2427 		}
2428 
2429 		if (n)
2430 			break;
2431 	}
2432 	state->bucket = bucket;
2433 
2434 	return n;
2435 }
2436 
2437 static struct neighbour *neigh_get_next(struct seq_file *seq,
2438 					struct neighbour *n,
2439 					loff_t *pos)
2440 {
2441 	struct neigh_seq_state *state = seq->private;
2442 	struct net *net = seq_file_net(seq);
2443 	struct neigh_hash_table *nht = state->nht;
2444 
2445 	if (state->neigh_sub_iter) {
2446 		void *v = state->neigh_sub_iter(state, n, pos);
2447 		if (v)
2448 			return n;
2449 	}
2450 	n = rcu_dereference_bh(n->next);
2451 
2452 	while (1) {
2453 		while (n) {
2454 			if (!net_eq(dev_net(n->dev), net))
2455 				goto next;
2456 			if (state->neigh_sub_iter) {
2457 				void *v = state->neigh_sub_iter(state, n, pos);
2458 				if (v)
2459 					return n;
2460 				goto next;
2461 			}
2462 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2463 				break;
2464 
2465 			if (n->nud_state & ~NUD_NOARP)
2466 				break;
2467 next:
2468 			n = rcu_dereference_bh(n->next);
2469 		}
2470 
2471 		if (n)
2472 			break;
2473 
2474 		if (++state->bucket >= (1 << nht->hash_shift))
2475 			break;
2476 
2477 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2478 	}
2479 
2480 	if (n && pos)
2481 		--(*pos);
2482 	return n;
2483 }
2484 
2485 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2486 {
2487 	struct neighbour *n = neigh_get_first(seq);
2488 
2489 	if (n) {
2490 		--(*pos);
2491 		while (*pos) {
2492 			n = neigh_get_next(seq, n, pos);
2493 			if (!n)
2494 				break;
2495 		}
2496 	}
2497 	return *pos ? NULL : n;
2498 }
2499 
2500 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2501 {
2502 	struct neigh_seq_state *state = seq->private;
2503 	struct net *net = seq_file_net(seq);
2504 	struct neigh_table *tbl = state->tbl;
2505 	struct pneigh_entry *pn = NULL;
2506 	int bucket = state->bucket;
2507 
2508 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2509 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2510 		pn = tbl->phash_buckets[bucket];
2511 		while (pn && !net_eq(pneigh_net(pn), net))
2512 			pn = pn->next;
2513 		if (pn)
2514 			break;
2515 	}
2516 	state->bucket = bucket;
2517 
2518 	return pn;
2519 }
2520 
2521 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2522 					    struct pneigh_entry *pn,
2523 					    loff_t *pos)
2524 {
2525 	struct neigh_seq_state *state = seq->private;
2526 	struct net *net = seq_file_net(seq);
2527 	struct neigh_table *tbl = state->tbl;
2528 
2529 	do {
2530 		pn = pn->next;
2531 	} while (pn && !net_eq(pneigh_net(pn), net));
2532 
2533 	while (!pn) {
2534 		if (++state->bucket > PNEIGH_HASHMASK)
2535 			break;
2536 		pn = tbl->phash_buckets[state->bucket];
2537 		while (pn && !net_eq(pneigh_net(pn), net))
2538 			pn = pn->next;
2539 		if (pn)
2540 			break;
2541 	}
2542 
2543 	if (pn && pos)
2544 		--(*pos);
2545 
2546 	return pn;
2547 }
2548 
2549 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2550 {
2551 	struct pneigh_entry *pn = pneigh_get_first(seq);
2552 
2553 	if (pn) {
2554 		--(*pos);
2555 		while (*pos) {
2556 			pn = pneigh_get_next(seq, pn, pos);
2557 			if (!pn)
2558 				break;
2559 		}
2560 	}
2561 	return *pos ? NULL : pn;
2562 }
2563 
2564 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2565 {
2566 	struct neigh_seq_state *state = seq->private;
2567 	void *rc;
2568 	loff_t idxpos = *pos;
2569 
2570 	rc = neigh_get_idx(seq, &idxpos);
2571 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2572 		rc = pneigh_get_idx(seq, &idxpos);
2573 
2574 	return rc;
2575 }
2576 
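/*
 * Protocol seq_file implementations (e.g. ARP's /proc/net/arp) call the
 * three helpers below from their seq_operations.  start() takes
 * rcu_read_lock_bh() and stop() drops it, so ->show() runs with BHs
 * disabled and must not sleep.
 */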
2577 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2578 	__acquires(rcu_bh)
2579 {
2580 	struct neigh_seq_state *state = seq->private;
2581 
2582 	state->tbl = tbl;
2583 	state->bucket = 0;
2584 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2585 
2586 	rcu_read_lock_bh();
2587 	state->nht = rcu_dereference_bh(tbl->nht);
2588 
2589 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2590 }
2591 EXPORT_SYMBOL(neigh_seq_start);
2592 
2593 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2594 {
2595 	struct neigh_seq_state *state;
2596 	void *rc;
2597 
2598 	if (v == SEQ_START_TOKEN) {
2599 		rc = neigh_get_first(seq);
2600 		goto out;
2601 	}
2602 
2603 	state = seq->private;
2604 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2605 		rc = neigh_get_next(seq, v, NULL);
2606 		if (rc)
2607 			goto out;
2608 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2609 			rc = pneigh_get_first(seq);
2610 	} else {
2611 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2612 		rc = pneigh_get_next(seq, v, NULL);
2613 	}
2614 out:
2615 	++(*pos);
2616 	return rc;
2617 }
2618 EXPORT_SYMBOL(neigh_seq_next);
2619 
2620 void neigh_seq_stop(struct seq_file *seq, void *v)
2621 	__releases(rcu_bh)
2622 {
2623 	rcu_read_unlock_bh();
2624 }
2625 EXPORT_SYMBOL(neigh_seq_stop);
2626 
2627 /* statistics via seq_file */
2628 
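/*
 * One line per possible CPU; *pos is kept as (cpu + 1) so that position
 * 0 can serve as the SEQ_START_TOKEN used to print the header.  The
 * result shows up under /proc/net/stat/ (e.g. arp_cache for the IPv4
 * ARP table).
 */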
2629 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2630 {
2631 	struct neigh_table *tbl = seq->private;
2632 	int cpu;
2633 
2634 	if (*pos == 0)
2635 		return SEQ_START_TOKEN;
2636 
2637 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2638 		if (!cpu_possible(cpu))
2639 			continue;
2640 		*pos = cpu+1;
2641 		return per_cpu_ptr(tbl->stats, cpu);
2642 	}
2643 	return NULL;
2644 }
2645 
2646 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2647 {
2648 	struct neigh_table *tbl = seq->private;
2649 	int cpu;
2650 
2651 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2652 		if (!cpu_possible(cpu))
2653 			continue;
2654 		*pos = cpu+1;
2655 		return per_cpu_ptr(tbl->stats, cpu);
2656 	}
2657 	return NULL;
2658 }
2659 
2660 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2661 {
2663 }
2664 
2665 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2666 {
2667 	struct neigh_table *tbl = seq->private;
2668 	struct neigh_statistics *st = v;
2669 
2670 	if (v == SEQ_START_TOKEN) {
2671 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2672 		return 0;
2673 	}
2674 
2675 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2676 			"%08lx %08lx  %08lx %08lx %08lx\n",
2677 		   atomic_read(&tbl->entries),
2678 
2679 		   st->allocs,
2680 		   st->destroys,
2681 		   st->hash_grows,
2682 
2683 		   st->lookups,
2684 		   st->hits,
2685 
2686 		   st->res_failed,
2687 
2688 		   st->rcv_probes_mcast,
2689 		   st->rcv_probes_ucast,
2690 
2691 		   st->periodic_gc_runs,
2692 		   st->forced_gc_runs,
2693 		   st->unres_discards
2694 		   );
2695 
2696 	return 0;
2697 }
2698 
2699 static const struct seq_operations neigh_stat_seq_ops = {
2700 	.start	= neigh_stat_seq_start,
2701 	.next	= neigh_stat_seq_next,
2702 	.stop	= neigh_stat_seq_stop,
2703 	.show	= neigh_stat_seq_show,
2704 };
2705 
2706 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2707 {
2708 	int ret = seq_open(file, &neigh_stat_seq_ops);
2709 
2710 	if (!ret) {
2711 		struct seq_file *sf = file->private_data;
2712 		sf->private = PDE_DATA(inode);
2713 	}
2714 	return ret;
2715 }
2716 
2717 static const struct file_operations neigh_stat_seq_fops = {
2718 	.owner	 = THIS_MODULE,
2719 	.open 	 = neigh_stat_seq_open,
2720 	.read	 = seq_read,
2721 	.llseek	 = seq_lseek,
2722 	.release = seq_release,
2723 };
2724 
2725 #endif /* CONFIG_PROC_FS */
2726 
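/*
 * Worst-case allocation size for a neighbour notification: both
 * addresses are budgeted at MAX_ADDR_LEN, so neigh_fill_info() should
 * never be able to overrun it (__neigh_notify() WARNs on -EMSGSIZE).
 */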
2727 static inline size_t neigh_nlmsg_size(void)
2728 {
2729 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2730 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2731 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2732 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2733 	       + nla_total_size(4); /* NDA_PROBES */
2734 }
2735 
2736 static void __neigh_notify(struct neighbour *n, int type, int flags)
2737 {
2738 	struct net *net = dev_net(n->dev);
2739 	struct sk_buff *skb;
2740 	int err = -ENOBUFS;
2741 
2742 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2743 	if (skb == NULL)
2744 		goto errout;
2745 
2746 	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2747 	if (err < 0) {
2748 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2749 		WARN_ON(err == -EMSGSIZE);
2750 		kfree_skb(skb);
2751 		goto errout;
2752 	}
2753 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2754 	return;
2755 errout:
2756 	if (err < 0)
2757 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2758 }
2759 
2760 #ifdef CONFIG_ARPD
2761 void neigh_app_ns(struct neighbour *n)
2762 {
2763 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2764 }
2765 EXPORT_SYMBOL(neigh_app_ns);
2766 #endif /* CONFIG_ARPD */
2767 
2768 #ifdef CONFIG_SYSCTL
2769 static int zero;
2770 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2771 
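/*
 * "unres_qlen" is stored internally in bytes (queue_len_bytes) but
 * exposed to user space in packets; both directions convert with an
 * assumed true size of one Ethernet frame per queued skb.
 */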
2772 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2773 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2774 {
2775 	int size, ret;
2776 	struct ctl_table tmp = *ctl;
2777 
2778 	tmp.extra1 = &zero;
2779 	tmp.extra2 = &unres_qlen_max;
2780 	tmp.data = &size;
2781 
2782 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2783 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2784 
2785 	if (write && !ret)
2786 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2787 	return ret;
2788 }
2789 
2790 enum {
2791 	NEIGH_VAR_MCAST_PROBE,
2792 	NEIGH_VAR_UCAST_PROBE,
2793 	NEIGH_VAR_APP_PROBE,
2794 	NEIGH_VAR_RETRANS_TIME,
2795 	NEIGH_VAR_BASE_REACHABLE_TIME,
2796 	NEIGH_VAR_DELAY_PROBE_TIME,
2797 	NEIGH_VAR_GC_STALETIME,
2798 	NEIGH_VAR_QUEUE_LEN,
2799 	NEIGH_VAR_QUEUE_LEN_BYTES,
2800 	NEIGH_VAR_PROXY_QLEN,
2801 	NEIGH_VAR_ANYCAST_DELAY,
2802 	NEIGH_VAR_PROXY_DELAY,
2803 	NEIGH_VAR_LOCKTIME,
2804 	NEIGH_VAR_RETRANS_TIME_MS,
2805 	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
2806 	NEIGH_VAR_GC_INTERVAL,
2807 	NEIGH_VAR_GC_THRESH1,
2808 	NEIGH_VAR_GC_THRESH2,
2809 	NEIGH_VAR_GC_THRESH3,
2810 	NEIGH_VAR_MAX
2811 };
2812 
2813 static struct neigh_sysctl_table {
2814 	struct ctl_table_header *sysctl_header;
2815 	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2816 } neigh_sysctl_template __read_mostly = {
2817 	.neigh_vars = {
2818 		[NEIGH_VAR_MCAST_PROBE] = {
2819 			.procname	= "mcast_solicit",
2820 			.maxlen		= sizeof(int),
2821 			.mode		= 0644,
2822 			.proc_handler	= proc_dointvec,
2823 		},
2824 		[NEIGH_VAR_UCAST_PROBE] = {
2825 			.procname	= "ucast_solicit",
2826 			.maxlen		= sizeof(int),
2827 			.mode		= 0644,
2828 			.proc_handler	= proc_dointvec,
2829 		},
2830 		[NEIGH_VAR_APP_PROBE] = {
2831 			.procname	= "app_solicit",
2832 			.maxlen		= sizeof(int),
2833 			.mode		= 0644,
2834 			.proc_handler	= proc_dointvec,
2835 		},
2836 		[NEIGH_VAR_RETRANS_TIME] = {
2837 			.procname	= "retrans_time",
2838 			.maxlen		= sizeof(int),
2839 			.mode		= 0644,
2840 			.proc_handler	= proc_dointvec_userhz_jiffies,
2841 		},
2842 		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
2843 			.procname	= "base_reachable_time",
2844 			.maxlen		= sizeof(int),
2845 			.mode		= 0644,
2846 			.proc_handler	= proc_dointvec_jiffies,
2847 		},
2848 		[NEIGH_VAR_DELAY_PROBE_TIME] = {
2849 			.procname	= "delay_first_probe_time",
2850 			.maxlen		= sizeof(int),
2851 			.mode		= 0644,
2852 			.proc_handler	= proc_dointvec_jiffies,
2853 		},
2854 		[NEIGH_VAR_GC_STALETIME] = {
2855 			.procname	= "gc_stale_time",
2856 			.maxlen		= sizeof(int),
2857 			.mode		= 0644,
2858 			.proc_handler	= proc_dointvec_jiffies,
2859 		},
2860 		[NEIGH_VAR_QUEUE_LEN] = {
2861 			.procname	= "unres_qlen",
2862 			.maxlen		= sizeof(int),
2863 			.mode		= 0644,
2864 			.proc_handler	= proc_unres_qlen,
2865 		},
2866 		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
2867 			.procname	= "unres_qlen_bytes",
2868 			.maxlen		= sizeof(int),
2869 			.mode		= 0644,
2870 			.extra1		= &zero,
2871 			.proc_handler   = proc_dointvec_minmax,
2872 		},
2873 		[NEIGH_VAR_PROXY_QLEN] = {
2874 			.procname	= "proxy_qlen",
2875 			.maxlen		= sizeof(int),
2876 			.mode		= 0644,
2877 			.proc_handler	= proc_dointvec,
2878 		},
2879 		[NEIGH_VAR_ANYCAST_DELAY] = {
2880 			.procname	= "anycast_delay",
2881 			.maxlen		= sizeof(int),
2882 			.mode		= 0644,
2883 			.proc_handler	= proc_dointvec_userhz_jiffies,
2884 		},
2885 		[NEIGH_VAR_PROXY_DELAY] = {
2886 			.procname	= "proxy_delay",
2887 			.maxlen		= sizeof(int),
2888 			.mode		= 0644,
2889 			.proc_handler	= proc_dointvec_userhz_jiffies,
2890 		},
2891 		[NEIGH_VAR_LOCKTIME] = {
2892 			.procname	= "locktime",
2893 			.maxlen		= sizeof(int),
2894 			.mode		= 0644,
2895 			.proc_handler	= proc_dointvec_userhz_jiffies,
2896 		},
2897 		[NEIGH_VAR_RETRANS_TIME_MS] = {
2898 			.procname	= "retrans_time_ms",
2899 			.maxlen		= sizeof(int),
2900 			.mode		= 0644,
2901 			.proc_handler	= proc_dointvec_ms_jiffies,
2902 		},
2903 		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2904 			.procname	= "base_reachable_time_ms",
2905 			.maxlen		= sizeof(int),
2906 			.mode		= 0644,
2907 			.proc_handler	= proc_dointvec_ms_jiffies,
2908 		},
2909 		[NEIGH_VAR_GC_INTERVAL] = {
2910 			.procname	= "gc_interval",
2911 			.maxlen		= sizeof(int),
2912 			.mode		= 0644,
2913 			.proc_handler	= proc_dointvec_jiffies,
2914 		},
2915 		[NEIGH_VAR_GC_THRESH1] = {
2916 			.procname	= "gc_thresh1",
2917 			.maxlen		= sizeof(int),
2918 			.mode		= 0644,
2919 			.proc_handler	= proc_dointvec,
2920 		},
2921 		[NEIGH_VAR_GC_THRESH2] = {
2922 			.procname	= "gc_thresh2",
2923 			.maxlen		= sizeof(int),
2924 			.mode		= 0644,
2925 			.proc_handler	= proc_dointvec,
2926 		},
2927 		[NEIGH_VAR_GC_THRESH3] = {
2928 			.procname	= "gc_thresh3",
2929 			.maxlen		= sizeof(int),
2930 			.mode		= 0644,
2931 			.proc_handler	= proc_dointvec,
2932 		},
2933 		{},
2934 	},
2935 };
2936 
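/*
 * Clone the template above, wire each entry's ->data to the matching
 * field of this parms instance, and register the result under
 * net/<proto>/neigh/<device|default>/ (e.g. net/ipv4/neigh/eth0/).
 */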
2937 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2938 			  char *p_name, proc_handler *handler)
2939 {
2940 	struct neigh_sysctl_table *t;
2941 	const char *dev_name_source = NULL;
2942 	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
2943 
2944 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2945 	if (!t)
2946 		goto err;
2947 
2948 	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2949 	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2950 	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2951 	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2952 	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2953 	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2954 	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2955 	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2956 	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2957 	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2958 	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2959 	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2960 	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2961 	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2962 	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2963 
2964 	if (dev) {
2965 		dev_name_source = dev->name;
2966 		/* Terminate the table early */
2967 		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2968 		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2969 	} else {
2970 		dev_name_source = "default";
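		/*
		 * In the "default" case p is the table's embedded parms, and
		 * struct neigh_table lays out gc_interval and gc_thresh1..3
		 * immediately after it, so (p + 1) points at gc_interval.
		 * This relies on the field order in struct neigh_table.
		 */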
2971 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2972 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2973 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2974 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2975 	}
2976 
2978 	if (handler) {
2979 		/* RetransTime */
2980 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2981 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2982 		/* ReachableTime */
2983 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2984 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2985 		/* RetransTime (in milliseconds)*/
2986 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2987 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2988 		/* ReachableTime (in milliseconds) */
2989 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2990 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2991 	}
2992 
2993 	/* Don't export sysctls to unprivileged users */
2994 	if (neigh_parms_net(p)->user_ns != &init_user_ns)
2995 		t->neigh_vars[0].procname = NULL;
2996 
2997 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
2998 		p_name, dev_name_source);
2999 	t->sysctl_header =
3000 		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3001 	if (!t->sysctl_header)
3002 		goto free;
3003 
3004 	p->sysctl_table = t;
3005 	return 0;
3006 
3007 free:
3008 	kfree(t);
3009 err:
3010 	return -ENOBUFS;
3011 }
3012 EXPORT_SYMBOL(neigh_sysctl_register);
3013 
3014 void neigh_sysctl_unregister(struct neigh_parms *p)
3015 {
3016 	if (p->sysctl_table) {
3017 		struct neigh_sysctl_table *t = p->sysctl_table;
3018 		p->sysctl_table = NULL;
3019 		unregister_net_sysctl_table(t->sysctl_header);
3020 		kfree(t);
3021 	}
3022 }
3023 EXPORT_SYMBOL(neigh_sysctl_unregister);
3024 
3025 #endif	/* CONFIG_SYSCTL */
3026 
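/*
 * Register the PF_UNSPEC rtnetlink handlers; the per-protocol tables
 * themselves (e.g. ARP's arp_tbl) are registered separately via
 * neigh_table_init().
 */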
3027 static int __init neigh_init(void)
3028 {
3029 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3030 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3031 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3032 
3033 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3034 		      NULL);
3035 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3036 
3037 	return 0;
3038 }
3039 
3040 subsys_initcall(neigh_init);
3041 
3042