xref: /linux/net/core/neighbour.c (revision 26b0d14106954ae46d2f4f7eec3481828a210f7d)
1 /*
2  *	Generic address resolution entity
3  *
4  *	Authors:
5  *	Pedro Roque		<roque@di.fc.ul.pt>
6  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *	Fixes:
14  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
15  *	Harald Welte		Add neighbour cache statistics like rtstat
16  */
17 
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19 
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 
42 #define NEIGH_DEBUG 1
43 
44 #define NEIGH_PRINTK(x...) printk(x)
45 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
46 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
47 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
48 
49 #if NEIGH_DEBUG >= 1
50 #undef NEIGH_PRINTK1
51 #define NEIGH_PRINTK1 NEIGH_PRINTK
52 #endif
53 #if NEIGH_DEBUG >= 2
54 #undef NEIGH_PRINTK2
55 #define NEIGH_PRINTK2 NEIGH_PRINTK
56 #endif
57 
58 #define PNEIGH_HASHMASK		0xF
59 
60 static void neigh_timer_handler(unsigned long arg);
61 static void __neigh_notify(struct neighbour *n, int type, int flags);
62 static void neigh_update_notify(struct neighbour *neigh);
63 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
64 
65 static struct neigh_table *neigh_tables;
66 #ifdef CONFIG_PROC_FS
67 static const struct file_operations neigh_stat_seq_fops;
68 #endif
69 
70 /*
71    Neighbour hash table buckets are protected with rwlock tbl->lock.
72
73    - All scans of and updates to hash buckets MUST be made under this lock.
74    - NOTHING clever should be done under this lock: no callbacks
75      to protocol backends, no attempts to send anything to the network.
76      That would result in deadlocks if the backend/driver wants to use
77      the neighbour cache.
78    - If the entry requires some non-trivial actions, increase
79      its reference count and release the table lock.
80
81    Neighbour entries are protected:
82    - with a reference count.
83    - with the rwlock neigh->lock
84
85    The reference count prevents destruction.
86
87    neigh->lock mainly serializes the ll address data and its validity state.
88    However, the same lock is used to protect other entry fields:
89     - timer
90     - resolution queue
91
92    Again, nothing clever shall be done under neigh->lock;
93    the most complicated procedure we allow is dev->hard_header.
94    It is assumed that dev->hard_header is simplistic and does
95    not make callbacks to neighbour tables.
96
97    The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
98    the list of neighbour tables. This list is used only in process context.
99  */
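/* A minimal sketch of the rule above (illustrative only, not part of the
 * upstream file): pin the entry under tbl->lock, drop the lock, and only
 * then do anything non-trivial with it.
 *
 *	write_lock_bh(&tbl->lock);
 *	n = ...;                        (found during a bucket scan)
 *	neigh_hold(n);                  (safe: tbl->lock is still held)
 *	write_unlock_bh(&tbl->lock);
 *	do_something_slow(n);           (hypothetical helper; may sleep/xmit)
 *	neigh_release(n);
 */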
100 
101 static DEFINE_RWLOCK(neigh_tbl_lock);
102 
103 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
104 {
105 	kfree_skb(skb);
106 	return -ENETDOWN;
107 }
108 
109 static void neigh_cleanup_and_release(struct neighbour *neigh)
110 {
111 	if (neigh->parms->neigh_cleanup)
112 		neigh->parms->neigh_cleanup(neigh);
113 
114 	__neigh_notify(neigh, RTM_DELNEIGH, 0);
115 	neigh_release(neigh);
116 }
117 
118 /*
119  * Returns a value drawn uniformly at random from the interval
120  * (1/2)*base...(3/2)*base. It corresponds to the default IPv6 settings
121  * and is not overridable, because it is a really reasonable choice.
122  */
123 
124 unsigned long neigh_rand_reach_time(unsigned long base)
125 {
126 	return base ? (net_random() % base) + (base >> 1) : 0;
127 }
128 EXPORT_SYMBOL(neigh_rand_reach_time);
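/* Worked example: with base = 30 * HZ, net_random() % base lies in
 * [0, 30 * HZ) and base >> 1 adds 15 * HZ, giving a result uniform over
 * [15 * HZ, 45 * HZ) -- i.e. (1/2)*base ... (3/2)*base as noted above.
 */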
129 
130 
131 static int neigh_forced_gc(struct neigh_table *tbl)
132 {
133 	int shrunk = 0;
134 	int i;
135 	struct neigh_hash_table *nht;
136 
137 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
138 
139 	write_lock_bh(&tbl->lock);
140 	nht = rcu_dereference_protected(tbl->nht,
141 					lockdep_is_held(&tbl->lock));
142 	for (i = 0; i < (1 << nht->hash_shift); i++) {
143 		struct neighbour *n;
144 		struct neighbour __rcu **np;
145 
146 		np = &nht->hash_buckets[i];
147 		while ((n = rcu_dereference_protected(*np,
148 					lockdep_is_held(&tbl->lock))) != NULL) {
149 			/* Neighbour record may be discarded if:
150 			 * - nobody refers to it.
151 			 * - it is not permanent
152 			 */
153 			write_lock(&n->lock);
154 			if (atomic_read(&n->refcnt) == 1 &&
155 			    !(n->nud_state & NUD_PERMANENT)) {
156 				rcu_assign_pointer(*np,
157 					rcu_dereference_protected(n->next,
158 						  lockdep_is_held(&tbl->lock)));
159 				n->dead = 1;
160 				shrunk	= 1;
161 				write_unlock(&n->lock);
162 				neigh_cleanup_and_release(n);
163 				continue;
164 			}
165 			write_unlock(&n->lock);
166 			np = &n->next;
167 		}
168 	}
169 
170 	tbl->last_flush = jiffies;
171 
172 	write_unlock_bh(&tbl->lock);
173 
174 	return shrunk;
175 }
176 
177 static void neigh_add_timer(struct neighbour *n, unsigned long when)
178 {
179 	neigh_hold(n);
180 	if (unlikely(mod_timer(&n->timer, when))) {
181 		printk("NEIGH: BUG, double timer add, state is %x\n",
182 		       n->nud_state);
183 		dump_stack();
184 	}
185 }
186 
187 static int neigh_del_timer(struct neighbour *n)
188 {
189 	if ((n->nud_state & NUD_IN_TIMER) &&
190 	    del_timer(&n->timer)) {
191 		neigh_release(n);
192 		return 1;
193 	}
194 	return 0;
195 }
196 
197 static void pneigh_queue_purge(struct sk_buff_head *list)
198 {
199 	struct sk_buff *skb;
200 
201 	while ((skb = skb_dequeue(list)) != NULL) {
202 		dev_put(skb->dev);
203 		kfree_skb(skb);
204 	}
205 }
206 
207 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
208 {
209 	int i;
210 	struct neigh_hash_table *nht;
211 
212 	nht = rcu_dereference_protected(tbl->nht,
213 					lockdep_is_held(&tbl->lock));
214 
215 	for (i = 0; i < (1 << nht->hash_shift); i++) {
216 		struct neighbour *n;
217 		struct neighbour __rcu **np = &nht->hash_buckets[i];
218 
219 		while ((n = rcu_dereference_protected(*np,
220 					lockdep_is_held(&tbl->lock))) != NULL) {
221 			if (dev && n->dev != dev) {
222 				np = &n->next;
223 				continue;
224 			}
225 			rcu_assign_pointer(*np,
226 				   rcu_dereference_protected(n->next,
227 						lockdep_is_held(&tbl->lock)));
228 			write_lock(&n->lock);
229 			neigh_del_timer(n);
230 			n->dead = 1;
231 
232 			if (atomic_read(&n->refcnt) != 1) {
233 				/* The most unpleasant situation:
234 				   we must destroy the neighbour entry,
235 				   but someone still uses it.
236
237 				   Destruction will be delayed until
238 				   the last user releases us, but
239 				   we must kill timers etc. and move
240 				   it to a safe state.
241 				 */
242 				skb_queue_purge(&n->arp_queue);
243 				n->arp_queue_len_bytes = 0;
244 				n->output = neigh_blackhole;
245 				if (n->nud_state & NUD_VALID)
246 					n->nud_state = NUD_NOARP;
247 				else
248 					n->nud_state = NUD_NONE;
249 				NEIGH_PRINTK2("neigh %p is stray.\n", n);
250 			}
251 			write_unlock(&n->lock);
252 			neigh_cleanup_and_release(n);
253 		}
254 	}
255 }
256 
257 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
258 {
259 	write_lock_bh(&tbl->lock);
260 	neigh_flush_dev(tbl, dev);
261 	write_unlock_bh(&tbl->lock);
262 }
263 EXPORT_SYMBOL(neigh_changeaddr);
264 
265 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
266 {
267 	write_lock_bh(&tbl->lock);
268 	neigh_flush_dev(tbl, dev);
269 	pneigh_ifdown(tbl, dev);
270 	write_unlock_bh(&tbl->lock);
271 
272 	del_timer_sync(&tbl->proxy_timer);
273 	pneigh_queue_purge(&tbl->proxy_queue);
274 	return 0;
275 }
276 EXPORT_SYMBOL(neigh_ifdown);
277 
278 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
279 {
280 	struct neighbour *n = NULL;
281 	unsigned long now = jiffies;
282 	int entries;
283 
284 	entries = atomic_inc_return(&tbl->entries) - 1;
285 	if (entries >= tbl->gc_thresh3 ||
286 	    (entries >= tbl->gc_thresh2 &&
287 	     time_after(now, tbl->last_flush + 5 * HZ))) {
288 		if (!neigh_forced_gc(tbl) &&
289 		    entries >= tbl->gc_thresh3)
290 			goto out_entries;
291 	}
292 
293 	if (tbl->entry_size)
294 		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
295 	else {
296 		int sz = sizeof(*n) + tbl->key_len;
297 
298 		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
299 		sz += dev->neigh_priv_len;
300 		n = kzalloc(sz, GFP_ATOMIC);
301 	}
302 	if (!n)
303 		goto out_entries;
304 
305 	skb_queue_head_init(&n->arp_queue);
306 	rwlock_init(&n->lock);
307 	seqlock_init(&n->ha_lock);
308 	n->updated	  = n->used = now;
309 	n->nud_state	  = NUD_NONE;
310 	n->output	  = neigh_blackhole;
311 	seqlock_init(&n->hh.hh_lock);
312 	n->parms	  = neigh_parms_clone(&tbl->parms);
313 	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
314 
315 	NEIGH_CACHE_STAT_INC(tbl, allocs);
316 	n->tbl		  = tbl;
317 	atomic_set(&n->refcnt, 1);
318 	n->dead		  = 1;
319 out:
320 	return n;
321 
322 out_entries:
323 	atomic_dec(&tbl->entries);
324 	goto out;
325 }
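/* Illustrative numbers (ARP's usual defaults; not defined in this file):
 * with gc_thresh1/2/3 = 128/512/1024, exceeding 1024 entries triggers a
 * synchronous neigh_forced_gc() and the allocation fails if nothing was
 * reclaimed; above 512 entries a forced GC runs at most once every 5
 * seconds, which is the "last_flush + 5 * HZ" check above.
 */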
326 
327 static void neigh_get_hash_rnd(u32 *x)
328 {
329 	get_random_bytes(x, sizeof(*x));
330 	*x |= 1;
331 }
332 
333 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
334 {
335 	size_t size = (1 << shift) * sizeof(struct neighbour *);
336 	struct neigh_hash_table *ret;
337 	struct neighbour __rcu **buckets;
338 	int i;
339 
340 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
341 	if (!ret)
342 		return NULL;
343 	if (size <= PAGE_SIZE)
344 		buckets = kzalloc(size, GFP_ATOMIC);
345 	else
346 		buckets = (struct neighbour __rcu **)
347 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
348 					   get_order(size));
349 	if (!buckets) {
350 		kfree(ret);
351 		return NULL;
352 	}
353 	ret->hash_buckets = buckets;
354 	ret->hash_shift = shift;
355 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
356 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
357 	return ret;
358 }
359 
360 static void neigh_hash_free_rcu(struct rcu_head *head)
361 {
362 	struct neigh_hash_table *nht = container_of(head,
363 						    struct neigh_hash_table,
364 						    rcu);
365 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
366 	struct neighbour __rcu **buckets = nht->hash_buckets;
367 
368 	if (size <= PAGE_SIZE)
369 		kfree(buckets);
370 	else
371 		free_pages((unsigned long)buckets, get_order(size));
372 	kfree(nht);
373 }
374 
375 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
376 						unsigned long new_shift)
377 {
378 	unsigned int i, hash;
379 	struct neigh_hash_table *new_nht, *old_nht;
380 
381 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
382 
383 	old_nht = rcu_dereference_protected(tbl->nht,
384 					    lockdep_is_held(&tbl->lock));
385 	new_nht = neigh_hash_alloc(new_shift);
386 	if (!new_nht)
387 		return old_nht;
388 
389 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
390 		struct neighbour *n, *next;
391 
392 		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
393 						   lockdep_is_held(&tbl->lock));
394 		     n != NULL;
395 		     n = next) {
396 			hash = tbl->hash(n->primary_key, n->dev,
397 					 new_nht->hash_rnd);
398 
399 			hash >>= (32 - new_nht->hash_shift);
400 			next = rcu_dereference_protected(n->next,
401 						lockdep_is_held(&tbl->lock));
402 
403 			rcu_assign_pointer(n->next,
404 					   rcu_dereference_protected(
405 						new_nht->hash_buckets[hash],
406 						lockdep_is_held(&tbl->lock)));
407 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
408 		}
409 	}
410 
411 	rcu_assign_pointer(tbl->nht, new_nht);
412 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
413 	return new_nht;
414 }
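/* Note on the index math above (our reading, not an upstream comment):
 * tbl->hash() is expected to keep its best entropy in the high-order bits,
 * so bucket indices are taken as the top hash_shift bits via
 * "hash >> (32 - hash_shift)" rather than by masking the low bits.
 * E.g. with hash_shift == 3 the top three bits pick one of 8 buckets.
 */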
415 
416 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
417 			       struct net_device *dev)
418 {
419 	struct neighbour *n;
420 	int key_len = tbl->key_len;
421 	u32 hash_val;
422 	struct neigh_hash_table *nht;
423 
424 	NEIGH_CACHE_STAT_INC(tbl, lookups);
425 
426 	rcu_read_lock_bh();
427 	nht = rcu_dereference_bh(tbl->nht);
428 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
429 
430 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
431 	     n != NULL;
432 	     n = rcu_dereference_bh(n->next)) {
433 		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
434 			if (!atomic_inc_not_zero(&n->refcnt))
435 				n = NULL;
436 			NEIGH_CACHE_STAT_INC(tbl, hits);
437 			break;
438 		}
439 	}
440 
441 	rcu_read_unlock_bh();
442 	return n;
443 }
444 EXPORT_SYMBOL(neigh_lookup);
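/* Usage sketch (illustrative; arp_tbl is the IPv4 instance of this API):
 * a successful lookup returns the entry with a reference already held,
 * so the caller must pair it with neigh_release().
 *
 *	__be32 ip = ...;                (target IPv4 address)
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &ip, dev);
 *	if (n) {
 *		... read n->ha under ha_lock, as the output paths below do ...
 *		neigh_release(n);
 *	}
 */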
445 
446 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
447 				     const void *pkey)
448 {
449 	struct neighbour *n;
450 	int key_len = tbl->key_len;
451 	u32 hash_val;
452 	struct neigh_hash_table *nht;
453 
454 	NEIGH_CACHE_STAT_INC(tbl, lookups);
455 
456 	rcu_read_lock_bh();
457 	nht = rcu_dereference_bh(tbl->nht);
458 	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
459 
460 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
461 	     n != NULL;
462 	     n = rcu_dereference_bh(n->next)) {
463 		if (!memcmp(n->primary_key, pkey, key_len) &&
464 		    net_eq(dev_net(n->dev), net)) {
465 			if (!atomic_inc_not_zero(&n->refcnt))
466 				n = NULL;
467 			NEIGH_CACHE_STAT_INC(tbl, hits);
468 			break;
469 		}
470 	}
471 
472 	rcu_read_unlock_bh();
473 	return n;
474 }
475 EXPORT_SYMBOL(neigh_lookup_nodev);
476 
477 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
478 			       struct net_device *dev)
479 {
480 	u32 hash_val;
481 	int key_len = tbl->key_len;
482 	int error;
483 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
484 	struct neigh_hash_table *nht;
485 
486 	if (!n) {
487 		rc = ERR_PTR(-ENOBUFS);
488 		goto out;
489 	}
490 
491 	memcpy(n->primary_key, pkey, key_len);
492 	n->dev = dev;
493 	dev_hold(dev);
494 
495 	/* Protocol specific setup. */
496 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
497 		rc = ERR_PTR(error);
498 		goto out_neigh_release;
499 	}
500 
501 	if (dev->netdev_ops->ndo_neigh_construct) {
502 		error = dev->netdev_ops->ndo_neigh_construct(n);
503 		if (error < 0) {
504 			rc = ERR_PTR(error);
505 			goto out_neigh_release;
506 		}
507 	}
508 
509 	/* Device specific setup. */
510 	if (n->parms->neigh_setup &&
511 	    (error = n->parms->neigh_setup(n)) < 0) {
512 		rc = ERR_PTR(error);
513 		goto out_neigh_release;
514 	}
515 
516 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
517 
518 	write_lock_bh(&tbl->lock);
519 	nht = rcu_dereference_protected(tbl->nht,
520 					lockdep_is_held(&tbl->lock));
521 
522 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
523 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
524 
525 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
526 
527 	if (n->parms->dead) {
528 		rc = ERR_PTR(-EINVAL);
529 		goto out_tbl_unlock;
530 	}
531 
532 	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
533 					    lockdep_is_held(&tbl->lock));
534 	     n1 != NULL;
535 	     n1 = rcu_dereference_protected(n1->next,
536 			lockdep_is_held(&tbl->lock))) {
537 		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
538 			neigh_hold(n1);
539 			rc = n1;
540 			goto out_tbl_unlock;
541 		}
542 	}
543 
544 	n->dead = 0;
545 	neigh_hold(n);
546 	rcu_assign_pointer(n->next,
547 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
548 						     lockdep_is_held(&tbl->lock)));
549 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
550 	write_unlock_bh(&tbl->lock);
551 	NEIGH_PRINTK2("neigh %p is created.\n", n);
552 	rc = n;
553 out:
554 	return rc;
555 out_tbl_unlock:
556 	write_unlock_bh(&tbl->lock);
557 out_neigh_release:
558 	neigh_release(n);
559 	goto out;
560 }
561 EXPORT_SYMBOL(neigh_create);
562 
563 static u32 pneigh_hash(const void *pkey, int key_len)
564 {
565 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
566 	hash_val ^= (hash_val >> 16);
567 	hash_val ^= hash_val >> 8;
568 	hash_val ^= hash_val >> 4;
569 	hash_val &= PNEIGH_HASHMASK;
570 	return hash_val;
571 }
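/* Worked example (assuming an IPv4 key, key_len == 4): the last four key
 * bytes are read as a u32 and folded -- x ^= x >> 16; x ^= x >> 8;
 * x ^= x >> 4 -- then masked with PNEIGH_HASHMASK (0xF), so every nibble
 * of the address influences which of the 16 proxy chains is used.
 */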
572 
573 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
574 					      struct net *net,
575 					      const void *pkey,
576 					      int key_len,
577 					      struct net_device *dev)
578 {
579 	while (n) {
580 		if (!memcmp(n->key, pkey, key_len) &&
581 		    net_eq(pneigh_net(n), net) &&
582 		    (n->dev == dev || !n->dev))
583 			return n;
584 		n = n->next;
585 	}
586 	return NULL;
587 }
588 
589 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
590 		struct net *net, const void *pkey, struct net_device *dev)
591 {
592 	int key_len = tbl->key_len;
593 	u32 hash_val = pneigh_hash(pkey, key_len);
594 
595 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
596 				 net, pkey, key_len, dev);
597 }
598 EXPORT_SYMBOL_GPL(__pneigh_lookup);
599 
600 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
601 				    struct net *net, const void *pkey,
602 				    struct net_device *dev, int creat)
603 {
604 	struct pneigh_entry *n;
605 	int key_len = tbl->key_len;
606 	u32 hash_val = pneigh_hash(pkey, key_len);
607 
608 	read_lock_bh(&tbl->lock);
609 	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
610 			      net, pkey, key_len, dev);
611 	read_unlock_bh(&tbl->lock);
612 
613 	if (n || !creat)
614 		goto out;
615 
616 	ASSERT_RTNL();
617 
618 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
619 	if (!n)
620 		goto out;
621 
622 	write_pnet(&n->net, hold_net(net));
623 	memcpy(n->key, pkey, key_len);
624 	n->dev = dev;
625 	if (dev)
626 		dev_hold(dev);
627 
628 	if (tbl->pconstructor && tbl->pconstructor(n)) {
629 		if (dev)
630 			dev_put(dev);
631 		release_net(net);
632 		kfree(n);
633 		n = NULL;
634 		goto out;
635 	}
636 
637 	write_lock_bh(&tbl->lock);
638 	n->next = tbl->phash_buckets[hash_val];
639 	tbl->phash_buckets[hash_val] = n;
640 	write_unlock_bh(&tbl->lock);
641 out:
642 	return n;
643 }
644 EXPORT_SYMBOL(pneigh_lookup);
645 
646 
647 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
648 		  struct net_device *dev)
649 {
650 	struct pneigh_entry *n, **np;
651 	int key_len = tbl->key_len;
652 	u32 hash_val = pneigh_hash(pkey, key_len);
653 
654 	write_lock_bh(&tbl->lock);
655 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
656 	     np = &n->next) {
657 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
658 		    net_eq(pneigh_net(n), net)) {
659 			*np = n->next;
660 			write_unlock_bh(&tbl->lock);
661 			if (tbl->pdestructor)
662 				tbl->pdestructor(n);
663 			if (n->dev)
664 				dev_put(n->dev);
665 			release_net(pneigh_net(n));
666 			kfree(n);
667 			return 0;
668 		}
669 	}
670 	write_unlock_bh(&tbl->lock);
671 	return -ENOENT;
672 }
673 
674 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
675 {
676 	struct pneigh_entry *n, **np;
677 	u32 h;
678 
679 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
680 		np = &tbl->phash_buckets[h];
681 		while ((n = *np) != NULL) {
682 			if (!dev || n->dev == dev) {
683 				*np = n->next;
684 				if (tbl->pdestructor)
685 					tbl->pdestructor(n);
686 				if (n->dev)
687 					dev_put(n->dev);
688 				release_net(pneigh_net(n));
689 				kfree(n);
690 				continue;
691 			}
692 			np = &n->next;
693 		}
694 	}
695 	return -ENOENT;
696 }
697 
698 static void neigh_parms_destroy(struct neigh_parms *parms);
699 
700 static inline void neigh_parms_put(struct neigh_parms *parms)
701 {
702 	if (atomic_dec_and_test(&parms->refcnt))
703 		neigh_parms_destroy(parms);
704 }
705 
706 /*
707  *	The neighbour must already be out of the table.
708  *
709  */
710 void neigh_destroy(struct neighbour *neigh)
711 {
712 	struct net_device *dev = neigh->dev;
713 
714 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
715 
716 	if (!neigh->dead) {
717 		pr_warn("Destroying alive neighbour %p\n", neigh);
718 		dump_stack();
719 		return;
720 	}
721 
722 	if (neigh_del_timer(neigh))
723 		pr_warn("Impossible event\n");
724 
725 	skb_queue_purge(&neigh->arp_queue);
726 	neigh->arp_queue_len_bytes = 0;
727 
728 	if (dev->netdev_ops->ndo_neigh_destroy)
729 		dev->netdev_ops->ndo_neigh_destroy(neigh);
730 
731 	dev_put(dev);
732 	neigh_parms_put(neigh->parms);
733 
734 	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
735 
736 	atomic_dec(&neigh->tbl->entries);
737 	kfree_rcu(neigh, rcu);
738 }
739 EXPORT_SYMBOL(neigh_destroy);
740 
741 /* Neighbour state is suspicious;
742    disable fast path.
743 
744    Called with the neigh entry write-locked.
745  */
746 static void neigh_suspect(struct neighbour *neigh)
747 {
748 	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
749 
750 	neigh->output = neigh->ops->output;
751 }
752 
753 /* Neighbour state is OK;
754    enable fast path.
755 
756    Called with the neigh entry write-locked.
757  */
758 static void neigh_connect(struct neighbour *neigh)
759 {
760 	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
761 
762 	neigh->output = neigh->ops->connected_output;
763 }
764 
765 static void neigh_periodic_work(struct work_struct *work)
766 {
767 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
768 	struct neighbour *n;
769 	struct neighbour __rcu **np;
770 	unsigned int i;
771 	struct neigh_hash_table *nht;
772 
773 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
774 
775 	write_lock_bh(&tbl->lock);
776 	nht = rcu_dereference_protected(tbl->nht,
777 					lockdep_is_held(&tbl->lock));
778 
779 	/*
780 	 *	periodically recompute ReachableTime from random function
781 	 */
782 
783 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
784 		struct neigh_parms *p;
785 		tbl->last_rand = jiffies;
786 		for (p = &tbl->parms; p; p = p->next)
787 			p->reachable_time =
788 				neigh_rand_reach_time(p->base_reachable_time);
789 	}
790 
791 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
792 		np = &nht->hash_buckets[i];
793 
794 		while ((n = rcu_dereference_protected(*np,
795 				lockdep_is_held(&tbl->lock))) != NULL) {
796 			unsigned int state;
797 
798 			write_lock(&n->lock);
799 
800 			state = n->nud_state;
801 			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
802 				write_unlock(&n->lock);
803 				goto next_elt;
804 			}
805 
806 			if (time_before(n->used, n->confirmed))
807 				n->used = n->confirmed;
808 
809 			if (atomic_read(&n->refcnt) == 1 &&
810 			    (state == NUD_FAILED ||
811 			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
812 				*np = n->next;
813 				n->dead = 1;
814 				write_unlock(&n->lock);
815 				neigh_cleanup_and_release(n);
816 				continue;
817 			}
818 			write_unlock(&n->lock);
819 
820 next_elt:
821 			np = &n->next;
822 		}
823 		/*
824 		 * It's fine to release lock here, even if hash table
825 		 * grows while we are preempted.
826 		 */
827 		write_unlock_bh(&tbl->lock);
828 		cond_resched();
829 		write_lock_bh(&tbl->lock);
830 		nht = rcu_dereference_protected(tbl->nht,
831 						lockdep_is_held(&tbl->lock));
832 	}
833 	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
834 	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
835 	 * base_reachable_time.
836 	 */
837 	schedule_delayed_work(&tbl->gc_work,
838 			      tbl->parms.base_reachable_time >> 1);
839 	write_unlock_bh(&tbl->lock);
840 }
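/* Cadence example (illustrative): with the common base_reachable_time of
 * 30 * HZ, the delayed work above re-arms every 15 seconds, so each bucket
 * is revisited about twice per base_reachable_time -- which is what the
 * "Cycle through all hash buckets" comment above is counting on.
 */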
841 
842 static __inline__ int neigh_max_probes(struct neighbour *n)
843 {
844 	struct neigh_parms *p = n->parms;
845 	return (n->nud_state & NUD_PROBE) ?
846 		p->ucast_probes :
847 		p->ucast_probes + p->app_probes + p->mcast_probes;
848 }
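/* Example with ARP's usual defaults (ucast_probes = 3, app_probes = 0,
 * mcast_probes = 3; an assumption for illustration): an entry in NUD_PROBE
 * is allowed 3 unicast probes, while one still in NUD_INCOMPLETE may spend
 * up to 3 + 0 + 3 = 6 probes before the timer declares it NUD_FAILED.
 */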
849 
850 static void neigh_invalidate(struct neighbour *neigh)
851 	__releases(neigh->lock)
852 	__acquires(neigh->lock)
853 {
854 	struct sk_buff *skb;
855 
856 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
857 	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
858 	neigh->updated = jiffies;
859 
860 	/* This is a very delicate spot: report_unreachable is a very
861 	   complicated routine. In particular, it can hit the same neighbour entry!
862
863 	   Because of that, we try to be careful and avoid a dead loop. --ANK
864 	 */
865 	while (neigh->nud_state == NUD_FAILED &&
866 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
867 		write_unlock(&neigh->lock);
868 		neigh->ops->error_report(neigh, skb);
869 		write_lock(&neigh->lock);
870 	}
871 	skb_queue_purge(&neigh->arp_queue);
872 	neigh->arp_queue_len_bytes = 0;
873 }
874 
875 static void neigh_probe(struct neighbour *neigh)
876 	__releases(neigh->lock)
877 {
878 	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
879 	/* keep skb alive even if arp_queue overflows */
880 	if (skb)
881 		skb = skb_copy(skb, GFP_ATOMIC);
882 	write_unlock(&neigh->lock);
883 	neigh->ops->solicit(neigh, skb);
884 	atomic_inc(&neigh->probes);
885 	kfree_skb(skb);
886 }
887 
888 /* Called when a timer expires for a neighbour entry. */
889 
890 static void neigh_timer_handler(unsigned long arg)
891 {
892 	unsigned long now, next;
893 	struct neighbour *neigh = (struct neighbour *)arg;
894 	unsigned int state;
895 	int notify = 0;
896 
897 	write_lock(&neigh->lock);
898 
899 	state = neigh->nud_state;
900 	now = jiffies;
901 	next = now + HZ;
902 
903 	if (!(state & NUD_IN_TIMER))
904 		goto out;
905 
906 	if (state & NUD_REACHABLE) {
907 		if (time_before_eq(now,
908 				   neigh->confirmed + neigh->parms->reachable_time)) {
909 			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
910 			next = neigh->confirmed + neigh->parms->reachable_time;
911 		} else if (time_before_eq(now,
912 					  neigh->used + neigh->parms->delay_probe_time)) {
913 			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
914 			neigh->nud_state = NUD_DELAY;
915 			neigh->updated = jiffies;
916 			neigh_suspect(neigh);
917 			next = now + neigh->parms->delay_probe_time;
918 		} else {
919 			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
920 			neigh->nud_state = NUD_STALE;
921 			neigh->updated = jiffies;
922 			neigh_suspect(neigh);
923 			notify = 1;
924 		}
925 	} else if (state & NUD_DELAY) {
926 		if (time_before_eq(now,
927 				   neigh->confirmed + neigh->parms->delay_probe_time)) {
928 			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
929 			neigh->nud_state = NUD_REACHABLE;
930 			neigh->updated = jiffies;
931 			neigh_connect(neigh);
932 			notify = 1;
933 			next = neigh->confirmed + neigh->parms->reachable_time;
934 		} else {
935 			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
936 			neigh->nud_state = NUD_PROBE;
937 			neigh->updated = jiffies;
938 			atomic_set(&neigh->probes, 0);
939 			next = now + neigh->parms->retrans_time;
940 		}
941 	} else {
942 		/* NUD_PROBE|NUD_INCOMPLETE */
943 		next = now + neigh->parms->retrans_time;
944 	}
945 
946 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
947 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
948 		neigh->nud_state = NUD_FAILED;
949 		notify = 1;
950 		neigh_invalidate(neigh);
951 	}
952 
953 	if (neigh->nud_state & NUD_IN_TIMER) {
954 		if (time_before(next, jiffies + HZ/2))
955 			next = jiffies + HZ/2;
956 		if (!mod_timer(&neigh->timer, next))
957 			neigh_hold(neigh);
958 	}
959 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
960 		neigh_probe(neigh);
961 	} else {
962 out:
963 		write_unlock(&neigh->lock);
964 	}
965 
966 	if (notify)
967 		neigh_update_notify(neigh);
968 
969 	neigh_release(neigh);
970 }
971 
972 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
973 {
974 	int rc;
975 	bool immediate_probe = false;
976 
977 	write_lock_bh(&neigh->lock);
978 
979 	rc = 0;
980 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
981 		goto out_unlock_bh;
982 
983 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
984 		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
985 			unsigned long next, now = jiffies;
986 
987 			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
988 			neigh->nud_state     = NUD_INCOMPLETE;
989 			neigh->updated = now;
990 			next = now + max(neigh->parms->retrans_time, HZ/2);
991 			neigh_add_timer(neigh, next);
992 			immediate_probe = true;
993 		} else {
994 			neigh->nud_state = NUD_FAILED;
995 			neigh->updated = jiffies;
996 			write_unlock_bh(&neigh->lock);
997 
998 			kfree_skb(skb);
999 			return 1;
1000 		}
1001 	} else if (neigh->nud_state & NUD_STALE) {
1002 		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
1003 		neigh->nud_state = NUD_DELAY;
1004 		neigh->updated = jiffies;
1005 		neigh_add_timer(neigh,
1006 				jiffies + neigh->parms->delay_probe_time);
1007 	}
1008 
1009 	if (neigh->nud_state == NUD_INCOMPLETE) {
1010 		if (skb) {
1011 			while (neigh->arp_queue_len_bytes + skb->truesize >
1012 			       neigh->parms->queue_len_bytes) {
1013 				struct sk_buff *buff;
1014 
1015 				buff = __skb_dequeue(&neigh->arp_queue);
1016 				if (!buff)
1017 					break;
1018 				neigh->arp_queue_len_bytes -= buff->truesize;
1019 				kfree_skb(buff);
1020 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1021 			}
1022 			skb_dst_force(skb);
1023 			__skb_queue_tail(&neigh->arp_queue, skb);
1024 			neigh->arp_queue_len_bytes += skb->truesize;
1025 		}
1026 		rc = 1;
1027 	}
1028 out_unlock_bh:
1029 	if (immediate_probe)
1030 		neigh_probe(neigh);
1031 	else
1032 		write_unlock(&neigh->lock);
1033 	local_bh_enable();
1034 	return rc;
1035 }
1036 EXPORT_SYMBOL(__neigh_event_send);
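/* Return convention (as relied upon by neigh_resolve_output() below):
 * 0 means the entry is usable now and the caller may transmit immediately;
 * 1 means the skb was queued on arp_queue pending resolution (or dropped,
 * when no probes are configured and the entry went straight to NUD_FAILED).
 */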
1037 
1038 static void neigh_update_hhs(struct neighbour *neigh)
1039 {
1040 	struct hh_cache *hh;
1041 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1042 		= NULL;
1043 
1044 	if (neigh->dev->header_ops)
1045 		update = neigh->dev->header_ops->cache_update;
1046 
1047 	if (update) {
1048 		hh = &neigh->hh;
1049 		if (hh->hh_len) {
1050 			write_seqlock_bh(&hh->hh_lock);
1051 			update(hh, neigh->dev, neigh->ha);
1052 			write_sequnlock_bh(&hh->hh_lock);
1053 		}
1054 	}
1055 }
1056 
1057 
1058 
1059 /* Generic update routine.
1060    -- lladdr is the new lladdr, or NULL if it is not supplied.
1061    -- new    is the new state.
1062    -- flags
1063 	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
1064 				if it is different.
1065 	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
1066 				lladdr instead of overriding it
1067 				if it is different.
1068 				It also allows the current state to be
1069 				retained if the lladdr is unchanged.
1070 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1071
1072 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
1073 				NTF_ROUTER flag.
1074 	NEIGH_UPDATE_F_ISROUTER	indicates that the neighbour is known to be
1075 				a router.
1076
1077    The caller MUST hold a reference count on the entry.
1078  */
1079 
1080 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1081 		 u32 flags)
1082 {
1083 	u8 old;
1084 	int err;
1085 	int notify = 0;
1086 	struct net_device *dev;
1087 	int update_isrouter = 0;
1088 
1089 	write_lock_bh(&neigh->lock);
1090 
1091 	dev    = neigh->dev;
1092 	old    = neigh->nud_state;
1093 	err    = -EPERM;
1094 
1095 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1096 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1097 		goto out;
1098 
1099 	if (!(new & NUD_VALID)) {
1100 		neigh_del_timer(neigh);
1101 		if (old & NUD_CONNECTED)
1102 			neigh_suspect(neigh);
1103 		neigh->nud_state = new;
1104 		err = 0;
1105 		notify = old & NUD_VALID;
1106 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1107 		    (new & NUD_FAILED)) {
1108 			neigh_invalidate(neigh);
1109 			notify = 1;
1110 		}
1111 		goto out;
1112 	}
1113 
1114 	/* Compare new lladdr with cached one */
1115 	if (!dev->addr_len) {
1116 		/* First case: device needs no address. */
1117 		lladdr = neigh->ha;
1118 	} else if (lladdr) {
1119 		/* The second case: if something is already cached
1120 		   and a new address is proposed:
1121 		   - compare new & old
1122 		   - if they are different, check override flag
1123 		 */
1124 		if ((old & NUD_VALID) &&
1125 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1126 			lladdr = neigh->ha;
1127 	} else {
1128 		/* No address is supplied; if we know something,
1129 		   use it, otherwise discard the request.
1130 		 */
1131 		err = -EINVAL;
1132 		if (!(old & NUD_VALID))
1133 			goto out;
1134 		lladdr = neigh->ha;
1135 	}
1136 
1137 	if (new & NUD_CONNECTED)
1138 		neigh->confirmed = jiffies;
1139 	neigh->updated = jiffies;
1140 
1141 	/* If the entry was valid and the address has not changed,
1142 	   do not change the entry state if the new state is STALE.
1143 	 */
1144 	err = 0;
1145 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1146 	if (old & NUD_VALID) {
1147 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1148 			update_isrouter = 0;
1149 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1150 			    (old & NUD_CONNECTED)) {
1151 				lladdr = neigh->ha;
1152 				new = NUD_STALE;
1153 			} else
1154 				goto out;
1155 		} else {
1156 			if (lladdr == neigh->ha && new == NUD_STALE &&
1157 			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1158 			     (old & NUD_CONNECTED))
1159 			    )
1160 				new = old;
1161 		}
1162 	}
1163 
1164 	if (new != old) {
1165 		neigh_del_timer(neigh);
1166 		if (new & NUD_IN_TIMER)
1167 			neigh_add_timer(neigh, (jiffies +
1168 						((new & NUD_REACHABLE) ?
1169 						 neigh->parms->reachable_time :
1170 						 0)));
1171 		neigh->nud_state = new;
1172 	}
1173 
1174 	if (lladdr != neigh->ha) {
1175 		write_seqlock(&neigh->ha_lock);
1176 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1177 		write_sequnlock(&neigh->ha_lock);
1178 		neigh_update_hhs(neigh);
1179 		if (!(new & NUD_CONNECTED))
1180 			neigh->confirmed = jiffies -
1181 				      (neigh->parms->base_reachable_time << 1);
1182 		notify = 1;
1183 	}
1184 	if (new == old)
1185 		goto out;
1186 	if (new & NUD_CONNECTED)
1187 		neigh_connect(neigh);
1188 	else
1189 		neigh_suspect(neigh);
1190 	if (!(old & NUD_VALID)) {
1191 		struct sk_buff *skb;
1192 
1193 		/* Again: avoid dead loop if something went wrong */
1194 
1195 		while (neigh->nud_state & NUD_VALID &&
1196 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1197 			struct dst_entry *dst = skb_dst(skb);
1198 			struct neighbour *n2, *n1 = neigh;
1199 			write_unlock_bh(&neigh->lock);
1200 
1201 			rcu_read_lock();
1202 			/* On shaper/eql skb->dst->neighbour != neigh :( */
1203 			if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
1204 				n1 = n2;
1205 			n1->output(n1, skb);
1206 			rcu_read_unlock();
1207 
1208 			write_lock_bh(&neigh->lock);
1209 		}
1210 		skb_queue_purge(&neigh->arp_queue);
1211 		neigh->arp_queue_len_bytes = 0;
1212 	}
1213 out:
1214 	if (update_isrouter) {
1215 		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1216 			(neigh->flags | NTF_ROUTER) :
1217 			(neigh->flags & ~NTF_ROUTER);
1218 	}
1219 	write_unlock_bh(&neigh->lock);
1220 
1221 	if (notify)
1222 		neigh_update_notify(neigh);
1223 
1224 	return err;
1225 }
1226 EXPORT_SYMBOL(neigh_update);
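/* Usage sketch (illustrative): this is how a protocol typically confirms a
 * neighbour from a received reply -- compare neigh_event_ns() just below,
 * which applies NUD_STALE with plain NEIGH_UPDATE_F_OVERRIDE instead.
 *
 *	neigh_update(neigh, lladdr, NUD_REACHABLE,
 *		     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_WEAK_OVERRIDE);
 */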
1227 
1228 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1229 				 u8 *lladdr, void *saddr,
1230 				 struct net_device *dev)
1231 {
1232 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1233 						 lladdr || !dev->addr_len);
1234 	if (neigh)
1235 		neigh_update(neigh, lladdr, NUD_STALE,
1236 			     NEIGH_UPDATE_F_OVERRIDE);
1237 	return neigh;
1238 }
1239 EXPORT_SYMBOL(neigh_event_ns);
1240 
1241 /* Takes write_lock_bh(&n->lock) itself; the caller must not hold n->lock. */
1242 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1243 {
1244 	struct net_device *dev = dst->dev;
1245 	__be16 prot = dst->ops->protocol;
1246 	struct hh_cache	*hh = &n->hh;
1247 
1248 	write_lock_bh(&n->lock);
1249 
1250 	/* Only one thread can come in here and initialize the
1251 	 * hh_cache entry.
1252 	 */
1253 	if (!hh->hh_len)
1254 		dev->header_ops->cache(n, hh, prot);
1255 
1256 	write_unlock_bh(&n->lock);
1257 }
1258 
1259 /* This function can be used in contexts where only the old dev_queue_xmit
1260  * worked, e.g. if you want to override the normal output path (eql, shaper)
1261  * but resolution has not been made yet.
1262  */
1263 
1264 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1265 {
1266 	struct net_device *dev = skb->dev;
1267 
1268 	__skb_pull(skb, skb_network_offset(skb));
1269 
1270 	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1271 			    skb->len) < 0 &&
1272 	    dev->header_ops->rebuild(skb))
1273 		return 0;
1274 
1275 	return dev_queue_xmit(skb);
1276 }
1277 EXPORT_SYMBOL(neigh_compat_output);
1278 
1279 /* Slow and careful. */
1280 
1281 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1282 {
1283 	struct dst_entry *dst = skb_dst(skb);
1284 	int rc = 0;
1285 
1286 	if (!dst)
1287 		goto discard;
1288 
1289 	__skb_pull(skb, skb_network_offset(skb));
1290 
1291 	if (!neigh_event_send(neigh, skb)) {
1292 		int err;
1293 		struct net_device *dev = neigh->dev;
1294 		unsigned int seq;
1295 
1296 		if (dev->header_ops->cache && !neigh->hh.hh_len)
1297 			neigh_hh_init(neigh, dst);
1298 
1299 		do {
1300 			seq = read_seqbegin(&neigh->ha_lock);
1301 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1302 					      neigh->ha, NULL, skb->len);
1303 		} while (read_seqretry(&neigh->ha_lock, seq));
1304 
1305 		if (err >= 0)
1306 			rc = dev_queue_xmit(skb);
1307 		else
1308 			goto out_kfree_skb;
1309 	}
1310 out:
1311 	return rc;
1312 discard:
1313 	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1314 		      dst, neigh);
1315 out_kfree_skb:
1316 	rc = -EINVAL;
1317 	kfree_skb(skb);
1318 	goto out;
1319 }
1320 EXPORT_SYMBOL(neigh_resolve_output);
1321 
1322 /* As fast as possible without hh cache */
1323 
1324 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1325 {
1326 	struct net_device *dev = neigh->dev;
1327 	unsigned int seq;
1328 	int err;
1329 
1330 	__skb_pull(skb, skb_network_offset(skb));
1331 
1332 	do {
1333 		seq = read_seqbegin(&neigh->ha_lock);
1334 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1335 				      neigh->ha, NULL, skb->len);
1336 	} while (read_seqretry(&neigh->ha_lock, seq));
1337 
1338 	if (err >= 0)
1339 		err = dev_queue_xmit(skb);
1340 	else {
1341 		err = -EINVAL;
1342 		kfree_skb(skb);
1343 	}
1344 	return err;
1345 }
1346 EXPORT_SYMBOL(neigh_connected_output);
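/* The ha_lock pattern above (also used by neigh_resolve_output): readers
 * copy neigh->ha without blocking the writer in neigh_update(); if the
 * address changed mid-copy, read_seqretry() notices and the copy is simply
 * redone. A minimal reader sketch, with dest/dev as stand-ins:
 *
 *	unsigned int seq;
 *	do {
 *		seq = read_seqbegin(&neigh->ha_lock);
 *		memcpy(dest, neigh->ha, dev->addr_len);
 *	} while (read_seqretry(&neigh->ha_lock, seq));
 */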
1347 
1348 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1349 {
1350 	return dev_queue_xmit(skb);
1351 }
1352 EXPORT_SYMBOL(neigh_direct_output);
1353 
1354 static void neigh_proxy_process(unsigned long arg)
1355 {
1356 	struct neigh_table *tbl = (struct neigh_table *)arg;
1357 	long sched_next = 0;
1358 	unsigned long now = jiffies;
1359 	struct sk_buff *skb, *n;
1360 
1361 	spin_lock(&tbl->proxy_queue.lock);
1362 
1363 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1364 		long tdif = NEIGH_CB(skb)->sched_next - now;
1365 
1366 		if (tdif <= 0) {
1367 			struct net_device *dev = skb->dev;
1368 
1369 			__skb_unlink(skb, &tbl->proxy_queue);
1370 			if (tbl->proxy_redo && netif_running(dev)) {
1371 				rcu_read_lock();
1372 				tbl->proxy_redo(skb);
1373 				rcu_read_unlock();
1374 			} else {
1375 				kfree_skb(skb);
1376 			}
1377 
1378 			dev_put(dev);
1379 		} else if (!sched_next || tdif < sched_next)
1380 			sched_next = tdif;
1381 	}
1382 	del_timer(&tbl->proxy_timer);
1383 	if (sched_next)
1384 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1385 	spin_unlock(&tbl->proxy_queue.lock);
1386 }
1387 
1388 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1389 		    struct sk_buff *skb)
1390 {
1391 	unsigned long now = jiffies;
1392 	unsigned long sched_next = now + (net_random() % p->proxy_delay);
1393 
1394 	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1395 		kfree_skb(skb);
1396 		return;
1397 	}
1398 
1399 	NEIGH_CB(skb)->sched_next = sched_next;
1400 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1401 
1402 	spin_lock(&tbl->proxy_queue.lock);
1403 	if (del_timer(&tbl->proxy_timer)) {
1404 		if (time_before(tbl->proxy_timer.expires, sched_next))
1405 			sched_next = tbl->proxy_timer.expires;
1406 	}
1407 	skb_dst_drop(skb);
1408 	dev_hold(skb->dev);
1409 	__skb_queue_tail(&tbl->proxy_queue, skb);
1410 	mod_timer(&tbl->proxy_timer, sched_next);
1411 	spin_unlock(&tbl->proxy_queue.lock);
1412 }
1413 EXPORT_SYMBOL(pneigh_enqueue);
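/* Timing sketch (illustrative; ARP's default proxy_delay is (8 * HZ) / 10,
 * i.e. 0.8 s): each proxied skb waits a random 0..proxy_delay in
 * proxy_queue before neigh_proxy_process() re-runs it via tbl->proxy_redo
 * or drops it; anything beyond proxy_qlen packets is dropped immediately.
 */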
1414 
1415 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1416 						      struct net *net, int ifindex)
1417 {
1418 	struct neigh_parms *p;
1419 
1420 	for (p = &tbl->parms; p; p = p->next) {
1421 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1422 		    (!p->dev && !ifindex))
1423 			return p;
1424 	}
1425 
1426 	return NULL;
1427 }
1428 
1429 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1430 				      struct neigh_table *tbl)
1431 {
1432 	struct neigh_parms *p, *ref;
1433 	struct net *net = dev_net(dev);
1434 	const struct net_device_ops *ops = dev->netdev_ops;
1435 
1436 	ref = lookup_neigh_parms(tbl, net, 0);
1437 	if (!ref)
1438 		return NULL;
1439 
1440 	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1441 	if (p) {
1442 		p->tbl		  = tbl;
1443 		atomic_set(&p->refcnt, 1);
1444 		p->reachable_time =
1445 				neigh_rand_reach_time(p->base_reachable_time);
1446 
1447 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1448 			kfree(p);
1449 			return NULL;
1450 		}
1451 
1452 		dev_hold(dev);
1453 		p->dev = dev;
1454 		write_pnet(&p->net, hold_net(net));
1455 		p->sysctl_table = NULL;
1456 		write_lock_bh(&tbl->lock);
1457 		p->next		= tbl->parms.next;
1458 		tbl->parms.next = p;
1459 		write_unlock_bh(&tbl->lock);
1460 	}
1461 	return p;
1462 }
1463 EXPORT_SYMBOL(neigh_parms_alloc);
1464 
1465 static void neigh_rcu_free_parms(struct rcu_head *head)
1466 {
1467 	struct neigh_parms *parms =
1468 		container_of(head, struct neigh_parms, rcu_head);
1469 
1470 	neigh_parms_put(parms);
1471 }
1472 
1473 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1474 {
1475 	struct neigh_parms **p;
1476 
1477 	if (!parms || parms == &tbl->parms)
1478 		return;
1479 	write_lock_bh(&tbl->lock);
1480 	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1481 		if (*p == parms) {
1482 			*p = parms->next;
1483 			parms->dead = 1;
1484 			write_unlock_bh(&tbl->lock);
1485 			if (parms->dev)
1486 				dev_put(parms->dev);
1487 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1488 			return;
1489 		}
1490 	}
1491 	write_unlock_bh(&tbl->lock);
1492 	NEIGH_PRINTK1("neigh_parms_release: not found\n");
1493 }
1494 EXPORT_SYMBOL(neigh_parms_release);
1495 
1496 static void neigh_parms_destroy(struct neigh_parms *parms)
1497 {
1498 	release_net(neigh_parms_net(parms));
1499 	kfree(parms);
1500 }
1501 
1502 static struct lock_class_key neigh_table_proxy_queue_class;
1503 
1504 static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1505 {
1506 	unsigned long now = jiffies;
1507 	unsigned long phsize;
1508 
1509 	write_pnet(&tbl->parms.net, &init_net);
1510 	atomic_set(&tbl->parms.refcnt, 1);
1511 	tbl->parms.reachable_time =
1512 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
1513 
1514 	tbl->stats = alloc_percpu(struct neigh_statistics);
1515 	if (!tbl->stats)
1516 		panic("cannot create neighbour cache statistics");
1517 
1518 #ifdef CONFIG_PROC_FS
1519 	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1520 			      &neigh_stat_seq_fops, tbl))
1521 		panic("cannot create neighbour proc dir entry");
1522 #endif
1523 
1524 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1525 
1526 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1527 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1528 
1529 	if (!tbl->nht || !tbl->phash_buckets)
1530 		panic("cannot allocate neighbour cache hashes");
1531 
1532 	rwlock_init(&tbl->lock);
1533 	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1534 	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1535 	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1536 	skb_queue_head_init_class(&tbl->proxy_queue,
1537 			&neigh_table_proxy_queue_class);
1538 
1539 	tbl->last_flush = now;
1540 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1541 }
1542 
1543 void neigh_table_init(struct neigh_table *tbl)
1544 {
1545 	struct neigh_table *tmp;
1546 
1547 	neigh_table_init_no_netlink(tbl);
1548 	write_lock(&neigh_tbl_lock);
1549 	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1550 		if (tmp->family == tbl->family)
1551 			break;
1552 	}
1553 	tbl->next	= neigh_tables;
1554 	neigh_tables	= tbl;
1555 	write_unlock(&neigh_tbl_lock);
1556 
1557 	if (unlikely(tmp)) {
1558 		pr_err("Registering multiple tables for family %d\n",
1559 		       tbl->family);
1560 		dump_stack();
1561 	}
1562 }
1563 EXPORT_SYMBOL(neigh_table_init);
1564 
1565 int neigh_table_clear(struct neigh_table *tbl)
1566 {
1567 	struct neigh_table **tp;
1568 
1569 	/* It is not clean... Fix it to unload IPv6 module safely */
1570 	cancel_delayed_work_sync(&tbl->gc_work);
1571 	del_timer_sync(&tbl->proxy_timer);
1572 	pneigh_queue_purge(&tbl->proxy_queue);
1573 	neigh_ifdown(tbl, NULL);
1574 	if (atomic_read(&tbl->entries))
1575 		pr_crit("neighbour leakage\n");
1576 	write_lock(&neigh_tbl_lock);
1577 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1578 		if (*tp == tbl) {
1579 			*tp = tbl->next;
1580 			break;
1581 		}
1582 	}
1583 	write_unlock(&neigh_tbl_lock);
1584 
1585 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1586 		 neigh_hash_free_rcu);
1587 	tbl->nht = NULL;
1588 
1589 	kfree(tbl->phash_buckets);
1590 	tbl->phash_buckets = NULL;
1591 
1592 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1593 
1594 	free_percpu(tbl->stats);
1595 	tbl->stats = NULL;
1596 
1597 	return 0;
1598 }
1599 EXPORT_SYMBOL(neigh_table_clear);
1600 
1601 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1602 {
1603 	struct net *net = sock_net(skb->sk);
1604 	struct ndmsg *ndm;
1605 	struct nlattr *dst_attr;
1606 	struct neigh_table *tbl;
1607 	struct net_device *dev = NULL;
1608 	int err = -EINVAL;
1609 
1610 	ASSERT_RTNL();
1611 	if (nlmsg_len(nlh) < sizeof(*ndm))
1612 		goto out;
1613 
1614 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1615 	if (dst_attr == NULL)
1616 		goto out;
1617 
1618 	ndm = nlmsg_data(nlh);
1619 	if (ndm->ndm_ifindex) {
1620 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1621 		if (dev == NULL) {
1622 			err = -ENODEV;
1623 			goto out;
1624 		}
1625 	}
1626 
1627 	read_lock(&neigh_tbl_lock);
1628 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1629 		struct neighbour *neigh;
1630 
1631 		if (tbl->family != ndm->ndm_family)
1632 			continue;
1633 		read_unlock(&neigh_tbl_lock);
1634 
1635 		if (nla_len(dst_attr) < tbl->key_len)
1636 			goto out;
1637 
1638 		if (ndm->ndm_flags & NTF_PROXY) {
1639 			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1640 			goto out;
1641 		}
1642 
1643 		if (dev == NULL)
1644 			goto out;
1645 
1646 		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1647 		if (neigh == NULL) {
1648 			err = -ENOENT;
1649 			goto out;
1650 		}
1651 
1652 		err = neigh_update(neigh, NULL, NUD_FAILED,
1653 				   NEIGH_UPDATE_F_OVERRIDE |
1654 				   NEIGH_UPDATE_F_ADMIN);
1655 		neigh_release(neigh);
1656 		goto out;
1657 	}
1658 	read_unlock(&neigh_tbl_lock);
1659 	err = -EAFNOSUPPORT;
1660 
1661 out:
1662 	return err;
1663 }
1664 
1665 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1666 {
1667 	struct net *net = sock_net(skb->sk);
1668 	struct ndmsg *ndm;
1669 	struct nlattr *tb[NDA_MAX+1];
1670 	struct neigh_table *tbl;
1671 	struct net_device *dev = NULL;
1672 	int err;
1673 
1674 	ASSERT_RTNL();
1675 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1676 	if (err < 0)
1677 		goto out;
1678 
1679 	err = -EINVAL;
1680 	if (tb[NDA_DST] == NULL)
1681 		goto out;
1682 
1683 	ndm = nlmsg_data(nlh);
1684 	if (ndm->ndm_ifindex) {
1685 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1686 		if (dev == NULL) {
1687 			err = -ENODEV;
1688 			goto out;
1689 		}
1690 
1691 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1692 			goto out;
1693 	}
1694 
1695 	read_lock(&neigh_tbl_lock);
1696 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1697 		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1698 		struct neighbour *neigh;
1699 		void *dst, *lladdr;
1700 
1701 		if (tbl->family != ndm->ndm_family)
1702 			continue;
1703 		read_unlock(&neigh_tbl_lock);
1704 
1705 		if (nla_len(tb[NDA_DST]) < tbl->key_len)
1706 			goto out;
1707 		dst = nla_data(tb[NDA_DST]);
1708 		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1709 
1710 		if (ndm->ndm_flags & NTF_PROXY) {
1711 			struct pneigh_entry *pn;
1712 
1713 			err = -ENOBUFS;
1714 			pn = pneigh_lookup(tbl, net, dst, dev, 1);
1715 			if (pn) {
1716 				pn->flags = ndm->ndm_flags;
1717 				err = 0;
1718 			}
1719 			goto out;
1720 		}
1721 
1722 		if (dev == NULL)
1723 			goto out;
1724 
1725 		neigh = neigh_lookup(tbl, dst, dev);
1726 		if (neigh == NULL) {
1727 			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1728 				err = -ENOENT;
1729 				goto out;
1730 			}
1731 
1732 			neigh = __neigh_lookup_errno(tbl, dst, dev);
1733 			if (IS_ERR(neigh)) {
1734 				err = PTR_ERR(neigh);
1735 				goto out;
1736 			}
1737 		} else {
1738 			if (nlh->nlmsg_flags & NLM_F_EXCL) {
1739 				err = -EEXIST;
1740 				neigh_release(neigh);
1741 				goto out;
1742 			}
1743 
1744 			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1745 				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1746 		}
1747 
1748 		if (ndm->ndm_flags & NTF_USE) {
1749 			neigh_event_send(neigh, NULL);
1750 			err = 0;
1751 		} else
1752 			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1753 		neigh_release(neigh);
1754 		goto out;
1755 	}
1756 
1757 	read_unlock(&neigh_tbl_lock);
1758 	err = -EAFNOSUPPORT;
1759 out:
1760 	return err;
1761 }
1762 
1763 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1764 {
1765 	struct nlattr *nest;
1766 
1767 	nest = nla_nest_start(skb, NDTA_PARMS);
1768 	if (nest == NULL)
1769 		return -ENOBUFS;
1770 
1771 	if ((parms->dev &&
1772 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1773 	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1774 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1775 	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
1776 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
1777 			DIV_ROUND_UP(parms->queue_len_bytes,
1778 				     SKB_TRUESIZE(ETH_FRAME_LEN))) ||
1779 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1780 	    nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1781 	    nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1782 	    nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1783 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1784 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1785 			  parms->base_reachable_time) ||
1786 	    nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1787 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1788 			  parms->delay_probe_time) ||
1789 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1790 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1791 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1792 	    nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1793 		goto nla_put_failure;
1794 	return nla_nest_end(skb, nest);
1795 
1796 nla_put_failure:
1797 	nla_nest_cancel(skb, nest);
1798 	return -EMSGSIZE;
1799 }
1800 
1801 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1802 			      u32 pid, u32 seq, int type, int flags)
1803 {
1804 	struct nlmsghdr *nlh;
1805 	struct ndtmsg *ndtmsg;
1806 
1807 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1808 	if (nlh == NULL)
1809 		return -EMSGSIZE;
1810 
1811 	ndtmsg = nlmsg_data(nlh);
1812 
1813 	read_lock_bh(&tbl->lock);
1814 	ndtmsg->ndtm_family = tbl->family;
1815 	ndtmsg->ndtm_pad1   = 0;
1816 	ndtmsg->ndtm_pad2   = 0;
1817 
1818 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1819 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1820 	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1821 	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1822 	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1823 		goto nla_put_failure;
1824 	{
1825 		unsigned long now = jiffies;
1826 		unsigned int flush_delta = now - tbl->last_flush;
1827 		unsigned int rand_delta = now - tbl->last_rand;
1828 		struct neigh_hash_table *nht;
1829 		struct ndt_config ndc = {
1830 			.ndtc_key_len		= tbl->key_len,
1831 			.ndtc_entry_size	= tbl->entry_size,
1832 			.ndtc_entries		= atomic_read(&tbl->entries),
1833 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1834 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1835 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1836 		};
1837 
1838 		rcu_read_lock_bh();
1839 		nht = rcu_dereference_bh(tbl->nht);
1840 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1841 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1842 		rcu_read_unlock_bh();
1843 
1844 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1845 			goto nla_put_failure;
1846 	}
1847 
1848 	{
1849 		int cpu;
1850 		struct ndt_stats ndst;
1851 
1852 		memset(&ndst, 0, sizeof(ndst));
1853 
1854 		for_each_possible_cpu(cpu) {
1855 			struct neigh_statistics	*st;
1856 
1857 			st = per_cpu_ptr(tbl->stats, cpu);
1858 			ndst.ndts_allocs		+= st->allocs;
1859 			ndst.ndts_destroys		+= st->destroys;
1860 			ndst.ndts_hash_grows		+= st->hash_grows;
1861 			ndst.ndts_res_failed		+= st->res_failed;
1862 			ndst.ndts_lookups		+= st->lookups;
1863 			ndst.ndts_hits			+= st->hits;
1864 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1865 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1866 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1867 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1868 		}
1869 
1870 		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1871 			goto nla_put_failure;
1872 	}
1873 
1874 	BUG_ON(tbl->parms.dev);
1875 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1876 		goto nla_put_failure;
1877 
1878 	read_unlock_bh(&tbl->lock);
1879 	return nlmsg_end(skb, nlh);
1880 
1881 nla_put_failure:
1882 	read_unlock_bh(&tbl->lock);
1883 	nlmsg_cancel(skb, nlh);
1884 	return -EMSGSIZE;
1885 }
1886 
1887 static int neightbl_fill_param_info(struct sk_buff *skb,
1888 				    struct neigh_table *tbl,
1889 				    struct neigh_parms *parms,
1890 				    u32 pid, u32 seq, int type,
1891 				    unsigned int flags)
1892 {
1893 	struct ndtmsg *ndtmsg;
1894 	struct nlmsghdr *nlh;
1895 
1896 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1897 	if (nlh == NULL)
1898 		return -EMSGSIZE;
1899 
1900 	ndtmsg = nlmsg_data(nlh);
1901 
1902 	read_lock_bh(&tbl->lock);
1903 	ndtmsg->ndtm_family = tbl->family;
1904 	ndtmsg->ndtm_pad1   = 0;
1905 	ndtmsg->ndtm_pad2   = 0;
1906 
1907 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1908 	    neightbl_fill_parms(skb, parms) < 0)
1909 		goto errout;
1910 
1911 	read_unlock_bh(&tbl->lock);
1912 	return nlmsg_end(skb, nlh);
1913 errout:
1914 	read_unlock_bh(&tbl->lock);
1915 	nlmsg_cancel(skb, nlh);
1916 	return -EMSGSIZE;
1917 }
1918 
1919 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1920 	[NDTA_NAME]		= { .type = NLA_STRING },
1921 	[NDTA_THRESH1]		= { .type = NLA_U32 },
1922 	[NDTA_THRESH2]		= { .type = NLA_U32 },
1923 	[NDTA_THRESH3]		= { .type = NLA_U32 },
1924 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1925 	[NDTA_PARMS]		= { .type = NLA_NESTED },
1926 };
1927 
1928 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1929 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1930 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1931 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1932 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1933 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1934 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1935 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1936 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1937 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1938 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1939 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1940 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
1941 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
1942 };
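
/*
 * Wire format accepted by neightbl_set() below, as constrained by the
 * two policies above (a sketch; attribute lengths omitted):
 *
 *	struct nlmsghdr		nlmsg_type = RTM_SETNEIGHTBL
 *	struct ndtmsg		ndtm_family selects the table family
 *	NDTA_NAME		string, e.g. "arp_cache"
 *	NDTA_THRESH1..3		u32
 *	NDTA_GC_INTERVAL	u64, milliseconds
 *	NDTA_PARMS		nested:
 *		NDTPA_IFINDEX			u32, 0 = default parms
 *		NDTPA_QUEUE_LEN			u32, packets
 *		NDTPA_BASE_REACHABLE_TIME	u64, milliseconds
 *		...
 */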
1943 
1944 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1945 {
1946 	struct net *net = sock_net(skb->sk);
1947 	struct neigh_table *tbl;
1948 	struct ndtmsg *ndtmsg;
1949 	struct nlattr *tb[NDTA_MAX+1];
1950 	int err;
1951 
1952 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1953 			  nl_neightbl_policy);
1954 	if (err < 0)
1955 		goto errout;
1956 
1957 	if (tb[NDTA_NAME] == NULL) {
1958 		err = -EINVAL;
1959 		goto errout;
1960 	}
1961 
1962 	ndtmsg = nlmsg_data(nlh);
1963 	read_lock(&neigh_tbl_lock);
1964 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1965 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1966 			continue;
1967 
1968 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1969 			break;
1970 	}
1971 
1972 	if (tbl == NULL) {
1973 		err = -ENOENT;
1974 		goto errout_locked;
1975 	}
1976 
1977 	/*
1978 	 * We acquire tbl->lock to be nice to the periodic timers and
1979 	 * make sure they always see a consistent set of values.
1980 	 */
1981 	write_lock_bh(&tbl->lock);
1982 
1983 	if (tb[NDTA_PARMS]) {
1984 		struct nlattr *tbp[NDTPA_MAX+1];
1985 		struct neigh_parms *p;
1986 		int i, ifindex = 0;
1987 
1988 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1989 				       nl_ntbl_parm_policy);
1990 		if (err < 0)
1991 			goto errout_tbl_lock;
1992 
1993 		if (tbp[NDTPA_IFINDEX])
1994 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1995 
1996 		p = lookup_neigh_parms(tbl, net, ifindex);
1997 		if (p == NULL) {
1998 			err = -ENOENT;
1999 			goto errout_tbl_lock;
2000 		}
2001 
2002 		for (i = 1; i <= NDTPA_MAX; i++) {
2003 			if (tbp[i] == NULL)
2004 				continue;
2005 
2006 			switch (i) {
2007 			case NDTPA_QUEUE_LEN:
2008 				p->queue_len_bytes = nla_get_u32(tbp[i]) *
2009 						     SKB_TRUESIZE(ETH_FRAME_LEN);
2010 				break;
2011 			case NDTPA_QUEUE_LENBYTES:
2012 				p->queue_len_bytes = nla_get_u32(tbp[i]);
2013 				break;
2014 			case NDTPA_PROXY_QLEN:
2015 				p->proxy_qlen = nla_get_u32(tbp[i]);
2016 				break;
2017 			case NDTPA_APP_PROBES:
2018 				p->app_probes = nla_get_u32(tbp[i]);
2019 				break;
2020 			case NDTPA_UCAST_PROBES:
2021 				p->ucast_probes = nla_get_u32(tbp[i]);
2022 				break;
2023 			case NDTPA_MCAST_PROBES:
2024 				p->mcast_probes = nla_get_u32(tbp[i]);
2025 				break;
2026 			case NDTPA_BASE_REACHABLE_TIME:
2027 				p->base_reachable_time = nla_get_msecs(tbp[i]);
2028 				break;
2029 			case NDTPA_GC_STALETIME:
2030 				p->gc_staletime = nla_get_msecs(tbp[i]);
2031 				break;
2032 			case NDTPA_DELAY_PROBE_TIME:
2033 				p->delay_probe_time = nla_get_msecs(tbp[i]);
2034 				break;
2035 			case NDTPA_RETRANS_TIME:
2036 				p->retrans_time = nla_get_msecs(tbp[i]);
2037 				break;
2038 			case NDTPA_ANYCAST_DELAY:
2039 				p->anycast_delay = nla_get_msecs(tbp[i]);
2040 				break;
2041 			case NDTPA_PROXY_DELAY:
2042 				p->proxy_delay = nla_get_msecs(tbp[i]);
2043 				break;
2044 			case NDTPA_LOCKTIME:
2045 				p->locktime = nla_get_msecs(tbp[i]);
2046 				break;
2047 			}
2048 		}
2049 	}
2050 
2051 	if (tb[NDTA_THRESH1])
2052 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2053 
2054 	if (tb[NDTA_THRESH2])
2055 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2056 
2057 	if (tb[NDTA_THRESH3])
2058 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2059 
2060 	if (tb[NDTA_GC_INTERVAL])
2061 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2062 
2063 	err = 0;
2064 
2065 errout_tbl_lock:
2066 	write_unlock_bh(&tbl->lock);
2067 errout_locked:
2068 	read_unlock(&neigh_tbl_lock);
2069 errout:
2070 	return err;
2071 }
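
/*
 * From userspace this handler is reached via RTM_SETNEIGHTBL; iproute2
 * exposes it as "ip ntable change" (e.g. "ip ntable change name
 * arp_cache thresh1 256" -- a sketch, the option spellings belong to
 * iproute2, not this file).  A raw-netlink caller would build a struct
 * ndtmsg plus an NDTA_NAME attribute, append any of the attributes
 * validated above, and send the message on a NETLINK_ROUTE socket.
 */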
2072 
2073 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2074 {
2075 	struct net *net = sock_net(skb->sk);
2076 	int family, tidx, nidx = 0;
2077 	int tbl_skip = cb->args[0];
2078 	int neigh_skip = cb->args[1];
2079 	struct neigh_table *tbl;
2080 
2081 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2082 
2083 	read_lock(&neigh_tbl_lock);
2084 	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2085 		struct neigh_parms *p;
2086 
2087 		if (tidx < tbl_skip || (family && tbl->family != family))
2088 			continue;
2089 
2090 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2091 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2092 				       NLM_F_MULTI) <= 0)
2093 			break;
2094 
2095 		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2096 			if (!net_eq(neigh_parms_net(p), net))
2097 				continue;
2098 
2099 			if (nidx < neigh_skip)
2100 				goto next;
2101 
2102 			if (neightbl_fill_param_info(skb, tbl, p,
2103 						     NETLINK_CB(cb->skb).pid,
2104 						     cb->nlh->nlmsg_seq,
2105 						     RTM_NEWNEIGHTBL,
2106 						     NLM_F_MULTI) <= 0)
2107 				goto out;
2108 		next:
2109 			nidx++;
2110 		}
2111 
2112 		neigh_skip = 0;
2113 	}
2114 out:
2115 	read_unlock(&neigh_tbl_lock);
2116 	cb->args[0] = tidx;
2117 	cb->args[1] = nidx;
2118 
2119 	return skb->len;
2120 }
2121 
2122 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2123 			   u32 pid, u32 seq, int type, unsigned int flags)
2124 {
2125 	unsigned long now = jiffies;
2126 	struct nda_cacheinfo ci;
2127 	struct nlmsghdr *nlh;
2128 	struct ndmsg *ndm;
2129 
2130 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2131 	if (nlh == NULL)
2132 		return -EMSGSIZE;
2133 
2134 	ndm = nlmsg_data(nlh);
2135 	ndm->ndm_family	 = neigh->ops->family;
2136 	ndm->ndm_pad1    = 0;
2137 	ndm->ndm_pad2    = 0;
2138 	ndm->ndm_flags	 = neigh->flags;
2139 	ndm->ndm_type	 = neigh->type;
2140 	ndm->ndm_ifindex = neigh->dev->ifindex;
2141 
2142 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2143 		goto nla_put_failure;
2144 
2145 	read_lock_bh(&neigh->lock);
2146 	ndm->ndm_state	 = neigh->nud_state;
2147 	if (neigh->nud_state & NUD_VALID) {
2148 		char haddr[MAX_ADDR_LEN];
2149 
2150 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2151 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2152 			read_unlock_bh(&neigh->lock);
2153 			goto nla_put_failure;
2154 		}
2155 	}
2156 
2157 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2158 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2159 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2160 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
2161 	read_unlock_bh(&neigh->lock);
2162 
2163 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2164 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2165 		goto nla_put_failure;
2166 
2167 	return nlmsg_end(skb, nlh);
2168 
2169 nla_put_failure:
2170 	nlmsg_cancel(skb, nlh);
2171 	return -EMSGSIZE;
2172 }
2173 
2174 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2175 			    u32 pid, u32 seq, int type, unsigned int flags,
2176 			    struct neigh_table *tbl)
2177 {
2178 	struct nlmsghdr *nlh;
2179 	struct ndmsg *ndm;
2180 
2181 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2182 	if (nlh == NULL)
2183 		return -EMSGSIZE;
2184 
2185 	ndm = nlmsg_data(nlh);
2186 	ndm->ndm_family	 = tbl->family;
2187 	ndm->ndm_pad1    = 0;
2188 	ndm->ndm_pad2    = 0;
2189 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
2190 	ndm->ndm_type	 = RTN_UNICAST;	/* ndm_type holds RTN_* values */
2191 	ndm->ndm_ifindex = pn->dev->ifindex;
2192 	ndm->ndm_state	 = NUD_NONE;
2193 
2194 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2195 		goto nla_put_failure;
2196 
2197 	return nlmsg_end(skb, nlh);
2198 
2199 nla_put_failure:
2200 	nlmsg_cancel(skb, nlh);
2201 	return -EMSGSIZE;
2202 }
2203 
2204 static void neigh_update_notify(struct neighbour *neigh)
2205 {
2206 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2207 	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
2208 }
2209 
2210 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2211 			    struct netlink_callback *cb)
2212 {
2213 	struct net *net = sock_net(skb->sk);
2214 	struct neighbour *n;
2215 	int rc, h, s_h = cb->args[1];
2216 	int idx, s_idx = idx = cb->args[2];
2217 	struct neigh_hash_table *nht;
2218 
2219 	rcu_read_lock_bh();
2220 	nht = rcu_dereference_bh(tbl->nht);
2221 
2222 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2223 		if (h > s_h)
2224 			s_idx = 0;
2225 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2226 		     n != NULL;
2227 		     n = rcu_dereference_bh(n->next)) {
2228 			if (!net_eq(dev_net(n->dev), net))
2229 				continue;
2230 			if (idx < s_idx)
2231 				goto next;
2232 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2233 					    cb->nlh->nlmsg_seq,
2234 					    RTM_NEWNEIGH,
2235 					    NLM_F_MULTI) <= 0) {
2236 				rc = -1;
2237 				goto out;
2238 			}
2239 next:
2240 			idx++;
2241 		}
2242 	}
2243 	rc = skb->len;
2244 out:
2245 	rcu_read_unlock_bh();
2246 	cb->args[1] = h;
2247 	cb->args[2] = idx;
2248 	return rc;
2249 }
2250 
2251 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2252 			     struct netlink_callback *cb)
2253 {
2254 	struct pneigh_entry *n;
2255 	struct net *net = sock_net(skb->sk);
2256 	int rc, h, s_h = cb->args[3];
2257 	int idx, s_idx = idx = cb->args[4];
2258 
2259 	read_lock_bh(&tbl->lock);
2260 
2261 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2262 		if (h > s_h)
2263 			s_idx = 0;
2264 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2265 			if (!net_eq(dev_net(n->dev), net))
2266 				continue;
2267 			if (idx < s_idx)
2268 				goto next;
2269 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2270 					    cb->nlh->nlmsg_seq,
2271 					    RTM_NEWNEIGH,
2272 					    NLM_F_MULTI, tbl) <= 0) {
2273 				read_unlock_bh(&tbl->lock);
2274 				rc = -1;
2275 				goto out;
2276 			}
2277 		next:
2278 			idx++;
2279 		}
2280 	}
2281 
2282 	read_unlock_bh(&tbl->lock);
2283 	rc = skb->len;
2284 out:
2285 	cb->args[3] = h;
2286 	cb->args[4] = idx;
2287 	return rc;
2288 
2289 }
2290 
2291 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2292 {
2293 	struct neigh_table *tbl;
2294 	int t, family, s_t;
2295 	int proxy = 0;
2296 	int err;
2297 
2298 	read_lock(&neigh_tbl_lock);
2299 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2300 
2301 	/* Check for the presence of a full ndmsg structure; the family
2302 	 * member sits at the same offset in both rtgenmsg and ndmsg.
2303 	 */
2304 	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2305 	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2306 		proxy = 1;
2307 
2308 	s_t = cb->args[0];
2309 
2310 	for (tbl = neigh_tables, t = 0; tbl;
2311 	     tbl = tbl->next, t++) {
2312 		if (t < s_t || (family && tbl->family != family))
2313 			continue;
2314 		if (t > s_t)
2315 			memset(&cb->args[1], 0, sizeof(cb->args) -
2316 						sizeof(cb->args[0]));
2317 		if (proxy)
2318 			err = pneigh_dump_table(tbl, skb, cb);
2319 		else
2320 			err = neigh_dump_table(tbl, skb, cb);
2321 		if (err < 0)
2322 			break;
2323 	}
2324 	read_unlock(&neigh_tbl_lock);
2325 
2326 	cb->args[0] = t;
2327 	return skb->len;
2328 }
2329 
2330 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2331 {
2332 	int chain;
2333 	struct neigh_hash_table *nht;
2334 
2335 	rcu_read_lock_bh();
2336 	nht = rcu_dereference_bh(tbl->nht);
2337 
2338 	read_lock(&tbl->lock); /* avoid resizes */
2339 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2340 		struct neighbour *n;
2341 
2342 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2343 		     n != NULL;
2344 		     n = rcu_dereference_bh(n->next))
2345 			cb(n, cookie);
2346 	}
2347 	read_unlock(&tbl->lock);
2348 	rcu_read_unlock_bh();
2349 }
2350 EXPORT_SYMBOL(neigh_for_each);
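
/*
 * Example usage of neigh_for_each() (a sketch; count_arg and count_cb
 * are illustrative names, not kernel API):
 *
 *	struct count_arg {
 *		struct net_device *dev;
 *		int count;
 *	};
 *
 *	static void count_cb(struct neighbour *n, void *cookie)
 *	{
 *		struct count_arg *arg = cookie;
 *
 *		if (n->dev == arg->dev)
 *			arg->count++;
 *	}
 *
 *	struct count_arg arg = { .dev = dev, .count = 0 };
 *	neigh_for_each(&arp_tbl, count_cb, &arg);
 *
 * The callback runs under tbl->lock and rcu_read_lock_bh(), so it must
 * not sleep.
 */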
2351 
2352 /* The tbl->lock must be held as a writer and BH disabled. */
2353 void __neigh_for_each_release(struct neigh_table *tbl,
2354 			      int (*cb)(struct neighbour *))
2355 {
2356 	int chain;
2357 	struct neigh_hash_table *nht;
2358 
2359 	nht = rcu_dereference_protected(tbl->nht,
2360 					lockdep_is_held(&tbl->lock));
2361 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2362 		struct neighbour *n;
2363 		struct neighbour __rcu **np;
2364 
2365 		np = &nht->hash_buckets[chain];
2366 		while ((n = rcu_dereference_protected(*np,
2367 					lockdep_is_held(&tbl->lock))) != NULL) {
2368 			int release;
2369 
2370 			write_lock(&n->lock);
2371 			release = cb(n);
2372 			if (release) {
2373 				rcu_assign_pointer(*np,
2374 					rcu_dereference_protected(n->next,
2375 						lockdep_is_held(&tbl->lock)));
2376 				n->dead = 1;
2377 			} else
2378 				np = &n->next;
2379 			write_unlock(&n->lock);
2380 			if (release)
2381 				neigh_cleanup_and_release(n);
2382 		}
2383 	}
2384 }
2385 EXPORT_SYMBOL(__neigh_for_each_release);
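
/*
 * Example usage of __neigh_for_each_release() (a sketch): the callback
 * is invoked under n->lock and returns nonzero to have the entry
 * unlinked and released.  A caller flushing an entire table could do:
 *
 *	static int always_release(struct neighbour *n)
 *	{
 *		return 1;
 *	}
 *
 *	write_lock_bh(&tbl->lock);
 *	__neigh_for_each_release(tbl, always_release);
 *	write_unlock_bh(&tbl->lock);
 *
 * always_release is an illustrative name, not kernel API.
 */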
2386 
2387 #ifdef CONFIG_PROC_FS
2388 
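/*
 * seq_file iteration over a neighbour table.  neigh_get_first() and
 * neigh_get_next() walk the RCU hash table bucket by bucket, skipping
 * entries that belong to a foreign namespace (and NUD_NOARP entries
 * when NEIGH_SEQ_SKIP_NOARP is set).  The position is consumed by
 * decrementing *pos once per visited entry, so neigh_get_idx() stops
 * on the record at the requested offset: *pos reaching zero means the
 * target was found.
 */
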
2389 static struct neighbour *neigh_get_first(struct seq_file *seq)
2390 {
2391 	struct neigh_seq_state *state = seq->private;
2392 	struct net *net = seq_file_net(seq);
2393 	struct neigh_hash_table *nht = state->nht;
2394 	struct neighbour *n = NULL;
2395 	int bucket = state->bucket;
2396 
2397 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2398 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2399 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2400 
2401 		while (n) {
2402 			if (!net_eq(dev_net(n->dev), net))
2403 				goto next;
2404 			if (state->neigh_sub_iter) {
2405 				loff_t fakep = 0;
2406 				void *v;
2407 
2408 				v = state->neigh_sub_iter(state, n, &fakep);
2409 				if (!v)
2410 					goto next;
2411 			}
2412 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2413 				break;
2414 			if (n->nud_state & ~NUD_NOARP)
2415 				break;
2416 next:
2417 			n = rcu_dereference_bh(n->next);
2418 		}
2419 
2420 		if (n)
2421 			break;
2422 	}
2423 	state->bucket = bucket;
2424 
2425 	return n;
2426 }
2427 
2428 static struct neighbour *neigh_get_next(struct seq_file *seq,
2429 					struct neighbour *n,
2430 					loff_t *pos)
2431 {
2432 	struct neigh_seq_state *state = seq->private;
2433 	struct net *net = seq_file_net(seq);
2434 	struct neigh_hash_table *nht = state->nht;
2435 
2436 	if (state->neigh_sub_iter) {
2437 		void *v = state->neigh_sub_iter(state, n, pos);
2438 		if (v)
2439 			return n;
2440 	}
2441 	n = rcu_dereference_bh(n->next);
2442 
2443 	while (1) {
2444 		while (n) {
2445 			if (!net_eq(dev_net(n->dev), net))
2446 				goto next;
2447 			if (state->neigh_sub_iter) {
2448 				void *v = state->neigh_sub_iter(state, n, pos);
2449 				if (v)
2450 					return n;
2451 				goto next;
2452 			}
2453 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2454 				break;
2455 
2456 			if (n->nud_state & ~NUD_NOARP)
2457 				break;
2458 next:
2459 			n = rcu_dereference_bh(n->next);
2460 		}
2461 
2462 		if (n)
2463 			break;
2464 
2465 		if (++state->bucket >= (1 << nht->hash_shift))
2466 			break;
2467 
2468 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2469 	}
2470 
2471 	if (n && pos)
2472 		--(*pos);
2473 	return n;
2474 }
2475 
2476 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2477 {
2478 	struct neighbour *n = neigh_get_first(seq);
2479 
2480 	if (n) {
2481 		--(*pos);
2482 		while (*pos) {
2483 			n = neigh_get_next(seq, n, pos);
2484 			if (!n)
2485 				break;
2486 		}
2487 	}
2488 	return *pos ? NULL : n;
2489 }
2490 
2491 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2492 {
2493 	struct neigh_seq_state *state = seq->private;
2494 	struct net *net = seq_file_net(seq);
2495 	struct neigh_table *tbl = state->tbl;
2496 	struct pneigh_entry *pn = NULL;
2497 	int bucket = state->bucket;
2498 
2499 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2500 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2501 		pn = tbl->phash_buckets[bucket];
2502 		while (pn && !net_eq(pneigh_net(pn), net))
2503 			pn = pn->next;
2504 		if (pn)
2505 			break;
2506 	}
2507 	state->bucket = bucket;
2508 
2509 	return pn;
2510 }
2511 
2512 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2513 					    struct pneigh_entry *pn,
2514 					    loff_t *pos)
2515 {
2516 	struct neigh_seq_state *state = seq->private;
2517 	struct net *net = seq_file_net(seq);
2518 	struct neigh_table *tbl = state->tbl;
2519 
2520 	do {
2521 		pn = pn->next;
2522 	} while (pn && !net_eq(pneigh_net(pn), net));
2523 
2524 	while (!pn) {
2525 		if (++state->bucket > PNEIGH_HASHMASK)
2526 			break;
2527 		pn = tbl->phash_buckets[state->bucket];
2528 		while (pn && !net_eq(pneigh_net(pn), net))
2529 			pn = pn->next;
2530 		if (pn)
2531 			break;
2532 	}
2533 
2534 	if (pn && pos)
2535 		--(*pos);
2536 
2537 	return pn;
2538 }
2539 
2540 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2541 {
2542 	struct pneigh_entry *pn = pneigh_get_first(seq);
2543 
2544 	if (pn) {
2545 		--(*pos);
2546 		while (*pos) {
2547 			pn = pneigh_get_next(seq, pn, pos);
2548 			if (!pn)
2549 				break;
2550 		}
2551 	}
2552 	return *pos ? NULL : pn;
2553 }
2554 
2555 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2556 {
2557 	struct neigh_seq_state *state = seq->private;
2558 	void *rc;
2559 	loff_t idxpos = *pos;
2560 
2561 	rc = neigh_get_idx(seq, &idxpos);
2562 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2563 		rc = pneigh_get_idx(seq, &idxpos);
2564 
2565 	return rc;
2566 }
2567 
2568 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2569 	__acquires(rcu_bh)
2570 {
2571 	struct neigh_seq_state *state = seq->private;
2572 
2573 	state->tbl = tbl;
2574 	state->bucket = 0;
2575 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2576 
2577 	rcu_read_lock_bh();
2578 	state->nht = rcu_dereference_bh(tbl->nht);
2579 
2580 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2581 }
2582 EXPORT_SYMBOL(neigh_seq_start);
2583 
2584 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2585 {
2586 	struct neigh_seq_state *state;
2587 	void *rc;
2588 
2589 	if (v == SEQ_START_TOKEN) {
2590 		rc = neigh_get_first(seq);
2591 		goto out;
2592 	}
2593 
2594 	state = seq->private;
2595 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2596 		rc = neigh_get_next(seq, v, NULL);
2597 		if (rc)
2598 			goto out;
2599 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2600 			rc = pneigh_get_first(seq);
2601 	} else {
2602 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2603 		rc = pneigh_get_next(seq, v, NULL);
2604 	}
2605 out:
2606 	++(*pos);
2607 	return rc;
2608 }
2609 EXPORT_SYMBOL(neigh_seq_next);
2610 
2611 void neigh_seq_stop(struct seq_file *seq, void *v)
2612 	__releases(rcu_bh)
2613 {
2614 	rcu_read_unlock_bh();
2615 }
2616 EXPORT_SYMBOL(neigh_seq_stop);
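
/*
 * A protocol wires these helpers into its own seq_operations.  A
 * sketch, modelled on what an ARP /proc implementation would do
 * (arp_seq_start is an illustrative name):
 *
 *	static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 * with neigh_seq_next/neigh_seq_stop used directly as the .next and
 * .stop callbacks.
 */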
2617 
2618 /* statistics via seq_file */
2619 
2620 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2621 {
2622 	struct neigh_table *tbl = seq->private;
2623 	int cpu;
2624 
2625 	if (*pos == 0)
2626 		return SEQ_START_TOKEN;
2627 
2628 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2629 		if (!cpu_possible(cpu))
2630 			continue;
2631 		*pos = cpu+1;
2632 		return per_cpu_ptr(tbl->stats, cpu);
2633 	}
2634 	return NULL;
2635 }
2636 
2637 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2638 {
2639 	struct neigh_table *tbl = seq->private;
2640 	int cpu;
2641 
2642 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2643 		if (!cpu_possible(cpu))
2644 			continue;
2645 		*pos = cpu+1;
2646 		return per_cpu_ptr(tbl->stats, cpu);
2647 	}
2648 	return NULL;
2649 }
2650 
2651 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2652 {
2653 
2654 }
2655 
2656 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2657 {
2658 	struct neigh_table *tbl = seq->private;
2659 	struct neigh_statistics *st = v;
2660 
2661 	if (v == SEQ_START_TOKEN) {
2662 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2663 		return 0;
2664 	}
2665 
2666 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2667 			"%08lx %08lx  %08lx %08lx %08lx\n",
2668 		   atomic_read(&tbl->entries),
2669 
2670 		   st->allocs,
2671 		   st->destroys,
2672 		   st->hash_grows,
2673 
2674 		   st->lookups,
2675 		   st->hits,
2676 
2677 		   st->res_failed,
2678 
2679 		   st->rcv_probes_mcast,
2680 		   st->rcv_probes_ucast,
2681 
2682 		   st->periodic_gc_runs,
2683 		   st->forced_gc_runs,
2684 		   st->unres_discards
2685 		   );
2686 
2687 	return 0;
2688 }
2689 
2690 static const struct seq_operations neigh_stat_seq_ops = {
2691 	.start	= neigh_stat_seq_start,
2692 	.next	= neigh_stat_seq_next,
2693 	.stop	= neigh_stat_seq_stop,
2694 	.show	= neigh_stat_seq_show,
2695 };
2696 
2697 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2698 {
2699 	int ret = seq_open(file, &neigh_stat_seq_ops);
2700 
2701 	if (!ret) {
2702 		struct seq_file *sf = file->private_data;
2703 		sf->private = PDE(inode)->data;
2704 	}
2705 	return ret;
2706 }
2707 
2708 static const struct file_operations neigh_stat_seq_fops = {
2709 	.owner	 = THIS_MODULE,
2710 	.open 	 = neigh_stat_seq_open,
2711 	.read	 = seq_read,
2712 	.llseek	 = seq_lseek,
2713 	.release = seq_release,
2714 };
2715 
2716 #endif /* CONFIG_PROC_FS */
2717 
2718 static inline size_t neigh_nlmsg_size(void)
2719 {
2720 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2721 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2722 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2723 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2724 	       + nla_total_size(4); /* NDA_PROBES */
2725 }
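
/*
 * The worst-case message built by neigh_fill_info() therefore looks
 * like:
 *
 *	struct ndmsg
 *	NDA_DST		up to MAX_ADDR_LEN bytes
 *	NDA_LLADDR	up to MAX_ADDR_LEN bytes
 *	NDA_CACHEINFO	struct nda_cacheinfo
 *	NDA_PROBES	u32
 *
 * and __neigh_notify() below sizes its skb from this estimate; an
 * -EMSGSIZE from neigh_fill_info() then indicates the estimate has
 * gone stale.
 */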
2726 
2727 static void __neigh_notify(struct neighbour *n, int type, int flags)
2728 {
2729 	struct net *net = dev_net(n->dev);
2730 	struct sk_buff *skb;
2731 	int err = -ENOBUFS;
2732 
2733 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2734 	if (skb == NULL)
2735 		goto errout;
2736 
2737 	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2738 	if (err < 0) {
2739 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2740 		WARN_ON(err == -EMSGSIZE);
2741 		kfree_skb(skb);
2742 		goto errout;
2743 	}
2744 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2745 	return;
2746 errout:
2747 	if (err < 0)
2748 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2749 }
2750 
2751 #ifdef CONFIG_ARPD
2752 void neigh_app_ns(struct neighbour *n)
2753 {
2754 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2755 }
2756 EXPORT_SYMBOL(neigh_app_ns);
2757 #endif /* CONFIG_ARPD */
2758 
2759 #ifdef CONFIG_SYSCTL
2760 
2761 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2762 			   size_t *lenp, loff_t *ppos)
2763 {
2764 	int size, ret;
2765 	ctl_table tmp = *ctl;
2766 
2767 	tmp.data = &size;
2768 	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2769 	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2770 	if (write && !ret)
2771 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2772 	return ret;
2773 }
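
/*
 * Worked example of the conversion above: with SKB_TRUESIZE(ETH_FRAME_LEN)
 * around 2KB (an assumption; truesize depends on architecture and
 * allocator), writing 3 to unres_qlen stores roughly 6KB in
 * queue_len_bytes, and a queue_len_bytes of 5000 reads back as
 * DIV_ROUND_UP(5000, ~2048) = 3 packets.
 */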
2774 
2775 enum {
2776 	NEIGH_VAR_MCAST_PROBE,
2777 	NEIGH_VAR_UCAST_PROBE,
2778 	NEIGH_VAR_APP_PROBE,
2779 	NEIGH_VAR_RETRANS_TIME,
2780 	NEIGH_VAR_BASE_REACHABLE_TIME,
2781 	NEIGH_VAR_DELAY_PROBE_TIME,
2782 	NEIGH_VAR_GC_STALETIME,
2783 	NEIGH_VAR_QUEUE_LEN,
2784 	NEIGH_VAR_QUEUE_LEN_BYTES,
2785 	NEIGH_VAR_PROXY_QLEN,
2786 	NEIGH_VAR_ANYCAST_DELAY,
2787 	NEIGH_VAR_PROXY_DELAY,
2788 	NEIGH_VAR_LOCKTIME,
2789 	NEIGH_VAR_RETRANS_TIME_MS,
2790 	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
2791 	NEIGH_VAR_GC_INTERVAL,
2792 	NEIGH_VAR_GC_THRESH1,
2793 	NEIGH_VAR_GC_THRESH2,
2794 	NEIGH_VAR_GC_THRESH3,
2795 	NEIGH_VAR_MAX
2796 };
2797 
2798 static struct neigh_sysctl_table {
2799 	struct ctl_table_header *sysctl_header;
2800 	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2801 } neigh_sysctl_template __read_mostly = {
2802 	.neigh_vars = {
2803 		[NEIGH_VAR_MCAST_PROBE] = {
2804 			.procname	= "mcast_solicit",
2805 			.maxlen		= sizeof(int),
2806 			.mode		= 0644,
2807 			.proc_handler	= proc_dointvec,
2808 		},
2809 		[NEIGH_VAR_UCAST_PROBE] = {
2810 			.procname	= "ucast_solicit",
2811 			.maxlen		= sizeof(int),
2812 			.mode		= 0644,
2813 			.proc_handler	= proc_dointvec,
2814 		},
2815 		[NEIGH_VAR_APP_PROBE] = {
2816 			.procname	= "app_solicit",
2817 			.maxlen		= sizeof(int),
2818 			.mode		= 0644,
2819 			.proc_handler	= proc_dointvec,
2820 		},
2821 		[NEIGH_VAR_RETRANS_TIME] = {
2822 			.procname	= "retrans_time",
2823 			.maxlen		= sizeof(int),
2824 			.mode		= 0644,
2825 			.proc_handler	= proc_dointvec_userhz_jiffies,
2826 		},
2827 		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
2828 			.procname	= "base_reachable_time",
2829 			.maxlen		= sizeof(int),
2830 			.mode		= 0644,
2831 			.proc_handler	= proc_dointvec_jiffies,
2832 		},
2833 		[NEIGH_VAR_DELAY_PROBE_TIME] = {
2834 			.procname	= "delay_first_probe_time",
2835 			.maxlen		= sizeof(int),
2836 			.mode		= 0644,
2837 			.proc_handler	= proc_dointvec_jiffies,
2838 		},
2839 		[NEIGH_VAR_GC_STALETIME] = {
2840 			.procname	= "gc_stale_time",
2841 			.maxlen		= sizeof(int),
2842 			.mode		= 0644,
2843 			.proc_handler	= proc_dointvec_jiffies,
2844 		},
2845 		[NEIGH_VAR_QUEUE_LEN] = {
2846 			.procname	= "unres_qlen",
2847 			.maxlen		= sizeof(int),
2848 			.mode		= 0644,
2849 			.proc_handler	= proc_unres_qlen,
2850 		},
2851 		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
2852 			.procname	= "unres_qlen_bytes",
2853 			.maxlen		= sizeof(int),
2854 			.mode		= 0644,
2855 			.proc_handler	= proc_dointvec,
2856 		},
2857 		[NEIGH_VAR_PROXY_QLEN] = {
2858 			.procname	= "proxy_qlen",
2859 			.maxlen		= sizeof(int),
2860 			.mode		= 0644,
2861 			.proc_handler	= proc_dointvec,
2862 		},
2863 		[NEIGH_VAR_ANYCAST_DELAY] = {
2864 			.procname	= "anycast_delay",
2865 			.maxlen		= sizeof(int),
2866 			.mode		= 0644,
2867 			.proc_handler	= proc_dointvec_userhz_jiffies,
2868 		},
2869 		[NEIGH_VAR_PROXY_DELAY] = {
2870 			.procname	= "proxy_delay",
2871 			.maxlen		= sizeof(int),
2872 			.mode		= 0644,
2873 			.proc_handler	= proc_dointvec_userhz_jiffies,
2874 		},
2875 		[NEIGH_VAR_LOCKTIME] = {
2876 			.procname	= "locktime",
2877 			.maxlen		= sizeof(int),
2878 			.mode		= 0644,
2879 			.proc_handler	= proc_dointvec_userhz_jiffies,
2880 		},
2881 		[NEIGH_VAR_RETRANS_TIME_MS] = {
2882 			.procname	= "retrans_time_ms",
2883 			.maxlen		= sizeof(int),
2884 			.mode		= 0644,
2885 			.proc_handler	= proc_dointvec_ms_jiffies,
2886 		},
2887 		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2888 			.procname	= "base_reachable_time_ms",
2889 			.maxlen		= sizeof(int),
2890 			.mode		= 0644,
2891 			.proc_handler	= proc_dointvec_ms_jiffies,
2892 		},
2893 		[NEIGH_VAR_GC_INTERVAL] = {
2894 			.procname	= "gc_interval",
2895 			.maxlen		= sizeof(int),
2896 			.mode		= 0644,
2897 			.proc_handler	= proc_dointvec_jiffies,
2898 		},
2899 		[NEIGH_VAR_GC_THRESH1] = {
2900 			.procname	= "gc_thresh1",
2901 			.maxlen		= sizeof(int),
2902 			.mode		= 0644,
2903 			.proc_handler	= proc_dointvec,
2904 		},
2905 		[NEIGH_VAR_GC_THRESH2] = {
2906 			.procname	= "gc_thresh2",
2907 			.maxlen		= sizeof(int),
2908 			.mode		= 0644,
2909 			.proc_handler	= proc_dointvec,
2910 		},
2911 		[NEIGH_VAR_GC_THRESH3] = {
2912 			.procname	= "gc_thresh3",
2913 			.maxlen		= sizeof(int),
2914 			.mode		= 0644,
2915 			.proc_handler	= proc_dointvec,
2916 		},
2917 		{},
2918 	},
2919 };
2920 
2921 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2922 			  char *p_name, proc_handler *handler)
2923 {
2924 	struct neigh_sysctl_table *t;
2925 	const char *dev_name_source = NULL;
2926 	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
2927 
2928 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2929 	if (!t)
2930 		goto err;
2931 
2932 	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2933 	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2934 	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2935 	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2936 	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2937 	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2938 	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2939 	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2940 	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2941 	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2942 	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2943 	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2944 	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2945 	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2946 	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2947 
2948 	if (dev) {
2949 		dev_name_source = dev->name;
2950 		/* Terminate the table early */
2951 		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2952 		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2953 	} else {
2954 		dev_name_source = "default";
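		/* Here p is &tbl->parms embedded in struct neigh_table,
		 * so p + 1 points at the fields declared immediately
		 * after it: gc_interval and gc_thresh1..gc_thresh3 (the
		 * header requires that they follow parms without a gap).
		 */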
2955 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2956 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2957 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2958 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2959 	}
2960 
2961 
2962 	if (handler) {
2963 		/* RetransTime */
2964 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2965 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2966 		/* ReachableTime */
2967 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2968 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2969 		/* RetransTime (in milliseconds) */
2970 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2971 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2972 		/* ReachableTime (in milliseconds) */
2973 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2974 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2975 	}
2976 
2977 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
2978 		p_name, dev_name_source);
2979 	t->sysctl_header =
2980 		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
2981 	if (!t->sysctl_header)
2982 		goto free;
2983 
2984 	p->sysctl_table = t;
2985 	return 0;
2986 
2987 free:
2988 	kfree(t);
2989 err:
2990 	return -ENOBUFS;
2991 }
2992 EXPORT_SYMBOL(neigh_sysctl_register);
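
/*
 * Example registration (a sketch of how a protocol uses this): ARP
 * registers each device's parms under net/ipv4/neigh/<ifname>/ with
 *
 *	neigh_sysctl_register(dev, p, "ipv4", NULL);
 *
 * while IPv6's ndisc passes a handler so that writes to retrans_time
 * and base_reachable_time(_ms) propagate to per-device state.  With
 * dev == NULL the table-wide knobs (gc_interval, gc_thresh1..3) are
 * kept and the directory becomes net/<proto>/neigh/default/.
 */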
2993 
2994 void neigh_sysctl_unregister(struct neigh_parms *p)
2995 {
2996 	if (p->sysctl_table) {
2997 		struct neigh_sysctl_table *t = p->sysctl_table;
2998 		p->sysctl_table = NULL;
2999 		unregister_net_sysctl_table(t->sysctl_header);
3000 		kfree(t);
3001 	}
3002 }
3003 EXPORT_SYMBOL(neigh_sysctl_unregister);
3004 
3005 #endif	/* CONFIG_SYSCTL */
3006 
3007 static int __init neigh_init(void)
3008 {
3009 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3010 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3011 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3012 
3013 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3014 		      NULL);
3015 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3016 
3017 	return 0;
3018 }
3019 
3020 subsys_initcall(neigh_init);
3021 
3022