xref: /linux/net/core/neighbour.c (revision f2ee442115c9b6219083c019939a9cc0c9abb2f8)
1 /*
2  *	Generic address resolution entity
3  *
4  *	Authors:
5  *	Pedro Roque		<roque@di.fc.ul.pt>
6  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *	Fixes:
14  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
15  *	Harald Welte		Add neighbour cache statistics like rtstat
16  */
17 
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/module.h>
22 #include <linux/socket.h>
23 #include <linux/netdevice.h>
24 #include <linux/proc_fs.h>
25 #ifdef CONFIG_SYSCTL
26 #include <linux/sysctl.h>
27 #endif
28 #include <linux/times.h>
29 #include <net/net_namespace.h>
30 #include <net/neighbour.h>
31 #include <net/dst.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 
40 #define NEIGH_DEBUG 1
41 
42 #define NEIGH_PRINTK(x...) printk(x)
43 #define NEIGH_NOPRINTK(x...) do { ; } while (0)
44 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
45 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
46 
47 #if NEIGH_DEBUG >= 1
48 #undef NEIGH_PRINTK1
49 #define NEIGH_PRINTK1 NEIGH_PRINTK
50 #endif
51 #if NEIGH_DEBUG >= 2
52 #undef NEIGH_PRINTK2
53 #define NEIGH_PRINTK2 NEIGH_PRINTK
54 #endif
55 
56 #define PNEIGH_HASHMASK		0xF
57 
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
62 
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
67 
68 /*
69    Neighbour hash table buckets are protected with rwlock tbl->lock.
70 
71    - All scans of and updates to hash buckets MUST be done under this lock.
72    - NOTHING clever should be done under this lock: no callbacks
73      into protocol backends, no attempts to send anything to the network.
74      That will result in deadlocks if the backend/driver wants to use
75      the neighbour cache.
76    - If an entry requires non-trivial actions, increase
77      its reference count and release the table lock.
78 
79    Neighbour entries are protected:
80    - by their reference count.
81    - by the rwlock neigh->lock
82 
83    The reference count prevents destruction.
84 
85    neigh->lock mainly serializes the ll address data and its validity state.
86    However, the same lock also protects other entry fields:
87     - timer
88     - resolution queue
89 
90    Again, nothing clever shall be done under neigh->lock;
91    the most complicated procedure we allow is dev->hard_header.
92    dev->hard_header is assumed to be simple and to make no
93    callbacks into the neighbour tables.
94 
95    The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
96    the list of neighbour tables. This list is used only in process context.
97  */
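
/* A minimal sketch of the reference-count rule above: pin the entry,
 * drop the table lock, then do the heavy work (placeholders in <>):
 *
 *	write_lock_bh(&tbl->lock);
 *	n = <entry found in some hash bucket>;
 *	neigh_hold(n);			// refcount keeps n alive
 *	write_unlock_bh(&tbl->lock);	// no callbacks under tbl->lock
 *	<non-trivial work: resolve, transmit, notify, ...>;
 *	neigh_release(n);		// may free n
 */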
98 
99 static DEFINE_RWLOCK(neigh_tbl_lock);
100 
101 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
102 {
103 	kfree_skb(skb);
104 	return -ENETDOWN;
105 }
106 
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
108 {
109 	if (neigh->parms->neigh_cleanup)
110 		neigh->parms->neigh_cleanup(neigh);
111 
112 	__neigh_notify(neigh, RTM_DELNEIGH, 0);
113 	neigh_release(neigh);
114 }
115 
116 /*
117  * Returns a value uniformly distributed in the interval
118  * (1/2)*base ... (3/2)*base. This matches the default IPv6 setting
119  * and is not overridable, because it is a genuinely reasonable choice.
120  */
121 
122 unsigned long neigh_rand_reach_time(unsigned long base)
123 {
124 	return base ? (net_random() % base) + (base >> 1) : 0;
125 }
126 EXPORT_SYMBOL(neigh_rand_reach_time);
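
/* Worked example: with the default IPv6 base_reachable_time of 30 * HZ,
 * the result is uniformly distributed in [15 * HZ, 45 * HZ):
 *
 *	unsigned long t = neigh_rand_reach_time(30 * HZ);
 *	// t == (net_random() % (30 * HZ)) + (15 * HZ)
 */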
127 
128 
129 static int neigh_forced_gc(struct neigh_table *tbl)
130 {
131 	int shrunk = 0;
132 	int i;
133 	struct neigh_hash_table *nht;
134 
135 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136 
137 	write_lock_bh(&tbl->lock);
138 	nht = rcu_dereference_protected(tbl->nht,
139 					lockdep_is_held(&tbl->lock));
140 	for (i = 0; i < (1 << nht->hash_shift); i++) {
141 		struct neighbour *n;
142 		struct neighbour __rcu **np;
143 
144 		np = &nht->hash_buckets[i];
145 		while ((n = rcu_dereference_protected(*np,
146 					lockdep_is_held(&tbl->lock))) != NULL) {
147 			/* A neighbour record may be discarded if:
148 			 * - nobody refers to it, and
149 			 * - it is not permanent.
150 			 */
151 			write_lock(&n->lock);
152 			if (atomic_read(&n->refcnt) == 1 &&
153 			    !(n->nud_state & NUD_PERMANENT)) {
154 				rcu_assign_pointer(*np,
155 					rcu_dereference_protected(n->next,
156 						  lockdep_is_held(&tbl->lock)));
157 				n->dead = 1;
158 				shrunk	= 1;
159 				write_unlock(&n->lock);
160 				neigh_cleanup_and_release(n);
161 				continue;
162 			}
163 			write_unlock(&n->lock);
164 			np = &n->next;
165 		}
166 	}
167 
168 	tbl->last_flush = jiffies;
169 
170 	write_unlock_bh(&tbl->lock);
171 
172 	return shrunk;
173 }
174 
175 static void neigh_add_timer(struct neighbour *n, unsigned long when)
176 {
177 	neigh_hold(n);
178 	if (unlikely(mod_timer(&n->timer, when))) {
179 		printk("NEIGH: BUG, double timer add, state is %x\n",
180 		       n->nud_state);
181 		dump_stack();
182 	}
183 }
184 
185 static int neigh_del_timer(struct neighbour *n)
186 {
187 	if ((n->nud_state & NUD_IN_TIMER) &&
188 	    del_timer(&n->timer)) {
189 		neigh_release(n);
190 		return 1;
191 	}
192 	return 0;
193 }
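
/* neigh_add_timer() and neigh_del_timer() keep the reference count
 * balanced: arming the timer takes a reference, which is dropped either
 * by the expiry handler or by a successful neigh_del_timer() (note the
 * neigh_release() above). Callers therefore never drop the timer's
 * reference themselves.
 */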
194 
195 static void pneigh_queue_purge(struct sk_buff_head *list)
196 {
197 	struct sk_buff *skb;
198 
199 	while ((skb = skb_dequeue(list)) != NULL) {
200 		dev_put(skb->dev);
201 		kfree_skb(skb);
202 	}
203 }
204 
205 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
206 {
207 	int i;
208 	struct neigh_hash_table *nht;
209 
210 	nht = rcu_dereference_protected(tbl->nht,
211 					lockdep_is_held(&tbl->lock));
212 
213 	for (i = 0; i < (1 << nht->hash_shift); i++) {
214 		struct neighbour *n;
215 		struct neighbour __rcu **np = &nht->hash_buckets[i];
216 
217 		while ((n = rcu_dereference_protected(*np,
218 					lockdep_is_held(&tbl->lock))) != NULL) {
219 			if (dev && n->dev != dev) {
220 				np = &n->next;
221 				continue;
222 			}
223 			rcu_assign_pointer(*np,
224 				   rcu_dereference_protected(n->next,
225 						lockdep_is_held(&tbl->lock)));
226 			write_lock(&n->lock);
227 			neigh_del_timer(n);
228 			n->dead = 1;
229 
230 			if (atomic_read(&n->refcnt) != 1) {
231 				/* This is the most unpleasant situation:
232 				   we must destroy the neighbour entry,
233 				   but someone still uses it.
234 
235 				   Destruction will be delayed until
236 				   the last user releases us, but
237 				   we must kill timers etc. and move
238 				   it to a safe state.
239 				 */
240 				skb_queue_purge(&n->arp_queue);
241 				n->output = neigh_blackhole;
242 				if (n->nud_state & NUD_VALID)
243 					n->nud_state = NUD_NOARP;
244 				else
245 					n->nud_state = NUD_NONE;
246 				NEIGH_PRINTK2("neigh %p is stray.\n", n);
247 			}
248 			write_unlock(&n->lock);
249 			neigh_cleanup_and_release(n);
250 		}
251 	}
252 }
253 
254 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
255 {
256 	write_lock_bh(&tbl->lock);
257 	neigh_flush_dev(tbl, dev);
258 	write_unlock_bh(&tbl->lock);
259 }
260 EXPORT_SYMBOL(neigh_changeaddr);
261 
262 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
263 {
264 	write_lock_bh(&tbl->lock);
265 	neigh_flush_dev(tbl, dev);
266 	pneigh_ifdown(tbl, dev);
267 	write_unlock_bh(&tbl->lock);
268 
269 	del_timer_sync(&tbl->proxy_timer);
270 	pneigh_queue_purge(&tbl->proxy_queue);
271 	return 0;
272 }
273 EXPORT_SYMBOL(neigh_ifdown);
274 
275 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
276 {
277 	struct neighbour *n = NULL;
278 	unsigned long now = jiffies;
279 	int entries;
280 
281 	entries = atomic_inc_return(&tbl->entries) - 1;
282 	if (entries >= tbl->gc_thresh3 ||
283 	    (entries >= tbl->gc_thresh2 &&
284 	     time_after(now, tbl->last_flush + 5 * HZ))) {
285 		if (!neigh_forced_gc(tbl) &&
286 		    entries >= tbl->gc_thresh3)
287 			goto out_entries;
288 	}
289 
290 	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
291 	if (!n)
292 		goto out_entries;
293 
294 	skb_queue_head_init(&n->arp_queue);
295 	rwlock_init(&n->lock);
296 	seqlock_init(&n->ha_lock);
297 	n->updated	  = n->used = now;
298 	n->nud_state	  = NUD_NONE;
299 	n->output	  = neigh_blackhole;
300 	seqlock_init(&n->hh.hh_lock);
301 	n->parms	  = neigh_parms_clone(&tbl->parms);
302 	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
303 
304 	NEIGH_CACHE_STAT_INC(tbl, allocs);
305 	n->tbl		  = tbl;
306 	atomic_set(&n->refcnt, 1);
307 	n->dead		  = 1;
308 out:
309 	return n;
310 
311 out_entries:
312 	atomic_dec(&tbl->entries);
313 	goto out;
314 }
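
/* Worked example of the thresholds above, using the usual ARP defaults
 * (gc_thresh2 = 512, gc_thresh3 = 1024): below 512 entries, allocation
 * always succeeds; between 512 and 1023, a forced GC runs only if the
 * last flush is more than 5 seconds old; at 1024 and above, a forced GC
 * always runs and the allocation fails (returns NULL) if it frees nothing.
 */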
315 
316 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
317 {
318 	size_t size = (1 << shift) * sizeof(struct neighbour *);
319 	struct neigh_hash_table *ret;
320 	struct neighbour __rcu **buckets;
321 
322 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
323 	if (!ret)
324 		return NULL;
325 	if (size <= PAGE_SIZE)
326 		buckets = kzalloc(size, GFP_ATOMIC);
327 	else
328 		buckets = (struct neighbour __rcu **)
329 			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
330 					   get_order(size));
331 	if (!buckets) {
332 		kfree(ret);
333 		return NULL;
334 	}
335 	ret->hash_buckets = buckets;
336 	ret->hash_shift = shift;
337 	get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
338 	ret->hash_rnd |= 1;
339 	return ret;
340 }
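
/* Example: the initial table (see neigh_table_init_no_netlink() below)
 * uses shift = 3, i.e. 8 buckets; on a 64-bit machine the bucket array
 * is 8 * 8 = 64 bytes and comes from kzalloc(). Only when
 * (1 << shift) * sizeof(void *) exceeds PAGE_SIZE does the allocation
 * switch to __get_free_pages().
 */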
341 
342 static void neigh_hash_free_rcu(struct rcu_head *head)
343 {
344 	struct neigh_hash_table *nht = container_of(head,
345 						    struct neigh_hash_table,
346 						    rcu);
347 	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
348 	struct neighbour __rcu **buckets = nht->hash_buckets;
349 
350 	if (size <= PAGE_SIZE)
351 		kfree(buckets);
352 	else
353 		free_pages((unsigned long)buckets, get_order(size));
354 	kfree(nht);
355 }
356 
357 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
358 						unsigned long new_shift)
359 {
360 	unsigned int i, hash;
361 	struct neigh_hash_table *new_nht, *old_nht;
362 
363 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
364 
365 	old_nht = rcu_dereference_protected(tbl->nht,
366 					    lockdep_is_held(&tbl->lock));
367 	new_nht = neigh_hash_alloc(new_shift);
368 	if (!new_nht)
369 		return old_nht;
370 
371 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
372 		struct neighbour *n, *next;
373 
374 		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
375 						   lockdep_is_held(&tbl->lock));
376 		     n != NULL;
377 		     n = next) {
378 			hash = tbl->hash(n->primary_key, n->dev,
379 					 new_nht->hash_rnd);
380 
381 			hash >>= (32 - new_nht->hash_shift);
382 			next = rcu_dereference_protected(n->next,
383 						lockdep_is_held(&tbl->lock));
384 
385 			rcu_assign_pointer(n->next,
386 					   rcu_dereference_protected(
387 						new_nht->hash_buckets[hash],
388 						lockdep_is_held(&tbl->lock)));
389 			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
390 		}
391 	}
392 
393 	rcu_assign_pointer(tbl->nht, new_nht);
394 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
395 	return new_nht;
396 }
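
/* The bucket index is always taken from the TOP bits of the 32-bit
 * hash value, hash >> (32 - hash_shift); e.g. with hash_shift = 3, a
 * hash of 0xdeadbeef lands in bucket 0xdeadbeef >> 29 = 6. This keeps
 * the index in range for any shift without masking.
 */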
397 
398 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
399 			       struct net_device *dev)
400 {
401 	struct neighbour *n;
402 	int key_len = tbl->key_len;
403 	u32 hash_val;
404 	struct neigh_hash_table *nht;
405 
406 	NEIGH_CACHE_STAT_INC(tbl, lookups);
407 
408 	rcu_read_lock_bh();
409 	nht = rcu_dereference_bh(tbl->nht);
410 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
411 
412 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
413 	     n != NULL;
414 	     n = rcu_dereference_bh(n->next)) {
415 		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
416 			if (!atomic_inc_not_zero(&n->refcnt))
417 				n = NULL;
418 			NEIGH_CACHE_STAT_INC(tbl, hits);
419 			break;
420 		}
421 	}
422 
423 	rcu_read_unlock_bh();
424 	return n;
425 }
426 EXPORT_SYMBOL(neigh_lookup);
427 
428 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
429 				     const void *pkey)
430 {
431 	struct neighbour *n;
432 	int key_len = tbl->key_len;
433 	u32 hash_val;
434 	struct neigh_hash_table *nht;
435 
436 	NEIGH_CACHE_STAT_INC(tbl, lookups);
437 
438 	rcu_read_lock_bh();
439 	nht = rcu_dereference_bh(tbl->nht);
440 	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
441 
442 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
443 	     n != NULL;
444 	     n = rcu_dereference_bh(n->next)) {
445 		if (!memcmp(n->primary_key, pkey, key_len) &&
446 		    net_eq(dev_net(n->dev), net)) {
447 			if (!atomic_inc_not_zero(&n->refcnt))
448 				n = NULL;
449 			NEIGH_CACHE_STAT_INC(tbl, hits);
450 			break;
451 		}
452 	}
453 
454 	rcu_read_unlock_bh();
455 	return n;
456 }
457 EXPORT_SYMBOL(neigh_lookup_nodev);
458 
459 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
460 			       struct net_device *dev)
461 {
462 	u32 hash_val;
463 	int key_len = tbl->key_len;
464 	int error;
465 	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
466 	struct neigh_hash_table *nht;
467 
468 	if (!n) {
469 		rc = ERR_PTR(-ENOBUFS);
470 		goto out;
471 	}
472 
473 	memcpy(n->primary_key, pkey, key_len);
474 	n->dev = dev;
475 	dev_hold(dev);
476 
477 	/* Protocol specific setup. */
478 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
479 		rc = ERR_PTR(error);
480 		goto out_neigh_release;
481 	}
482 
483 	/* Device specific setup. */
484 	if (n->parms->neigh_setup &&
485 	    (error = n->parms->neigh_setup(n)) < 0) {
486 		rc = ERR_PTR(error);
487 		goto out_neigh_release;
488 	}
489 
490 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
491 
492 	write_lock_bh(&tbl->lock);
493 	nht = rcu_dereference_protected(tbl->nht,
494 					lockdep_is_held(&tbl->lock));
495 
496 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
497 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
498 
499 	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
500 
501 	if (n->parms->dead) {
502 		rc = ERR_PTR(-EINVAL);
503 		goto out_tbl_unlock;
504 	}
505 
506 	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
507 					    lockdep_is_held(&tbl->lock));
508 	     n1 != NULL;
509 	     n1 = rcu_dereference_protected(n1->next,
510 			lockdep_is_held(&tbl->lock))) {
511 		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
512 			neigh_hold(n1);
513 			rc = n1;
514 			goto out_tbl_unlock;
515 		}
516 	}
517 
518 	n->dead = 0;
519 	neigh_hold(n);
520 	rcu_assign_pointer(n->next,
521 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
522 						     lockdep_is_held(&tbl->lock)));
523 	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
524 	write_unlock_bh(&tbl->lock);
525 	NEIGH_PRINTK2("neigh %p is created.\n", n);
526 	rc = n;
527 out:
528 	return rc;
529 out_tbl_unlock:
530 	write_unlock_bh(&tbl->lock);
531 out_neigh_release:
532 	neigh_release(n);
533 	goto out;
534 }
535 EXPORT_SYMBOL(neigh_create);
536 
537 static u32 pneigh_hash(const void *pkey, int key_len)
538 {
539 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
540 	hash_val ^= (hash_val >> 16);
541 	hash_val ^= hash_val >> 8;
542 	hash_val ^= hash_val >> 4;
543 	hash_val &= PNEIGH_HASHMASK;
544 	return hash_val;
545 }
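
/* pneigh_hash() folds the last four bytes of the key down to a 4-bit
 * bucket index: the xor-shifts by 16, 8 and 4 mix every input bit into
 * the low nibble, and the final mask keeps the result within
 * PNEIGH_HASHMASK, i.e. one of 16 chains.
 */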
546 
547 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
548 					      struct net *net,
549 					      const void *pkey,
550 					      int key_len,
551 					      struct net_device *dev)
552 {
553 	while (n) {
554 		if (!memcmp(n->key, pkey, key_len) &&
555 		    net_eq(pneigh_net(n), net) &&
556 		    (n->dev == dev || !n->dev))
557 			return n;
558 		n = n->next;
559 	}
560 	return NULL;
561 }
562 
563 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
564 		struct net *net, const void *pkey, struct net_device *dev)
565 {
566 	int key_len = tbl->key_len;
567 	u32 hash_val = pneigh_hash(pkey, key_len);
568 
569 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
570 				 net, pkey, key_len, dev);
571 }
572 EXPORT_SYMBOL_GPL(__pneigh_lookup);
573 
574 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
575 				    struct net *net, const void *pkey,
576 				    struct net_device *dev, int creat)
577 {
578 	struct pneigh_entry *n;
579 	int key_len = tbl->key_len;
580 	u32 hash_val = pneigh_hash(pkey, key_len);
581 
582 	read_lock_bh(&tbl->lock);
583 	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
584 			      net, pkey, key_len, dev);
585 	read_unlock_bh(&tbl->lock);
586 
587 	if (n || !creat)
588 		goto out;
589 
590 	ASSERT_RTNL();
591 
592 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
593 	if (!n)
594 		goto out;
595 
596 	write_pnet(&n->net, hold_net(net));
597 	memcpy(n->key, pkey, key_len);
598 	n->dev = dev;
599 	if (dev)
600 		dev_hold(dev);
601 
602 	if (tbl->pconstructor && tbl->pconstructor(n)) {
603 		if (dev)
604 			dev_put(dev);
605 		release_net(net);
606 		kfree(n);
607 		n = NULL;
608 		goto out;
609 	}
610 
611 	write_lock_bh(&tbl->lock);
612 	n->next = tbl->phash_buckets[hash_val];
613 	tbl->phash_buckets[hash_val] = n;
614 	write_unlock_bh(&tbl->lock);
615 out:
616 	return n;
617 }
618 EXPORT_SYMBOL(pneigh_lookup);
619 
620 
621 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
622 		  struct net_device *dev)
623 {
624 	struct pneigh_entry *n, **np;
625 	int key_len = tbl->key_len;
626 	u32 hash_val = pneigh_hash(pkey, key_len);
627 
628 	write_lock_bh(&tbl->lock);
629 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
630 	     np = &n->next) {
631 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
632 		    net_eq(pneigh_net(n), net)) {
633 			*np = n->next;
634 			write_unlock_bh(&tbl->lock);
635 			if (tbl->pdestructor)
636 				tbl->pdestructor(n);
637 			if (n->dev)
638 				dev_put(n->dev);
639 			release_net(pneigh_net(n));
640 			kfree(n);
641 			return 0;
642 		}
643 	}
644 	write_unlock_bh(&tbl->lock);
645 	return -ENOENT;
646 }
647 
648 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
649 {
650 	struct pneigh_entry *n, **np;
651 	u32 h;
652 
653 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
654 		np = &tbl->phash_buckets[h];
655 		while ((n = *np) != NULL) {
656 			if (!dev || n->dev == dev) {
657 				*np = n->next;
658 				if (tbl->pdestructor)
659 					tbl->pdestructor(n);
660 				if (n->dev)
661 					dev_put(n->dev);
662 				release_net(pneigh_net(n));
663 				kfree(n);
664 				continue;
665 			}
666 			np = &n->next;
667 		}
668 	}
669 	return -ENOENT;
670 }
671 
672 static void neigh_parms_destroy(struct neigh_parms *parms);
673 
674 static inline void neigh_parms_put(struct neigh_parms *parms)
675 {
676 	if (atomic_dec_and_test(&parms->refcnt))
677 		neigh_parms_destroy(parms);
678 }
679 
680 static void neigh_destroy_rcu(struct rcu_head *head)
681 {
682 	struct neighbour *neigh = container_of(head, struct neighbour, rcu);
683 
684 	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
685 }
686 /*
687  *	The neighbour must already be out of the table.
688  *
689  */
690 void neigh_destroy(struct neighbour *neigh)
691 {
692 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
693 
694 	if (!neigh->dead) {
695 		printk(KERN_WARNING
696 		       "Destroying alive neighbour %p\n", neigh);
697 		dump_stack();
698 		return;
699 	}
700 
701 	if (neigh_del_timer(neigh))
702 		printk(KERN_WARNING "Impossible event.\n");
703 
704 	skb_queue_purge(&neigh->arp_queue);
705 
706 	dev_put(neigh->dev);
707 	neigh_parms_put(neigh->parms);
708 
709 	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
710 
711 	atomic_dec(&neigh->tbl->entries);
712 	call_rcu(&neigh->rcu, neigh_destroy_rcu);
713 }
714 EXPORT_SYMBOL(neigh_destroy);
715 
716 /* Neighbour state is suspicious;
717    disable fast path.
718 
719    Called with the neigh write-locked.
720  */
721 static void neigh_suspect(struct neighbour *neigh)
722 {
723 	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
724 
725 	neigh->output = neigh->ops->output;
726 }
727 
728 /* Neighbour state is OK;
729    enable fast path.
730 
731    Called with the neigh write-locked.
732  */
733 static void neigh_connect(struct neighbour *neigh)
734 {
735 	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
736 
737 	neigh->output = neigh->ops->connected_output;
738 }
739 
740 static void neigh_periodic_work(struct work_struct *work)
741 {
742 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
743 	struct neighbour *n;
744 	struct neighbour __rcu **np;
745 	unsigned int i;
746 	struct neigh_hash_table *nht;
747 
748 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
749 
750 	write_lock_bh(&tbl->lock);
751 	nht = rcu_dereference_protected(tbl->nht,
752 					lockdep_is_held(&tbl->lock));
753 
754 	/*
755 	 *	Periodically recompute ReachableTime from the random function.
756 	 */
757 
758 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
759 		struct neigh_parms *p;
760 		tbl->last_rand = jiffies;
761 		for (p = &tbl->parms; p; p = p->next)
762 			p->reachable_time =
763 				neigh_rand_reach_time(p->base_reachable_time);
764 	}
765 
766 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
767 		np = &nht->hash_buckets[i];
768 
769 		while ((n = rcu_dereference_protected(*np,
770 				lockdep_is_held(&tbl->lock))) != NULL) {
771 			unsigned int state;
772 
773 			write_lock(&n->lock);
774 
775 			state = n->nud_state;
776 			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
777 				write_unlock(&n->lock);
778 				goto next_elt;
779 			}
780 
781 			if (time_before(n->used, n->confirmed))
782 				n->used = n->confirmed;
783 
784 			if (atomic_read(&n->refcnt) == 1 &&
785 			    (state == NUD_FAILED ||
786 			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
787 				*np = n->next;
788 				n->dead = 1;
789 				write_unlock(&n->lock);
790 				neigh_cleanup_and_release(n);
791 				continue;
792 			}
793 			write_unlock(&n->lock);
794 
795 next_elt:
796 			np = &n->next;
797 		}
798 		/*
799 		 * It's fine to release lock here, even if hash table
800 		 * grows while we are preempted.
801 		 */
802 		write_unlock_bh(&tbl->lock);
803 		cond_resched();
804 		write_lock_bh(&tbl->lock);
805 	}
806 	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
807 	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
808 	 * base_reachable_time.
809 	 */
810 	schedule_delayed_work(&tbl->gc_work,
811 			      tbl->parms.base_reachable_time >> 1);
812 	write_unlock_bh(&tbl->lock);
813 }
814 
815 static __inline__ int neigh_max_probes(struct neighbour *n)
816 {
817 	struct neigh_parms *p = n->parms;
818 	return (n->nud_state & NUD_PROBE) ?
819 		p->ucast_probes :
820 		p->ucast_probes + p->app_probes + p->mcast_probes;
821 }
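
/* Example with the usual ARP defaults (ucast_probes = 3,
 * mcast_probes = 3, app_probes = 0): an entry in NUD_PROBE gives up
 * after 3 unicast probes, while one still in NUD_INCOMPLETE may send
 * up to 3 + 0 + 3 = 6 probes before it is marked NUD_FAILED.
 */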
822 
823 static void neigh_invalidate(struct neighbour *neigh)
824 	__releases(neigh->lock)
825 	__acquires(neigh->lock)
826 {
827 	struct sk_buff *skb;
828 
829 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
830 	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
831 	neigh->updated = jiffies;
832 
833 	/* This is a delicate spot: report_unreachable is a very complicated
834 	   routine. In particular, it can hit this same neighbour entry!
835 
836 	   So we try to be careful and avoid an endless loop. --ANK
837 	 */
838 	while (neigh->nud_state == NUD_FAILED &&
839 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
840 		write_unlock(&neigh->lock);
841 		neigh->ops->error_report(neigh, skb);
842 		write_lock(&neigh->lock);
843 	}
844 	skb_queue_purge(&neigh->arp_queue);
845 }
846 
847 static void neigh_probe(struct neighbour *neigh)
848 	__releases(neigh->lock)
849 {
850 	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
851 	/* keep skb alive even if arp_queue overflows */
852 	if (skb)
853 		skb = skb_copy(skb, GFP_ATOMIC);
854 	write_unlock(&neigh->lock);
855 	neigh->ops->solicit(neigh, skb);
856 	atomic_inc(&neigh->probes);
857 	kfree_skb(skb);
858 }
859 
860 /* Called when a timer expires for a neighbour entry. */
861 
862 static void neigh_timer_handler(unsigned long arg)
863 {
864 	unsigned long now, next;
865 	struct neighbour *neigh = (struct neighbour *)arg;
866 	unsigned state;
867 	int notify = 0;
868 
869 	write_lock(&neigh->lock);
870 
871 	state = neigh->nud_state;
872 	now = jiffies;
873 	next = now + HZ;
874 
875 	if (!(state & NUD_IN_TIMER))
876 		goto out;
877 
878 	if (state & NUD_REACHABLE) {
879 		if (time_before_eq(now,
880 				   neigh->confirmed + neigh->parms->reachable_time)) {
881 			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
882 			next = neigh->confirmed + neigh->parms->reachable_time;
883 		} else if (time_before_eq(now,
884 					  neigh->used + neigh->parms->delay_probe_time)) {
885 			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
886 			neigh->nud_state = NUD_DELAY;
887 			neigh->updated = jiffies;
888 			neigh_suspect(neigh);
889 			next = now + neigh->parms->delay_probe_time;
890 		} else {
891 			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
892 			neigh->nud_state = NUD_STALE;
893 			neigh->updated = jiffies;
894 			neigh_suspect(neigh);
895 			notify = 1;
896 		}
897 	} else if (state & NUD_DELAY) {
898 		if (time_before_eq(now,
899 				   neigh->confirmed + neigh->parms->delay_probe_time)) {
900 			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
901 			neigh->nud_state = NUD_REACHABLE;
902 			neigh->updated = jiffies;
903 			neigh_connect(neigh);
904 			notify = 1;
905 			next = neigh->confirmed + neigh->parms->reachable_time;
906 		} else {
907 			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
908 			neigh->nud_state = NUD_PROBE;
909 			neigh->updated = jiffies;
910 			atomic_set(&neigh->probes, 0);
911 			next = now + neigh->parms->retrans_time;
912 		}
913 	} else {
914 		/* NUD_PROBE|NUD_INCOMPLETE */
915 		next = now + neigh->parms->retrans_time;
916 	}
917 
918 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
919 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
920 		neigh->nud_state = NUD_FAILED;
921 		notify = 1;
922 		neigh_invalidate(neigh);
923 	}
924 
925 	if (neigh->nud_state & NUD_IN_TIMER) {
926 		if (time_before(next, jiffies + HZ/2))
927 			next = jiffies + HZ/2;
928 		if (!mod_timer(&neigh->timer, next))
929 			neigh_hold(neigh);
930 	}
931 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
932 		neigh_probe(neigh);
933 	} else {
934 out:
935 		write_unlock(&neigh->lock);
936 	}
937 
938 	if (notify)
939 		neigh_update_notify(neigh);
940 
941 	neigh_release(neigh);
942 }
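
/* In summary, the timer drives these NUD transitions (a sketch;
 * STALE -> DELAY happens in __neigh_event_send() below, not here):
 *
 *	REACHABLE --reachable_time expired---> DELAY or STALE
 *	DELAY     --confirmed in time--------> REACHABLE
 *	DELAY     --delay_probe_time expired-> PROBE
 *	INCOMPLETE/PROBE --too many probes---> FAILED
 */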
943 
944 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
945 {
946 	int rc;
947 	bool immediate_probe = false;
948 
949 	write_lock_bh(&neigh->lock);
950 
951 	rc = 0;
952 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
953 		goto out_unlock_bh;
954 
955 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
956 		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
957 			unsigned long next, now = jiffies;
958 
959 			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
960 			neigh->nud_state     = NUD_INCOMPLETE;
961 			neigh->updated = now;
962 			next = now + max(neigh->parms->retrans_time, HZ/2);
963 			neigh_add_timer(neigh, next);
964 			immediate_probe = true;
965 		} else {
966 			neigh->nud_state = NUD_FAILED;
967 			neigh->updated = jiffies;
968 			write_unlock_bh(&neigh->lock);
969 
970 			kfree_skb(skb);
971 			return 1;
972 		}
973 	} else if (neigh->nud_state & NUD_STALE) {
974 		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
975 		neigh->nud_state = NUD_DELAY;
976 		neigh->updated = jiffies;
977 		neigh_add_timer(neigh,
978 				jiffies + neigh->parms->delay_probe_time);
979 	}
980 
981 	if (neigh->nud_state == NUD_INCOMPLETE) {
982 		if (skb) {
983 			if (skb_queue_len(&neigh->arp_queue) >=
984 			    neigh->parms->queue_len) {
985 				struct sk_buff *buff;
986 				buff = __skb_dequeue(&neigh->arp_queue);
987 				kfree_skb(buff);
988 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
989 			}
990 			skb_dst_force(skb);
991 			__skb_queue_tail(&neigh->arp_queue, skb);
992 		}
993 		rc = 1;
994 	}
995 out_unlock_bh:
996 	if (immediate_probe)
997 		neigh_probe(neigh);
998 	else
999 		write_unlock(&neigh->lock);
1000 	local_bh_enable();
1001 	return rc;
1002 }
1003 EXPORT_SYMBOL(__neigh_event_send);
1004 
1005 static void neigh_update_hhs(struct neighbour *neigh)
1006 {
1007 	struct hh_cache *hh;
1008 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1009 		= NULL;
1010 
1011 	if (neigh->dev->header_ops)
1012 		update = neigh->dev->header_ops->cache_update;
1013 
1014 	if (update) {
1015 		hh = &neigh->hh;
1016 		if (hh->hh_len) {
1017 			write_seqlock_bh(&hh->hh_lock);
1018 			update(hh, neigh->dev, neigh->ha);
1019 			write_sequnlock_bh(&hh->hh_lock);
1020 		}
1021 	}
1022 }
1023 
1024 
1025 
1026 /* Generic update routine.
1027    -- lladdr is the new lladdr, or NULL if it is not supplied.
1028    -- new    is the new state.
1029    -- flags
1030 	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
1031 				if it is different.
1032 	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
1033 				lladdr instead of overriding it
1034 				if it is different.
1035 				It also allows retaining the current state
1036 				if lladdr is unchanged.
1037 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1038 
1039 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
1040 				NTF_ROUTER flag.
1041 	NEIGH_UPDATE_F_ISROUTER	indicates that the neighbour is known to be
1042 				a router.
1043 
1044    The caller MUST hold a reference count on the entry.
1045  */
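
/* A typical administrative update, in the style of neigh_add() below:
 *
 *	err = neigh_update(neigh, lladdr, NUD_PERMANENT,
 *			   NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
 *
 * Protocol receive paths instead pass e.g. NUD_STALE without F_ADMIN
 * (see neigh_event_ns() below).
 */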
1046 
1047 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1048 		 u32 flags)
1049 {
1050 	u8 old;
1051 	int err;
1052 	int notify = 0;
1053 	struct net_device *dev;
1054 	int update_isrouter = 0;
1055 
1056 	write_lock_bh(&neigh->lock);
1057 
1058 	dev    = neigh->dev;
1059 	old    = neigh->nud_state;
1060 	err    = -EPERM;
1061 
1062 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1063 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1064 		goto out;
1065 
1066 	if (!(new & NUD_VALID)) {
1067 		neigh_del_timer(neigh);
1068 		if (old & NUD_CONNECTED)
1069 			neigh_suspect(neigh);
1070 		neigh->nud_state = new;
1071 		err = 0;
1072 		notify = old & NUD_VALID;
1073 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1074 		    (new & NUD_FAILED)) {
1075 			neigh_invalidate(neigh);
1076 			notify = 1;
1077 		}
1078 		goto out;
1079 	}
1080 
1081 	/* Compare new lladdr with cached one */
1082 	if (!dev->addr_len) {
1083 		/* First case: device needs no address. */
1084 		lladdr = neigh->ha;
1085 	} else if (lladdr) {
1086 		/* The second case: if something is already cached
1087 		   and a new address is proposed:
1088 		   - compare new & old
1089 		   - if they are different, check override flag
1090 		 */
1091 		if ((old & NUD_VALID) &&
1092 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1093 			lladdr = neigh->ha;
1094 	} else {
1095 		/* No address is supplied; if we know something,
1096 		   use it, otherwise discard the request.
1097 		 */
1098 		err = -EINVAL;
1099 		if (!(old & NUD_VALID))
1100 			goto out;
1101 		lladdr = neigh->ha;
1102 	}
1103 
1104 	if (new & NUD_CONNECTED)
1105 		neigh->confirmed = jiffies;
1106 	neigh->updated = jiffies;
1107 
1108 	/* If the entry was valid and the address has not changed,
1109 	   do not change the entry state if the new one is STALE.
1110 	 */
1111 	err = 0;
1112 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1113 	if (old & NUD_VALID) {
1114 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1115 			update_isrouter = 0;
1116 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1117 			    (old & NUD_CONNECTED)) {
1118 				lladdr = neigh->ha;
1119 				new = NUD_STALE;
1120 			} else
1121 				goto out;
1122 		} else {
1123 			if (lladdr == neigh->ha && new == NUD_STALE &&
1124 			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1125 			     (old & NUD_CONNECTED))
1126 			    )
1127 				new = old;
1128 		}
1129 	}
1130 
1131 	if (new != old) {
1132 		neigh_del_timer(neigh);
1133 		if (new & NUD_IN_TIMER)
1134 			neigh_add_timer(neigh, (jiffies +
1135 						((new & NUD_REACHABLE) ?
1136 						 neigh->parms->reachable_time :
1137 						 0)));
1138 		neigh->nud_state = new;
1139 	}
1140 
1141 	if (lladdr != neigh->ha) {
1142 		write_seqlock(&neigh->ha_lock);
1143 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1144 		write_sequnlock(&neigh->ha_lock);
1145 		neigh_update_hhs(neigh);
1146 		if (!(new & NUD_CONNECTED))
1147 			neigh->confirmed = jiffies -
1148 				      (neigh->parms->base_reachable_time << 1);
1149 		notify = 1;
1150 	}
1151 	if (new == old)
1152 		goto out;
1153 	if (new & NUD_CONNECTED)
1154 		neigh_connect(neigh);
1155 	else
1156 		neigh_suspect(neigh);
1157 	if (!(old & NUD_VALID)) {
1158 		struct sk_buff *skb;
1159 
1160 		/* Again: avoid an endless loop if something goes wrong */
1161 
1162 		while (neigh->nud_state & NUD_VALID &&
1163 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1164 			struct dst_entry *dst = skb_dst(skb);
1165 			struct neighbour *n2, *n1 = neigh;
1166 			write_unlock_bh(&neigh->lock);
1167 
1168 			rcu_read_lock();
1169 			/* On shaper/eql skb->dst->neighbour != neigh :( */
1170 			if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
1171 				n1 = n2;
1172 			n1->output(n1, skb);
1173 			rcu_read_unlock();
1174 
1175 			write_lock_bh(&neigh->lock);
1176 		}
1177 		skb_queue_purge(&neigh->arp_queue);
1178 	}
1179 out:
1180 	if (update_isrouter) {
1181 		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1182 			(neigh->flags | NTF_ROUTER) :
1183 			(neigh->flags & ~NTF_ROUTER);
1184 	}
1185 	write_unlock_bh(&neigh->lock);
1186 
1187 	if (notify)
1188 		neigh_update_notify(neigh);
1189 
1190 	return err;
1191 }
1192 EXPORT_SYMBOL(neigh_update);
1193 
1194 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1195 				 u8 *lladdr, void *saddr,
1196 				 struct net_device *dev)
1197 {
1198 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1199 						 lladdr || !dev->addr_len);
1200 	if (neigh)
1201 		neigh_update(neigh, lladdr, NUD_STALE,
1202 			     NEIGH_UPDATE_F_OVERRIDE);
1203 	return neigh;
1204 }
1205 EXPORT_SYMBOL(neigh_event_ns);
1206 
1207 /* Takes write_lock_bh(&n->lock) itself; called without n->lock held. */
1208 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1209 {
1210 	struct net_device *dev = dst->dev;
1211 	__be16 prot = dst->ops->protocol;
1212 	struct hh_cache	*hh = &n->hh;
1213 
1214 	write_lock_bh(&n->lock);
1215 
1216 	/* Only one thread can come in here and initialize the
1217 	 * hh_cache entry.
1218 	 */
1219 	if (!hh->hh_len)
1220 		dev->header_ops->cache(n, hh, prot);
1221 
1222 	write_unlock_bh(&n->lock);
1223 }
1224 
1225 /* This function can be used in contexts where only the old dev_queue_xmit
1226  * worked, e.g. if you want to override the normal output path (eql, shaper)
1227  * but resolution has not been made yet.
1228  */
1229 
1230 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1231 {
1232 	struct net_device *dev = skb->dev;
1233 
1234 	__skb_pull(skb, skb_network_offset(skb));
1235 
1236 	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1237 			    skb->len) < 0 &&
1238 	    dev->header_ops->rebuild(skb))
1239 		return 0;
1240 
1241 	return dev_queue_xmit(skb);
1242 }
1243 EXPORT_SYMBOL(neigh_compat_output);
1244 
1245 /* Slow and careful. */
1246 
1247 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1248 {
1249 	struct dst_entry *dst = skb_dst(skb);
1250 	int rc = 0;
1251 
1252 	if (!dst)
1253 		goto discard;
1254 
1255 	__skb_pull(skb, skb_network_offset(skb));
1256 
1257 	if (!neigh_event_send(neigh, skb)) {
1258 		int err;
1259 		struct net_device *dev = neigh->dev;
1260 		unsigned int seq;
1261 
1262 		if (dev->header_ops->cache && !neigh->hh.hh_len)
1263 			neigh_hh_init(neigh, dst);
1264 
1265 		do {
1266 			seq = read_seqbegin(&neigh->ha_lock);
1267 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1268 					      neigh->ha, NULL, skb->len);
1269 		} while (read_seqretry(&neigh->ha_lock, seq));
1270 
1271 		if (err >= 0)
1272 			rc = dev_queue_xmit(skb);
1273 		else
1274 			goto out_kfree_skb;
1275 	}
1276 out:
1277 	return rc;
1278 discard:
1279 	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1280 		      dst, neigh);
1281 out_kfree_skb:
1282 	rc = -EINVAL;
1283 	kfree_skb(skb);
1284 	goto out;
1285 }
1286 EXPORT_SYMBOL(neigh_resolve_output);
1287 
1288 /* As fast as possible without hh cache */
1289 
1290 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1291 {
1292 	struct net_device *dev = neigh->dev;
1293 	unsigned int seq;
1294 	int err;
1295 
1296 	__skb_pull(skb, skb_network_offset(skb));
1297 
1298 	do {
1299 		seq = read_seqbegin(&neigh->ha_lock);
1300 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1301 				      neigh->ha, NULL, skb->len);
1302 	} while (read_seqretry(&neigh->ha_lock, seq));
1303 
1304 	if (err >= 0)
1305 		err = dev_queue_xmit(skb);
1306 	else {
1307 		err = -EINVAL;
1308 		kfree_skb(skb);
1309 	}
1310 	return err;
1311 }
1312 EXPORT_SYMBOL(neigh_connected_output);
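
/* In both output paths above, the read_seqbegin()/read_seqretry() loop
 * makes the copy of neigh->ha tear-free: if neigh_update() rewrites the
 * address under write_seqlock(&neigh->ha_lock) while the header is
 * being built, the sequence count changes and dev_hard_header() is
 * simply retried.
 */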
1313 
1314 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1315 {
1316 	return dev_queue_xmit(skb);
1317 }
1318 EXPORT_SYMBOL(neigh_direct_output);
1319 
1320 static void neigh_proxy_process(unsigned long arg)
1321 {
1322 	struct neigh_table *tbl = (struct neigh_table *)arg;
1323 	long sched_next = 0;
1324 	unsigned long now = jiffies;
1325 	struct sk_buff *skb, *n;
1326 
1327 	spin_lock(&tbl->proxy_queue.lock);
1328 
1329 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1330 		long tdif = NEIGH_CB(skb)->sched_next - now;
1331 
1332 		if (tdif <= 0) {
1333 			struct net_device *dev = skb->dev;
1334 
1335 			__skb_unlink(skb, &tbl->proxy_queue);
1336 			if (tbl->proxy_redo && netif_running(dev)) {
1337 				rcu_read_lock();
1338 				tbl->proxy_redo(skb);
1339 				rcu_read_unlock();
1340 			} else {
1341 				kfree_skb(skb);
1342 			}
1343 
1344 			dev_put(dev);
1345 		} else if (!sched_next || tdif < sched_next)
1346 			sched_next = tdif;
1347 	}
1348 	del_timer(&tbl->proxy_timer);
1349 	if (sched_next)
1350 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1351 	spin_unlock(&tbl->proxy_queue.lock);
1352 }
1353 
1354 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1355 		    struct sk_buff *skb)
1356 {
1357 	unsigned long now = jiffies;
1358 	unsigned long sched_next = now + (net_random() % p->proxy_delay);
1359 
1360 	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1361 		kfree_skb(skb);
1362 		return;
1363 	}
1364 
1365 	NEIGH_CB(skb)->sched_next = sched_next;
1366 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1367 
1368 	spin_lock(&tbl->proxy_queue.lock);
1369 	if (del_timer(&tbl->proxy_timer)) {
1370 		if (time_before(tbl->proxy_timer.expires, sched_next))
1371 			sched_next = tbl->proxy_timer.expires;
1372 	}
1373 	skb_dst_drop(skb);
1374 	dev_hold(skb->dev);
1375 	__skb_queue_tail(&tbl->proxy_queue, skb);
1376 	mod_timer(&tbl->proxy_timer, sched_next);
1377 	spin_unlock(&tbl->proxy_queue.lock);
1378 }
1379 EXPORT_SYMBOL(pneigh_enqueue);
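
/* Example: with proxy_delay = HZ / 2, each proxied request sits in
 * proxy_queue for a random 0 .. proxy_delay - 1 jiffies (the
 * net_random() % p->proxy_delay above), so the proxy does not always
 * answer ahead of the address's real owner.
 */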
1380 
1381 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1382 						      struct net *net, int ifindex)
1383 {
1384 	struct neigh_parms *p;
1385 
1386 	for (p = &tbl->parms; p; p = p->next) {
1387 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1388 		    (!p->dev && !ifindex))
1389 			return p;
1390 	}
1391 
1392 	return NULL;
1393 }
1394 
1395 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1396 				      struct neigh_table *tbl)
1397 {
1398 	struct neigh_parms *p, *ref;
1399 	struct net *net = dev_net(dev);
1400 	const struct net_device_ops *ops = dev->netdev_ops;
1401 
1402 	ref = lookup_neigh_parms(tbl, net, 0);
1403 	if (!ref)
1404 		return NULL;
1405 
1406 	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1407 	if (p) {
1408 		p->tbl		  = tbl;
1409 		atomic_set(&p->refcnt, 1);
1410 		p->reachable_time =
1411 				neigh_rand_reach_time(p->base_reachable_time);
1412 
1413 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1414 			kfree(p);
1415 			return NULL;
1416 		}
1417 
1418 		dev_hold(dev);
1419 		p->dev = dev;
1420 		write_pnet(&p->net, hold_net(net));
1421 		p->sysctl_table = NULL;
1422 		write_lock_bh(&tbl->lock);
1423 		p->next		= tbl->parms.next;
1424 		tbl->parms.next = p;
1425 		write_unlock_bh(&tbl->lock);
1426 	}
1427 	return p;
1428 }
1429 EXPORT_SYMBOL(neigh_parms_alloc);
1430 
1431 static void neigh_rcu_free_parms(struct rcu_head *head)
1432 {
1433 	struct neigh_parms *parms =
1434 		container_of(head, struct neigh_parms, rcu_head);
1435 
1436 	neigh_parms_put(parms);
1437 }
1438 
1439 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1440 {
1441 	struct neigh_parms **p;
1442 
1443 	if (!parms || parms == &tbl->parms)
1444 		return;
1445 	write_lock_bh(&tbl->lock);
1446 	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1447 		if (*p == parms) {
1448 			*p = parms->next;
1449 			parms->dead = 1;
1450 			write_unlock_bh(&tbl->lock);
1451 			if (parms->dev)
1452 				dev_put(parms->dev);
1453 			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1454 			return;
1455 		}
1456 	}
1457 	write_unlock_bh(&tbl->lock);
1458 	NEIGH_PRINTK1("neigh_parms_release: not found\n");
1459 }
1460 EXPORT_SYMBOL(neigh_parms_release);
1461 
1462 static void neigh_parms_destroy(struct neigh_parms *parms)
1463 {
1464 	release_net(neigh_parms_net(parms));
1465 	kfree(parms);
1466 }
1467 
1468 static struct lock_class_key neigh_table_proxy_queue_class;
1469 
1470 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1471 {
1472 	unsigned long now = jiffies;
1473 	unsigned long phsize;
1474 
1475 	write_pnet(&tbl->parms.net, &init_net);
1476 	atomic_set(&tbl->parms.refcnt, 1);
1477 	tbl->parms.reachable_time =
1478 			  neigh_rand_reach_time(tbl->parms.base_reachable_time);
1479 
1480 	if (!tbl->kmem_cachep)
1481 		tbl->kmem_cachep =
1482 			kmem_cache_create(tbl->id, tbl->entry_size, 0,
1483 					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1484 					  NULL);
1485 	tbl->stats = alloc_percpu(struct neigh_statistics);
1486 	if (!tbl->stats)
1487 		panic("cannot create neighbour cache statistics");
1488 
1489 #ifdef CONFIG_PROC_FS
1490 	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1491 			      &neigh_stat_seq_fops, tbl))
1492 		panic("cannot create neighbour proc dir entry");
1493 #endif
1494 
1495 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1496 
1497 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1498 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1499 
1500 	if (!tbl->nht || !tbl->phash_buckets)
1501 		panic("cannot allocate neighbour cache hashes");
1502 
1503 	rwlock_init(&tbl->lock);
1504 	INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1505 	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1506 	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1507 	skb_queue_head_init_class(&tbl->proxy_queue,
1508 			&neigh_table_proxy_queue_class);
1509 
1510 	tbl->last_flush = now;
1511 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1512 }
1513 EXPORT_SYMBOL(neigh_table_init_no_netlink);
1514 
1515 void neigh_table_init(struct neigh_table *tbl)
1516 {
1517 	struct neigh_table *tmp;
1518 
1519 	neigh_table_init_no_netlink(tbl);
1520 	write_lock(&neigh_tbl_lock);
1521 	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1522 		if (tmp->family == tbl->family)
1523 			break;
1524 	}
1525 	tbl->next	= neigh_tables;
1526 	neigh_tables	= tbl;
1527 	write_unlock(&neigh_tbl_lock);
1528 
1529 	if (unlikely(tmp)) {
1530 		printk(KERN_ERR "NEIGH: Registering multiple tables for "
1531 		       "family %d\n", tbl->family);
1532 		dump_stack();
1533 	}
1534 }
1535 EXPORT_SYMBOL(neigh_table_init);
1536 
1537 int neigh_table_clear(struct neigh_table *tbl)
1538 {
1539 	struct neigh_table **tp;
1540 
1541 	/* This is not clean... Fix it so the IPv6 module can be unloaded safely */
1542 	cancel_delayed_work_sync(&tbl->gc_work);
1543 	del_timer_sync(&tbl->proxy_timer);
1544 	pneigh_queue_purge(&tbl->proxy_queue);
1545 	neigh_ifdown(tbl, NULL);
1546 	if (atomic_read(&tbl->entries))
1547 		printk(KERN_CRIT "neighbour leakage\n");
1548 	write_lock(&neigh_tbl_lock);
1549 	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1550 		if (*tp == tbl) {
1551 			*tp = tbl->next;
1552 			break;
1553 		}
1554 	}
1555 	write_unlock(&neigh_tbl_lock);
1556 
1557 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1558 		 neigh_hash_free_rcu);
1559 	tbl->nht = NULL;
1560 
1561 	kfree(tbl->phash_buckets);
1562 	tbl->phash_buckets = NULL;
1563 
1564 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1565 
1566 	free_percpu(tbl->stats);
1567 	tbl->stats = NULL;
1568 
1569 	kmem_cache_destroy(tbl->kmem_cachep);
1570 	tbl->kmem_cachep = NULL;
1571 
1572 	return 0;
1573 }
1574 EXPORT_SYMBOL(neigh_table_clear);
1575 
1576 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1577 {
1578 	struct net *net = sock_net(skb->sk);
1579 	struct ndmsg *ndm;
1580 	struct nlattr *dst_attr;
1581 	struct neigh_table *tbl;
1582 	struct net_device *dev = NULL;
1583 	int err = -EINVAL;
1584 
1585 	ASSERT_RTNL();
1586 	if (nlmsg_len(nlh) < sizeof(*ndm))
1587 		goto out;
1588 
1589 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1590 	if (dst_attr == NULL)
1591 		goto out;
1592 
1593 	ndm = nlmsg_data(nlh);
1594 	if (ndm->ndm_ifindex) {
1595 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1596 		if (dev == NULL) {
1597 			err = -ENODEV;
1598 			goto out;
1599 		}
1600 	}
1601 
1602 	read_lock(&neigh_tbl_lock);
1603 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1604 		struct neighbour *neigh;
1605 
1606 		if (tbl->family != ndm->ndm_family)
1607 			continue;
1608 		read_unlock(&neigh_tbl_lock);
1609 
1610 		if (nla_len(dst_attr) < tbl->key_len)
1611 			goto out;
1612 
1613 		if (ndm->ndm_flags & NTF_PROXY) {
1614 			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1615 			goto out;
1616 		}
1617 
1618 		if (dev == NULL)
1619 			goto out;
1620 
1621 		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1622 		if (neigh == NULL) {
1623 			err = -ENOENT;
1624 			goto out;
1625 		}
1626 
1627 		err = neigh_update(neigh, NULL, NUD_FAILED,
1628 				   NEIGH_UPDATE_F_OVERRIDE |
1629 				   NEIGH_UPDATE_F_ADMIN);
1630 		neigh_release(neigh);
1631 		goto out;
1632 	}
1633 	read_unlock(&neigh_tbl_lock);
1634 	err = -EAFNOSUPPORT;
1635 
1636 out:
1637 	return err;
1638 }
1639 
1640 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1641 {
1642 	struct net *net = sock_net(skb->sk);
1643 	struct ndmsg *ndm;
1644 	struct nlattr *tb[NDA_MAX+1];
1645 	struct neigh_table *tbl;
1646 	struct net_device *dev = NULL;
1647 	int err;
1648 
1649 	ASSERT_RTNL();
1650 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1651 	if (err < 0)
1652 		goto out;
1653 
1654 	err = -EINVAL;
1655 	if (tb[NDA_DST] == NULL)
1656 		goto out;
1657 
1658 	ndm = nlmsg_data(nlh);
1659 	if (ndm->ndm_ifindex) {
1660 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1661 		if (dev == NULL) {
1662 			err = -ENODEV;
1663 			goto out;
1664 		}
1665 
1666 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1667 			goto out;
1668 	}
1669 
1670 	read_lock(&neigh_tbl_lock);
1671 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1672 		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1673 		struct neighbour *neigh;
1674 		void *dst, *lladdr;
1675 
1676 		if (tbl->family != ndm->ndm_family)
1677 			continue;
1678 		read_unlock(&neigh_tbl_lock);
1679 
1680 		if (nla_len(tb[NDA_DST]) < tbl->key_len)
1681 			goto out;
1682 		dst = nla_data(tb[NDA_DST]);
1683 		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1684 
1685 		if (ndm->ndm_flags & NTF_PROXY) {
1686 			struct pneigh_entry *pn;
1687 
1688 			err = -ENOBUFS;
1689 			pn = pneigh_lookup(tbl, net, dst, dev, 1);
1690 			if (pn) {
1691 				pn->flags = ndm->ndm_flags;
1692 				err = 0;
1693 			}
1694 			goto out;
1695 		}
1696 
1697 		if (dev == NULL)
1698 			goto out;
1699 
1700 		neigh = neigh_lookup(tbl, dst, dev);
1701 		if (neigh == NULL) {
1702 			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1703 				err = -ENOENT;
1704 				goto out;
1705 			}
1706 
1707 			neigh = __neigh_lookup_errno(tbl, dst, dev);
1708 			if (IS_ERR(neigh)) {
1709 				err = PTR_ERR(neigh);
1710 				goto out;
1711 			}
1712 		} else {
1713 			if (nlh->nlmsg_flags & NLM_F_EXCL) {
1714 				err = -EEXIST;
1715 				neigh_release(neigh);
1716 				goto out;
1717 			}
1718 
1719 			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1720 				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1721 		}
1722 
1723 		if (ndm->ndm_flags & NTF_USE) {
1724 			neigh_event_send(neigh, NULL);
1725 			err = 0;
1726 		} else
1727 			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1728 		neigh_release(neigh);
1729 		goto out;
1730 	}
1731 
1732 	read_unlock(&neigh_tbl_lock);
1733 	err = -EAFNOSUPPORT;
1734 out:
1735 	return err;
1736 }
1737 
1738 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1739 {
1740 	struct nlattr *nest;
1741 
1742 	nest = nla_nest_start(skb, NDTA_PARMS);
1743 	if (nest == NULL)
1744 		return -ENOBUFS;
1745 
1746 	if (parms->dev)
1747 		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1748 
1749 	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1750 	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1751 	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1752 	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1753 	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1754 	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1755 	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1756 	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1757 		      parms->base_reachable_time);
1758 	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1759 	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1760 	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1761 	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1762 	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1763 	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1764 
1765 	return nla_nest_end(skb, nest);
1766 
1767 nla_put_failure:
1768 	nla_nest_cancel(skb, nest);
1769 	return -EMSGSIZE;
1770 }
1771 
1772 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1773 			      u32 pid, u32 seq, int type, int flags)
1774 {
1775 	struct nlmsghdr *nlh;
1776 	struct ndtmsg *ndtmsg;
1777 
1778 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1779 	if (nlh == NULL)
1780 		return -EMSGSIZE;
1781 
1782 	ndtmsg = nlmsg_data(nlh);
1783 
1784 	read_lock_bh(&tbl->lock);
1785 	ndtmsg->ndtm_family = tbl->family;
1786 	ndtmsg->ndtm_pad1   = 0;
1787 	ndtmsg->ndtm_pad2   = 0;
1788 
1789 	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1790 	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1791 	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1792 	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1793 	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1794 
1795 	{
1796 		unsigned long now = jiffies;
1797 		unsigned int flush_delta = now - tbl->last_flush;
1798 		unsigned int rand_delta = now - tbl->last_rand;
1799 		struct neigh_hash_table *nht;
1800 		struct ndt_config ndc = {
1801 			.ndtc_key_len		= tbl->key_len,
1802 			.ndtc_entry_size	= tbl->entry_size,
1803 			.ndtc_entries		= atomic_read(&tbl->entries),
1804 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1805 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1806 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1807 		};
1808 
1809 		rcu_read_lock_bh();
1810 		nht = rcu_dereference_bh(tbl->nht);
1811 		ndc.ndtc_hash_rnd = nht->hash_rnd;
1812 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1813 		rcu_read_unlock_bh();
1814 
1815 		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1816 	}
1817 
1818 	{
1819 		int cpu;
1820 		struct ndt_stats ndst;
1821 
1822 		memset(&ndst, 0, sizeof(ndst));
1823 
1824 		for_each_possible_cpu(cpu) {
1825 			struct neigh_statistics	*st;
1826 
1827 			st = per_cpu_ptr(tbl->stats, cpu);
1828 			ndst.ndts_allocs		+= st->allocs;
1829 			ndst.ndts_destroys		+= st->destroys;
1830 			ndst.ndts_hash_grows		+= st->hash_grows;
1831 			ndst.ndts_res_failed		+= st->res_failed;
1832 			ndst.ndts_lookups		+= st->lookups;
1833 			ndst.ndts_hits			+= st->hits;
1834 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1835 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1836 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1837 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1838 		}
1839 
1840 		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1841 	}
1842 
1843 	BUG_ON(tbl->parms.dev);
1844 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1845 		goto nla_put_failure;
1846 
1847 	read_unlock_bh(&tbl->lock);
1848 	return nlmsg_end(skb, nlh);
1849 
1850 nla_put_failure:
1851 	read_unlock_bh(&tbl->lock);
1852 	nlmsg_cancel(skb, nlh);
1853 	return -EMSGSIZE;
1854 }
1855 
1856 static int neightbl_fill_param_info(struct sk_buff *skb,
1857 				    struct neigh_table *tbl,
1858 				    struct neigh_parms *parms,
1859 				    u32 pid, u32 seq, int type,
1860 				    unsigned int flags)
1861 {
1862 	struct ndtmsg *ndtmsg;
1863 	struct nlmsghdr *nlh;
1864 
1865 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1866 	if (nlh == NULL)
1867 		return -EMSGSIZE;
1868 
1869 	ndtmsg = nlmsg_data(nlh);
1870 
1871 	read_lock_bh(&tbl->lock);
1872 	ndtmsg->ndtm_family = tbl->family;
1873 	ndtmsg->ndtm_pad1   = 0;
1874 	ndtmsg->ndtm_pad2   = 0;
1875 
1876 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1877 	    neightbl_fill_parms(skb, parms) < 0)
1878 		goto errout;
1879 
1880 	read_unlock_bh(&tbl->lock);
1881 	return nlmsg_end(skb, nlh);
1882 errout:
1883 	read_unlock_bh(&tbl->lock);
1884 	nlmsg_cancel(skb, nlh);
1885 	return -EMSGSIZE;
1886 }
1887 
1888 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1889 	[NDTA_NAME]		= { .type = NLA_STRING },
1890 	[NDTA_THRESH1]		= { .type = NLA_U32 },
1891 	[NDTA_THRESH2]		= { .type = NLA_U32 },
1892 	[NDTA_THRESH3]		= { .type = NLA_U32 },
1893 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1894 	[NDTA_PARMS]		= { .type = NLA_NESTED },
1895 };
1896 
1897 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1898 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1899 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1900 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1901 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1902 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1903 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1904 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1905 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1906 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1907 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1908 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1909 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
1910 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
1911 };
1912 
1913 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1914 {
1915 	struct net *net = sock_net(skb->sk);
1916 	struct neigh_table *tbl;
1917 	struct ndtmsg *ndtmsg;
1918 	struct nlattr *tb[NDTA_MAX+1];
1919 	int err;
1920 
1921 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1922 			  nl_neightbl_policy);
1923 	if (err < 0)
1924 		goto errout;
1925 
1926 	if (tb[NDTA_NAME] == NULL) {
1927 		err = -EINVAL;
1928 		goto errout;
1929 	}
1930 
1931 	ndtmsg = nlmsg_data(nlh);
1932 	read_lock(&neigh_tbl_lock);
1933 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1934 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1935 			continue;
1936 
1937 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1938 			break;
1939 	}
1940 
1941 	if (tbl == NULL) {
1942 		err = -ENOENT;
1943 		goto errout_locked;
1944 	}
1945 
1946 	/*
1947 	 * We acquire tbl->lock to be nice to the periodic timers and
1948 	 * make sure they always see a consistent set of values.
1949 	 */
1950 	write_lock_bh(&tbl->lock);
1951 
1952 	if (tb[NDTA_PARMS]) {
1953 		struct nlattr *tbp[NDTPA_MAX+1];
1954 		struct neigh_parms *p;
1955 		int i, ifindex = 0;
1956 
1957 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1958 				       nl_ntbl_parm_policy);
1959 		if (err < 0)
1960 			goto errout_tbl_lock;
1961 
1962 		if (tbp[NDTPA_IFINDEX])
1963 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1964 
1965 		p = lookup_neigh_parms(tbl, net, ifindex);
1966 		if (p == NULL) {
1967 			err = -ENOENT;
1968 			goto errout_tbl_lock;
1969 		}
1970 
1971 		for (i = 1; i <= NDTPA_MAX; i++) {
1972 			if (tbp[i] == NULL)
1973 				continue;
1974 
1975 			switch (i) {
1976 			case NDTPA_QUEUE_LEN:
1977 				p->queue_len = nla_get_u32(tbp[i]);
1978 				break;
1979 			case NDTPA_PROXY_QLEN:
1980 				p->proxy_qlen = nla_get_u32(tbp[i]);
1981 				break;
1982 			case NDTPA_APP_PROBES:
1983 				p->app_probes = nla_get_u32(tbp[i]);
1984 				break;
1985 			case NDTPA_UCAST_PROBES:
1986 				p->ucast_probes = nla_get_u32(tbp[i]);
1987 				break;
1988 			case NDTPA_MCAST_PROBES:
1989 				p->mcast_probes = nla_get_u32(tbp[i]);
1990 				break;
1991 			case NDTPA_BASE_REACHABLE_TIME:
1992 				p->base_reachable_time = nla_get_msecs(tbp[i]);
1993 				break;
1994 			case NDTPA_GC_STALETIME:
1995 				p->gc_staletime = nla_get_msecs(tbp[i]);
1996 				break;
1997 			case NDTPA_DELAY_PROBE_TIME:
1998 				p->delay_probe_time = nla_get_msecs(tbp[i]);
1999 				break;
2000 			case NDTPA_RETRANS_TIME:
2001 				p->retrans_time = nla_get_msecs(tbp[i]);
2002 				break;
2003 			case NDTPA_ANYCAST_DELAY:
2004 				p->anycast_delay = nla_get_msecs(tbp[i]);
2005 				break;
2006 			case NDTPA_PROXY_DELAY:
2007 				p->proxy_delay = nla_get_msecs(tbp[i]);
2008 				break;
2009 			case NDTPA_LOCKTIME:
2010 				p->locktime = nla_get_msecs(tbp[i]);
2011 				break;
2012 			}
2013 		}
2014 	}
2015 
2016 	if (tb[NDTA_THRESH1])
2017 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2018 
2019 	if (tb[NDTA_THRESH2])
2020 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2021 
2022 	if (tb[NDTA_THRESH3])
2023 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2024 
2025 	if (tb[NDTA_GC_INTERVAL])
2026 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2027 
2028 	err = 0;
2029 
2030 errout_tbl_lock:
2031 	write_unlock_bh(&tbl->lock);
2032 errout_locked:
2033 	read_unlock(&neigh_tbl_lock);
2034 errout:
2035 	return err;
2036 }
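
/*
 * neightbl_set() is the kernel side of iproute2's "ip ntable"; a
 * command along the lines of
 *
 *	ip ntable change name arp_cache thresh1 512
 *
 * arrives here as RTM_SETNEIGHTBL carrying NDTA_NAME plus the changed
 * attributes.
 */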
2037 
2038 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2039 {
2040 	struct net *net = sock_net(skb->sk);
2041 	int family, tidx, nidx = 0;
2042 	int tbl_skip = cb->args[0];
2043 	int neigh_skip = cb->args[1];
2044 	struct neigh_table *tbl;
2045 
2046 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2047 
2048 	read_lock(&neigh_tbl_lock);
2049 	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2050 		struct neigh_parms *p;
2051 
2052 		if (tidx < tbl_skip || (family && tbl->family != family))
2053 			continue;
2054 
2055 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2056 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2057 				       NLM_F_MULTI) <= 0)
2058 			break;
2059 
2060 		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2061 			if (!net_eq(neigh_parms_net(p), net))
2062 				continue;
2063 
2064 			if (nidx < neigh_skip)
2065 				goto next;
2066 
2067 			if (neightbl_fill_param_info(skb, tbl, p,
2068 						     NETLINK_CB(cb->skb).pid,
2069 						     cb->nlh->nlmsg_seq,
2070 						     RTM_NEWNEIGHTBL,
2071 						     NLM_F_MULTI) <= 0)
2072 				goto out;
2073 		next:
2074 			nidx++;
2075 		}
2076 
2077 		neigh_skip = 0;
2078 	}
2079 out:
2080 	read_unlock(&neigh_tbl_lock);
2081 	cb->args[0] = tidx;
2082 	cb->args[1] = nidx;
2083 
2084 	return skb->len;
2085 }
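
/*
 * Note the two-level resume cursor in neightbl_dump_info(): cb->args[0]
 * holds the table index and cb->args[1] the per-table parms index, so a
 * dump that fills the skb restarts exactly where the previous pass
 * stopped.
 */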
2086 
2087 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2088 			   u32 pid, u32 seq, int type, unsigned int flags)
2089 {
2090 	unsigned long now = jiffies;
2091 	struct nda_cacheinfo ci;
2092 	struct nlmsghdr *nlh;
2093 	struct ndmsg *ndm;
2094 
2095 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2096 	if (nlh == NULL)
2097 		return -EMSGSIZE;
2098 
2099 	ndm = nlmsg_data(nlh);
2100 	ndm->ndm_family	 = neigh->ops->family;
2101 	ndm->ndm_pad1    = 0;
2102 	ndm->ndm_pad2    = 0;
2103 	ndm->ndm_flags	 = neigh->flags;
2104 	ndm->ndm_type	 = neigh->type;
2105 	ndm->ndm_ifindex = neigh->dev->ifindex;
2106 
2107 	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2108 
2109 	read_lock_bh(&neigh->lock);
2110 	ndm->ndm_state	 = neigh->nud_state;
2111 	if (neigh->nud_state & NUD_VALID) {
2112 		char haddr[MAX_ADDR_LEN];
2113 
2114 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2115 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2116 			read_unlock_bh(&neigh->lock);
2117 			goto nla_put_failure;
2118 		}
2119 	}
2120 
2121 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2122 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2123 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2124 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
2125 	read_unlock_bh(&neigh->lock);
2126 
2127 	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2128 	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2129 
2130 	return nlmsg_end(skb, nlh);
2131 
2132 nla_put_failure:
2133 	nlmsg_cancel(skb, nlh);
2134 	return -EMSGSIZE;
2135 }
2136 
2137 static void neigh_update_notify(struct neighbour *neigh)
2138 {
2139 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2140 	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
2141 }
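
/*
 * Every accepted update therefore fans out twice: once to in-kernel
 * listeners on the netevent chain, once to userspace as an
 * RTM_NEWNEIGH multicast on RTNLGRP_NEIGH.
 */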
2142 
2143 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2144 			    struct netlink_callback *cb)
2145 {
2146 	struct net *net = sock_net(skb->sk);
2147 	struct neighbour *n;
2148 	int rc, h, s_h = cb->args[1];
2149 	int idx, s_idx = idx = cb->args[2];
2150 	struct neigh_hash_table *nht;
2151 
2152 	rcu_read_lock_bh();
2153 	nht = rcu_dereference_bh(tbl->nht);
2154 
2155 	for (h = 0; h < (1 << nht->hash_shift); h++) {
2156 		if (h < s_h)
2157 			continue;
2158 		if (h > s_h)
2159 			s_idx = 0;
2160 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2161 		     n != NULL;
2162 		     n = rcu_dereference_bh(n->next)) {
2163 			if (!net_eq(dev_net(n->dev), net))
2164 				continue;
2165 			if (idx < s_idx)
2166 				goto next;
2167 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2168 					    cb->nlh->nlmsg_seq,
2169 					    RTM_NEWNEIGH,
2170 					    NLM_F_MULTI) <= 0) {
2171 				rc = -1;
2172 				goto out;
2173 			}
2174 next:
2175 			idx++;
2176 		}
2177 	}
2178 	rc = skb->len;
2179 out:
2180 	rcu_read_unlock_bh();
2181 	cb->args[1] = h;
2182 	cb->args[2] = idx;
2183 	return rc;
2184 }
2185 
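/*
 * Dump side of RTM_GETNEIGH: "ip neigh show" (optionally restricted by
 * family, e.g. "ip -4 neigh") lands here and gets one RTM_NEWNEIGH
 * message per entry from neigh_fill_info().
 */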
2186 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2187 {
2188 	struct neigh_table *tbl;
2189 	int t, family, s_t;
2190 
2191 	read_lock(&neigh_tbl_lock);
2192 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2193 	s_t = cb->args[0];
2194 
2195 	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2196 		if (t < s_t || (family && tbl->family != family))
2197 			continue;
2198 		if (t > s_t)
2199 			memset(&cb->args[1], 0, sizeof(cb->args) -
2200 						sizeof(cb->args[0]));
2201 		if (neigh_dump_table(tbl, skb, cb) < 0)
2202 			break;
2203 	}
2204 	read_unlock(&neigh_tbl_lock);
2205 
2206 	cb->args[0] = t;
2207 	return skb->len;
2208 }
2209 
2210 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2211 {
2212 	int chain;
2213 	struct neigh_hash_table *nht;
2214 
2215 	rcu_read_lock_bh();
2216 	nht = rcu_dereference_bh(tbl->nht);
2217 
2218 	read_lock(&tbl->lock); /* avoid resizes */
2219 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2220 		struct neighbour *n;
2221 
2222 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2223 		     n != NULL;
2224 		     n = rcu_dereference_bh(n->next))
2225 			cb(n, cookie);
2226 	}
2227 	read_unlock(&tbl->lock);
2228 	rcu_read_unlock_bh();
2229 }
2230 EXPORT_SYMBOL(neigh_for_each);
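
/*
 * A minimal, hypothetical neigh_for_each() caller, counting the entries
 * in a table:
 *
 *	static void neigh_count_cb(struct neighbour *n, void *cookie)
 *	{
 *		(*(int *)cookie)++;
 *	}
 *
 *	int count = 0;
 *	neigh_for_each(&arp_tbl, neigh_count_cb, &count);
 *
 * The callback runs with tbl->lock read-held and BHs disabled, so it
 * must neither sleep nor call back into the neighbour layer.
 */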
2231 
2232 /* The tbl->lock must be held as a writer and BH disabled. */
2233 void __neigh_for_each_release(struct neigh_table *tbl,
2234 			      int (*cb)(struct neighbour *))
2235 {
2236 	int chain;
2237 	struct neigh_hash_table *nht;
2238 
2239 	nht = rcu_dereference_protected(tbl->nht,
2240 					lockdep_is_held(&tbl->lock));
2241 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2242 		struct neighbour *n;
2243 		struct neighbour __rcu **np;
2244 
2245 		np = &nht->hash_buckets[chain];
2246 		while ((n = rcu_dereference_protected(*np,
2247 					lockdep_is_held(&tbl->lock))) != NULL) {
2248 			int release;
2249 
2250 			write_lock(&n->lock);
2251 			release = cb(n);
2252 			if (release) {
2253 				rcu_assign_pointer(*np,
2254 					rcu_dereference_protected(n->next,
2255 						lockdep_is_held(&tbl->lock)));
2256 				n->dead = 1;
2257 			} else
2258 				np = &n->next;
2259 			write_unlock(&n->lock);
2260 			if (release)
2261 				neigh_cleanup_and_release(n);
2262 		}
2263 	}
2264 }
2265 EXPORT_SYMBOL(__neigh_for_each_release);
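
/*
 * Sketch of the intended __neigh_for_each_release() usage (callback
 * name hypothetical): the callback decides per entry whether it is
 * unlinked and released.
 *
 *	static int flush_invalid(struct neighbour *n)
 *	{
 *		return !(n->nud_state & NUD_VALID);
 *	}
 *
 *	write_lock_bh(&tbl->lock);
 *	__neigh_for_each_release(tbl, flush_invalid);
 *	write_unlock_bh(&tbl->lock);
 *
 * A nonzero return unlinks the entry, marks it dead and drops the
 * table's reference through neigh_cleanup_and_release().
 */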
2266 
2267 #ifdef CONFIG_PROC_FS
2268 
2269 static struct neighbour *neigh_get_first(struct seq_file *seq)
2270 {
2271 	struct neigh_seq_state *state = seq->private;
2272 	struct net *net = seq_file_net(seq);
2273 	struct neigh_hash_table *nht = state->nht;
2274 	struct neighbour *n = NULL;
2275 	int bucket = state->bucket;
2276 
2277 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2278 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2279 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2280 
2281 		while (n) {
2282 			if (!net_eq(dev_net(n->dev), net))
2283 				goto next;
2284 			if (state->neigh_sub_iter) {
2285 				loff_t fakep = 0;
2286 				void *v;
2287 
2288 				v = state->neigh_sub_iter(state, n, &fakep);
2289 				if (!v)
2290 					goto next;
2291 			}
2292 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2293 				break;
2294 			if (n->nud_state & ~NUD_NOARP)
2295 				break;
2296 next:
2297 			n = rcu_dereference_bh(n->next);
2298 		}
2299 
2300 		if (n)
2301 			break;
2302 	}
2303 	state->bucket = bucket;
2304 
2305 	return n;
2306 }
2307 
2308 static struct neighbour *neigh_get_next(struct seq_file *seq,
2309 					struct neighbour *n,
2310 					loff_t *pos)
2311 {
2312 	struct neigh_seq_state *state = seq->private;
2313 	struct net *net = seq_file_net(seq);
2314 	struct neigh_hash_table *nht = state->nht;
2315 
2316 	if (state->neigh_sub_iter) {
2317 		void *v = state->neigh_sub_iter(state, n, pos);
2318 		if (v)
2319 			return n;
2320 	}
2321 	n = rcu_dereference_bh(n->next);
2322 
2323 	while (1) {
2324 		while (n) {
2325 			if (!net_eq(dev_net(n->dev), net))
2326 				goto next;
2327 			if (state->neigh_sub_iter) {
2328 				void *v = state->neigh_sub_iter(state, n, pos);
2329 				if (v)
2330 					return n;
2331 				goto next;
2332 			}
2333 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2334 				break;
2335 
2336 			if (n->nud_state & ~NUD_NOARP)
2337 				break;
2338 next:
2339 			n = rcu_dereference_bh(n->next);
2340 		}
2341 
2342 		if (n)
2343 			break;
2344 
2345 		if (++state->bucket >= (1 << nht->hash_shift))
2346 			break;
2347 
2348 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2349 	}
2350 
2351 	if (n && pos)
2352 		--(*pos);
2353 	return n;
2354 }
2355 
2356 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2357 {
2358 	struct neighbour *n = neigh_get_first(seq);
2359 
2360 	if (n) {
2361 		--(*pos);
2362 		while (*pos) {
2363 			n = neigh_get_next(seq, n, pos);
2364 			if (!n)
2365 				break;
2366 		}
2367 	}
2368 	return *pos ? NULL : n;
2369 }
2370 
2371 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2372 {
2373 	struct neigh_seq_state *state = seq->private;
2374 	struct net *net = seq_file_net(seq);
2375 	struct neigh_table *tbl = state->tbl;
2376 	struct pneigh_entry *pn = NULL;
2377 	int bucket = state->bucket;
2378 
2379 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2380 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2381 		pn = tbl->phash_buckets[bucket];
2382 		while (pn && !net_eq(pneigh_net(pn), net))
2383 			pn = pn->next;
2384 		if (pn)
2385 			break;
2386 	}
2387 	state->bucket = bucket;
2388 
2389 	return pn;
2390 }
2391 
2392 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2393 					    struct pneigh_entry *pn,
2394 					    loff_t *pos)
2395 {
2396 	struct neigh_seq_state *state = seq->private;
2397 	struct net *net = seq_file_net(seq);
2398 	struct neigh_table *tbl = state->tbl;
2399 
2400 	pn = pn->next;
2401 	while (!pn) {
2402 		if (++state->bucket > PNEIGH_HASHMASK)
2403 			break;
2404 		pn = tbl->phash_buckets[state->bucket];
2405 		while (pn && !net_eq(pneigh_net(pn), net))
2406 			pn = pn->next;
2407 		if (pn)
2408 			break;
2409 	}
2410 
2411 	if (pn && pos)
2412 		--(*pos);
2413 
2414 	return pn;
2415 }
2416 
2417 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2418 {
2419 	struct pneigh_entry *pn = pneigh_get_first(seq);
2420 
2421 	if (pn) {
2422 		--(*pos);
2423 		while (*pos) {
2424 			pn = pneigh_get_next(seq, pn, pos);
2425 			if (!pn)
2426 				break;
2427 		}
2428 	}
2429 	return *pos ? NULL : pn;
2430 }
2431 
2432 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2433 {
2434 	struct neigh_seq_state *state = seq->private;
2435 	void *rc;
2436 	loff_t idxpos = *pos;
2437 
2438 	rc = neigh_get_idx(seq, &idxpos);
2439 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2440 		rc = pneigh_get_idx(seq, &idxpos);
2441 
2442 	return rc;
2443 }
2444 
2445 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2446 	__acquires(rcu_bh)
2447 {
2448 	struct neigh_seq_state *state = seq->private;
2449 
2450 	state->tbl = tbl;
2451 	state->bucket = 0;
2452 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2453 
2454 	rcu_read_lock_bh();
2455 	state->nht = rcu_dereference_bh(tbl->nht);
2456 
2457 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2458 }
2459 EXPORT_SYMBOL(neigh_seq_start);
2460 
2461 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2462 {
2463 	struct neigh_seq_state *state;
2464 	void *rc;
2465 
2466 	if (v == SEQ_START_TOKEN) {
2467 		rc = neigh_get_first(seq);
2468 		goto out;
2469 	}
2470 
2471 	state = seq->private;
2472 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2473 		rc = neigh_get_next(seq, v, NULL);
2474 		if (rc)
2475 			goto out;
2476 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2477 			rc = pneigh_get_first(seq);
2478 	} else {
2479 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2480 		rc = pneigh_get_next(seq, v, NULL);
2481 	}
2482 out:
2483 	++(*pos);
2484 	return rc;
2485 }
2486 EXPORT_SYMBOL(neigh_seq_next);
2487 
2488 void neigh_seq_stop(struct seq_file *seq, void *v)
2489 	__releases(rcu_bh)
2490 {
2491 	rcu_read_unlock_bh();
2492 }
2493 EXPORT_SYMBOL(neigh_seq_stop);
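
/*
 * Protocols wrap these three helpers with their own seq_operations;
 * ARP's /proc/net/arp iterator, for instance, starts out roughly as
 *
 *	static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 * (see net/ipv4/arp.c) and reuses neigh_seq_next() and neigh_seq_stop()
 * unchanged.
 */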
2494 
2495 /* statistics via seq_file */
2496 
2497 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2498 {
2499 	struct neigh_table *tbl = seq->private;
2500 	int cpu;
2501 
2502 	if (*pos == 0)
2503 		return SEQ_START_TOKEN;
2504 
2505 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2506 		if (!cpu_possible(cpu))
2507 			continue;
2508 		*pos = cpu+1;
2509 		return per_cpu_ptr(tbl->stats, cpu);
2510 	}
2511 	return NULL;
2512 }
2513 
2514 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2515 {
2516 	struct neigh_table *tbl = seq->private;
2517 	int cpu;
2518 
2519 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2520 		if (!cpu_possible(cpu))
2521 			continue;
2522 		*pos = cpu+1;
2523 		return per_cpu_ptr(tbl->stats, cpu);
2524 	}
2525 	return NULL;
2526 }
2527 
2528 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2529 {
2530 
2531 }
2532 
2533 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2534 {
2535 	struct neigh_table *tbl = seq->private;
2536 	struct neigh_statistics *st = v;
2537 
2538 	if (v == SEQ_START_TOKEN) {
2539 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2540 		return 0;
2541 	}
2542 
2543 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2544 			"%08lx %08lx  %08lx %08lx %08lx\n",
2545 		   atomic_read(&tbl->entries),
2546 
2547 		   st->allocs,
2548 		   st->destroys,
2549 		   st->hash_grows,
2550 
2551 		   st->lookups,
2552 		   st->hits,
2553 
2554 		   st->res_failed,
2555 
2556 		   st->rcv_probes_mcast,
2557 		   st->rcv_probes_ucast,
2558 
2559 		   st->periodic_gc_runs,
2560 		   st->forced_gc_runs,
2561 		   st->unres_discards
2562 		   );
2563 
2564 	return 0;
2565 }
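
/*
 * One hex line per possible CPU; the file itself is registered at table
 * init time as /proc/net/stat/<tbl->id>, e.g. /proc/net/stat/arp_cache.
 */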
2566 
2567 static const struct seq_operations neigh_stat_seq_ops = {
2568 	.start	= neigh_stat_seq_start,
2569 	.next	= neigh_stat_seq_next,
2570 	.stop	= neigh_stat_seq_stop,
2571 	.show	= neigh_stat_seq_show,
2572 };
2573 
2574 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2575 {
2576 	int ret = seq_open(file, &neigh_stat_seq_ops);
2577 
2578 	if (!ret) {
2579 		struct seq_file *sf = file->private_data;
2580 		sf->private = PDE(inode)->data;
2581 	}
2582 	return ret;
2583 }
2584 
2585 static const struct file_operations neigh_stat_seq_fops = {
2586 	.owner	 = THIS_MODULE,
2587 	.open 	 = neigh_stat_seq_open,
2588 	.read	 = seq_read,
2589 	.llseek	 = seq_lseek,
2590 	.release = seq_release,
2591 };
2592 
2593 #endif /* CONFIG_PROC_FS */
2594 
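/*
 * Worst-case message size: both address attributes are budgeted at
 * MAX_ADDR_LEN even though neigh_fill_info() emits only key_len and
 * addr_len bytes.  That is why -EMSGSIZE from the fill is treated as a
 * bug (see the WARN_ON in __neigh_notify() below) rather than a normal
 * runtime condition.
 */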
2595 static inline size_t neigh_nlmsg_size(void)
2596 {
2597 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2598 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2599 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2600 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2601 	       + nla_total_size(4); /* NDA_PROBES */
2602 }
2603 
2604 static void __neigh_notify(struct neighbour *n, int type, int flags)
2605 {
2606 	struct net *net = dev_net(n->dev);
2607 	struct sk_buff *skb;
2608 	int err = -ENOBUFS;
2609 
2610 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2611 	if (skb == NULL)
2612 		goto errout;
2613 
2614 	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2615 	if (err < 0) {
2616 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2617 		WARN_ON(err == -EMSGSIZE);
2618 		kfree_skb(skb);
2619 		goto errout;
2620 	}
2621 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2622 	return;
2623 errout:
2624 	if (err < 0)
2625 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2626 }
2627 
2628 #ifdef CONFIG_ARPD
2629 void neigh_app_ns(struct neighbour *n)
2630 {
2631 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2632 }
2633 EXPORT_SYMBOL(neigh_app_ns);
2634 #endif /* CONFIG_ARPD */
2635 
2636 #ifdef CONFIG_SYSCTL
2637 
2638 #define NEIGH_VARS_MAX 19
2639 
2640 static struct neigh_sysctl_table {
2641 	struct ctl_table_header *sysctl_header;
2642 	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
2643 	char *dev_name;
2644 } neigh_sysctl_template __read_mostly = {
2645 	.neigh_vars = {
2646 		{
2647 			.procname	= "mcast_solicit",
2648 			.maxlen		= sizeof(int),
2649 			.mode		= 0644,
2650 			.proc_handler	= proc_dointvec,
2651 		},
2652 		{
2653 			.procname	= "ucast_solicit",
2654 			.maxlen		= sizeof(int),
2655 			.mode		= 0644,
2656 			.proc_handler	= proc_dointvec,
2657 		},
2658 		{
2659 			.procname	= "app_solicit",
2660 			.maxlen		= sizeof(int),
2661 			.mode		= 0644,
2662 			.proc_handler	= proc_dointvec,
2663 		},
2664 		{
2665 			.procname	= "retrans_time",
2666 			.maxlen		= sizeof(int),
2667 			.mode		= 0644,
2668 			.proc_handler	= proc_dointvec_userhz_jiffies,
2669 		},
2670 		{
2671 			.procname	= "base_reachable_time",
2672 			.maxlen		= sizeof(int),
2673 			.mode		= 0644,
2674 			.proc_handler	= proc_dointvec_jiffies,
2675 		},
2676 		{
2677 			.procname	= "delay_first_probe_time",
2678 			.maxlen		= sizeof(int),
2679 			.mode		= 0644,
2680 			.proc_handler	= proc_dointvec_jiffies,
2681 		},
2682 		{
2683 			.procname	= "gc_stale_time",
2684 			.maxlen		= sizeof(int),
2685 			.mode		= 0644,
2686 			.proc_handler	= proc_dointvec_jiffies,
2687 		},
2688 		{
2689 			.procname	= "unres_qlen",
2690 			.maxlen		= sizeof(int),
2691 			.mode		= 0644,
2692 			.proc_handler	= proc_dointvec,
2693 		},
2694 		{
2695 			.procname	= "proxy_qlen",
2696 			.maxlen		= sizeof(int),
2697 			.mode		= 0644,
2698 			.proc_handler	= proc_dointvec,
2699 		},
2700 		{
2701 			.procname	= "anycast_delay",
2702 			.maxlen		= sizeof(int),
2703 			.mode		= 0644,
2704 			.proc_handler	= proc_dointvec_userhz_jiffies,
2705 		},
2706 		{
2707 			.procname	= "proxy_delay",
2708 			.maxlen		= sizeof(int),
2709 			.mode		= 0644,
2710 			.proc_handler	= proc_dointvec_userhz_jiffies,
2711 		},
2712 		{
2713 			.procname	= "locktime",
2714 			.maxlen		= sizeof(int),
2715 			.mode		= 0644,
2716 			.proc_handler	= proc_dointvec_userhz_jiffies,
2717 		},
2718 		{
2719 			.procname	= "retrans_time_ms",
2720 			.maxlen		= sizeof(int),
2721 			.mode		= 0644,
2722 			.proc_handler	= proc_dointvec_ms_jiffies,
2723 		},
2724 		{
2725 			.procname	= "base_reachable_time_ms",
2726 			.maxlen		= sizeof(int),
2727 			.mode		= 0644,
2728 			.proc_handler	= proc_dointvec_ms_jiffies,
2729 		},
2730 		{
2731 			.procname	= "gc_interval",
2732 			.maxlen		= sizeof(int),
2733 			.mode		= 0644,
2734 			.proc_handler	= proc_dointvec_jiffies,
2735 		},
2736 		{
2737 			.procname	= "gc_thresh1",
2738 			.maxlen		= sizeof(int),
2739 			.mode		= 0644,
2740 			.proc_handler	= proc_dointvec,
2741 		},
2742 		{
2743 			.procname	= "gc_thresh2",
2744 			.maxlen		= sizeof(int),
2745 			.mode		= 0644,
2746 			.proc_handler	= proc_dointvec,
2747 		},
2748 		{
2749 			.procname	= "gc_thresh3",
2750 			.maxlen		= sizeof(int),
2751 			.mode		= 0644,
2752 			.proc_handler	= proc_dointvec,
2753 		},
2754 		{},
2755 	},
2756 };
2757 
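/*
 * neigh_sysctl_register() addresses neigh_vars[] by bare index, so the
 * slot order in the template above matters: 0 mcast_solicit,
 * 1 ucast_solicit, 2 app_solicit, 3 retrans_time,
 * 4 base_reachable_time, 5 delay_first_probe_time, 6 gc_stale_time,
 * 7 unres_qlen, 8 proxy_qlen, 9 anycast_delay, 10 proxy_delay,
 * 11 locktime, 12 retrans_time_ms, 13 base_reachable_time_ms,
 * 14 gc_interval, 15-17 gc_thresh1..gc_thresh3.
 */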
2758 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2759 			  char *p_name, proc_handler *handler)
2760 {
2761 	struct neigh_sysctl_table *t;
2762 	const char *dev_name_source = NULL;
2763 
2764 #define NEIGH_CTL_PATH_ROOT	0
2765 #define NEIGH_CTL_PATH_PROTO	1
2766 #define NEIGH_CTL_PATH_NEIGH	2
2767 #define NEIGH_CTL_PATH_DEV	3
2768 
2769 	struct ctl_path neigh_path[] = {
2770 		{ .procname = "net",	 },
2771 		{ .procname = "proto",	 },
2772 		{ .procname = "neigh",	 },
2773 		{ .procname = "default", },
2774 		{ },
2775 	};
2776 
2777 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2778 	if (!t)
2779 		goto err;
2780 
2781 	t->neigh_vars[0].data  = &p->mcast_probes;
2782 	t->neigh_vars[1].data  = &p->ucast_probes;
2783 	t->neigh_vars[2].data  = &p->app_probes;
2784 	t->neigh_vars[3].data  = &p->retrans_time;
2785 	t->neigh_vars[4].data  = &p->base_reachable_time;
2786 	t->neigh_vars[5].data  = &p->delay_probe_time;
2787 	t->neigh_vars[6].data  = &p->gc_staletime;
2788 	t->neigh_vars[7].data  = &p->queue_len;
2789 	t->neigh_vars[8].data  = &p->proxy_qlen;
2790 	t->neigh_vars[9].data  = &p->anycast_delay;
2791 	t->neigh_vars[10].data = &p->proxy_delay;
2792 	t->neigh_vars[11].data = &p->locktime;
2793 	t->neigh_vars[12].data = &p->retrans_time;
2794 	t->neigh_vars[13].data = &p->base_reachable_time;
2795 
2796 	if (dev) {
2797 		dev_name_source = dev->name;
2798 		/* Terminate the table early */
2799 		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2800 	} else {
2801 		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2802 		t->neigh_vars[14].data = (int *)(p + 1);
2803 		t->neigh_vars[15].data = (int *)(p + 1) + 1;
2804 		t->neigh_vars[16].data = (int *)(p + 1) + 2;
2805 		t->neigh_vars[17].data = (int *)(p + 1) + 3;
2806 	}
2807 
2808 
2809 	if (handler) {
2810 		/* RetransTime */
2811 		t->neigh_vars[3].proc_handler = handler;
2812 		t->neigh_vars[3].extra1 = dev;
2813 		/* ReachableTime */
2814 		t->neigh_vars[4].proc_handler = handler;
2815 		t->neigh_vars[4].extra1 = dev;
2816 		/* RetransTime (in milliseconds)*/
2817 		t->neigh_vars[12].proc_handler = handler;
2818 		t->neigh_vars[12].extra1 = dev;
2819 		/* ReachableTime (in milliseconds) */
2820 		t->neigh_vars[13].proc_handler = handler;
2821 		t->neigh_vars[13].extra1 = dev;
2822 	}
2823 
2824 	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2825 	if (!t->dev_name)
2826 		goto free;
2827 
2828 	neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2829 	neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2830 
2831 	t->sysctl_header =
2832 		register_net_sysctl_table(neigh_parms_net(p), neigh_path, t->neigh_vars);
2833 	if (!t->sysctl_header)
2834 		goto free_procname;
2835 
2836 	p->sysctl_table = t;
2837 	return 0;
2838 
2839 free_procname:
2840 	kfree(t->dev_name);
2841 free:
2842 	kfree(t);
2843 err:
2844 	return -ENOBUFS;
2845 }
2846 EXPORT_SYMBOL(neigh_sysctl_register);
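
/*
 * Typical registration, as sketched from the ARP code (net/ipv4/arp.c
 * has the authoritative call): the per-protocol defaults go in once
 * with dev == NULL,
 *
 *	neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL);
 *
 * creating the /proc/sys/net/ipv4/neigh/default/ tree; per-device
 * registrations pass the device and appear under .../neigh/<ifname>/.
 */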
2847 
2848 void neigh_sysctl_unregister(struct neigh_parms *p)
2849 {
2850 	if (p->sysctl_table) {
2851 		struct neigh_sysctl_table *t = p->sysctl_table;
2852 		p->sysctl_table = NULL;
2853 		unregister_sysctl_table(t->sysctl_header);
2854 		kfree(t->dev_name);
2855 		kfree(t);
2856 	}
2857 }
2858 EXPORT_SYMBOL(neigh_sysctl_unregister);
2859 
2860 #endif	/* CONFIG_SYSCTL */
2861 
2862 static int __init neigh_init(void)
2863 {
2864 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
2865 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
2866 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
2867 
2868 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
2869 		      NULL);
2870 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
2871 
2872 	return 0;
2873 }
2874 
2875 subsys_initcall(neigh_init);
2876 
2877