/* xref: /linux/net/core/neighbour.c (revision c4ee0af3fa0dc65f690fc908f02b8355f9576ea0) */
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be made under this lock: no callbacks
     to protocol backends, no attempts to send something to network.
     It will result in deadlocks, if backend/driver wants to use neighbour
     cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   Reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is supposed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
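
/*
 * A sketch of the pattern the rules above imply (illustrative only, not
 * called anywhere): take a reference under the table lock, drop the lock,
 * and only then do the heavyweight work.
 *
 *	write_lock_bh(&tbl->lock);
 *	n = ...;			// found in a hash bucket
 *	neigh_hold(n);
 *	write_unlock_bh(&tbl->lock);
 *	...				// send packets, call backends, etc.
 *	neigh_release(n);
 */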

static DEFINE_RWLOCK(neigh_tbl_lock);

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}

/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (net_random() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
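
/*
 * Worked example (assuming the usual base_reachable_time of 30 * HZ):
 * net_random() % base is uniform in [0, 30s) and base >> 1 is 15s, so the
 * result is uniform in [15s, 45s), i.e. (1/2)*base ... (3/2)*base as the
 * comment above says.
 */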


static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	neigh_hold(n);
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

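/*
 * A note on the gc thresholds used below (values are per-table; the ARP
 * defaults, for instance, are gc_thresh1 = 128, gc_thresh2 = 512 and
 * gc_thresh3 = 1024): below gc_thresh1 no periodic garbage collection
 * happens at all, above gc_thresh2 a forced gc is attempted here if the
 * last flush was more than 5 seconds ago, and above gc_thresh3 the
 * allocation fails unless forced gc manages to shrink the table.
 */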
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead		  = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
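	/* Keep the value odd (and hence non-zero): the per-protocol hash
	 * functions use hash_rnd as a multiplier, and an odd multiplier
	 * is invertible mod 2^32 (a rationale note; an assumption based
	 * on how hash_rnd is consumed).
	 */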
	*x |= 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

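			/* The hash functions spread entropy into the upper
			 * bits, so the bucket index is taken from the top
			 * hash_shift bits rather than the bottom ones.
			 */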
			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
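
/*
 * A minimal usage sketch (illustrative; assumes IPv4 and arp_tbl): the
 * returned entry holds a reference that the caller must drop.
 *
 *	struct neighbour *n = neigh_lookup(&arp_tbl, &next_hop_ip, dev);
 *	if (n) {
 *		... read n->ha under a read_seqbegin(&n->ha_lock) loop ...
 *		neigh_release(n);
 *	}
 */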

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
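
/*
 * Most callers go through the neigh_create() wrapper in <net/neighbour.h>,
 * which is simply __neigh_create(tbl, pkey, dev, true), i.e. it always
 * returns a referenced entry (or an ERR_PTR).
 */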

static u32 pneigh_hash(const void *pkey, int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);
	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
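
/*
 * Example: for IPv4 (key_len == 4) the whole address is loaded, then folded
 * down by the xor-shifts until only the low four bits remain, selecting one
 * of the PNEIGH_HASHMASK + 1 == 16 buckets.
 */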

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);


int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	The neighbour must already be out of the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
		goto out;

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return (n->nud_state & NUD_PROBE) ?
		p->ucast_probes :
		p->ucast_probes + p->app_probes + p->mcast_probes;
}
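
/*
 * With the usual ARP defaults (ucast_probes = 3, mcast_probes = 3,
 * app_probes = 0) this caps NUD_PROBE at 3 unicast probes, and allows up
 * to 6 probes in total while resolving from NUD_INCOMPLETE.
 */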

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a very delicate place. The error_report routine is
	   very complicated; in particular, it can hit the same neighbour
	   entry!

	   So we try to be careful and avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */
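/*
 * A rough sketch of how the timer drives the NUD state machine (cf. RFC
 * 2461): NUD_REACHABLE decays into NUD_DELAY or NUD_STALE once
 * reachable_time has elapsed; NUD_DELAY either returns to NUD_REACHABLE on
 * fresh confirmation or escalates to NUD_PROBE; NUD_PROBE and
 * NUD_INCOMPLETE retransmit every retrans_time and collapse into
 * NUD_FAILED once neigh_max_probes() is exceeded.
 */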

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(neigh->parms->retrans_time, HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       neigh->parms->queue_len_bytes) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
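
/*
 * Return convention (as used by neigh_resolve_output() below): 0 means the
 * entry is usable and the caller may transmit now; 1 means the skb was taken
 * over, either queued on arp_queue pending resolution or dropped because
 * resolution already failed.
 */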

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (hh->hh_len) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows retaining the current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
				as a router.

   Caller MUST hold a reference count on the entry.
 */
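
/*
 * A sketch of a typical call (loosely modelled on how a received IPv6
 * neighbour advertisement confirms an address; details vary by protocol):
 *
 *	neigh_update(neigh, lladdr, NUD_REACHABLE,
 *		     NEIGH_UPDATE_F_WEAK_OVERRIDE | NEIGH_UPDATE_F_OVERRIDE);
 */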

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	__be16 prot = dst->ops->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* This function can be used in contexts where only the old dev_queue_xmit
 * worked, e.g. if you want to override the normal output path (eql, shaper)
 * but resolution has not been made yet.
 */

int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	__skb_pull(skb, skb_network_offset(skb));

	if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			    skb->len) < 0 &&
	    dev->header_ops->rebuild(skb))
		return 0;

	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_compat_output);

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
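
/*
 * To recap the three output paths wired up by neigh_suspect()/neigh_connect()
 * and the per-protocol ops: neigh_resolve_output() is the slow path that may
 * trigger or wait on resolution, neigh_connected_output() copies the cached
 * ha on every packet for entries believed valid, and neigh_direct_output()
 * skips neighbour processing entirely (e.g. for NOARP devices).
 */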

static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						      struct net *net, int ifindex)
{
	struct neigh_parms *p;

	for (p = &tbl->parms; p; p = p->next) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			release_net(net);
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}

void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}

static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
	    /* approximate value for deprecated QUEUE_LEN (in packets) */
1783 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
1784 			parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1785 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1786 	    nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1787 	    nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1788 	    nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1789 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1790 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1791 			  parms->base_reachable_time) ||
1792 	    nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1793 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1794 			  parms->delay_probe_time) ||
1795 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1796 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1797 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1798 	    nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1799 		goto nla_put_failure;
1800 	return nla_nest_end(skb, nest);
1801 
1802 nla_put_failure:
1803 	nla_nest_cancel(skb, nest);
1804 	return -EMSGSIZE;
1805 }
1806 
1807 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1808 			      u32 pid, u32 seq, int type, int flags)
1809 {
1810 	struct nlmsghdr *nlh;
1811 	struct ndtmsg *ndtmsg;
1812 
1813 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1814 	if (nlh == NULL)
1815 		return -EMSGSIZE;
1816 
1817 	ndtmsg = nlmsg_data(nlh);
1818 
1819 	read_lock_bh(&tbl->lock);
1820 	ndtmsg->ndtm_family = tbl->family;
1821 	ndtmsg->ndtm_pad1   = 0;
1822 	ndtmsg->ndtm_pad2   = 0;
1823 
1824 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1825 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1826 	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1827 	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1828 	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1829 		goto nla_put_failure;
1830 	{
1831 		unsigned long now = jiffies;
1832 		unsigned int flush_delta = now - tbl->last_flush;
1833 		unsigned int rand_delta = now - tbl->last_rand;
1834 		struct neigh_hash_table *nht;
1835 		struct ndt_config ndc = {
1836 			.ndtc_key_len		= tbl->key_len,
1837 			.ndtc_entry_size	= tbl->entry_size,
1838 			.ndtc_entries		= atomic_read(&tbl->entries),
1839 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
1840 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
1841 			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
1842 		};
1843 
1844 		rcu_read_lock_bh();
1845 		nht = rcu_dereference_bh(tbl->nht);
1846 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1847 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1848 		rcu_read_unlock_bh();
1849 
1850 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1851 			goto nla_put_failure;
1852 	}
1853 
1854 	{
1855 		int cpu;
1856 		struct ndt_stats ndst;
1857 
1858 		memset(&ndst, 0, sizeof(ndst));
1859 
1860 		for_each_possible_cpu(cpu) {
1861 			struct neigh_statistics	*st;
1862 
1863 			st = per_cpu_ptr(tbl->stats, cpu);
1864 			ndst.ndts_allocs		+= st->allocs;
1865 			ndst.ndts_destroys		+= st->destroys;
1866 			ndst.ndts_hash_grows		+= st->hash_grows;
1867 			ndst.ndts_res_failed		+= st->res_failed;
1868 			ndst.ndts_lookups		+= st->lookups;
1869 			ndst.ndts_hits			+= st->hits;
1870 			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
1871 			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
1872 			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
1873 			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
1874 		}
1875 
1876 		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1877 			goto nla_put_failure;
1878 	}
1879 
1880 	BUG_ON(tbl->parms.dev);
1881 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1882 		goto nla_put_failure;
1883 
1884 	read_unlock_bh(&tbl->lock);
1885 	return nlmsg_end(skb, nlh);
1886 
1887 nla_put_failure:
1888 	read_unlock_bh(&tbl->lock);
1889 	nlmsg_cancel(skb, nlh);
1890 	return -EMSGSIZE;
1891 }
1892 
1893 static int neightbl_fill_param_info(struct sk_buff *skb,
1894 				    struct neigh_table *tbl,
1895 				    struct neigh_parms *parms,
1896 				    u32 pid, u32 seq, int type,
1897 				    unsigned int flags)
1898 {
1899 	struct ndtmsg *ndtmsg;
1900 	struct nlmsghdr *nlh;
1901 
1902 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1903 	if (nlh == NULL)
1904 		return -EMSGSIZE;
1905 
1906 	ndtmsg = nlmsg_data(nlh);
1907 
1908 	read_lock_bh(&tbl->lock);
1909 	ndtmsg->ndtm_family = tbl->family;
1910 	ndtmsg->ndtm_pad1   = 0;
1911 	ndtmsg->ndtm_pad2   = 0;
1912 
1913 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1914 	    neightbl_fill_parms(skb, parms) < 0)
1915 		goto errout;
1916 
1917 	read_unlock_bh(&tbl->lock);
1918 	return nlmsg_end(skb, nlh);
1919 errout:
1920 	read_unlock_bh(&tbl->lock);
1921 	nlmsg_cancel(skb, nlh);
1922 	return -EMSGSIZE;
1923 }
1924 
1925 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1926 	[NDTA_NAME]		= { .type = NLA_STRING },
1927 	[NDTA_THRESH1]		= { .type = NLA_U32 },
1928 	[NDTA_THRESH2]		= { .type = NLA_U32 },
1929 	[NDTA_THRESH3]		= { .type = NLA_U32 },
1930 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
1931 	[NDTA_PARMS]		= { .type = NLA_NESTED },
1932 };
1933 
1934 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1935 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
1936 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
1937 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
1938 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
1939 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
1940 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
1941 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
1942 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
1943 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
1944 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
1945 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
1946 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
1947 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
1948 };
1949 
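/* RTM_SETNEIGHTBL handler: look the table up by NDTA_NAME (optionally
 * filtered by family), update the addressed neigh_parms from the nested
 * NDTA_PARMS attribute, then apply the table-wide gc thresholds and
 * gc_interval (permitted from init_net only).
 */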
1950 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1951 {
1952 	struct net *net = sock_net(skb->sk);
1953 	struct neigh_table *tbl;
1954 	struct ndtmsg *ndtmsg;
1955 	struct nlattr *tb[NDTA_MAX+1];
1956 	int err;
1957 
1958 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1959 			  nl_neightbl_policy);
1960 	if (err < 0)
1961 		goto errout;
1962 
1963 	if (tb[NDTA_NAME] == NULL) {
1964 		err = -EINVAL;
1965 		goto errout;
1966 	}
1967 
1968 	ndtmsg = nlmsg_data(nlh);
1969 	read_lock(&neigh_tbl_lock);
1970 	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1971 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1972 			continue;
1973 
1974 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1975 			break;
1976 	}
1977 
1978 	if (tbl == NULL) {
1979 		err = -ENOENT;
1980 		goto errout_locked;
1981 	}
1982 
1983 	/*
1984 	 * We acquire tbl->lock to be nice to the periodic timers and
1985 	 * make sure they always see a consistent set of values.
1986 	 */
1987 	write_lock_bh(&tbl->lock);
1988 
1989 	if (tb[NDTA_PARMS]) {
1990 		struct nlattr *tbp[NDTPA_MAX+1];
1991 		struct neigh_parms *p;
1992 		int i, ifindex = 0;
1993 
1994 		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1995 				       nl_ntbl_parm_policy);
1996 		if (err < 0)
1997 			goto errout_tbl_lock;
1998 
1999 		if (tbp[NDTPA_IFINDEX])
2000 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2001 
2002 		p = lookup_neigh_parms(tbl, net, ifindex);
2003 		if (p == NULL) {
2004 			err = -ENOENT;
2005 			goto errout_tbl_lock;
2006 		}
2007 
2008 		for (i = 1; i <= NDTPA_MAX; i++) {
2009 			if (tbp[i] == NULL)
2010 				continue;
2011 
2012 			switch (i) {
2013 			case NDTPA_QUEUE_LEN:
2014 				p->queue_len_bytes = nla_get_u32(tbp[i]) *
2015 						     SKB_TRUESIZE(ETH_FRAME_LEN);
2016 				break;
2017 			case NDTPA_QUEUE_LENBYTES:
2018 				p->queue_len_bytes = nla_get_u32(tbp[i]);
2019 				break;
2020 			case NDTPA_PROXY_QLEN:
2021 				p->proxy_qlen = nla_get_u32(tbp[i]);
2022 				break;
2023 			case NDTPA_APP_PROBES:
2024 				p->app_probes = nla_get_u32(tbp[i]);
2025 				break;
2026 			case NDTPA_UCAST_PROBES:
2027 				p->ucast_probes = nla_get_u32(tbp[i]);
2028 				break;
2029 			case NDTPA_MCAST_PROBES:
2030 				p->mcast_probes = nla_get_u32(tbp[i]);
2031 				break;
2032 			case NDTPA_BASE_REACHABLE_TIME:
2033 				p->base_reachable_time = nla_get_msecs(tbp[i]);
2034 				break;
2035 			case NDTPA_GC_STALETIME:
2036 				p->gc_staletime = nla_get_msecs(tbp[i]);
2037 				break;
2038 			case NDTPA_DELAY_PROBE_TIME:
2039 				p->delay_probe_time = nla_get_msecs(tbp[i]);
2040 				break;
2041 			case NDTPA_RETRANS_TIME:
2042 				p->retrans_time = nla_get_msecs(tbp[i]);
2043 				break;
2044 			case NDTPA_ANYCAST_DELAY:
2045 				p->anycast_delay = nla_get_msecs(tbp[i]);
2046 				break;
2047 			case NDTPA_PROXY_DELAY:
2048 				p->proxy_delay = nla_get_msecs(tbp[i]);
2049 				break;
2050 			case NDTPA_LOCKTIME:
2051 				p->locktime = nla_get_msecs(tbp[i]);
2052 				break;
2053 			}
2054 		}
2055 	}
2056 
2057 	err = -ENOENT;
2058 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2059 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2060 	    !net_eq(net, &init_net))
2061 		goto errout_tbl_lock;
2062 
2063 	if (tb[NDTA_THRESH1])
2064 		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2065 
2066 	if (tb[NDTA_THRESH2])
2067 		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2068 
2069 	if (tb[NDTA_THRESH3])
2070 		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2071 
2072 	if (tb[NDTA_GC_INTERVAL])
2073 		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2074 
2075 	err = 0;
2076 
2077 errout_tbl_lock:
2078 	write_unlock_bh(&tbl->lock);
2079 errout_locked:
2080 	read_unlock(&neigh_tbl_lock);
2081 errout:
2082 	return err;
2083 }
2084 
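/* Dump every table and its per-netns device parms.  Resume state:
 * cb->args[0] is the table index, cb->args[1] the parms index within
 * that table.
 */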
2085 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2086 {
2087 	struct net *net = sock_net(skb->sk);
2088 	int family, tidx, nidx = 0;
2089 	int tbl_skip = cb->args[0];
2090 	int neigh_skip = cb->args[1];
2091 	struct neigh_table *tbl;
2092 
2093 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2094 
2095 	read_lock(&neigh_tbl_lock);
2096 	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2097 		struct neigh_parms *p;
2098 
2099 		if (tidx < tbl_skip || (family && tbl->family != family))
2100 			continue;
2101 
2102 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2103 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2104 				       NLM_F_MULTI) <= 0)
2105 			break;
2106 
2107 		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2108 			if (!net_eq(neigh_parms_net(p), net))
2109 				continue;
2110 
2111 			if (nidx < neigh_skip)
2112 				goto next;
2113 
2114 			if (neightbl_fill_param_info(skb, tbl, p,
2115 						     NETLINK_CB(cb->skb).portid,
2116 						     cb->nlh->nlmsg_seq,
2117 						     RTM_NEWNEIGHTBL,
2118 						     NLM_F_MULTI) <= 0)
2119 				goto out;
2120 		next:
2121 			nidx++;
2122 		}
2123 
2124 		neigh_skip = 0;
2125 	}
2126 out:
2127 	read_unlock(&neigh_tbl_lock);
2128 	cb->args[0] = tidx;
2129 	cb->args[1] = nidx;
2130 
2131 	return skb->len;
2132 }
2133 
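/* Fill one neighbour message.  The link-layer address and cacheinfo
 * snapshot are taken under neigh->lock so the dumped state is
 * internally consistent.
 */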
2134 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2135 			   u32 pid, u32 seq, int type, unsigned int flags)
2136 {
2137 	unsigned long now = jiffies;
2138 	struct nda_cacheinfo ci;
2139 	struct nlmsghdr *nlh;
2140 	struct ndmsg *ndm;
2141 
2142 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2143 	if (nlh == NULL)
2144 		return -EMSGSIZE;
2145 
2146 	ndm = nlmsg_data(nlh);
2147 	ndm->ndm_family	 = neigh->ops->family;
2148 	ndm->ndm_pad1    = 0;
2149 	ndm->ndm_pad2    = 0;
2150 	ndm->ndm_flags	 = neigh->flags;
2151 	ndm->ndm_type	 = neigh->type;
2152 	ndm->ndm_ifindex = neigh->dev->ifindex;
2153 
2154 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2155 		goto nla_put_failure;
2156 
2157 	read_lock_bh(&neigh->lock);
2158 	ndm->ndm_state	 = neigh->nud_state;
2159 	if (neigh->nud_state & NUD_VALID) {
2160 		char haddr[MAX_ADDR_LEN];
2161 
2162 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2163 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2164 			read_unlock_bh(&neigh->lock);
2165 			goto nla_put_failure;
2166 		}
2167 	}
2168 
2169 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2170 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2171 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2172 	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
2173 	read_unlock_bh(&neigh->lock);
2174 
2175 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2176 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2177 		goto nla_put_failure;
2178 
2179 	return nlmsg_end(skb, nlh);
2180 
2181 nla_put_failure:
2182 	nlmsg_cancel(skb, nlh);
2183 	return -EMSGSIZE;
2184 }
2185 
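/* Proxy entries carry no NUD state machine or link-layer address, so
 * they are dumped with NUD_NONE and the NTF_PROXY flag set.
 */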
2186 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2187 			    u32 pid, u32 seq, int type, unsigned int flags,
2188 			    struct neigh_table *tbl)
2189 {
2190 	struct nlmsghdr *nlh;
2191 	struct ndmsg *ndm;
2192 
2193 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2194 	if (nlh == NULL)
2195 		return -EMSGSIZE;
2196 
2197 	ndm = nlmsg_data(nlh);
2198 	ndm->ndm_family	 = tbl->family;
2199 	ndm->ndm_pad1    = 0;
2200 	ndm->ndm_pad2    = 0;
2201 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
2202 	ndm->ndm_type	 = RTN_UNICAST;
2203 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2204 	ndm->ndm_state	 = NUD_NONE;
2205 
2206 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2207 		goto nla_put_failure;
2208 
2209 	return nlmsg_end(skb, nlh);
2210 
2211 nla_put_failure:
2212 	nlmsg_cancel(skb, nlh);
2213 	return -EMSGSIZE;
2214 }
2215 
2216 static void neigh_update_notify(struct neighbour *neigh)
2217 {
2218 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2219 	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
2220 }
2221 
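/* Dump one table's hash chains under RCU.  Resume state: cb->args[1]
 * is the hash bucket, cb->args[2] the index within that bucket.
 */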
2222 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2223 			    struct netlink_callback *cb)
2224 {
2225 	struct net *net = sock_net(skb->sk);
2226 	struct neighbour *n;
2227 	int rc, h, s_h = cb->args[1];
2228 	int idx = cb->args[2], s_idx = idx;
2229 	struct neigh_hash_table *nht;
2230 
2231 	rcu_read_lock_bh();
2232 	nht = rcu_dereference_bh(tbl->nht);
2233 
2234 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2235 		if (h > s_h)
2236 			s_idx = 0;
2237 		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2238 		     n != NULL;
2239 		     n = rcu_dereference_bh(n->next)) {
2240 			if (!net_eq(dev_net(n->dev), net))
2241 				continue;
2242 			if (idx < s_idx)
2243 				goto next;
2244 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2245 					    cb->nlh->nlmsg_seq,
2246 					    RTM_NEWNEIGH,
2247 					    NLM_F_MULTI) <= 0) {
2248 				rc = -1;
2249 				goto out;
2250 			}
2251 next:
2252 			idx++;
2253 		}
2254 	}
2255 	rc = skb->len;
2256 out:
2257 	rcu_read_unlock_bh();
2258 	cb->args[1] = h;
2259 	cb->args[2] = idx;
2260 	return rc;
2261 }
2262 
2263 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2264 			     struct netlink_callback *cb)
2265 {
2266 	struct pneigh_entry *n;
2267 	struct net *net = sock_net(skb->sk);
2268 	int rc, h, s_h = cb->args[3];
2269 	int idx = cb->args[4], s_idx = idx;
2270 
2271 	read_lock_bh(&tbl->lock);
2272 
2273 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2274 		if (h > s_h)
2275 			s_idx = 0;
2276 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2277 			if (!net_eq(pneigh_net(n), net))
2278 				continue;
2279 			if (idx < s_idx)
2280 				goto next;
2281 			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2282 					    cb->nlh->nlmsg_seq,
2283 					    RTM_NEWNEIGH,
2284 					    NLM_F_MULTI, tbl) <= 0) {
2285 				read_unlock_bh(&tbl->lock);
2286 				rc = -1;
2287 				goto out;
2288 			}
2289 		next:
2290 			idx++;
2291 		}
2292 	}
2293 
2294 	read_unlock_bh(&tbl->lock);
2295 	rc = skb->len;
2296 out:
2297 	cb->args[3] = h;
2298 	cb->args[4] = idx;
2299 	return rc;
2300 }
2302 
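/* RTM_GETNEIGH dump handler: walk every table of the requested family,
 * dumping either proxy entries (full ndmsg with NTF_PROXY set) or
 * regular neighbour entries.
 */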
2303 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2304 {
2305 	struct neigh_table *tbl;
2306 	int t, family, s_t;
2307 	int proxy = 0;
2308 	int err;
2309 
2310 	read_lock(&neigh_tbl_lock);
2311 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2312 
2313 	/* Check for a full ndmsg structure; the family member sits at the
2314 	 * same offset in both ndmsg and rtgenmsg.
2315 	 */
2316 	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2317 	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2318 		proxy = 1;
2319 
2320 	s_t = cb->args[0];
2321 
2322 	for (tbl = neigh_tables, t = 0; tbl;
2323 	     tbl = tbl->next, t++) {
2324 		if (t < s_t || (family && tbl->family != family))
2325 			continue;
2326 		if (t > s_t)
2327 			memset(&cb->args[1], 0, sizeof(cb->args) -
2328 						sizeof(cb->args[0]));
2329 		if (proxy)
2330 			err = pneigh_dump_table(tbl, skb, cb);
2331 		else
2332 			err = neigh_dump_table(tbl, skb, cb);
2333 		if (err < 0)
2334 			break;
2335 	}
2336 	read_unlock(&neigh_tbl_lock);
2337 
2338 	cb->args[0] = t;
2339 	return skb->len;
2340 }
2341 
2342 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2343 {
2344 	int chain;
2345 	struct neigh_hash_table *nht;
2346 
2347 	rcu_read_lock_bh();
2348 	nht = rcu_dereference_bh(tbl->nht);
2349 
2350 	read_lock(&tbl->lock); /* avoid resizes */
2351 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2352 		struct neighbour *n;
2353 
2354 		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2355 		     n != NULL;
2356 		     n = rcu_dereference_bh(n->next))
2357 			cb(n, cookie);
2358 	}
2359 	read_unlock(&tbl->lock);
2360 	rcu_read_unlock_bh();
2361 }
2362 EXPORT_SYMBOL(neigh_for_each);
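
/* A minimal, hypothetical caller: count the entries that are currently
 * reachable.  The callback runs with tbl->lock read-held and BHs
 * disabled, so it must not sleep or modify the table itself.
 *
 *	static void count_reachable(struct neighbour *n, void *cookie)
 *	{
 *		int *cnt = cookie;
 *
 *		if (n->nud_state & NUD_REACHABLE)
 *			(*cnt)++;
 *	}
 *
 *	int cnt = 0;
 *	neigh_for_each(&arp_tbl, count_reachable, &cnt);
 */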
2363 
2364 /* The tbl->lock must be held as a writer and BH disabled. */
2365 void __neigh_for_each_release(struct neigh_table *tbl,
2366 			      int (*cb)(struct neighbour *))
2367 {
2368 	int chain;
2369 	struct neigh_hash_table *nht;
2370 
2371 	nht = rcu_dereference_protected(tbl->nht,
2372 					lockdep_is_held(&tbl->lock));
2373 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2374 		struct neighbour *n;
2375 		struct neighbour __rcu **np;
2376 
2377 		np = &nht->hash_buckets[chain];
2378 		while ((n = rcu_dereference_protected(*np,
2379 					lockdep_is_held(&tbl->lock))) != NULL) {
2380 			int release;
2381 
2382 			write_lock(&n->lock);
2383 			release = cb(n);
2384 			if (release) {
2385 				rcu_assign_pointer(*np,
2386 					rcu_dereference_protected(n->next,
2387 						lockdep_is_held(&tbl->lock)));
2388 				n->dead = 1;
2389 			} else
2390 				np = &n->next;
2391 			write_unlock(&n->lock);
2392 			if (release)
2393 				neigh_cleanup_and_release(n);
2394 		}
2395 	}
2396 }
2397 EXPORT_SYMBOL(__neigh_for_each_release);
2398 
2399 #ifdef CONFIG_PROC_FS
2400 
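/* /proc iteration helpers.  neigh_seq_state remembers the hash table
 * snapshot, the current bucket, and whether iteration has crossed from
 * the neighbour hash into the proxy (pneigh) hash.
 */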
2401 static struct neighbour *neigh_get_first(struct seq_file *seq)
2402 {
2403 	struct neigh_seq_state *state = seq->private;
2404 	struct net *net = seq_file_net(seq);
2405 	struct neigh_hash_table *nht = state->nht;
2406 	struct neighbour *n = NULL;
2407 	int bucket;
2408 
2409 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2410 	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2411 		n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2412 
2413 		while (n) {
2414 			if (!net_eq(dev_net(n->dev), net))
2415 				goto next;
2416 			if (state->neigh_sub_iter) {
2417 				loff_t fakep = 0;
2418 				void *v;
2419 
2420 				v = state->neigh_sub_iter(state, n, &fakep);
2421 				if (!v)
2422 					goto next;
2423 			}
2424 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2425 				break;
2426 			if (n->nud_state & ~NUD_NOARP)
2427 				break;
2428 next:
2429 			n = rcu_dereference_bh(n->next);
2430 		}
2431 
2432 		if (n)
2433 			break;
2434 	}
2435 	state->bucket = bucket;
2436 
2437 	return n;
2438 }
2439 
2440 static struct neighbour *neigh_get_next(struct seq_file *seq,
2441 					struct neighbour *n,
2442 					loff_t *pos)
2443 {
2444 	struct neigh_seq_state *state = seq->private;
2445 	struct net *net = seq_file_net(seq);
2446 	struct neigh_hash_table *nht = state->nht;
2447 
2448 	if (state->neigh_sub_iter) {
2449 		void *v = state->neigh_sub_iter(state, n, pos);
2450 		if (v)
2451 			return n;
2452 	}
2453 	n = rcu_dereference_bh(n->next);
2454 
2455 	while (1) {
2456 		while (n) {
2457 			if (!net_eq(dev_net(n->dev), net))
2458 				goto next;
2459 			if (state->neigh_sub_iter) {
2460 				void *v = state->neigh_sub_iter(state, n, pos);
2461 				if (v)
2462 					return n;
2463 				goto next;
2464 			}
2465 			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2466 				break;
2467 
2468 			if (n->nud_state & ~NUD_NOARP)
2469 				break;
2470 next:
2471 			n = rcu_dereference_bh(n->next);
2472 		}
2473 
2474 		if (n)
2475 			break;
2476 
2477 		if (++state->bucket >= (1 << nht->hash_shift))
2478 			break;
2479 
2480 		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2481 	}
2482 
2483 	if (n && pos)
2484 		--(*pos);
2485 	return n;
2486 }
2487 
2488 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2489 {
2490 	struct neighbour *n = neigh_get_first(seq);
2491 
2492 	if (n) {
2493 		--(*pos);
2494 		while (*pos) {
2495 			n = neigh_get_next(seq, n, pos);
2496 			if (!n)
2497 				break;
2498 		}
2499 	}
2500 	return *pos ? NULL : n;
2501 }
2502 
2503 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2504 {
2505 	struct neigh_seq_state *state = seq->private;
2506 	struct net *net = seq_file_net(seq);
2507 	struct neigh_table *tbl = state->tbl;
2508 	struct pneigh_entry *pn = NULL;
2509 	int bucket;
2510 
2511 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
2512 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2513 		pn = tbl->phash_buckets[bucket];
2514 		while (pn && !net_eq(pneigh_net(pn), net))
2515 			pn = pn->next;
2516 		if (pn)
2517 			break;
2518 	}
2519 	state->bucket = bucket;
2520 
2521 	return pn;
2522 }
2523 
2524 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2525 					    struct pneigh_entry *pn,
2526 					    loff_t *pos)
2527 {
2528 	struct neigh_seq_state *state = seq->private;
2529 	struct net *net = seq_file_net(seq);
2530 	struct neigh_table *tbl = state->tbl;
2531 
2532 	do {
2533 		pn = pn->next;
2534 	} while (pn && !net_eq(pneigh_net(pn), net));
2535 
2536 	while (!pn) {
2537 		if (++state->bucket > PNEIGH_HASHMASK)
2538 			break;
2539 		pn = tbl->phash_buckets[state->bucket];
2540 		while (pn && !net_eq(pneigh_net(pn), net))
2541 			pn = pn->next;
2542 		if (pn)
2543 			break;
2544 	}
2545 
2546 	if (pn && pos)
2547 		--(*pos);
2548 
2549 	return pn;
2550 }
2551 
2552 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2553 {
2554 	struct pneigh_entry *pn = pneigh_get_first(seq);
2555 
2556 	if (pn) {
2557 		--(*pos);
2558 		while (*pos) {
2559 			pn = pneigh_get_next(seq, pn, pos);
2560 			if (!pn)
2561 				break;
2562 		}
2563 	}
2564 	return *pos ? NULL : pn;
2565 }
2566 
2567 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2568 {
2569 	struct neigh_seq_state *state = seq->private;
2570 	void *rc;
2571 	loff_t idxpos = *pos;
2572 
2573 	rc = neigh_get_idx(seq, &idxpos);
2574 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2575 		rc = pneigh_get_idx(seq, &idxpos);
2576 
2577 	return rc;
2578 }
2579 
2580 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2581 	__acquires(rcu_bh)
2582 {
2583 	struct neigh_seq_state *state = seq->private;
2584 
2585 	state->tbl = tbl;
2586 	state->bucket = 0;
2587 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2588 
2589 	rcu_read_lock_bh();
2590 	state->nht = rcu_dereference_bh(tbl->nht);
2591 
2592 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2593 }
2594 EXPORT_SYMBOL(neigh_seq_start);
2595 
2596 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2597 {
2598 	struct neigh_seq_state *state;
2599 	void *rc;
2600 
2601 	if (v == SEQ_START_TOKEN) {
2602 		rc = neigh_get_first(seq);
2603 		goto out;
2604 	}
2605 
2606 	state = seq->private;
2607 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2608 		rc = neigh_get_next(seq, v, NULL);
2609 		if (rc)
2610 			goto out;
2611 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2612 			rc = pneigh_get_first(seq);
2613 	} else {
2614 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2615 		rc = pneigh_get_next(seq, v, NULL);
2616 	}
2617 out:
2618 	++(*pos);
2619 	return rc;
2620 }
2621 EXPORT_SYMBOL(neigh_seq_next);
2622 
2623 void neigh_seq_stop(struct seq_file *seq, void *v)
2624 	__releases(rcu_bh)
2625 {
2626 	rcu_read_unlock_bh();
2627 }
2628 EXPORT_SYMBOL(neigh_seq_stop);
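
/* Sketch of how a protocol wires these iterators into its own seq_file
 * ops (modelled on the ARP /proc code; only the show routine is
 * protocol-specific):
 *
 *	static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 */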
2629 
2630 /* statistics via seq_file */
2631 
2632 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2633 {
2634 	struct neigh_table *tbl = seq->private;
2635 	int cpu;
2636 
2637 	if (*pos == 0)
2638 		return SEQ_START_TOKEN;
2639 
2640 	for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) {
2641 		if (!cpu_possible(cpu))
2642 			continue;
2643 		*pos = cpu + 1;
2644 		return per_cpu_ptr(tbl->stats, cpu);
2645 	}
2646 	return NULL;
2647 }
2648 
2649 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2650 {
2651 	struct neigh_table *tbl = seq->private;
2652 	int cpu;
2653 
2654 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2655 		if (!cpu_possible(cpu))
2656 			continue;
2657 		*pos = cpu + 1;
2658 		return per_cpu_ptr(tbl->stats, cpu);
2659 	}
2660 	return NULL;
2661 }
2662 
2663 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
2664 {
2665 
2666 }
2667 
2668 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2669 {
2670 	struct neigh_table *tbl = seq->private;
2671 	struct neigh_statistics *st = v;
2672 
2673 	if (v == SEQ_START_TOKEN) {
2674 		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2675 		return 0;
2676 	}
2677 
2678 	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2679 			"%08lx %08lx  %08lx %08lx %08lx\n",
2680 		   atomic_read(&tbl->entries),
2681 
2682 		   st->allocs,
2683 		   st->destroys,
2684 		   st->hash_grows,
2685 
2686 		   st->lookups,
2687 		   st->hits,
2688 
2689 		   st->res_failed,
2690 
2691 		   st->rcv_probes_mcast,
2692 		   st->rcv_probes_ucast,
2693 
2694 		   st->periodic_gc_runs,
2695 		   st->forced_gc_runs,
2696 		   st->unres_discards
2697 		   );
2698 
2699 	return 0;
2700 }
2701 
2702 static const struct seq_operations neigh_stat_seq_ops = {
2703 	.start	= neigh_stat_seq_start,
2704 	.next	= neigh_stat_seq_next,
2705 	.stop	= neigh_stat_seq_stop,
2706 	.show	= neigh_stat_seq_show,
2707 };
2708 
2709 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2710 {
2711 	int ret = seq_open(file, &neigh_stat_seq_ops);
2712 
2713 	if (!ret) {
2714 		struct seq_file *sf = file->private_data;
2715 		sf->private = PDE_DATA(inode);
2716 	}
2717 	return ret;
2718 }
2719 
2720 static const struct file_operations neigh_stat_seq_fops = {
2721 	.owner	 = THIS_MODULE,
2722 	.open 	 = neigh_stat_seq_open,
2723 	.read	 = seq_read,
2724 	.llseek	 = seq_lseek,
2725 	.release = seq_release,
2726 };
2727 
2728 #endif /* CONFIG_PROC_FS */
2729 
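/* Worst-case RTM_NEWNEIGH payload: ndmsg header plus the NDA_DST,
 * NDA_LLADDR, NDA_CACHEINFO and NDA_PROBES attributes.
 */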
2730 static inline size_t neigh_nlmsg_size(void)
2731 {
2732 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2733 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2734 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2735 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2736 	       + nla_total_size(4); /* NDA_PROBES */
2737 }
2738 
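/* Build a message for one neighbour and multicast it to RTNLGRP_NEIGH
 * listeners; on failure the error is recorded with rtnl_set_sk_err()
 * so that listeners can detect the lost event.
 */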
2739 static void __neigh_notify(struct neighbour *n, int type, int flags)
2740 {
2741 	struct net *net = dev_net(n->dev);
2742 	struct sk_buff *skb;
2743 	int err = -ENOBUFS;
2744 
2745 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2746 	if (skb == NULL)
2747 		goto errout;
2748 
2749 	err = neigh_fill_info(skb, n, 0, 0, type, flags);
2750 	if (err < 0) {
2751 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2752 		WARN_ON(err == -EMSGSIZE);
2753 		kfree_skb(skb);
2754 		goto errout;
2755 	}
2756 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2757 	return;
2758 errout:
2759 	if (err < 0)
2760 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2761 }
2762 
2763 void neigh_app_ns(struct neighbour *n)
2764 {
2765 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2766 }
2767 EXPORT_SYMBOL(neigh_app_ns);
2768 
2769 #ifdef CONFIG_SYSCTL
2770 static int zero;
2771 static int int_max = INT_MAX;
2772 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2773 
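/* unres_qlen is presented to userspace in packets but stored in bytes
 * (queue_len_bytes), using SKB_TRUESIZE(ETH_FRAME_LEN), roughly 2KiB,
 * as the per-packet estimate; e.g. writing 32 packets stores about
 * 64KiB.
 */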
2774 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2775 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2776 {
2777 	int size, ret;
2778 	struct ctl_table tmp = *ctl;
2779 
2780 	tmp.extra1 = &zero;
2781 	tmp.extra2 = &unres_qlen_max;
2782 	tmp.data = &size;
2783 
2784 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2785 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2786 
2787 	if (write && !ret)
2788 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2789 	return ret;
2790 }
2791 
2792 enum {
2793 	NEIGH_VAR_MCAST_PROBE,
2794 	NEIGH_VAR_UCAST_PROBE,
2795 	NEIGH_VAR_APP_PROBE,
2796 	NEIGH_VAR_RETRANS_TIME,
2797 	NEIGH_VAR_BASE_REACHABLE_TIME,
2798 	NEIGH_VAR_DELAY_PROBE_TIME,
2799 	NEIGH_VAR_GC_STALETIME,
2800 	NEIGH_VAR_QUEUE_LEN,
2801 	NEIGH_VAR_QUEUE_LEN_BYTES,
2802 	NEIGH_VAR_PROXY_QLEN,
2803 	NEIGH_VAR_ANYCAST_DELAY,
2804 	NEIGH_VAR_PROXY_DELAY,
2805 	NEIGH_VAR_LOCKTIME,
2806 	NEIGH_VAR_RETRANS_TIME_MS,
2807 	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
2808 	NEIGH_VAR_GC_INTERVAL,
2809 	NEIGH_VAR_GC_THRESH1,
2810 	NEIGH_VAR_GC_THRESH2,
2811 	NEIGH_VAR_GC_THRESH3,
2812 	NEIGH_VAR_MAX
2813 };
2814 
2815 static struct neigh_sysctl_table {
2816 	struct ctl_table_header *sysctl_header;
2817 	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2818 } neigh_sysctl_template __read_mostly = {
2819 	.neigh_vars = {
2820 		[NEIGH_VAR_MCAST_PROBE] = {
2821 			.procname	= "mcast_solicit",
2822 			.maxlen		= sizeof(int),
2823 			.mode		= 0644,
2824 			.extra1 	= &zero,
2825 			.extra2		= &int_max,
2826 			.proc_handler	= proc_dointvec_minmax,
2827 		},
2828 		[NEIGH_VAR_UCAST_PROBE] = {
2829 			.procname	= "ucast_solicit",
2830 			.maxlen		= sizeof(int),
2831 			.mode		= 0644,
2832 			.extra1 	= &zero,
2833 			.extra2		= &int_max,
2834 			.proc_handler	= proc_dointvec_minmax,
2835 		},
2836 		[NEIGH_VAR_APP_PROBE] = {
2837 			.procname	= "app_solicit",
2838 			.maxlen		= sizeof(int),
2839 			.mode		= 0644,
2840 			.extra1 	= &zero,
2841 			.extra2		= &int_max,
2842 			.proc_handler	= proc_dointvec_minmax,
2843 		},
2844 		[NEIGH_VAR_RETRANS_TIME] = {
2845 			.procname	= "retrans_time",
2846 			.maxlen		= sizeof(int),
2847 			.mode		= 0644,
2848 			.proc_handler	= proc_dointvec_userhz_jiffies,
2849 		},
2850 		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
2851 			.procname	= "base_reachable_time",
2852 			.maxlen		= sizeof(int),
2853 			.mode		= 0644,
2854 			.proc_handler	= proc_dointvec_jiffies,
2855 		},
2856 		[NEIGH_VAR_DELAY_PROBE_TIME] = {
2857 			.procname	= "delay_first_probe_time",
2858 			.maxlen		= sizeof(int),
2859 			.mode		= 0644,
2860 			.proc_handler	= proc_dointvec_jiffies,
2861 		},
2862 		[NEIGH_VAR_GC_STALETIME] = {
2863 			.procname	= "gc_stale_time",
2864 			.maxlen		= sizeof(int),
2865 			.mode		= 0644,
2866 			.proc_handler	= proc_dointvec_jiffies,
2867 		},
2868 		[NEIGH_VAR_QUEUE_LEN] = {
2869 			.procname	= "unres_qlen",
2870 			.maxlen		= sizeof(int),
2871 			.mode		= 0644,
2872 			.proc_handler	= proc_unres_qlen,
2873 		},
2874 		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
2875 			.procname	= "unres_qlen_bytes",
2876 			.maxlen		= sizeof(int),
2877 			.mode		= 0644,
2878 			.extra1		= &zero,
2879 			.proc_handler   = proc_dointvec_minmax,
2880 		},
2881 		[NEIGH_VAR_PROXY_QLEN] = {
2882 			.procname	= "proxy_qlen",
2883 			.maxlen		= sizeof(int),
2884 			.mode		= 0644,
2885 			.extra1 	= &zero,
2886 			.extra2		= &int_max,
2887 			.proc_handler	= proc_dointvec_minmax,
2888 		},
2889 		[NEIGH_VAR_ANYCAST_DELAY] = {
2890 			.procname	= "anycast_delay",
2891 			.maxlen		= sizeof(int),
2892 			.mode		= 0644,
2893 			.proc_handler	= proc_dointvec_userhz_jiffies,
2894 		},
2895 		[NEIGH_VAR_PROXY_DELAY] = {
2896 			.procname	= "proxy_delay",
2897 			.maxlen		= sizeof(int),
2898 			.mode		= 0644,
2899 			.proc_handler	= proc_dointvec_userhz_jiffies,
2900 		},
2901 		[NEIGH_VAR_LOCKTIME] = {
2902 			.procname	= "locktime",
2903 			.maxlen		= sizeof(int),
2904 			.mode		= 0644,
2905 			.proc_handler	= proc_dointvec_userhz_jiffies,
2906 		},
2907 		[NEIGH_VAR_RETRANS_TIME_MS] = {
2908 			.procname	= "retrans_time_ms",
2909 			.maxlen		= sizeof(int),
2910 			.mode		= 0644,
2911 			.proc_handler	= proc_dointvec_ms_jiffies,
2912 		},
2913 		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2914 			.procname	= "base_reachable_time_ms",
2915 			.maxlen		= sizeof(int),
2916 			.mode		= 0644,
2917 			.proc_handler	= proc_dointvec_ms_jiffies,
2918 		},
2919 		[NEIGH_VAR_GC_INTERVAL] = {
2920 			.procname	= "gc_interval",
2921 			.maxlen		= sizeof(int),
2922 			.mode		= 0644,
2923 			.proc_handler	= proc_dointvec_jiffies,
2924 		},
2925 		[NEIGH_VAR_GC_THRESH1] = {
2926 			.procname	= "gc_thresh1",
2927 			.maxlen		= sizeof(int),
2928 			.mode		= 0644,
2929 			.extra1 	= &zero,
2930 			.extra2		= &int_max,
2931 			.proc_handler	= proc_dointvec_minmax,
2932 		},
2933 		[NEIGH_VAR_GC_THRESH2] = {
2934 			.procname	= "gc_thresh2",
2935 			.maxlen		= sizeof(int),
2936 			.mode		= 0644,
2937 			.extra1 	= &zero,
2938 			.extra2		= &int_max,
2939 			.proc_handler	= proc_dointvec_minmax,
2940 		},
2941 		[NEIGH_VAR_GC_THRESH3] = {
2942 			.procname	= "gc_thresh3",
2943 			.maxlen		= sizeof(int),
2944 			.mode		= 0644,
2945 			.extra1 	= &zero,
2946 			.extra2		= &int_max,
2947 			.proc_handler	= proc_dointvec_minmax,
2948 		},
2949 		{},
2950 	},
2951 };
2952 
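/* Clone the template above, point each entry's .data at the matching
 * neigh_parms field, and register the result as
 * /proc/sys/net/<p_name>/neigh/<device or "default">.
 */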
2953 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2954 			  char *p_name, proc_handler *handler)
2955 {
2956 	struct neigh_sysctl_table *t;
2957 	const char *dev_name_source;
2958 	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
2959 
2960 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2961 	if (!t)
2962 		goto err;
2963 
2964 	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2965 	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2966 	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2967 	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2968 	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2969 	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2970 	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2971 	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2972 	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2973 	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2974 	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2975 	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2976 	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2977 	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2978 	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2979 
2980 	if (dev) {
2981 		dev_name_source = dev->name;
2982 		/* Terminate the table early */
2983 		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2984 		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2985 	} else {
2986 		dev_name_source = "default";
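		/* gc_interval and gc_thresh[123] are laid out directly
		 * after the default parms inside struct neigh_table.
		 */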
2987 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2988 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2989 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2990 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2991 	}
2992 
2993 
2994 	if (handler) {
2995 		/* RetransTime */
2996 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2997 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2998 		/* ReachableTime */
2999 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3000 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
3001 		/* RetransTime (in milliseconds)*/
3002 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3003 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
3004 		/* ReachableTime (in milliseconds) */
3005 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3006 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
3007 	}
3008 
3009 	/* Don't export sysctls to unprivileged users */
3010 	if (neigh_parms_net(p)->user_ns != &init_user_ns)
3011 		t->neigh_vars[0].procname = NULL;
3012 
3013 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3014 		p_name, dev_name_source);
3015 	t->sysctl_header =
3016 		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3017 	if (!t->sysctl_header)
3018 		goto free;
3019 
3020 	p->sysctl_table = t;
3021 	return 0;
3022 
3023 free:
3024 	kfree(t);
3025 err:
3026 	return -ENOBUFS;
3027 }
3028 EXPORT_SYMBOL(neigh_sysctl_register);
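
/* Typical use (sketch; names illustrative): a protocol registers its
 * per-device knobs when it allocates parms for a device, e.g.
 *
 *	neigh_sysctl_register(dev, p, "ipv4", NULL);
 *
 * which creates the entries under /proc/sys/net/ipv4/neigh/<devname>/.
 */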
3029 
3030 void neigh_sysctl_unregister(struct neigh_parms *p)
3031 {
3032 	if (p->sysctl_table) {
3033 		struct neigh_sysctl_table *t = p->sysctl_table;
3034 		p->sysctl_table = NULL;
3035 		unregister_net_sysctl_table(t->sysctl_header);
3036 		kfree(t);
3037 	}
3038 }
3039 EXPORT_SYMBOL(neigh_sysctl_unregister);
3040 
3041 #endif	/* CONFIG_SYSCTL */
3042 
3043 static int __init neigh_init(void)
3044 {
3045 	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3046 	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3047 	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3048 
3049 	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3050 		      NULL);
3051 	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3052 
3053 	return 0;
3054 }
3055 
3056 subsys_initcall(neigh_init);
3057 
3058