xref: /linux/net/core/neighbour.c (revision 5fc31936081919a8572a3d644f3fbb258038f337)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	Generic address resolution entity
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
8  *
9  *	Fixes:
10  *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
11  *	Harald Welte		Add neighbour cache statistics like rtstat
12  */
13 
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 
16 #include <linux/slab.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/socket.h>
21 #include <linux/netdevice.h>
22 #include <linux/proc_fs.h>
23 #ifdef CONFIG_SYSCTL
24 #include <linux/sysctl.h>
25 #endif
26 #include <linux/times.h>
27 #include <net/net_namespace.h>
28 #include <net/neighbour.h>
29 #include <net/arp.h>
30 #include <net/dst.h>
31 #include <net/sock.h>
32 #include <net/netevent.h>
33 #include <net/netlink.h>
34 #include <linux/rtnetlink.h>
35 #include <linux/random.h>
36 #include <linux/string.h>
37 #include <linux/log2.h>
38 #include <linux/inetdevice.h>
39 #include <net/addrconf.h>
40 
41 #include <trace/events/neigh.h>
42 
43 #define NEIGH_DEBUG 1
44 #define neigh_dbg(level, fmt, ...)		\
45 do {						\
46 	if (level <= NEIGH_DEBUG)		\
47 		pr_debug(fmt, ##__VA_ARGS__);	\
48 } while (0)
49 
50 #define PNEIGH_HASHMASK		0xF
51 
52 static void neigh_timer_handler(struct timer_list *t);
53 static void __neigh_notify(struct neighbour *n, int type, int flags,
54 			   u32 pid);
55 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
56 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
57 				    struct net_device *dev);
58 
59 #ifdef CONFIG_PROC_FS
60 static const struct seq_operations neigh_stat_seq_ops;
61 #endif
62 
63 static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
64 {
65 	int i;
66 
67 	switch (family) {
68 	default:
69 		DEBUG_NET_WARN_ON_ONCE(1);
70 		fallthrough; /* to avoid panic by null-ptr-deref */
71 	case AF_INET:
72 		i = NEIGH_ARP_TABLE;
73 		break;
74 	case AF_INET6:
75 		i = NEIGH_ND_TABLE;
76 		break;
77 	}
78 
79 	return &dev->neighbours[i];
80 }
81 
82 /*
83    Neighbour hash table buckets are protected with rwlock tbl->lock.
84 
85    - All scans/updates of the hash buckets MUST be made under this lock.
86    - NOTHING clever should be done under this lock: no callbacks
87      into protocol backends, no attempts to send anything to the
88      network.  That would result in deadlocks if the backend/driver
89      wants to use the neighbour cache.
90    - If an entry requires some non-trivial actions, increase
91      its reference count and release the table lock.
92 
93    Neighbour entries are protected:
94    - with a reference count.
95    - with rwlock neigh->lock
96 
97    The reference count prevents destruction.
98 
99    neigh->lock mainly serializes the link-layer address data and its validity state.
100    However, the same lock is also used to protect other entry fields:
101     - timer
102     - resolution queue
103 
104    Again, nothing clever should be done under neigh->lock;
105    the most complicated operation we allow is dev->hard_header.
106    It is assumed that dev->hard_header is simple and does
107    not make callbacks to neighbour tables.
108  */
109 
110 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
111 {
112 	kfree_skb(skb);
113 	return -ENETDOWN;
114 }
115 
116 static void neigh_cleanup_and_release(struct neighbour *neigh)
117 {
118 	trace_neigh_cleanup_and_release(neigh, 0);
119 	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
120 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
121 	neigh_release(neigh);
122 }
123 
124 /*
125  * This is a random distribution over the interval (1/2)*base...(3/2)*base.
126  * It corresponds to the default IPv6 settings and is not overridable,
127  * because it is a really reasonable choice.
128  */
129 
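/* Worked example: with base = 30 * HZ, get_random_u32_below(base) returns
 * a value in [0, 30 * HZ) and adding (base >> 1) shifts it into
 * [15 * HZ, 45 * HZ), i.e. the (1/2)*base ... (3/2)*base interval
 * described above.
 */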
130 unsigned long neigh_rand_reach_time(unsigned long base)
131 {
132 	return base ? get_random_u32_below(base) + (base >> 1) : 0;
133 }
134 EXPORT_SYMBOL(neigh_rand_reach_time);
135 
136 static void neigh_mark_dead(struct neighbour *n)
137 {
138 	n->dead = 1;
139 	if (!list_empty(&n->gc_list)) {
140 		list_del_init(&n->gc_list);
141 		atomic_dec(&n->tbl->gc_entries);
142 	}
143 	if (!list_empty(&n->managed_list))
144 		list_del_init(&n->managed_list);
145 }
146 
147 static void neigh_update_gc_list(struct neighbour *n)
148 {
149 	bool on_gc_list, exempt_from_gc;
150 
151 	write_lock_bh(&n->tbl->lock);
152 	write_lock(&n->lock);
153 	if (n->dead)
154 		goto out;
155 
156 	/* remove from the gc list if new state is permanent or if neighbor
157 	 * is externally learned; otherwise entry should be on the gc list
158 	 */
159 	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
160 			 n->flags & NTF_EXT_LEARNED;
161 	on_gc_list = !list_empty(&n->gc_list);
162 
163 	if (exempt_from_gc && on_gc_list) {
164 		list_del_init(&n->gc_list);
165 		atomic_dec(&n->tbl->gc_entries);
166 	} else if (!exempt_from_gc && !on_gc_list) {
167 		/* add entries to the tail; cleaning removes from the front */
168 		list_add_tail(&n->gc_list, &n->tbl->gc_list);
169 		atomic_inc(&n->tbl->gc_entries);
170 	}
171 out:
172 	write_unlock(&n->lock);
173 	write_unlock_bh(&n->tbl->lock);
174 }
175 
176 static void neigh_update_managed_list(struct neighbour *n)
177 {
178 	bool on_managed_list, add_to_managed;
179 
180 	write_lock_bh(&n->tbl->lock);
181 	write_lock(&n->lock);
182 	if (n->dead)
183 		goto out;
184 
185 	add_to_managed = n->flags & NTF_MANAGED;
186 	on_managed_list = !list_empty(&n->managed_list);
187 
188 	if (!add_to_managed && on_managed_list)
189 		list_del_init(&n->managed_list);
190 	else if (add_to_managed && !on_managed_list)
191 		list_add_tail(&n->managed_list, &n->tbl->managed_list);
192 out:
193 	write_unlock(&n->lock);
194 	write_unlock_bh(&n->tbl->lock);
195 }
196 
197 static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
198 			       bool *gc_update, bool *managed_update)
199 {
200 	u32 ndm_flags, old_flags = neigh->flags;
201 
202 	if (!(flags & NEIGH_UPDATE_F_ADMIN))
203 		return;
204 
205 	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
206 	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;
207 
208 	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
209 		if (ndm_flags & NTF_EXT_LEARNED)
210 			neigh->flags |= NTF_EXT_LEARNED;
211 		else
212 			neigh->flags &= ~NTF_EXT_LEARNED;
213 		*notify = 1;
214 		*gc_update = true;
215 	}
216 	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
217 		if (ndm_flags & NTF_MANAGED)
218 			neigh->flags |= NTF_MANAGED;
219 		else
220 			neigh->flags &= ~NTF_MANAGED;
221 		*notify = 1;
222 		*managed_update = true;
223 	}
224 }
225 
226 bool neigh_remove_one(struct neighbour *n)
227 {
228 	bool retval = false;
229 
230 	write_lock(&n->lock);
231 	if (refcount_read(&n->refcnt) == 1) {
232 		hlist_del_rcu(&n->hash);
233 		hlist_del_rcu(&n->dev_list);
234 		neigh_mark_dead(n);
235 		retval = true;
236 	}
237 	write_unlock(&n->lock);
238 	if (retval)
239 		neigh_cleanup_and_release(n);
240 	return retval;
241 }
242 
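/* Synchronous shrinker for the GC list.  New entries are appended to the
 * tail of tbl->gc_list, so the walk below starts with the oldest
 * candidates.  Only entries whose sole remaining reference is the table's
 * own are considered; at most gc_entries - gc_thresh2 of them are
 * reclaimed, and the scan gives up after roughly one millisecond (tmax),
 * checking the clock only every 16 candidates to keep ktime_get_ns()
 * calls cheap.
 */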
243 static int neigh_forced_gc(struct neigh_table *tbl)
244 {
245 	int max_clean = atomic_read(&tbl->gc_entries) -
246 			READ_ONCE(tbl->gc_thresh2);
247 	u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
248 	unsigned long tref = jiffies - 5 * HZ;
249 	struct neighbour *n, *tmp;
250 	int shrunk = 0;
251 	int loop = 0;
252 
253 	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
254 
255 	write_lock_bh(&tbl->lock);
256 
257 	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
258 		if (refcount_read(&n->refcnt) == 1) {
259 			bool remove = false;
260 
261 			write_lock(&n->lock);
262 			if ((n->nud_state == NUD_FAILED) ||
263 			    (n->nud_state == NUD_NOARP) ||
264 			    (tbl->is_multicast &&
265 			     tbl->is_multicast(n->primary_key)) ||
266 			    !time_in_range(n->updated, tref, jiffies))
267 				remove = true;
268 			write_unlock(&n->lock);
269 
270 			if (remove && neigh_remove_one(n))
271 				shrunk++;
272 			if (shrunk >= max_clean)
273 				break;
274 			if (++loop == 16) {
275 				if (ktime_get_ns() > tmax)
276 					goto unlock;
277 				loop = 0;
278 			}
279 		}
280 	}
281 
282 	WRITE_ONCE(tbl->last_flush, jiffies);
283 unlock:
284 	write_unlock_bh(&tbl->lock);
285 
286 	return shrunk;
287 }
288 
289 static void neigh_add_timer(struct neighbour *n, unsigned long when)
290 {
291 	/* Keep a safe distance from the jiffies - LONG_MAX wrap point while
292 	 * the timer is running in DELAY/PROBE state, but still show user
293 	 * space large times in the past.
294 	 */
295 	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
296 
297 	neigh_hold(n);
298 	if (!time_in_range(n->confirmed, mint, jiffies))
299 		n->confirmed = mint;
300 	if (time_before(n->used, n->confirmed))
301 		n->used = n->confirmed;
302 	if (unlikely(mod_timer(&n->timer, when))) {
303 		printk("NEIGH: BUG, double timer add, state is %x\n",
304 		       n->nud_state);
305 		dump_stack();
306 	}
307 }
308 
309 static int neigh_del_timer(struct neighbour *n)
310 {
311 	if ((n->nud_state & NUD_IN_TIMER) &&
312 	    del_timer(&n->timer)) {
313 		neigh_release(n);
314 		return 1;
315 	}
316 	return 0;
317 }
318 
319 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
320 						   int family)
321 {
322 	switch (family) {
323 	case AF_INET:
324 		return __in_dev_arp_parms_get_rcu(dev);
325 	case AF_INET6:
326 		return __in6_dev_nd_parms_get_rcu(dev);
327 	}
328 	return NULL;
329 }
330 
331 static void neigh_parms_qlen_dec(struct net_device *dev, int family)
332 {
333 	struct neigh_parms *p;
334 
335 	rcu_read_lock();
336 	p = neigh_get_dev_parms_rcu(dev, family);
337 	if (p)
338 		p->qlen--;
339 	rcu_read_unlock();
340 }
341 
342 static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
343 			       int family)
344 {
345 	struct sk_buff_head tmp;
346 	unsigned long flags;
347 	struct sk_buff *skb;
348 
349 	skb_queue_head_init(&tmp);
350 	spin_lock_irqsave(&list->lock, flags);
351 	skb = skb_peek(list);
352 	while (skb != NULL) {
353 		struct sk_buff *skb_next = skb_peek_next(skb, list);
354 		struct net_device *dev = skb->dev;
355 
356 		if (net == NULL || net_eq(dev_net(dev), net)) {
357 			neigh_parms_qlen_dec(dev, family);
358 			__skb_unlink(skb, list);
359 			__skb_queue_tail(&tmp, skb);
360 		}
361 		skb = skb_next;
362 	}
363 	spin_unlock_irqrestore(&list->lock, flags);
364 
365 	while ((skb = __skb_dequeue(&tmp))) {
366 		dev_put(skb->dev);
367 		kfree_skb(skb);
368 	}
369 }
370 
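/* Remove all entries hashed on @dev (all non-permanent ones when
 * skip_perm is true), walking the per-device list instead of the whole
 * hash table.  Entries that are still referenced elsewhere are made
 * harmless: their queues are purged and their output hook is pointed at
 * neigh_blackhole().  Called with tbl->lock held for writing.
 */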
371 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
372 			    bool skip_perm)
373 {
374 	struct hlist_head *dev_head;
375 	struct hlist_node *tmp;
376 	struct neighbour *n;
377 
378 	dev_head = neigh_get_dev_table(dev, tbl->family);
379 
380 	hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
381 		if (skip_perm && n->nud_state & NUD_PERMANENT)
382 			continue;
383 
384 		hlist_del_rcu(&n->hash);
385 		hlist_del_rcu(&n->dev_list);
386 		write_lock(&n->lock);
387 		neigh_del_timer(n);
388 		neigh_mark_dead(n);
389 		if (refcount_read(&n->refcnt) != 1) {
390 			/* The most unpleasant situation.
391 			 * We must destroy neighbour entry,
392 			 * but someone still uses it.
393 			 *
394 			 * The destroy will be delayed until
395 			 * the last user releases us, but
396 			 * we must kill timers etc. and move
397 			 * it to safe state.
398 			 */
399 			__skb_queue_purge(&n->arp_queue);
400 			n->arp_queue_len_bytes = 0;
401 			WRITE_ONCE(n->output, neigh_blackhole);
402 			if (n->nud_state & NUD_VALID)
403 				n->nud_state = NUD_NOARP;
404 			else
405 				n->nud_state = NUD_NONE;
406 			neigh_dbg(2, "neigh %p is stray\n", n);
407 		}
408 		write_unlock(&n->lock);
409 		neigh_cleanup_and_release(n);
410 	}
411 }
412 
413 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
414 {
415 	write_lock_bh(&tbl->lock);
416 	neigh_flush_dev(tbl, dev, false);
417 	write_unlock_bh(&tbl->lock);
418 }
419 EXPORT_SYMBOL(neigh_changeaddr);
420 
421 static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
422 			  bool skip_perm)
423 {
424 	write_lock_bh(&tbl->lock);
425 	neigh_flush_dev(tbl, dev, skip_perm);
426 	pneigh_ifdown_and_unlock(tbl, dev);
427 	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
428 			   tbl->family);
429 	if (skb_queue_empty_lockless(&tbl->proxy_queue))
430 		del_timer_sync(&tbl->proxy_timer);
431 	return 0;
432 }
433 
434 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
435 {
436 	__neigh_ifdown(tbl, dev, true);
437 	return 0;
438 }
439 EXPORT_SYMBOL(neigh_carrier_down);
440 
441 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
442 {
443 	__neigh_ifdown(tbl, dev, false);
444 	return 0;
445 }
446 EXPORT_SYMBOL(neigh_ifdown);
447 
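/* Allocate a new cache entry.  Unless the entry is exempt from GC, it is
 * counted in tbl->gc_entries up front; once that count crosses gc_thresh3
 * (or gc_thresh2 with the last flush more than 5 seconds ago), a
 * synchronous neigh_forced_gc() is attempted, and the allocation fails
 * with the "neighbor table overflow!" message if nothing could be
 * reclaimed and the count is still at or above gc_thresh3.
 */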
448 static struct neighbour *neigh_alloc(struct neigh_table *tbl,
449 				     struct net_device *dev,
450 				     u32 flags, bool exempt_from_gc)
451 {
452 	struct neighbour *n = NULL;
453 	unsigned long now = jiffies;
454 	int entries, gc_thresh3;
455 
456 	if (exempt_from_gc)
457 		goto do_alloc;
458 
459 	entries = atomic_inc_return(&tbl->gc_entries) - 1;
460 	gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
461 	if (entries >= gc_thresh3 ||
462 	    (entries >= READ_ONCE(tbl->gc_thresh2) &&
463 	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
464 		if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
465 			net_info_ratelimited("%s: neighbor table overflow!\n",
466 					     tbl->id);
467 			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
468 			goto out_entries;
469 		}
470 	}
471 
472 do_alloc:
473 	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
474 	if (!n)
475 		goto out_entries;
476 
477 	__skb_queue_head_init(&n->arp_queue);
478 	rwlock_init(&n->lock);
479 	seqlock_init(&n->ha_lock);
480 	n->updated	  = n->used = now;
481 	n->nud_state	  = NUD_NONE;
482 	n->output	  = neigh_blackhole;
483 	n->flags	  = flags;
484 	seqlock_init(&n->hh.hh_lock);
485 	n->parms	  = neigh_parms_clone(&tbl->parms);
486 	timer_setup(&n->timer, neigh_timer_handler, 0);
487 
488 	NEIGH_CACHE_STAT_INC(tbl, allocs);
489 	n->tbl		  = tbl;
490 	refcount_set(&n->refcnt, 1);
491 	n->dead		  = 1;
492 	INIT_LIST_HEAD(&n->gc_list);
493 	INIT_LIST_HEAD(&n->managed_list);
494 
495 	atomic_inc(&tbl->entries);
496 out:
497 	return n;
498 
499 out_entries:
500 	if (!exempt_from_gc)
501 		atomic_dec(&tbl->gc_entries);
502 	goto out;
503 }
504 
505 static void neigh_get_hash_rnd(u32 *x)
506 {
507 	*x = get_random_u32() | 1;
508 }
509 
510 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
511 {
512 	size_t size = (1 << shift) * sizeof(struct hlist_head);
513 	struct hlist_head *hash_heads;
514 	struct neigh_hash_table *ret;
515 	int i;
516 
517 	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
518 	if (!ret)
519 		return NULL;
520 
521 	hash_heads = kvzalloc(size, GFP_ATOMIC);
522 	if (!hash_heads) {
523 		kfree(ret);
524 		return NULL;
525 	}
526 	ret->hash_heads = hash_heads;
527 	ret->hash_shift = shift;
528 	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
529 		neigh_get_hash_rnd(&ret->hash_rnd[i]);
530 	return ret;
531 }
532 
533 static void neigh_hash_free_rcu(struct rcu_head *head)
534 {
535 	struct neigh_hash_table *nht = container_of(head,
536 						    struct neigh_hash_table,
537 						    rcu);
538 
539 	kvfree(nht->hash_heads);
540 	kfree(nht);
541 }
542 
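/* Grow the hash table to 2^new_shift buckets.  Runs under tbl->lock:
 * every entry is rehashed with the new table's hash_rnd, the new table is
 * published with rcu_assign_pointer(), and the old bucket array is only
 * freed after an RCU grace period, so lockless readers never see a
 * dangling bucket.
 */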
543 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
544 						unsigned long new_shift)
545 {
546 	unsigned int i, hash;
547 	struct neigh_hash_table *new_nht, *old_nht;
548 
549 	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
550 
551 	old_nht = rcu_dereference_protected(tbl->nht,
552 					    lockdep_is_held(&tbl->lock));
553 	new_nht = neigh_hash_alloc(new_shift);
554 	if (!new_nht)
555 		return old_nht;
556 
557 	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
558 		struct hlist_node *tmp;
559 		struct neighbour *n;
560 
561 		neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
562 			hash = tbl->hash(n->primary_key, n->dev,
563 					 new_nht->hash_rnd);
564 
565 			hash >>= (32 - new_nht->hash_shift);
566 
567 			hlist_del_rcu(&n->hash);
568 			hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
569 		}
570 	}
571 
572 	rcu_assign_pointer(tbl->nht, new_nht);
573 	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
574 	return new_nht;
575 }
576 
577 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
578 			       struct net_device *dev)
579 {
580 	struct neighbour *n;
581 
582 	NEIGH_CACHE_STAT_INC(tbl, lookups);
583 
584 	rcu_read_lock();
585 	n = __neigh_lookup_noref(tbl, pkey, dev);
586 	if (n) {
587 		if (!refcount_inc_not_zero(&n->refcnt))
588 			n = NULL;
589 		NEIGH_CACHE_STAT_INC(tbl, hits);
590 	}
591 
592 	rcu_read_unlock();
593 	return n;
594 }
595 EXPORT_SYMBOL(neigh_lookup);
596 
597 static struct neighbour *
598 ___neigh_create(struct neigh_table *tbl, const void *pkey,
599 		struct net_device *dev, u32 flags,
600 		bool exempt_from_gc, bool want_ref)
601 {
602 	u32 hash_val, key_len = tbl->key_len;
603 	struct neighbour *n1, *rc, *n;
604 	struct neigh_hash_table *nht;
605 	int error;
606 
607 	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
608 	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
609 	if (!n) {
610 		rc = ERR_PTR(-ENOBUFS);
611 		goto out;
612 	}
613 
614 	memcpy(n->primary_key, pkey, key_len);
615 	n->dev = dev;
616 	netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
617 
618 	/* Protocol specific setup. */
619 	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
620 		rc = ERR_PTR(error);
621 		goto out_neigh_release;
622 	}
623 
624 	if (dev->netdev_ops->ndo_neigh_construct) {
625 		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
626 		if (error < 0) {
627 			rc = ERR_PTR(error);
628 			goto out_neigh_release;
629 		}
630 	}
631 
632 	/* Device specific setup. */
633 	if (n->parms->neigh_setup &&
634 	    (error = n->parms->neigh_setup(n)) < 0) {
635 		rc = ERR_PTR(error);
636 		goto out_neigh_release;
637 	}
638 
639 	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
640 
641 	write_lock_bh(&tbl->lock);
642 	nht = rcu_dereference_protected(tbl->nht,
643 					lockdep_is_held(&tbl->lock));
644 
645 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
646 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
647 
648 	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
649 
650 	if (n->parms->dead) {
651 		rc = ERR_PTR(-EINVAL);
652 		goto out_tbl_unlock;
653 	}
654 
655 	neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
656 		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
657 			if (want_ref)
658 				neigh_hold(n1);
659 			rc = n1;
660 			goto out_tbl_unlock;
661 		}
662 	}
663 
664 	n->dead = 0;
665 	if (!exempt_from_gc)
666 		list_add_tail(&n->gc_list, &n->tbl->gc_list);
667 	if (n->flags & NTF_MANAGED)
668 		list_add_tail(&n->managed_list, &n->tbl->managed_list);
669 	if (want_ref)
670 		neigh_hold(n);
671 	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
672 
673 	hlist_add_head_rcu(&n->dev_list,
674 			   neigh_get_dev_table(dev, tbl->family));
675 
676 	write_unlock_bh(&tbl->lock);
677 	neigh_dbg(2, "neigh %p is created\n", n);
678 	rc = n;
679 out:
680 	return rc;
681 out_tbl_unlock:
682 	write_unlock_bh(&tbl->lock);
683 out_neigh_release:
684 	if (!exempt_from_gc)
685 		atomic_dec(&tbl->gc_entries);
686 	neigh_release(n);
687 	goto out;
688 }
689 
690 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
691 				 struct net_device *dev, bool want_ref)
692 {
693 	bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);
694 
695 	return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
696 }
697 EXPORT_SYMBOL(__neigh_create);
698 
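/* Fold the last four bytes of the key into a 4-bit bucket index.  For an
 * IPv4 proxy entry the key is the address itself, so e.g. 192.0.2.1
 * (an illustrative documentation address) is XOR-folded by 16, 8 and 4
 * bits and then masked with PNEIGH_HASHMASK, landing in one of 16 chains.
 */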
699 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
700 {
701 	u32 hash_val = *(u32 *)(pkey + key_len - 4);
702 	hash_val ^= (hash_val >> 16);
703 	hash_val ^= hash_val >> 8;
704 	hash_val ^= hash_val >> 4;
705 	hash_val &= PNEIGH_HASHMASK;
706 	return hash_val;
707 }
708 
709 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
710 					      struct net *net,
711 					      const void *pkey,
712 					      unsigned int key_len,
713 					      struct net_device *dev)
714 {
715 	while (n) {
716 		if (!memcmp(n->key, pkey, key_len) &&
717 		    net_eq(pneigh_net(n), net) &&
718 		    (n->dev == dev || !n->dev))
719 			return n;
720 		n = n->next;
721 	}
722 	return NULL;
723 }
724 
725 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
726 		struct net *net, const void *pkey, struct net_device *dev)
727 {
728 	unsigned int key_len = tbl->key_len;
729 	u32 hash_val = pneigh_hash(pkey, key_len);
730 
731 	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
732 				 net, pkey, key_len, dev);
733 }
734 EXPORT_SYMBOL_GPL(__pneigh_lookup);
735 
736 struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
737 				    struct net *net, const void *pkey,
738 				    struct net_device *dev, int creat)
739 {
740 	struct pneigh_entry *n;
741 	unsigned int key_len = tbl->key_len;
742 	u32 hash_val = pneigh_hash(pkey, key_len);
743 
744 	read_lock_bh(&tbl->lock);
745 	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
746 			      net, pkey, key_len, dev);
747 	read_unlock_bh(&tbl->lock);
748 
749 	if (n || !creat)
750 		goto out;
751 
752 	ASSERT_RTNL();
753 
754 	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
755 	if (!n)
756 		goto out;
757 
758 	write_pnet(&n->net, net);
759 	memcpy(n->key, pkey, key_len);
760 	n->dev = dev;
761 	netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);
762 
763 	if (tbl->pconstructor && tbl->pconstructor(n)) {
764 		netdev_put(dev, &n->dev_tracker);
765 		kfree(n);
766 		n = NULL;
767 		goto out;
768 	}
769 
770 	write_lock_bh(&tbl->lock);
771 	n->next = tbl->phash_buckets[hash_val];
772 	tbl->phash_buckets[hash_val] = n;
773 	write_unlock_bh(&tbl->lock);
774 out:
775 	return n;
776 }
777 EXPORT_SYMBOL(pneigh_lookup);
778 
779 
780 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
781 		  struct net_device *dev)
782 {
783 	struct pneigh_entry *n, **np;
784 	unsigned int key_len = tbl->key_len;
785 	u32 hash_val = pneigh_hash(pkey, key_len);
786 
787 	write_lock_bh(&tbl->lock);
788 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
789 	     np = &n->next) {
790 		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
791 		    net_eq(pneigh_net(n), net)) {
792 			*np = n->next;
793 			write_unlock_bh(&tbl->lock);
794 			if (tbl->pdestructor)
795 				tbl->pdestructor(n);
796 			netdev_put(n->dev, &n->dev_tracker);
797 			kfree(n);
798 			return 0;
799 		}
800 	}
801 	write_unlock_bh(&tbl->lock);
802 	return -ENOENT;
803 }
804 
805 static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
806 				    struct net_device *dev)
807 {
808 	struct pneigh_entry *n, **np, *freelist = NULL;
809 	u32 h;
810 
811 	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
812 		np = &tbl->phash_buckets[h];
813 		while ((n = *np) != NULL) {
814 			if (!dev || n->dev == dev) {
815 				*np = n->next;
816 				n->next = freelist;
817 				freelist = n;
818 				continue;
819 			}
820 			np = &n->next;
821 		}
822 	}
823 	write_unlock_bh(&tbl->lock);
824 	while ((n = freelist)) {
825 		freelist = n->next;
826 		n->next = NULL;
827 		if (tbl->pdestructor)
828 			tbl->pdestructor(n);
829 		netdev_put(n->dev, &n->dev_tracker);
830 		kfree(n);
831 	}
832 	return -ENOENT;
833 }
834 
835 static void neigh_parms_destroy(struct neigh_parms *parms);
836 
837 static inline void neigh_parms_put(struct neigh_parms *parms)
838 {
839 	if (refcount_dec_and_test(&parms->refcnt))
840 		neigh_parms_destroy(parms);
841 }
842 
843 /*
844  *	The neighbour must already be unlinked from the table;
845  *	it is destroyed when the last reference is dropped.
846  */
847 void neigh_destroy(struct neighbour *neigh)
848 {
849 	struct net_device *dev = neigh->dev;
850 
851 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
852 
853 	if (!neigh->dead) {
854 		pr_warn("Destroying alive neighbour %p\n", neigh);
855 		dump_stack();
856 		return;
857 	}
858 
859 	if (neigh_del_timer(neigh))
860 		pr_warn("Impossible event\n");
861 
862 	write_lock_bh(&neigh->lock);
863 	__skb_queue_purge(&neigh->arp_queue);
864 	write_unlock_bh(&neigh->lock);
865 	neigh->arp_queue_len_bytes = 0;
866 
867 	if (dev->netdev_ops->ndo_neigh_destroy)
868 		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
869 
870 	netdev_put(dev, &neigh->dev_tracker);
871 	neigh_parms_put(neigh->parms);
872 
873 	neigh_dbg(2, "neigh %p is destroyed\n", neigh);
874 
875 	atomic_dec(&neigh->tbl->entries);
876 	kfree_rcu(neigh, rcu);
877 }
878 EXPORT_SYMBOL(neigh_destroy);
879 
880 /* Neighbour state is suspicious;
881    disable the fast path.
882 
883    Called with the neigh write-locked.
884  */
885 static void neigh_suspect(struct neighbour *neigh)
886 {
887 	neigh_dbg(2, "neigh %p is suspected\n", neigh);
888 
889 	WRITE_ONCE(neigh->output, neigh->ops->output);
890 }
891 
892 /* Neighbour state is OK;
893    enable the fast path.
894 
895    Called with the neigh write-locked.
896  */
897 static void neigh_connect(struct neighbour *neigh)
898 {
899 	neigh_dbg(2, "neigh %p is connected\n", neigh);
900 
901 	WRITE_ONCE(neigh->output, neigh->ops->connected_output);
902 }
903 
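/* Deferrable background GC.  Roughly every 300 seconds it re-randomizes
 * reachable_time for every neigh_parms on the table, then reaps
 * unreferenced entries that are NUD_FAILED or have been idle longer than
 * GC_STALETIME, skipping permanent, in-timer and externally learned
 * entries.  tbl->lock is dropped between buckets so the walk stays
 * preemptible, and the work rearms itself every BASE_REACHABLE_TIME/2.
 */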
904 static void neigh_periodic_work(struct work_struct *work)
905 {
906 	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
907 	struct neigh_hash_table *nht;
908 	struct hlist_node *tmp;
909 	struct neighbour *n;
910 	unsigned int i;
911 
912 	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
913 
914 	write_lock_bh(&tbl->lock);
915 	nht = rcu_dereference_protected(tbl->nht,
916 					lockdep_is_held(&tbl->lock));
917 
918 	/*
919 	 *	periodically recompute ReachableTime from random function
920 	 */
921 
922 	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
923 		struct neigh_parms *p;
924 
925 		WRITE_ONCE(tbl->last_rand, jiffies);
926 		list_for_each_entry(p, &tbl->parms_list, list)
927 			p->reachable_time =
928 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
929 	}
930 
931 	if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
932 		goto out;
933 
934 	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
935 		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
936 			unsigned int state;
937 
938 			write_lock(&n->lock);
939 
940 			state = n->nud_state;
941 			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
942 			    (n->flags & NTF_EXT_LEARNED)) {
943 				write_unlock(&n->lock);
944 				continue;
945 			}
946 
947 			if (time_before(n->used, n->confirmed) &&
948 			    time_is_before_eq_jiffies(n->confirmed))
949 				n->used = n->confirmed;
950 
951 			if (refcount_read(&n->refcnt) == 1 &&
952 			    (state == NUD_FAILED ||
953 			     !time_in_range_open(jiffies, n->used,
954 						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
955 				hlist_del_rcu(&n->hash);
956 				hlist_del_rcu(&n->dev_list);
957 				neigh_mark_dead(n);
958 				write_unlock(&n->lock);
959 				neigh_cleanup_and_release(n);
960 				continue;
961 			}
962 			write_unlock(&n->lock);
963 		}
964 		/*
965 		 * It's fine to release lock here, even if hash table
966 		 * grows while we are preempted.
967 		 */
968 		write_unlock_bh(&tbl->lock);
969 		cond_resched();
970 		write_lock_bh(&tbl->lock);
971 		nht = rcu_dereference_protected(tbl->nht,
972 						lockdep_is_held(&tbl->lock));
973 	}
974 out:
975 	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
976 	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
977 	 * BASE_REACHABLE_TIME.
978 	 */
979 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
980 			      NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
981 	write_unlock_bh(&tbl->lock);
982 }
983 
984 static __inline__ int neigh_max_probes(struct neighbour *n)
985 {
986 	struct neigh_parms *p = n->parms;
987 	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
988 	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
989 	        NEIGH_VAR(p, MCAST_PROBES));
990 }
991 
992 static void neigh_invalidate(struct neighbour *neigh)
993 	__releases(neigh->lock)
994 	__acquires(neigh->lock)
995 {
996 	struct sk_buff *skb;
997 
998 	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
999 	neigh_dbg(2, "neigh %p is failed\n", neigh);
1000 	neigh->updated = jiffies;
1001 
1002 	/* This is a very delicate spot. report_unreachable is a very
1003 	   complicated routine. In particular, it can hit the same
1004 	   neighbour entry!
1005 	   So we try to be careful and avoid an endless loop. --ANK
1006 	 */
1007 	while (neigh->nud_state == NUD_FAILED &&
1008 	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1009 		write_unlock(&neigh->lock);
1010 		neigh->ops->error_report(neigh, skb);
1011 		write_lock(&neigh->lock);
1012 	}
1013 	__skb_queue_purge(&neigh->arp_queue);
1014 	neigh->arp_queue_len_bytes = 0;
1015 }
1016 
1017 static void neigh_probe(struct neighbour *neigh)
1018 	__releases(neigh->lock)
1019 {
1020 	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1021 	/* keep skb alive even if arp_queue overflows */
1022 	if (skb)
1023 		skb = skb_clone(skb, GFP_ATOMIC);
1024 	write_unlock(&neigh->lock);
1025 	if (neigh->ops->solicit)
1026 		neigh->ops->solicit(neigh, skb);
1027 	atomic_inc(&neigh->probes);
1028 	consume_skb(skb);
1029 }
1030 
1031 /* Called when a timer expires for a neighbour entry. */
1032 
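/* State transitions driven from here:
 *
 *   REACHABLE -> DELAY       confirmation expired, but the entry was used
 *                            within DELAY_PROBE_TIME
 *   REACHABLE -> STALE       confirmation expired and the entry is idle
 *   DELAY     -> REACHABLE   confirmed again within DELAY_PROBE_TIME
 *   DELAY     -> PROBE       no confirmation; start unicast probing
 *   INCOMPLETE/PROBE -> FAILED  once neigh_max_probes() is exceeded
 */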
1033 static void neigh_timer_handler(struct timer_list *t)
1034 {
1035 	unsigned long now, next;
1036 	struct neighbour *neigh = from_timer(neigh, t, timer);
1037 	unsigned int state;
1038 	int notify = 0;
1039 
1040 	write_lock(&neigh->lock);
1041 
1042 	state = neigh->nud_state;
1043 	now = jiffies;
1044 	next = now + HZ;
1045 
1046 	if (!(state & NUD_IN_TIMER))
1047 		goto out;
1048 
1049 	if (state & NUD_REACHABLE) {
1050 		if (time_before_eq(now,
1051 				   neigh->confirmed + neigh->parms->reachable_time)) {
1052 			neigh_dbg(2, "neigh %p is still alive\n", neigh);
1053 			next = neigh->confirmed + neigh->parms->reachable_time;
1054 		} else if (time_before_eq(now,
1055 					  neigh->used +
1056 					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1057 			neigh_dbg(2, "neigh %p is delayed\n", neigh);
1058 			WRITE_ONCE(neigh->nud_state, NUD_DELAY);
1059 			neigh->updated = jiffies;
1060 			neigh_suspect(neigh);
1061 			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1062 		} else {
1063 			neigh_dbg(2, "neigh %p is suspected\n", neigh);
1064 			WRITE_ONCE(neigh->nud_state, NUD_STALE);
1065 			neigh->updated = jiffies;
1066 			neigh_suspect(neigh);
1067 			notify = 1;
1068 		}
1069 	} else if (state & NUD_DELAY) {
1070 		if (time_before_eq(now,
1071 				   neigh->confirmed +
1072 				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1073 			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1074 			WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
1075 			neigh->updated = jiffies;
1076 			neigh_connect(neigh);
1077 			notify = 1;
1078 			next = neigh->confirmed + neigh->parms->reachable_time;
1079 		} else {
1080 			neigh_dbg(2, "neigh %p is probed\n", neigh);
1081 			WRITE_ONCE(neigh->nud_state, NUD_PROBE);
1082 			neigh->updated = jiffies;
1083 			atomic_set(&neigh->probes, 0);
1084 			notify = 1;
1085 			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1086 					 HZ/100);
1087 		}
1088 	} else {
1089 		/* NUD_PROBE|NUD_INCOMPLETE */
1090 		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
1091 	}
1092 
1093 	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1094 	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1095 		WRITE_ONCE(neigh->nud_state, NUD_FAILED);
1096 		notify = 1;
1097 		neigh_invalidate(neigh);
1098 		goto out;
1099 	}
1100 
1101 	if (neigh->nud_state & NUD_IN_TIMER) {
1102 		if (time_before(next, jiffies + HZ/100))
1103 			next = jiffies + HZ/100;
1104 		if (!mod_timer(&neigh->timer, next))
1105 			neigh_hold(neigh);
1106 	}
1107 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1108 		neigh_probe(neigh);
1109 	} else {
1110 out:
1111 		write_unlock(&neigh->lock);
1112 	}
1113 
1114 	if (notify)
1115 		neigh_update_notify(neigh, 0);
1116 
1117 	trace_neigh_timer_handler(neigh, 0);
1118 
1119 	neigh_release(neigh);
1120 }
1121 
1122 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
1123 		       const bool immediate_ok)
1124 {
1125 	int rc;
1126 	bool immediate_probe = false;
1127 
1128 	write_lock_bh(&neigh->lock);
1129 
1130 	rc = 0;
1131 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1132 		goto out_unlock_bh;
1133 	if (neigh->dead)
1134 		goto out_dead;
1135 
1136 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1137 		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1138 		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
1139 			unsigned long next, now = jiffies;
1140 
1141 			atomic_set(&neigh->probes,
1142 				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
1143 			neigh_del_timer(neigh);
1144 			WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
1145 			neigh->updated = now;
1146 			if (!immediate_ok) {
1147 				next = now + 1;
1148 			} else {
1149 				immediate_probe = true;
1150 				next = now + max(NEIGH_VAR(neigh->parms,
1151 							   RETRANS_TIME),
1152 						 HZ / 100);
1153 			}
1154 			neigh_add_timer(neigh, next);
1155 		} else {
1156 			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
1157 			neigh->updated = jiffies;
1158 			write_unlock_bh(&neigh->lock);
1159 
1160 			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
1161 			return 1;
1162 		}
1163 	} else if (neigh->nud_state & NUD_STALE) {
1164 		neigh_dbg(2, "neigh %p is delayed\n", neigh);
1165 		neigh_del_timer(neigh);
1166 		WRITE_ONCE(neigh->nud_state, NUD_DELAY);
1167 		neigh->updated = jiffies;
1168 		neigh_add_timer(neigh, jiffies +
1169 				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1170 	}
1171 
1172 	if (neigh->nud_state == NUD_INCOMPLETE) {
1173 		if (skb) {
1174 			while (neigh->arp_queue_len_bytes + skb->truesize >
1175 			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1176 				struct sk_buff *buff;
1177 
1178 				buff = __skb_dequeue(&neigh->arp_queue);
1179 				if (!buff)
1180 					break;
1181 				neigh->arp_queue_len_bytes -= buff->truesize;
1182 				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
1183 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1184 			}
1185 			skb_dst_force(skb);
1186 			__skb_queue_tail(&neigh->arp_queue, skb);
1187 			neigh->arp_queue_len_bytes += skb->truesize;
1188 		}
1189 		rc = 1;
1190 	}
1191 out_unlock_bh:
1192 	if (immediate_probe)
1193 		neigh_probe(neigh);
1194 	else
1195 		write_unlock(&neigh->lock);
1196 	local_bh_enable();
1197 	trace_neigh_event_send_done(neigh, rc);
1198 	return rc;
1199 
1200 out_dead:
1201 	if (neigh->nud_state & NUD_STALE)
1202 		goto out_unlock_bh;
1203 	write_unlock_bh(&neigh->lock);
1204 	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
1205 	trace_neigh_event_send_dead(neigh, 1);
1206 	return 1;
1207 }
1208 EXPORT_SYMBOL(__neigh_event_send);
1209 
1210 static void neigh_update_hhs(struct neighbour *neigh)
1211 {
1212 	struct hh_cache *hh;
1213 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1214 		= NULL;
1215 
1216 	if (neigh->dev->header_ops)
1217 		update = neigh->dev->header_ops->cache_update;
1218 
1219 	if (update) {
1220 		hh = &neigh->hh;
1221 		if (READ_ONCE(hh->hh_len)) {
1222 			write_seqlock_bh(&hh->hh_lock);
1223 			update(hh, neigh->dev, neigh->ha);
1224 			write_sequnlock_bh(&hh->hh_lock);
1225 		}
1226 	}
1227 }
1228 
1229 /* Generic update routine.
1230    -- lladdr is the new lladdr, or NULL if it is not supplied.
1231    -- new    is the new state.
1232    -- flags
1233 	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
1234 				if it is different.
1235 	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
1236 				lladdr instead of overriding it
1237 				if it is different.
1238 	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
1239 	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
1240 	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
1241 	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
1242 				NTF_ROUTER flag.
1243 	NEIGH_UPDATE_F_ISROUTER	indicates that the neighbour is known to be
1244 				a router.
1245 
1246    The caller MUST hold a reference count on the entry.
1247  */
1248 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1249 			  u8 new, u32 flags, u32 nlmsg_pid,
1250 			  struct netlink_ext_ack *extack)
1251 {
1252 	bool gc_update = false, managed_update = false;
1253 	int update_isrouter = 0;
1254 	struct net_device *dev;
1255 	int err, notify = 0;
1256 	u8 old;
1257 
1258 	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1259 
1260 	write_lock_bh(&neigh->lock);
1261 
1262 	dev    = neigh->dev;
1263 	old    = neigh->nud_state;
1264 	err    = -EPERM;
1265 
1266 	if (neigh->dead) {
1267 		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1268 		new = old;
1269 		goto out;
1270 	}
1271 	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1272 	    (old & (NUD_NOARP | NUD_PERMANENT)))
1273 		goto out;
1274 
1275 	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
1276 	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
1277 		new = old & ~NUD_PERMANENT;
1278 		WRITE_ONCE(neigh->nud_state, new);
1279 		err = 0;
1280 		goto out;
1281 	}
1282 
1283 	if (!(new & NUD_VALID)) {
1284 		neigh_del_timer(neigh);
1285 		if (old & NUD_CONNECTED)
1286 			neigh_suspect(neigh);
1287 		WRITE_ONCE(neigh->nud_state, new);
1288 		err = 0;
1289 		notify = old & NUD_VALID;
1290 		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1291 		    (new & NUD_FAILED)) {
1292 			neigh_invalidate(neigh);
1293 			notify = 1;
1294 		}
1295 		goto out;
1296 	}
1297 
1298 	/* Compare new lladdr with cached one */
1299 	if (!dev->addr_len) {
1300 		/* First case: device needs no address. */
1301 		lladdr = neigh->ha;
1302 	} else if (lladdr) {
1303 		/* The second case: if something is already cached
1304 		   and a new address is proposed:
1305 		   - compare new & old
1306 		   - if they are different, check override flag
1307 		 */
1308 		if ((old & NUD_VALID) &&
1309 		    !memcmp(lladdr, neigh->ha, dev->addr_len))
1310 			lladdr = neigh->ha;
1311 	} else {
1312 		/* No address is supplied; if we know something,
1313 		   use it, otherwise discard the request.
1314 		 */
1315 		err = -EINVAL;
1316 		if (!(old & NUD_VALID)) {
1317 			NL_SET_ERR_MSG(extack, "No link layer address given");
1318 			goto out;
1319 		}
1320 		lladdr = neigh->ha;
1321 	}
1322 
1323 	/* Update the confirmed timestamp for the neighbour entry after we
1324 	 * receive an ARP packet, even if it doesn't change the IP-to-MAC binding.
1325 	 */
1326 	if (new & NUD_CONNECTED)
1327 		neigh->confirmed = jiffies;
1328 
1329 	/* If the entry was valid and the address has not changed,
1330 	   do not change the entry state if the new one is STALE.
1331 	 */
1332 	err = 0;
1333 	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1334 	if (old & NUD_VALID) {
1335 		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1336 			update_isrouter = 0;
1337 			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1338 			    (old & NUD_CONNECTED)) {
1339 				lladdr = neigh->ha;
1340 				new = NUD_STALE;
1341 			} else
1342 				goto out;
1343 		} else {
1344 			if (lladdr == neigh->ha && new == NUD_STALE &&
1345 			    !(flags & NEIGH_UPDATE_F_ADMIN))
1346 				new = old;
1347 		}
1348 	}
1349 
1350 	/* Update the timestamp only once we know we will make a change to the
1351 	 * neighbour entry. Otherwise we risk moving the locktime window with
1352 	 * noop updates and ignoring relevant ARP updates.
1353 	 */
1354 	if (new != old || lladdr != neigh->ha)
1355 		neigh->updated = jiffies;
1356 
1357 	if (new != old) {
1358 		neigh_del_timer(neigh);
1359 		if (new & NUD_PROBE)
1360 			atomic_set(&neigh->probes, 0);
1361 		if (new & NUD_IN_TIMER)
1362 			neigh_add_timer(neigh, (jiffies +
1363 						((new & NUD_REACHABLE) ?
1364 						 neigh->parms->reachable_time :
1365 						 0)));
1366 		WRITE_ONCE(neigh->nud_state, new);
1367 		notify = 1;
1368 	}
1369 
1370 	if (lladdr != neigh->ha) {
1371 		write_seqlock(&neigh->ha_lock);
1372 		memcpy(&neigh->ha, lladdr, dev->addr_len);
1373 		write_sequnlock(&neigh->ha_lock);
1374 		neigh_update_hhs(neigh);
1375 		if (!(new & NUD_CONNECTED))
1376 			neigh->confirmed = jiffies -
1377 				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1378 		notify = 1;
1379 	}
1380 	if (new == old)
1381 		goto out;
1382 	if (new & NUD_CONNECTED)
1383 		neigh_connect(neigh);
1384 	else
1385 		neigh_suspect(neigh);
1386 	if (!(old & NUD_VALID)) {
1387 		struct sk_buff *skb;
1388 
1389 		/* Again: avoid dead loop if something went wrong */
1390 		/* Again: avoid an endless loop if something went wrong */
1391 		while (neigh->nud_state & NUD_VALID &&
1392 		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1393 			struct dst_entry *dst = skb_dst(skb);
1394 			struct neighbour *n2, *n1 = neigh;
1395 			write_unlock_bh(&neigh->lock);
1396 
1397 			rcu_read_lock();
1398 
1399 			/* Why not just use 'neigh' as-is?  The problem is that
1400 			 * things such as shaper, eql, and sch_teql can end up
1401 			 * using alternative, different, neigh objects to output
1402 			 * the packet in the output path.  So what we need to do
1403 			 * here is re-lookup the top-level neigh in the path so
1404 			 * we can reinject the packet there.
1405 			 */
1406 			n2 = NULL;
1407 			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
1408 				n2 = dst_neigh_lookup_skb(dst, skb);
1409 				if (n2)
1410 					n1 = n2;
1411 			}
1412 			READ_ONCE(n1->output)(n1, skb);
1413 			if (n2)
1414 				neigh_release(n2);
1415 			rcu_read_unlock();
1416 
1417 			write_lock_bh(&neigh->lock);
1418 		}
1419 		__skb_queue_purge(&neigh->arp_queue);
1420 		neigh->arp_queue_len_bytes = 0;
1421 	}
1422 out:
1423 	if (update_isrouter)
1424 		neigh_update_is_router(neigh, flags, &notify);
1425 	write_unlock_bh(&neigh->lock);
1426 	if (((new ^ old) & NUD_PERMANENT) || gc_update)
1427 		neigh_update_gc_list(neigh);
1428 	if (managed_update)
1429 		neigh_update_managed_list(neigh);
1430 	if (notify)
1431 		neigh_update_notify(neigh, nlmsg_pid);
1432 	trace_neigh_update_done(neigh, err);
1433 	return err;
1434 }
1435 
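/* Public wrapper around __neigh_update() without an extack.  A typical
 * caller confirms a mapping just seen on the wire; for example,
 * neigh_event_ns() below does:
 *
 *	neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE, 0);
 */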
1436 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1437 		 u32 flags, u32 nlmsg_pid)
1438 {
1439 	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
1440 }
1441 EXPORT_SYMBOL(neigh_update);
1442 
1443 /* Update the neigh to listen temporarily for probe responses, even if it is
1444  * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1445  */
1446 void __neigh_set_probe_once(struct neighbour *neigh)
1447 {
1448 	if (neigh->dead)
1449 		return;
1450 	neigh->updated = jiffies;
1451 	if (!(neigh->nud_state & NUD_FAILED))
1452 		return;
1453 	WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
1454 	atomic_set(&neigh->probes, neigh_max_probes(neigh));
1455 	neigh_add_timer(neigh,
1456 			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1457 				      HZ/100));
1458 }
1459 EXPORT_SYMBOL(__neigh_set_probe_once);
1460 
1461 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1462 				 u8 *lladdr, void *saddr,
1463 				 struct net_device *dev)
1464 {
1465 	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1466 						 lladdr || !dev->addr_len);
1467 	if (neigh)
1468 		neigh_update(neigh, lladdr, NUD_STALE,
1469 			     NEIGH_UPDATE_F_OVERRIDE, 0);
1470 	return neigh;
1471 }
1472 EXPORT_SYMBOL(neigh_event_ns);
1473 
1474 /* called with read_lock_bh(&n->lock); */
1475 static void neigh_hh_init(struct neighbour *n)
1476 {
1477 	struct net_device *dev = n->dev;
1478 	__be16 prot = n->tbl->protocol;
1479 	struct hh_cache	*hh = &n->hh;
1480 
1481 	write_lock_bh(&n->lock);
1482 
1483 	/* Only one thread can come in here and initialize the
1484 	 * hh_cache entry.
1485 	 */
1486 	if (!hh->hh_len)
1487 		dev->header_ops->cache(n, hh, prot);
1488 
1489 	write_unlock_bh(&n->lock);
1490 }
1491 
1492 /* Slow and careful. */
1493 
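/* The read_seqbegin()/read_seqretry() loop below copies neigh->ha into
 * the frame header and retries whenever __neigh_update() changed the
 * address under ha_lock in the meantime, so a frame never goes out with
 * a torn hardware address.
 */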
1494 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1495 {
1496 	int rc = 0;
1497 
1498 	if (!neigh_event_send(neigh, skb)) {
1499 		int err;
1500 		struct net_device *dev = neigh->dev;
1501 		unsigned int seq;
1502 
1503 		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1504 			neigh_hh_init(neigh);
1505 
1506 		do {
1507 			__skb_pull(skb, skb_network_offset(skb));
1508 			seq = read_seqbegin(&neigh->ha_lock);
1509 			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1510 					      neigh->ha, NULL, skb->len);
1511 		} while (read_seqretry(&neigh->ha_lock, seq));
1512 
1513 		if (err >= 0)
1514 			rc = dev_queue_xmit(skb);
1515 		else
1516 			goto out_kfree_skb;
1517 	}
1518 out:
1519 	return rc;
1520 out_kfree_skb:
1521 	rc = -EINVAL;
1522 	kfree_skb(skb);
1523 	goto out;
1524 }
1525 EXPORT_SYMBOL(neigh_resolve_output);
1526 
1527 /* As fast as possible without hh cache */
1528 
1529 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1530 {
1531 	struct net_device *dev = neigh->dev;
1532 	unsigned int seq;
1533 	int err;
1534 
1535 	do {
1536 		__skb_pull(skb, skb_network_offset(skb));
1537 		seq = read_seqbegin(&neigh->ha_lock);
1538 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1539 				      neigh->ha, NULL, skb->len);
1540 	} while (read_seqretry(&neigh->ha_lock, seq));
1541 
1542 	if (err >= 0)
1543 		err = dev_queue_xmit(skb);
1544 	else {
1545 		err = -EINVAL;
1546 		kfree_skb(skb);
1547 	}
1548 	return err;
1549 }
1550 EXPORT_SYMBOL(neigh_connected_output);
1551 
1552 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1553 {
1554 	return dev_queue_xmit(skb);
1555 }
1556 EXPORT_SYMBOL(neigh_direct_output);
1557 
1558 static void neigh_managed_work(struct work_struct *work)
1559 {
1560 	struct neigh_table *tbl = container_of(work, struct neigh_table,
1561 					       managed_work.work);
1562 	struct neighbour *neigh;
1563 
1564 	write_lock_bh(&tbl->lock);
1565 	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1566 		neigh_event_send_probe(neigh, NULL, false);
1567 	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1568 			   NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
1569 	write_unlock_bh(&tbl->lock);
1570 }
1571 
1572 static void neigh_proxy_process(struct timer_list *t)
1573 {
1574 	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
1575 	long sched_next = 0;
1576 	unsigned long now = jiffies;
1577 	struct sk_buff *skb, *n;
1578 
1579 	spin_lock(&tbl->proxy_queue.lock);
1580 
1581 	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1582 		long tdif = NEIGH_CB(skb)->sched_next - now;
1583 
1584 		if (tdif <= 0) {
1585 			struct net_device *dev = skb->dev;
1586 
1587 			neigh_parms_qlen_dec(dev, tbl->family);
1588 			__skb_unlink(skb, &tbl->proxy_queue);
1589 
1590 			if (tbl->proxy_redo && netif_running(dev)) {
1591 				rcu_read_lock();
1592 				tbl->proxy_redo(skb);
1593 				rcu_read_unlock();
1594 			} else {
1595 				kfree_skb(skb);
1596 			}
1597 
1598 			dev_put(dev);
1599 		} else if (!sched_next || tdif < sched_next)
1600 			sched_next = tdif;
1601 	}
1602 	del_timer(&tbl->proxy_timer);
1603 	if (sched_next)
1604 		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1605 	spin_unlock(&tbl->proxy_queue.lock);
1606 }
1607 
1608 static unsigned long neigh_proxy_delay(struct neigh_parms *p)
1609 {
1610 	/* If proxy_delay is zero, do not call get_random_u32_below()
1611 	 * as it is undefined behavior.
1612 	 */
1613 	unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);
1614 
1615 	return proxy_delay ?
1616 	       jiffies + get_random_u32_below(proxy_delay) : jiffies;
1617 }
1618 
1619 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1620 		    struct sk_buff *skb)
1621 {
1622 	unsigned long sched_next = neigh_proxy_delay(p);
1623 
1624 	if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1625 		kfree_skb(skb);
1626 		return;
1627 	}
1628 
1629 	NEIGH_CB(skb)->sched_next = sched_next;
1630 	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1631 
1632 	spin_lock(&tbl->proxy_queue.lock);
1633 	if (del_timer(&tbl->proxy_timer)) {
1634 		if (time_before(tbl->proxy_timer.expires, sched_next))
1635 			sched_next = tbl->proxy_timer.expires;
1636 	}
1637 	skb_dst_drop(skb);
1638 	dev_hold(skb->dev);
1639 	__skb_queue_tail(&tbl->proxy_queue, skb);
1640 	p->qlen++;
1641 	mod_timer(&tbl->proxy_timer, sched_next);
1642 	spin_unlock(&tbl->proxy_queue.lock);
1643 }
1644 EXPORT_SYMBOL(pneigh_enqueue);
1645 
1646 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1647 						      struct net *net, int ifindex)
1648 {
1649 	struct neigh_parms *p;
1650 
1651 	list_for_each_entry(p, &tbl->parms_list, list) {
1652 		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1653 		    (!p->dev && !ifindex && net_eq(net, &init_net)))
1654 			return p;
1655 	}
1656 
1657 	return NULL;
1658 }
1659 
1660 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1661 				      struct neigh_table *tbl)
1662 {
1663 	struct neigh_parms *p;
1664 	struct net *net = dev_net(dev);
1665 	const struct net_device_ops *ops = dev->netdev_ops;
1666 
1667 	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1668 	if (p) {
1669 		p->tbl		  = tbl;
1670 		refcount_set(&p->refcnt, 1);
1671 		p->reachable_time =
1672 				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1673 		p->qlen = 0;
1674 		netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
1675 		p->dev = dev;
1676 		write_pnet(&p->net, net);
1677 		p->sysctl_table = NULL;
1678 
1679 		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1680 			netdev_put(dev, &p->dev_tracker);
1681 			kfree(p);
1682 			return NULL;
1683 		}
1684 
1685 		write_lock_bh(&tbl->lock);
1686 		list_add(&p->list, &tbl->parms.list);
1687 		write_unlock_bh(&tbl->lock);
1688 
1689 		neigh_parms_data_state_cleanall(p);
1690 	}
1691 	return p;
1692 }
1693 EXPORT_SYMBOL(neigh_parms_alloc);
1694 
1695 static void neigh_rcu_free_parms(struct rcu_head *head)
1696 {
1697 	struct neigh_parms *parms =
1698 		container_of(head, struct neigh_parms, rcu_head);
1699 
1700 	neigh_parms_put(parms);
1701 }
1702 
1703 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1704 {
1705 	if (!parms || parms == &tbl->parms)
1706 		return;
1707 	write_lock_bh(&tbl->lock);
1708 	list_del(&parms->list);
1709 	parms->dead = 1;
1710 	write_unlock_bh(&tbl->lock);
1711 	netdev_put(parms->dev, &parms->dev_tracker);
1712 	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1713 }
1714 EXPORT_SYMBOL(neigh_parms_release);
1715 
1716 static void neigh_parms_destroy(struct neigh_parms *parms)
1717 {
1718 	kfree(parms);
1719 }
1720 
1721 static struct lock_class_key neigh_table_proxy_queue_class;
1722 
1723 static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1724 
1725 void neigh_table_init(int index, struct neigh_table *tbl)
1726 {
1727 	unsigned long now = jiffies;
1728 	unsigned long phsize;
1729 
1730 	INIT_LIST_HEAD(&tbl->parms_list);
1731 	INIT_LIST_HEAD(&tbl->gc_list);
1732 	INIT_LIST_HEAD(&tbl->managed_list);
1733 
1734 	list_add(&tbl->parms.list, &tbl->parms_list);
1735 	write_pnet(&tbl->parms.net, &init_net);
1736 	refcount_set(&tbl->parms.refcnt, 1);
1737 	tbl->parms.reachable_time =
1738 			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1739 	tbl->parms.qlen = 0;
1740 
1741 	tbl->stats = alloc_percpu(struct neigh_statistics);
1742 	if (!tbl->stats)
1743 		panic("cannot create neighbour cache statistics");
1744 
1745 #ifdef CONFIG_PROC_FS
1746 	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1747 			      &neigh_stat_seq_ops, tbl))
1748 		panic("cannot create neighbour proc dir entry");
1749 #endif
1750 
1751 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1752 
1753 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1754 	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1755 
1756 	if (!tbl->nht || !tbl->phash_buckets)
1757 		panic("cannot allocate neighbour cache hashes");
1758 
1759 	if (!tbl->entry_size)
1760 		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1761 					tbl->key_len, NEIGH_PRIV_ALIGN);
1762 	else
1763 		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1764 
1765 	rwlock_init(&tbl->lock);
1766 
1767 	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1768 	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1769 			tbl->parms.reachable_time);
1770 	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
1771 	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
1772 
1773 	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1774 	skb_queue_head_init_class(&tbl->proxy_queue,
1775 			&neigh_table_proxy_queue_class);
1776 
1777 	tbl->last_flush = now;
1778 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
1779 
1780 	rcu_assign_pointer(neigh_tables[index], tbl);
1781 }
1782 EXPORT_SYMBOL(neigh_table_init);
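
/* Registration sketch, mirroring what the ARP code does at boot (shown
 * only for orientation):
 *
 *	neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl);
 *
 * The index must be below NEIGH_NR_TABLES so that neigh_find_table()
 * and neigh_xmit() can later look the table up by family.
 */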
1783 
1784 /*
1785  * Only called from ndisc_cleanup(), which means this is effectively
1786  * dead code, because the IPv6 module can no longer be unloaded.
1787  */
1788 int neigh_table_clear(int index, struct neigh_table *tbl)
1789 {
1790 	RCU_INIT_POINTER(neigh_tables[index], NULL);
1791 	synchronize_rcu();
1792 
1793 	/* This is not clean... fix it so the IPv6 module can be unloaded safely */
1794 	cancel_delayed_work_sync(&tbl->managed_work);
1795 	cancel_delayed_work_sync(&tbl->gc_work);
1796 	del_timer_sync(&tbl->proxy_timer);
1797 	pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
1798 	neigh_ifdown(tbl, NULL);
1799 	if (atomic_read(&tbl->entries))
1800 		pr_crit("neighbour leakage\n");
1801 
1802 	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1803 		 neigh_hash_free_rcu);
1804 	tbl->nht = NULL;
1805 
1806 	kfree(tbl->phash_buckets);
1807 	tbl->phash_buckets = NULL;
1808 
1809 	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1810 
1811 	free_percpu(tbl->stats);
1812 	tbl->stats = NULL;
1813 
1814 	return 0;
1815 }
1816 EXPORT_SYMBOL(neigh_table_clear);
1817 
1818 static struct neigh_table *neigh_find_table(int family)
1819 {
1820 	struct neigh_table *tbl = NULL;
1821 
1822 	switch (family) {
1823 	case AF_INET:
1824 		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
1825 		break;
1826 	case AF_INET6:
1827 		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
1828 		break;
1829 	}
1830 
1831 	return tbl;
1832 }
1833 
1834 const struct nla_policy nda_policy[NDA_MAX+1] = {
1835 	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
1836 	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1837 	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1838 	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
1839 	[NDA_PROBES]		= { .type = NLA_U32 },
1840 	[NDA_VLAN]		= { .type = NLA_U16 },
1841 	[NDA_PORT]		= { .type = NLA_U16 },
1842 	[NDA_VNI]		= { .type = NLA_U32 },
1843 	[NDA_IFINDEX]		= { .type = NLA_U32 },
1844 	[NDA_MASTER]		= { .type = NLA_U32 },
1845 	[NDA_PROTOCOL]		= { .type = NLA_U8 },
1846 	[NDA_NH_ID]		= { .type = NLA_U32 },
1847 	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
1848 	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
1849 };
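
/* Note on the policy above: .strict_start_type = NDA_NH_ID means that
 * NDA_NH_ID and any newer attributes are validated strictly even on the
 * nlmsg_parse_deprecated() paths; older attributes keep the lenient
 * legacy validation for compatibility.
 */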
1850 
1851 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1852 			struct netlink_ext_ack *extack)
1853 {
1854 	struct net *net = sock_net(skb->sk);
1855 	struct ndmsg *ndm;
1856 	struct nlattr *dst_attr;
1857 	struct neigh_table *tbl;
1858 	struct neighbour *neigh;
1859 	struct net_device *dev = NULL;
1860 	int err = -EINVAL;
1861 
1862 	ASSERT_RTNL();
1863 	if (nlmsg_len(nlh) < sizeof(*ndm))
1864 		goto out;
1865 
1866 	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1867 	if (!dst_attr) {
1868 		NL_SET_ERR_MSG(extack, "Network address not specified");
1869 		goto out;
1870 	}
1871 
1872 	ndm = nlmsg_data(nlh);
1873 	if (ndm->ndm_ifindex) {
1874 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1875 		if (dev == NULL) {
1876 			err = -ENODEV;
1877 			goto out;
1878 		}
1879 	}
1880 
1881 	tbl = neigh_find_table(ndm->ndm_family);
1882 	if (tbl == NULL)
1883 		return -EAFNOSUPPORT;
1884 
1885 	if (nla_len(dst_attr) < (int)tbl->key_len) {
1886 		NL_SET_ERR_MSG(extack, "Invalid network address");
1887 		goto out;
1888 	}
1889 
1890 	if (ndm->ndm_flags & NTF_PROXY) {
1891 		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1892 		goto out;
1893 	}
1894 
1895 	if (dev == NULL)
1896 		goto out;
1897 
1898 	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1899 	if (neigh == NULL) {
1900 		err = -ENOENT;
1901 		goto out;
1902 	}
1903 
1904 	err = __neigh_update(neigh, NULL, NUD_FAILED,
1905 			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1906 			     NETLINK_CB(skb).portid, extack);
1907 	write_lock_bh(&tbl->lock);
1908 	neigh_release(neigh);
1909 	neigh_remove_one(neigh);
1910 	write_unlock_bh(&tbl->lock);
1911 
1912 out:
1913 	return err;
1914 }
1915 
1916 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1917 		     struct netlink_ext_ack *extack)
1918 {
1919 	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1920 		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1921 	struct net *net = sock_net(skb->sk);
1922 	struct ndmsg *ndm;
1923 	struct nlattr *tb[NDA_MAX+1];
1924 	struct neigh_table *tbl;
1925 	struct net_device *dev = NULL;
1926 	struct neighbour *neigh;
1927 	void *dst, *lladdr;
1928 	u8 protocol = 0;
1929 	u32 ndm_flags;
1930 	int err;
1931 
1932 	ASSERT_RTNL();
1933 	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
1934 				     nda_policy, extack);
1935 	if (err < 0)
1936 		goto out;
1937 
1938 	err = -EINVAL;
1939 	if (!tb[NDA_DST]) {
1940 		NL_SET_ERR_MSG(extack, "Network address not specified");
1941 		goto out;
1942 	}
1943 
1944 	ndm = nlmsg_data(nlh);
1945 	ndm_flags = ndm->ndm_flags;
1946 	if (tb[NDA_FLAGS_EXT]) {
1947 		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
1948 
1949 		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
1950 			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
1951 			      hweight32(NTF_EXT_MASK)));
1952 		ndm_flags |= (ext << NTF_EXT_SHIFT);
1953 	}
1954 	if (ndm->ndm_ifindex) {
1955 		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1956 		if (dev == NULL) {
1957 			err = -ENODEV;
1958 			goto out;
1959 		}
1960 
1961 		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
1962 			NL_SET_ERR_MSG(extack, "Invalid link address");
1963 			goto out;
1964 		}
1965 	}
1966 
1967 	tbl = neigh_find_table(ndm->ndm_family);
1968 	if (tbl == NULL)
1969 		return -EAFNOSUPPORT;
1970 
1971 	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
1972 		NL_SET_ERR_MSG(extack, "Invalid network address");
1973 		goto out;
1974 	}
1975 
1976 	dst = nla_data(tb[NDA_DST]);
1977 	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1978 
1979 	if (tb[NDA_PROTOCOL])
1980 		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
1981 	if (ndm_flags & NTF_PROXY) {
1982 		struct pneigh_entry *pn;
1983 
1984 		if (ndm_flags & NTF_MANAGED) {
1985 			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
1986 			goto out;
1987 		}
1988 
1989 		err = -ENOBUFS;
1990 		pn = pneigh_lookup(tbl, net, dst, dev, 1);
1991 		if (pn) {
1992 			pn->flags = ndm_flags;
1993 			if (protocol)
1994 				pn->protocol = protocol;
1995 			err = 0;
1996 		}
1997 		goto out;
1998 	}
1999 
2000 	if (!dev) {
2001 		NL_SET_ERR_MSG(extack, "Device not specified");
2002 		goto out;
2003 	}
2004 
2005 	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
2006 		err = -EINVAL;
2007 		goto out;
2008 	}
2009 
2010 	neigh = neigh_lookup(tbl, dst, dev);
2011 	if (neigh == NULL) {
2012 		bool ndm_permanent  = ndm->ndm_state & NUD_PERMANENT;
2013 		bool exempt_from_gc = ndm_permanent ||
2014 				      ndm_flags & NTF_EXT_LEARNED;
2015 
2016 		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
2017 			err = -ENOENT;
2018 			goto out;
2019 		}
2020 		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
2021 			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
2022 			err = -EINVAL;
2023 			goto out;
2024 		}
2025 
2026 		neigh = ___neigh_create(tbl, dst, dev,
2027 					ndm_flags &
2028 					(NTF_EXT_LEARNED | NTF_MANAGED),
2029 					exempt_from_gc, true);
2030 		if (IS_ERR(neigh)) {
2031 			err = PTR_ERR(neigh);
2032 			goto out;
2033 		}
2034 	} else {
2035 		if (nlh->nlmsg_flags & NLM_F_EXCL) {
2036 			err = -EEXIST;
2037 			neigh_release(neigh);
2038 			goto out;
2039 		}
2040 
2041 		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
2042 			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
2043 				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
2044 	}
2045 
2046 	if (protocol)
2047 		neigh->protocol = protocol;
2048 	if (ndm_flags & NTF_EXT_LEARNED)
2049 		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
2050 	if (ndm_flags & NTF_ROUTER)
2051 		flags |= NEIGH_UPDATE_F_ISROUTER;
2052 	if (ndm_flags & NTF_MANAGED)
2053 		flags |= NEIGH_UPDATE_F_MANAGED;
2054 	if (ndm_flags & NTF_USE)
2055 		flags |= NEIGH_UPDATE_F_USE;
2056 
2057 	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
2058 			     NETLINK_CB(skb).portid, extack);
2059 	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
2060 		neigh_event_send(neigh, NULL);
2061 		err = 0;
2062 	}
2063 	neigh_release(neigh);
2064 out:
2065 	return err;
2066 }
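
/* For orientation, an illustrative iproute2 invocation such as
 *
 *	ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 dev eth0 \
 *		nud permanent
 *
 * arrives here as RTM_NEWNEIGH with NLM_F_CREATE | NLM_F_REPLACE set,
 * NDA_DST/NDA_LLADDR attributes and ndm_state = NUD_PERMANENT.
 */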
2067 
2068 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2069 {
2070 	struct nlattr *nest;
2071 
2072 	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2073 	if (nest == NULL)
2074 		return -ENOBUFS;
2075 
2076 	if ((parms->dev &&
2077 	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
2078 	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2079 	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2080 			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2081 	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
2082 	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
2083 			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2084 	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2085 	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2086 	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
2087 			NEIGH_VAR(parms, UCAST_PROBES)) ||
2088 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
2089 			NEIGH_VAR(parms, MCAST_PROBES)) ||
2090 	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2091 			NEIGH_VAR(parms, MCAST_REPROBES)) ||
2092 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2093 			  NDTPA_PAD) ||
2094 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2095 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2096 	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
2097 			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2098 	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2099 			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2100 	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2101 			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2102 	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2103 			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2104 	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2105 			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2106 	    nla_put_msecs(skb, NDTPA_LOCKTIME,
2107 			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2108 	    nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2109 			  NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2110 		goto nla_put_failure;
2111 	return nla_nest_end(skb, nest);
2112 
2113 nla_put_failure:
2114 	nla_nest_cancel(skb, nest);
2115 	return -EMSGSIZE;
2116 }
2117 
2118 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
2119 			      u32 pid, u32 seq, int type, int flags)
2120 {
2121 	struct nlmsghdr *nlh;
2122 	struct ndtmsg *ndtmsg;
2123 
2124 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2125 	if (nlh == NULL)
2126 		return -EMSGSIZE;
2127 
2128 	ndtmsg = nlmsg_data(nlh);
2129 
2130 	read_lock_bh(&tbl->lock);
2131 	ndtmsg->ndtm_family = tbl->family;
2132 	ndtmsg->ndtm_pad1   = 0;
2133 	ndtmsg->ndtm_pad2   = 0;
2134 
2135 	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
2136 	    nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
2137 			  NDTA_PAD) ||
2138 	    nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
2139 	    nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
2140 	    nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
2141 		goto nla_put_failure;
2142 	{
2143 		unsigned long now = jiffies;
2144 		long flush_delta = now - READ_ONCE(tbl->last_flush);
2145 		long rand_delta = now - READ_ONCE(tbl->last_rand);
2146 		struct neigh_hash_table *nht;
2147 		struct ndt_config ndc = {
2148 			.ndtc_key_len		= tbl->key_len,
2149 			.ndtc_entry_size	= tbl->entry_size,
2150 			.ndtc_entries		= atomic_read(&tbl->entries),
2151 			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
2152 			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
2153 			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
2154 		};
2155 
2156 		rcu_read_lock();
2157 		nht = rcu_dereference(tbl->nht);
2158 		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
2159 		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
2160 		rcu_read_unlock();
2161 
2162 		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
2163 			goto nla_put_failure;
2164 	}
2165 
2166 	{
2167 		int cpu;
2168 		struct ndt_stats ndst;
2169 
2170 		memset(&ndst, 0, sizeof(ndst));
2171 
2172 		for_each_possible_cpu(cpu) {
2173 			struct neigh_statistics	*st;
2174 
2175 			st = per_cpu_ptr(tbl->stats, cpu);
2176 			ndst.ndts_allocs		+= READ_ONCE(st->allocs);
2177 			ndst.ndts_destroys		+= READ_ONCE(st->destroys);
2178 			ndst.ndts_hash_grows		+= READ_ONCE(st->hash_grows);
2179 			ndst.ndts_res_failed		+= READ_ONCE(st->res_failed);
2180 			ndst.ndts_lookups		+= READ_ONCE(st->lookups);
2181 			ndst.ndts_hits			+= READ_ONCE(st->hits);
2182 			ndst.ndts_rcv_probes_mcast	+= READ_ONCE(st->rcv_probes_mcast);
2183 			ndst.ndts_rcv_probes_ucast	+= READ_ONCE(st->rcv_probes_ucast);
2184 			ndst.ndts_periodic_gc_runs	+= READ_ONCE(st->periodic_gc_runs);
2185 			ndst.ndts_forced_gc_runs	+= READ_ONCE(st->forced_gc_runs);
2186 			ndst.ndts_table_fulls		+= READ_ONCE(st->table_fulls);
2187 		}
2188 
2189 		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
2190 				  NDTA_PAD))
2191 			goto nla_put_failure;
2192 	}
2193 
2194 	BUG_ON(tbl->parms.dev);
2195 	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
2196 		goto nla_put_failure;
2197 
2198 	read_unlock_bh(&tbl->lock);
2199 	nlmsg_end(skb, nlh);
2200 	return 0;
2201 
2202 nla_put_failure:
2203 	read_unlock_bh(&tbl->lock);
2204 	nlmsg_cancel(skb, nlh);
2205 	return -EMSGSIZE;
2206 }
2207 
2208 static int neightbl_fill_param_info(struct sk_buff *skb,
2209 				    struct neigh_table *tbl,
2210 				    struct neigh_parms *parms,
2211 				    u32 pid, u32 seq, int type,
2212 				    unsigned int flags)
2213 {
2214 	struct ndtmsg *ndtmsg;
2215 	struct nlmsghdr *nlh;
2216 
2217 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
2218 	if (nlh == NULL)
2219 		return -EMSGSIZE;
2220 
2221 	ndtmsg = nlmsg_data(nlh);
2222 
2223 	read_lock_bh(&tbl->lock);
2224 	ndtmsg->ndtm_family = tbl->family;
2225 	ndtmsg->ndtm_pad1   = 0;
2226 	ndtmsg->ndtm_pad2   = 0;
2227 
2228 	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
2229 	    neightbl_fill_parms(skb, parms) < 0)
2230 		goto errout;
2231 
2232 	read_unlock_bh(&tbl->lock);
2233 	nlmsg_end(skb, nlh);
2234 	return 0;
2235 errout:
2236 	read_unlock_bh(&tbl->lock);
2237 	nlmsg_cancel(skb, nlh);
2238 	return -EMSGSIZE;
2239 }
2240 
2241 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
2242 	[NDTA_NAME]		= { .type = NLA_STRING },
2243 	[NDTA_THRESH1]		= { .type = NLA_U32 },
2244 	[NDTA_THRESH2]		= { .type = NLA_U32 },
2245 	[NDTA_THRESH3]		= { .type = NLA_U32 },
2246 	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
2247 	[NDTA_PARMS]		= { .type = NLA_NESTED },
2248 };
2249 
2250 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
2251 	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
2252 	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
2253 	[NDTPA_QUEUE_LENBYTES]		= { .type = NLA_U32 },
2254 	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
2255 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
2256 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
2257 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
2258 	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
2259 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
2260 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
2261 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
2262 	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
2263 	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
2264 	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
2265 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
2266 	[NDTPA_INTERVAL_PROBE_TIME_MS]	= { .type = NLA_U64, .min = 1 },
2267 };
2268 
2269 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
2270 			struct netlink_ext_ack *extack)
2271 {
2272 	struct net *net = sock_net(skb->sk);
2273 	struct neigh_table *tbl;
2274 	struct ndtmsg *ndtmsg;
2275 	struct nlattr *tb[NDTA_MAX+1];
2276 	bool found = false;
2277 	int err, tidx;
2278 
2279 	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
2280 				     nl_neightbl_policy, extack);
2281 	if (err < 0)
2282 		goto errout;
2283 
2284 	if (tb[NDTA_NAME] == NULL) {
2285 		err = -EINVAL;
2286 		goto errout;
2287 	}
2288 
2289 	ndtmsg = nlmsg_data(nlh);
2290 
2291 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2292 		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
2293 		if (!tbl)
2294 			continue;
2295 		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
2296 			continue;
2297 		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
2298 			found = true;
2299 			break;
2300 		}
2301 	}
2302 
2303 	if (!found)
2304 		return -ENOENT;
2305 
2306 	/*
2307 	 * We acquire tbl->lock to be nice to the periodic timers and
2308 	 * make sure they always see a consistent set of values.
2309 	 */
2310 	write_lock_bh(&tbl->lock);
2311 
2312 	if (tb[NDTA_PARMS]) {
2313 		struct nlattr *tbp[NDTPA_MAX+1];
2314 		struct neigh_parms *p;
2315 		int i, ifindex = 0;
2316 
2317 		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
2318 						  tb[NDTA_PARMS],
2319 						  nl_ntbl_parm_policy, extack);
2320 		if (err < 0)
2321 			goto errout_tbl_lock;
2322 
2323 		if (tbp[NDTPA_IFINDEX])
2324 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2325 
2326 		p = lookup_neigh_parms(tbl, net, ifindex);
2327 		if (p == NULL) {
2328 			err = -ENOENT;
2329 			goto errout_tbl_lock;
2330 		}
2331 
2332 		for (i = 1; i <= NDTPA_MAX; i++) {
2333 			if (tbp[i] == NULL)
2334 				continue;
2335 
2336 			switch (i) {
2337 			case NDTPA_QUEUE_LEN:
2338 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2339 					      nla_get_u32(tbp[i]) *
2340 					      SKB_TRUESIZE(ETH_FRAME_LEN));
2341 				break;
2342 			case NDTPA_QUEUE_LENBYTES:
2343 				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2344 					      nla_get_u32(tbp[i]));
2345 				break;
2346 			case NDTPA_PROXY_QLEN:
2347 				NEIGH_VAR_SET(p, PROXY_QLEN,
2348 					      nla_get_u32(tbp[i]));
2349 				break;
2350 			case NDTPA_APP_PROBES:
2351 				NEIGH_VAR_SET(p, APP_PROBES,
2352 					      nla_get_u32(tbp[i]));
2353 				break;
2354 			case NDTPA_UCAST_PROBES:
2355 				NEIGH_VAR_SET(p, UCAST_PROBES,
2356 					      nla_get_u32(tbp[i]));
2357 				break;
2358 			case NDTPA_MCAST_PROBES:
2359 				NEIGH_VAR_SET(p, MCAST_PROBES,
2360 					      nla_get_u32(tbp[i]));
2361 				break;
2362 			case NDTPA_MCAST_REPROBES:
2363 				NEIGH_VAR_SET(p, MCAST_REPROBES,
2364 					      nla_get_u32(tbp[i]));
2365 				break;
2366 			case NDTPA_BASE_REACHABLE_TIME:
2367 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2368 					      nla_get_msecs(tbp[i]));
2369 				/* update reachable_time as well, otherwise, the change will
2370 				 * only be effective after the next time neigh_periodic_work
2371 				 * decides to recompute it (can be multiple minutes)
2372 				 */
2373 				p->reachable_time =
2374 					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
2375 				break;
2376 			case NDTPA_GC_STALETIME:
2377 				NEIGH_VAR_SET(p, GC_STALETIME,
2378 					      nla_get_msecs(tbp[i]));
2379 				break;
2380 			case NDTPA_DELAY_PROBE_TIME:
2381 				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2382 					      nla_get_msecs(tbp[i]));
2383 				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
2384 				break;
2385 			case NDTPA_INTERVAL_PROBE_TIME_MS:
2386 				NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
2387 					      nla_get_msecs(tbp[i]));
2388 				break;
2389 			case NDTPA_RETRANS_TIME:
2390 				NEIGH_VAR_SET(p, RETRANS_TIME,
2391 					      nla_get_msecs(tbp[i]));
2392 				break;
2393 			case NDTPA_ANYCAST_DELAY:
2394 				NEIGH_VAR_SET(p, ANYCAST_DELAY,
2395 					      nla_get_msecs(tbp[i]));
2396 				break;
2397 			case NDTPA_PROXY_DELAY:
2398 				NEIGH_VAR_SET(p, PROXY_DELAY,
2399 					      nla_get_msecs(tbp[i]));
2400 				break;
2401 			case NDTPA_LOCKTIME:
2402 				NEIGH_VAR_SET(p, LOCKTIME,
2403 					      nla_get_msecs(tbp[i]));
2404 				break;
2405 			}
2406 		}
2407 	}
2408 
2409 	err = -ENOENT;
2410 	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2411 	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2412 	    !net_eq(net, &init_net))
2413 		goto errout_tbl_lock;
2414 
2415 	if (tb[NDTA_THRESH1])
2416 		WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));
2417 
2418 	if (tb[NDTA_THRESH2])
2419 		WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));
2420 
2421 	if (tb[NDTA_THRESH3])
2422 		WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));
2423 
2424 	if (tb[NDTA_GC_INTERVAL])
2425 		WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));
2426 
2427 	err = 0;
2428 
2429 errout_tbl_lock:
2430 	write_unlock_bh(&tbl->lock);
2431 errout:
2432 	return err;
2433 }
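
/* For orientation, an illustrative tuning command such as
 *
 *	ip ntable change name arp_cache dev eth0 queue 32
 *
 * lands here as RTM_SETNEIGHTBL with an NDTA_PARMS nest carrying
 * NDTPA_IFINDEX and NDTPA_QUEUE_LEN.
 */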
2434 
2435 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2436 				    struct netlink_ext_ack *extack)
2437 {
2438 	struct ndtmsg *ndtm;
2439 
2440 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
2441 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2442 		return -EINVAL;
2443 	}
2444 
2445 	ndtm = nlmsg_data(nlh);
2446 	if (ndtm->ndtm_pad1  || ndtm->ndtm_pad2) {
2447 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2448 		return -EINVAL;
2449 	}
2450 
2451 	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2452 		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2453 		return -EINVAL;
2454 	}
2455 
2456 	return 0;
2457 }
2458 
2459 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2460 {
2461 	const struct nlmsghdr *nlh = cb->nlh;
2462 	struct net *net = sock_net(skb->sk);
2463 	int family, tidx, nidx = 0;
2464 	int tbl_skip = cb->args[0];
2465 	int neigh_skip = cb->args[1];
2466 	struct neigh_table *tbl;
2467 
2468 	if (cb->strict_check) {
2469 		int err = neightbl_valid_dump_info(nlh, cb->extack);
2470 
2471 		if (err < 0)
2472 			return err;
2473 	}
2474 
2475 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2476 
2477 	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2478 		struct neigh_parms *p;
2479 
2480 		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
2481 		if (!tbl)
2482 			continue;
2483 
2484 		if (tidx < tbl_skip || (family && tbl->family != family))
2485 			continue;
2486 
2487 		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2488 				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2489 				       NLM_F_MULTI) < 0)
2490 			break;
2491 
2492 		nidx = 0;
2493 		p = list_next_entry(&tbl->parms, list);
2494 		list_for_each_entry_from(p, &tbl->parms_list, list) {
2495 			if (!net_eq(neigh_parms_net(p), net))
2496 				continue;
2497 
2498 			if (nidx < neigh_skip)
2499 				goto next;
2500 
2501 			if (neightbl_fill_param_info(skb, tbl, p,
2502 						     NETLINK_CB(cb->skb).portid,
2503 						     nlh->nlmsg_seq,
2504 						     RTM_NEWNEIGHTBL,
2505 						     NLM_F_MULTI) < 0)
2506 				goto out;
2507 		next:
2508 			nidx++;
2509 		}
2510 
2511 		neigh_skip = 0;
2512 	}
2513 out:
2514 	cb->args[0] = tidx;
2515 	cb->args[1] = nidx;
2516 
2517 	return skb->len;
2518 }
2519 
2520 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2521 			   u32 pid, u32 seq, int type, unsigned int flags)
2522 {
2523 	u32 neigh_flags, neigh_flags_ext;
2524 	unsigned long now = jiffies;
2525 	struct nda_cacheinfo ci;
2526 	struct nlmsghdr *nlh;
2527 	struct ndmsg *ndm;
2528 
2529 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2530 	if (nlh == NULL)
2531 		return -EMSGSIZE;
2532 
2533 	neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
2534 	neigh_flags     = neigh->flags & NTF_OLD_MASK;
2535 
2536 	ndm = nlmsg_data(nlh);
2537 	ndm->ndm_family	 = neigh->ops->family;
2538 	ndm->ndm_pad1    = 0;
2539 	ndm->ndm_pad2    = 0;
2540 	ndm->ndm_flags	 = neigh_flags;
2541 	ndm->ndm_type	 = neigh->type;
2542 	ndm->ndm_ifindex = neigh->dev->ifindex;
2543 
2544 	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2545 		goto nla_put_failure;
2546 
2547 	read_lock_bh(&neigh->lock);
2548 	ndm->ndm_state	 = neigh->nud_state;
2549 	if (neigh->nud_state & NUD_VALID) {
2550 		char haddr[MAX_ADDR_LEN];
2551 
2552 		neigh_ha_snapshot(haddr, neigh, neigh->dev);
2553 		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2554 			read_unlock_bh(&neigh->lock);
2555 			goto nla_put_failure;
2556 		}
2557 	}
2558 
2559 	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
2560 	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2561 	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
2562 	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
2563 	read_unlock_bh(&neigh->lock);
2564 
2565 	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2566 	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2567 		goto nla_put_failure;
2568 
2569 	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
2570 		goto nla_put_failure;
2571 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2572 		goto nla_put_failure;
2573 
2574 	nlmsg_end(skb, nlh);
2575 	return 0;
2576 
2577 nla_put_failure:
2578 	nlmsg_cancel(skb, nlh);
2579 	return -EMSGSIZE;
2580 }
2581 
2582 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2583 			    u32 pid, u32 seq, int type, unsigned int flags,
2584 			    struct neigh_table *tbl)
2585 {
2586 	u32 neigh_flags, neigh_flags_ext;
2587 	struct nlmsghdr *nlh;
2588 	struct ndmsg *ndm;
2589 
2590 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2591 	if (nlh == NULL)
2592 		return -EMSGSIZE;
2593 
2594 	neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
2595 	neigh_flags     = pn->flags & NTF_OLD_MASK;
2596 
2597 	ndm = nlmsg_data(nlh);
2598 	ndm->ndm_family	 = tbl->family;
2599 	ndm->ndm_pad1    = 0;
2600 	ndm->ndm_pad2    = 0;
2601 	ndm->ndm_flags	 = neigh_flags | NTF_PROXY;
2602 	ndm->ndm_type	 = RTN_UNICAST;
2603 	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
2604 	ndm->ndm_state	 = NUD_NONE;
2605 
2606 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2607 		goto nla_put_failure;
2608 
2609 	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
2610 		goto nla_put_failure;
2611 	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
2612 		goto nla_put_failure;
2613 
2614 	nlmsg_end(skb, nlh);
2615 	return 0;
2616 
2617 nla_put_failure:
2618 	nlmsg_cancel(skb, nlh);
2619 	return -EMSGSIZE;
2620 }
2621 
2622 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
2623 {
2624 	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2625 	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
2626 }
2627 
2628 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2629 {
2630 	struct net_device *master;
2631 
2632 	if (!master_idx)
2633 		return false;
2634 
2635 	master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
2636 
2637 	/* 0 already denotes that NDA_MASTER wasn't passed, so another
2638 	 * otherwise-invalid ifindex value (-1) is needed to denote "no master".
2639 	 */
2640 	if (master_idx == -1)
2641 		return !!master;
2642 
2643 	if (!master || master->ifindex != master_idx)
2644 		return true;
2645 
2646 	return false;
2647 }
2648 
2649 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2650 {
2651 	if (filter_idx && (!dev || dev->ifindex != filter_idx))
2652 		return true;
2653 
2654 	return false;
2655 }
2656 
2657 struct neigh_dump_filter {
2658 	int master_idx;
2659 	int dev_idx;
2660 };
2661 
2662 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2663 			    struct netlink_callback *cb,
2664 			    struct neigh_dump_filter *filter)
2665 {
2666 	struct net *net = sock_net(skb->sk);
2667 	struct neighbour *n;
2668 	int err = 0, h, s_h = cb->args[1];
2669 	int idx, s_idx = idx = cb->args[2];
2670 	struct neigh_hash_table *nht;
2671 	unsigned int flags = NLM_F_MULTI;
2672 
2673 	if (filter->dev_idx || filter->master_idx)
2674 		flags |= NLM_F_DUMP_FILTERED;
2675 
2676 	nht = rcu_dereference(tbl->nht);
2677 
2678 	for (h = s_h; h < (1 << nht->hash_shift); h++) {
2679 		if (h > s_h)
2680 			s_idx = 0;
2681 		idx = 0;
2682 		neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
2683 			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2684 				goto next;
2685 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2686 			    neigh_master_filtered(n->dev, filter->master_idx))
2687 				goto next;
2688 			err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2689 					      cb->nlh->nlmsg_seq,
2690 					      RTM_NEWNEIGH, flags);
2691 			if (err < 0)
2692 				goto out;
2693 next:
2694 			idx++;
2695 		}
2696 	}
2697 out:
2698 	cb->args[1] = h;
2699 	cb->args[2] = idx;
2700 	return err;
2701 }
2702 
2703 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2704 			     struct netlink_callback *cb,
2705 			     struct neigh_dump_filter *filter)
2706 {
2707 	struct pneigh_entry *n;
2708 	struct net *net = sock_net(skb->sk);
2709 	int err = 0, h, s_h = cb->args[3];
2710 	int idx, s_idx = idx = cb->args[4];
2711 	unsigned int flags = NLM_F_MULTI;
2712 
2713 	if (filter->dev_idx || filter->master_idx)
2714 		flags |= NLM_F_DUMP_FILTERED;
2715 
2716 	read_lock_bh(&tbl->lock);
2717 
2718 	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2719 		if (h > s_h)
2720 			s_idx = 0;
2721 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2722 			if (idx < s_idx || pneigh_net(n) != net)
2723 				goto next;
2724 			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2725 			    neigh_master_filtered(n->dev, filter->master_idx))
2726 				goto next;
2727 			err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2728 					       cb->nlh->nlmsg_seq,
2729 					       RTM_NEWNEIGH, flags, tbl);
2730 			if (err < 0) {
2731 				read_unlock_bh(&tbl->lock);
2732 				goto out;
2733 			}
2734 		next:
2735 			idx++;
2736 		}
2737 	}
2738 
2739 	read_unlock_bh(&tbl->lock);
2740 out:
2741 	cb->args[3] = h;
2742 	cb->args[4] = idx;
2743 	return err;
2744 }
2745 
2746 static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
2747 				bool strict_check,
2748 				struct neigh_dump_filter *filter,
2749 				struct netlink_ext_ack *extack)
2750 {
2751 	struct nlattr *tb[NDA_MAX + 1];
2752 	int err, i;
2753 
2754 	if (strict_check) {
2755 		struct ndmsg *ndm;
2756 
2757 		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2758 			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
2759 			return -EINVAL;
2760 		}
2761 
2762 		ndm = nlmsg_data(nlh);
2763 		if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_ifindex ||
2764 		    ndm->ndm_state || ndm->ndm_type) {
2765 			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
2766 			return -EINVAL;
2767 		}
2768 
2769 		if (ndm->ndm_flags & ~NTF_PROXY) {
2770 			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
2771 			return -EINVAL;
2772 		}
2773 
2774 		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
2775 						    tb, NDA_MAX, nda_policy,
2776 						    extack);
2777 	} else {
2778 		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
2779 					     NDA_MAX, nda_policy, extack);
2780 	}
2781 	if (err < 0)
2782 		return err;
2783 
2784 	for (i = 0; i <= NDA_MAX; ++i) {
2785 		if (!tb[i])
2786 			continue;
2787 
2788 		/* all new attributes should require strict_check */
2789 		switch (i) {
2790 		case NDA_IFINDEX:
2791 			filter->dev_idx = nla_get_u32(tb[i]);
2792 			break;
2793 		case NDA_MASTER:
2794 			filter->master_idx = nla_get_u32(tb[i]);
2795 			break;
2796 		default:
2797 			if (strict_check) {
2798 				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
2799 				return -EINVAL;
2800 			}
2801 		}
2802 	}
2803 
2804 	return 0;
2805 }
2806 
2807 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2808 {
2809 	const struct nlmsghdr *nlh = cb->nlh;
2810 	struct neigh_dump_filter filter = {};
2811 	struct neigh_table *tbl;
2812 	int t, family, s_t;
2813 	int proxy = 0;
2814 	int err;
2815 
2816 	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2817 
2818 	/* check for presence of the full ndmsg structure; the family member
2819 	 * sits at the same offset in both ndmsg and rtgenmsg
2820 	 */
2821 	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2822 	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2823 		proxy = 1;
2824 
2825 	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2826 	if (err < 0 && cb->strict_check)
2827 		return err;
2828 	err = 0;
2829 
2830 	s_t = cb->args[0];
2831 
2832 	rcu_read_lock();
2833 	for (t = 0; t < NEIGH_NR_TABLES; t++) {
2834 		tbl = rcu_dereference(neigh_tables[t]);
2835 
2836 		if (!tbl)
2837 			continue;
2838 		if (t < s_t || (family && tbl->family != family))
2839 			continue;
2840 		if (t > s_t)
2841 			memset(&cb->args[1], 0, sizeof(cb->args) -
2842 						sizeof(cb->args[0]));
2843 		if (proxy)
2844 			err = pneigh_dump_table(tbl, skb, cb, &filter);
2845 		else
2846 			err = neigh_dump_table(tbl, skb, cb, &filter);
2847 		if (err < 0)
2848 			break;
2849 	}
2850 	rcu_read_unlock();
2851 
2852 	cb->args[0] = t;
2853 	return err;
2854 }
2855 
2856 static int neigh_valid_get_req(const struct nlmsghdr *nlh,
2857 			       struct neigh_table **tbl,
2858 			       void **dst, int *dev_idx, u8 *ndm_flags,
2859 			       struct netlink_ext_ack *extack)
2860 {
2861 	struct nlattr *tb[NDA_MAX + 1];
2862 	struct ndmsg *ndm;
2863 	int err, i;
2864 
2865 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
2866 		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2867 		return -EINVAL;
2868 	}
2869 
2870 	ndm = nlmsg_data(nlh);
2871 	if (ndm->ndm_pad1  || ndm->ndm_pad2  || ndm->ndm_state ||
2872 	    ndm->ndm_type) {
2873 		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2874 		return -EINVAL;
2875 	}
2876 
2877 	if (ndm->ndm_flags & ~NTF_PROXY) {
2878 		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2879 		return -EINVAL;
2880 	}
2881 
2882 	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2883 					    NDA_MAX, nda_policy, extack);
2884 	if (err < 0)
2885 		return err;
2886 
2887 	*ndm_flags = ndm->ndm_flags;
2888 	*dev_idx = ndm->ndm_ifindex;
2889 	*tbl = neigh_find_table(ndm->ndm_family);
2890 	if (*tbl == NULL) {
2891 		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
2892 		return -EAFNOSUPPORT;
2893 	}
2894 
2895 	for (i = 0; i <= NDA_MAX; ++i) {
2896 		if (!tb[i])
2897 			continue;
2898 
2899 		switch (i) {
2900 		case NDA_DST:
2901 			if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
2902 				NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
2903 				return -EINVAL;
2904 			}
2905 			*dst = nla_data(tb[i]);
2906 			break;
2907 		default:
2908 			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
2909 			return -EINVAL;
2910 		}
2911 	}
2912 
2913 	return 0;
2914 }
2915 
2916 static inline size_t neigh_nlmsg_size(void)
2917 {
2918 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2919 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2920 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2921 	       + nla_total_size(sizeof(struct nda_cacheinfo))
2922 	       + nla_total_size(4)  /* NDA_PROBES */
2923 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
2924 	       + nla_total_size(1); /* NDA_PROTOCOL */
2925 }
2926 
2927 static int neigh_get_reply(struct net *net, struct neighbour *neigh,
2928 			   u32 pid, u32 seq)
2929 {
2930 	struct sk_buff *skb;
2931 	int err = 0;
2932 
2933 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
2934 	if (!skb)
2935 		return -ENOBUFS;
2936 
2937 	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
2938 	if (err) {
2939 		kfree_skb(skb);
2940 		goto errout;
2941 	}
2942 
2943 	err = rtnl_unicast(skb, net, pid);
2944 errout:
2945 	return err;
2946 }
2947 
2948 static inline size_t pneigh_nlmsg_size(void)
2949 {
2950 	return NLMSG_ALIGN(sizeof(struct ndmsg))
2951 	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2952 	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
2953 	       + nla_total_size(1); /* NDA_PROTOCOL */
2954 }
2955 
2956 static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
2957 			    u32 pid, u32 seq, struct neigh_table *tbl)
2958 {
2959 	struct sk_buff *skb;
2960 	int err = 0;
2961 
2962 	skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
2963 	if (!skb)
2964 		return -ENOBUFS;
2965 
2966 	err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
2967 	if (err) {
2968 		kfree_skb(skb);
2969 		goto errout;
2970 	}
2971 
2972 	err = rtnl_unicast(skb, net, pid);
2973 errout:
2974 	return err;
2975 }
2976 
2977 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2978 		     struct netlink_ext_ack *extack)
2979 {
2980 	struct net *net = sock_net(in_skb->sk);
2981 	struct net_device *dev = NULL;
2982 	struct neigh_table *tbl = NULL;
2983 	struct neighbour *neigh;
2984 	void *dst = NULL;
2985 	u8 ndm_flags = 0;
2986 	int dev_idx = 0;
2987 	int err;
2988 
2989 	err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
2990 				  extack);
2991 	if (err < 0)
2992 		return err;
2993 
2994 	if (dev_idx) {
2995 		dev = __dev_get_by_index(net, dev_idx);
2996 		if (!dev) {
2997 			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
2998 			return -ENODEV;
2999 		}
3000 	}
3001 
3002 	if (!dst) {
3003 		NL_SET_ERR_MSG(extack, "Network address not specified");
3004 		return -EINVAL;
3005 	}
3006 
3007 	if (ndm_flags & NTF_PROXY) {
3008 		struct pneigh_entry *pn;
3009 
3010 		pn = pneigh_lookup(tbl, net, dst, dev, 0);
3011 		if (!pn) {
3012 			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3013 			return -ENOENT;
3014 		}
3015 		return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
3016 					nlh->nlmsg_seq, tbl);
3017 	}
3018 
3019 	if (!dev) {
3020 		NL_SET_ERR_MSG(extack, "No device specified");
3021 		return -EINVAL;
3022 	}
3023 
3024 	neigh = neigh_lookup(tbl, dst, dev);
3025 	if (!neigh) {
3026 		NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3027 		return -ENOENT;
3028 	}
3029 
3030 	err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
3031 			      nlh->nlmsg_seq);
3032 
3033 	neigh_release(neigh);
3034 
3035 	return err;
3036 }
3037 
3038 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3039 {
3040 	int chain;
3041 	struct neigh_hash_table *nht;
3042 
3043 	rcu_read_lock();
3044 	nht = rcu_dereference(tbl->nht);
3045 
3046 	read_lock_bh(&tbl->lock); /* avoid resizes */
3047 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3048 		struct neighbour *n;
3049 
3050 		neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
3051 			cb(n, cookie);
3052 	}
3053 	read_unlock_bh(&tbl->lock);
3054 	rcu_read_unlock();
3055 }
3056 EXPORT_SYMBOL(neigh_for_each);
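
/* Minimal callback sketch (count_reachable is invented for the example):
 *
 *	static void count_reachable(struct neighbour *n, void *cookie)
 *	{
 *		if (READ_ONCE(n->nud_state) & NUD_REACHABLE)
 *			(*(unsigned int *)cookie)++;
 *	}
 *
 *	unsigned int nr = 0;
 *	neigh_for_each(&arp_tbl, count_reachable, &nr);
 *
 * The callback runs under tbl->lock with BHs disabled, so it must not
 * sleep or re-take the table lock.
 */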
3057 
3058 /* The tbl->lock must be held as a writer and BH disabled. */
3059 void __neigh_for_each_release(struct neigh_table *tbl,
3060 			      int (*cb)(struct neighbour *))
3061 {
3062 	struct neigh_hash_table *nht;
3063 	int chain;
3064 
3065 	nht = rcu_dereference_protected(tbl->nht,
3066 					lockdep_is_held(&tbl->lock));
3067 	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3068 		struct hlist_node *tmp;
3069 		struct neighbour *n;
3070 
3071 		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
3072 			int release;
3073 
3074 			write_lock(&n->lock);
3075 			release = cb(n);
3076 			if (release) {
3077 				hlist_del_rcu(&n->hash);
3078 				hlist_del_rcu(&n->dev_list);
3079 				neigh_mark_dead(n);
3080 			}
3081 			write_unlock(&n->lock);
3082 			if (release)
3083 				neigh_cleanup_and_release(n);
3084 		}
3085 	}
3086 }
3087 EXPORT_SYMBOL(__neigh_for_each_release);
3088 
3089 int neigh_xmit(int index, struct net_device *dev,
3090 	       const void *addr, struct sk_buff *skb)
3091 {
3092 	int err = -EAFNOSUPPORT;
3093 
3094 	if (likely(index < NEIGH_NR_TABLES)) {
3095 		struct neigh_table *tbl;
3096 		struct neighbour *neigh;
3097 
3098 		rcu_read_lock();
3099 		tbl = rcu_dereference(neigh_tables[index]);
3100 		if (!tbl)
3101 			goto out_unlock;
3102 		if (index == NEIGH_ARP_TABLE) {
3103 			u32 key = *((u32 *)addr);
3104 
3105 			neigh = __ipv4_neigh_lookup_noref(dev, key);
3106 		} else {
3107 			neigh = __neigh_lookup_noref(tbl, addr, dev);
3108 		}
3109 		if (!neigh)
3110 			neigh = __neigh_create(tbl, addr, dev, false);
3111 		err = PTR_ERR(neigh);
3112 		if (IS_ERR(neigh)) {
3113 			rcu_read_unlock();
3114 			goto out_kfree_skb;
3115 		}
3116 		err = READ_ONCE(neigh->output)(neigh, skb);
3117 out_unlock:
3118 		rcu_read_unlock();
3119 	} else if (index == NEIGH_LINK_TABLE) {
3121 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3122 				      addr, NULL, skb->len);
3123 		if (err < 0)
3124 			goto out_kfree_skb;
3125 		err = dev_queue_xmit(skb);
3126 	}
3127 out:
3128 	return err;
3129 out_kfree_skb:
3130 	kfree_skb(skb);
3131 	goto out;
3132 }
3133 EXPORT_SYMBOL(neigh_xmit);
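
/* Usage sketch (next_hop_v4 is illustrative): an L3 caller resolves and
 * transmits in one step,
 *
 *	err = neigh_xmit(NEIGH_ARP_TABLE, dev, &next_hop_v4, skb);
 *
 * while NEIGH_LINK_TABLE skips neighbour resolution entirely and just
 * builds the hard header from @addr before queueing the skb.
 */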
3134 
3135 #ifdef CONFIG_PROC_FS
3136 
3137 static struct neighbour *neigh_get_valid(struct seq_file *seq,
3138 					 struct neighbour *n,
3139 					 loff_t *pos)
3140 {
3141 	struct neigh_seq_state *state = seq->private;
3142 	struct net *net = seq_file_net(seq);
3143 
3144 	if (!net_eq(dev_net(n->dev), net))
3145 		return NULL;
3146 
3147 	if (state->neigh_sub_iter) {
3148 		loff_t fakep = 0;
3149 		void *v;
3150 
3151 		v = state->neigh_sub_iter(state, n, pos ? pos : &fakep);
3152 		if (!v)
3153 			return NULL;
3154 		if (pos)
3155 			return v;
3156 	}
3157 
3158 	if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
3159 		return n;
3160 
3161 	if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
3162 		return n;
3163 
3164 	return NULL;
3165 }
3166 
3167 static struct neighbour *neigh_get_first(struct seq_file *seq)
3168 {
3169 	struct neigh_seq_state *state = seq->private;
3170 	struct neigh_hash_table *nht = state->nht;
3171 	struct neighbour *n, *tmp;
3172 
3173 	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3174 
3175 	while (++state->bucket < (1 << nht->hash_shift)) {
3176 		neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
3177 			tmp = neigh_get_valid(seq, n, NULL);
3178 			if (tmp)
3179 				return tmp;
3180 		}
3181 	}
3182 
3183 	return NULL;
3184 }
3185 
3186 static struct neighbour *neigh_get_next(struct seq_file *seq,
3187 					struct neighbour *n,
3188 					loff_t *pos)
3189 {
3190 	struct neigh_seq_state *state = seq->private;
3191 	struct neighbour *tmp;
3192 
3193 	if (state->neigh_sub_iter) {
3194 		void *v = state->neigh_sub_iter(state, n, pos);
3195 
3196 		if (v)
3197 			return n;
3198 	}
3199 
3200 	hlist_for_each_entry_continue(n, hash) {
3201 		tmp = neigh_get_valid(seq, n, pos);
3202 		if (tmp) {
3203 			n = tmp;
3204 			goto out;
3205 		}
3206 	}
3207 
3208 	n = neigh_get_first(seq);
3209 out:
3210 	if (n && pos)
3211 		--(*pos);
3212 
3213 	return n;
3214 }
3215 
3216 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
3217 {
3218 	struct neighbour *n = neigh_get_first(seq);
3219 
3220 	if (n) {
3221 		--(*pos);
3222 		while (*pos) {
3223 			n = neigh_get_next(seq, n, pos);
3224 			if (!n)
3225 				break;
3226 		}
3227 	}
3228 	return *pos ? NULL : n;
3229 }
3230 
3231 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3232 {
3233 	struct neigh_seq_state *state = seq->private;
3234 	struct net *net = seq_file_net(seq);
3235 	struct neigh_table *tbl = state->tbl;
3236 	struct pneigh_entry *pn = NULL;
3237 	int bucket;
3238 
3239 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
3240 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3241 		pn = tbl->phash_buckets[bucket];
3242 		while (pn && !net_eq(pneigh_net(pn), net))
3243 			pn = pn->next;
3244 		if (pn)
3245 			break;
3246 	}
3247 	state->bucket = bucket;
3248 
3249 	return pn;
3250 }
3251 
3252 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
3253 					    struct pneigh_entry *pn,
3254 					    loff_t *pos)
3255 {
3256 	struct neigh_seq_state *state = seq->private;
3257 	struct net *net = seq_file_net(seq);
3258 	struct neigh_table *tbl = state->tbl;
3259 
3260 	do {
3261 		pn = pn->next;
3262 	} while (pn && !net_eq(pneigh_net(pn), net));
3263 
3264 	while (!pn) {
3265 		if (++state->bucket > PNEIGH_HASHMASK)
3266 			break;
3267 		pn = tbl->phash_buckets[state->bucket];
3268 		while (pn && !net_eq(pneigh_net(pn), net))
3269 			pn = pn->next;
3270 		if (pn)
3271 			break;
3272 	}
3273 
3274 	if (pn && pos)
3275 		--(*pos);
3276 
3277 	return pn;
3278 }
3279 
3280 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
3281 {
3282 	struct pneigh_entry *pn = pneigh_get_first(seq);
3283 
3284 	if (pn) {
3285 		--(*pos);
3286 		while (*pos) {
3287 			pn = pneigh_get_next(seq, pn, pos);
3288 			if (!pn)
3289 				break;
3290 		}
3291 	}
3292 	return *pos ? NULL : pn;
3293 }
3294 
3295 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
3296 {
3297 	struct neigh_seq_state *state = seq->private;
3298 	void *rc;
3299 	loff_t idxpos = *pos;
3300 
3301 	rc = neigh_get_idx(seq, &idxpos);
3302 	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3303 		rc = pneigh_get_idx(seq, &idxpos);
3304 
3305 	return rc;
3306 }
3307 
3308 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
3309 	__acquires(tbl->lock)
3310 	__acquires(rcu)
3311 {
3312 	struct neigh_seq_state *state = seq->private;
3313 
3314 	state->tbl = tbl;
3315 	state->bucket = -1;
3316 	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
3317 
3318 	rcu_read_lock();
3319 	state->nht = rcu_dereference(tbl->nht);
3320 	read_lock_bh(&tbl->lock);
3321 
3322 	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
3323 }
3324 EXPORT_SYMBOL(neigh_seq_start);
3325 
3326 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3327 {
3328 	struct neigh_seq_state *state;
3329 	void *rc;
3330 
3331 	if (v == SEQ_START_TOKEN) {
3332 		rc = neigh_get_first(seq);
3333 		goto out;
3334 	}
3335 
3336 	state = seq->private;
3337 	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
3338 		rc = neigh_get_next(seq, v, NULL);
3339 		if (rc)
3340 			goto out;
3341 		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
3342 			rc = pneigh_get_first(seq);
3343 	} else {
3344 		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
3345 		rc = pneigh_get_next(seq, v, NULL);
3346 	}
3347 out:
3348 	++(*pos);
3349 	return rc;
3350 }
3351 EXPORT_SYMBOL(neigh_seq_next);
3352 
3353 void neigh_seq_stop(struct seq_file *seq, void *v)
3354 	__releases(tbl->lock)
3355 	__releases(rcu)
3356 {
3357 	struct neigh_seq_state *state = seq->private;
3358 	struct neigh_table *tbl = state->tbl;
3359 
3360 	read_unlock_bh(&tbl->lock);
3361 	rcu_read_unlock();
3362 }
3363 EXPORT_SYMBOL(neigh_seq_stop);
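
/* Wiring sketch, mirroring the ARP /proc code (shown for orientation):
 *
 *	static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 * paired with neigh_seq_next() and neigh_seq_stop() in the protocol's
 * seq_operations.
 */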
3364 
3365 /* statistics via seq_file */
3366 
3367 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
3368 {
3369 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3370 	int cpu;
3371 
3372 	if (*pos == 0)
3373 		return SEQ_START_TOKEN;
3374 
3375 	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
3376 		if (!cpu_possible(cpu))
3377 			continue;
3378 		*pos = cpu+1;
3379 		return per_cpu_ptr(tbl->stats, cpu);
3380 	}
3381 	return NULL;
3382 }
3383 
3384 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3385 {
3386 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3387 	int cpu;
3388 
3389 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
3390 		if (!cpu_possible(cpu))
3391 			continue;
3392 		*pos = cpu+1;
3393 		return per_cpu_ptr(tbl->stats, cpu);
3394 	}
3395 	(*pos)++;
3396 	return NULL;
3397 }
3398 
3399 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
3400 {
3401 
3402 }
3403 
3404 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3405 {
3406 	struct neigh_table *tbl = pde_data(file_inode(seq->file));
3407 	struct neigh_statistics *st = v;
3408 
3409 	if (v == SEQ_START_TOKEN) {
3410 		seq_puts(seq, "entries  allocs   destroys hash_grows lookups  hits     res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3411 		return 0;
3412 	}
3413 
3414 	seq_printf(seq, "%08x %08lx %08lx %08lx   %08lx %08lx %08lx   "
3415 			"%08lx         %08lx         %08lx         "
3416 			"%08lx       %08lx            %08lx\n",
3417 		   atomic_read(&tbl->entries),
3418 
3419 		   st->allocs,
3420 		   st->destroys,
3421 		   st->hash_grows,
3422 
3423 		   st->lookups,
3424 		   st->hits,
3425 
3426 		   st->res_failed,
3427 
3428 		   st->rcv_probes_mcast,
3429 		   st->rcv_probes_ucast,
3430 
3431 		   st->periodic_gc_runs,
3432 		   st->forced_gc_runs,
3433 		   st->unres_discards,
3434 		   st->table_fulls
3435 		   );
3436 
3437 	return 0;
3438 }
3439 
3440 static const struct seq_operations neigh_stat_seq_ops = {
3441 	.start	= neigh_stat_seq_start,
3442 	.next	= neigh_stat_seq_next,
3443 	.stop	= neigh_stat_seq_stop,
3444 	.show	= neigh_stat_seq_show,
3445 };
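
/* The resulting file (e.g. /proc/net/stat/arp_cache) prints the header
 * line above followed by one row of hex counters per possible CPU; the
 * "entries" column is a table-wide count and so repeats on every row.
 */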
3446 #endif /* CONFIG_PROC_FS */
3447 
3448 static void __neigh_notify(struct neighbour *n, int type, int flags,
3449 			   u32 pid)
3450 {
3451 	struct sk_buff *skb;
3452 	int err = -ENOBUFS;
3453 	struct net *net;
3454 
3455 	rcu_read_lock();
3456 	net = dev_net_rcu(n->dev);
3457 	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
3458 	if (skb == NULL)
3459 		goto errout;
3460 
3461 	err = neigh_fill_info(skb, n, pid, 0, type, flags);
3462 	if (err < 0) {
3463 		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
3464 		WARN_ON(err == -EMSGSIZE);
3465 		kfree_skb(skb);
3466 		goto errout;
3467 	}
3468 	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
3469 	goto out;
3470 errout:
3471 	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
3472 out:
3473 	rcu_read_unlock();
3474 }
3475 
3476 void neigh_app_ns(struct neighbour *n)
3477 {
3478 	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
3479 }
3480 EXPORT_SYMBOL(neigh_app_ns);
3481 
3482 #ifdef CONFIG_SYSCTL
3483 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3484 
3485 static int proc_unres_qlen(const struct ctl_table *ctl, int write,
3486 			   void *buffer, size_t *lenp, loff_t *ppos)
3487 {
3488 	int size, ret;
3489 	struct ctl_table tmp = *ctl;
3490 
3491 	tmp.extra1 = SYSCTL_ZERO;
3492 	tmp.extra2 = &unres_qlen_max;
3493 	tmp.data = &size;
3494 
3495 	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3496 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3497 
3498 	if (write && !ret)
3499 		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3500 	return ret;
3501 }
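
/* Worked conversion example: SKB_TRUESIZE(ETH_FRAME_LEN) is the frame
 * length plus per-skb overhead (struct sk_buff plus skb_shared_info,
 * roughly 2 KiB in total on 64-bit builds, exact value config dependent).
 * Writing unres_qlen=3 therefore stores about 3 * 2 KiB into
 * unres_qlen_bytes, and reads divide back down to whole packets.
 */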
3502 
3503 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3504 				  int index)
3505 {
3506 	struct net_device *dev;
3507 	int family = neigh_parms_family(p);
3508 
3509 	rcu_read_lock();
3510 	for_each_netdev_rcu(net, dev) {
3511 		struct neigh_parms *dst_p =
3512 				neigh_get_dev_parms_rcu(dev, family);
3513 
3514 		if (dst_p && !test_bit(index, dst_p->data_state))
3515 			dst_p->data[index] = p->data[index];
3516 	}
3517 	rcu_read_unlock();
3518 }
3519 
3520 static void neigh_proc_update(const struct ctl_table *ctl, int write)
3521 {
3522 	struct net_device *dev = ctl->extra1;
3523 	struct neigh_parms *p = ctl->extra2;
3524 	struct net *net = neigh_parms_net(p);
3525 	int index = (int *) ctl->data - p->data;
3526 
3527 	if (!write)
3528 		return;
3529 
3530 	set_bit(index, p->data_state);
3531 	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3532 		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3533 	if (!dev) /* NULL dev means this is default value */
3534 		neigh_copy_dflt_parms(net, p, index);
3535 }
3536 
3537 static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
3538 					   void *buffer, size_t *lenp,
3539 					   loff_t *ppos)
3540 {
3541 	struct ctl_table tmp = *ctl;
3542 	int ret;
3543 
3544 	tmp.extra1 = SYSCTL_ZERO;
3545 	tmp.extra2 = SYSCTL_INT_MAX;
3546 
3547 	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3548 	neigh_proc_update(ctl, write);
3549 	return ret;
3550 }
3551 
3552 static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
3553 						   void *buffer, size_t *lenp, loff_t *ppos)
3554 {
3555 	struct ctl_table tmp = *ctl;
3556 	int ret;
3557 
3558 	int min = msecs_to_jiffies(1);
3559 
3560 	tmp.extra1 = &min;
3561 	tmp.extra2 = NULL;
3562 
3563 	ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
3564 	neigh_proc_update(ctl, write);
3565 	return ret;
3566 }
3567 
3568 int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
3569 			size_t *lenp, loff_t *ppos)
3570 {
3571 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
3572 
3573 	neigh_proc_update(ctl, write);
3574 	return ret;
3575 }
3576 EXPORT_SYMBOL(neigh_proc_dointvec);
3577 
3578 int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
3579 				size_t *lenp, loff_t *ppos)
3580 {
3581 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3582 
3583 	neigh_proc_update(ctl, write);
3584 	return ret;
3585 }
3586 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3587 
3588 static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
3589 					      void *buffer, size_t *lenp,
3590 					      loff_t *ppos)
3591 {
3592 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
3593 
3594 	neigh_proc_update(ctl, write);
3595 	return ret;
3596 }
3597 
3598 int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
3599 				   void *buffer, size_t *lenp, loff_t *ppos)
3600 {
3601 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3602 
3603 	neigh_proc_update(ctl, write);
3604 	return ret;
3605 }
3606 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3607 
3608 static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
3609 					  void *buffer, size_t *lenp,
3610 					  loff_t *ppos)
3611 {
3612 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
3613 
3614 	neigh_proc_update(ctl, write);
3615 	return ret;
3616 }
3617 
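/* base_reachable_time(_ms) only seeds neigh_rand_reach_time(), which
 * derives a randomized reachable_time from it; the handler below
 * refreshes that value so a new setting takes hold immediately.
 */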
3618 static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
3619 					  void *buffer, size_t *lenp,
3620 					  loff_t *ppos)
3621 {
3622 	struct neigh_parms *p = ctl->extra2;
3623 	int ret;
3624 
3625 	if (strcmp(ctl->procname, "base_reachable_time") == 0)
3626 		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3627 	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
3628 		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
3629 	else
3630 		ret = -1;
3631 
3632 	if (write && ret == 0) {
3633 		/* Update reachable_time as well; otherwise the change only
3634 		 * takes effect the next time neigh_periodic_work decides to
3635 		 * recompute it.
3636 		 */
3637 		p->reachable_time =
3638 			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
3639 	}
3640 	return ret;
3641 }
3642 
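/* Encode the offset of data[index] within struct neigh_parms by faking a
 * NULL object (an offsetof() idiom); neigh_sysctl_register() rebases the
 * resulting .data pointers onto the real parms instance.
 */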
3643 #define NEIGH_PARMS_DATA_OFFSET(index)	\
3644 	(&((struct neigh_parms *) 0)->data[index])
3645 
3646 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
3647 	[NEIGH_VAR_ ## attr] = { \
3648 		.procname	= name, \
3649 		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
3650 		.maxlen		= sizeof(int), \
3651 		.mode		= mval, \
3652 		.proc_handler	= proc, \
3653 	}
3654 
3655 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
3656 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
3657 
3658 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
3659 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
3660 
3661 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
3662 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
3663 
3664 #define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
3665 	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)
3666 
3667 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
3668 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
3669 
3670 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
3671 	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3672 
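/* Template table, kmemdup()'d once per registration so each device (or
 * the per-family default) gets its own writable copy with .data rebased.
 */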
3673 static struct neigh_sysctl_table {
3674 	struct ctl_table_header *sysctl_header;
3675 	struct ctl_table neigh_vars[NEIGH_VAR_MAX];
3676 } neigh_sysctl_template __read_mostly = {
3677 	.neigh_vars = {
3678 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
3679 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
3680 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
3681 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
3682 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
3683 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
3684 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
3685 		NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
3686 						       "interval_probe_time_ms"),
3687 		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
3688 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
3689 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
3690 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
3691 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
3692 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
3693 		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
3694 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
3695 		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
3696 		[NEIGH_VAR_GC_INTERVAL] = {
3697 			.procname	= "gc_interval",
3698 			.maxlen		= sizeof(int),
3699 			.mode		= 0644,
3700 			.proc_handler	= proc_dointvec_jiffies,
3701 		},
3702 		[NEIGH_VAR_GC_THRESH1] = {
3703 			.procname	= "gc_thresh1",
3704 			.maxlen		= sizeof(int),
3705 			.mode		= 0644,
3706 			.extra1		= SYSCTL_ZERO,
3707 			.extra2		= SYSCTL_INT_MAX,
3708 			.proc_handler	= proc_dointvec_minmax,
3709 		},
3710 		[NEIGH_VAR_GC_THRESH2] = {
3711 			.procname	= "gc_thresh2",
3712 			.maxlen		= sizeof(int),
3713 			.mode		= 0644,
3714 			.extra1		= SYSCTL_ZERO,
3715 			.extra2		= SYSCTL_INT_MAX,
3716 			.proc_handler	= proc_dointvec_minmax,
3717 		},
3718 		[NEIGH_VAR_GC_THRESH3] = {
3719 			.procname	= "gc_thresh3",
3720 			.maxlen		= sizeof(int),
3721 			.mode		= 0644,
3722 			.extra1		= SYSCTL_ZERO,
3723 			.extra2		= SYSCTL_INT_MAX,
3724 			.proc_handler	= proc_dointvec_minmax,
3725 		},
3726 	},
3727 };
3728 
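/* Register the table under net/<ipv4|ipv6>/neigh/<dev|default>/; for a
 * real device the array is cut off after the per-parms entries, while
 * the gc_* knobs appear only in the "default" table and point straight
 * at the neigh_table fields.  A resulting path might look like
 * /proc/sys/net/ipv4/neigh/eth0/base_reachable_time_ms (the device name
 * "eth0" here is only illustrative).
 */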
3729 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3730 			  proc_handler *handler)
3731 {
3732 	int i;
3733 	struct neigh_sysctl_table *t;
3734 	const char *dev_name_source;
3735 	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3736 	char *p_name;
3737 	size_t neigh_vars_size;
3738 
3739 	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
3740 	if (!t)
3741 		goto err;
3742 
3743 	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
3744 		t->neigh_vars[i].data += (long) p;
3745 		t->neigh_vars[i].extra1 = dev;
3746 		t->neigh_vars[i].extra2 = p;
3747 	}
3748 
3749 	neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
3750 	if (dev) {
3751 		dev_name_source = dev->name;
3752 		/* Terminate the table early */
3753 		neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
3754 	} else {
3755 		struct neigh_table *tbl = p->tbl;
3756 		dev_name_source = "default";
3757 		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
3758 		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
3759 		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
3760 		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
3761 	}
3762 
3763 	if (handler) {
3764 		/* RetransTime */
3765 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3766 		/* ReachableTime */
3767 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3768 		/* RetransTime (in milliseconds) */
3769 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3770 		/* ReachableTime (in milliseconds) */
3771 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3772 	} else {
3773 		/* These handlers update p->reachable_time after
3774 		 * base_reachable_time(_ms) is set, so the new timer interval
3775 		 * applies from the next neighbour update instead of waiting for
3776 		 * neigh_periodic_work to recompute it (which can take several
3777 		 * minutes).  Any handler that replaces them should do this too.
3778 		 */
3779 		/* ReachableTime */
3780 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
3781 			neigh_proc_base_reachable_time;
3782 		/* ReachableTime (in milliseconds) */
3783 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
3784 			neigh_proc_base_reachable_time;
3785 	}
3786 
3787 	switch (neigh_parms_family(p)) {
3788 	case AF_INET:
3789 	      p_name = "ipv4";
3790 	      break;
3791 	case AF_INET6:
3792 	      p_name = "ipv6";
3793 	      break;
3794 	default:
3795 	      BUG();
3796 	}
3797 
3798 	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3799 		p_name, dev_name_source);
3800 	t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
3801 						  neigh_path, t->neigh_vars,
3802 						  neigh_vars_size);
3803 	if (!t->sysctl_header)
3804 		goto free;
3805 
3806 	p->sysctl_table = t;
3807 	return 0;
3808 
3809 free:
3810 	kfree(t);
3811 err:
3812 	return -ENOBUFS;
3813 }
3814 EXPORT_SYMBOL(neigh_sysctl_register);
3815 
3816 void neigh_sysctl_unregister(struct neigh_parms *p)
3817 {
3818 	if (p->sysctl_table) {
3819 		struct neigh_sysctl_table *t = p->sysctl_table;
3820 		p->sysctl_table = NULL;
3821 		unregister_net_sysctl_table(t->sysctl_header);
3822 		kfree(t);
3823 	}
3824 }
3825 EXPORT_SYMBOL(neigh_sysctl_unregister);
3826 
3827 #endif	/* CONFIG_SYSCTL */
3828 
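/* rtnetlink wiring: one entry per neighbour message type, registered in
 * bulk from the initcall below.  RTNL_FLAG_DUMP_UNLOCKED marks the
 * RTM_GETNEIGH dump as safe to run without holding the RTNL lock.
 */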
3829 static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
3830 	{.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
3831 	{.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
3832 	{.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
3833 	 .flags = RTNL_FLAG_DUMP_UNLOCKED},
3834 	{.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info},
3835 	{.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set},
3836 };
3837 
3838 static int __init neigh_init(void)
3839 {
3840 	rtnl_register_many(neigh_rtnl_msg_handlers);
3841 	return 0;
3842 }
3843 
3844 subsys_initcall(neigh_init);
3845