1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Generic address resolution entity
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 *
9 * Fixes:
10 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
11 * Harald Welte Add neighbour cache statistics like rtstat
12 */
13
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16 #include <linux/slab.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/socket.h>
21 #include <linux/netdevice.h>
22 #include <linux/proc_fs.h>
23 #ifdef CONFIG_SYSCTL
24 #include <linux/sysctl.h>
25 #endif
26 #include <linux/times.h>
27 #include <net/net_namespace.h>
28 #include <net/neighbour.h>
29 #include <net/arp.h>
30 #include <net/dst.h>
31 #include <net/ip.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
41
42 #include <trace/events/neigh.h>
43
/* Verbosity ceiling for neigh_dbg(): messages whose level is greater than
 * this value are not emitted.
 */
#define NEIGH_DEBUG 1

/* Emit a pr_debug() message when @level does not exceed NEIGH_DEBUG.
 *
 * @level is parenthesized so that an expression argument (for example
 * "a | b") is compared as a whole instead of being split apart by the
 * precedence of "<=".
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if ((level) <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

/* Mask applied to the proxy-neighbour hash; bucket indexes therefore run
 * from 0 to PNEIGH_HASHMASK inclusive.
 */
#define PNEIGH_HASHMASK		0xF
52
53 static void neigh_timer_handler(struct timer_list *t);
54 static void neigh_notify(struct neighbour *n, int type, int flags, u32 pid);
55 static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid);
56 static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
57 bool skip_perm);
58
59 #ifdef CONFIG_PROC_FS
60 static const struct seq_operations neigh_stat_seq_ops;
61 #endif
62
neigh_get_dev_table(struct net_device * dev,int family)63 static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
64 {
65 int i;
66
67 switch (family) {
68 default:
69 DEBUG_NET_WARN_ON_ONCE(1);
70 fallthrough; /* to avoid panic by null-ptr-deref */
71 case AF_INET:
72 i = NEIGH_ARP_TABLE;
73 break;
74 case AF_INET6:
75 i = NEIGH_ND_TABLE;
76 break;
77 }
78
79 return &dev->neighbours[i];
80 }
81
/*
   Neighbour hash table buckets are protected by tbl->lock.

   - All scans of and updates to the hash buckets MUST be done under this lock.
   - Nothing clever may be done under this lock: no callbacks into
     protocol backends and no attempts to send anything to the network.
     Doing so results in deadlocks if the backend/driver wants to use the
     neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock first.

   Neighbour entries are protected:
   - by a reference count.
   - by the rwlock neigh->lock.

   The reference count prevents destruction.

   neigh->lock mainly serializes the link-layer address data and its
   validity state.  However, the same lock also protects other fields
   of the entry:
   - timer
   - resolution queue

   Again, nothing clever may be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simple and does
   not make callbacks into the neighbour tables.
 */
109
neigh_blackhole(struct neighbour * neigh,struct sk_buff * skb)110 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
111 {
112 kfree_skb(skb);
113 return -ENETDOWN;
114 }
115
neigh_cleanup_and_release(struct neighbour * neigh)116 static void neigh_cleanup_and_release(struct neighbour *neigh)
117 {
118 trace_neigh_cleanup_and_release(neigh, 0);
119 neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
120 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
121 neigh_release(neigh);
122 }
123
/*
 * Returns a random value distributed over the interval
 * (1/2)*base ... (3/2)*base; a @base of zero yields zero.
 * This corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return get_random_u32_below(base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
135
neigh_mark_dead(struct neighbour * n)136 static void neigh_mark_dead(struct neighbour *n)
137 {
138 n->dead = 1;
139 if (!list_empty(&n->gc_list)) {
140 list_del_init(&n->gc_list);
141 atomic_dec(&n->tbl->gc_entries);
142 }
143 if (!list_empty(&n->managed_list))
144 list_del_init(&n->managed_list);
145 }
146
neigh_update_gc_list(struct neighbour * n)147 static void neigh_update_gc_list(struct neighbour *n)
148 {
149 bool on_gc_list, exempt_from_gc;
150
151 spin_lock_bh(&n->tbl->lock);
152 write_lock(&n->lock);
153 if (n->dead)
154 goto out;
155
156 /* remove from the gc list if new state is permanent or if neighbor is
157 * externally learned / validated; otherwise entry should be on the gc
158 * list
159 */
160 exempt_from_gc = n->nud_state & NUD_PERMANENT ||
161 n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED);
162 on_gc_list = !list_empty(&n->gc_list);
163
164 if (exempt_from_gc && on_gc_list) {
165 list_del_init(&n->gc_list);
166 atomic_dec(&n->tbl->gc_entries);
167 } else if (!exempt_from_gc && !on_gc_list) {
168 /* add entries to the tail; cleaning removes from the front */
169 list_add_tail(&n->gc_list, &n->tbl->gc_list);
170 atomic_inc(&n->tbl->gc_entries);
171 }
172 out:
173 write_unlock(&n->lock);
174 spin_unlock_bh(&n->tbl->lock);
175 }
176
neigh_update_managed_list(struct neighbour * n)177 static void neigh_update_managed_list(struct neighbour *n)
178 {
179 bool on_managed_list, add_to_managed;
180
181 spin_lock_bh(&n->tbl->lock);
182 write_lock(&n->lock);
183 if (n->dead)
184 goto out;
185
186 add_to_managed = n->flags & NTF_MANAGED;
187 on_managed_list = !list_empty(&n->managed_list);
188
189 if (!add_to_managed && on_managed_list)
190 list_del_init(&n->managed_list);
191 else if (add_to_managed && !on_managed_list)
192 list_add_tail(&n->managed_list, &n->tbl->managed_list);
193 out:
194 write_unlock(&n->lock);
195 spin_unlock_bh(&n->tbl->lock);
196 }
197
/* Apply the admin-controlled NTF_* bits requested in @flags to @neigh.
 *
 * Does nothing unless NEIGH_UPDATE_F_ADMIN is set in @flags.  For each of
 * NTF_EXT_LEARNED, NTF_MANAGED and NTF_EXT_VALIDATED whose requested value
 * differs from the one currently stored, the stored bit is changed and
 * *@notify is set to 1.  *@gc_update is set for the EXT_LEARNED and
 * EXT_VALIDATED bits, *@managed_update for the MANAGED bit.  The output
 * parameters are not written when nothing changes.
 */
static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
			       bool *gc_update, bool *managed_update)
{
	u32 wanted = 0;
	u32 changed;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return;

	if (flags & NEIGH_UPDATE_F_EXT_LEARNED)
		wanted |= NTF_EXT_LEARNED;
	if (flags & NEIGH_UPDATE_F_MANAGED)
		wanted |= NTF_MANAGED;
	if (flags & NEIGH_UPDATE_F_EXT_VALIDATED)
		wanted |= NTF_EXT_VALIDATED;

	/* Bits whose stored value disagrees with the requested one. */
	changed = neigh->flags ^ wanted;

	/* Toggling a disagreeing bit makes it equal to the requested value. */
	if (changed & NTF_EXT_LEARNED) {
		neigh->flags ^= NTF_EXT_LEARNED;
		*notify = 1;
		*gc_update = true;
	}

	if (changed & NTF_MANAGED) {
		neigh->flags ^= NTF_MANAGED;
		*notify = 1;
		*managed_update = true;
	}

	if (changed & NTF_EXT_VALIDATED) {
		neigh->flags ^= NTF_EXT_VALIDATED;
		*notify = 1;
		*gc_update = true;
	}
}
235
neigh_remove_one(struct neighbour * n)236 bool neigh_remove_one(struct neighbour *n)
237 {
238 bool retval = false;
239
240 write_lock(&n->lock);
241 if (refcount_read(&n->refcnt) == 1) {
242 hlist_del_rcu(&n->hash);
243 hlist_del_rcu(&n->dev_list);
244 neigh_mark_dead(n);
245 retval = true;
246 }
247 write_unlock(&n->lock);
248 if (retval)
249 neigh_cleanup_and_release(n);
250 return retval;
251 }
252
neigh_forced_gc(struct neigh_table * tbl)253 static int neigh_forced_gc(struct neigh_table *tbl)
254 {
255 int max_clean = atomic_read(&tbl->gc_entries) -
256 READ_ONCE(tbl->gc_thresh2);
257 u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
258 unsigned long tref = jiffies - 5 * HZ;
259 struct neighbour *n, *tmp;
260 int shrunk = 0;
261 int loop = 0;
262
263 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
264
265 spin_lock_bh(&tbl->lock);
266
267 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
268 if (refcount_read(&n->refcnt) == 1) {
269 bool remove = false;
270
271 write_lock(&n->lock);
272 if ((n->nud_state == NUD_FAILED) ||
273 (n->nud_state == NUD_NOARP) ||
274 (tbl->is_multicast &&
275 tbl->is_multicast(n->primary_key)) ||
276 !time_in_range(n->updated, tref, jiffies))
277 remove = true;
278 write_unlock(&n->lock);
279
280 if (remove && neigh_remove_one(n))
281 shrunk++;
282 if (shrunk >= max_clean)
283 break;
284 if (++loop == 16) {
285 if (ktime_get_ns() > tmax)
286 goto unlock;
287 loop = 0;
288 }
289 }
290 }
291
292 WRITE_ONCE(tbl->last_flush, jiffies);
293 unlock:
294 spin_unlock_bh(&tbl->lock);
295
296 return shrunk;
297 }
298
neigh_add_timer(struct neighbour * n,unsigned long when)299 static void neigh_add_timer(struct neighbour *n, unsigned long when)
300 {
301 /* Use safe distance from the jiffies - LONG_MAX point while timer
302 * is running in DELAY/PROBE state but still show to user space
303 * large times in the past.
304 */
305 unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
306
307 neigh_hold(n);
308 if (!time_in_range(n->confirmed, mint, jiffies))
309 n->confirmed = mint;
310 if (time_before(n->used, n->confirmed))
311 n->used = n->confirmed;
312 if (unlikely(mod_timer(&n->timer, when))) {
313 printk("NEIGH: BUG, double timer add, state is %x\n",
314 n->nud_state);
315 dump_stack();
316 }
317 }
318
neigh_del_timer(struct neighbour * n)319 static int neigh_del_timer(struct neighbour *n)
320 {
321 if ((n->nud_state & NUD_IN_TIMER) &&
322 timer_delete(&n->timer)) {
323 neigh_release(n);
324 return 1;
325 }
326 return 0;
327 }
328
neigh_get_dev_parms_rcu(struct net_device * dev,int family)329 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
330 int family)
331 {
332 switch (family) {
333 case AF_INET:
334 return __in_dev_arp_parms_get_rcu(dev);
335 case AF_INET6:
336 return __in6_dev_nd_parms_get_rcu(dev);
337 }
338 return NULL;
339 }
340
neigh_parms_qlen_dec(struct net_device * dev,int family)341 static void neigh_parms_qlen_dec(struct net_device *dev, int family)
342 {
343 struct neigh_parms *p;
344
345 rcu_read_lock();
346 p = neigh_get_dev_parms_rcu(dev, family);
347 if (p)
348 p->qlen--;
349 rcu_read_unlock();
350 }
351
pneigh_queue_purge(struct sk_buff_head * list,struct net * net,int family)352 static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
353 int family)
354 {
355 struct sk_buff_head tmp;
356 unsigned long flags;
357 struct sk_buff *skb;
358
359 skb_queue_head_init(&tmp);
360 spin_lock_irqsave(&list->lock, flags);
361 skb = skb_peek(list);
362 while (skb != NULL) {
363 struct sk_buff *skb_next = skb_peek_next(skb, list);
364 struct net_device *dev = skb->dev;
365
366 if (net == NULL || net_eq(dev_net(dev), net)) {
367 neigh_parms_qlen_dec(dev, family);
368 __skb_unlink(skb, list);
369 __skb_queue_tail(&tmp, skb);
370 }
371 skb = skb_next;
372 }
373 spin_unlock_irqrestore(&list->lock, flags);
374
375 while ((skb = __skb_dequeue(&tmp))) {
376 dev_put(skb->dev);
377 kfree_skb(skb);
378 }
379 }
380
neigh_flush_one(struct neighbour * n)381 static void neigh_flush_one(struct neighbour *n)
382 {
383 hlist_del_rcu(&n->hash);
384 hlist_del_rcu(&n->dev_list);
385
386 write_lock(&n->lock);
387
388 neigh_del_timer(n);
389 neigh_mark_dead(n);
390
391 if (refcount_read(&n->refcnt) != 1) {
392 /* The most unpleasant situation.
393 * We must destroy neighbour entry,
394 * but someone still uses it.
395 *
396 * The destroy will be delayed until
397 * the last user releases us, but
398 * we must kill timers etc. and move
399 * it to safe state.
400 */
401 __skb_queue_purge(&n->arp_queue);
402 n->arp_queue_len_bytes = 0;
403 WRITE_ONCE(n->output, neigh_blackhole);
404
405 if (n->nud_state & NUD_VALID)
406 n->nud_state = NUD_NOARP;
407 else
408 n->nud_state = NUD_NONE;
409
410 neigh_dbg(2, "neigh %p is stray\n", n);
411 }
412
413 write_unlock(&n->lock);
414
415 neigh_cleanup_and_release(n);
416 }
417
/* Flush the entries on @dev's neighbour list for @tbl's family.
 *
 * With @skip_perm set, entries in NUD_PERMANENT state or carrying
 * NTF_EXT_VALIDATED are kept.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	struct hlist_head *head = neigh_get_dev_table(dev, tbl->family);
	struct hlist_node *next;
	struct neighbour *n;

	hlist_for_each_entry_safe(n, next, head, dev_list) {
		bool keep = skip_perm &&
			    ((n->nud_state & NUD_PERMANENT) ||
			     (n->flags & NTF_EXT_VALIDATED));

		if (!keep)
			neigh_flush_one(n);
	}
}
436
neigh_flush_table(struct neigh_table * tbl)437 static void neigh_flush_table(struct neigh_table *tbl)
438 {
439 struct neigh_hash_table *nht;
440 int i;
441
442 nht = rcu_dereference_protected(tbl->nht,
443 lockdep_is_held(&tbl->lock));
444
445 for (i = 0; i < (1 << nht->hash_shift); i++) {
446 struct hlist_node *tmp;
447 struct neighbour *n;
448
449 neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i])
450 neigh_flush_one(n);
451 }
452 }
453
neigh_changeaddr(struct neigh_table * tbl,struct net_device * dev)454 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
455 {
456 spin_lock_bh(&tbl->lock);
457 neigh_flush_dev(tbl, dev, false);
458 spin_unlock_bh(&tbl->lock);
459 }
460 EXPORT_SYMBOL(neigh_changeaddr);
461
/* Common teardown for a device going down or losing carrier.
 *
 * With a non-NULL @dev only that device's entries are flushed; a NULL
 * @dev flushes the whole table (and @skip_perm is then expected to be
 * false).  Afterwards the proxy entries are torn down and the proxy queue
 * is purged, restricted to @dev's namespace when @dev is given.  The proxy
 * timer is deleted synchronously if the proxy queue ended up empty.
 *
 * Always returns 0.
 */
static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	struct net *net;

	spin_lock_bh(&tbl->lock);
	if (unlikely(!dev)) {
		DEBUG_NET_WARN_ON_ONCE(skip_perm);
		neigh_flush_table(tbl);
	} else {
		neigh_flush_dev(tbl, dev, skip_perm);
	}
	spin_unlock_bh(&tbl->lock);

	pneigh_ifdown(tbl, dev, skip_perm);

	/* A NULL namespace makes the purge match every queued packet. */
	net = dev ? dev_net(dev) : NULL;
	pneigh_queue_purge(&tbl->proxy_queue, net, tbl->family);

	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		timer_delete_sync(&tbl->proxy_timer);

	return 0;
}
481
neigh_carrier_down(struct neigh_table * tbl,struct net_device * dev)482 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
483 {
484 __neigh_ifdown(tbl, dev, true);
485 return 0;
486 }
487 EXPORT_SYMBOL(neigh_carrier_down);
488
neigh_ifdown(struct neigh_table * tbl,struct net_device * dev)489 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
490 {
491 __neigh_ifdown(tbl, dev, false);
492 return 0;
493 }
494 EXPORT_SYMBOL(neigh_ifdown);
495
/* Allocate and initialise a neighbour entry for @tbl / @dev.
 *
 * Unless @exempt_from_gc is set, the entry is charged to tbl->gc_entries
 * first.  If the previous count had reached gc_thresh3, or had reached
 * gc_thresh2 with the last flush more than 5 seconds ago, a forced gc run
 * is attempted; the allocation is refused when that run freed nothing and
 * the gc_thresh3 limit was reached.
 *
 * The new entry starts with refcnt 1, state NUD_NONE, output handler
 * neigh_blackhole and ->dead set.  It is not linked into any list.
 *
 * Returns the entry, or NULL on table overflow or allocation failure (the
 * gc_entries charge is undone in both cases).
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u32 flags, bool exempt_from_gc)
{
	unsigned long now = jiffies;
	struct neighbour *n;

	if (!exempt_from_gc) {
		int entries = atomic_inc_return(&tbl->gc_entries) - 1;
		int gc_thresh3 = READ_ONCE(tbl->gc_thresh3);

		if (entries >= gc_thresh3 ||
		    (entries >= READ_ONCE(tbl->gc_thresh2) &&
		     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
			if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
				net_info_ratelimited("%s: neighbor table overflow!\n",
						     tbl->id);
				NEIGH_CACHE_STAT_INC(tbl, table_fulls);
				goto uncharge;
			}
		}
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto uncharge;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	n->flags = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	refcount_set(&n->refcnt, 1);
	/* The entry is created dead; ->dead is cleared on table insertion. */
	n->dead = 1;
	INIT_LIST_HEAD(&n->gc_list);
	INIT_LIST_HEAD(&n->managed_list);

	atomic_inc(&tbl->entries);

	return n;

uncharge:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	return NULL;
}
552
/* Store a random 32-bit value in *@x with its lowest bit forced to 1, so
 * the result is always odd.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	u32 rnd = get_random_u32();

	*x = rnd | 1;
}
557
neigh_hash_alloc(unsigned int shift)558 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
559 {
560 size_t size = (1 << shift) * sizeof(struct hlist_head);
561 struct hlist_head *hash_heads;
562 struct neigh_hash_table *ret;
563 int i;
564
565 ret = kmalloc_obj(*ret, GFP_ATOMIC);
566 if (!ret)
567 return NULL;
568
569 hash_heads = kzalloc(size, GFP_ATOMIC);
570 if (!hash_heads) {
571 kfree(ret);
572 return NULL;
573 }
574 ret->hash_heads = hash_heads;
575 ret->hash_shift = shift;
576 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
577 neigh_get_hash_rnd(&ret->hash_rnd[i]);
578 return ret;
579 }
580
neigh_hash_free_rcu(struct rcu_head * head)581 static void neigh_hash_free_rcu(struct rcu_head *head)
582 {
583 struct neigh_hash_table *nht = container_of(head,
584 struct neigh_hash_table,
585 rcu);
586
587 kfree(nht->hash_heads);
588 kfree(nht);
589 }
590
neigh_hash_grow(struct neigh_table * tbl,unsigned long new_shift)591 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
592 unsigned long new_shift)
593 {
594 unsigned int i, hash;
595 struct neigh_hash_table *new_nht, *old_nht;
596
597 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
598
599 old_nht = rcu_dereference_protected(tbl->nht,
600 lockdep_is_held(&tbl->lock));
601 new_nht = neigh_hash_alloc(new_shift);
602 if (!new_nht)
603 return old_nht;
604
605 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
606 struct hlist_node *tmp;
607 struct neighbour *n;
608
609 neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
610 hash = tbl->hash(n->primary_key, n->dev,
611 new_nht->hash_rnd);
612
613 hash >>= (32 - new_nht->hash_shift);
614
615 hlist_del_rcu(&n->hash);
616 hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
617 }
618 }
619
620 rcu_assign_pointer(tbl->nht, new_nht);
621 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
622 return new_nht;
623 }
624
neigh_lookup(struct neigh_table * tbl,const void * pkey,struct net_device * dev)625 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
626 struct net_device *dev)
627 {
628 struct neighbour *n;
629
630 NEIGH_CACHE_STAT_INC(tbl, lookups);
631
632 rcu_read_lock();
633 n = __neigh_lookup_noref(tbl, pkey, dev);
634 if (n) {
635 if (!refcount_inc_not_zero(&n->refcnt))
636 n = NULL;
637 NEIGH_CACHE_STAT_INC(tbl, hits);
638 }
639
640 rcu_read_unlock();
641 return n;
642 }
643 EXPORT_SYMBOL(neigh_lookup);
644
645 static struct neighbour *
___neigh_create(struct neigh_table * tbl,const void * pkey,struct net_device * dev,u32 flags,bool exempt_from_gc,bool want_ref)646 ___neigh_create(struct neigh_table *tbl, const void *pkey,
647 struct net_device *dev, u32 flags,
648 bool exempt_from_gc, bool want_ref)
649 {
650 u32 hash_val, key_len = tbl->key_len;
651 struct neighbour *n1, *rc, *n;
652 struct neigh_hash_table *nht;
653 int error;
654
655 n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
656 trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
657 if (!n) {
658 rc = ERR_PTR(-ENOBUFS);
659 goto out;
660 }
661
662 memcpy(n->primary_key, pkey, key_len);
663 n->dev = dev;
664 netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
665
666 /* Protocol specific setup. */
667 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
668 rc = ERR_PTR(error);
669 goto out_neigh_release;
670 }
671
672 if (dev->netdev_ops->ndo_neigh_construct) {
673 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
674 if (error < 0) {
675 rc = ERR_PTR(error);
676 goto out_neigh_release;
677 }
678 }
679
680 /* Device specific setup. */
681 if (n->parms->neigh_setup &&
682 (error = n->parms->neigh_setup(n)) < 0) {
683 rc = ERR_PTR(error);
684 goto out_neigh_release;
685 }
686
687 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
688
689 spin_lock_bh(&tbl->lock);
690 nht = rcu_dereference_protected(tbl->nht,
691 lockdep_is_held(&tbl->lock));
692
693 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
694 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
695
696 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
697
698 if (n->parms->dead) {
699 rc = ERR_PTR(-EINVAL);
700 goto out_tbl_unlock;
701 }
702
703 neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
704 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
705 if (want_ref)
706 neigh_hold(n1);
707 rc = n1;
708 goto out_tbl_unlock;
709 }
710 }
711
712 n->dead = 0;
713 if (!exempt_from_gc)
714 list_add_tail(&n->gc_list, &n->tbl->gc_list);
715 if (n->flags & NTF_MANAGED)
716 list_add_tail(&n->managed_list, &n->tbl->managed_list);
717 if (want_ref)
718 neigh_hold(n);
719 hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
720
721 hlist_add_head_rcu(&n->dev_list,
722 neigh_get_dev_table(dev, tbl->family));
723
724 spin_unlock_bh(&tbl->lock);
725 neigh_dbg(2, "neigh %p is created\n", n);
726 rc = n;
727 out:
728 return rc;
729 out_tbl_unlock:
730 spin_unlock_bh(&tbl->lock);
731 out_neigh_release:
732 if (!exempt_from_gc)
733 atomic_dec(&tbl->gc_entries);
734 neigh_release(n);
735 goto out;
736 }
737
/* Create an entry with no NTF_* flags set.  Entries on devices that have
 * IFF_LOOPBACK set are created exempt from gc.  See ___neigh_create() for
 * the return values.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	bool exempt_from_gc = false;

	if (dev->flags & IFF_LOOPBACK)
		exempt_from_gc = true;

	return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
746
pneigh_hash(const void * pkey,unsigned int key_len)747 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
748 {
749 u32 hash_val = *(u32 *)(pkey + key_len - 4);
750 hash_val ^= (hash_val >> 16);
751 hash_val ^= hash_val >> 8;
752 hash_val ^= hash_val >> 4;
753 hash_val &= PNEIGH_HASHMASK;
754 return hash_val;
755 }
756
pneigh_lookup(struct neigh_table * tbl,struct net * net,const void * pkey,struct net_device * dev)757 struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
758 struct net *net, const void *pkey,
759 struct net_device *dev)
760 {
761 struct pneigh_entry *n;
762 unsigned int key_len;
763 u32 hash_val;
764
765 key_len = tbl->key_len;
766 hash_val = pneigh_hash(pkey, key_len);
767 n = rcu_dereference_check(tbl->phash_buckets[hash_val],
768 lockdep_is_held(&tbl->phash_lock));
769
770 while (n) {
771 if (!memcmp(n->key, pkey, key_len) &&
772 net_eq(pneigh_net(n), net) &&
773 (n->dev == dev || !n->dev))
774 return n;
775
776 n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
777 }
778
779 return NULL;
780 }
781 EXPORT_IPV6_MOD(pneigh_lookup);
782
/* Create a proxy entry for @pkey, or update the one pneigh_lookup() finds.
 *
 * Runs entirely under tbl->phash_lock.  A new entry is allocated with
 * GFP_KERNEL, takes a tracked reference on @dev, is passed to
 * tbl->pconstructor if there is one, and is pushed at the head of its
 * hash bucket.  In both the create and the update case @flags,
 * @permanent and @protocol are then stored in the entry.
 *
 * Returns 0 on success, or -ENOBUFS when the allocation fails or the
 * constructor returns non-zero.
 */
int pneigh_create(struct neigh_table *tbl, struct net *net,
		  const void *pkey, struct net_device *dev,
		  u32 flags, u8 protocol, bool permanent)
{
	struct pneigh_entry *n;
	unsigned int key_len;
	u32 hash_val;
	int err = 0;

	mutex_lock(&tbl->phash_lock);

	n = pneigh_lookup(tbl, net, pkey, dev);
	if (!n) {
		key_len = tbl->key_len;
		n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
		if (!n) {
			err = -ENOBUFS;
			goto out;
		}

		write_pnet(&n->net, net);
		memcpy(n->key, pkey, key_len);
		n->dev = dev;
		netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);

		if (tbl->pconstructor && tbl->pconstructor(n)) {
			/* Undo the device reference before freeing. */
			netdev_put(dev, &n->dev_tracker);
			kfree(n);
			err = -ENOBUFS;
			goto out;
		}

		hash_val = pneigh_hash(pkey, key_len);
		n->next = tbl->phash_buckets[hash_val];
		rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
	}

	WRITE_ONCE(n->flags, flags);
	n->permanent = permanent;
	WRITE_ONCE(n->protocol, protocol);
out:
	mutex_unlock(&tbl->phash_lock);
	return err;
}
828
pneigh_destroy(struct rcu_head * rcu)829 static void pneigh_destroy(struct rcu_head *rcu)
830 {
831 struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
832
833 netdev_put(n->dev, &n->dev_tracker);
834 kfree(n);
835 }
836
pneigh_delete(struct neigh_table * tbl,struct net * net,const void * pkey,struct net_device * dev)837 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
838 struct net_device *dev)
839 {
840 struct pneigh_entry *n, __rcu **np;
841 unsigned int key_len;
842 u32 hash_val;
843
844 key_len = tbl->key_len;
845 hash_val = pneigh_hash(pkey, key_len);
846
847 mutex_lock(&tbl->phash_lock);
848
849 for (np = &tbl->phash_buckets[hash_val];
850 (n = rcu_dereference_protected(*np, 1)) != NULL;
851 np = &n->next) {
852 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
853 net_eq(pneigh_net(n), net)) {
854 rcu_assign_pointer(*np, n->next);
855
856 mutex_unlock(&tbl->phash_lock);
857
858 if (tbl->pdestructor)
859 tbl->pdestructor(n);
860
861 call_rcu(&n->rcu, pneigh_destroy);
862 return 0;
863 }
864 }
865
866 mutex_unlock(&tbl->phash_lock);
867 return -ENOENT;
868 }
869
/* Remove proxy entries belonging to @dev, or all entries if @dev is NULL.
 *
 * With @skip_perm set, entries marked permanent are kept.  Matching
 * entries are unlinked under tbl->phash_lock and collected on a private
 * list; after the lock is dropped, tbl->pdestructor (if any) is run on
 * each of them and they are freed through call_rcu().
 *
 * The chain walk states its locking rule to lockdep with
 * lockdep_is_held(&tbl->phash_lock), the same condition pneigh_lookup()
 * uses, instead of an unconditional "1".
 */
static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	struct pneigh_entry *n, __rcu **np;
	LIST_HEAD(head);
	u32 h;

	mutex_lock(&tbl->phash_lock);

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->phash_lock))) != NULL) {
			bool keep = (skip_perm && n->permanent) ||
				    (dev && n->dev != dev);

			if (keep) {
				np = &n->next;
				continue;
			}

			/* Unlink; *np now refers to the successor, so np
			 * itself must not advance.
			 */
			rcu_assign_pointer(*np, n->next);
			list_add(&n->free_node, &head);
		}
	}

	mutex_unlock(&tbl->phash_lock);

	while (!list_empty(&head)) {
		n = list_first_entry(&head, typeof(*n), free_node);
		list_del(&n->free_node);

		if (tbl->pdestructor)
			tbl->pdestructor(n);

		call_rcu(&n->rcu, pneigh_destroy);
	}
}
906
neigh_parms_put(struct neigh_parms * parms)907 static inline void neigh_parms_put(struct neigh_parms *parms)
908 {
909 if (refcount_dec_and_test(&parms->refcnt))
910 kfree(parms);
911 }
912
913 /*
914 * neighbour must already be out of the table;
915 *
916 */
neigh_destroy(struct neighbour * neigh)917 void neigh_destroy(struct neighbour *neigh)
918 {
919 struct net_device *dev = neigh->dev;
920
921 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
922
923 if (!neigh->dead) {
924 pr_warn("Destroying alive neighbour %p\n", neigh);
925 dump_stack();
926 return;
927 }
928
929 if (neigh_del_timer(neigh))
930 pr_warn("Impossible event\n");
931
932 write_lock_bh(&neigh->lock);
933 __skb_queue_purge(&neigh->arp_queue);
934 write_unlock_bh(&neigh->lock);
935 neigh->arp_queue_len_bytes = 0;
936
937 if (dev->netdev_ops->ndo_neigh_destroy)
938 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
939
940 netdev_put(dev, &neigh->dev_tracker);
941 neigh_parms_put(neigh->parms);
942
943 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
944
945 atomic_dec(&neigh->tbl->entries);
946 kfree_rcu(neigh, rcu);
947 }
948 EXPORT_SYMBOL(neigh_destroy);
949
950 /* Neighbour state is suspicious;
951 disable fast path.
952
953 Called with write_locked neigh.
954 */
neigh_suspect(struct neighbour * neigh)955 static void neigh_suspect(struct neighbour *neigh)
956 {
957 neigh_dbg(2, "neigh %p is suspected\n", neigh);
958
959 WRITE_ONCE(neigh->output, neigh->ops->output);
960 }
961
962 /* Neighbour state is OK;
963 enable fast path.
964
965 Called with write_locked neigh.
966 */
neigh_connect(struct neighbour * neigh)967 static void neigh_connect(struct neighbour *neigh)
968 {
969 neigh_dbg(2, "neigh %p is connected\n", neigh);
970
971 WRITE_ONCE(neigh->output, neigh->ops->connected_output);
972 }
973
neigh_periodic_work(struct work_struct * work)974 static void neigh_periodic_work(struct work_struct *work)
975 {
976 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
977 struct neigh_hash_table *nht;
978 struct hlist_node *tmp;
979 struct neighbour *n;
980 unsigned int i;
981
982 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
983
984 spin_lock_bh(&tbl->lock);
985 nht = rcu_dereference_protected(tbl->nht,
986 lockdep_is_held(&tbl->lock));
987
988 /*
989 * periodically recompute ReachableTime from random function
990 */
991
992 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
993 struct neigh_parms *p;
994
995 WRITE_ONCE(tbl->last_rand, jiffies);
996 list_for_each_entry(p, &tbl->parms_list, list)
997 neigh_set_reach_time(p);
998 }
999
1000 if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
1001 goto out;
1002
1003 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
1004 neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
1005 unsigned int state;
1006
1007 write_lock(&n->lock);
1008
1009 state = n->nud_state;
1010 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
1011 (n->flags &
1012 (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) {
1013 write_unlock(&n->lock);
1014 continue;
1015 }
1016
1017 if (time_before(n->used, n->confirmed) &&
1018 time_is_before_eq_jiffies(n->confirmed))
1019 n->used = n->confirmed;
1020
1021 if (refcount_read(&n->refcnt) == 1 &&
1022 (state == NUD_FAILED ||
1023 !time_in_range_open(jiffies, n->used,
1024 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
1025 hlist_del_rcu(&n->hash);
1026 hlist_del_rcu(&n->dev_list);
1027 neigh_mark_dead(n);
1028 write_unlock(&n->lock);
1029 neigh_cleanup_and_release(n);
1030 continue;
1031 }
1032 write_unlock(&n->lock);
1033 }
1034 /*
1035 * It's fine to release lock here, even if hash table
1036 * grows while we are preempted.
1037 */
1038 spin_unlock_bh(&tbl->lock);
1039 cond_resched();
1040 spin_lock_bh(&tbl->lock);
1041 nht = rcu_dereference_protected(tbl->nht,
1042 lockdep_is_held(&tbl->lock));
1043 }
1044 out:
1045 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
1046 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
1047 * BASE_REACHABLE_TIME.
1048 */
1049 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1050 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
1051 spin_unlock_bh(&tbl->lock);
1052 }
1053
neigh_max_probes(struct neighbour * n)1054 static __inline__ int neigh_max_probes(struct neighbour *n)
1055 {
1056 struct neigh_parms *p = n->parms;
1057 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
1058 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
1059 NEIGH_VAR(p, MCAST_PROBES));
1060 }
1061
neigh_invalidate(struct neighbour * neigh)1062 static void neigh_invalidate(struct neighbour *neigh)
1063 __releases(neigh->lock)
1064 __acquires(neigh->lock)
1065 {
1066 struct sk_buff *skb;
1067
1068 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
1069 neigh_dbg(2, "neigh %p is failed\n", neigh);
1070 neigh->updated = jiffies;
1071
1072 /* It is very thin place. report_unreachable is very complicated
1073 routine. Particularly, it can hit the same neighbour entry!
1074
1075 So that, we try to be accurate and avoid dead loop. --ANK
1076 */
1077 while (neigh->nud_state == NUD_FAILED &&
1078 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1079 write_unlock(&neigh->lock);
1080 neigh->ops->error_report(neigh, skb);
1081 write_lock(&neigh->lock);
1082 }
1083 __skb_queue_purge(&neigh->arp_queue);
1084 neigh->arp_queue_len_bytes = 0;
1085 }
1086
neigh_probe(struct neighbour * neigh)1087 static void neigh_probe(struct neighbour *neigh)
1088 __releases(neigh->lock)
1089 {
1090 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1091 /* keep skb alive even if arp_queue overflows */
1092 if (skb)
1093 skb = skb_clone(skb, GFP_ATOMIC);
1094 write_unlock(&neigh->lock);
1095 if (neigh->ops->solicit)
1096 neigh->ops->solicit(neigh, skb);
1097 atomic_inc(&neigh->probes);
1098 consume_skb(skb);
1099 }
1100
/* Called when a timer expires for a neighbour entry.
 *
 * Advances the NUD state of the entry owning @t under neigh->lock:
 *   - NUD_REACHABLE: stays reachable while recently confirmed, moves to
 *     NUD_DELAY while recently used, otherwise ages to NUD_STALE;
 *   - NUD_DELAY: returns to NUD_REACHABLE if confirmed in time, otherwise
 *     advances to NUD_PROBE with the probe counter reset;
 *   - NUD_PROBE / NUD_INCOMPLETE: another solicitation is sent until the
 *     counter reaches neigh_max_probes().
 * One reference on the entry is dropped before returning.
 */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = timer_container_of(neigh, t, timer);
	bool skip_probe = false;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	/* Default re-arm time; every branch that keeps the timer refines it. */
	next = now + HZ;

	/* The entry left all timer-driven states meanwhile: nothing to do. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_DELAY);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_STALE);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_PROBE);
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	/* Probe budget spent: an NTF_EXT_VALIDATED entry in NUD_PROBE falls
	 * back to NUD_STALE, anything else becomes NUD_FAILED and has its
	 * queued packets reported.  No further probe is sent and the timer
	 * is not re-armed.
	 */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		if (neigh->nud_state == NUD_PROBE &&
		    neigh->flags & NTF_EXT_VALIDATED) {
			WRITE_ONCE(neigh->nud_state, NUD_STALE);
			neigh->updated = jiffies;
		} else {
			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
			neigh_invalidate(neigh);
		}
		notify = 1;
		skip_probe = true;
	}

	/* The netlink notification is issued while neigh->lock is held. */
	if (notify)
		__neigh_notify(neigh, RTM_NEWNEIGH, 0, 0);

	if (skip_probe)
		goto out;

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Never re-arm closer than HZ/100 from now. */
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		/* mod_timer() returned 0: the timer was not pending, so the
		 * newly armed timer gets a reference of its own.
		 */
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() releases neigh->lock itself. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	/* The netevent notifier chain is called after the lock is dropped. */
	if (notify)
		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
1204
/* Start or continue address resolution for @neigh on behalf of @skb.
 *
 * @neigh:        entry to act on
 * @skb:          packet waiting for resolution; may be NULL
 * @immediate_ok: when true, a freshly started resolution sends its first
 *                probe right away instead of arming the timer for the
 *                next jiffy
 *
 * Returns 0 when nothing was done with @skb (the entry is connected, in
 * NUD_DELAY/NUD_PROBE, or was just moved from NUD_STALE to NUD_DELAY).
 * Returns 1 when @skb was consumed: queued on arp_queue while the entry is
 * NUD_INCOMPLETE, or freed because resolution cannot be attempted or the
 * entry is dead.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
		       const bool immediate_ok)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		/* Resolution is only started when at least one multicast or
		 * application probe is configured.
		 */
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* The counter starts at UCAST_PROBES, i.e. the
			 * unicast share of the budget is treated as spent.
			 */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
			neigh->updated = now;
			if (!immediate_ok) {
				next = now + 1;
			} else {
				immediate_probe = true;
				next = now + max(NEIGH_VAR(neigh->parms,
							   RETRANS_TIME),
						 HZ / 100);
			}
			neigh_add_timer(neigh, next);
		} else {
			/* No probes configured: fail at once, drop the skb. */
			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		WRITE_ONCE(neigh->nud_state, NUD_DELAY);
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Make room: drop the oldest queued packets until the
			 * new one fits within QUEUE_LEN_BYTES or the queue is
			 * empty.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops neigh->lock with a plain write_unlock(), so
	 * bottom halves are re-enabled separately on both paths.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	/* A dead entry that is still NUD_STALE takes the normal exit with
	 * rc == 0; for any other state the skb is dropped.
	 */
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
1292
neigh_update_hhs(struct neighbour * neigh)1293 static void neigh_update_hhs(struct neighbour *neigh)
1294 {
1295 struct hh_cache *hh;
1296 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1297 = NULL;
1298
1299 if (neigh->dev->header_ops)
1300 update = neigh->dev->header_ops->cache_update;
1301
1302 if (update) {
1303 hh = &neigh->hh;
1304 if (READ_ONCE(hh->hh_len)) {
1305 write_seqlock_bh(&hh->hh_lock);
1306 update(hh, neigh->dev, neigh->ha);
1307 write_sequnlock_bh(&hh->hh_lock);
1308 }
1309 }
1310 }
1311
neigh_update_process_arp_queue(struct neighbour * neigh)1312 static void neigh_update_process_arp_queue(struct neighbour *neigh)
1313 __releases(neigh->lock)
1314 __acquires(neigh->lock)
1315 {
1316 struct sk_buff *skb;
1317
1318 /* Again: avoid deadlock if something went wrong. */
1319 while (neigh->nud_state & NUD_VALID &&
1320 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1321 struct dst_entry *dst = skb_dst(skb);
1322 struct neighbour *n2, *n1 = neigh;
1323
1324 write_unlock_bh(&neigh->lock);
1325
1326 rcu_read_lock();
1327
1328 /* Why not just use 'neigh' as-is? The problem is that
1329 * things such as shaper, eql, and sch_teql can end up
1330 * using alternative, different, neigh objects to output
1331 * the packet in the output path. So what we need to do
1332 * here is re-lookup the top-level neigh in the path so
1333 * we can reinject the packet there.
1334 */
1335 n2 = NULL;
1336 if (dst &&
1337 READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) {
1338 n2 = dst_neigh_lookup_skb(dst, skb);
1339 if (n2)
1340 n1 = n2;
1341 }
1342 READ_ONCE(n1->output)(n1, skb);
1343 if (n2)
1344 neigh_release(n2);
1345 rcu_read_unlock();
1346
1347 write_lock_bh(&neigh->lock);
1348 }
1349 __skb_queue_purge(&neigh->arp_queue);
1350 neigh->arp_queue_len_bytes = 0;
1351 }
1352
1353 /* Generic update routine.
1354 -- lladdr is new lladdr or NULL, if it is not supplied.
1355 -- new is new state.
1356 -- flags
1357 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1358 if it is different.
1359 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1360 lladdr instead of overriding it
1361 if it is different.
1362 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1363 NEIGH_UPDATE_F_USE means that the entry is user triggered.
1364 NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
1365 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1366 NTF_ROUTER flag.
1367 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1368 a router.
1369 NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed
1370 or invalidated.
1371
1372 Caller MUST hold reference count on the entry.
1373 */
__neigh_update(struct neighbour * neigh,const u8 * lladdr,u8 new,u32 flags,u32 nlmsg_pid,struct netlink_ext_ack * extack)1374 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1375 u8 new, u32 flags, u32 nlmsg_pid,
1376 struct netlink_ext_ack *extack)
1377 {
1378 bool gc_update = false, managed_update = false;
1379 bool process_arp_queue = false;
1380 int update_isrouter = 0;
1381 struct net_device *dev;
1382 int err, notify = 0;
1383 u8 old;
1384
1385 trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1386
1387 write_lock_bh(&neigh->lock);
1388
1389 dev = neigh->dev;
1390 old = neigh->nud_state;
1391 err = -EPERM;
1392
1393 if (neigh->dead) {
1394 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1395 new = old;
1396 goto out;
1397 }
1398 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1399 (old & (NUD_NOARP | NUD_PERMANENT)))
1400 goto out;
1401
1402 neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update);
1403 if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
1404 new = old & ~NUD_PERMANENT;
1405 WRITE_ONCE(neigh->nud_state, new);
1406 err = 0;
1407 goto out;
1408 }
1409
1410 if (!(new & NUD_VALID)) {
1411 neigh_del_timer(neigh);
1412 if (old & NUD_CONNECTED)
1413 neigh_suspect(neigh);
1414 WRITE_ONCE(neigh->nud_state, new);
1415 err = 0;
1416 notify = old & NUD_VALID;
1417 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1418 (new & NUD_FAILED)) {
1419 neigh_invalidate(neigh);
1420 notify = 1;
1421 }
1422 goto out;
1423 }
1424
1425 /* Compare new lladdr with cached one */
1426 if (!dev->addr_len) {
1427 /* First case: device needs no address. */
1428 lladdr = neigh->ha;
1429 } else if (lladdr) {
1430 /* The second case: if something is already cached
1431 and a new address is proposed:
1432 - compare new & old
1433 - if they are different, check override flag
1434 */
1435 if ((old & NUD_VALID) &&
1436 !memcmp(lladdr, neigh->ha, dev->addr_len))
1437 lladdr = neigh->ha;
1438 } else {
1439 /* No address is supplied; if we know something,
1440 use it, otherwise discard the request.
1441 */
1442 err = -EINVAL;
1443 if (!(old & NUD_VALID)) {
1444 NL_SET_ERR_MSG(extack, "No link layer address given");
1445 goto out;
1446 }
1447 lladdr = neigh->ha;
1448 }
1449
1450 /* Update confirmed timestamp for neighbour entry after we
1451 * received ARP packet even if it doesn't change IP to MAC binding.
1452 */
1453 if (new & NUD_CONNECTED)
1454 neigh->confirmed = jiffies;
1455
1456 /* If entry was valid and address is not changed,
1457 do not change entry state, if new one is STALE.
1458 */
1459 err = 0;
1460 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1461 if (old & NUD_VALID) {
1462 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1463 update_isrouter = 0;
1464 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1465 (old & NUD_CONNECTED)) {
1466 lladdr = neigh->ha;
1467 new = NUD_STALE;
1468 } else
1469 goto out;
1470 } else {
1471 if (lladdr == neigh->ha && new == NUD_STALE &&
1472 !(flags & NEIGH_UPDATE_F_ADMIN))
1473 new = old;
1474 }
1475 }
1476
1477 /* Update timestamp only once we know we will make a change to the
1478 * neighbour entry. Otherwise we risk to move the locktime window with
1479 * noop updates and ignore relevant ARP updates.
1480 */
1481 if (new != old || lladdr != neigh->ha)
1482 neigh->updated = jiffies;
1483
1484 if (new != old) {
1485 neigh_del_timer(neigh);
1486 if (new & NUD_PROBE)
1487 atomic_set(&neigh->probes, 0);
1488 if (new & NUD_IN_TIMER)
1489 neigh_add_timer(neigh, (jiffies +
1490 ((new & NUD_REACHABLE) ?
1491 neigh->parms->reachable_time :
1492 0)));
1493 WRITE_ONCE(neigh->nud_state, new);
1494 notify = 1;
1495 }
1496
1497 if (lladdr != neigh->ha) {
1498 write_seqlock(&neigh->ha_lock);
1499 memcpy(&neigh->ha, lladdr, dev->addr_len);
1500 write_sequnlock(&neigh->ha_lock);
1501 neigh_update_hhs(neigh);
1502 if (!(new & NUD_CONNECTED))
1503 neigh->confirmed = jiffies -
1504 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1505 notify = 1;
1506 }
1507 if (new == old)
1508 goto out;
1509 if (new & NUD_CONNECTED)
1510 neigh_connect(neigh);
1511 else
1512 neigh_suspect(neigh);
1513
1514 if (!(old & NUD_VALID))
1515 process_arp_queue = true;
1516
1517 out:
1518 if (update_isrouter)
1519 neigh_update_is_router(neigh, flags, ¬ify);
1520
1521 if (notify)
1522 __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
1523
1524 if (process_arp_queue)
1525 neigh_update_process_arp_queue(neigh);
1526
1527 write_unlock_bh(&neigh->lock);
1528
1529 if (((new ^ old) & NUD_PERMANENT) || gc_update)
1530 neigh_update_gc_list(neigh);
1531 if (managed_update)
1532 neigh_update_managed_list(neigh);
1533
1534 if (notify)
1535 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
1536
1537 trace_neigh_update_done(neigh, err);
1538 return err;
1539 }
1540
/* Exported form of __neigh_update() for callers that have no netlink
 * extended ack to fill in.  Returns whatever __neigh_update() returns.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	int err;

	err = __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1547
1548 /* Update the neigh to listen temporarily for probe responses, even if it is
1549 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1550 */
__neigh_set_probe_once(struct neighbour * neigh)1551 void __neigh_set_probe_once(struct neighbour *neigh)
1552 {
1553 if (neigh->dead)
1554 return;
1555 neigh->updated = jiffies;
1556 if (!(neigh->nud_state & NUD_FAILED))
1557 return;
1558 WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
1559 atomic_set(&neigh->probes, neigh_max_probes(neigh));
1560 neigh_add_timer(neigh,
1561 jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1562 HZ/100));
1563 }
1564 EXPORT_SYMBOL(__neigh_set_probe_once);
1565
/* Look up the neighbour for @saddr on @dev and mark it NUD_STALE with
 * @lladdr (override allowed).  The lookup may create the entry only when
 * an address was supplied or the device uses no link-layer address.
 *
 * Returns the entry obtained from __neigh_lookup(), or NULL if there is
 * none.
 */
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh;

	neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
	if (!neigh)
		return neigh;

	neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE, 0);

	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
1578
1579 /* called with read_lock_bh(&n->lock); */
neigh_hh_init(struct neighbour * n)1580 static void neigh_hh_init(struct neighbour *n)
1581 {
1582 struct net_device *dev = n->dev;
1583 __be16 prot = n->tbl->protocol;
1584 struct hh_cache *hh = &n->hh;
1585
1586 write_lock_bh(&n->lock);
1587
1588 /* Only one thread can come in here and initialize the
1589 * hh_cache entry.
1590 */
1591 if (!hh->hh_len)
1592 dev->header_ops->cache(n, hh, prot);
1593
1594 write_unlock_bh(&n->lock);
1595 }
1596
1597 /* Slow and careful. */
1598
neigh_resolve_output(struct neighbour * neigh,struct sk_buff * skb)1599 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1600 {
1601 int rc = 0;
1602
1603 if (!neigh_event_send(neigh, skb)) {
1604 int err;
1605 struct net_device *dev = neigh->dev;
1606 unsigned int seq;
1607
1608 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1609 neigh_hh_init(neigh);
1610
1611 do {
1612 __skb_pull(skb, skb_network_offset(skb));
1613 seq = read_seqbegin(&neigh->ha_lock);
1614 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1615 neigh->ha, NULL, skb->len);
1616 } while (read_seqretry(&neigh->ha_lock, seq));
1617
1618 if (err >= 0)
1619 rc = dev_queue_xmit(skb);
1620 else
1621 goto out_kfree_skb;
1622 }
1623 out:
1624 return rc;
1625 out_kfree_skb:
1626 rc = -EINVAL;
1627 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
1628 goto out;
1629 }
1630 EXPORT_SYMBOL(neigh_resolve_output);
1631
1632 /* As fast as possible without hh cache */
1633
neigh_connected_output(struct neighbour * neigh,struct sk_buff * skb)1634 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1635 {
1636 struct net_device *dev = neigh->dev;
1637 unsigned int seq;
1638 int err;
1639
1640 do {
1641 __skb_pull(skb, skb_network_offset(skb));
1642 seq = read_seqbegin(&neigh->ha_lock);
1643 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1644 neigh->ha, NULL, skb->len);
1645 } while (read_seqretry(&neigh->ha_lock, seq));
1646
1647 if (err >= 0)
1648 err = dev_queue_xmit(skb);
1649 else {
1650 err = -EINVAL;
1651 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
1652 }
1653 return err;
1654 }
1655 EXPORT_SYMBOL(neigh_connected_output);
1656
/* Output function that needs no link-layer header: @skb is queued for
 * transmission as is; @neigh is not used.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = dev_queue_xmit(skb);

	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1662
neigh_managed_work(struct work_struct * work)1663 static void neigh_managed_work(struct work_struct *work)
1664 {
1665 struct neigh_table *tbl = container_of(work, struct neigh_table,
1666 managed_work.work);
1667 struct neighbour *neigh;
1668
1669 spin_lock_bh(&tbl->lock);
1670 list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1671 neigh_event_send_probe(neigh, NULL, false);
1672 queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1673 NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
1674 spin_unlock_bh(&tbl->lock);
1675 }
1676
neigh_proxy_process(struct timer_list * t)1677 static void neigh_proxy_process(struct timer_list *t)
1678 {
1679 struct neigh_table *tbl = timer_container_of(tbl, t, proxy_timer);
1680 long sched_next = 0;
1681 unsigned long now = jiffies;
1682 struct sk_buff *skb, *n;
1683
1684 spin_lock(&tbl->proxy_queue.lock);
1685
1686 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1687 long tdif = NEIGH_CB(skb)->sched_next - now;
1688
1689 if (tdif <= 0) {
1690 struct net_device *dev = skb->dev;
1691
1692 neigh_parms_qlen_dec(dev, tbl->family);
1693 __skb_unlink(skb, &tbl->proxy_queue);
1694
1695 if (tbl->proxy_redo && netif_running(dev)) {
1696 rcu_read_lock();
1697 tbl->proxy_redo(skb);
1698 rcu_read_unlock();
1699 } else {
1700 kfree_skb(skb);
1701 }
1702
1703 dev_put(dev);
1704 } else if (!sched_next || tdif < sched_next)
1705 sched_next = tdif;
1706 }
1707 timer_delete(&tbl->proxy_timer);
1708 if (sched_next)
1709 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1710 spin_unlock(&tbl->proxy_queue.lock);
1711 }
1712
neigh_proxy_delay(struct neigh_parms * p)1713 static unsigned long neigh_proxy_delay(struct neigh_parms *p)
1714 {
1715 /* If proxy_delay is zero, do not call get_random_u32_below()
1716 * as it is undefined behavior.
1717 */
1718 unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);
1719
1720 return proxy_delay ?
1721 jiffies + get_random_u32_below(proxy_delay) : jiffies;
1722 }
1723
pneigh_enqueue(struct neigh_table * tbl,struct neigh_parms * p,struct sk_buff * skb)1724 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1725 struct sk_buff *skb)
1726 {
1727 unsigned long sched_next = neigh_proxy_delay(p);
1728
1729 if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1730 kfree_skb(skb);
1731 return;
1732 }
1733
1734 NEIGH_CB(skb)->sched_next = sched_next;
1735 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1736
1737 spin_lock(&tbl->proxy_queue.lock);
1738 if (timer_delete(&tbl->proxy_timer)) {
1739 if (time_before(tbl->proxy_timer.expires, sched_next))
1740 sched_next = tbl->proxy_timer.expires;
1741 }
1742 skb_dst_drop(skb);
1743 dev_hold(skb->dev);
1744 __skb_queue_tail(&tbl->proxy_queue, skb);
1745 p->qlen++;
1746 mod_timer(&tbl->proxy_timer, sched_next);
1747 spin_unlock(&tbl->proxy_queue.lock);
1748 }
1749 EXPORT_SYMBOL(pneigh_enqueue);
1750
lookup_neigh_parms(struct neigh_table * tbl,struct net * net,int ifindex)1751 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1752 struct net *net, int ifindex)
1753 {
1754 struct neigh_parms *p;
1755
1756 list_for_each_entry(p, &tbl->parms_list, list) {
1757 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1758 (!p->dev && !ifindex && net_eq(net, &init_net)))
1759 return p;
1760 }
1761
1762 return NULL;
1763 }
1764
neigh_parms_alloc(struct net_device * dev,struct neigh_table * tbl)1765 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1766 struct neigh_table *tbl)
1767 {
1768 struct neigh_parms *p;
1769 struct net *net = dev_net(dev);
1770 const struct net_device_ops *ops = dev->netdev_ops;
1771
1772 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1773 if (p) {
1774 p->tbl = tbl;
1775 refcount_set(&p->refcnt, 1);
1776 neigh_set_reach_time(p);
1777 p->qlen = 0;
1778 netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
1779 p->dev = dev;
1780 write_pnet(&p->net, net);
1781 p->sysctl_table = NULL;
1782
1783 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1784 netdev_put(dev, &p->dev_tracker);
1785 kfree(p);
1786 return NULL;
1787 }
1788
1789 spin_lock_bh(&tbl->lock);
1790 list_add_rcu(&p->list, &tbl->parms.list);
1791 spin_unlock_bh(&tbl->lock);
1792
1793 neigh_parms_data_state_cleanall(p);
1794 }
1795 return p;
1796 }
1797 EXPORT_SYMBOL(neigh_parms_alloc);
1798
neigh_rcu_free_parms(struct rcu_head * head)1799 static void neigh_rcu_free_parms(struct rcu_head *head)
1800 {
1801 struct neigh_parms *parms =
1802 container_of(head, struct neigh_parms, rcu_head);
1803
1804 neigh_parms_put(parms);
1805 }
1806
neigh_parms_release(struct neigh_table * tbl,struct neigh_parms * parms)1807 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1808 {
1809 if (!parms || parms == &tbl->parms)
1810 return;
1811
1812 spin_lock_bh(&tbl->lock);
1813 list_del_rcu(&parms->list);
1814 parms->dead = 1;
1815 spin_unlock_bh(&tbl->lock);
1816
1817 netdev_put(parms->dev, &parms->dev_tracker);
1818 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1819 }
1820 EXPORT_SYMBOL(neigh_parms_release);
1821
/* Lock class key handed to skb_queue_head_init_class() for every table's
 * proxy_queue.
 */
static struct lock_class_key neigh_table_proxy_queue_class;

/* Registered neighbour tables, indexed by table index (NEIGH_ARP_TABLE,
 * NEIGH_ND_TABLE, ...).  Slots are published with rcu_assign_pointer() in
 * neigh_table_init() and cleared in neigh_table_clear().
 */
static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1825
/* Initialise neighbour table @tbl and publish it as neigh_tables[@index].
 *
 * Sets up the parameter, gc and managed lists, the per-cpu statistics,
 * the proc statistics entry (CONFIG_PROC_FS only), both hash tables, the
 * locks, the periodic gc and managed works and the proxy timer and queue.
 * Any allocation failure is fatal: the function panics instead of
 * returning an error.
 */
void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	INIT_LIST_HEAD(&tbl->managed_list);

	/* The table's built-in default parameters are the first list member
	 * and belong to init_net.
	 */
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	neigh_set_reach_time(&tbl->parms);
	tbl->parms.qlen = 0;

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	/* Proxy hash: PNEIGH_HASHMASK + 1 zero-initialised bucket pointers. */
	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	/* Derive the entry size from the key length unless the protocol
	 * preset one, in which case it must honour NEIGH_PRIV_ALIGN.
	 */
	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	spin_lock_init(&tbl->lock);
	mutex_init(&tbl->phash_lock);

	/* First gc pass after one reachable_time; the managed work is
	 * queued with no delay.
	 */
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			tbl->parms.reachable_time);
	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);

	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	/* Publish last, once the table is fully set up. */
	rcu_assign_pointer(neigh_tables[index], tbl);
}
EXPORT_SYMBOL(neigh_table_init);
1884
/*
 * Tear down neighbour table @tbl and remove it from neigh_tables[@index].
 * Always returns 0.
 *
 * Only called from ndisc_cleanup(), which means this is dead code
 * because we no longer can unload IPv6 module.
 */
int neigh_table_clear(int index, struct neigh_table *tbl)
{
	/* Unpublish first and wait for RCU readers of the slot. */
	RCU_INIT_POINTER(neigh_tables[index], NULL);
	synchronize_rcu();

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->managed_work);
	cancel_delayed_work_sync(&tbl->gc_work);
	timer_delete_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
	neigh_ifdown(tbl, NULL);
	/* Entries remaining after neigh_ifdown() are reported as a leak. */
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	/* The hash table is freed via an RCU callback. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1918
neigh_find_table(int family)1919 static struct neigh_table *neigh_find_table(int family)
1920 {
1921 struct neigh_table *tbl = NULL;
1922
1923 switch (family) {
1924 case AF_INET:
1925 tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
1926 break;
1927 case AF_INET6:
1928 tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
1929 break;
1930 }
1931
1932 return tbl;
1933 }
1934
/* Netlink attribute policy for the NDA_* attributes of neighbour
 * messages; neigh_add() passes it to nlmsg_parse_deprecated().
 *
 * The NDA_UNSPEC slot only carries strict_start_type (NDA_NH_ID).
 * Address attributes are binary blobs of at most MAX_ADDR_LEN bytes, and
 * NDA_FLAGS_EXT accepts only bits within NTF_EXT_MASK.
 */
const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
	[NDA_NH_ID]		= { .type = NLA_U32 },
	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
};
1951
neigh_delete(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1952 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1953 struct netlink_ext_ack *extack)
1954 {
1955 struct net *net = sock_net(skb->sk);
1956 struct ndmsg *ndm;
1957 struct nlattr *dst_attr;
1958 struct neigh_table *tbl;
1959 struct neighbour *neigh;
1960 struct net_device *dev = NULL;
1961 int err = -EINVAL;
1962
1963 ASSERT_RTNL();
1964 if (nlmsg_len(nlh) < sizeof(*ndm))
1965 goto out;
1966
1967 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1968 if (!dst_attr) {
1969 NL_SET_ERR_MSG(extack, "Network address not specified");
1970 goto out;
1971 }
1972
1973 ndm = nlmsg_data(nlh);
1974 if (ndm->ndm_ifindex) {
1975 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1976 if (dev == NULL) {
1977 err = -ENODEV;
1978 goto out;
1979 }
1980 }
1981
1982 tbl = neigh_find_table(ndm->ndm_family);
1983 if (tbl == NULL)
1984 return -EAFNOSUPPORT;
1985
1986 if (nla_len(dst_attr) < (int)tbl->key_len) {
1987 NL_SET_ERR_MSG(extack, "Invalid network address");
1988 goto out;
1989 }
1990
1991 if (ndm->ndm_flags & NTF_PROXY) {
1992 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1993 goto out;
1994 }
1995
1996 if (dev == NULL)
1997 goto out;
1998
1999 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
2000 if (neigh == NULL) {
2001 err = -ENOENT;
2002 goto out;
2003 }
2004
2005 err = __neigh_update(neigh, NULL, NUD_FAILED,
2006 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
2007 NETLINK_CB(skb).portid, extack);
2008 spin_lock_bh(&tbl->lock);
2009 neigh_release(neigh);
2010 neigh_remove_one(neigh);
2011 spin_unlock_bh(&tbl->lock);
2012
2013 out:
2014 return err;
2015 }
2016
/* Netlink handler that creates or updates a neighbour or proxy entry.
 *
 * Requires RTNL.  Parses the NDA_* attributes against nda_policy, merges
 * NDA_FLAGS_EXT into the flag word, and then either creates a proxy entry
 * (NTF_PROXY) or looks up / creates the neighbour and applies the
 * requested state through __neigh_update().
 *
 * Returns 0 or a negative errno: the parse error, -EINVAL for invalid
 * requests, -ENODEV for an unknown ifindex, -EAFNOSUPPORT for a family
 * without table, -ENOENT when the entry is missing and NLM_F_CREATE is
 * not set, -EEXIST when it exists and NLM_F_EXCL is set, or the result of
 * pneigh_create() / ___neigh_create() / __neigh_update().
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	/* Administrative update that may override; the override bits are
	 * cleared below for existing entries unless NLM_F_REPLACE is set.
	 */
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	u32 ndm_flags;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	/* Default error for the validation failures that follow. */
	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	ndm_flags = ndm->ndm_flags;
	if (tb[NDA_FLAGS_EXT]) {
		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);

		/* neigh->flags must be wide enough for the header flags plus
		 * every extended flag bit.
		 */
		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
			      hweight32(NTF_EXT_MASK)));
		ndm_flags |= (ext << NTF_EXT_SHIFT);
	}
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
	if (ndm_flags & NTF_PROXY) {
		/* Proxy entries cannot be managed or externally validated. */
		if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
			goto out;
		}

		err = pneigh_create(tbl, net, dst, dev, ndm_flags, protocol,
				    !!(ndm->ndm_state & NUD_PERMANENT));
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		/* No such entry yet: create it if the request allows. */
		bool ndm_permanent  = ndm->ndm_state & NUD_PERMANENT;
		bool exempt_from_gc = ndm_permanent ||
				      ndm_flags & (NTF_EXT_LEARNED |
						   NTF_EXT_VALIDATED);

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}
		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
			err = -EINVAL;
			goto out;
		}
		if (ndm_flags & NTF_EXT_VALIDATED) {
			u8 state = ndm->ndm_state;

			/* NTF_USE and NTF_MANAGED will result in the neighbor
			 * being created with an invalid state (NUD_NONE).
			 */
			if (ndm_flags & (NTF_USE | NTF_MANAGED))
				state = NUD_NONE;

			if (!(state & NUD_VALID)) {
				NL_SET_ERR_MSG(extack,
					       "Cannot create externally validated neighbor with an invalid state");
				err = -EINVAL;
				goto out;
			}
		}

		neigh = ___neigh_create(tbl, dst, dev,
					ndm_flags &
					(NTF_EXT_LEARNED | NTF_MANAGED |
					 NTF_EXT_VALIDATED),
					exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		/* Existing entry: every error exit below must drop the
		 * reference taken by neigh_lookup().
		 */
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}
		if (ndm_flags & NTF_EXT_VALIDATED) {
			u8 state = ndm->ndm_state;

			/* NTF_USE and NTF_MANAGED do not update the existing
			 * state other than clearing it if it was
			 * NUD_PERMANENT.
			 */
			if (ndm_flags & (NTF_USE | NTF_MANAGED))
				state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT;

			if (!(state & NUD_VALID)) {
				NL_SET_ERR_MSG(extack,
					       "Cannot mark neighbor as externally validated with an invalid state");
				err = -EINVAL;
				neigh_release(neigh);
				goto out;
			}
		}

		/* Without NLM_F_REPLACE the address and router flag of an
		 * existing entry must not be overridden.
		 */
		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	/* Translate the netlink NTF_* flags into NEIGH_UPDATE_F_* flags. */
	if (protocol)
		neigh->protocol = protocol;
	if (ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
	if (ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;
	if (ndm_flags & NTF_MANAGED)
		flags |= NEIGH_UPDATE_F_MANAGED;
	if (ndm_flags & NTF_USE)
		flags |= NEIGH_UPDATE_F_USE;
	if (ndm_flags & NTF_EXT_VALIDATED)
		flags |= NEIGH_UPDATE_F_EXT_VALIDATED;

	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
			     NETLINK_CB(skb).portid, extack);
	/* NTF_USE / NTF_MANAGED additionally kick resolution right away. */
	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED))
		neigh_event_send(neigh, NULL);
	neigh_release(neigh);
out:
	return err;
}
2196
neightbl_fill_parms(struct sk_buff * skb,struct neigh_parms * parms)2197 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2198 {
2199 struct nlattr *nest;
2200
2201 nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2202 if (nest == NULL)
2203 return -ENOBUFS;
2204
2205 if ((parms->dev &&
2206 nla_put_u32(skb, NDTPA_IFINDEX, READ_ONCE(parms->dev->ifindex))) ||
2207 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2208 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2209 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2210 /* approximative value for deprecated QUEUE_LEN (in packets) */
2211 nla_put_u32(skb, NDTPA_QUEUE_LEN,
2212 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2213 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2214 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2215 nla_put_u32(skb, NDTPA_UCAST_PROBES,
2216 NEIGH_VAR(parms, UCAST_PROBES)) ||
2217 nla_put_u32(skb, NDTPA_MCAST_PROBES,
2218 NEIGH_VAR(parms, MCAST_PROBES)) ||
2219 nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2220 NEIGH_VAR(parms, MCAST_REPROBES)) ||
2221 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, READ_ONCE(parms->reachable_time),
2222 NDTPA_PAD) ||
2223 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2224 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2225 nla_put_msecs(skb, NDTPA_GC_STALETIME,
2226 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2227 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2228 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2229 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2230 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2231 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2232 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2233 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2234 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2235 nla_put_msecs(skb, NDTPA_LOCKTIME,
2236 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2237 nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2238 NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2239 goto nla_put_failure;
2240 return nla_nest_end(skb, nest);
2241
2242 nla_put_failure:
2243 nla_nest_cancel(skb, nest);
2244 return -EMSGSIZE;
2245 }
2246
/*
 * Build one netlink message describing neighbour table @tbl: its name, GC
 * tunables, an NDTA_CONFIG snapshot, NDTA_STATS summed over all possible
 * CPUs, and the table's default parameters as a nested NDTA_PARMS.
 *
 * Returns 0 on success or -EMSGSIZE if the message does not fit in @skb, in
 * which case the partially built message is cancelled.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct ndt_stats totals;
	struct nlmsghdr *nlh;
	struct ndtmsg *hdr;
	int cpu;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->ndtm_family = tbl->family;
	hdr->ndtm_pad1 = 0;
	hdr->ndtm_pad2 = 0;

	/* Table name and garbage-collection tunables. */
	if (nla_put_string(skb, NDTA_NAME, tbl->id))
		goto nla_put_failure;
	if (nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
			  NDTA_PAD))
		goto nla_put_failure;
	if (nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)))
		goto nla_put_failure;
	if (nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)))
		goto nla_put_failure;
	if (nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
		goto nla_put_failure;

	/* NDTA_CONFIG: sizes, entry count, ages relative to now, hash info. */
	{
		unsigned long stamp = jiffies;
		long since_flush = stamp - READ_ONCE(tbl->last_flush);
		long since_rand = stamp - READ_ONCE(tbl->last_rand);
		struct neigh_hash_table *nht;
		struct ndt_config cfg = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(since_flush),
			.ndtc_last_rand		= jiffies_to_msecs(since_rand),
			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
		};

		nht = rcu_dereference(tbl->nht);
		cfg.ndtc_hash_rnd = nht->hash_rnd[0];
		cfg.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);

		if (nla_put(skb, NDTA_CONFIG, sizeof(cfg), &cfg))
			goto nla_put_failure;
	}

	/* NDTA_STATS: add up the per-CPU counters. */
	memset(&totals, 0, sizeof(totals));
	for_each_possible_cpu(cpu) {
		struct neigh_statistics *pcpu = per_cpu_ptr(tbl->stats, cpu);

		totals.ndts_allocs		+= READ_ONCE(pcpu->allocs);
		totals.ndts_destroys		+= READ_ONCE(pcpu->destroys);
		totals.ndts_hash_grows		+= READ_ONCE(pcpu->hash_grows);
		totals.ndts_res_failed		+= READ_ONCE(pcpu->res_failed);
		totals.ndts_lookups		+= READ_ONCE(pcpu->lookups);
		totals.ndts_hits		+= READ_ONCE(pcpu->hits);
		totals.ndts_rcv_probes_mcast	+= READ_ONCE(pcpu->rcv_probes_mcast);
		totals.ndts_rcv_probes_ucast	+= READ_ONCE(pcpu->rcv_probes_ucast);
		totals.ndts_periodic_gc_runs	+= READ_ONCE(pcpu->periodic_gc_runs);
		totals.ndts_forced_gc_runs	+= READ_ONCE(pcpu->forced_gc_runs);
		totals.ndts_table_fulls		+= READ_ONCE(pcpu->table_fulls);
	}
	if (nla_put_64bit(skb, NDTA_STATS, sizeof(totals), &totals, NDTA_PAD))
		goto nla_put_failure;

	/* The table's own default parms must not be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2330
/*
 * Build one netlink message carrying only the table name and a single
 * parameter set @parms of table @tbl.
 *
 * Returns 0 on success or -EMSGSIZE if the message does not fit in @skb
 * (the partial message is cancelled).
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *hdr;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->ndtm_family = tbl->family;
	hdr->ndtm_pad1 = 0;
	hdr->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0)
		goto errout;
	if (neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	nlmsg_end(skb, nlh);
	return 0;

errout:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2359
/*
 * Attribute policy for the top-level NDTA_* attributes of a neighbour table
 * message; neightbl_set() passes it to nlmsg_parse_deprecated().
 */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
2368
/*
 * Attribute policy for the NDTPA_* attributes nested inside NDTA_PARMS;
 * neightbl_set() passes it to nla_parse_nested_deprecated().
 * Counts and queue lengths are 32-bit, time values are 64-bit.
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_QUEUE_LENBYTES]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
	/* The only entry that also declares a lower bound (.min = 1). */
	[NDTPA_INTERVAL_PROBE_TIME_MS]	= { .type = NLA_U64, .min = 1 },
};
2387
/*
 * Netlink handler that updates the tunables of one neighbour table.
 *
 * The table is selected by NDTA_NAME (mandatory) and, when non-zero, by the
 * family in the ndtmsg header. Per-parms values come from the nested
 * NDTA_PARMS attribute; table-wide GC thresholds and interval come from
 * NDTA_THRESH1..3 and NDTA_GC_INTERVAL.
 *
 * Returns 0 on success, a negative parse error, -EINVAL when NDTA_NAME is
 * missing, or -ENOENT when the table or parms set is not found or when the
 * table-wide values are set from a namespace other than init_net.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NDTA_MAX + 1];
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* Held until the function returns; released on every exit path below. */
	rcu_read_lock();

	/* Find the table whose id matches NDTA_NAME (and family, if given). */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = rcu_dereference(neigh_tables[tidx]);
		if (!tbl)
			continue;

		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found) {
		rcu_read_unlock();
		err = -ENOENT;
		goto errout;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	spin_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex stays 0 when NDTPA_IFINDEX is absent. */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every attribute that is present; others are left alone. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Packet count is converted to the byte-based limit. */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				neigh_set_reach_time(p);
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				/* Notify netevent listeners of the new value. */
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_INTERVAL_PROBE_TIME_MS:
				NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/*
	 * Table-wide GC settings may only be changed from init_net. Note that
	 * any NDTA_PARMS changes above have already been applied when this
	 * check rejects the request.
	 */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));

	if (tb[NDTA_THRESH2])
		WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));

	if (tb[NDTA_THRESH3])
		WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));

	if (tb[NDTA_GC_INTERVAL])
		WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));

	err = 0;

errout_tbl_lock:
	spin_unlock_bh(&tbl->lock);
	rcu_read_unlock();
errout:
	return err;
}
2560
neightbl_valid_dump_info(const struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2561 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2562 struct netlink_ext_ack *extack)
2563 {
2564 struct ndtmsg *ndtm;
2565
2566 ndtm = nlmsg_payload(nlh, sizeof(*ndtm));
2567 if (!ndtm) {
2568 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2569 return -EINVAL;
2570 }
2571
2572 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2573 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2574 return -EINVAL;
2575 }
2576
2577 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2578 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2579 return -EINVAL;
2580 }
2581
2582 return 0;
2583 }
2584
/*
 * Netlink dump callback for neighbour tables.
 *
 * For every registered table matching the requested family, emits one
 * message for the table itself followed by one message per additional
 * parameter set that belongs to the caller's network namespace.
 *
 * Resume state is kept in @cb: args[0] is the table index to resume from
 * and args[1] is the number of parameter sets already handled within that
 * table. Returns skb->len, or a negative error from strict validation.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	rcu_read_lock();

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = rcu_dereference(neigh_tables[tidx]);
		if (!tbl)
			continue;

		/* Skip tables dumped in an earlier pass or of another family. */
		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		/* Out of room: stop here and resume at this table next time. */
		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		/* Start after the table's own default parms, which
		 * neightbl_fill_info() already reported.
		 */
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from_rcu(p, &tbl->parms_list, list) {
			/* Parms of other namespaces are not counted in nidx. */
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		/* The parms skip count only applies to the resumed table. */
		neigh_skip = 0;
	}
out:
	rcu_read_unlock();

	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2649
/*
 * Build one netlink message describing neighbour entry @neigh.
 *
 * No locking is done here; neigh_fill_info() wraps this function with
 * neigh->lock held for reading.
 *
 * Returns 0 on success or -EMSGSIZE if the message does not fit in @skb
 * (the partial message is cancelled).
 */
static int __neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			     u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long stamp = jiffies;
	struct nda_cacheinfo cache;
	u32 old_flags, ext_flags;
	struct nlmsghdr *nlh;
	struct ndmsg *hdr;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Split the flags into the header part and the NDA_FLAGS_EXT part. */
	ext_flags = neigh->flags >> NTF_EXT_SHIFT;
	old_flags = neigh->flags & NTF_OLD_MASK;

	hdr = nlmsg_data(nlh);
	hdr->ndm_family = neigh->ops->family;
	hdr->ndm_pad1 = 0;
	hdr->ndm_pad2 = 0;
	hdr->ndm_flags = old_flags;
	hdr->ndm_type = neigh->type;
	hdr->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	hdr->ndm_state = neigh->nud_state;

	/* The link-layer address is only reported for NUD_VALID states. */
	if (neigh->nud_state & NUD_VALID) {
		char lladdr[MAX_ADDR_LEN];

		neigh_ha_snapshot(lladdr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, lladdr) < 0)
			goto nla_put_failure;
	}

	/* Ages are relative to the jiffies value sampled on entry. */
	cache.ndm_used = jiffies_to_clock_t(stamp - neigh->used);
	cache.ndm_confirmed = jiffies_to_clock_t(stamp - neigh->confirmed);
	cache.ndm_updated = jiffies_to_clock_t(stamp - neigh->updated);
	cache.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)))
		goto nla_put_failure;
	if (nla_put(skb, NDA_CACHEINFO, sizeof(cache), &cache))
		goto nla_put_failure;

	/* Optional attributes are left out when their value is zero. */
	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;
	if (ext_flags && nla_put_u32(skb, NDA_FLAGS_EXT, ext_flags))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2707
/*
 * Locked wrapper around __neigh_fill_info(): takes neigh->lock for reading
 * (with bottom halves disabled) while the message is built and returns the
 * helper's result.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	int rc;

	read_lock_bh(&neigh->lock);
	rc = __neigh_fill_info(skb, neigh, pid, seq, type, flags);
	read_unlock_bh(&neigh->lock);

	return rc;
}
2721
/*
 * Build one netlink message describing proxy neighbour entry @pn of table
 * @tbl. The entry is always reported with NTF_PROXY set, type RTN_UNICAST
 * and state NUD_NONE.
 *
 * Returns 0 on success or -EMSGSIZE if the message does not fit in @skb
 * (the partial message is cancelled).
 */
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	u32 old_flags, ext_flags;
	struct nlmsghdr *nlh;
	struct ndmsg *hdr;
	u8 proto;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Read the flags once, then split header bits from extended bits. */
	old_flags = READ_ONCE(pn->flags);
	ext_flags = old_flags >> NTF_EXT_SHIFT;
	old_flags &= NTF_OLD_MASK;

	hdr = nlmsg_data(nlh);
	hdr->ndm_family = tbl->family;
	hdr->ndm_pad1 = 0;
	hdr->ndm_pad2 = 0;
	hdr->ndm_flags = old_flags | NTF_PROXY;
	hdr->ndm_type = RTN_UNICAST;
	/* Entries without a device report ifindex 0. */
	hdr->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	hdr->ndm_state = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	/* Optional attributes are left out when their value is zero. */
	proto = READ_ONCE(pn->protocol);
	if (proto && nla_put_u8(skb, NDA_PROTOCOL, proto))
		goto nla_put_failure;
	if (ext_flags && nla_put_u32(skb, NDA_FLAGS_EXT, ext_flags))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2764
neigh_master_filtered(struct net_device * dev,int master_idx)2765 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2766 {
2767 struct net_device *master;
2768
2769 if (!master_idx)
2770 return false;
2771
2772 master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
2773
2774 /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
2775 * invalid value for ifindex to denote "no master".
2776 */
2777 if (master_idx == -1)
2778 return !!master;
2779
2780 if (!master || master->ifindex != master_idx)
2781 return true;
2782
2783 return false;
2784 }
2785
neigh_ifindex_filtered(struct net_device * dev,int filter_idx)2786 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2787 {
2788 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2789 return true;
2790
2791 return false;
2792 }
2793
/* Optional filters for a neighbour dump, filled in by neigh_valid_dump_req(). */
struct neigh_dump_filter {
	/* From NDA_MASTER: 0 = no filter, -1 = only devices without a master. */
	int master_idx;
	/* From NDA_IFINDEX: 0 = no filter. */
	int dev_idx;
};
2798
/*
 * Dump the neighbour entries of @tbl that belong to the caller's network
 * namespace and pass @filter into @skb.
 *
 * Resume state is kept in @cb: args[1] is the hash bucket and args[2] the
 * index within that bucket to continue from. Returns 0 when the whole table
 * has been walked, or the negative error from neigh_fill_info().
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int err = 0, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	/* Tell userspace that the dump is filtered. */
	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	nht = rcu_dereference(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* The in-bucket skip count only applies to the resumed bucket. */
		if (h > s_h)
			s_idx = 0;
		idx = 0;
		neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
			/* Skipped entries still advance idx so resume positions match. */
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNEIGH, flags);
			if (err < 0)
				goto out;
next:
			idx++;
		}
	}
out:
	/* Record where to continue on the next call. */
	cb->args[1] = h;
	cb->args[2] = idx;
	return err;
}
2839
/*
 * Dump the proxy neighbour entries of @tbl that belong to the caller's
 * network namespace and pass @filter into @skb.
 *
 * Resume state is kept in @cb: args[3] is the proxy hash bucket and args[4]
 * the index within that bucket to continue from. Returns 0 when all buckets
 * have been walked, or the negative error from pneigh_fill_info().
 */
static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int err = 0, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	/* Tell userspace that the dump is filtered. */
	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		/* The in-bucket skip count only applies to the resumed bucket. */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference(tbl->phash_buckets[h]), idx = 0;
		     n;
		     n = rcu_dereference(n->next)) {
			/* Skipped entries still advance idx so resume positions match. */
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					       cb->nlh->nlmsg_seq,
					       RTM_NEWNEIGH, flags, tbl);
			if (err < 0)
				goto out;
		next:
			idx++;
		}
	}

out:
	/* Record where to continue on the next call. */
	cb->args[3] = h;
	cb->args[4] = idx;
	return err;
}
2879
/*
 * Parse a neighbour dump request and extract the optional filters.
 *
 * With @strict_check the ndmsg header must be present, carry only zeroed
 * fields apart from the family and an optional NTF_PROXY flag, and only
 * NDA_IFINDEX and NDA_MASTER attributes are accepted. Without it the
 * message is parsed leniently and unknown attributes are ignored.
 *
 * Returns 0 on success, -EINVAL for a rejected request, or the negative
 * error from attribute parsing. @filter is updated for each filter
 * attribute found.
 */
static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
				bool strict_check,
				struct neigh_dump_filter *filter,
				struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	int attr, err;

	if (!strict_check) {
		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
					     NDA_MAX, nda_policy, extack);
	} else {
		struct ndmsg *ndm = nlmsg_payload(nlh, sizeof(*ndm));

		if (ndm == NULL) {
			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
			return -EINVAL;
		}

		if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
		    ndm->ndm_state || ndm->ndm_type) {
			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
			return -EINVAL;
		}

		if (ndm->ndm_flags & ~NTF_PROXY) {
			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
			return -EINVAL;
		}

		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
						    tb, NDA_MAX, nda_policy,
						    extack);
	}
	if (err < 0)
		return err;

	for (attr = 0; attr <= NDA_MAX; attr++) {
		if (tb[attr] == NULL)
			continue;

		/* all new attributes should require strict_check */
		if (attr == NDA_IFINDEX) {
			filter->dev_idx = nla_get_u32(tb[attr]);
		} else if (attr == NDA_MASTER) {
			filter->master_idx = nla_get_u32(tb[attr]);
		} else if (strict_check) {
			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
			return -EINVAL;
		}
	}

	return 0;
}
2940
/*
 * Netlink dump callback for neighbour entries.
 *
 * Walks every registered table matching the requested family and dumps
 * either its proxy entries (when the request header's ndm_flags equals
 * NTF_PROXY) or its regular entries. cb->args[0] holds the table index to
 * resume from; the remaining args are per-table cursors owned by
 * neigh_dump_table() and pneigh_dump_table().
 *
 * Returns 0 or the negative error of the last table dump; request
 * validation errors are only fatal under strict checking.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	/* Without strict checking a malformed request is tolerated. */
	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;
	err = 0;

	s_t = cb->args[0];

	rcu_read_lock();
	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = rcu_dereference(neigh_tables[t]);

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Moving on to a new table: clear the per-table cursors. */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}
	rcu_read_unlock();

	cb->args[0] = t;
	return err;
}
2989
neigh_valid_get_req(const struct nlmsghdr * nlh,struct nlattr ** tb,struct netlink_ext_ack * extack)2990 static struct ndmsg *neigh_valid_get_req(const struct nlmsghdr *nlh,
2991 struct nlattr **tb,
2992 struct netlink_ext_ack *extack)
2993 {
2994 struct ndmsg *ndm;
2995 int err, i;
2996
2997 ndm = nlmsg_payload(nlh, sizeof(*ndm));
2998 if (!ndm) {
2999 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
3000 return ERR_PTR(-EINVAL);
3001 }
3002
3003 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
3004 ndm->ndm_type) {
3005 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
3006 return ERR_PTR(-EINVAL);
3007 }
3008
3009 if (ndm->ndm_flags & ~NTF_PROXY) {
3010 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
3011 return ERR_PTR(-EINVAL);
3012 }
3013
3014 if (!(ndm->ndm_flags & NTF_PROXY) && !ndm->ndm_ifindex) {
3015 NL_SET_ERR_MSG(extack, "No device specified");
3016 return ERR_PTR(-EINVAL);
3017 }
3018
3019 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
3020 NDA_MAX, nda_policy, extack);
3021 if (err < 0)
3022 return ERR_PTR(err);
3023
3024 for (i = 0; i <= NDA_MAX; ++i) {
3025 switch (i) {
3026 case NDA_DST:
3027 if (!tb[i]) {
3028 NL_SET_ERR_ATTR_MISS(extack, NULL, NDA_DST);
3029 return ERR_PTR(-EINVAL);
3030 }
3031 break;
3032 default:
3033 if (!tb[i])
3034 continue;
3035
3036 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
3037 return ERR_PTR(-EINVAL);
3038 }
3039 }
3040
3041 return ndm;
3042 }
3043
neigh_nlmsg_size(void)3044 static inline size_t neigh_nlmsg_size(void)
3045 {
3046 return NLMSG_ALIGN(sizeof(struct ndmsg))
3047 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3048 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
3049 + nla_total_size(sizeof(struct nda_cacheinfo))
3050 + nla_total_size(4) /* NDA_PROBES */
3051 + nla_total_size(4) /* NDA_FLAGS_EXT */
3052 + nla_total_size(1); /* NDA_PROTOCOL */
3053 }
3054
pneigh_nlmsg_size(void)3055 static inline size_t pneigh_nlmsg_size(void)
3056 {
3057 return NLMSG_ALIGN(sizeof(struct ndmsg))
3058 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3059 + nla_total_size(4) /* NDA_FLAGS_EXT */
3060 + nla_total_size(1); /* NDA_PROTOCOL */
3061 }
3062
neigh_get(struct sk_buff * in_skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)3063 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3064 struct netlink_ext_ack *extack)
3065 {
3066 struct net *net = sock_net(in_skb->sk);
3067 u32 pid = NETLINK_CB(in_skb).portid;
3068 struct nlattr *tb[NDA_MAX + 1];
3069 struct net_device *dev = NULL;
3070 u32 seq = nlh->nlmsg_seq;
3071 struct neigh_table *tbl;
3072 struct neighbour *neigh;
3073 struct sk_buff *skb;
3074 struct ndmsg *ndm;
3075 void *dst;
3076 int err;
3077
3078 ndm = neigh_valid_get_req(nlh, tb, extack);
3079 if (IS_ERR(ndm))
3080 return PTR_ERR(ndm);
3081
3082 if (ndm->ndm_flags & NTF_PROXY)
3083 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
3084 else
3085 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
3086 if (!skb)
3087 return -ENOBUFS;
3088
3089 rcu_read_lock();
3090
3091 tbl = neigh_find_table(ndm->ndm_family);
3092 if (!tbl) {
3093 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
3094 err = -EAFNOSUPPORT;
3095 goto err_unlock;
3096 }
3097
3098 if (nla_len(tb[NDA_DST]) != (int)tbl->key_len) {
3099 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
3100 err = -EINVAL;
3101 goto err_unlock;
3102 }
3103
3104 dst = nla_data(tb[NDA_DST]);
3105
3106 if (ndm->ndm_ifindex) {
3107 dev = dev_get_by_index_rcu(net, ndm->ndm_ifindex);
3108 if (!dev) {
3109 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
3110 err = -ENODEV;
3111 goto err_unlock;
3112 }
3113 }
3114
3115 if (ndm->ndm_flags & NTF_PROXY) {
3116 struct pneigh_entry *pn;
3117
3118 pn = pneigh_lookup(tbl, net, dst, dev);
3119 if (!pn) {
3120 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3121 err = -ENOENT;
3122 goto err_unlock;
3123 }
3124
3125 err = pneigh_fill_info(skb, pn, pid, seq, RTM_NEWNEIGH, 0, tbl);
3126 if (err)
3127 goto err_unlock;
3128 } else {
3129 neigh = neigh_lookup(tbl, dst, dev);
3130 if (!neigh) {
3131 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3132 err = -ENOENT;
3133 goto err_unlock;
3134 }
3135
3136 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
3137 neigh_release(neigh);
3138 if (err)
3139 goto err_unlock;
3140 }
3141
3142 rcu_read_unlock();
3143
3144 return rtnl_unicast(skb, net, pid);
3145 err_unlock:
3146 rcu_read_unlock();
3147 kfree_skb(skb);
3148 return err;
3149 }
3150
neigh_for_each(struct neigh_table * tbl,void (* cb)(struct neighbour *,void *),void * cookie)3151 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3152 {
3153 int chain;
3154 struct neigh_hash_table *nht;
3155
3156 rcu_read_lock();
3157 nht = rcu_dereference(tbl->nht);
3158
3159 spin_lock_bh(&tbl->lock); /* avoid resizes */
3160 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3161 struct neighbour *n;
3162
3163 neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
3164 cb(n, cookie);
3165 }
3166 spin_unlock_bh(&tbl->lock);
3167 rcu_read_unlock();
3168 }
3169 EXPORT_SYMBOL(neigh_for_each);
3170
/*
 * Walk every neighbour entry in @tbl and release those for which @cb
 * returns non-zero.
 *
 * @cb is called with the entry's lock write-held. A selected entry is
 * unlinked from its hash bucket and device list and marked dead under that
 * lock, then handed to neigh_cleanup_and_release() after the lock is
 * dropped.
 *
 * The tbl->lock must be held as a writer and BH disabled.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	struct neigh_hash_table *nht;
	int chain;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct hlist_node *tmp;
		struct neighbour *n;

		/* The _safe iterator is used because entries are unlinked in the loop. */
		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				hlist_del_rcu(&n->hash);
				hlist_del_rcu(&n->dev_list);
				neigh_mark_dead(n);
			}
			write_unlock(&n->lock);
			/* Final cleanup is done without the entry lock held. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
3201
neigh_xmit(int index,struct net_device * dev,const void * addr,struct sk_buff * skb)3202 int neigh_xmit(int index, struct net_device *dev,
3203 const void *addr, struct sk_buff *skb)
3204 {
3205 int err = -EAFNOSUPPORT;
3206
3207 if (likely(index < NEIGH_NR_TABLES)) {
3208 struct neigh_table *tbl;
3209 struct neighbour *neigh;
3210
3211 rcu_read_lock();
3212 tbl = rcu_dereference(neigh_tables[index]);
3213 if (!tbl)
3214 goto out_unlock;
3215 if (index == NEIGH_ARP_TABLE) {
3216 u32 key = *((u32 *)addr);
3217
3218 neigh = __ipv4_neigh_lookup_noref(dev, key);
3219 } else {
3220 neigh = __neigh_lookup_noref(tbl, addr, dev);
3221 }
3222 if (!neigh)
3223 neigh = __neigh_create(tbl, addr, dev, false);
3224 err = PTR_ERR(neigh);
3225 if (IS_ERR(neigh)) {
3226 rcu_read_unlock();
3227 goto out_kfree_skb;
3228 }
3229 err = READ_ONCE(neigh->output)(neigh, skb);
3230 out_unlock:
3231 rcu_read_unlock();
3232 }
3233 else if (index == NEIGH_LINK_TABLE) {
3234 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
3235 addr, NULL, skb->len);
3236 if (err < 0)
3237 goto out_kfree_skb;
3238 err = dev_queue_xmit(skb);
3239 }
3240 out:
3241 return err;
3242 out_kfree_skb:
3243 kfree_skb(skb);
3244 goto out;
3245 }
3246 EXPORT_SYMBOL(neigh_xmit);
3247
3248 #ifdef CONFIG_PROC_FS
3249
/*
 * Decide whether neighbour @n should be reported by the seq_file walk.
 *
 * Entries belonging to a different network namespace than the seq_file are
 * rejected. If a sub-iterator is installed it is consulted next: a NULL
 * result rejects the entry, and when @pos is non-NULL its result is returned
 * directly. Finally, with NEIGH_SEQ_SKIP_NOARP set, entries whose nud_state
 * has no bits outside NUD_NOARP are rejected.
 *
 * Returns the entry to report, or NULL to skip @n.
 */
static struct neighbour *neigh_get_valid(struct seq_file *seq,
					 struct neighbour *n,
					 loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;

	if (!net_eq(dev_net(n->dev), seq_file_net(seq)))
		return NULL;

	if (state->neigh_sub_iter) {
		loff_t scratch = 0;	/* stand-in position when caller has none */
		void *sub;

		sub = state->neigh_sub_iter(state, n, pos ? pos : &scratch);
		if (!sub)
			return NULL;
		if (pos)
			return sub;
	}

	if ((state->flags & NEIGH_SEQ_SKIP_NOARP) &&
	    !(READ_ONCE(n->nud_state) & ~NUD_NOARP))
		return NULL;

	return n;
}
3279
neigh_get_first(struct seq_file * seq)3280 static struct neighbour *neigh_get_first(struct seq_file *seq)
3281 {
3282 struct neigh_seq_state *state = seq->private;
3283 struct neigh_hash_table *nht = state->nht;
3284 struct neighbour *n, *tmp;
3285
3286 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3287
3288 while (++state->bucket < (1 << nht->hash_shift)) {
3289 neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
3290 tmp = neigh_get_valid(seq, n, NULL);
3291 if (tmp)
3292 return tmp;
3293 }
3294 }
3295
3296 return NULL;
3297 }
3298
/*
 * Return the entry following @n in the seq_file walk, or NULL when the
 * neighbour hash table is exhausted.
 *
 * If a sub-iterator is installed and yields something for @n, the walk
 * stays on @n and @pos is left for the sub-iterator to manage. Otherwise
 * the rest of the current hash chain is searched, then the following
 * buckets via neigh_get_first(). When an entry is found and @pos is
 * non-NULL, *pos is decremented.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct neighbour *found = NULL;

	if (state->neigh_sub_iter && state->neigh_sub_iter(state, n, pos))
		return n;

	/* Remainder of the chain @n lives on. */
	hlist_for_each_entry_continue(n, hash) {
		found = neigh_get_valid(seq, n, pos);
		if (found)
			break;
	}

	/* Chain exhausted: move on to the next non-empty bucket. */
	if (!found)
		found = neigh_get_first(seq);

	if (found && pos)
		--(*pos);

	return found;
}
3328
/*
 * Position the walk on the *pos'th reportable neighbour (1-based).
 *
 * *pos is counted down as entries are consumed. If the table runs out
 * first, NULL is returned and *pos holds the count still to be skipped.
 * *pos is left untouched when the table has no reportable entry at all.
 */
static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (!n)
		return NULL;

	--(*pos);
	while (*pos) {
		n = neigh_get_next(seq, n, pos);
		if (!n)
			return NULL;
	}

	return n;
}
3343
pneigh_get_first(struct seq_file * seq)3344 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3345 {
3346 struct neigh_seq_state *state = seq->private;
3347 struct net *net = seq_file_net(seq);
3348 struct neigh_table *tbl = state->tbl;
3349 struct pneigh_entry *pn = NULL;
3350 int bucket;
3351
3352 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3353 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3354 pn = rcu_dereference(tbl->phash_buckets[bucket]);
3355
3356 while (pn && !net_eq(pneigh_net(pn), net))
3357 pn = rcu_dereference(pn->next);
3358 if (pn)
3359 break;
3360 }
3361 state->bucket = bucket;
3362
3363 return pn;
3364 }
3365
/*
 * Return the proxy entry after @pn that belongs to the seq_file's network
 * namespace, continuing into later buckets (tracked in state->bucket) when
 * the current chain runs out. Returns NULL after the last bucket.
 *
 * When an entry is found and @pos is non-NULL, *pos is decremented.
 */
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	/* Rest of the current chain. */
	for (pn = rcu_dereference(pn->next); pn; pn = rcu_dereference(pn->next)) {
		if (net_eq(pneigh_net(pn), net))
			break;
	}

	/* Then each following bucket until something matches. */
	while (!pn && ++state->bucket <= PNEIGH_HASHMASK) {
		for (pn = rcu_dereference(tbl->phash_buckets[state->bucket]);
		     pn;
		     pn = rcu_dereference(pn->next)) {
			if (net_eq(pneigh_net(pn), net))
				break;
		}
	}

	if (pn && pos)
		--(*pos);

	return pn;
}
3395
/*
 * Position the walk on the *pos'th proxy entry (1-based) visible in the
 * seq_file's namespace.
 *
 * *pos is counted down as entries are consumed; NULL is returned when the
 * proxy table runs out first. *pos is left untouched when there is no
 * proxy entry at all.
 */
static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (!pn)
		return NULL;

	--(*pos);
	while (*pos) {
		pn = pneigh_get_next(seq, pn, pos);
		if (!pn)
			return NULL;
	}

	return pn;
}
3410
/*
 * Resolve seq_file position *pos to an entry: neighbours come first, and,
 * unless NEIGH_SEQ_NEIGH_ONLY is set, proxy entries follow using whatever
 * count the neighbour walk left over. Works on a private copy, so *pos
 * itself is not modified.
 */
static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	loff_t idxpos = *pos;
	void *rc;

	rc = neigh_get_idx(seq, &idxpos);
	if (rc || (state->flags & NEIGH_SEQ_NEIGH_ONLY))
		return rc;

	return pneigh_get_idx(seq, &idxpos);
}
3423
/*
 * seq_file ->start helper for neighbour tables.
 *
 * Resets the iterator state for @tbl, stores @neigh_seq_flags (minus the
 * internal NEIGH_SEQ_IS_PNEIGH bit), then enters an RCU read-side section
 * and takes tbl->lock with BH disabled; neigh_seq_stop() undoes both.
 *
 * Returns SEQ_START_TOKEN for position 0, otherwise the entry at *pos or
 * NULL if there is none.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(tbl->lock)
	__acquires(rcu)
{
	struct neigh_seq_state *state = seq->private;

	state->flags = neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH;
	state->bucket = -1;
	state->tbl = tbl;

	rcu_read_lock();
	state->nht = rcu_dereference(tbl->nht);
	spin_lock_bh(&tbl->lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	return neigh_get_idx_any(seq, pos);
}
EXPORT_SYMBOL(neigh_seq_start);
3441
/*
 * seq_file ->next helper: step from @v to the following entry and bump
 * *pos.
 *
 * After the start token comes the first neighbour. While in the neighbour
 * table the walk continues there and, once it is exhausted, switches to
 * the proxy table unless NEIGH_SEQ_NEIGH_ONLY is set. While in the proxy
 * table (NEIGH_SEQ_IS_PNEIGH) it continues with proxy entries.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
	} else if (state->flags & NEIGH_SEQ_IS_PNEIGH) {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	} else {
		rc = neigh_get_next(seq, v, NULL);
		if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	}

	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
3468
neigh_seq_stop(struct seq_file * seq,void * v)3469 void neigh_seq_stop(struct seq_file *seq, void *v)
3470 __releases(tbl->lock)
3471 __releases(rcu)
3472 {
3473 struct neigh_seq_state *state = seq->private;
3474 struct neigh_table *tbl = state->tbl;
3475
3476 spin_unlock_bh(&tbl->lock);
3477 rcu_read_unlock();
3478 }
3479 EXPORT_SYMBOL(neigh_seq_stop);
3480
3481 /* statistics via seq_file */
3482
/*
 * ->start for the per-CPU statistics file.
 *
 * Position 0 is the header line (SEQ_START_TOKEN); position N > 0 maps to
 * CPU N - 1. CPUs that are not possible are skipped and *pos is moved to
 * the slot of the CPU actually returned. Returns NULL past the last CPU.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = pde_data(file_inode(seq->file));
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	cpu = *pos-1;
	while (cpu < nr_cpu_ids && !cpu_possible(cpu))
		++cpu;

	if (cpu < nr_cpu_ids) {
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}

	return NULL;
}
3499
/*
 * ->next for the per-CPU statistics file.
 *
 * Returns the statistics of the first possible CPU numbered *pos or
 * higher and sets *pos to that CPU's slot (cpu + 1). When none is left,
 * *pos is still incremented and NULL is returned.
 */
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = pde_data(file_inode(seq->file));
	int cpu = *pos;

	while (cpu < nr_cpu_ids && !cpu_possible(cpu))
		++cpu;

	if (cpu < nr_cpu_ids) {
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}

	(*pos)++;
	return NULL;
}
3514
/*
 * ->stop for the per-CPU statistics file. The matching start/next
 * callbacks take no locks and hold no references, so nothing is undone.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
3519
neigh_stat_seq_show(struct seq_file * seq,void * v)3520 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3521 {
3522 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3523 struct neigh_statistics *st = v;
3524
3525 if (v == SEQ_START_TOKEN) {
3526 seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3527 return 0;
3528 }
3529
3530 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3531 "%08lx %08lx %08lx "
3532 "%08lx %08lx %08lx\n",
3533 atomic_read(&tbl->entries),
3534
3535 st->allocs,
3536 st->destroys,
3537 st->hash_grows,
3538
3539 st->lookups,
3540 st->hits,
3541
3542 st->res_failed,
3543
3544 st->rcv_probes_mcast,
3545 st->rcv_probes_ucast,
3546
3547 st->periodic_gc_runs,
3548 st->forced_gc_runs,
3549 st->unres_discards,
3550 st->table_fulls
3551 );
3552
3553 return 0;
3554 }
3555
/*
 * seq_file operations for the per-CPU neighbour statistics file: one
 * header line followed by one row per possible CPU.
 */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
3562 #endif /* CONFIG_PROC_FS */
3563
/*
 * Build a netlink message of @type describing @n and send it to the
 * RTNLGRP_NEIGH multicast group of the device's network namespace.
 *
 * The message is allocated with GFP_ATOMIC. If allocation or filling
 * fails, the error is reported to the group's listeners through
 * rtnl_set_sk_err() instead. The whole operation runs under
 * rcu_read_lock().
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct sk_buff *skb;
	struct net *net;
	int err;

	rcu_read_lock();
	net = dev_net_rcu(n->dev);

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (!skb) {
		err = -ENOBUFS;
		goto report;
	}

	err = __neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err >= 0) {
		rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
		goto unlock;
	}

	/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
	WARN_ON(err == -EMSGSIZE);
	kfree_skb(skb);
report:
	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
unlock:
	rcu_read_unlock();
}
3591
/*
 * Locked wrapper around __neigh_notify(): holds neigh->lock for reading,
 * with BH disabled, while the notification is built and sent.
 */
static void neigh_notify(struct neighbour *neigh, int type, int flags, u32 pid)
{
	read_lock_bh(&neigh->lock);
	__neigh_notify(neigh, type, flags, pid);
	read_unlock_bh(&neigh->lock);
}
3598
/*
 * Send an RTM_GETNEIGH message flagged NLM_F_REQUEST for @n to the
 * neighbour netlink group, with a pid of 0.
 */
void neigh_app_ns(struct neighbour *n)
{
	neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
3604
3605 #ifdef CONFIG_SYSCTL
/*
 * Largest value proc_unres_qlen() accepts: the biggest count that can be
 * multiplied by SKB_TRUESIZE(ETH_FRAME_LEN) without exceeding INT_MAX.
 */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3607
/*
 * sysctl handler that exposes a value stored in bytes as a count of
 * SKB_TRUESIZE(ETH_FRAME_LEN)-sized units.
 *
 * The stored value is divided down into a temporary, which is read or
 * written through proc_dointvec_minmax() bounded to [0, unres_qlen_max].
 * On a successful write the new count is multiplied back up and stored.
 */
static int proc_unres_qlen(const struct ctl_table *ctl, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int *stored = ctl->data;
	int size, ret;

	size = *stored / SKB_TRUESIZE(ETH_FRAME_LEN);

	tmp.data = &size;
	tmp.extra1 = SYSCTL_ZERO;
	tmp.extra2 = &unres_qlen_max;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	*stored = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
3625
neigh_copy_dflt_parms(struct net * net,struct neigh_parms * p,int index)3626 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3627 int index)
3628 {
3629 struct net_device *dev;
3630 int family = neigh_parms_family(p);
3631
3632 rcu_read_lock();
3633 for_each_netdev_rcu(net, dev) {
3634 struct neigh_parms *dst_p =
3635 neigh_get_dev_parms_rcu(dev, family);
3636
3637 if (dst_p && !test_bit(index, dst_p->data_state))
3638 dst_p->data[index] = p->data[index];
3639 }
3640 rcu_read_unlock();
3641 }
3642
neigh_proc_update(const struct ctl_table * ctl,int write)3643 static void neigh_proc_update(const struct ctl_table *ctl, int write)
3644 {
3645 struct net_device *dev = ctl->extra1;
3646 struct neigh_parms *p = ctl->extra2;
3647 struct net *net = neigh_parms_net(p);
3648 int index = (int *) ctl->data - p->data;
3649
3650 if (!write)
3651 return;
3652
3653 set_bit(index, p->data_state);
3654 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3655 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3656 if (!dev) /* NULL dev means this is default value */
3657 neigh_copy_dflt_parms(net, p, index);
3658 }
3659
/*
 * Integer sysctl handler restricted to [0, INT_MAX]. The bounds are set on
 * a private copy of @ctl because the original extra1/extra2 are read by
 * neigh_proc_update() as the device and parms pointers.
 */
static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	struct ctl_table bounded = *ctl;
	int rc;

	bounded.extra1 = SYSCTL_ZERO;
	bounded.extra2 = SYSCTL_INT_MAX;
	rc = proc_dointvec_minmax(&bounded, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return rc;
}
3674
/*
 * Millisecond sysctl handler, stored as jiffies, with a lower bound of
 * msecs_to_jiffies(1) and no upper bound. The bounds are set on a private
 * copy of @ctl; the original is passed on to neigh_proc_update().
 */
static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
						   void *buffer, size_t *lenp, loff_t *ppos)
{
	int floor = msecs_to_jiffies(1);
	struct ctl_table bounded = *ctl;
	int rc;

	bounded.extra1 = &floor;
	bounded.extra2 = NULL;
	rc = proc_dointvec_ms_jiffies_minmax(&bounded, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return rc;
}
3690
/*
 * proc_dointvec() followed by the common neigh post-write processing.
 * Returns the proc_dointvec() result.
 */
int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
			size_t *lenp, loff_t *ppos)
{
	int rc;

	rc = proc_dointvec(ctl, write, buffer, lenp, ppos);
	/* Called whatever rc is; it returns early on reads. */
	neigh_proc_update(ctl, write);

	return rc;
}
EXPORT_SYMBOL(neigh_proc_dointvec);
3700
/*
 * proc_dointvec_jiffies() followed by the common neigh post-write
 * processing. Returns the proc_dointvec_jiffies() result.
 */
int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
				size_t *lenp, loff_t *ppos)
{
	int rc;

	rc = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	/* Called whatever rc is; it returns early on reads. */
	neigh_proc_update(ctl, write);

	return rc;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3710
/*
 * proc_dointvec_userhz_jiffies() followed by the common neigh post-write
 * processing. Returns the proc_dointvec_userhz_jiffies() result.
 */
static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
					      void *buffer, size_t *lenp,
					      loff_t *ppos)
{
	int rc;

	rc = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
	/* Called whatever rc is; it returns early on reads. */
	neigh_proc_update(ctl, write);

	return rc;
}
3720
/*
 * proc_dointvec_ms_jiffies() followed by the common neigh post-write
 * processing. Returns the proc_dointvec_ms_jiffies() result.
 */
int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	int rc;

	rc = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	/* Called whatever rc is; it returns early on reads. */
	neigh_proc_update(ctl, write);

	return rc;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3730
/*
 * proc_unres_qlen() followed by the common neigh post-write processing.
 * Returns the proc_unres_qlen() result.
 */
static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	int rc;

	rc = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
	/* Called whatever rc is; it returns early on reads. */
	neigh_proc_update(ctl, write);

	return rc;
}
3740
/*
 * Handler shared by "base_reachable_time" (jiffies conversion) and
 * "base_reachable_time_ms" (millisecond conversion); the entry's procname
 * selects which. Any other procname yields -1.
 *
 * After a successful write, reachable_time is refreshed right away via
 * neigh_set_reach_time().
 */
static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (!strcmp(ctl->procname, "base_reachable_time"))
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (!strcmp(ctl->procname, "base_reachable_time_ms"))
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		return -1;

	if (ret || !write)
		return ret;

	/* update reachable_time as well, otherwise, the change will
	 * only be effective after the next time neigh_periodic_work
	 * decides to recompute it
	 */
	neigh_set_reach_time(p);
	return ret;
}
3764
/*
 * Offset of data[index] within struct neigh_parms, expressed as a pointer
 * from address 0. neigh_sysctl_register() adds the address of the actual
 * parms to each template entry's .data to obtain the live variable.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/*
 * Designated initializer for neigh_vars[NEIGH_VAR_<attr>]: an int-sized
 * entry called @name with mode @mval and handler @proc whose storage is
 * data[NEIGH_VAR_<data_attr>]. attr and data_attr differ for entries that
 * present an existing variable in another unit.
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* 0644 entry handled by neigh_proc_dointvec_zero_intmax(). */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

/* 0644 entry handled by neigh_proc_dointvec_jiffies(). */
#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

/* 0644 entry handled by neigh_proc_dointvec_userhz_jiffies(). */
#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

/* 0644 entry handled by neigh_proc_dointvec_ms_jiffies_positive(). */
#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)

/* 0644 millisecond view of the variable stored under data_attr. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* 0644 entry over the variable stored under data_attr, handled by neigh_proc_dointvec_unres_qlen(). */
#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3794
/*
 * Template for a neigh sysctl directory. neigh_sysctl_register() duplicates
 * it for each parms instance and patches the copy.
 *
 * Entries built with the NEIGH_SYSCTL_*_ENTRY macros carry an offset into
 * struct neigh_parms in .data. The gc_* entries have no .data here; the
 * register function points them at the table's fields for the "default"
 * directory.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
						       "interval_probe_time_ms"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		/* The next three present an existing variable in a different unit. */
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		/* Table-wide GC knobs; .data is filled in at registration time. */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
	},
};
3850
/*
 * Register the sysctl directory "net/<ipv4|ipv6>/neigh/<name>" for @p.
 *
 * @dev:     device the parms belong to, or NULL for the table defaults;
 *           in that case the directory is called "default" and also
 *           carries the table-wide gc_* entries.
 * @p:       parms whose data[] backs the entries.
 * @handler: optional replacement handler for the retrans_time and
 *           base_reachable_time entries (both units). Without one, the
 *           base_reachable_time entries use
 *           neigh_proc_base_reachable_time().
 *
 * Returns 0 on success and -ENOBUFS if the table cannot be allocated or
 * registered. Hits BUG() for a family other than AF_INET/AF_INET6.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	proc_handler *reach_handler;
	struct ctl_table *var;
	size_t neigh_vars_size;
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
	if (!t)
		return -ENOBUFS;

	/* Turn the template's parms-relative offsets into real addresses. */
	for (var = t->neigh_vars;
	     var < &t->neigh_vars[NEIGH_VAR_GC_INTERVAL];
	     var++) {
		var->data += (long) p;
		var->extra1 = dev;
		var->extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
	} else {
		struct neigh_table *tbl = p->tbl;

		dev_name_source = "default";
		neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime, in both units */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		reach_handler = handler;
	} else {
		/* This handler will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes)
		 * So any handler that replaces it should do this as well
		 */
		reach_handler = neigh_proc_base_reachable_time;
	}
	/* ReachableTime, in both units */
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = reach_handler;
	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = reach_handler;

	switch (neigh_parms_family(p)) {
	case AF_INET:
		p_name = "ipv4";
		break;
	case AF_INET6:
		p_name = "ipv6";
		break;
	default:
		BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		 p_name, dev_name_source);
	t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
						  neigh_path, t->neigh_vars,
						  neigh_vars_size);
	if (!t->sysctl_header) {
		kfree(t);
		return -ENOBUFS;
	}

	p->sysctl_table = t;
	return 0;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3937
neigh_sysctl_unregister(struct neigh_parms * p)3938 void neigh_sysctl_unregister(struct neigh_parms *p)
3939 {
3940 if (p->sysctl_table) {
3941 struct neigh_sysctl_table *t = p->sysctl_table;
3942 p->sysctl_table = NULL;
3943 unregister_net_sysctl_table(t->sysctl_header);
3944 kfree(t);
3945 }
3946 }
3947 EXPORT_SYMBOL(neigh_sysctl_unregister);
3948
3949 #endif /* CONFIG_SYSCTL */
3950
/*
 * rtnetlink message handlers for neighbour entries and neighbour tables,
 * registered once by neigh_init(). Entries carrying RTNL_FLAG_*_UNLOCKED
 * request the corresponding doit/dumpit to be run unlocked.
 */
static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
	{.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
	{.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
	{.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
	{.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info,
	 .flags = RTNL_FLAG_DUMP_UNLOCKED},
	{.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set,
	 .flags = RTNL_FLAG_DOIT_UNLOCKED},
};
3961
/*
 * Subsystem init: register the neighbour rtnetlink handlers.
 * Always returns 0.
 */
static int __init neigh_init(void)
{
	rtnl_register_many(neigh_rtnl_msg_handlers);
	return 0;
}

subsys_initcall(neigh_init);
3969