1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Generic address resolution entity
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 *
9 * Fixes:
10 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
11 * Harald Welte Add neighbour cache statistics like rtstat
12 */
13
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16 #include <linux/slab.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/socket.h>
21 #include <linux/netdevice.h>
22 #include <linux/proc_fs.h>
23 #ifdef CONFIG_SYSCTL
24 #include <linux/sysctl.h>
25 #endif
26 #include <linux/times.h>
27 #include <net/net_namespace.h>
28 #include <net/neighbour.h>
29 #include <net/arp.h>
30 #include <net/dst.h>
31 #include <net/ip.h>
32 #include <net/sock.h>
33 #include <net/netevent.h>
34 #include <net/netlink.h>
35 #include <linux/rtnetlink.h>
36 #include <linux/random.h>
37 #include <linux/string.h>
38 #include <linux/log2.h>
39 #include <linux/inetdevice.h>
40 #include <net/addrconf.h>
41
42 #include <trace/events/neigh.h>
43
#define NEIGH_DEBUG 1
/*
 * neigh_dbg - emit a debug message when @level is enabled.
 *
 * The message is handed to pr_debug() only when @level does not exceed the
 * compile-time NEIGH_DEBUG verbosity.  @level is parenthesized so that an
 * expression argument is compared as a whole instead of being re-associated
 * with the "<=" operator.
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if ((level) <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
50
51 #define PNEIGH_HASHMASK 0xF
52
53 static void neigh_timer_handler(struct timer_list *t);
54 static void __neigh_notify(struct neighbour *n, int type, int flags,
55 u32 pid);
56 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
57 static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
58 bool skip_perm);
59
60 #ifdef CONFIG_PROC_FS
61 static const struct seq_operations neigh_stat_seq_ops;
62 #endif
63
neigh_get_dev_table(struct net_device * dev,int family)64 static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
65 {
66 int i;
67
68 switch (family) {
69 default:
70 DEBUG_NET_WARN_ON_ONCE(1);
71 fallthrough; /* to avoid panic by null-ptr-deref */
72 case AF_INET:
73 i = NEIGH_ARP_TABLE;
74 break;
75 case AF_INET6:
76 i = NEIGH_ND_TABLE;
77 break;
78 }
79
80 return &dev->neighbours[i];
81 }
82
83 /*
84 Neighbour hash table buckets are protected with rwlock tbl->lock.
85
86 - All the scans/updates to hash buckets MUST be made under this lock.
87 - NOTHING clever should be made under this lock: no callbacks
88 to protocol backends, no attempts to send something to network.
89 It will result in deadlocks, if backend/driver wants to use neighbour
90 cache.
91 - If the entry requires some non-trivial actions, increase
92 its reference count and release table lock.
93
94 Neighbour entries are protected:
95 - with reference count.
96 - with rwlock neigh->lock
97
98 Reference count prevents destruction.
99
   neigh->lock mainly serializes the link-layer address data and its validity state.
   However, the same lock is also used to protect other entry fields:
102 - timer
103 - resolution queue
104
105 Again, nothing clever shall be made under neigh->lock,
106 the most complicated procedure, which we allow is dev->hard_header.
107 It is supposed, that dev->hard_header is simplistic and does
108 not make callbacks to neighbour tables.
109 */
110
neigh_blackhole(struct neighbour * neigh,struct sk_buff * skb)111 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
112 {
113 kfree_skb(skb);
114 return -ENETDOWN;
115 }
116
neigh_cleanup_and_release(struct neighbour * neigh)117 static void neigh_cleanup_and_release(struct neighbour *neigh)
118 {
119 trace_neigh_cleanup_and_release(neigh, 0);
120 __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
121 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
122 neigh_release(neigh);
123 }
124
/*
 * Pick a random value in the interval (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 *
 * A @base of zero yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return get_random_u32_below(base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
136
neigh_mark_dead(struct neighbour * n)137 static void neigh_mark_dead(struct neighbour *n)
138 {
139 n->dead = 1;
140 if (!list_empty(&n->gc_list)) {
141 list_del_init(&n->gc_list);
142 atomic_dec(&n->tbl->gc_entries);
143 }
144 if (!list_empty(&n->managed_list))
145 list_del_init(&n->managed_list);
146 }
147
neigh_update_gc_list(struct neighbour * n)148 static void neigh_update_gc_list(struct neighbour *n)
149 {
150 bool on_gc_list, exempt_from_gc;
151
152 write_lock_bh(&n->tbl->lock);
153 write_lock(&n->lock);
154 if (n->dead)
155 goto out;
156
157 /* remove from the gc list if new state is permanent or if neighbor is
158 * externally learned / validated; otherwise entry should be on the gc
159 * list
160 */
161 exempt_from_gc = n->nud_state & NUD_PERMANENT ||
162 n->flags & (NTF_EXT_LEARNED | NTF_EXT_VALIDATED);
163 on_gc_list = !list_empty(&n->gc_list);
164
165 if (exempt_from_gc && on_gc_list) {
166 list_del_init(&n->gc_list);
167 atomic_dec(&n->tbl->gc_entries);
168 } else if (!exempt_from_gc && !on_gc_list) {
169 /* add entries to the tail; cleaning removes from the front */
170 list_add_tail(&n->gc_list, &n->tbl->gc_list);
171 atomic_inc(&n->tbl->gc_entries);
172 }
173 out:
174 write_unlock(&n->lock);
175 write_unlock_bh(&n->tbl->lock);
176 }
177
neigh_update_managed_list(struct neighbour * n)178 static void neigh_update_managed_list(struct neighbour *n)
179 {
180 bool on_managed_list, add_to_managed;
181
182 write_lock_bh(&n->tbl->lock);
183 write_lock(&n->lock);
184 if (n->dead)
185 goto out;
186
187 add_to_managed = n->flags & NTF_MANAGED;
188 on_managed_list = !list_empty(&n->managed_list);
189
190 if (!add_to_managed && on_managed_list)
191 list_del_init(&n->managed_list);
192 else if (add_to_managed && !on_managed_list)
193 list_add_tail(&n->managed_list, &n->tbl->managed_list);
194 out:
195 write_unlock(&n->lock);
196 write_unlock_bh(&n->tbl->lock);
197 }
198
/*
 * Apply administrative flag changes to @neigh.
 *
 * Nothing happens unless @flags carries NEIGH_UPDATE_F_ADMIN.  Otherwise the
 * NTF_EXT_LEARNED, NTF_MANAGED and NTF_EXT_VALIDATED bits of neigh->flags are
 * made to match the corresponding NEIGH_UPDATE_F_* bits in @flags.
 *
 * For every bit that changes, *@notify is set to 1.  *@gc_update is set when
 * NTF_EXT_LEARNED or NTF_EXT_VALIDATED changed, *@managed_update when
 * NTF_MANAGED changed.  The outputs are never cleared here.
 */
static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
			       bool *gc_update, bool *managed_update)
{
	u32 wanted = 0, changed;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return;

	if (flags & NEIGH_UPDATE_F_EXT_LEARNED)
		wanted |= NTF_EXT_LEARNED;
	if (flags & NEIGH_UPDATE_F_MANAGED)
		wanted |= NTF_MANAGED;
	if (flags & NEIGH_UPDATE_F_EXT_VALIDATED)
		wanted |= NTF_EXT_VALIDATED;

	/* Bits whose requested value differs from the current one. */
	changed = neigh->flags ^ wanted;

	if (changed & NTF_EXT_LEARNED) {
		if (wanted & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		*notify = 1;
		*gc_update = true;
	}

	if (changed & NTF_MANAGED) {
		if (wanted & NTF_MANAGED)
			neigh->flags |= NTF_MANAGED;
		else
			neigh->flags &= ~NTF_MANAGED;
		*notify = 1;
		*managed_update = true;
	}

	if (changed & NTF_EXT_VALIDATED) {
		if (wanted & NTF_EXT_VALIDATED)
			neigh->flags |= NTF_EXT_VALIDATED;
		else
			neigh->flags &= ~NTF_EXT_VALIDATED;
		*notify = 1;
		*gc_update = true;
	}
}
236
neigh_remove_one(struct neighbour * n)237 bool neigh_remove_one(struct neighbour *n)
238 {
239 bool retval = false;
240
241 write_lock(&n->lock);
242 if (refcount_read(&n->refcnt) == 1) {
243 hlist_del_rcu(&n->hash);
244 hlist_del_rcu(&n->dev_list);
245 neigh_mark_dead(n);
246 retval = true;
247 }
248 write_unlock(&n->lock);
249 if (retval)
250 neigh_cleanup_and_release(n);
251 return retval;
252 }
253
neigh_forced_gc(struct neigh_table * tbl)254 static int neigh_forced_gc(struct neigh_table *tbl)
255 {
256 int max_clean = atomic_read(&tbl->gc_entries) -
257 READ_ONCE(tbl->gc_thresh2);
258 u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
259 unsigned long tref = jiffies - 5 * HZ;
260 struct neighbour *n, *tmp;
261 int shrunk = 0;
262 int loop = 0;
263
264 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
265
266 write_lock_bh(&tbl->lock);
267
268 list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
269 if (refcount_read(&n->refcnt) == 1) {
270 bool remove = false;
271
272 write_lock(&n->lock);
273 if ((n->nud_state == NUD_FAILED) ||
274 (n->nud_state == NUD_NOARP) ||
275 (tbl->is_multicast &&
276 tbl->is_multicast(n->primary_key)) ||
277 !time_in_range(n->updated, tref, jiffies))
278 remove = true;
279 write_unlock(&n->lock);
280
281 if (remove && neigh_remove_one(n))
282 shrunk++;
283 if (shrunk >= max_clean)
284 break;
285 if (++loop == 16) {
286 if (ktime_get_ns() > tmax)
287 goto unlock;
288 loop = 0;
289 }
290 }
291 }
292
293 WRITE_ONCE(tbl->last_flush, jiffies);
294 unlock:
295 write_unlock_bh(&tbl->lock);
296
297 return shrunk;
298 }
299
neigh_add_timer(struct neighbour * n,unsigned long when)300 static void neigh_add_timer(struct neighbour *n, unsigned long when)
301 {
302 /* Use safe distance from the jiffies - LONG_MAX point while timer
303 * is running in DELAY/PROBE state but still show to user space
304 * large times in the past.
305 */
306 unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
307
308 neigh_hold(n);
309 if (!time_in_range(n->confirmed, mint, jiffies))
310 n->confirmed = mint;
311 if (time_before(n->used, n->confirmed))
312 n->used = n->confirmed;
313 if (unlikely(mod_timer(&n->timer, when))) {
314 printk("NEIGH: BUG, double timer add, state is %x\n",
315 n->nud_state);
316 dump_stack();
317 }
318 }
319
neigh_del_timer(struct neighbour * n)320 static int neigh_del_timer(struct neighbour *n)
321 {
322 if ((n->nud_state & NUD_IN_TIMER) &&
323 timer_delete(&n->timer)) {
324 neigh_release(n);
325 return 1;
326 }
327 return 0;
328 }
329
neigh_get_dev_parms_rcu(struct net_device * dev,int family)330 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
331 int family)
332 {
333 switch (family) {
334 case AF_INET:
335 return __in_dev_arp_parms_get_rcu(dev);
336 case AF_INET6:
337 return __in6_dev_nd_parms_get_rcu(dev);
338 }
339 return NULL;
340 }
341
neigh_parms_qlen_dec(struct net_device * dev,int family)342 static void neigh_parms_qlen_dec(struct net_device *dev, int family)
343 {
344 struct neigh_parms *p;
345
346 rcu_read_lock();
347 p = neigh_get_dev_parms_rcu(dev, family);
348 if (p)
349 p->qlen--;
350 rcu_read_unlock();
351 }
352
pneigh_queue_purge(struct sk_buff_head * list,struct net * net,int family)353 static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
354 int family)
355 {
356 struct sk_buff_head tmp;
357 unsigned long flags;
358 struct sk_buff *skb;
359
360 skb_queue_head_init(&tmp);
361 spin_lock_irqsave(&list->lock, flags);
362 skb = skb_peek(list);
363 while (skb != NULL) {
364 struct sk_buff *skb_next = skb_peek_next(skb, list);
365 struct net_device *dev = skb->dev;
366
367 if (net == NULL || net_eq(dev_net(dev), net)) {
368 neigh_parms_qlen_dec(dev, family);
369 __skb_unlink(skb, list);
370 __skb_queue_tail(&tmp, skb);
371 }
372 skb = skb_next;
373 }
374 spin_unlock_irqrestore(&list->lock, flags);
375
376 while ((skb = __skb_dequeue(&tmp))) {
377 dev_put(skb->dev);
378 kfree_skb(skb);
379 }
380 }
381
neigh_flush_one(struct neighbour * n)382 static void neigh_flush_one(struct neighbour *n)
383 {
384 hlist_del_rcu(&n->hash);
385 hlist_del_rcu(&n->dev_list);
386
387 write_lock(&n->lock);
388
389 neigh_del_timer(n);
390 neigh_mark_dead(n);
391
392 if (refcount_read(&n->refcnt) != 1) {
393 /* The most unpleasant situation.
394 * We must destroy neighbour entry,
395 * but someone still uses it.
396 *
397 * The destroy will be delayed until
398 * the last user releases us, but
399 * we must kill timers etc. and move
400 * it to safe state.
401 */
402 __skb_queue_purge(&n->arp_queue);
403 n->arp_queue_len_bytes = 0;
404 WRITE_ONCE(n->output, neigh_blackhole);
405
406 if (n->nud_state & NUD_VALID)
407 n->nud_state = NUD_NOARP;
408 else
409 n->nud_state = NUD_NONE;
410
411 neigh_dbg(2, "neigh %p is stray\n", n);
412 }
413
414 write_unlock(&n->lock);
415
416 neigh_cleanup_and_release(n);
417 }
418
/*
 * Flush the entries of @tbl's family that hang off @dev.
 *
 * With @skip_perm set, entries in NUD_PERMANENT state and entries flagged
 * NTF_EXT_VALIDATED are kept.  Both callers in this file hold tbl->lock for
 * writing.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	struct hlist_head *dev_head = neigh_get_dev_table(dev, tbl->family);
	struct hlist_node *tmp;
	struct neighbour *n;

	hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
		bool keep = skip_perm &&
			    (n->nud_state & NUD_PERMANENT ||
			     n->flags & NTF_EXT_VALIDATED);

		if (!keep)
			neigh_flush_one(n);
	}
}
437
neigh_flush_table(struct neigh_table * tbl)438 static void neigh_flush_table(struct neigh_table *tbl)
439 {
440 struct neigh_hash_table *nht;
441 int i;
442
443 nht = rcu_dereference_protected(tbl->nht,
444 lockdep_is_held(&tbl->lock));
445
446 for (i = 0; i < (1 << nht->hash_shift); i++) {
447 struct hlist_node *tmp;
448 struct neighbour *n;
449
450 neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i])
451 neigh_flush_one(n);
452 }
453 }
454
neigh_changeaddr(struct neigh_table * tbl,struct net_device * dev)455 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
456 {
457 write_lock_bh(&tbl->lock);
458 neigh_flush_dev(tbl, dev, false);
459 write_unlock_bh(&tbl->lock);
460 }
461 EXPORT_SYMBOL(neigh_changeaddr);
462
/*
 * Common teardown for a device going down or losing carrier.
 *
 * With a device, its entries in @tbl are flushed (honouring @skip_perm).
 * With @dev == NULL the whole table is flushed; @skip_perm is not expected
 * in that case and raises a debug warning.  Afterwards proxy entries are
 * removed, the proxy queue is purged (for the device's netns, or entirely
 * when @dev is NULL), and the proxy timer is stopped if the queue is empty.
 *
 * Always returns 0.
 */
static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	struct net *net;

	write_lock_bh(&tbl->lock);
	if (unlikely(!dev)) {
		DEBUG_NET_WARN_ON_ONCE(skip_perm);
		neigh_flush_table(tbl);
	} else {
		neigh_flush_dev(tbl, dev, skip_perm);
	}
	write_unlock_bh(&tbl->lock);

	pneigh_ifdown(tbl, dev, skip_perm);

	net = dev ? dev_net(dev) : NULL;
	pneigh_queue_purge(&tbl->proxy_queue, net, tbl->family);

	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		timer_delete_sync(&tbl->proxy_timer);

	return 0;
}
482
neigh_carrier_down(struct neigh_table * tbl,struct net_device * dev)483 int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
484 {
485 __neigh_ifdown(tbl, dev, true);
486 return 0;
487 }
488 EXPORT_SYMBOL(neigh_carrier_down);
489
neigh_ifdown(struct neigh_table * tbl,struct net_device * dev)490 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
491 {
492 __neigh_ifdown(tbl, dev, false);
493 return 0;
494 }
495 EXPORT_SYMBOL(neigh_ifdown);
496
/*
 * Allocate and initialise a neighbour entry for @tbl / @dev.
 *
 * Unless @exempt_from_gc is set, tbl->gc_entries is incremented up front and
 * a forced gc run is attempted when the previous count reached gc_thresh3,
 * or reached gc_thresh2 with the last flush more than 5 seconds ago.  If
 * that run frees nothing and the count is at gc_thresh3 or above, the
 * allocation is refused with a rate-limited "table overflow" message.
 *
 * The new entry starts in NUD_NONE with neigh_blackhole() as output, a
 * reference count of one, and dead == 1; it is not linked into any list
 * here.  tbl->entries is incremented on success.
 *
 * Returns the entry, or NULL on failure (the gc_entries increment is then
 * undone).  Memory comes from kzalloc() with GFP_ATOMIC.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u32 flags, bool exempt_from_gc)
{
	unsigned long now = jiffies;
	struct neighbour *n;

	if (!exempt_from_gc) {
		int entries = atomic_inc_return(&tbl->gc_entries) - 1;
		int gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
		bool pressure;

		pressure = entries >= gc_thresh3 ||
			   (entries >= READ_ONCE(tbl->gc_thresh2) &&
			    time_after(now,
				       READ_ONCE(tbl->last_flush) + 5 * HZ));

		if (pressure && !neigh_forced_gc(tbl) &&
		    entries >= gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto undo_gc_count;
		}
	}

	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto undo_gc_count;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->used = now;
	n->updated = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	n->flags = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	refcount_set(&n->refcnt, 1);
	/* Stays "dead" until the creator links it into the table. */
	n->dead = 1;
	INIT_LIST_HEAD(&n->gc_list);
	INIT_LIST_HEAD(&n->managed_list);

	atomic_inc(&tbl->entries);
	return n;

undo_gc_count:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	return NULL;
}
553
/*
 * Store a random 32-bit value in *@x.  The lowest bit is always set, so the
 * result is odd and therefore never zero.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	u32 rnd = get_random_u32();

	*x = rnd | 1;
}
558
neigh_hash_alloc(unsigned int shift)559 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
560 {
561 size_t size = (1 << shift) * sizeof(struct hlist_head);
562 struct hlist_head *hash_heads;
563 struct neigh_hash_table *ret;
564 int i;
565
566 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
567 if (!ret)
568 return NULL;
569
570 hash_heads = kzalloc(size, GFP_ATOMIC);
571 if (!hash_heads) {
572 kfree(ret);
573 return NULL;
574 }
575 ret->hash_heads = hash_heads;
576 ret->hash_shift = shift;
577 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
578 neigh_get_hash_rnd(&ret->hash_rnd[i]);
579 return ret;
580 }
581
neigh_hash_free_rcu(struct rcu_head * head)582 static void neigh_hash_free_rcu(struct rcu_head *head)
583 {
584 struct neigh_hash_table *nht = container_of(head,
585 struct neigh_hash_table,
586 rcu);
587
588 kfree(nht->hash_heads);
589 kfree(nht);
590 }
591
neigh_hash_grow(struct neigh_table * tbl,unsigned long new_shift)592 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
593 unsigned long new_shift)
594 {
595 unsigned int i, hash;
596 struct neigh_hash_table *new_nht, *old_nht;
597
598 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
599
600 old_nht = rcu_dereference_protected(tbl->nht,
601 lockdep_is_held(&tbl->lock));
602 new_nht = neigh_hash_alloc(new_shift);
603 if (!new_nht)
604 return old_nht;
605
606 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
607 struct hlist_node *tmp;
608 struct neighbour *n;
609
610 neigh_for_each_in_bucket_safe(n, tmp, &old_nht->hash_heads[i]) {
611 hash = tbl->hash(n->primary_key, n->dev,
612 new_nht->hash_rnd);
613
614 hash >>= (32 - new_nht->hash_shift);
615
616 hlist_del_rcu(&n->hash);
617 hlist_add_head_rcu(&n->hash, &new_nht->hash_heads[hash]);
618 }
619 }
620
621 rcu_assign_pointer(tbl->nht, new_nht);
622 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
623 return new_nht;
624 }
625
neigh_lookup(struct neigh_table * tbl,const void * pkey,struct net_device * dev)626 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
627 struct net_device *dev)
628 {
629 struct neighbour *n;
630
631 NEIGH_CACHE_STAT_INC(tbl, lookups);
632
633 rcu_read_lock();
634 n = __neigh_lookup_noref(tbl, pkey, dev);
635 if (n) {
636 if (!refcount_inc_not_zero(&n->refcnt))
637 n = NULL;
638 NEIGH_CACHE_STAT_INC(tbl, hits);
639 }
640
641 rcu_read_unlock();
642 return n;
643 }
644 EXPORT_SYMBOL(neigh_lookup);
645
646 static struct neighbour *
___neigh_create(struct neigh_table * tbl,const void * pkey,struct net_device * dev,u32 flags,bool exempt_from_gc,bool want_ref)647 ___neigh_create(struct neigh_table *tbl, const void *pkey,
648 struct net_device *dev, u32 flags,
649 bool exempt_from_gc, bool want_ref)
650 {
651 u32 hash_val, key_len = tbl->key_len;
652 struct neighbour *n1, *rc, *n;
653 struct neigh_hash_table *nht;
654 int error;
655
656 n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
657 trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
658 if (!n) {
659 rc = ERR_PTR(-ENOBUFS);
660 goto out;
661 }
662
663 memcpy(n->primary_key, pkey, key_len);
664 n->dev = dev;
665 netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);
666
667 /* Protocol specific setup. */
668 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
669 rc = ERR_PTR(error);
670 goto out_neigh_release;
671 }
672
673 if (dev->netdev_ops->ndo_neigh_construct) {
674 error = dev->netdev_ops->ndo_neigh_construct(dev, n);
675 if (error < 0) {
676 rc = ERR_PTR(error);
677 goto out_neigh_release;
678 }
679 }
680
681 /* Device specific setup. */
682 if (n->parms->neigh_setup &&
683 (error = n->parms->neigh_setup(n)) < 0) {
684 rc = ERR_PTR(error);
685 goto out_neigh_release;
686 }
687
688 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
689
690 write_lock_bh(&tbl->lock);
691 nht = rcu_dereference_protected(tbl->nht,
692 lockdep_is_held(&tbl->lock));
693
694 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
695 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
696
697 hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
698
699 if (n->parms->dead) {
700 rc = ERR_PTR(-EINVAL);
701 goto out_tbl_unlock;
702 }
703
704 neigh_for_each_in_bucket(n1, &nht->hash_heads[hash_val]) {
705 if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
706 if (want_ref)
707 neigh_hold(n1);
708 rc = n1;
709 goto out_tbl_unlock;
710 }
711 }
712
713 n->dead = 0;
714 if (!exempt_from_gc)
715 list_add_tail(&n->gc_list, &n->tbl->gc_list);
716 if (n->flags & NTF_MANAGED)
717 list_add_tail(&n->managed_list, &n->tbl->managed_list);
718 if (want_ref)
719 neigh_hold(n);
720 hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
721
722 hlist_add_head_rcu(&n->dev_list,
723 neigh_get_dev_table(dev, tbl->family));
724
725 write_unlock_bh(&tbl->lock);
726 neigh_dbg(2, "neigh %p is created\n", n);
727 rc = n;
728 out:
729 return rc;
730 out_tbl_unlock:
731 write_unlock_bh(&tbl->lock);
732 out_neigh_release:
733 if (!exempt_from_gc)
734 atomic_dec(&tbl->gc_entries);
735 neigh_release(n);
736 goto out;
737 }
738
/*
 * Create (or find) the entry for (@pkey, @dev) in @tbl with no initial
 * flags.  Entries on loopback devices (IFF_LOOPBACK) are exempt from gc.
 * See ___neigh_create() for the return value and @want_ref.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	bool on_loopback = !!(dev->flags & IFF_LOOPBACK);

	return ___neigh_create(tbl, pkey, dev, 0, on_loopback, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
747
pneigh_hash(const void * pkey,unsigned int key_len)748 static u32 pneigh_hash(const void *pkey, unsigned int key_len)
749 {
750 u32 hash_val = *(u32 *)(pkey + key_len - 4);
751 hash_val ^= (hash_val >> 16);
752 hash_val ^= hash_val >> 8;
753 hash_val ^= hash_val >> 4;
754 hash_val &= PNEIGH_HASHMASK;
755 return hash_val;
756 }
757
pneigh_lookup(struct neigh_table * tbl,struct net * net,const void * pkey,struct net_device * dev)758 struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
759 struct net *net, const void *pkey,
760 struct net_device *dev)
761 {
762 struct pneigh_entry *n;
763 unsigned int key_len;
764 u32 hash_val;
765
766 key_len = tbl->key_len;
767 hash_val = pneigh_hash(pkey, key_len);
768 n = rcu_dereference_check(tbl->phash_buckets[hash_val],
769 lockdep_is_held(&tbl->phash_lock));
770
771 while (n) {
772 if (!memcmp(n->key, pkey, key_len) &&
773 net_eq(pneigh_net(n), net) &&
774 (n->dev == dev || !n->dev))
775 return n;
776
777 n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
778 }
779
780 return NULL;
781 }
782 EXPORT_IPV6_MOD(pneigh_lookup);
783
/*
 * Create a proxy entry for @pkey, or update the existing one.
 *
 * Runs under tbl->phash_lock.  If pneigh_lookup() finds no entry, a new one
 * is allocated (GFP_KERNEL), bound to @net and @dev (holding a device
 * reference), passed to the optional pconstructor and pushed onto the head
 * of its hash bucket.  In both cases @flags, @permanent and @protocol are
 * then stored in the entry.
 *
 * Returns 0 on success, or -ENOBUFS when allocation or the pconstructor
 * fails.
 */
int pneigh_create(struct neigh_table *tbl, struct net *net,
		  const void *pkey, struct net_device *dev,
		  u32 flags, u8 protocol, bool permanent)
{
	struct pneigh_entry *n;
	unsigned int key_len;
	u32 hash_val;
	int err = 0;

	mutex_lock(&tbl->phash_lock);

	n = pneigh_lookup(tbl, net, pkey, dev);
	if (!n) {
		key_len = tbl->key_len;
		n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
		if (!n) {
			err = -ENOBUFS;
			goto out;
		}

		write_pnet(&n->net, net);
		memcpy(n->key, pkey, key_len);
		n->dev = dev;
		netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);

		if (tbl->pconstructor && tbl->pconstructor(n)) {
			netdev_put(dev, &n->dev_tracker);
			kfree(n);
			err = -ENOBUFS;
			goto out;
		}

		/* Publish the fully set up entry at the bucket head. */
		hash_val = pneigh_hash(pkey, key_len);
		n->next = tbl->phash_buckets[hash_val];
		rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
	}

	WRITE_ONCE(n->flags, flags);
	n->permanent = permanent;
	WRITE_ONCE(n->protocol, protocol);
out:
	mutex_unlock(&tbl->phash_lock);
	return err;
}
829
pneigh_destroy(struct rcu_head * rcu)830 static void pneigh_destroy(struct rcu_head *rcu)
831 {
832 struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
833
834 netdev_put(n->dev, &n->dev_tracker);
835 kfree(n);
836 }
837
pneigh_delete(struct neigh_table * tbl,struct net * net,const void * pkey,struct net_device * dev)838 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
839 struct net_device *dev)
840 {
841 struct pneigh_entry *n, __rcu **np;
842 unsigned int key_len;
843 u32 hash_val;
844
845 key_len = tbl->key_len;
846 hash_val = pneigh_hash(pkey, key_len);
847
848 mutex_lock(&tbl->phash_lock);
849
850 for (np = &tbl->phash_buckets[hash_val];
851 (n = rcu_dereference_protected(*np, 1)) != NULL;
852 np = &n->next) {
853 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
854 net_eq(pneigh_net(n), net)) {
855 rcu_assign_pointer(*np, n->next);
856
857 mutex_unlock(&tbl->phash_lock);
858
859 if (tbl->pdestructor)
860 tbl->pdestructor(n);
861
862 call_rcu(&n->rcu, pneigh_destroy);
863 return 0;
864 }
865 }
866
867 mutex_unlock(&tbl->phash_lock);
868 return -ENOENT;
869 }
870
/*
 * Remove proxy entries when a device goes away.
 *
 * With @dev == NULL every entry is a candidate, otherwise only entries bound
 * to @dev.  With @skip_perm set, entries marked permanent are kept.
 * Candidates are unlinked under tbl->phash_lock and collected on a private
 * list; once the lock is dropped each one is passed to the optional
 * pdestructor and freed via call_rcu().
 */
static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	struct pneigh_entry *n, __rcu **np;
	LIST_HEAD(head);
	u32 h;

	mutex_lock(&tbl->phash_lock);

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = rcu_dereference_protected(*np, 1)) != NULL) {
			bool unlink = !(skip_perm && n->permanent) &&
				      (!dev || n->dev == dev);

			if (!unlink) {
				np = &n->next;
				continue;
			}

			/* np keeps pointing at the slot that now holds the successor. */
			rcu_assign_pointer(*np, n->next);
			list_add(&n->free_node, &head);
		}
	}

	mutex_unlock(&tbl->phash_lock);

	while (!list_empty(&head)) {
		n = list_first_entry(&head, typeof(*n), free_node);
		list_del(&n->free_node);

		if (tbl->pdestructor)
			tbl->pdestructor(n);

		call_rcu(&n->rcu, pneigh_destroy);
	}
}
907
neigh_parms_put(struct neigh_parms * parms)908 static inline void neigh_parms_put(struct neigh_parms *parms)
909 {
910 if (refcount_dec_and_test(&parms->refcnt))
911 kfree(parms);
912 }
913
914 /*
915 * neighbour must already be out of the table;
916 *
917 */
neigh_destroy(struct neighbour * neigh)918 void neigh_destroy(struct neighbour *neigh)
919 {
920 struct net_device *dev = neigh->dev;
921
922 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
923
924 if (!neigh->dead) {
925 pr_warn("Destroying alive neighbour %p\n", neigh);
926 dump_stack();
927 return;
928 }
929
930 if (neigh_del_timer(neigh))
931 pr_warn("Impossible event\n");
932
933 write_lock_bh(&neigh->lock);
934 __skb_queue_purge(&neigh->arp_queue);
935 write_unlock_bh(&neigh->lock);
936 neigh->arp_queue_len_bytes = 0;
937
938 if (dev->netdev_ops->ndo_neigh_destroy)
939 dev->netdev_ops->ndo_neigh_destroy(dev, neigh);
940
941 netdev_put(dev, &neigh->dev_tracker);
942 neigh_parms_put(neigh->parms);
943
944 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
945
946 atomic_dec(&neigh->tbl->entries);
947 kfree_rcu(neigh, rcu);
948 }
949 EXPORT_SYMBOL(neigh_destroy);
950
951 /* Neighbour state is suspicious;
952 disable fast path.
953
954 Called with write_locked neigh.
955 */
neigh_suspect(struct neighbour * neigh)956 static void neigh_suspect(struct neighbour *neigh)
957 {
958 neigh_dbg(2, "neigh %p is suspected\n", neigh);
959
960 WRITE_ONCE(neigh->output, neigh->ops->output);
961 }
962
963 /* Neighbour state is OK;
964 enable fast path.
965
966 Called with write_locked neigh.
967 */
neigh_connect(struct neighbour * neigh)968 static void neigh_connect(struct neighbour *neigh)
969 {
970 neigh_dbg(2, "neigh %p is connected\n", neigh);
971
972 WRITE_ONCE(neigh->output, neigh->ops->connected_output);
973 }
974
neigh_periodic_work(struct work_struct * work)975 static void neigh_periodic_work(struct work_struct *work)
976 {
977 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
978 struct neigh_hash_table *nht;
979 struct hlist_node *tmp;
980 struct neighbour *n;
981 unsigned int i;
982
983 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
984
985 write_lock_bh(&tbl->lock);
986 nht = rcu_dereference_protected(tbl->nht,
987 lockdep_is_held(&tbl->lock));
988
989 /*
990 * periodically recompute ReachableTime from random function
991 */
992
993 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
994 struct neigh_parms *p;
995
996 WRITE_ONCE(tbl->last_rand, jiffies);
997 list_for_each_entry(p, &tbl->parms_list, list)
998 p->reachable_time =
999 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1000 }
1001
1002 if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
1003 goto out;
1004
1005 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
1006 neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
1007 unsigned int state;
1008
1009 write_lock(&n->lock);
1010
1011 state = n->nud_state;
1012 if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
1013 (n->flags &
1014 (NTF_EXT_LEARNED | NTF_EXT_VALIDATED))) {
1015 write_unlock(&n->lock);
1016 continue;
1017 }
1018
1019 if (time_before(n->used, n->confirmed) &&
1020 time_is_before_eq_jiffies(n->confirmed))
1021 n->used = n->confirmed;
1022
1023 if (refcount_read(&n->refcnt) == 1 &&
1024 (state == NUD_FAILED ||
1025 !time_in_range_open(jiffies, n->used,
1026 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
1027 hlist_del_rcu(&n->hash);
1028 hlist_del_rcu(&n->dev_list);
1029 neigh_mark_dead(n);
1030 write_unlock(&n->lock);
1031 neigh_cleanup_and_release(n);
1032 continue;
1033 }
1034 write_unlock(&n->lock);
1035 }
1036 /*
1037 * It's fine to release lock here, even if hash table
1038 * grows while we are preempted.
1039 */
1040 write_unlock_bh(&tbl->lock);
1041 cond_resched();
1042 write_lock_bh(&tbl->lock);
1043 nht = rcu_dereference_protected(tbl->nht,
1044 lockdep_is_held(&tbl->lock));
1045 }
1046 out:
1047 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
1048 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
1049 * BASE_REACHABLE_TIME.
1050 */
1051 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1052 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
1053 write_unlock_bh(&tbl->lock);
1054 }
1055
neigh_max_probes(struct neighbour * n)1056 static __inline__ int neigh_max_probes(struct neighbour *n)
1057 {
1058 struct neigh_parms *p = n->parms;
1059 return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
1060 (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
1061 NEIGH_VAR(p, MCAST_PROBES));
1062 }
1063
neigh_invalidate(struct neighbour * neigh)1064 static void neigh_invalidate(struct neighbour *neigh)
1065 __releases(neigh->lock)
1066 __acquires(neigh->lock)
1067 {
1068 struct sk_buff *skb;
1069
1070 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
1071 neigh_dbg(2, "neigh %p is failed\n", neigh);
1072 neigh->updated = jiffies;
1073
1074 /* It is very thin place. report_unreachable is very complicated
1075 routine. Particularly, it can hit the same neighbour entry!
1076
1077 So that, we try to be accurate and avoid dead loop. --ANK
1078 */
1079 while (neigh->nud_state == NUD_FAILED &&
1080 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1081 write_unlock(&neigh->lock);
1082 neigh->ops->error_report(neigh, skb);
1083 write_lock(&neigh->lock);
1084 }
1085 __skb_queue_purge(&neigh->arp_queue);
1086 neigh->arp_queue_len_bytes = 0;
1087 }
1088
neigh_probe(struct neighbour * neigh)1089 static void neigh_probe(struct neighbour *neigh)
1090 __releases(neigh->lock)
1091 {
1092 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
1093 /* keep skb alive even if arp_queue overflows */
1094 if (skb)
1095 skb = skb_clone(skb, GFP_ATOMIC);
1096 write_unlock(&neigh->lock);
1097 if (neigh->ops->solicit)
1098 neigh->ops->solicit(neigh, skb);
1099 atomic_inc(&neigh->probes);
1100 consume_skb(skb);
1101 }
1102
1103 /* Called when a timer expires for a neighbour entry. */
1104
neigh_timer_handler(struct timer_list * t)1105 static void neigh_timer_handler(struct timer_list *t)
1106 {
1107 unsigned long now, next;
1108 struct neighbour *neigh = timer_container_of(neigh, t, timer);
1109 unsigned int state;
1110 int notify = 0;
1111
1112 write_lock(&neigh->lock);
1113
1114 state = neigh->nud_state;
1115 now = jiffies;
1116 next = now + HZ;
1117
1118 if (!(state & NUD_IN_TIMER))
1119 goto out;
1120
1121 if (state & NUD_REACHABLE) {
1122 if (time_before_eq(now,
1123 neigh->confirmed + neigh->parms->reachable_time)) {
1124 neigh_dbg(2, "neigh %p is still alive\n", neigh);
1125 next = neigh->confirmed + neigh->parms->reachable_time;
1126 } else if (time_before_eq(now,
1127 neigh->used +
1128 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1129 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1130 WRITE_ONCE(neigh->nud_state, NUD_DELAY);
1131 neigh->updated = jiffies;
1132 neigh_suspect(neigh);
1133 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
1134 } else {
1135 neigh_dbg(2, "neigh %p is suspected\n", neigh);
1136 WRITE_ONCE(neigh->nud_state, NUD_STALE);
1137 neigh->updated = jiffies;
1138 neigh_suspect(neigh);
1139 notify = 1;
1140 }
1141 } else if (state & NUD_DELAY) {
1142 if (time_before_eq(now,
1143 neigh->confirmed +
1144 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
1145 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
1146 WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
1147 neigh->updated = jiffies;
1148 neigh_connect(neigh);
1149 notify = 1;
1150 next = neigh->confirmed + neigh->parms->reachable_time;
1151 } else {
1152 neigh_dbg(2, "neigh %p is probed\n", neigh);
1153 WRITE_ONCE(neigh->nud_state, NUD_PROBE);
1154 neigh->updated = jiffies;
1155 atomic_set(&neigh->probes, 0);
1156 notify = 1;
1157 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1158 HZ/100);
1159 }
1160 } else {
1161 /* NUD_PROBE|NUD_INCOMPLETE */
1162 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
1163 }
1164
1165 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
1166 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
1167 if (neigh->nud_state == NUD_PROBE &&
1168 neigh->flags & NTF_EXT_VALIDATED) {
1169 WRITE_ONCE(neigh->nud_state, NUD_STALE);
1170 neigh->updated = jiffies;
1171 } else {
1172 WRITE_ONCE(neigh->nud_state, NUD_FAILED);
1173 neigh_invalidate(neigh);
1174 }
1175 notify = 1;
1176 goto out;
1177 }
1178
1179 if (neigh->nud_state & NUD_IN_TIMER) {
1180 if (time_before(next, jiffies + HZ/100))
1181 next = jiffies + HZ/100;
1182 if (!mod_timer(&neigh->timer, next))
1183 neigh_hold(neigh);
1184 }
1185 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
1186 neigh_probe(neigh);
1187 } else {
1188 out:
1189 write_unlock(&neigh->lock);
1190 }
1191
1192 if (notify)
1193 neigh_update_notify(neigh, 0);
1194
1195 trace_neigh_timer_handler(neigh, 0);
1196
1197 neigh_release(neigh);
1198 }
1199
__neigh_event_send(struct neighbour * neigh,struct sk_buff * skb,const bool immediate_ok)1200 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
1201 const bool immediate_ok)
1202 {
1203 int rc;
1204 bool immediate_probe = false;
1205
1206 write_lock_bh(&neigh->lock);
1207
1208 rc = 0;
1209 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
1210 goto out_unlock_bh;
1211 if (neigh->dead)
1212 goto out_dead;
1213
1214 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
1215 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
1216 NEIGH_VAR(neigh->parms, APP_PROBES)) {
1217 unsigned long next, now = jiffies;
1218
1219 atomic_set(&neigh->probes,
1220 NEIGH_VAR(neigh->parms, UCAST_PROBES));
1221 neigh_del_timer(neigh);
1222 WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
1223 neigh->updated = now;
1224 if (!immediate_ok) {
1225 next = now + 1;
1226 } else {
1227 immediate_probe = true;
1228 next = now + max(NEIGH_VAR(neigh->parms,
1229 RETRANS_TIME),
1230 HZ / 100);
1231 }
1232 neigh_add_timer(neigh, next);
1233 } else {
1234 WRITE_ONCE(neigh->nud_state, NUD_FAILED);
1235 neigh->updated = jiffies;
1236 write_unlock_bh(&neigh->lock);
1237
1238 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
1239 return 1;
1240 }
1241 } else if (neigh->nud_state & NUD_STALE) {
1242 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1243 neigh_del_timer(neigh);
1244 WRITE_ONCE(neigh->nud_state, NUD_DELAY);
1245 neigh->updated = jiffies;
1246 neigh_add_timer(neigh, jiffies +
1247 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1248 }
1249
1250 if (neigh->nud_state == NUD_INCOMPLETE) {
1251 if (skb) {
1252 while (neigh->arp_queue_len_bytes + skb->truesize >
1253 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1254 struct sk_buff *buff;
1255
1256 buff = __skb_dequeue(&neigh->arp_queue);
1257 if (!buff)
1258 break;
1259 neigh->arp_queue_len_bytes -= buff->truesize;
1260 kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
1261 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1262 }
1263 skb_dst_force(skb);
1264 __skb_queue_tail(&neigh->arp_queue, skb);
1265 neigh->arp_queue_len_bytes += skb->truesize;
1266 }
1267 rc = 1;
1268 }
1269 out_unlock_bh:
1270 if (immediate_probe)
1271 neigh_probe(neigh);
1272 else
1273 write_unlock(&neigh->lock);
1274 local_bh_enable();
1275 trace_neigh_event_send_done(neigh, rc);
1276 return rc;
1277
1278 out_dead:
1279 if (neigh->nud_state & NUD_STALE)
1280 goto out_unlock_bh;
1281 write_unlock_bh(&neigh->lock);
1282 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
1283 trace_neigh_event_send_dead(neigh, 1);
1284 return 1;
1285 }
1286 EXPORT_SYMBOL(__neigh_event_send);
1287
neigh_update_hhs(struct neighbour * neigh)1288 static void neigh_update_hhs(struct neighbour *neigh)
1289 {
1290 struct hh_cache *hh;
1291 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1292 = NULL;
1293
1294 if (neigh->dev->header_ops)
1295 update = neigh->dev->header_ops->cache_update;
1296
1297 if (update) {
1298 hh = &neigh->hh;
1299 if (READ_ONCE(hh->hh_len)) {
1300 write_seqlock_bh(&hh->hh_lock);
1301 update(hh, neigh->dev, neigh->ha);
1302 write_sequnlock_bh(&hh->hh_lock);
1303 }
1304 }
1305 }
1306
1307 /* Generic update routine.
1308 -- lladdr is new lladdr or NULL, if it is not supplied.
1309 -- new is new state.
1310 -- flags
1311 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1312 if it is different.
1313 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1314 lladdr instead of overriding it
1315 if it is different.
1316 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1317 NEIGH_UPDATE_F_USE means that the entry is user triggered.
1318 NEIGH_UPDATE_F_MANAGED means that the entry will be auto-refreshed.
1319 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1320 NTF_ROUTER flag.
1321 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1322 a router.
1323 NEIGH_UPDATE_F_EXT_VALIDATED means that the entry will not be removed
1324 or invalidated.
1325
1326 Caller MUST hold reference count on the entry.
1327 */
__neigh_update(struct neighbour * neigh,const u8 * lladdr,u8 new,u32 flags,u32 nlmsg_pid,struct netlink_ext_ack * extack)1328 static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
1329 u8 new, u32 flags, u32 nlmsg_pid,
1330 struct netlink_ext_ack *extack)
1331 {
1332 bool gc_update = false, managed_update = false;
1333 int update_isrouter = 0;
1334 struct net_device *dev;
1335 int err, notify = 0;
1336 u8 old;
1337
1338 trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);
1339
1340 write_lock_bh(&neigh->lock);
1341
1342 dev = neigh->dev;
1343 old = neigh->nud_state;
1344 err = -EPERM;
1345
1346 if (neigh->dead) {
1347 NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
1348 new = old;
1349 goto out;
1350 }
1351 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1352 (old & (NUD_NOARP | NUD_PERMANENT)))
1353 goto out;
1354
1355 neigh_update_flags(neigh, flags, ¬ify, &gc_update, &managed_update);
1356 if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
1357 new = old & ~NUD_PERMANENT;
1358 WRITE_ONCE(neigh->nud_state, new);
1359 err = 0;
1360 goto out;
1361 }
1362
1363 if (!(new & NUD_VALID)) {
1364 neigh_del_timer(neigh);
1365 if (old & NUD_CONNECTED)
1366 neigh_suspect(neigh);
1367 WRITE_ONCE(neigh->nud_state, new);
1368 err = 0;
1369 notify = old & NUD_VALID;
1370 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1371 (new & NUD_FAILED)) {
1372 neigh_invalidate(neigh);
1373 notify = 1;
1374 }
1375 goto out;
1376 }
1377
1378 /* Compare new lladdr with cached one */
1379 if (!dev->addr_len) {
1380 /* First case: device needs no address. */
1381 lladdr = neigh->ha;
1382 } else if (lladdr) {
1383 /* The second case: if something is already cached
1384 and a new address is proposed:
1385 - compare new & old
1386 - if they are different, check override flag
1387 */
1388 if ((old & NUD_VALID) &&
1389 !memcmp(lladdr, neigh->ha, dev->addr_len))
1390 lladdr = neigh->ha;
1391 } else {
1392 /* No address is supplied; if we know something,
1393 use it, otherwise discard the request.
1394 */
1395 err = -EINVAL;
1396 if (!(old & NUD_VALID)) {
1397 NL_SET_ERR_MSG(extack, "No link layer address given");
1398 goto out;
1399 }
1400 lladdr = neigh->ha;
1401 }
1402
1403 /* Update confirmed timestamp for neighbour entry after we
1404 * received ARP packet even if it doesn't change IP to MAC binding.
1405 */
1406 if (new & NUD_CONNECTED)
1407 neigh->confirmed = jiffies;
1408
1409 /* If entry was valid and address is not changed,
1410 do not change entry state, if new one is STALE.
1411 */
1412 err = 0;
1413 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1414 if (old & NUD_VALID) {
1415 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1416 update_isrouter = 0;
1417 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1418 (old & NUD_CONNECTED)) {
1419 lladdr = neigh->ha;
1420 new = NUD_STALE;
1421 } else
1422 goto out;
1423 } else {
1424 if (lladdr == neigh->ha && new == NUD_STALE &&
1425 !(flags & NEIGH_UPDATE_F_ADMIN))
1426 new = old;
1427 }
1428 }
1429
1430 /* Update timestamp only once we know we will make a change to the
1431 * neighbour entry. Otherwise we risk to move the locktime window with
1432 * noop updates and ignore relevant ARP updates.
1433 */
1434 if (new != old || lladdr != neigh->ha)
1435 neigh->updated = jiffies;
1436
1437 if (new != old) {
1438 neigh_del_timer(neigh);
1439 if (new & NUD_PROBE)
1440 atomic_set(&neigh->probes, 0);
1441 if (new & NUD_IN_TIMER)
1442 neigh_add_timer(neigh, (jiffies +
1443 ((new & NUD_REACHABLE) ?
1444 neigh->parms->reachable_time :
1445 0)));
1446 WRITE_ONCE(neigh->nud_state, new);
1447 notify = 1;
1448 }
1449
1450 if (lladdr != neigh->ha) {
1451 write_seqlock(&neigh->ha_lock);
1452 memcpy(&neigh->ha, lladdr, dev->addr_len);
1453 write_sequnlock(&neigh->ha_lock);
1454 neigh_update_hhs(neigh);
1455 if (!(new & NUD_CONNECTED))
1456 neigh->confirmed = jiffies -
1457 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1458 notify = 1;
1459 }
1460 if (new == old)
1461 goto out;
1462 if (new & NUD_CONNECTED)
1463 neigh_connect(neigh);
1464 else
1465 neigh_suspect(neigh);
1466 if (!(old & NUD_VALID)) {
1467 struct sk_buff *skb;
1468
1469 /* Again: avoid dead loop if something went wrong */
1470
1471 while (neigh->nud_state & NUD_VALID &&
1472 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1473 struct dst_entry *dst = skb_dst(skb);
1474 struct neighbour *n2, *n1 = neigh;
1475 write_unlock_bh(&neigh->lock);
1476
1477 rcu_read_lock();
1478
1479 /* Why not just use 'neigh' as-is? The problem is that
1480 * things such as shaper, eql, and sch_teql can end up
1481 * using alternative, different, neigh objects to output
1482 * the packet in the output path. So what we need to do
1483 * here is re-lookup the top-level neigh in the path so
1484 * we can reinject the packet there.
1485 */
1486 n2 = NULL;
1487 if (dst &&
1488 READ_ONCE(dst->obsolete) != DST_OBSOLETE_DEAD) {
1489 n2 = dst_neigh_lookup_skb(dst, skb);
1490 if (n2)
1491 n1 = n2;
1492 }
1493 READ_ONCE(n1->output)(n1, skb);
1494 if (n2)
1495 neigh_release(n2);
1496 rcu_read_unlock();
1497
1498 write_lock_bh(&neigh->lock);
1499 }
1500 __skb_queue_purge(&neigh->arp_queue);
1501 neigh->arp_queue_len_bytes = 0;
1502 }
1503 out:
1504 if (update_isrouter)
1505 neigh_update_is_router(neigh, flags, ¬ify);
1506 write_unlock_bh(&neigh->lock);
1507 if (((new ^ old) & NUD_PERMANENT) || gc_update)
1508 neigh_update_gc_list(neigh);
1509 if (managed_update)
1510 neigh_update_managed_list(neigh);
1511 if (notify)
1512 neigh_update_notify(neigh, nlmsg_pid);
1513 trace_neigh_update_done(neigh, err);
1514 return err;
1515 }
1516
/* Exported form of __neigh_update() for callers that have no netlink
 * extended ack to fill in: forwards every argument unchanged and passes a
 * NULL extack. Returns whatever __neigh_update() returns.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	struct netlink_ext_ack *no_extack = NULL;

	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, no_extack);
}
EXPORT_SYMBOL(neigh_update);
1523
1524 /* Update the neigh to listen temporarily for probe responses, even if it is
1525 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
1526 */
__neigh_set_probe_once(struct neighbour * neigh)1527 void __neigh_set_probe_once(struct neighbour *neigh)
1528 {
1529 if (neigh->dead)
1530 return;
1531 neigh->updated = jiffies;
1532 if (!(neigh->nud_state & NUD_FAILED))
1533 return;
1534 WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
1535 atomic_set(&neigh->probes, neigh_max_probes(neigh));
1536 neigh_add_timer(neigh,
1537 jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
1538 HZ/100));
1539 }
1540 EXPORT_SYMBOL(__neigh_set_probe_once);
1541
/* Look up the neighbour for @saddr on @dev in @tbl and mark it NUD_STALE
 * with @lladdr (override allowed).
 *
 * The lookup is asked to create a missing entry only when an address was
 * supplied or the device has a zero address length. Returns the entry as
 * handed back by __neigh_lookup(), or NULL if there is none.
 */
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	int may_create = lladdr || !dev->addr_len;
	struct neighbour *neigh;

	neigh = __neigh_lookup(tbl, saddr, dev, may_create);
	if (!neigh)
		return NULL;

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
1554
1555 /* called with read_lock_bh(&n->lock); */
neigh_hh_init(struct neighbour * n)1556 static void neigh_hh_init(struct neighbour *n)
1557 {
1558 struct net_device *dev = n->dev;
1559 __be16 prot = n->tbl->protocol;
1560 struct hh_cache *hh = &n->hh;
1561
1562 write_lock_bh(&n->lock);
1563
1564 /* Only one thread can come in here and initialize the
1565 * hh_cache entry.
1566 */
1567 if (!hh->hh_len)
1568 dev->header_ops->cache(n, hh, prot);
1569
1570 write_unlock_bh(&n->lock);
1571 }
1572
1573 /* Slow and careful. */
1574
neigh_resolve_output(struct neighbour * neigh,struct sk_buff * skb)1575 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1576 {
1577 int rc = 0;
1578
1579 if (!neigh_event_send(neigh, skb)) {
1580 int err;
1581 struct net_device *dev = neigh->dev;
1582 unsigned int seq;
1583
1584 if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
1585 neigh_hh_init(neigh);
1586
1587 do {
1588 __skb_pull(skb, skb_network_offset(skb));
1589 seq = read_seqbegin(&neigh->ha_lock);
1590 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1591 neigh->ha, NULL, skb->len);
1592 } while (read_seqretry(&neigh->ha_lock, seq));
1593
1594 if (err >= 0)
1595 rc = dev_queue_xmit(skb);
1596 else
1597 goto out_kfree_skb;
1598 }
1599 out:
1600 return rc;
1601 out_kfree_skb:
1602 rc = -EINVAL;
1603 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
1604 goto out;
1605 }
1606 EXPORT_SYMBOL(neigh_resolve_output);
1607
1608 /* As fast as possible without hh cache */
1609
neigh_connected_output(struct neighbour * neigh,struct sk_buff * skb)1610 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1611 {
1612 struct net_device *dev = neigh->dev;
1613 unsigned int seq;
1614 int err;
1615
1616 do {
1617 __skb_pull(skb, skb_network_offset(skb));
1618 seq = read_seqbegin(&neigh->ha_lock);
1619 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1620 neigh->ha, NULL, skb->len);
1621 } while (read_seqretry(&neigh->ha_lock, seq));
1622
1623 if (err >= 0)
1624 err = dev_queue_xmit(skb);
1625 else {
1626 err = -EINVAL;
1627 kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_HH_FILLFAIL);
1628 }
1629 return err;
1630 }
1631 EXPORT_SYMBOL(neigh_connected_output);
1632
/* Output routine that adds no link-layer header: @neigh is not consulted
 * and @skb goes straight to dev_queue_xmit(), whose result is returned.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = dev_queue_xmit(skb);

	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1638
neigh_managed_work(struct work_struct * work)1639 static void neigh_managed_work(struct work_struct *work)
1640 {
1641 struct neigh_table *tbl = container_of(work, struct neigh_table,
1642 managed_work.work);
1643 struct neighbour *neigh;
1644
1645 write_lock_bh(&tbl->lock);
1646 list_for_each_entry(neigh, &tbl->managed_list, managed_list)
1647 neigh_event_send_probe(neigh, NULL, false);
1648 queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
1649 NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
1650 write_unlock_bh(&tbl->lock);
1651 }
1652
neigh_proxy_process(struct timer_list * t)1653 static void neigh_proxy_process(struct timer_list *t)
1654 {
1655 struct neigh_table *tbl = timer_container_of(tbl, t, proxy_timer);
1656 long sched_next = 0;
1657 unsigned long now = jiffies;
1658 struct sk_buff *skb, *n;
1659
1660 spin_lock(&tbl->proxy_queue.lock);
1661
1662 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1663 long tdif = NEIGH_CB(skb)->sched_next - now;
1664
1665 if (tdif <= 0) {
1666 struct net_device *dev = skb->dev;
1667
1668 neigh_parms_qlen_dec(dev, tbl->family);
1669 __skb_unlink(skb, &tbl->proxy_queue);
1670
1671 if (tbl->proxy_redo && netif_running(dev)) {
1672 rcu_read_lock();
1673 tbl->proxy_redo(skb);
1674 rcu_read_unlock();
1675 } else {
1676 kfree_skb(skb);
1677 }
1678
1679 dev_put(dev);
1680 } else if (!sched_next || tdif < sched_next)
1681 sched_next = tdif;
1682 }
1683 timer_delete(&tbl->proxy_timer);
1684 if (sched_next)
1685 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1686 spin_unlock(&tbl->proxy_queue.lock);
1687 }
1688
neigh_proxy_delay(struct neigh_parms * p)1689 static unsigned long neigh_proxy_delay(struct neigh_parms *p)
1690 {
1691 /* If proxy_delay is zero, do not call get_random_u32_below()
1692 * as it is undefined behavior.
1693 */
1694 unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);
1695
1696 return proxy_delay ?
1697 jiffies + get_random_u32_below(proxy_delay) : jiffies;
1698 }
1699
pneigh_enqueue(struct neigh_table * tbl,struct neigh_parms * p,struct sk_buff * skb)1700 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1701 struct sk_buff *skb)
1702 {
1703 unsigned long sched_next = neigh_proxy_delay(p);
1704
1705 if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1706 kfree_skb(skb);
1707 return;
1708 }
1709
1710 NEIGH_CB(skb)->sched_next = sched_next;
1711 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1712
1713 spin_lock(&tbl->proxy_queue.lock);
1714 if (timer_delete(&tbl->proxy_timer)) {
1715 if (time_before(tbl->proxy_timer.expires, sched_next))
1716 sched_next = tbl->proxy_timer.expires;
1717 }
1718 skb_dst_drop(skb);
1719 dev_hold(skb->dev);
1720 __skb_queue_tail(&tbl->proxy_queue, skb);
1721 p->qlen++;
1722 mod_timer(&tbl->proxy_timer, sched_next);
1723 spin_unlock(&tbl->proxy_queue.lock);
1724 }
1725 EXPORT_SYMBOL(pneigh_enqueue);
1726
lookup_neigh_parms(struct neigh_table * tbl,struct net * net,int ifindex)1727 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1728 struct net *net, int ifindex)
1729 {
1730 struct neigh_parms *p;
1731
1732 list_for_each_entry(p, &tbl->parms_list, list) {
1733 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1734 (!p->dev && !ifindex && net_eq(net, &init_net)))
1735 return p;
1736 }
1737
1738 return NULL;
1739 }
1740
neigh_parms_alloc(struct net_device * dev,struct neigh_table * tbl)1741 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1742 struct neigh_table *tbl)
1743 {
1744 struct neigh_parms *p;
1745 struct net *net = dev_net(dev);
1746 const struct net_device_ops *ops = dev->netdev_ops;
1747
1748 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1749 if (p) {
1750 p->tbl = tbl;
1751 refcount_set(&p->refcnt, 1);
1752 p->reachable_time =
1753 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1754 p->qlen = 0;
1755 netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
1756 p->dev = dev;
1757 write_pnet(&p->net, net);
1758 p->sysctl_table = NULL;
1759
1760 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1761 netdev_put(dev, &p->dev_tracker);
1762 kfree(p);
1763 return NULL;
1764 }
1765
1766 write_lock_bh(&tbl->lock);
1767 list_add(&p->list, &tbl->parms.list);
1768 write_unlock_bh(&tbl->lock);
1769
1770 neigh_parms_data_state_cleanall(p);
1771 }
1772 return p;
1773 }
1774 EXPORT_SYMBOL(neigh_parms_alloc);
1775
neigh_rcu_free_parms(struct rcu_head * head)1776 static void neigh_rcu_free_parms(struct rcu_head *head)
1777 {
1778 struct neigh_parms *parms =
1779 container_of(head, struct neigh_parms, rcu_head);
1780
1781 neigh_parms_put(parms);
1782 }
1783
neigh_parms_release(struct neigh_table * tbl,struct neigh_parms * parms)1784 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1785 {
1786 if (!parms || parms == &tbl->parms)
1787 return;
1788 write_lock_bh(&tbl->lock);
1789 list_del(&parms->list);
1790 parms->dead = 1;
1791 write_unlock_bh(&tbl->lock);
1792 netdev_put(parms->dev, &parms->dev_tracker);
1793 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1794 }
1795 EXPORT_SYMBOL(neigh_parms_release);
1796
1797 static struct lock_class_key neigh_table_proxy_queue_class;
1798
1799 static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;
1800
neigh_table_init(int index,struct neigh_table * tbl)1801 void neigh_table_init(int index, struct neigh_table *tbl)
1802 {
1803 unsigned long now = jiffies;
1804 unsigned long phsize;
1805
1806 INIT_LIST_HEAD(&tbl->parms_list);
1807 INIT_LIST_HEAD(&tbl->gc_list);
1808 INIT_LIST_HEAD(&tbl->managed_list);
1809
1810 list_add(&tbl->parms.list, &tbl->parms_list);
1811 write_pnet(&tbl->parms.net, &init_net);
1812 refcount_set(&tbl->parms.refcnt, 1);
1813 tbl->parms.reachable_time =
1814 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1815 tbl->parms.qlen = 0;
1816
1817 tbl->stats = alloc_percpu(struct neigh_statistics);
1818 if (!tbl->stats)
1819 panic("cannot create neighbour cache statistics");
1820
1821 #ifdef CONFIG_PROC_FS
1822 if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
1823 &neigh_stat_seq_ops, tbl))
1824 panic("cannot create neighbour proc dir entry");
1825 #endif
1826
1827 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1828
1829 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1830 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1831
1832 if (!tbl->nht || !tbl->phash_buckets)
1833 panic("cannot allocate neighbour cache hashes");
1834
1835 if (!tbl->entry_size)
1836 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1837 tbl->key_len, NEIGH_PRIV_ALIGN);
1838 else
1839 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1840
1841 rwlock_init(&tbl->lock);
1842 mutex_init(&tbl->phash_lock);
1843
1844 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1845 queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
1846 tbl->parms.reachable_time);
1847 INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
1848 queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);
1849
1850 timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
1851 skb_queue_head_init_class(&tbl->proxy_queue,
1852 &neigh_table_proxy_queue_class);
1853
1854 tbl->last_flush = now;
1855 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1856
1857 rcu_assign_pointer(neigh_tables[index], tbl);
1858 }
1859 EXPORT_SYMBOL(neigh_table_init);
1860
1861 /*
1862 * Only called from ndisc_cleanup(), which means this is dead code
1863 * because we no longer can unload IPv6 module.
1864 */
neigh_table_clear(int index,struct neigh_table * tbl)1865 int neigh_table_clear(int index, struct neigh_table *tbl)
1866 {
1867 RCU_INIT_POINTER(neigh_tables[index], NULL);
1868 synchronize_rcu();
1869
1870 /* It is not clean... Fix it to unload IPv6 module safely */
1871 cancel_delayed_work_sync(&tbl->managed_work);
1872 cancel_delayed_work_sync(&tbl->gc_work);
1873 timer_delete_sync(&tbl->proxy_timer);
1874 pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
1875 neigh_ifdown(tbl, NULL);
1876 if (atomic_read(&tbl->entries))
1877 pr_crit("neighbour leakage\n");
1878
1879 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1880 neigh_hash_free_rcu);
1881 tbl->nht = NULL;
1882
1883 kfree(tbl->phash_buckets);
1884 tbl->phash_buckets = NULL;
1885
1886 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1887
1888 free_percpu(tbl->stats);
1889 tbl->stats = NULL;
1890
1891 return 0;
1892 }
1893 EXPORT_SYMBOL(neigh_table_clear);
1894
neigh_find_table(int family)1895 static struct neigh_table *neigh_find_table(int family)
1896 {
1897 struct neigh_table *tbl = NULL;
1898
1899 switch (family) {
1900 case AF_INET:
1901 tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
1902 break;
1903 case AF_INET6:
1904 tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
1905 break;
1906 }
1907
1908 return tbl;
1909 }
1910
1911 const struct nla_policy nda_policy[NDA_MAX+1] = {
1912 [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID },
1913 [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1914 [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
1915 [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
1916 [NDA_PROBES] = { .type = NLA_U32 },
1917 [NDA_VLAN] = { .type = NLA_U16 },
1918 [NDA_PORT] = { .type = NLA_U16 },
1919 [NDA_VNI] = { .type = NLA_U32 },
1920 [NDA_IFINDEX] = { .type = NLA_U32 },
1921 [NDA_MASTER] = { .type = NLA_U32 },
1922 [NDA_PROTOCOL] = { .type = NLA_U8 },
1923 [NDA_NH_ID] = { .type = NLA_U32 },
1924 [NDA_FLAGS_EXT] = NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
1925 [NDA_FDB_EXT_ATTRS] = { .type = NLA_NESTED },
1926 };
1927
neigh_delete(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1928 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
1929 struct netlink_ext_ack *extack)
1930 {
1931 struct net *net = sock_net(skb->sk);
1932 struct ndmsg *ndm;
1933 struct nlattr *dst_attr;
1934 struct neigh_table *tbl;
1935 struct neighbour *neigh;
1936 struct net_device *dev = NULL;
1937 int err = -EINVAL;
1938
1939 ASSERT_RTNL();
1940 if (nlmsg_len(nlh) < sizeof(*ndm))
1941 goto out;
1942
1943 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1944 if (!dst_attr) {
1945 NL_SET_ERR_MSG(extack, "Network address not specified");
1946 goto out;
1947 }
1948
1949 ndm = nlmsg_data(nlh);
1950 if (ndm->ndm_ifindex) {
1951 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1952 if (dev == NULL) {
1953 err = -ENODEV;
1954 goto out;
1955 }
1956 }
1957
1958 tbl = neigh_find_table(ndm->ndm_family);
1959 if (tbl == NULL)
1960 return -EAFNOSUPPORT;
1961
1962 if (nla_len(dst_attr) < (int)tbl->key_len) {
1963 NL_SET_ERR_MSG(extack, "Invalid network address");
1964 goto out;
1965 }
1966
1967 if (ndm->ndm_flags & NTF_PROXY) {
1968 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1969 goto out;
1970 }
1971
1972 if (dev == NULL)
1973 goto out;
1974
1975 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1976 if (neigh == NULL) {
1977 err = -ENOENT;
1978 goto out;
1979 }
1980
1981 err = __neigh_update(neigh, NULL, NUD_FAILED,
1982 NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
1983 NETLINK_CB(skb).portid, extack);
1984 write_lock_bh(&tbl->lock);
1985 neigh_release(neigh);
1986 neigh_remove_one(neigh);
1987 write_unlock_bh(&tbl->lock);
1988
1989 out:
1990 return err;
1991 }
1992
neigh_add(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)1993 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
1994 struct netlink_ext_ack *extack)
1995 {
1996 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
1997 NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1998 struct net *net = sock_net(skb->sk);
1999 struct ndmsg *ndm;
2000 struct nlattr *tb[NDA_MAX+1];
2001 struct neigh_table *tbl;
2002 struct net_device *dev = NULL;
2003 struct neighbour *neigh;
2004 void *dst, *lladdr;
2005 u8 protocol = 0;
2006 u32 ndm_flags;
2007 int err;
2008
2009 ASSERT_RTNL();
2010 err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
2011 nda_policy, extack);
2012 if (err < 0)
2013 goto out;
2014
2015 err = -EINVAL;
2016 if (!tb[NDA_DST]) {
2017 NL_SET_ERR_MSG(extack, "Network address not specified");
2018 goto out;
2019 }
2020
2021 ndm = nlmsg_data(nlh);
2022 ndm_flags = ndm->ndm_flags;
2023 if (tb[NDA_FLAGS_EXT]) {
2024 u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);
2025
2026 BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
2027 (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
2028 hweight32(NTF_EXT_MASK)));
2029 ndm_flags |= (ext << NTF_EXT_SHIFT);
2030 }
2031 if (ndm->ndm_ifindex) {
2032 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
2033 if (dev == NULL) {
2034 err = -ENODEV;
2035 goto out;
2036 }
2037
2038 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
2039 NL_SET_ERR_MSG(extack, "Invalid link address");
2040 goto out;
2041 }
2042 }
2043
2044 tbl = neigh_find_table(ndm->ndm_family);
2045 if (tbl == NULL)
2046 return -EAFNOSUPPORT;
2047
2048 if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
2049 NL_SET_ERR_MSG(extack, "Invalid network address");
2050 goto out;
2051 }
2052
2053 dst = nla_data(tb[NDA_DST]);
2054 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
2055
2056 if (tb[NDA_PROTOCOL])
2057 protocol = nla_get_u8(tb[NDA_PROTOCOL]);
2058 if (ndm_flags & NTF_PROXY) {
2059 if (ndm_flags & (NTF_MANAGED | NTF_EXT_VALIDATED)) {
2060 NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
2061 goto out;
2062 }
2063
2064 err = pneigh_create(tbl, net, dst, dev, ndm_flags, protocol,
2065 !!(ndm->ndm_state & NUD_PERMANENT));
2066 goto out;
2067 }
2068
2069 if (!dev) {
2070 NL_SET_ERR_MSG(extack, "Device not specified");
2071 goto out;
2072 }
2073
2074 if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
2075 err = -EINVAL;
2076 goto out;
2077 }
2078
2079 neigh = neigh_lookup(tbl, dst, dev);
2080 if (neigh == NULL) {
2081 bool ndm_permanent = ndm->ndm_state & NUD_PERMANENT;
2082 bool exempt_from_gc = ndm_permanent ||
2083 ndm_flags & (NTF_EXT_LEARNED |
2084 NTF_EXT_VALIDATED);
2085
2086 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
2087 err = -ENOENT;
2088 goto out;
2089 }
2090 if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
2091 NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
2092 err = -EINVAL;
2093 goto out;
2094 }
2095 if (ndm_flags & NTF_EXT_VALIDATED) {
2096 u8 state = ndm->ndm_state;
2097
2098 /* NTF_USE and NTF_MANAGED will result in the neighbor
2099 * being created with an invalid state (NUD_NONE).
2100 */
2101 if (ndm_flags & (NTF_USE | NTF_MANAGED))
2102 state = NUD_NONE;
2103
2104 if (!(state & NUD_VALID)) {
2105 NL_SET_ERR_MSG(extack,
2106 "Cannot create externally validated neighbor with an invalid state");
2107 err = -EINVAL;
2108 goto out;
2109 }
2110 }
2111
2112 neigh = ___neigh_create(tbl, dst, dev,
2113 ndm_flags &
2114 (NTF_EXT_LEARNED | NTF_MANAGED |
2115 NTF_EXT_VALIDATED),
2116 exempt_from_gc, true);
2117 if (IS_ERR(neigh)) {
2118 err = PTR_ERR(neigh);
2119 goto out;
2120 }
2121 } else {
2122 if (nlh->nlmsg_flags & NLM_F_EXCL) {
2123 err = -EEXIST;
2124 neigh_release(neigh);
2125 goto out;
2126 }
2127 if (ndm_flags & NTF_EXT_VALIDATED) {
2128 u8 state = ndm->ndm_state;
2129
2130 /* NTF_USE and NTF_MANAGED do not update the existing
2131 * state other than clearing it if it was
2132 * NUD_PERMANENT.
2133 */
2134 if (ndm_flags & (NTF_USE | NTF_MANAGED))
2135 state = READ_ONCE(neigh->nud_state) & ~NUD_PERMANENT;
2136
2137 if (!(state & NUD_VALID)) {
2138 NL_SET_ERR_MSG(extack,
2139 "Cannot mark neighbor as externally validated with an invalid state");
2140 err = -EINVAL;
2141 neigh_release(neigh);
2142 goto out;
2143 }
2144 }
2145
2146 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
2147 flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
2148 NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
2149 }
2150
2151 if (protocol)
2152 neigh->protocol = protocol;
2153 if (ndm_flags & NTF_EXT_LEARNED)
2154 flags |= NEIGH_UPDATE_F_EXT_LEARNED;
2155 if (ndm_flags & NTF_ROUTER)
2156 flags |= NEIGH_UPDATE_F_ISROUTER;
2157 if (ndm_flags & NTF_MANAGED)
2158 flags |= NEIGH_UPDATE_F_MANAGED;
2159 if (ndm_flags & NTF_USE)
2160 flags |= NEIGH_UPDATE_F_USE;
2161 if (ndm_flags & NTF_EXT_VALIDATED)
2162 flags |= NEIGH_UPDATE_F_EXT_VALIDATED;
2163
2164 err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
2165 NETLINK_CB(skb).portid, extack);
2166 if (!err && ndm_flags & (NTF_USE | NTF_MANAGED))
2167 neigh_event_send(neigh, NULL);
2168 neigh_release(neigh);
2169 out:
2170 return err;
2171 }
2172
neightbl_fill_parms(struct sk_buff * skb,struct neigh_parms * parms)2173 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
2174 {
2175 struct nlattr *nest;
2176
2177 nest = nla_nest_start_noflag(skb, NDTA_PARMS);
2178 if (nest == NULL)
2179 return -ENOBUFS;
2180
2181 if ((parms->dev &&
2182 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
2183 nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
2184 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
2185 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
2186 /* approximative value for deprecated QUEUE_LEN (in packets) */
2187 nla_put_u32(skb, NDTPA_QUEUE_LEN,
2188 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
2189 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
2190 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
2191 nla_put_u32(skb, NDTPA_UCAST_PROBES,
2192 NEIGH_VAR(parms, UCAST_PROBES)) ||
2193 nla_put_u32(skb, NDTPA_MCAST_PROBES,
2194 NEIGH_VAR(parms, MCAST_PROBES)) ||
2195 nla_put_u32(skb, NDTPA_MCAST_REPROBES,
2196 NEIGH_VAR(parms, MCAST_REPROBES)) ||
2197 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
2198 NDTPA_PAD) ||
2199 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
2200 NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
2201 nla_put_msecs(skb, NDTPA_GC_STALETIME,
2202 NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
2203 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
2204 NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
2205 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
2206 NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
2207 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
2208 NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
2209 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
2210 NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
2211 nla_put_msecs(skb, NDTPA_LOCKTIME,
2212 NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
2213 nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
2214 NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
2215 goto nla_put_failure;
2216 return nla_nest_end(skb, nest);
2217
2218 nla_put_failure:
2219 nla_nest_cancel(skb, nest);
2220 return -EMSGSIZE;
2221 }
2222
/*
 * Build one table-level netlink message of @type for @tbl in @skb.
 *
 * The message carries the table name, the GC interval and thresholds, an
 * NDTA_CONFIG snapshot, NDTA_STATS summed over every possible CPU, and the
 * table's own parameter set.  tbl->lock is held for reading while the
 * message is assembled.
 *
 * Returns 0 on success or -EMSGSIZE when something does not fit; in the
 * latter case the partially built message is cancelled.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct ndtmsg *hdr;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	hdr->ndtm_family = tbl->family;
	hdr->ndtm_pad1 = 0;
	hdr->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id))
		goto nla_put_failure;
	if (nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
			  NDTA_PAD))
		goto nla_put_failure;
	if (nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
	    nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
	    nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
		goto nla_put_failure;

	/* NDTA_CONFIG: sizes, entry count, ages and hash parameters. */
	{
		unsigned long now = jiffies;
		long since_flush = now - READ_ONCE(tbl->last_flush);
		long since_rand = now - READ_ONCE(tbl->last_rand);
		struct neigh_hash_table *nht;
		struct ndt_config cfg = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(since_flush),
			.ndtc_last_rand		= jiffies_to_msecs(since_rand),
			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
		};

		/* The hash table pointer is only read inside an RCU section. */
		rcu_read_lock();
		nht = rcu_dereference(tbl->nht);
		cfg.ndtc_hash_rnd = nht->hash_rnd[0];
		cfg.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock();

		if (nla_put(skb, NDTA_CONFIG, sizeof(cfg), &cfg))
			goto nla_put_failure;
	}

	/* NDTA_STATS: per-CPU counters accumulated into a single record. */
	{
		struct ndt_stats totals;
		int cpu;

		memset(&totals, 0, sizeof(totals));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics *pcpu;

			pcpu = per_cpu_ptr(tbl->stats, cpu);
			totals.ndts_allocs += READ_ONCE(pcpu->allocs);
			totals.ndts_destroys += READ_ONCE(pcpu->destroys);
			totals.ndts_hash_grows += READ_ONCE(pcpu->hash_grows);
			totals.ndts_res_failed += READ_ONCE(pcpu->res_failed);
			totals.ndts_lookups += READ_ONCE(pcpu->lookups);
			totals.ndts_hits += READ_ONCE(pcpu->hits);
			totals.ndts_rcv_probes_mcast += READ_ONCE(pcpu->rcv_probes_mcast);
			totals.ndts_rcv_probes_ucast += READ_ONCE(pcpu->rcv_probes_ucast);
			totals.ndts_periodic_gc_runs += READ_ONCE(pcpu->periodic_gc_runs);
			totals.ndts_forced_gc_runs += READ_ONCE(pcpu->forced_gc_runs);
			totals.ndts_table_fulls += READ_ONCE(pcpu->table_fulls);
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(totals), &totals,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	/* The table's own parameter set must not be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2312
/*
 * Build a netlink message of @type that carries only the table name and
 * the parameter set @parms.  tbl->lock is held for reading while the
 * message is assembled.
 *
 * Returns 0 on success or -EMSGSIZE (message cancelled) on failure.
 */
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *hdr;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	hdr->ndtm_family = tbl->family;
	hdr->ndtm_pad1 = 0;
	hdr->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0)
		goto errout;
	if (neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2345
/*
 * Attribute policy for the top-level NDTA_* attributes of a neighbour
 * table message; neightbl_set() passes it to nlmsg_parse_deprecated().
 * NDTA_GC_INTERVAL is a u64 that neightbl_set() reads with nla_get_msecs().
 */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
2354
/*
 * Attribute policy for the NDTPA_* attributes nested inside NDTA_PARMS;
 * neightbl_set() passes it to nla_parse_nested_deprecated().  Counts and
 * queue lengths are u32; the time-valued attributes are u64 and are read
 * with nla_get_msecs().
 */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_QUEUE_LENBYTES]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
	/* NOTE(review): .min is set without an explicit validation_type;
	 * confirm that the minimum of 1 is actually enforced by the parser.
	 */
	[NDTPA_INTERVAL_PROBE_TIME_MS]	= { .type = NLA_U64, .min = 1 },
};
2373
/*
 * Netlink handler that updates the tunables of one neighbour table.
 *
 * The table is selected by the mandatory NDTA_NAME attribute and, when
 * non-zero, the family in the message header.  An optional NDTA_PARMS nest
 * updates one parameter set (selected by NDTPA_IFINDEX, defaulting to 0);
 * the GC thresholds and interval are table-wide and may only be changed
 * from init_net.
 *
 * Returns 0 on success, a negative parse error, -EINVAL when NDTA_NAME is
 * missing, or -ENOENT when the table or parameter set is not found or a
 * table-wide setting is attempted outside init_net.  Note that in the last
 * case any NDTA_PARMS changes from the same message have already been
 * applied.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	/* Find the table matching the requested family (if any) and name. */
	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every attribute that is present; others are skipped. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Packet count is stored as a byte budget. */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				/* Let netevent listeners know about the new value. */
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_INTERVAL_PROBE_TIME_MS:
				NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* Table-wide GC settings are refused outside the initial namespace. */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));

	if (tb[NDTA_THRESH2])
		WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));

	if (tb[NDTA_THRESH3])
		WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));

	if (tb[NDTA_GC_INTERVAL])
		WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
2539
neightbl_valid_dump_info(const struct nlmsghdr * nlh,struct netlink_ext_ack * extack)2540 static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
2541 struct netlink_ext_ack *extack)
2542 {
2543 struct ndtmsg *ndtm;
2544
2545 ndtm = nlmsg_payload(nlh, sizeof(*ndtm));
2546 if (!ndtm) {
2547 NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
2548 return -EINVAL;
2549 }
2550
2551 if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
2552 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
2553 return -EINVAL;
2554 }
2555
2556 if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
2557 NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
2558 return -EINVAL;
2559 }
2560
2561 return 0;
2562 }
2563
neightbl_dump_info(struct sk_buff * skb,struct netlink_callback * cb)2564 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2565 {
2566 const struct nlmsghdr *nlh = cb->nlh;
2567 struct net *net = sock_net(skb->sk);
2568 int family, tidx, nidx = 0;
2569 int tbl_skip = cb->args[0];
2570 int neigh_skip = cb->args[1];
2571 struct neigh_table *tbl;
2572
2573 if (cb->strict_check) {
2574 int err = neightbl_valid_dump_info(nlh, cb->extack);
2575
2576 if (err < 0)
2577 return err;
2578 }
2579
2580 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2581
2582 for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
2583 struct neigh_parms *p;
2584
2585 tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
2586 if (!tbl)
2587 continue;
2588
2589 if (tidx < tbl_skip || (family && tbl->family != family))
2590 continue;
2591
2592 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2593 nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2594 NLM_F_MULTI) < 0)
2595 break;
2596
2597 nidx = 0;
2598 p = list_next_entry(&tbl->parms, list);
2599 list_for_each_entry_from(p, &tbl->parms_list, list) {
2600 if (!net_eq(neigh_parms_net(p), net))
2601 continue;
2602
2603 if (nidx < neigh_skip)
2604 goto next;
2605
2606 if (neightbl_fill_param_info(skb, tbl, p,
2607 NETLINK_CB(cb->skb).portid,
2608 nlh->nlmsg_seq,
2609 RTM_NEWNEIGHTBL,
2610 NLM_F_MULTI) < 0)
2611 goto out;
2612 next:
2613 nidx++;
2614 }
2615
2616 neigh_skip = 0;
2617 }
2618 out:
2619 cb->args[0] = tidx;
2620 cb->args[1] = nidx;
2621
2622 return skb->len;
2623 }
2624
/*
 * Build a netlink message of @type describing the neighbour entry @neigh.
 *
 * The header, NDA_DST, NDA_PROBES and NDA_CACHEINFO are always emitted;
 * NDA_LLADDR only when the entry state has a NUD_VALID bit set, and
 * NDA_PROTOCOL / NDA_FLAGS_EXT only when non-zero.  The state, link-layer
 * address and cache timestamps are sampled under neigh->lock.
 *
 * Returns 0 on success or -EMSGSIZE (message cancelled) when @skb is too
 * small.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo cache;
	u32 old_flags, ext_flags;
	struct nlmsghdr *nlh;
	struct ndmsg *hdr;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Split the flags into the header part and the extended part. */
	ext_flags = neigh->flags >> NTF_EXT_SHIFT;
	old_flags = neigh->flags & NTF_OLD_MASK;

	hdr = nlmsg_data(nlh);
	hdr->ndm_family = neigh->ops->family;
	hdr->ndm_pad1 = 0;
	hdr->ndm_pad2 = 0;
	hdr->ndm_flags = old_flags;
	hdr->ndm_type = neigh->type;
	hdr->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	hdr->ndm_state = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0)
			goto nla_put_failure_unlock;
	}

	cache.ndm_used = jiffies_to_clock_t(now - neigh->used);
	cache.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	cache.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
	cache.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)))
		goto nla_put_failure;
	if (nla_put(skb, NDA_CACHEINFO, sizeof(cache), &cache))
		goto nla_put_failure;

	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;
	if (ext_flags && nla_put_u32(skb, NDA_FLAGS_EXT, ext_flags))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure_unlock:
	read_unlock_bh(&neigh->lock);
nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2686
/*
 * Build a netlink message of @type describing the proxy entry @pn of @tbl.
 *
 * The header always has NTF_PROXY set, type RTN_UNICAST and state NUD_NONE;
 * the ifindex is 0 when the entry has no device.  NDA_DST is always
 * emitted, NDA_PROTOCOL and NDA_FLAGS_EXT only when non-zero.
 *
 * Returns 0 on success or -EMSGSIZE (message cancelled) when @skb is too
 * small.
 */
static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	u32 old_flags, ext_flags;
	struct nlmsghdr *nlh;
	struct ndmsg *hdr;
	u8 proto;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	/* Read the flags once, then split header and extended parts. */
	old_flags = READ_ONCE(pn->flags);
	ext_flags = old_flags >> NTF_EXT_SHIFT;
	old_flags &= NTF_OLD_MASK;

	hdr = nlmsg_data(nlh);
	hdr->ndm_family = tbl->family;
	hdr->ndm_pad1 = 0;
	hdr->ndm_pad2 = 0;
	hdr->ndm_flags = old_flags | NTF_PROXY;
	hdr->ndm_type = RTN_UNICAST;
	if (pn->dev)
		hdr->ndm_ifindex = pn->dev->ifindex;
	else
		hdr->ndm_ifindex = 0;
	hdr->ndm_state = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	proto = READ_ONCE(pn->protocol);
	if (proto && nla_put_u8(skb, NDA_PROTOCOL, proto))
		goto nla_put_failure;
	if (ext_flags && nla_put_u32(skb, NDA_FLAGS_EXT, ext_flags))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2729
/*
 * Announce a change to @neigh: first run the NETEVENT_NEIGH_UPDATE notifier
 * chain, then call __neigh_notify() with RTM_NEWNEIGH, no flags and
 * @nlmsg_pid.
 */
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
2735
neigh_master_filtered(struct net_device * dev,int master_idx)2736 static bool neigh_master_filtered(struct net_device *dev, int master_idx)
2737 {
2738 struct net_device *master;
2739
2740 if (!master_idx)
2741 return false;
2742
2743 master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;
2744
2745 /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another
2746 * invalid value for ifindex to denote "no master".
2747 */
2748 if (master_idx == -1)
2749 return !!master;
2750
2751 if (!master || master->ifindex != master_idx)
2752 return true;
2753
2754 return false;
2755 }
2756
neigh_ifindex_filtered(struct net_device * dev,int filter_idx)2757 static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
2758 {
2759 if (filter_idx && (!dev || dev->ifindex != filter_idx))
2760 return true;
2761
2762 return false;
2763 }
2764
/* Optional filters parsed from a neighbour dump request. */
struct neigh_dump_filter {
	/* From NDA_MASTER: 0 = no filter, -1 = only devices without a master. */
	int master_idx;
	/* From NDA_IFINDEX: 0 = no filter, else the only ifindex to report. */
	int dev_idx;
};
2769
neigh_dump_table(struct neigh_table * tbl,struct sk_buff * skb,struct netlink_callback * cb,struct neigh_dump_filter * filter)2770 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2771 struct netlink_callback *cb,
2772 struct neigh_dump_filter *filter)
2773 {
2774 struct net *net = sock_net(skb->sk);
2775 struct neighbour *n;
2776 int err = 0, h, s_h = cb->args[1];
2777 int idx, s_idx = idx = cb->args[2];
2778 struct neigh_hash_table *nht;
2779 unsigned int flags = NLM_F_MULTI;
2780
2781 if (filter->dev_idx || filter->master_idx)
2782 flags |= NLM_F_DUMP_FILTERED;
2783
2784 nht = rcu_dereference(tbl->nht);
2785
2786 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2787 if (h > s_h)
2788 s_idx = 0;
2789 idx = 0;
2790 neigh_for_each_in_bucket_rcu(n, &nht->hash_heads[h]) {
2791 if (idx < s_idx || !net_eq(dev_net(n->dev), net))
2792 goto next;
2793 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2794 neigh_master_filtered(n->dev, filter->master_idx))
2795 goto next;
2796 err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2797 cb->nlh->nlmsg_seq,
2798 RTM_NEWNEIGH, flags);
2799 if (err < 0)
2800 goto out;
2801 next:
2802 idx++;
2803 }
2804 }
2805 out:
2806 cb->args[1] = h;
2807 cb->args[2] = idx;
2808 return err;
2809 }
2810
pneigh_dump_table(struct neigh_table * tbl,struct sk_buff * skb,struct netlink_callback * cb,struct neigh_dump_filter * filter)2811 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2812 struct netlink_callback *cb,
2813 struct neigh_dump_filter *filter)
2814 {
2815 struct pneigh_entry *n;
2816 struct net *net = sock_net(skb->sk);
2817 int err = 0, h, s_h = cb->args[3];
2818 int idx, s_idx = idx = cb->args[4];
2819 unsigned int flags = NLM_F_MULTI;
2820
2821 if (filter->dev_idx || filter->master_idx)
2822 flags |= NLM_F_DUMP_FILTERED;
2823
2824 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2825 if (h > s_h)
2826 s_idx = 0;
2827 for (n = rcu_dereference(tbl->phash_buckets[h]), idx = 0;
2828 n;
2829 n = rcu_dereference(n->next)) {
2830 if (idx < s_idx || pneigh_net(n) != net)
2831 goto next;
2832 if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
2833 neigh_master_filtered(n->dev, filter->master_idx))
2834 goto next;
2835 err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2836 cb->nlh->nlmsg_seq,
2837 RTM_NEWNEIGH, flags, tbl);
2838 if (err < 0)
2839 goto out;
2840 next:
2841 idx++;
2842 }
2843 }
2844
2845 out:
2846 cb->args[3] = h;
2847 cb->args[4] = idx;
2848 return err;
2849 }
2850
/*
 * Parse a neighbour dump request and fill @filter from NDA_IFINDEX and
 * NDA_MASTER.
 *
 * With @strict_check the header must be a full ndmsg whose pad, ifindex,
 * state and type fields are zero and whose flags contain nothing but
 * NTF_PROXY; attributes are parsed strictly and any attribute other than
 * the two filters is rejected.  Without it the attributes are parsed with
 * the non-strict parser and unknown ones are ignored.
 *
 * Returns 0 on success or a negative error.
 */
static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
				bool strict_check,
				struct neigh_dump_filter *filter,
				struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	int attr, err;

	if (!strict_check) {
		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
					     NDA_MAX, nda_policy, extack);
	} else {
		struct ndmsg *hdr = nlmsg_payload(nlh, sizeof(*hdr));

		if (!hdr) {
			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
			return -EINVAL;
		}

		if (hdr->ndm_pad1 || hdr->ndm_pad2 || hdr->ndm_ifindex ||
		    hdr->ndm_state || hdr->ndm_type) {
			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
			return -EINVAL;
		}

		if (hdr->ndm_flags & ~NTF_PROXY) {
			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
			return -EINVAL;
		}

		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
						    tb, NDA_MAX, nda_policy,
						    extack);
	}
	if (err < 0)
		return err;

	for (attr = 0; attr <= NDA_MAX; ++attr) {
		if (!tb[attr])
			continue;

		/* all new attributes should require strict_check */
		if (attr == NDA_IFINDEX) {
			filter->dev_idx = nla_get_u32(tb[attr]);
		} else if (attr == NDA_MASTER) {
			filter->master_idx = nla_get_u32(tb[attr]);
		} else if (strict_check) {
			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
			return -EINVAL;
		}
	}

	return 0;
}
2911
neigh_dump_info(struct sk_buff * skb,struct netlink_callback * cb)2912 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2913 {
2914 const struct nlmsghdr *nlh = cb->nlh;
2915 struct neigh_dump_filter filter = {};
2916 struct neigh_table *tbl;
2917 int t, family, s_t;
2918 int proxy = 0;
2919 int err;
2920
2921 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2922
2923 /* check for full ndmsg structure presence, family member is
2924 * the same for both structures
2925 */
2926 if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
2927 ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
2928 proxy = 1;
2929
2930 err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
2931 if (err < 0 && cb->strict_check)
2932 return err;
2933 err = 0;
2934
2935 s_t = cb->args[0];
2936
2937 rcu_read_lock();
2938 for (t = 0; t < NEIGH_NR_TABLES; t++) {
2939 tbl = rcu_dereference(neigh_tables[t]);
2940
2941 if (!tbl)
2942 continue;
2943 if (t < s_t || (family && tbl->family != family))
2944 continue;
2945 if (t > s_t)
2946 memset(&cb->args[1], 0, sizeof(cb->args) -
2947 sizeof(cb->args[0]));
2948 if (proxy)
2949 err = pneigh_dump_table(tbl, skb, cb, &filter);
2950 else
2951 err = neigh_dump_table(tbl, skb, cb, &filter);
2952 if (err < 0)
2953 break;
2954 }
2955 rcu_read_unlock();
2956
2957 cb->args[0] = t;
2958 return err;
2959 }
2960
neigh_valid_get_req(const struct nlmsghdr * nlh,struct nlattr ** tb,struct netlink_ext_ack * extack)2961 static struct ndmsg *neigh_valid_get_req(const struct nlmsghdr *nlh,
2962 struct nlattr **tb,
2963 struct netlink_ext_ack *extack)
2964 {
2965 struct ndmsg *ndm;
2966 int err, i;
2967
2968 ndm = nlmsg_payload(nlh, sizeof(*ndm));
2969 if (!ndm) {
2970 NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
2971 return ERR_PTR(-EINVAL);
2972 }
2973
2974 if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
2975 ndm->ndm_type) {
2976 NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
2977 return ERR_PTR(-EINVAL);
2978 }
2979
2980 if (ndm->ndm_flags & ~NTF_PROXY) {
2981 NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
2982 return ERR_PTR(-EINVAL);
2983 }
2984
2985 if (!(ndm->ndm_flags & NTF_PROXY) && !ndm->ndm_ifindex) {
2986 NL_SET_ERR_MSG(extack, "No device specified");
2987 return ERR_PTR(-EINVAL);
2988 }
2989
2990 err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
2991 NDA_MAX, nda_policy, extack);
2992 if (err < 0)
2993 return ERR_PTR(err);
2994
2995 for (i = 0; i <= NDA_MAX; ++i) {
2996 switch (i) {
2997 case NDA_DST:
2998 if (!tb[i]) {
2999 NL_SET_ERR_ATTR_MISS(extack, NULL, NDA_DST);
3000 return ERR_PTR(-EINVAL);
3001 }
3002 break;
3003 default:
3004 if (!tb[i])
3005 continue;
3006
3007 NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
3008 return ERR_PTR(-EINVAL);
3009 }
3010 }
3011
3012 return ndm;
3013 }
3014
neigh_nlmsg_size(void)3015 static inline size_t neigh_nlmsg_size(void)
3016 {
3017 return NLMSG_ALIGN(sizeof(struct ndmsg))
3018 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3019 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
3020 + nla_total_size(sizeof(struct nda_cacheinfo))
3021 + nla_total_size(4) /* NDA_PROBES */
3022 + nla_total_size(4) /* NDA_FLAGS_EXT */
3023 + nla_total_size(1); /* NDA_PROTOCOL */
3024 }
3025
pneigh_nlmsg_size(void)3026 static inline size_t pneigh_nlmsg_size(void)
3027 {
3028 return NLMSG_ALIGN(sizeof(struct ndmsg))
3029 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
3030 + nla_total_size(4) /* NDA_FLAGS_EXT */
3031 + nla_total_size(1); /* NDA_PROTOCOL */
3032 }
3033
neigh_get(struct sk_buff * in_skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)3034 static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3035 struct netlink_ext_ack *extack)
3036 {
3037 struct net *net = sock_net(in_skb->sk);
3038 u32 pid = NETLINK_CB(in_skb).portid;
3039 struct nlattr *tb[NDA_MAX + 1];
3040 struct net_device *dev = NULL;
3041 u32 seq = nlh->nlmsg_seq;
3042 struct neigh_table *tbl;
3043 struct neighbour *neigh;
3044 struct sk_buff *skb;
3045 struct ndmsg *ndm;
3046 void *dst;
3047 int err;
3048
3049 ndm = neigh_valid_get_req(nlh, tb, extack);
3050 if (IS_ERR(ndm))
3051 return PTR_ERR(ndm);
3052
3053 if (ndm->ndm_flags & NTF_PROXY)
3054 skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
3055 else
3056 skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
3057 if (!skb)
3058 return -ENOBUFS;
3059
3060 rcu_read_lock();
3061
3062 tbl = neigh_find_table(ndm->ndm_family);
3063 if (!tbl) {
3064 NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
3065 err = -EAFNOSUPPORT;
3066 goto err_unlock;
3067 }
3068
3069 if (nla_len(tb[NDA_DST]) != (int)tbl->key_len) {
3070 NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
3071 err = -EINVAL;
3072 goto err_unlock;
3073 }
3074
3075 dst = nla_data(tb[NDA_DST]);
3076
3077 if (ndm->ndm_ifindex) {
3078 dev = dev_get_by_index_rcu(net, ndm->ndm_ifindex);
3079 if (!dev) {
3080 NL_SET_ERR_MSG(extack, "Unknown device ifindex");
3081 err = -ENODEV;
3082 goto err_unlock;
3083 }
3084 }
3085
3086 if (ndm->ndm_flags & NTF_PROXY) {
3087 struct pneigh_entry *pn;
3088
3089 pn = pneigh_lookup(tbl, net, dst, dev);
3090 if (!pn) {
3091 NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
3092 err = -ENOENT;
3093 goto err_unlock;
3094 }
3095
3096 err = pneigh_fill_info(skb, pn, pid, seq, RTM_NEWNEIGH, 0, tbl);
3097 if (err)
3098 goto err_unlock;
3099 } else {
3100 neigh = neigh_lookup(tbl, dst, dev);
3101 if (!neigh) {
3102 NL_SET_ERR_MSG(extack, "Neighbour entry not found");
3103 err = -ENOENT;
3104 goto err_unlock;
3105 }
3106
3107 err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
3108 neigh_release(neigh);
3109 if (err)
3110 goto err_unlock;
3111 }
3112
3113 rcu_read_unlock();
3114
3115 return rtnl_unicast(skb, net, pid);
3116 err_unlock:
3117 rcu_read_unlock();
3118 kfree_skb(skb);
3119 return err;
3120 }
3121
neigh_for_each(struct neigh_table * tbl,void (* cb)(struct neighbour *,void *),void * cookie)3122 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
3123 {
3124 int chain;
3125 struct neigh_hash_table *nht;
3126
3127 rcu_read_lock();
3128 nht = rcu_dereference(tbl->nht);
3129
3130 read_lock_bh(&tbl->lock); /* avoid resizes */
3131 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
3132 struct neighbour *n;
3133
3134 neigh_for_each_in_bucket(n, &nht->hash_heads[chain])
3135 cb(n, cookie);
3136 }
3137 read_unlock_bh(&tbl->lock);
3138 rcu_read_unlock();
3139 }
3140 EXPORT_SYMBOL(neigh_for_each);
3141
/*
 * Walk every entry of @tbl and release those for which @cb returns
 * non-zero.
 *
 * @cb is called with the entry's own lock held for writing.  A selected
 * entry is unlinked from its hash chain and its device list and marked
 * dead while that lock is still held; neigh_cleanup_and_release() is
 * called only after the lock has been dropped.
 *
 * The tbl->lock must be held as a writer and BH disabled.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	struct neigh_hash_table *nht;
	int chain;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct hlist_node *tmp;
		struct neighbour *n;

		/* The _safe iterator is needed because entries are unlinked. */
		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[chain]) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				hlist_del_rcu(&n->hash);
				hlist_del_rcu(&n->dev_list);
				neigh_mark_dead(n);
			}
			write_unlock(&n->lock);
			/* Final cleanup runs without the entry lock held. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
3172
/*
 * Transmit @skb on @dev towards @addr using the neighbour table selected
 * by @index.
 *
 * For an index below NEIGH_NR_TABLES the entry is looked up without taking
 * a reference (inside an RCU read-side section), created if missing, and
 * its output method is called.  For NEIGH_LINK_TABLE the link-layer header
 * is built directly from @addr and the packet is queued for transmission.
 *
 * Returns the result of the output / queueing call, a negative error from
 * entry creation or header construction (the skb is freed in those two
 * cases), or -EAFNOSUPPORT when the table slot is empty or @index matches
 * neither branch.
 *
 * NOTE(review): on the -EAFNOSUPPORT paths the skb is not freed here;
 * confirm that callers release it.
 */
int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;

	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		rcu_read_lock();
		tbl = rcu_dereference(neigh_tables[index]);
		if (!tbl)
			goto out_unlock;
		if (index == NEIGH_ARP_TABLE) {
			/* The ARP table is keyed by a 32-bit address. */
			u32 key = *((u32 *)addr);

			neigh = __ipv4_neigh_lookup_noref(dev, key);
		} else {
			neigh = __neigh_lookup_noref(tbl, addr, dev);
		}
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		/* Only meaningful when neigh is an error pointer. */
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock();
			goto out_kfree_skb;
		}
		err = READ_ONCE(neigh->output)(neigh, skb);
out_unlock:
		rcu_read_unlock();
	}
	else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);
3218
3219 #ifdef CONFIG_PROC_FS
3220
/*
 * Decide whether neighbour @n is to be shown by the seq_file iterator.
 *
 * Returns NULL when @n belongs to another network namespace, when the
 * optional sub-iterator rejects it, or when NEIGH_SEQ_SKIP_NOARP is set and
 * the entry has no NUD state bit other than NUD_NOARP.  Otherwise returns
 * @n, except that with a sub-iterator and a non-NULL @pos the value
 * returned by the sub-iterator is handed back instead.
 *
 * When @pos is NULL the sub-iterator is given a scratch position.
 */
static struct neighbour *neigh_get_valid(struct seq_file *seq,
					 struct neighbour *n,
					 loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	loff_t scratch = 0;
	void *sub;

	if (!net_eq(dev_net(n->dev), seq_file_net(seq)))
		return NULL;

	if (state->neigh_sub_iter) {
		sub = state->neigh_sub_iter(state, n, pos ? pos : &scratch);
		if (!sub)
			return NULL;
		if (pos)
			return sub;
	}

	if ((state->flags & NEIGH_SEQ_SKIP_NOARP) &&
	    !(READ_ONCE(n->nud_state) & ~NUD_NOARP))
		return NULL;

	return n;
}
3250
neigh_get_first(struct seq_file * seq)3251 static struct neighbour *neigh_get_first(struct seq_file *seq)
3252 {
3253 struct neigh_seq_state *state = seq->private;
3254 struct neigh_hash_table *nht = state->nht;
3255 struct neighbour *n, *tmp;
3256
3257 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
3258
3259 while (++state->bucket < (1 << nht->hash_shift)) {
3260 neigh_for_each_in_bucket(n, &nht->hash_heads[state->bucket]) {
3261 tmp = neigh_get_valid(seq, n, NULL);
3262 if (tmp)
3263 return tmp;
3264 }
3265 }
3266
3267 return NULL;
3268 }
3269
/*
 * Return the showable neighbour following @n, or NULL at the end.
 *
 * If a sub-iterator is installed and returns non-NULL for @n, @n itself is
 * returned and *pos is not decremented here.  Otherwise the rest of @n's
 * hash chain is searched, then the following buckets via
 * neigh_get_first().  When an entry is found and @pos is non-NULL, *pos is
 * decremented by one.
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct neighbour *next = NULL;

	if (state->neigh_sub_iter && state->neigh_sub_iter(state, n, pos))
		return n;

	hlist_for_each_entry_continue(n, hash) {
		next = neigh_get_valid(seq, n, pos);
		if (next)
			break;
	}

	/* Chain exhausted: move on to the following buckets. */
	if (!next)
		next = neigh_get_first(seq);

	if (next && pos)
		--(*pos);

	return next;
}
3299
/*
 * Find the neighbour at 1-based position *pos counted from the iterator's
 * current bucket state.  *pos is decremented for every entry stepped over;
 * the entry is returned once *pos reaches zero, NULL if the table runs out
 * first (leaving the remaining count in *pos).
 */
static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n;

	n = neigh_get_first(seq);
	if (!n)
		return NULL;

	for (--(*pos); *pos; ) {
		n = neigh_get_next(seq, n, pos);
		if (!n)
			return NULL;
	}

	return n;
}
3314
pneigh_get_first(struct seq_file * seq)3315 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
3316 {
3317 struct neigh_seq_state *state = seq->private;
3318 struct net *net = seq_file_net(seq);
3319 struct neigh_table *tbl = state->tbl;
3320 struct pneigh_entry *pn = NULL;
3321 int bucket;
3322
3323 state->flags |= NEIGH_SEQ_IS_PNEIGH;
3324 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
3325 pn = rcu_dereference(tbl->phash_buckets[bucket]);
3326
3327 while (pn && !net_eq(pneigh_net(pn), net))
3328 pn = rcu_dereference(pn->next);
3329 if (pn)
3330 break;
3331 }
3332 state->bucket = bucket;
3333
3334 return pn;
3335 }
3336
/*
 * Return the proxy-neighbour entry following @pn that belongs to the
 * seq_file's network namespace, or NULL at the end.  The rest of @pn's
 * chain is searched first, then the following buckets, advancing
 * state->bucket.  When an entry is found and @pos is non-NULL, *pos is
 * decremented by one.
 */
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;
	struct net *net = seq_file_net(seq);

	/* Remainder of the current chain. */
	for (pn = rcu_dereference(pn->next);
	     pn && !net_eq(pneigh_net(pn), net);
	     pn = rcu_dereference(pn->next))
		;

	/* Nothing left there: try each following bucket in turn. */
	while (!pn && ++state->bucket <= PNEIGH_HASHMASK) {
		for (pn = rcu_dereference(tbl->phash_buckets[state->bucket]);
		     pn && !net_eq(pneigh_net(pn), net);
		     pn = rcu_dereference(pn->next))
			;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}
3366
/*
 * Find the proxy-neighbour entry at 1-based position *pos.  *pos is
 * decremented for every entry stepped over; the entry is returned once
 * *pos reaches zero, NULL if the entries run out first.
 */
static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn;

	pn = pneigh_get_first(seq);
	if (!pn)
		return NULL;

	for (--(*pos); *pos; ) {
		pn = pneigh_get_next(seq, pn, pos);
		if (!pn)
			return NULL;
	}

	return pn;
}
3381
/*
 * Locate the entry at position *pos across neighbours first and, unless
 * NEIGH_SEQ_NEIGH_ONLY is set, proxy entries second.  Works on a private
 * copy of the position, so *pos itself is left unchanged.
 */
static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	loff_t idxpos = *pos;
	void *rc;

	rc = neigh_get_idx(seq, &idxpos);
	if (rc)
		return rc;

	if (state->flags & NEIGH_SEQ_NEIGH_ONLY)
		return NULL;

	/* idxpos now holds what is left after the neighbour entries. */
	return pneigh_get_idx(seq, &idxpos);
}
3394
/*
 * seq_file ->start helper for neighbour tables.
 *
 * Resets the iterator state for @tbl (with NEIGH_SEQ_IS_PNEIGH masked out
 * of @neigh_seq_flags), enters an RCU read-side section, snapshots the
 * hash table pointer and takes tbl->lock for reading with BH disabled.
 * Both are released by neigh_seq_stop().
 *
 * Returns SEQ_START_TOKEN for position 0, otherwise the entry at *pos or
 * NULL if there is none.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(tbl->lock)
	__acquires(rcu)
{
	struct neigh_seq_state *state = seq->private;

	state->flags = neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH;
	state->bucket = -1;
	state->tbl = tbl;

	rcu_read_lock();
	state->nht = rcu_dereference(tbl->nht);
	read_lock_bh(&tbl->lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	return neigh_get_idx_any(seq, pos);
}
EXPORT_SYMBOL(neigh_seq_start);
3412
/*
 * seq_file ->next helper for neighbour tables.
 *
 * From SEQ_START_TOKEN the first neighbour is returned.  While iterating
 * neighbours the next one is returned; when they run out the walk switches
 * to proxy entries unless NEIGH_SEQ_NEIGH_ONLY is set.  While iterating
 * proxy entries the next proxy entry is returned.  *pos is always
 * incremented; NULL is returned at the end.
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
	} else if (state->flags & NEIGH_SEQ_IS_PNEIGH) {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	} else {
		rc = neigh_get_next(seq, v, NULL);
		if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	}

	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
3439
neigh_seq_stop(struct seq_file * seq,void * v)3440 void neigh_seq_stop(struct seq_file *seq, void *v)
3441 __releases(tbl->lock)
3442 __releases(rcu)
3443 {
3444 struct neigh_seq_state *state = seq->private;
3445 struct neigh_table *tbl = state->tbl;
3446
3447 read_unlock_bh(&tbl->lock);
3448 rcu_read_unlock();
3449 }
3450 EXPORT_SYMBOL(neigh_seq_stop);
3451
3452 /* statistics via seq_file */
3453
/*
 * ->start for the per-CPU statistics file.  Position 0 is the header
 * (SEQ_START_TOKEN); position N > 0 maps to CPU N - 1.  CPUs that are not
 * possible are skipped, and *pos is updated to match the CPU actually
 * returned.  Returns NULL when no possible CPU is left.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = pde_data(file_inode(seq->file));
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	cpu = *pos - 1;
	while (cpu < nr_cpu_ids && !cpu_possible(cpu))
		cpu++;

	if (cpu >= nr_cpu_ids)
		return NULL;

	*pos = cpu + 1;
	return per_cpu_ptr(tbl->stats, cpu);
}
3470
/*
 * ->next for the per-CPU statistics file.  Returns the statistics of the
 * first possible CPU at index *pos or above and sets *pos to that CPU
 * plus one.  When no CPU is left, *pos is still incremented and NULL is
 * returned.  @v is unused.
 */
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = pde_data(file_inode(seq->file));
	int cpu;

	cpu = *pos;
	while (cpu < nr_cpu_ids && !cpu_possible(cpu))
		cpu++;

	if (cpu < nr_cpu_ids) {
		*pos = cpu + 1;
		return per_cpu_ptr(tbl->stats, cpu);
	}

	(*pos)++;
	return NULL;
}
3485
/*
 * ->stop for the per-CPU statistics file.  Intentionally empty: the
 * matching ->start and ->next handlers take no locks and hold no
 * references that would need releasing here.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
3490
neigh_stat_seq_show(struct seq_file * seq,void * v)3491 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
3492 {
3493 struct neigh_table *tbl = pde_data(file_inode(seq->file));
3494 struct neigh_statistics *st = v;
3495
3496 if (v == SEQ_START_TOKEN) {
3497 seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
3498 return 0;
3499 }
3500
3501 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
3502 "%08lx %08lx %08lx "
3503 "%08lx %08lx %08lx\n",
3504 atomic_read(&tbl->entries),
3505
3506 st->allocs,
3507 st->destroys,
3508 st->hash_grows,
3509
3510 st->lookups,
3511 st->hits,
3512
3513 st->res_failed,
3514
3515 st->rcv_probes_mcast,
3516 st->rcv_probes_ucast,
3517
3518 st->periodic_gc_runs,
3519 st->forced_gc_runs,
3520 st->unres_discards,
3521 st->table_fulls
3522 );
3523
3524 return 0;
3525 }
3526
/*
 * seq_file operations for the per-CPU neighbour statistics file: a header
 * line followed by one line per possible CPU.
 */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
3533 #endif /* CONFIG_PROC_FS */
3534
/*
 * Build a netlink message of @type describing neighbour @n and send it to
 * the RTNLGRP_NEIGH multicast group of the namespace owning n->dev.
 *
 * @flags and @pid are passed to neigh_fill_info().  The message is
 * allocated with GFP_ATOMIC.  If allocation or filling fails, the error is
 * reported to the group's listeners via rtnl_set_sk_err() instead.
 * The whole operation runs inside an RCU read-side section.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct sk_buff *skb;
	struct net *net;
	int err;

	rcu_read_lock();
	net = dev_net_rcu(n->dev);

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (!skb) {
		err = -ENOBUFS;
		goto report;
	}

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto report;
	}

	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	rcu_read_unlock();
	return;

report:
	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
	rcu_read_unlock();
}
3562
/*
 * Send an RTM_GETNEIGH message carrying NLM_F_REQUEST for neighbour @n to
 * netlink listeners (via __neigh_notify(), with pid 0).
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
3568
3569 #ifdef CONFIG_SYSCTL
3570 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
3571
proc_unres_qlen(const struct ctl_table * ctl,int write,void * buffer,size_t * lenp,loff_t * ppos)3572 static int proc_unres_qlen(const struct ctl_table *ctl, int write,
3573 void *buffer, size_t *lenp, loff_t *ppos)
3574 {
3575 int size, ret;
3576 struct ctl_table tmp = *ctl;
3577
3578 tmp.extra1 = SYSCTL_ZERO;
3579 tmp.extra2 = &unres_qlen_max;
3580 tmp.data = &size;
3581
3582 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
3583 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3584
3585 if (write && !ret)
3586 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
3587 return ret;
3588 }
3589
neigh_copy_dflt_parms(struct net * net,struct neigh_parms * p,int index)3590 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
3591 int index)
3592 {
3593 struct net_device *dev;
3594 int family = neigh_parms_family(p);
3595
3596 rcu_read_lock();
3597 for_each_netdev_rcu(net, dev) {
3598 struct neigh_parms *dst_p =
3599 neigh_get_dev_parms_rcu(dev, family);
3600
3601 if (dst_p && !test_bit(index, dst_p->data_state))
3602 dst_p->data[index] = p->data[index];
3603 }
3604 rcu_read_unlock();
3605 }
3606
neigh_proc_update(const struct ctl_table * ctl,int write)3607 static void neigh_proc_update(const struct ctl_table *ctl, int write)
3608 {
3609 struct net_device *dev = ctl->extra1;
3610 struct neigh_parms *p = ctl->extra2;
3611 struct net *net = neigh_parms_net(p);
3612 int index = (int *) ctl->data - p->data;
3613
3614 if (!write)
3615 return;
3616
3617 set_bit(index, p->data_state);
3618 if (index == NEIGH_VAR_DELAY_PROBE_TIME)
3619 call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
3620 if (!dev) /* NULL dev means this is default value */
3621 neigh_copy_dflt_parms(net, p, index);
3622 }
3623
/*
 * sysctl handler for plain ints restricted to [0, INT_MAX].  The bounds
 * are applied on a copy of @ctl; neigh_proc_update() is then called with
 * the original entry regardless of the handler's result, which is
 * returned.
 */
static int neigh_proc_dointvec_zero_intmax(const struct ctl_table *ctl, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	struct ctl_table bounded = *ctl;
	int ret;

	bounded.extra1 = SYSCTL_ZERO;
	bounded.extra2 = SYSCTL_INT_MAX;
	ret = proc_dointvec_minmax(&bounded, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);

	return ret;
}
3638
/*
 * sysctl handler for a millisecond value stored in jiffies, with a lower
 * bound of msecs_to_jiffies(1) and no upper bound.  The bounds are applied
 * on a copy of @ctl; neigh_proc_update() is then called with the original
 * entry regardless of the handler's result, which is returned.
 */
static int neigh_proc_dointvec_ms_jiffies_positive(const struct ctl_table *ctl, int write,
						   void *buffer, size_t *lenp, loff_t *ppos)
{
	int min = msecs_to_jiffies(1);
	struct ctl_table bounded = *ctl;
	int ret;

	bounded.extra1 = &min;
	bounded.extra2 = NULL;
	ret = proc_dointvec_ms_jiffies_minmax(&bounded, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);

	return ret;
}
3654
/*
 * proc_dointvec() followed by the neighbour sysctl bookkeeping in
 * neigh_proc_update(), which runs whatever proc_dointvec() returned.
 * Returns the result of proc_dointvec().
 */
int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer,
			size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);
3664
/*
 * proc_dointvec_jiffies() followed by the neighbour sysctl bookkeeping in
 * neigh_proc_update(), which runs whatever the handler returned.
 * Returns the result of proc_dointvec_jiffies().
 */
int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
3674
/*
 * proc_dointvec_userhz_jiffies() followed by the neighbour sysctl
 * bookkeeping in neigh_proc_update(), which runs whatever the handler
 * returned.  Returns the result of proc_dointvec_userhz_jiffies().
 */
static int neigh_proc_dointvec_userhz_jiffies(const struct ctl_table *ctl, int write,
					      void *buffer, size_t *lenp,
					      loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}
3684
/*
 * proc_dointvec_ms_jiffies() followed by the neighbour sysctl bookkeeping
 * in neigh_proc_update(), which runs whatever the handler returned.
 * Returns the result of proc_dointvec_ms_jiffies().
 */
int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
3694
/*
 * proc_unres_qlen() followed by the neighbour sysctl bookkeeping in
 * neigh_proc_update(), which runs whatever the handler returned.
 * Returns the result of proc_unres_qlen().
 */
static int neigh_proc_dointvec_unres_qlen(const struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	int ret;

	ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);

	return ret;
}
3704
/*
 * sysctl handler shared by "base_reachable_time" and
 * "base_reachable_time_ms".  The entry name selects the jiffies or the
 * millisecond handler; any other name returns -1.  After a successful
 * write p->reachable_time is recomputed from the new base value.
 * Returns the selected handler's result.
 */
static int neigh_proc_base_reachable_time(const struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (!strcmp(ctl->procname, "base_reachable_time"))
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (!strcmp(ctl->procname, "base_reachable_time_ms"))
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		return -1;

	if (ret || !write)
		return ret;

	/* Refresh reachable_time right away; otherwise the new base would
	 * only take effect the next time neigh_periodic_work decides to
	 * recompute it.
	 */
	p->reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));

	return ret;
}
3729
/*
 * Address of data[index] inside a struct neigh_parms located at address 0,
 * i.e. the member's offset expressed as a pointer.  neigh_sysctl_register()
 * adds the real neigh_parms address to it to obtain the final .data pointer.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/*
 * Template entry at slot NEIGH_VAR_<attr> whose value lives in
 * data[NEIGH_VAR_<data_attr>].  The two differ when one stored value is
 * exposed under a second name (the *_REUSED_ENTRY variants below).
 */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* int restricted to [0, INT_MAX]. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

/* Value handled by neigh_proc_dointvec_jiffies(). */
#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

/* Value handled by neigh_proc_dointvec_userhz_jiffies(). */
#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

/* Millisecond value with a minimum of msecs_to_jiffies(1). */
#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)

/* Millisecond view of a value stored under another attribute. */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Packet-count view (see proc_unres_qlen()) of a value stored in bytes. */
#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
3759
/*
 * Template for the per-neigh_parms sysctl directory; neigh_sysctl_register()
 * duplicates it for each registration.  Entries are indexed by NEIGH_VAR_*.
 *
 * Entries below NEIGH_VAR_GC_INTERVAL carry an offset in .data that is
 * rebased onto the actual neigh_parms at registration time.  The gc_*
 * entries have no .data here; it is filled in with the table's fields, and
 * only when the defaults (no device) are registered.
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
						       "interval_probe_time_ms"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		/* The next three expose an already-listed value in another unit. */
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		/* Table-wide garbage collection knobs (defaults directory only). */
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
	},
};
3815
/*
 * Register the sysctl directory for a set of neighbour parameters.
 *
 * @dev:     device the parameters belong to, or NULL for the defaults
 * @p:       neighbour parameters to expose
 * @handler: optional proc handler replacing the built-in ones for
 *           retrans_time, base_reachable_time and their _ms variants
 *
 * The directory is "net/<ipv4|ipv6>/neigh/<device name|default>".  For a
 * device only the entries up to base_reachable_time_ms are registered; for
 * the defaults the gc_* entries are included and bound to the table's
 * fields.  Any address family other than AF_INET/AF_INET6 hits BUG().
 *
 * On success the duplicated table is stored in p->sysctl_table and 0 is
 * returned; -ENOBUFS is returned if duplicating the template or
 * registering the table fails.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	size_t neigh_vars_size;
	char *p_name;
	int i;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
	if (!t)
		return -ENOBUFS;

	/* Rebase the template offsets onto @p and record the owner. */
	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		struct ctl_table *var = &t->neigh_vars[i];

		var->data += (long) p;
		var->extra1 = dev;
		var->extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
	} else {
		struct neigh_table *tbl = p->tbl;

		dev_name_source = "default";
		neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime and ReachableTime, in both units */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* The built-in handler refreshes p->reachable_time as soon as
		 * base_reachable_time(_ms) is written, so the new value is
		 * used from the next neighbour update on rather than only
		 * after neigh_periodic_work recomputes it (which can take
		 * multiple minutes).  A replacement handler should do the
		 * same.
		 */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	switch (neigh_parms_family(p)) {
	case AF_INET:
		p_name = "ipv4";
		break;
	case AF_INET6:
		p_name = "ipv6";
		break;
	default:
		BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		 p_name, dev_name_source);
	t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
						  neigh_path, t->neigh_vars,
						  neigh_vars_size);
	if (!t->sysctl_header) {
		kfree(t);
		return -ENOBUFS;
	}

	p->sysctl_table = t;
	return 0;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3902
neigh_sysctl_unregister(struct neigh_parms * p)3903 void neigh_sysctl_unregister(struct neigh_parms *p)
3904 {
3905 if (p->sysctl_table) {
3906 struct neigh_sysctl_table *t = p->sysctl_table;
3907 p->sysctl_table = NULL;
3908 unregister_net_sysctl_table(t->sysctl_header);
3909 kfree(t);
3910 }
3911 }
3912 EXPORT_SYMBOL(neigh_sysctl_unregister);
3913
3914 #endif /* CONFIG_SYSCTL */
3915
/*
 * rtnetlink message types served by this file and their handlers.
 * Only RTM_GETNEIGH carries the RTNL_FLAG_DOIT_UNLOCKED and
 * RTNL_FLAG_DUMP_UNLOCKED flags.
 */
static const struct rtnl_msg_handler neigh_rtnl_msg_handlers[] __initconst = {
	{.msgtype = RTM_NEWNEIGH, .doit = neigh_add},
	{.msgtype = RTM_DELNEIGH, .doit = neigh_delete},
	{.msgtype = RTM_GETNEIGH, .doit = neigh_get, .dumpit = neigh_dump_info,
	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
	{.msgtype = RTM_GETNEIGHTBL, .dumpit = neightbl_dump_info},
	{.msgtype = RTM_SETNEIGHTBL, .doit = neightbl_set},
};
3924
/*
 * Boot-time initialisation: register the neighbour rtnetlink handlers.
 * Always returns 0.
 */
static int __init neigh_init(void)
{
	rtnl_register_many(neigh_rtnl_msg_handlers);
	return 0;
}

/* Run neigh_init() at the subsys initcall level. */
subsys_initcall(neigh_init);
3932