// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */

#include <linux/uaccess.h>
#include <linux/types.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/flow.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
#include <linux/rhashtable.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/rtnh.h>
#include <net/inet_dscp.h>

#include <linux/nospec.h>

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};

/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */

static DEFINE_SPINLOCK(mrt_lock);

static struct net_device *vif_dev_read(const struct vif_device *vif)
{
	return rcu_dereference(vif->dev);
}

/* Multicast router control variables */

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
/* We have returned to Alan's original scheme. The hash table of resolved
 * entries is changed only in process context and protected with the weak
 * lock mrt_lock. The queue of unresolved entries is protected with the
 * strong spinlock mfc_unres_lock.
 *
 * This way the data path is completely free of exclusive locks.
 */

static struct kmem_cache *mrt_cachep __ro_after_init;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt,
			    struct list_head *dev_kill_list);

static void ip_mr_forward(struct net *net, struct mr_table *mrt,
			  struct net_device *dev, struct sk_buff *skb,
			  struct mfc_cache *cache, int local);
static int ipmr_cache_report(const struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
				 int cmd);
static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
static void mroute_clean_tables(struct mr_table *mrt, int flags,
				struct list_head *dev_kill_list);
static void ipmr_expire_process(struct timer_list *t);

#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net)					\
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list,	\
				lockdep_rtnl_is_held() ||		\
				list_empty(&net->ipv4.mr_tables))
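/* The table list may also be walked while only holding RTNL, since all
 * writers serialize on it; an empty list trivially needs no protection,
 * hence the extra conditions in the lockdep annotation above.
 */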

static struct mr_table *ipmr_mr_table_iter(struct net *net,
					   struct mr_table *mrt)
{
	struct mr_table *ret;

	if (!mrt)
		ret = list_entry_rcu(net->ipv4.mr_tables.next,
				     struct mr_table, list);
	else
		ret = list_entry_rcu(mrt->list.next,
				     struct mr_table, list);

	if (&ret->list == &net->ipv4.mr_tables)
		return NULL;
	return ret;
}

static struct mr_table *__ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	int err;
	struct ipmr_result res;
	struct fib_lookup_arg arg = {
		.result = &res,
		.flags = FIB_LOOKUP_NOREF,
	};

	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi4_to_flowi(flp4));

	err = fib_rules_lookup(net->ipv4.mr_rules_ops,
			       flowi4_to_flowi(flp4), 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	arg->table = fib_rule_get_table(rule, arg);

	mrt = __ipmr_get_table(rule->fr_net, arg->table);
	if (!mrt)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb,
			       struct netlink_ext_ack *extack)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}

static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
	.family		= RTNL_FAMILY_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.owner		= THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	LIST_HEAD(dev_kill_list);
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (IS_ERR(mrt)) {
		err = PTR_ERR(mrt);
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	ipmr_free_table(mrt, &dev_kill_list);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}

static void __net_exit ipmr_rules_exit_rtnl(struct net *net,
					    struct list_head *dev_kill_list)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
		list_del_rcu(&mrt->list);
		ipmr_free_table(mrt, dev_kill_list);
	}
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
			   struct netlink_ext_ack *extack)
{
	return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack);
}

static unsigned int ipmr_rules_seq_read(const struct net *net)
{
	return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
}
EXPORT_SYMBOL(ipmr_rule_default);
#else
static struct mr_table *ipmr_mr_table_iter(struct net *net,
					   struct mr_table *mrt)
{
	if (!mrt)
		return rcu_dereference(net->ipv4.mrt);
	return NULL;
}

static struct mr_table *__ipmr_get_table(struct net *net, u32 id)
{
	return rcu_dereference_check(net->ipv4.mrt,
				     lockdep_rtnl_is_held() ||
				     !rcu_access_pointer(net->ipv4.mrt));
}

#define ipmr_for_each_table(mrt, net) \
	for (mrt = __ipmr_get_table(net, 0); mrt; mrt = NULL)

static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
			   struct mr_table **mrt)
{
	*mrt = rcu_dereference(net->ipv4.mrt);
	if (!*mrt)
		return -EAGAIN;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	struct mr_table *mrt;

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (IS_ERR(mrt))
		return PTR_ERR(mrt);

	rcu_assign_pointer(net->ipv4.mrt, mrt);
	return 0;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
}

static void __net_exit ipmr_rules_exit_rtnl(struct net *net,
					    struct list_head *dev_kill_list)
{
	struct mr_table *mrt = rcu_dereference_protected(net->ipv4.mrt, 1);

	RCU_INIT_POINTER(net->ipv4.mrt, NULL);
	ipmr_free_table(mrt, dev_kill_list);
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
			   struct netlink_ext_ack *extack)
{
	return 0;
}

static unsigned int ipmr_rules_seq_read(const struct net *net)
{
	return 0;
}

bool ipmr_rule_default(const struct fib_rule *rule)
{
	return true;
}
EXPORT_SYMBOL(ipmr_rule_default);
#endif

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	rcu_read_lock();
	mrt = __ipmr_get_table(net, id);
	rcu_read_unlock();

	return mrt;
}

static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
				const void *ptr)
{
	const struct mfc_cache_cmp_arg *cmparg = arg->key;
	const struct mfc_cache *c = ptr;

	return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
	       cmparg->mfc_origin != c->mfc_origin;
}

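/* Resolved MFC entries live in an rhashtable keyed on the
 * (multicast group, origin) pair carried in struct mfc_cache_cmp_arg,
 * so datapath lookups need no lock beyond RCU.
 */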
static const struct rhashtable_params ipmr_rht_params = {
	.head_offset = offsetof(struct mr_mfc, mnode),
	.key_offset = offsetof(struct mfc_cache, cmparg),
	.key_len = sizeof(struct mfc_cache_cmp_arg),
	.nelem_hint = 3,
	.obj_cmpfn = ipmr_hash_cmp,
	.automatic_shrinking = true,
};

static void ipmr_new_table_set(struct mr_table *mrt,
			       struct net *net)
{
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
}

static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
	.mfc_mcastgrp = htonl(INADDR_ANY),
	.mfc_origin = htonl(INADDR_ANY),
};

static struct mr_table_ops ipmr_mr_table_ops = {
	.rht_params = &ipmr_rht_params,
	.cmparg_any = &ipmr_mr_table_ops_cmparg_any,
};

static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	/* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
	if (id != RT_TABLE_DEFAULT && id >= 1000000000)
		return ERR_PTR(-EINVAL);

	mrt = __ipmr_get_table(net, id);
	if (mrt)
		return mrt;

	return mr_table_alloc(net, id, &ipmr_mr_table_ops,
			      ipmr_expire_process, ipmr_new_table_set);
}

static void ipmr_free_table(struct mr_table *mrt, struct list_head *dev_kill_list)
{
	struct net *net = read_pnet(&mrt->net);
	LIST_HEAD(ipmr_dev_kill_list);

	WARN_ON_ONCE(!mr_can_free_table(net));

	mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
				 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC,
			    &ipmr_dev_kill_list);
	timer_shutdown_sync(&mrt->ipmr_expire_timer);
	mr_table_free(mrt);

	WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ipmr_dev_kill_list));
	list_splice(&ipmr_dev_kill_list, dev_kill_list);
}

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

/* Initialize ipmr pimreg/tunnel in_device */
static bool ipmr_init_vif_indev(const struct net_device *dev)
{
	struct in_device *in_dev;

	ASSERT_RTNL();

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev)
		return false;
	ipv4_devconf_setall(in_dev);
	neigh_parms_data_state_setall(in_dev->arp_parms);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

	return true;
}

static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *tunnel_dev, *new_dev;
	struct ip_tunnel_parm_kern p = { };
	int err;

	tunnel_dev = __dev_get_by_name(net, "tunl0");
	if (!tunnel_dev)
		goto out;

	p.iph.daddr = v->vifc_rmt_addr.s_addr;
	p.iph.saddr = v->vifc_lcl_addr.s_addr;
	p.iph.version = 4;
	p.iph.ihl = 5;
	p.iph.protocol = IPPROTO_IPIP;
	sprintf(p.name, "dvmrp%d", v->vifc_vifi);

	if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl)
		goto out;
	err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
						     SIOCADDTUNNEL);
	if (err)
		goto out;

	new_dev = __dev_get_by_name(net, p.name);
	if (!new_dev)
		goto out;

	new_dev->flags |= IFF_MULTICAST;
	if (!ipmr_init_vif_indev(new_dev))
		goto out_unregister;
	if (dev_open(new_dev, NULL))
		goto out_unregister;
	dev_hold(new_dev);
	err = dev_set_allmulti(new_dev, 1);
	if (err) {
		dev_close(new_dev);
		tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
						       SIOCDELTUNNEL);
		dev_put(new_dev);
		new_dev = ERR_PTR(err);
	}
	return new_dev;

out_unregister:
	unregister_netdevice(new_dev);
out:
	return ERR_PTR(-ENOBUFS);
}
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi4 fl4 = {
		.flowi4_oif	= dev->ifindex,
		.flowi4_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
		.flowi4_mark	= skb->mark,
	};
	int err;

	rcu_read_lock();
	err = ipmr_fib_lookup(net, &fl4, &mrt);
	if (err < 0) {
		rcu_read_unlock();
		kfree_skb(skb);
		return err;
	}

	DEV_STATS_ADD(dev, tx_bytes, skb->len);
	DEV_STATS_INC(dev, tx_packets);

	/* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
	ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
			  IGMPMSG_WHOLEPKT);

	rcu_read_unlock();
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int reg_vif_get_iflink(const struct net_device *dev)
{
	return 0;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
	.ndo_get_iflink = reg_vif_get_iflink,
};

static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
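	/* Leave room for an outer IPv4 header plus 8 bytes, which
	 * corresponds to the size of the PIM register header.
	 */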
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->needs_free_netdev	= true;
	dev->netns_immutable	= true;
}

static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);

	if (!dev)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	if (!ipmr_init_vif_indev(dev))
		goto failure;
	if (dev_open(dev, NULL))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;
	int vif_num;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/* Check that:
	 * a. packet is really sent to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	/* Pairs with WRITE_ONCE() in vif_add()/vif_delete() */
	vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
	if (vif_num >= 0)
		reg_dev = vif_dev_read(&mrt->vif_table[vif_num]);
	if (!reg_dev)
		return 1;

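	/* Decapsulate: strip the outer IP and PIM headers, then re-inject
	 * the inner multicast packet as if it had arrived on the pimreg
	 * device.
	 */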
	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = CHECKSUM_NONE;

	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));

	netif_rx(skb);

	return NET_RX_SUCCESS;
}
#else
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	return NULL;
}
#endif

static int call_ipmr_vif_entry_notifiers(struct net *net,
					 enum fib_event_type event_type,
					 struct vif_device *vif,
					 struct net_device *vif_dev,
					 vifi_t vif_index, u32 tb_id)
{
	return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
				     vif, vif_dev, vif_index, tb_id,
				     &net->ipv4.ipmr_seq);
}

static int call_ipmr_mfc_entry_notifiers(struct net *net,
					 enum fib_event_type event_type,
					 struct mfc_cache *mfc, u32 tb_id)
{
	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type,
				     &mfc->_c, tb_id, &net->ipv4.ipmr_seq);
}

/**
 * vif_delete - Delete a VIF entry
 * @mrt: Table to delete from
 * @vifi: VIF identifier to delete
 * @notify: Set to 1 if the caller is a notifier_call
 * @head: if unregistering the VIF, place it on this queue
 */
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct net *net = read_pnet(&mrt->net);
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	dev = rtnl_dereference(v->dev);
	if (!dev)
		return -EADDRNOTAVAIL;

	spin_lock(&mrt_lock);
	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev,
				      vifi, mrt->id);
	RCU_INIT_POINTER(v->dev, NULL);

	if (vifi == mrt->mroute_reg_vif_num) {
		/* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
		WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
	}
	if (vifi + 1 == mrt->maxvif) {
		int tmp;

		for (tmp = vifi - 1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		WRITE_ONCE(mrt->maxvif, tmp + 1);
	}

	spin_unlock(&mrt_lock);

	dev_set_allmulti(dev, -1);

	in_dev = __in_dev_get_rtnl(dev);
	if (in_dev) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
					    NETCONFA_MC_FORWARDING,
					    dev->ifindex, &in_dev->cnf);
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	netdev_put(dev, &v->dev_tracker);
	return 0;
}

static void ipmr_cache_free_rcu(struct rcu_head *head)
{
	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);

	kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
}

static void ipmr_cache_free(struct mfc_cache *c)
{
	call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
}

/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */
static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = read_pnet(&mrt->net);
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));

			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = nlmsg_data(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			kfree_skb(skb);
		}
	}

	ipmr_cache_free(c);
}

/* Timer process for the unresolved queue. */
static void ipmr_expire_process(struct timer_list *t)
{
	struct mr_table *mrt = timer_container_of(mrt, t, ipmr_expire_timer);
	struct mr_mfc *c, *next;
	unsigned long expires;
	unsigned long now;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ / 10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10 * HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;

			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
		ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}

/* Fill oifs list. It is called under locked mrt_lock. */
static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
	WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
}

static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	struct netdev_phys_item_id ppid = { };
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
	case VIFF_REGISTER:
		if (!ipmr_pimsm_enabled())
			return -EINVAL;
		/* Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (IS_ERR(dev))
			return PTR_ERR(dev);
		break;
	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && !__in_dev_get_rtnl(dev)) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else {
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
		}
		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	in_dev = __in_dev_get_rtnl(dev);
	if (!in_dev) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
				    dev->ifindex, &in_dev->cnf);
	ip_rt_multicast_event(in_dev);

	/* Fill in the VIF structures */
	vif_device_init(v, dev, vifc->vifc_rate_limit,
			vifc->vifc_threshold,
			vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
			(VIFF_TUNNEL | VIFF_REGISTER));

	err = netif_get_port_parent_id(dev, &ppid, true);
	if (err == 0) {
		memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
		v->dev_parent_id.id_len = ppid.id_len;
	} else {
		v->dev_parent_id.id_len = 0;
	}

	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;

	/* And finish update writing critical data */
	spin_lock(&mrt_lock);
	rcu_assign_pointer(v->dev, dev);
	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
	if (v->flags & VIFF_REGISTER) {
		/* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
		WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
	}
	if (vifi + 1 > mrt->maxvif)
		WRITE_ONCE(mrt->maxvif, vifi + 1);
	spin_unlock(&mrt_lock);
	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev,
				      vifi, mrt->id);
	return 0;
}

/* called with rcu_read_lock() */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	struct mfc_cache_cmp_arg arg = {
		.mfc_mcastgrp = mcastgrp,
		.mfc_origin = origin
	};

	return mr_mfc_find(mrt, &arg);
}

/* Look for a (*,G) entry */
static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
					     __be32 mcastgrp, int vifi)
{
	struct mfc_cache_cmp_arg arg = {
		.mfc_mcastgrp = mcastgrp,
		.mfc_origin = htonl(INADDR_ANY)
	};

	if (mcastgrp == htonl(INADDR_ANY))
		return mr_mfc_find_any_parent(mrt, vifi);
	return mr_mfc_find_any(mrt, vifi, &arg);
}

/* Look for a (S,G,iif) entry if parent != -1 */
static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
						__be32 origin, __be32 mcastgrp,
						int parent)
{
	struct mfc_cache_cmp_arg arg = {
		.mfc_mcastgrp = mcastgrp,
		.mfc_origin = origin,
	};

	return mr_mfc_find_parent(mrt, &arg, parent);
}

/* Allocate a multicast cache entry */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (c) {
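		/* Backdate last_assert so the first wrong-interface event
		 * can raise an assert upcall immediately.
		 */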
		c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
		c->_c.mfc_un.res.minvif = MAXVIFS;
		c->_c.free = ipmr_cache_free_rcu;
		refcount_set(&c->_c.mfc_un.res.refcount, 1);
	}
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (c) {
		skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
		c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
	}
	return c;
}

/* A cache entry has gone into a resolved state from queued */
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/* Play the pending entries through our router */
	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = skb_pull(skb,
							sizeof(struct iphdr));

			if (mr_fill_mroute(mrt, skb, &c->_c,
					   nlmsg_data(nlh)) > 0) {
				nlh->nlmsg_len = skb_tail_pointer(skb) -
						 (u8 *)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = nlmsg_data(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
		} else {
			rcu_read_lock();
			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
			rcu_read_unlock();
		}
	}
}

/* Bounce a cache query up to mrouted and netlink.
 *
 * Called under rcu_read_lock().
 */
static int ipmr_cache_report(const struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	const int ihl = ip_hdrlen(pkt);
	struct sock *mroute_sk;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	struct sk_buff *skb;
	int ret;

	mroute_sk = rcu_dereference(mrt->mroute_sk);
	if (!mroute_sk)
		return -EINVAL;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = assert;
		msg->im_mbz = 0;
		if (assert == IGMPMSG_WRVIFWHOLE) {
			msg->im_vif = vifi;
			msg->im_vif_hi = vifi >> 8;
		} else {
			/* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
			int vif_num = READ_ONCE(mrt->mroute_reg_vif_num);

			msg->im_vif = vif_num;
			msg->im_vif_hi = vif_num >> 8;
		}
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else {
		/* Copy the IP header */
		skb_set_network_header(skb, skb->len);
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		/* Flag to the kernel this is a route add */
		ip_hdr(skb)->protocol = 0;
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		msg->im_vif_hi = vifi >> 8;
		ipv4_pktinfo_prepare(mroute_sk, pkt, false);
		memcpy(skb->cb, pkt->cb, sizeof(skb->cb));
		/* Add our header.
		 * Note that code, csum and group fields are cleared.
		 */
		igmp = skb_put_zero(skb, sizeof(struct igmphdr));
		igmp->type = assert;
		msg->im_msgtype = assert;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	igmpmsg_netlink_event(mrt, skb);

	/* Deliver to mrouted */
	ret = sock_queue_rcv_skb(mroute_sk, skb);

	if (ret < 0) {
		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
		kfree_skb(skb);
	}

	return ret;
}

/* Queue a packet for resolution; the unresolved cache entry is created
 * and extended under mfc_unres_lock.
 */
/* Called under rcu_read_lock() */
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
				 struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = read_pnet(&mrt->net);
	const struct iphdr *iph = ip_hdr(skb);
	struct mfc_cache *c = NULL;
	bool found = false;
	int err;

	spin_lock_bh(&mfc_unres_lock);

	if (!check_net(net)) {
		err = -EINVAL;
		goto err;
	}

	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/* Create a new entry if allowable */
		c = ipmr_cache_alloc_unres();
		if (!c) {
			err = -ENOBUFS;
			goto err;
		}

		/* Fill in the new cache entry */
		c->_c.mfc_parent = -1;
		c->mfc_origin = iph->saddr;
		c->mfc_mcastgrp = iph->daddr;

		/* Reflect first query at mrouted. */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0)
			goto err;

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->_c.list, &mrt->mfc_unres_queue);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);

		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
			mod_timer(&mrt->ipmr_expire_timer,
				  c->_c.mfc_un.unres.expires);
	}

	/* See if we can append the packet */
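	/* At most three skbs may wait per unresolved entry, bounding the
	 * memory an unresolved flow can pin down.
	 */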
	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
		c = NULL;
		err = -ENOBUFS;
		goto err;
	}

	if (dev) {
		skb->dev = dev;
		skb->skb_iif = dev->ifindex;
	}

	skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);

	spin_unlock_bh(&mfc_unres_lock);
	return 0;

err:
	spin_unlock_bh(&mfc_unres_lock);
	if (c)
		ipmr_cache_free(c);
	kfree_skb(skb);
	return err;
}

/* MFC cache manipulation by user space mroute daemon */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc_cache *c;

	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (!c)
		return -ENOENT;
	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
	list_del_rcu(&c->_c.list);
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_DELROUTE);
	mr_cache_put(&c->_c);

	return 0;
}

static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock, int parent)
{
	struct mfc_cache *uc, *c;
	struct mr_mfc *_uc;
	bool found;
	int ret;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	rcu_read_lock();
	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
				   mfc->mfcc_mcastgrp.s_addr, parent);
	rcu_read_unlock();
	if (c) {
		spin_lock(&mrt_lock);
		c->_c.mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->_c.mfc_flags |= MFC_STATIC;
		spin_unlock(&mrt_lock);
		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
					      mrt->id);
		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
		return 0;
	}

	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (!c)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->_c.mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->_c.mfc_flags |= MFC_STATIC;

	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
				  ipmr_rht_params);
	if (ret) {
		pr_err("ipmr: rhtable insert error %d\n", ret);
		ipmr_cache_free(c);
		return ret;
	}
	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
	/* Check to see if we resolved a queued list. If so we
	 * need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
		uc = (struct mfc_cache *)_uc;
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&_uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		timer_delete(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
	return 0;
}

/* Close the multicast socket, and clear the vif tables etc */
static void mroute_clean_tables(struct mr_table *mrt, int flags,
				struct list_head *dev_kill_list)
{
	struct net *net = read_pnet(&mrt->net);
	struct mfc_cache *cache;
	struct mr_mfc *c, *tmp;
	int i;

	/* Shut down all active vif entries */
	if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) {
		for (i = 0; i < mrt->maxvif; i++) {
			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
			     !(flags & MRT_FLUSH_VIFS_STATIC)) ||
			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS)))
				continue;
			vif_delete(mrt, i, 0, dev_kill_list);
		}
	}

	/* Wipe the cache */
	if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
		mutex_lock(&net->ipv4.mfc_mutex);

		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
				continue;
			rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
			list_del_rcu(&c->list);
			cache = (struct mfc_cache *)c;
			call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
						      mrt->id);
			mroute_netlink_event(mrt, cache, RTM_DELROUTE);
			mr_cache_put(c);
		}

		mutex_unlock(&net->ipv4.mfc_mutex);
	}

	if (flags & MRT_FLUSH_MFC) {
		if (atomic_read(&mrt->cache_resolve_queue_len) != 0 || !check_net(net)) {
			spin_lock_bh(&mfc_unres_lock);
			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
				list_del(&c->list);
				cache = (struct mfc_cache *)c;
				mroute_netlink_event(mrt, cache, RTM_DELROUTE);
				ipmr_destroy_unres(mrt, cache);
			}
			spin_unlock_bh(&mfc_unres_lock);
		}
	}
}

/* Called from ip_ra_control(), before an RCU grace period;
 * we do not need to call synchronize_rcu() here.
 */
static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	LIST_HEAD(dev_kill_list);
	struct mr_table *mrt;

	rtnl_lock();

	ipmr_for_each_table(mrt, net) {
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
			mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC,
					    &dev_kill_list);
		}
	}

	unregister_netdevice_many(&dev_kill_list);

	rtnl_unlock();
}

/* Socket options and virtual interface manipulation. The whole
 * virtual interface system is a complete heap, but unfortunately
 * that's how BSD mrouted happens to think. Maybe one day with a proper
 * MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			 unsigned int optlen)
{
	struct net *net = sock_net(sk);
	int val, ret = 0, parent = 0;
	struct mr_table *mrt;
	struct vifctl vif;
	struct mfcctl mfc;
	bool do_wrvifwhole;
	u32 uval;

	/* There's one exception to the lock - MRT_DONE which needs to unlock */
	rtnl_lock();
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	mrt = __ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt) {
		ret = -ENOENT;
		goto out_unlock;
	}
	if (optname != MRT_INIT) {
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
			ret = -EACCES;
			goto out_unlock;
		}
	}

	switch (optname) {
	case MRT_INIT:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		if (rtnl_dereference(mrt->mroute_sk)) {
			ret = -EADDRINUSE;
			break;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			rcu_assign_pointer(mrt->mroute_sk, sk);
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
		}
		break;
	case MRT_DONE:
		if (sk != rcu_access_pointer(mrt->mroute_sk)) {
			ret = -EACCES;
		} else {
			/* We need to unlock here because mrtsock_destruct takes
			 * care of rtnl itself and we can't change that due to
			 * the IP_ROUTER_ALERT setsockopt which runs without it.
			 */
			rtnl_unlock();
			ret = ip_ra_control(sk, 0, NULL);
			goto out;
		}
		break;
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&vif, optval, sizeof(vif))) {
			ret = -EFAULT;
			break;
		}
		if (vif.vifc_vifi >= MAXVIFS) {
			ret = -ENFILE;
			break;
		}
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif,
				      sk == rtnl_dereference(mrt->mroute_sk));
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		break;
	/* Manipulate the forwarding caches. These live
	 * in a sort of kernel/user symbiosis.
	 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		parent = -1;
		fallthrough;
	case MRT_ADD_MFC_PROXY:
	case MRT_DEL_MFC_PROXY:
		if (optlen != sizeof(mfc)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) {
			ret = -EFAULT;
			break;
		}
		if (parent == 0)
			parent = mfc.mfcc_parent;

		mutex_lock(&net->ipv4.mfc_mutex);

		if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
			ret = ipmr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc,
					   sk == rtnl_dereference(mrt->mroute_sk),
					   parent);

		mutex_unlock(&net->ipv4.mfc_mutex);
		break;
	case MRT_FLUSH: {
		LIST_HEAD(dev_kill_list);

		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}

		mroute_clean_tables(mrt, val, &dev_kill_list);
		unregister_netdevice_many(&dev_kill_list);
		break;
	}
	/* Control PIM assert. */
	case MRT_ASSERT:
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}
		WRITE_ONCE(mrt->mroute_do_assert, val);
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled()) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}

		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
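		/* Normalize to a boolean: any non-zero value enables PIM;
		 * IGMPMSG_WRVIFWHOLE additionally requests whole-packet
		 * wrong-VIF reports.
		 */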
		val = !!val;
		if (val != mrt->mroute_do_pim) {
			WRITE_ONCE(mrt->mroute_do_pim, val);
			WRITE_ONCE(mrt->mroute_do_assert, val);
			WRITE_ONCE(mrt->mroute_do_wrvifwhole, do_wrvifwhole);
		}
		break;
	case MRT_TABLE:
		if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(uval)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&uval, optval, sizeof(uval))) {
			ret = -EFAULT;
			break;
		}

		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			ret = -EBUSY;
		} else {
			mrt = ipmr_new_table(net, uval);
			if (IS_ERR(mrt))
				ret = PTR_ERR(mrt);
			else
				raw_sk(sk)->ipmr_table = uval;
		}
		break;
	/* Spurious command, or MRT_VERSION which you cannot set. */
	default:
		ret = -ENOPROTOOPT;
	}
out_unlock:
	rtnl_unlock();
out:
	return ret;
}

/* Execute if this ioctl is a special mroute ioctl */
int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	switch (cmd) {
	/* These userspace buffers will be consumed by ipmr_ioctl() */
	case SIOCGETVIFCNT: {
		struct sioc_vif_req buffer;

		return sock_ioctl_inout(sk, cmd, arg, &buffer,
					sizeof(buffer));
	}
	case SIOCGETSGCNT: {
		struct sioc_sg_req buffer;

		return sock_ioctl_inout(sk, cmd, arg, &buffer,
					sizeof(buffer));
	}
	}
	/* return code > 0 means that the ioctl was not executed */
	return 1;
}

/* Getsockopt support for the multicast routing system. */
int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
			 sockptr_t optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP)
		return -EOPNOTSUPP;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT_VERSION:
		val = 0x0305;
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled())
			return -ENOPROTOOPT;
		val = READ_ONCE(mrt->mroute_do_pim);
		break;
	case MRT_ASSERT:
		val = READ_ONCE(mrt->mroute_do_assert);
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
		return -EFAULT;
	if (olr < 0)
		return -EINVAL;

	olr = min_t(unsigned int, olr, sizeof(int));

	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
		return -EFAULT;
	if (copy_to_sockptr(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/* The IP multicast ioctl support routines. */
int ipmr_ioctl(struct sock *sk, int cmd, void *arg)
{
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct sioc_vif_req *vr;
	struct sioc_sg_req *sr;
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		vr = (struct sioc_vif_req *)arg;
		if (vr->vifi >= mrt->maxvif)
			return -EINVAL;
		vr->vifi = array_index_nospec(vr->vifi, mrt->maxvif);
		rcu_read_lock();
		vif = &mrt->vif_table[vr->vifi];
		if (VIF_EXISTS(mrt, vr->vifi)) {
			vr->icount = READ_ONCE(vif->pkt_in);
			vr->ocount = READ_ONCE(vif->pkt_out);
			vr->ibytes = READ_ONCE(vif->bytes_in);
			vr->obytes = READ_ONCE(vif->bytes_out);
			rcu_read_unlock();

			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		sr = (struct sioc_sg_req *)arg;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr->src.s_addr, sr->grp.s_addr);
		if (c) {
			sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
			sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
			sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
			rcu_read_unlock();
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}

#ifdef CONFIG_COMPAT
struct compat_sioc_sg_req {
	struct in_addr src;
	struct in_addr grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

struct compat_sioc_vif_req {
	vifi_t	vifi;		/* Which iface */
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct compat_sioc_sg_req sr;
	struct compat_sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
		rcu_read_lock();
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = READ_ONCE(vif->pkt_in);
			vr.ocount = READ_ONCE(vif->pkt_out);
			vr.ibytes = READ_ONCE(vif->bytes_in);
			vr.obytes = READ_ONCE(vif->bytes_out);
			rcu_read_unlock();

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		rcu_read_lock();
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
			sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
			sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
			rcu_read_unlock();

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		rcu_read_unlock();
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
#endif

static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (rcu_access_pointer(v->dev) == dev)
				vif_delete(mrt, ct, 1, NULL);
		}
	}
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};

/* Encapsulate a packet by attaching a valid IPIP header to it.
 * This avoids tunnel drivers and other mess and gives us the speed so
 * important for multicast video.
 */
static void ip_encap(struct net *net, struct sk_buff *skb,
		     __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	const struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(net, skb, NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset_ct(skb);
}

static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
				      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(net, sk, skb);
}

#ifdef CONFIG_NET_SWITCHDEV
static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
				   int in_vifi, int out_vifi)
{
	struct vif_device *out_vif = &mrt->vif_table[out_vifi];
	struct vif_device *in_vif = &mrt->vif_table[in_vifi];
1882 if (!skb->offload_l3_fwd_mark)
1883 return false;
1884 if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
1885 return false;
1886 return netdev_phys_item_id_same(&out_vif->dev_parent_id,
1887 &in_vif->dev_parent_id);
1888 }
1889 #else
ipmr_forward_offloaded(struct sk_buff * skb,struct mr_table * mrt,int in_vifi,int out_vifi)1890 static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
1891 int in_vifi, int out_vifi)
1892 {
1893 return false;
1894 }
1895 #endif
1896
1897 /* Processing handlers for ipmr_forward, under rcu_read_lock() */
1898
1899 static int ipmr_prepare_xmit(struct net *net, struct mr_table *mrt,
1900 struct sk_buff *skb, int vifi)
1901 {
1902 const struct iphdr *iph = ip_hdr(skb);
1903 struct vif_device *vif = &mrt->vif_table[vifi];
1904 struct net_device *vif_dev;
1905 struct rtable *rt;
1906 struct flowi4 fl4;
1907 int encap = 0;
1908
1909 vif_dev = vif_dev_read(vif);
1910 if (!vif_dev)
1911 return -1;
1912
1913 if (vif->flags & VIFF_REGISTER) {
1914 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
1915 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
1916 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
1917 DEV_STATS_INC(vif_dev, tx_packets);
1918 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1919 return -1;
1920 }
1921
1922 if (vif->flags & VIFF_TUNNEL) {
1923 rt = ip_route_output_ports(net, &fl4, NULL,
1924 vif->remote, vif->local,
1925 0, 0,
1926 IPPROTO_IPIP,
1927 iph->tos & INET_DSCP_MASK, vif->link);
1928 if (IS_ERR(rt))
1929 return -1;
1930 encap = sizeof(struct iphdr);
1931 } else {
1932 rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
1933 0, 0,
1934 IPPROTO_IPIP,
1935 iph->tos & INET_DSCP_MASK, vif->link);
1936 if (IS_ERR(rt))
1937 return -1;
1938 }
1939
1940 if (skb->len + encap > dst4_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1941 /* Do not fragment multicasts. Alas, IPv4 does not
1942 * allow us to send ICMP here, so oversized packets
1943 * simply disappear into a black hole.
1944 */
1945 IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
1946 ip_rt_put(rt);
1947 return -1;
1948 }
1949
1950 encap += LL_RESERVED_SPACE(dst_dev_rcu(&rt->dst)) + rt->dst.header_len;
1951
1952 if (skb_cow(skb, encap)) {
1953 ip_rt_put(rt);
1954 return -1;
1955 }
1956
1957 WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
1958 WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
1959
1960 skb_dst_drop(skb);
1961 skb_dst_set(skb, &rt->dst);
1962 ip_decrease_ttl(ip_hdr(skb));
1963
1964 /* FIXME: forward and output firewalls used to be called here.
1965 * What do we do with netfilter? -- RR
1966 */
1967 if (vif->flags & VIFF_TUNNEL) {
1968 ip_encap(net, skb, vif->local, vif->remote);
1969 /* FIXME: extra output firewall step used to be here. --RR */
1970 DEV_STATS_INC(vif_dev, tx_packets);
1971 DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
1972 }
1973
1974 return 0;
1975 }
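
/* The three vif flavours handled above, in miniature (sketch):
 *
 *   VIFF_REGISTER - never transmits; the packet is handed to the
 *                   daemon as an IGMPMSG_WHOLEPKT upcall instead.
 *   VIFF_TUNNEL   - routed towards the tunnel endpoint and
 *                   IPIP-encapsulated by ip_encap().
 *   physical vif  - routed towards the group address and sent as-is.
 *
 * A zero return means the skb now has a fresh dst and a decremented
 * TTL, ready for NF_INET_FORWARD or ip_mc_output().
 */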
1976
1977 static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt,
1978 int in_vifi, struct sk_buff *skb, int vifi)
1979 {
1980 struct rtable *rt;
1981
1982 if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
1983 goto out_free;
1984
1985 if (ipmr_prepare_xmit(net, mrt, skb, vifi))
1986 goto out_free;
1987
1988 rt = skb_rtable(skb);
1989
1990 IPCB(skb)->flags |= IPSKB_FORWARDED;
1991
1992 /* RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1993 * not only before forwarding, but also after forwarding on all output
1994 * interfaces. Clearly, if the mrouter runs a multicasting
1995 * program, it should receive packets regardless of which interface
1996 * the program has joined on.
1997 * If we did not do this, the program would have to join on all
1998 * interfaces. On the other hand, a multihomed host (or router, but
1999 * not mrouter) cannot join on more than one interface - it would
2000 * result in receiving multiple packets.
2001 */
2002 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
2003 net, NULL, skb, skb->dev, dst_dev_rcu(&rt->dst),
2004 ipmr_forward_finish);
2005 return;
2006
2007 out_free:
2008 kfree_skb(skb);
2009 }
2010
2011 static void ipmr_queue_output_xmit(struct net *net, struct mr_table *mrt,
2012 struct sk_buff *skb, int vifi)
2013 {
2014 if (ipmr_prepare_xmit(net, mrt, skb, vifi))
2015 goto out_free;
2016
2017 ip_mc_output(net, NULL, skb);
2018 return;
2019
2020 out_free:
2021 kfree_skb(skb);
2022 }
2023
2024 /* Called with mrt_lock or rcu_read_lock() */
2025 static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev)
2026 {
2027 int ct;
2028 /* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */
2029 for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
2030 if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
2031 break;
2032 }
2033 return ct;
2034 }
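
/* Usage sketch: callers map the receiving device to a vif index before
 * consulting the MFC, e.g.
 *
 *   int vif = ipmr_find_vif(mrt, skb->dev);
 *
 *   if (vif < 0)
 *           return -ENODEV;  // device has no vif in this table
 */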
2035
2036 /* "local" means that we should preserve one skb (for local delivery) */
2037 /* Called under rcu_read_lock() */
2038 static void ip_mr_forward(struct net *net, struct mr_table *mrt,
2039 struct net_device *dev, struct sk_buff *skb,
2040 struct mfc_cache *c, int local)
2041 {
2042 int true_vifi = ipmr_find_vif(mrt, dev);
2043 int psend = -1;
2044 int vif, ct;
2045
2046 vif = c->_c.mfc_parent;
2047 atomic_long_inc(&c->_c.mfc_un.res.pkt);
2048 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2049 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2050
2051 if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
2052 struct mfc_cache *cache_proxy;
2053
2054 /* For an (*,G) entry, we only check that the incoming
2055 * interface is part of the static tree.
2056 */
2057 cache_proxy = mr_mfc_find_any_parent(mrt, vif);
2058 if (cache_proxy &&
2059 cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
2060 goto forward;
2061 }
2062
2063 /* Wrong interface: drop packet and (maybe) send PIM assert. */
2064 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
2065 if (rt_is_output_route(skb_rtable(skb))) {
2066 /* It is our own packet, looped back.
2067 * Very complicated situation...
2068 *
2069 * The best workaround until the routing daemons are
2070 * fixed is not to redistribute a packet if it was
2071 * sent through the wrong interface. It means that
2072 * multicast applications WILL NOT work for
2073 * (S,G) entries whose default multicast route points
2074 * to the wrong oif. In any case, it is not a good
2075 * idea to run multicasting applications on a router.
2076 */
2077 goto dont_forward;
2078 }
2079
2080 atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
2081
2082 if (true_vifi >= 0 && READ_ONCE(mrt->mroute_do_assert) &&
2083 /* PIM-SM uses asserts when switching from RPT to SPT,
2084 * so we cannot check that the packet arrived on an oif.
2085 * That is bad, but otherwise we would need to move a pretty
2086 * large chunk of pimd into the kernel. Ough... --ANK
2087 */
2088 (READ_ONCE(mrt->mroute_do_pim) ||
2089 c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
2090 time_after(jiffies,
2091 c->_c.mfc_un.res.last_assert +
2092 MFC_ASSERT_THRESH)) {
2093 c->_c.mfc_un.res.last_assert = jiffies;
2094 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
2095 if (READ_ONCE(mrt->mroute_do_wrvifwhole))
2096 ipmr_cache_report(mrt, skb, true_vifi,
2097 IGMPMSG_WRVIFWHOLE);
2098 }
2099 goto dont_forward;
2100 }
2101
2102 forward:
2103 WRITE_ONCE(mrt->vif_table[vif].pkt_in,
2104 mrt->vif_table[vif].pkt_in + 1);
2105 WRITE_ONCE(mrt->vif_table[vif].bytes_in,
2106 mrt->vif_table[vif].bytes_in + skb->len);
2107
2108 /* Forward the frame */
2109 if (c->mfc_origin == htonl(INADDR_ANY) &&
2110 c->mfc_mcastgrp == htonl(INADDR_ANY)) {
2111 if (true_vifi >= 0 &&
2112 true_vifi != c->_c.mfc_parent &&
2113 ip_hdr(skb)->ttl >
2114 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2115 /* It's an (*,*) entry and the packet is not coming from
2116 * the upstream: forward the packet to the upstream
2117 * only.
2118 */
2119 psend = c->_c.mfc_parent;
2120 goto last_forward;
2121 }
2122 goto dont_forward;
2123 }
2124 for (ct = c->_c.mfc_un.res.maxvif - 1;
2125 ct >= c->_c.mfc_un.res.minvif; ct--) {
2126 /* For (*,G) entry, don't forward to the incoming interface */
2127 if ((c->mfc_origin != htonl(INADDR_ANY) ||
2128 ct != true_vifi) &&
2129 ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
2130 if (psend != -1) {
2131 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2132
2133 if (skb2)
2134 ipmr_queue_fwd_xmit(net, mrt, true_vifi,
2135 skb2, psend);
2136 }
2137 psend = ct;
2138 }
2139 }
2140 last_forward:
2141 if (psend != -1) {
2142 if (local) {
2143 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2144
2145 if (skb2)
2146 ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb2,
2147 psend);
2148 } else {
2149 ipmr_queue_fwd_xmit(net, mrt, true_vifi, skb, psend);
2150 return;
2151 }
2152 }
2153
2154 dont_forward:
2155 if (!local)
2156 kfree_skb(skb);
2157 }
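
/* Forwarding decision in miniature (sketch): ttls[] doubles as the oif
 * list and a per-vif scope limit.  A vif with ttls[ct] == 255 is not an
 * output interface at all; otherwise a copy goes out only while the
 * packet's TTL still exceeds the threshold:
 *
 *   if (ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct])
 *           send a clone to vif ct;
 *
 * The last eligible vif gets the original skb rather than a clone,
 * saving one copy per forwarded packet.
 */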
2158
2159 static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
2160 {
2161 struct rtable *rt = skb_rtable(skb);
2162 struct iphdr *iph = ip_hdr(skb);
2163 struct flowi4 fl4 = {
2164 .daddr = iph->daddr,
2165 .saddr = iph->saddr,
2166 .flowi4_dscp = ip4h_dscp(iph),
2167 .flowi4_oif = (rt_is_output_route(rt) ?
2168 skb->dev->ifindex : 0),
2169 .flowi4_iif = (rt_is_output_route(rt) ?
2170 LOOPBACK_IFINDEX :
2171 skb->dev->ifindex),
2172 .flowi4_mark = skb->mark,
2173 };
2174 struct mr_table *mrt;
2175 int err;
2176
2177 err = ipmr_fib_lookup(net, &fl4, &mrt);
2178 if (err)
2179 return ERR_PTR(err);
2180 return mrt;
2181 }
2182
2183 /* Multicast packets for forwarding arrive here
2184 * Called with rcu_read_lock();
2185 */
2186 int ip_mr_input(struct sk_buff *skb)
2187 {
2188 struct mfc_cache *cache;
2189 struct net *net = dev_net(skb->dev);
2190 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
2191 struct mr_table *mrt;
2192 struct net_device *dev;
2193
2194 /* skb->dev passed in is the loX master dev for vrfs.
2195 * As there are no vifs associated with loopback devices,
2196 * get the proper interface that does have a vif associated with it.
2197 */
2198 dev = skb->dev;
2199 if (netif_is_l3_master(skb->dev)) {
2200 dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
2201 if (!dev) {
2202 kfree_skb(skb);
2203 return -ENODEV;
2204 }
2205 }
2206
2207 /* Packet is looped back after forwarding; it should not be
2208 * forwarded a second time, but it can still be delivered locally.
2209 */
2210 if (IPCB(skb)->flags & IPSKB_FORWARDED)
2211 goto dont_forward;
2212
2213 mrt = ipmr_rt_fib_lookup(net, skb);
2214 if (IS_ERR(mrt)) {
2215 kfree_skb(skb);
2216 return PTR_ERR(mrt);
2217 }
2218 if (!local) {
2219 if (IPCB(skb)->opt.router_alert) {
2220 if (ip_call_ra_chain(skb))
2221 return 0;
2222 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
2223 /* IGMPv1 (and broken IGMPv2 implementations such as
2224 * Cisco IOS <= 11.2(8)) do not put the router alert
2225 * option in IGMP packets destined to routable
2226 * groups. That is very bad, because it means
2227 * that we can forward NO IGMP messages.
2228 */
2229 struct sock *mroute_sk;
2230
2231 mroute_sk = rcu_dereference(mrt->mroute_sk);
2232 if (mroute_sk) {
2233 nf_reset_ct(skb);
2234 raw_rcv(mroute_sk, skb);
2235 return 0;
2236 }
2237 }
2238 }
2239
2240 /* already under rcu_read_lock() */
2241 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
2242 if (!cache) {
2243 int vif = ipmr_find_vif(mrt, dev);
2244
2245 if (vif >= 0)
2246 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
2247 vif);
2248 }
2249
2250 /* No usable cache entry */
2251 if (!cache) {
2252 int vif;
2253
2254 if (local) {
2255 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2256 ip_local_deliver(skb);
2257 if (!skb2)
2258 return -ENOBUFS;
2259 skb = skb2;
2260 }
2261
2262 vif = ipmr_find_vif(mrt, dev);
2263 if (vif >= 0)
2264 return ipmr_cache_unresolved(mrt, vif, skb, dev);
2265 kfree_skb(skb);
2266 return -ENODEV;
2267 }
2268
2269 ip_mr_forward(net, mrt, dev, skb, cache, local);
2270
2271 if (local)
2272 return ip_local_deliver(skb);
2273
2274 return 0;
2275
2276 dont_forward:
2277 if (local)
2278 return ip_local_deliver(skb);
2279 kfree_skb(skb);
2280 return 0;
2281 }
2282
2283 static void ip_mr_output_finish(struct net *net, struct mr_table *mrt,
2284 struct net_device *dev, struct sk_buff *skb,
2285 struct mfc_cache *c)
2286 {
2287 int psend = -1;
2288 int ct;
2289
2290 atomic_long_inc(&c->_c.mfc_un.res.pkt);
2291 atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
2292 WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
2293
2294 /* Forward the frame */
2295 if (c->mfc_origin == htonl(INADDR_ANY) &&
2296 c->mfc_mcastgrp == htonl(INADDR_ANY)) {
2297 if (ip_hdr(skb)->ttl >
2298 c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
2299 /* It's an (*,*) entry and the packet is not coming from
2300 * the upstream: forward the packet to the upstream
2301 * only.
2302 */
2303 psend = c->_c.mfc_parent;
2304 goto last_xmit;
2305 }
2306 goto dont_xmit;
2307 }
2308
2309 for (ct = c->_c.mfc_un.res.maxvif - 1;
2310 ct >= c->_c.mfc_un.res.minvif; ct--) {
2311 if (ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
2312 if (psend != -1) {
2313 struct sk_buff *skb2;
2314
2315 skb2 = skb_clone(skb, GFP_ATOMIC);
2316 if (skb2)
2317 ipmr_queue_output_xmit(net, mrt,
2318 skb2, psend);
2319 }
2320 psend = ct;
2321 }
2322 }
2323
2324 last_xmit:
2325 if (psend != -1) {
2326 ipmr_queue_output_xmit(net, mrt, skb, psend);
2327 return;
2328 }
2329
2330 dont_xmit:
2331 kfree_skb(skb);
2332 }
2333
2334 /* Locally generated multicast packets for output arrive here
2335 * Called with rcu_read_lock();
2336 */
2337 int ip_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb)
2338 {
2339 struct rtable *rt = skb_rtable(skb);
2340 struct mfc_cache *cache;
2341 struct net_device *dev;
2342 struct mr_table *mrt;
2343 int vif;
2344
2345 guard(rcu)();
2346
2347 dev = dst_dev_rcu(&rt->dst);
2348
2349 if (IPCB(skb)->flags & IPSKB_FORWARDED)
2350 goto mc_output;
2351 if (!(IPCB(skb)->flags & IPSKB_MCROUTE))
2352 goto mc_output;
2353
2354 skb->dev = dev;
2355
2356 mrt = ipmr_rt_fib_lookup(net, skb);
2357 if (IS_ERR(mrt))
2358 goto mc_output;
2359
2360 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
2361 if (!cache) {
2362 vif = ipmr_find_vif(mrt, dev);
2363 if (vif >= 0)
2364 cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
2365 vif);
2366 }
2367
2368 /* No usable cache entry */
2369 if (!cache) {
2370 vif = ipmr_find_vif(mrt, dev);
2371 if (vif >= 0)
2372 return ipmr_cache_unresolved(mrt, vif, skb, dev);
2373 goto mc_output;
2374 }
2375
2376 vif = cache->_c.mfc_parent;
2377 if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev)
2378 goto mc_output;
2379
2380 ip_mr_output_finish(net, mrt, dev, skb, cache);
2381 return 0;
2382
2383 mc_output:
2384 return ip_mc_output(net, sk, skb);
2385 }
2386
2387 #ifdef CONFIG_IP_PIMSM_V1
2388 /* Handle IGMP messages of PIMv1 */
2389 int pim_rcv_v1(struct sk_buff *skb)
2390 {
2391 struct igmphdr *pim;
2392 struct net *net = dev_net(skb->dev);
2393 struct mr_table *mrt;
2394
2395 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
2396 goto drop;
2397
2398 pim = igmp_hdr(skb);
2399
2400 mrt = ipmr_rt_fib_lookup(net, skb);
2401 if (IS_ERR(mrt))
2402 goto drop;
2403 if (!READ_ONCE(mrt->mroute_do_pim) ||
2404 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
2405 goto drop;
2406
2407 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
2408 drop:
2409 kfree_skb(skb);
2410 }
2411 return 0;
2412 }
2413 #endif
2414
2415 #ifdef CONFIG_IP_PIMSM_V2
2416 static int pim_rcv(struct sk_buff *skb)
2417 {
2418 struct pimreghdr *pim;
2419 struct net *net = dev_net(skb->dev);
2420 struct mr_table *mrt;
2421
2422 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
2423 goto drop;
2424
2425 pim = (struct pimreghdr *)skb_transport_header(skb);
2426 if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
2427 (pim->flags & PIM_NULL_REGISTER) ||
2428 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
2429 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
2430 goto drop;
2431
2432 mrt = ipmr_rt_fib_lookup(net, skb);
2433 if (IS_ERR(mrt))
2434 goto drop;
2435 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
2436 drop:
2437 kfree_skb(skb);
2438 }
2439 return 0;
2440 }
2441 #endif
2442
2443 int ipmr_get_route(struct net *net, struct sk_buff *skb,
2444 __be32 saddr, __be32 daddr,
2445 struct rtmsg *rtm, u32 portid)
2446 {
2447 struct mfc_cache *cache;
2448 struct mr_table *mrt;
2449 int err;
2450
2451 rcu_read_lock();
2452 mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT);
2453 if (!mrt) {
2454 rcu_read_unlock();
2455 return -ENOENT;
2456 }
2457
2458 cache = ipmr_cache_find(mrt, saddr, daddr);
2459 if (!cache && skb->dev) {
2460 int vif = ipmr_find_vif(mrt, skb->dev);
2461
2462 if (vif >= 0)
2463 cache = ipmr_cache_find_any(mrt, daddr, vif);
2464 }
2465 if (!cache) {
2466 struct sk_buff *skb2;
2467 struct iphdr *iph;
2468 struct net_device *dev;
2469 int vif = -1;
2470
2471 dev = skb->dev;
2472 if (dev)
2473 vif = ipmr_find_vif(mrt, dev);
2474 if (vif < 0) {
2475 rcu_read_unlock();
2476 return -ENODEV;
2477 }
2478
2479 skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
2480 if (!skb2) {
2481 rcu_read_unlock();
2482 return -ENOMEM;
2483 }
2484
2485 NETLINK_CB(skb2).portid = portid;
2486 skb_push(skb2, sizeof(struct iphdr));
2487 skb_reset_network_header(skb2);
2488 iph = ip_hdr(skb2);
2489 iph->ihl = sizeof(struct iphdr) >> 2;
2490 iph->saddr = saddr;
2491 iph->daddr = daddr;
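/* version 0 marks this header as synthetic: the unresolved-queue
 * code recognises it and completes the pending netlink request
 * instead of treating the skb as a real datagram.
 */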
2492 iph->version = 0;
2493 err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
2494 rcu_read_unlock();
2495 return err;
2496 }
2497
2498 err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
2499 rcu_read_unlock();
2500 return err;
2501 }
2502
2503 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2504 u32 portid, u32 seq, struct mfc_cache *c, int cmd,
2505 int flags)
2506 {
2507 struct nlmsghdr *nlh;
2508 struct rtmsg *rtm;
2509 int err;
2510
2511 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2512 if (!nlh)
2513 return -EMSGSIZE;
2514
2515 rtm = nlmsg_data(nlh);
2516 rtm->rtm_family = RTNL_FAMILY_IPMR;
2517 rtm->rtm_dst_len = 32;
2518 rtm->rtm_src_len = 32;
2519 rtm->rtm_tos = 0;
2520 rtm->rtm_table = mrt->id;
2521 if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2522 goto nla_put_failure;
2523 rtm->rtm_type = RTN_MULTICAST;
2524 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2525 if (c->_c.mfc_flags & MFC_STATIC)
2526 rtm->rtm_protocol = RTPROT_STATIC;
2527 else
2528 rtm->rtm_protocol = RTPROT_MROUTED;
2529 rtm->rtm_flags = 0;
2530
2531 if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
2532 nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
2533 goto nla_put_failure;
2534 err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
2535 /* do not break the dump if cache is unresolved */
2536 if (err < 0 && err != -ENOENT)
2537 goto nla_put_failure;
2538
2539 nlmsg_end(skb, nlh);
2540 return 0;
2541
2542 nla_put_failure:
2543 nlmsg_cancel(skb, nlh);
2544 return -EMSGSIZE;
2545 }
2546
2547 static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2548 u32 portid, u32 seq, struct mr_mfc *c, int cmd,
2549 int flags)
2550 {
2551 return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
2552 cmd, flags);
2553 }
2554
2555 static size_t mroute_msgsize(bool unresolved)
2556 {
2557 size_t len =
2558 NLMSG_ALIGN(sizeof(struct rtmsg))
2559 + nla_total_size(4) /* RTA_TABLE */
2560 + nla_total_size(4) /* RTA_SRC */
2561 + nla_total_size(4) /* RTA_DST */
2562 ;
2563
2564 if (!unresolved)
2565 len = len
2566 + nla_total_size(4) /* RTA_IIF */
2567 + nla_total_size(0) /* RTA_MULTIPATH */
2568 + MAXVIFS * NLA_ALIGN(sizeof(struct rtnexthop))
2569 /* RTA_MFC_STATS */
2570 + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
2571 ;
2572
2573 return len;
2574 }
2575
2576 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
2577 int cmd)
2578 {
2579 struct net *net = read_pnet(&mrt->net);
2580 struct sk_buff *skb;
2581 int err = -ENOBUFS;
2582
2583 skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS),
2584 GFP_ATOMIC);
2585 if (!skb)
2586 goto errout;
2587
2588 err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2589 if (err < 0)
2590 goto errout;
2591
2592 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
2593 return;
2594
2595 errout:
2596 kfree_skb(skb);
2597 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
2598 }
2599
2600 static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
2601 {
2602 size_t len =
2603 NLMSG_ALIGN(sizeof(struct rtgenmsg))
2604 + nla_total_size(1) /* IPMRA_CREPORT_MSGTYPE */
2605 + nla_total_size(4) /* IPMRA_CREPORT_VIF_ID */
2606 + nla_total_size(4) /* IPMRA_CREPORT_SRC_ADDR */
2607 + nla_total_size(4) /* IPMRA_CREPORT_DST_ADDR */
2608 + nla_total_size(4) /* IPMRA_CREPORT_TABLE */
2609 /* IPMRA_CREPORT_PKT */
2610 + nla_total_size(payloadlen)
2611 ;
2612
2613 return len;
2614 }
2615
2616 static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
2617 {
2618 struct net *net = read_pnet(&mrt->net);
2619 struct nlmsghdr *nlh;
2620 struct rtgenmsg *rtgenm;
2621 struct igmpmsg *msg;
2622 struct sk_buff *skb;
2623 struct nlattr *nla;
2624 int payloadlen;
2625
2626 payloadlen = pkt->len - sizeof(struct igmpmsg);
2627 msg = (struct igmpmsg *)skb_network_header(pkt);
2628
2629 skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC);
2630 if (!skb)
2631 goto errout;
2632
2633 nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
2634 sizeof(struct rtgenmsg), 0);
2635 if (!nlh)
2636 goto errout;
2637 rtgenm = nlmsg_data(nlh);
2638 rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
2639 if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) ||
2640 nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif | (msg->im_vif_hi << 8)) ||
2641 nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
2642 msg->im_src.s_addr) ||
2643 nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
2644 msg->im_dst.s_addr) ||
2645 nla_put_u32(skb, IPMRA_CREPORT_TABLE, mrt->id))
2646 goto nla_put_failure;
2647
2648 nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
2649 if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg),
2650 nla_data(nla), payloadlen))
2651 goto nla_put_failure;
2652
2653 nlmsg_end(skb, nlh);
2654
2655 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
2656 return;
2657
2658 nla_put_failure:
2659 nlmsg_cancel(skb, nlh);
2660 errout:
2661 kfree_skb(skb);
2662 rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
2663 }
2664
2665 static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
2666 const struct nlmsghdr *nlh,
2667 struct nlattr **tb,
2668 struct netlink_ext_ack *extack)
2669 {
2670 struct rtmsg *rtm;
2671 int i, err;
2672
2673 rtm = nlmsg_payload(nlh, sizeof(*rtm));
2674 if (!rtm) {
2675 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request");
2676 return -EINVAL;
2677 }
2678
2679 if (!netlink_strict_get_check(skb))
2680 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
2681 rtm_ipv4_policy, extack);
2682
2683 if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
2684 (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
2685 rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
2686 rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
2687 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request");
2688 return -EINVAL;
2689 }
2690
2691 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
2692 rtm_ipv4_policy, extack);
2693 if (err)
2694 return err;
2695
2696 if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
2697 (tb[RTA_DST] && !rtm->rtm_dst_len)) {
2698 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
2699 return -EINVAL;
2700 }
2701
2702 for (i = 0; i <= RTA_MAX; i++) {
2703 if (!tb[i])
2704 continue;
2705
2706 switch (i) {
2707 case RTA_SRC:
2708 case RTA_DST:
2709 case RTA_TABLE:
2710 break;
2711 default:
2712 NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request");
2713 return -EINVAL;
2714 }
2715 }
2716
2717 return 0;
2718 }
2719
2720 static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
2721 struct netlink_ext_ack *extack)
2722 {
2723 struct net *net = sock_net(in_skb->sk);
2724 struct nlattr *tb[RTA_MAX + 1];
2725 struct mfc_cache *cache;
2726 struct mr_table *mrt;
2727 struct sk_buff *skb;
2728 __be32 src, grp;
2729 u32 tableid;
2730 int err;
2731
2732 err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
2733 if (err < 0)
2734 goto errout;
2735
2736 src = nla_get_in_addr_default(tb[RTA_SRC], 0);
2737 grp = nla_get_in_addr_default(tb[RTA_DST], 0);
2738 tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
2739
2740 skb = nlmsg_new(mroute_msgsize(false), GFP_KERNEL);
2741 if (!skb) {
2742 err = -ENOBUFS;
2743 goto errout;
2744 }
2745
2746 rcu_read_lock();
2747
2748 mrt = __ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
2749 if (!mrt) {
2750 err = -ENOENT;
2751 goto errout_unlock;
2752 }
2753
2754 cache = ipmr_cache_find(mrt, src, grp);
2755 if (!cache) {
2756 err = -ENOENT;
2757 goto errout_unlock;
2758 }
2759
2760 err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
2761 nlh->nlmsg_seq, cache,
2762 RTM_NEWROUTE, 0);
2763 if (err < 0)
2764 goto errout_unlock;
2765
2766 rcu_read_unlock();
2767
2768 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2769 errout:
2770 return err;
2771
2772 errout_unlock:
2773 rcu_read_unlock();
2774 kfree_skb(skb);
2775 goto errout;
2776 }
2777
2778 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2779 {
2780 struct fib_dump_filter filter = {
2781 .rtnl_held = false,
2782 };
2783 int err;
2784
2785 rcu_read_lock();
2786
2787 if (cb->strict_check) {
2788 err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh,
2789 &filter, cb);
2790 if (err < 0)
2791 goto out;
2792 }
2793
2794 if (filter.table_id) {
2795 struct mr_table *mrt;
2796
2797 mrt = __ipmr_get_table(sock_net(skb->sk), filter.table_id);
2798 if (!mrt) {
2799 if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) {
2800 err = skb->len;
2801 goto out;
2802 }
2803
2804 NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
2805 err = -ENOENT;
2806 goto out;
2807 }
2808
2809 err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute,
2810 &mfc_unres_lock, &filter);
2811 err = skb->len ? : err;
2812 goto out;
2813 }
2814
2815 err = mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
2816 _ipmr_fill_mroute, &mfc_unres_lock, &filter);
2817 out:
2818 rcu_read_unlock();
2819
2820 return err;
2821 }
2822
2823 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
2824 [RTA_SRC] = { .type = NLA_U32 },
2825 [RTA_DST] = { .type = NLA_U32 },
2826 [RTA_IIF] = { .type = NLA_U32 },
2827 [RTA_TABLE] = { .type = NLA_U32 },
2828 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
2829 };
2830
2831 static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
2832 {
2833 switch (rtm_protocol) {
2834 case RTPROT_STATIC:
2835 case RTPROT_MROUTED:
2836 return true;
2837 }
2838 return false;
2839 }
2840
2841 static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
2842 {
2843 struct rtnexthop *rtnh = nla_data(nla);
2844 int remaining = nla_len(nla), vifi = 0;
2845
2846 while (rtnh_ok(rtnh, remaining)) {
2847 mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
2848 if (++vifi == MAXVIFS)
2849 break;
2850 rtnh = rtnh_next(rtnh, &remaining);
2851 }
2852
2853 return remaining > 0 ? -EINVAL : vifi;
2854 }
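
/* Assumed userspace encoding (sketch): RTA_MULTIPATH carries one
 * struct rtnexthop per vif, with rtnh_hops reused as the TTL
 * threshold rather than a hop count:
 *
 *   struct rtnexthop rtnh = {
 *           .rtnh_len  = sizeof(rtnh),
 *           .rtnh_hops = ttl_threshold,   // vifs are positional,
 *   };                                    // rtnh_ifindex is unused
 *
 * Entries are consumed in order, so the N-th rtnexthop configures
 * mfcc_ttls[N].
 */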
2855
2856 /* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
2857 static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
2858 struct mfcctl *mfcc, int *mrtsock,
2859 struct mr_table **mrtret,
2860 struct netlink_ext_ack *extack)
2861 {
2862 struct net_device *dev = NULL;
2863 u32 tblid = RT_TABLE_DEFAULT;
2864 int ret, rem, iif = 0;
2865 struct mr_table *mrt;
2866 struct nlattr *attr;
2867 struct rtmsg *rtm;
2868
2869 ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
2870 rtm_ipmr_policy, extack);
2871 if (ret < 0)
2872 goto out;
2873 rtm = nlmsg_data(nlh);
2874
2875 ret = -EINVAL;
2876 if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
2877 rtm->rtm_type != RTN_MULTICAST ||
2878 rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
2879 !ipmr_rtm_validate_proto(rtm->rtm_protocol))
2880 goto out;
2881
2882 memset(mfcc, 0, sizeof(*mfcc));
2883 mfcc->mfcc_parent = -1;
2884 ret = 0;
2885 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
2886 switch (nla_type(attr)) {
2887 case RTA_SRC:
2888 mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
2889 break;
2890 case RTA_DST:
2891 mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
2892 break;
2893 case RTA_IIF:
2894 iif = nla_get_u32(attr);
2895 break;
2896 case RTA_MULTIPATH:
2897 if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
2898 ret = -EINVAL;
2899 goto out;
2900 }
2901 break;
2902 case RTA_PREFSRC:
2903 ret = 1;
2904 break;
2905 case RTA_TABLE:
2906 tblid = nla_get_u32(attr);
2907 break;
2908 }
2909 }
2910
2911 rcu_read_lock();
2912
2913 mrt = __ipmr_get_table(net, tblid);
2914 if (!mrt) {
2915 ret = -ENOENT;
2916 goto unlock;
2917 }
2918
2919 if (iif) {
2920 dev = dev_get_by_index_rcu(net, iif);
2921 if (!dev) {
2922 ret = -ENODEV;
2923 goto unlock;
2924 }
2925
2926 mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);
2927 }
2928
2929 *mrtret = mrt;
2930 *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
2931
2932 unlock:
2933 rcu_read_unlock();
2934 out:
2935 return ret;
2936 }
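
/* Request shape in miniature (illustrative): an RTM_NEWROUTE for
 * RTNL_FAMILY_IPMR supplies the (S,G) pair plus the input interface,
 * and optionally a table id and the per-vif TTL thresholds:
 *
 *   rtm_family    = RTNL_FAMILY_IPMR, rtm_dst_len = 32
 *   RTA_SRC       -> mfcc_origin
 *   RTA_DST       -> mfcc_mcastgrp
 *   RTA_IIF       -> mfcc_parent (resolved via ipmr_find_vif())
 *   RTA_TABLE     -> table id, default RT_TABLE_DEFAULT
 *   RTA_MULTIPATH -> mfcc_ttls[]
 *
 * The presence of RTA_PREFSRC flags the entry as an MFC proxy (the
 * "return 1" case above).
 */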
2937
2938 /* takes care of both newroute and delroute */
2939 static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
2940 struct netlink_ext_ack *extack)
2941 {
2942 struct net *net = sock_net(skb->sk);
2943 int ret, mrtsock = 0, parent;
2944 struct mr_table *tbl = NULL;
2945 struct mfcctl mfcc;
2946
2947 ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack);
2948 if (ret < 0)
2949 return ret;
2950
2951 parent = ret ? mfcc.mfcc_parent : -1;
2952
2953 mutex_lock(&net->ipv4.mfc_mutex);
2954
2955 if (nlh->nlmsg_type == RTM_NEWROUTE)
2956 ret = ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
2957 else
2958 ret = ipmr_mfc_delete(tbl, &mfcc, parent);
2959
2960 mutex_unlock(&net->ipv4.mfc_mutex);
2961
2962 return ret;
2963 }
2964
2965 static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
2966 {
2967 u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
2968
2969 if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
2970 nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
2971 nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM,
2972 READ_ONCE(mrt->mroute_reg_vif_num)) ||
2973 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
2974 READ_ONCE(mrt->mroute_do_assert)) ||
2975 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM,
2976 READ_ONCE(mrt->mroute_do_pim)) ||
2977 nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
2978 READ_ONCE(mrt->mroute_do_wrvifwhole)))
2979 return false;
2980
2981 return true;
2982 }
2983
2984 static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
2985 {
2986 struct net_device *vif_dev;
2987 struct nlattr *vif_nest;
2988 struct vif_device *vif;
2989
2990 vif = &mrt->vif_table[vifid];
2991 vif_dev = vif_dev_read(vif);
2992 /* if the VIF doesn't exist just continue */
2993 if (!vif_dev)
2994 return true;
2995
2996 vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF);
2997 if (!vif_nest)
2998 return false;
2999
3000 if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, READ_ONCE(vif_dev->ifindex)) ||
3001 nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
3002 nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
3003 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, READ_ONCE(vif->bytes_in),
3004 IPMRA_VIFA_PAD) ||
3005 nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, READ_ONCE(vif->bytes_out),
3006 IPMRA_VIFA_PAD) ||
3007 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, READ_ONCE(vif->pkt_in),
3008 IPMRA_VIFA_PAD) ||
3009 nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, READ_ONCE(vif->pkt_out),
3010 IPMRA_VIFA_PAD) ||
3011 nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) ||
3012 nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) {
3013 nla_nest_cancel(skb, vif_nest);
3014 return false;
3015 }
3016 nla_nest_end(skb, vif_nest);
3017
3018 return true;
3019 }
3020
3021 static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
3022 struct netlink_ext_ack *extack)
3023 {
3024 struct ifinfomsg *ifm;
3025
3026 ifm = nlmsg_payload(nlh, sizeof(*ifm));
3027 if (!ifm) {
3028 NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump");
3029 return -EINVAL;
3030 }
3031
3032 if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
3033 NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump");
3034 return -EINVAL;
3035 }
3036
3037 if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
3038 ifm->ifi_change || ifm->ifi_index) {
3039 NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request");
3040 return -EINVAL;
3041 }
3042
3043 return 0;
3044 }
3045
3046 static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
3047 {
3048 struct net *net = sock_net(skb->sk);
3049 struct nlmsghdr *nlh = NULL;
3050 unsigned int t = 0, s_t;
3051 unsigned int e = 0, s_e;
3052 struct mr_table *mrt;
3053
3054 if (cb->strict_check) {
3055 int err = ipmr_valid_dumplink(cb->nlh, cb->extack);
3056
3057 if (err < 0)
3058 return err;
3059 }
3060
3061 s_t = cb->args[0];
3062 s_e = cb->args[1];
3063
3064 rcu_read_lock();
3065
3066 ipmr_for_each_table(mrt, net) {
3067 struct nlattr *vifs, *af;
3068 struct ifinfomsg *hdr;
3069 u32 i;
3070
3071 if (t < s_t)
3072 goto skip_table;
3073 nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3074 cb->nlh->nlmsg_seq, RTM_NEWLINK,
3075 sizeof(*hdr), NLM_F_MULTI);
3076 if (!nlh)
3077 break;
3078
3079 hdr = nlmsg_data(nlh);
3080 memset(hdr, 0, sizeof(*hdr));
3081 hdr->ifi_family = RTNL_FAMILY_IPMR;
3082
3083 af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
3084 if (!af) {
3085 nlmsg_cancel(skb, nlh);
3086 goto out;
3087 }
3088
3089 if (!ipmr_fill_table(mrt, skb)) {
3090 nlmsg_cancel(skb, nlh);
3091 goto out;
3092 }
3093
3094 vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS);
3095 if (!vifs) {
3096 nla_nest_end(skb, af);
3097 nlmsg_end(skb, nlh);
3098 goto out;
3099 }
3100 for (i = 0; i < READ_ONCE(mrt->maxvif); i++) {
3101 if (e < s_e)
3102 goto skip_entry;
3103 if (!ipmr_fill_vif(mrt, i, skb)) {
3104 nla_nest_end(skb, vifs);
3105 nla_nest_end(skb, af);
3106 nlmsg_end(skb, nlh);
3107 goto out;
3108 }
3109 skip_entry:
3110 e++;
3111 }
3112 s_e = 0;
3113 e = 0;
3114 nla_nest_end(skb, vifs);
3115 nla_nest_end(skb, af);
3116 nlmsg_end(skb, nlh);
3117 skip_table:
3118 t++;
3119 }
3120
3121 out:
3122 rcu_read_unlock();
3123
3124 cb->args[1] = e;
3125 cb->args[0] = t;
3126
3127 return skb->len;
3128 }
3129
3130 #ifdef CONFIG_PROC_FS
3131 /* The /proc interfaces to multicast routing:
3132 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
3133 */
3134
3135 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
3136 __acquires(RCU)
3137 {
3138 struct mr_vif_iter *iter = seq->private;
3139 struct net *net = seq_file_net(seq);
3140 struct mr_table *mrt;
3141
3142 rcu_read_lock();
3143 mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT);
3144 if (!mrt) {
3145 rcu_read_unlock();
3146 return ERR_PTR(-ENOENT);
3147 }
3148
3149 iter->mrt = mrt;
3150
3151 return mr_vif_seq_start(seq, pos);
3152 }
3153
3154 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
3155 __releases(RCU)
3156 {
3157 rcu_read_unlock();
3158 }
3159
3160 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
3161 {
3162 struct mr_vif_iter *iter = seq->private;
3163 struct mr_table *mrt = iter->mrt;
3164
3165 if (v == SEQ_START_TOKEN) {
3166 seq_puts(seq,
3167 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
3168 } else {
3169 const struct vif_device *vif = v;
3170 const struct net_device *vif_dev;
3171 const char *name;
3172
3173 vif_dev = vif_dev_read(vif);
3174 name = vif_dev ? vif_dev->name : "none";
3175 seq_printf(seq,
3176 "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
3177 vif - mrt->vif_table,
3178 name, vif->bytes_in, vif->pkt_in,
3179 vif->bytes_out, vif->pkt_out,
3180 vif->flags, vif->local, vif->remote);
3181 }
3182 return 0;
3183 }
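
/* Sample /proc/net/ip_mr_vif line (illustrative values):
 *
 *   Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *    0 eth0          123456     789    654321     987 00000 0100A8C0 00000000
 *
 * Local/Remote are the raw 32-bit addresses printed in hex in host
 * byte order, so the rendering is endianness-dependent.
 */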
3184
3185 static const struct seq_operations ipmr_vif_seq_ops = {
3186 .start = ipmr_vif_seq_start,
3187 .next = mr_vif_seq_next,
3188 .stop = ipmr_vif_seq_stop,
3189 .show = ipmr_vif_seq_show,
3190 };
3191
3192 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
3193 {
3194 struct net *net = seq_file_net(seq);
3195 struct mr_table *mrt;
3196
3197 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
3198 if (!mrt)
3199 return ERR_PTR(-ENOENT);
3200
3201 return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
3202 }
3203
3204 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
3205 {
3206 int n;
3207
3208 if (v == SEQ_START_TOKEN) {
3209 seq_puts(seq,
3210 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
3211 } else {
3212 const struct mfc_cache *mfc = v;
3213 const struct mr_mfc_iter *it = seq->private;
3214 const struct mr_table *mrt = it->mrt;
3215
3216 seq_printf(seq, "%08X %08X %-3hd",
3217 (__force u32) mfc->mfc_mcastgrp,
3218 (__force u32) mfc->mfc_origin,
3219 mfc->_c.mfc_parent);
3220
3221 if (it->cache != &mrt->mfc_unres_queue) {
3222 seq_printf(seq, " %8lu %8lu %8lu",
3223 atomic_long_read(&mfc->_c.mfc_un.res.pkt),
3224 atomic_long_read(&mfc->_c.mfc_un.res.bytes),
3225 atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
3226 for (n = mfc->_c.mfc_un.res.minvif;
3227 n < mfc->_c.mfc_un.res.maxvif; n++) {
3228 if (VIF_EXISTS(mrt, n) &&
3229 mfc->_c.mfc_un.res.ttls[n] < 255)
3230 seq_printf(seq,
3231 " %2d:%-3d",
3232 n, mfc->_c.mfc_un.res.ttls[n]);
3233 }
3234 } else {
3235 /* unresolved mfc_caches don't contain
3236 * pkt, bytes and wrong_if values
3237 */
3238 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
3239 }
3240 seq_putc(seq, '\n');
3241 }
3242 return 0;
3243 }
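
/* Sample /proc/net/ip_mr_cache lines (illustrative values): a resolved
 * entry lists its counters followed by "oif:ttl" pairs, while an
 * unresolved entry shows zeroed counters and no oifs:
 *
 *   Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *   010000E0 0100A8C0 0          42    12345        0  1:1  2:1
 */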
3244
3245 static const struct seq_operations ipmr_mfc_seq_ops = {
3246 .start = ipmr_mfc_seq_start,
3247 .next = mr_mfc_seq_next,
3248 .stop = mr_mfc_seq_stop,
3249 .show = ipmr_mfc_seq_show,
3250 };
3251 #endif
3252
3253 #ifdef CONFIG_IP_PIMSM_V2
3254 static const struct net_protocol pim_protocol = {
3255 .handler = pim_rcv,
3256 };
3257 #endif
3258
3259 static unsigned int ipmr_seq_read(const struct net *net)
3260 {
3261 return atomic_read(&net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net);
3262 }
3263
3264 static int ipmr_dump(struct net *net, struct notifier_block *nb,
3265 struct netlink_ext_ack *extack)
3266 {
3267 return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
3268 ipmr_mr_table_iter, extack);
3269 }
3270
3271 static const struct fib_notifier_ops ipmr_notifier_ops_template = {
3272 .family = RTNL_FAMILY_IPMR,
3273 .fib_seq_read = ipmr_seq_read,
3274 .fib_dump = ipmr_dump,
3275 .owner = THIS_MODULE,
3276 };
3277
3278 static int __net_init ipmr_notifier_init(struct net *net)
3279 {
3280 struct fib_notifier_ops *ops;
3281
3282 atomic_set(&net->ipv4.ipmr_seq, 0);
3283
3284 ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
3285 if (IS_ERR(ops))
3286 return PTR_ERR(ops);
3287 net->ipv4.ipmr_notifier_ops = ops;
3288
3289 return 0;
3290 }
3291
3292 static void __net_exit ipmr_notifier_exit(struct net *net)
3293 {
3294 fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
3295 net->ipv4.ipmr_notifier_ops = NULL;
3296 }
3297
3298 /* Setup for IP multicast routing */
3299 static int __net_init ipmr_net_init(struct net *net)
3300 {
3301 LIST_HEAD(dev_kill_list);
3302 int err;
3303
3304 mutex_init(&net->ipv4.mfc_mutex);
3305
3306 err = ipmr_notifier_init(net);
3307 if (err)
3308 goto ipmr_notifier_fail;
3309
3310 err = ipmr_rules_init(net);
3311 if (err < 0)
3312 goto ipmr_rules_fail;
3313
3314 #ifdef CONFIG_PROC_FS
3315 err = -ENOMEM;
3316 if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops,
3317 sizeof(struct mr_vif_iter)))
3318 goto proc_vif_fail;
3319 if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
3320 sizeof(struct mr_mfc_iter)))
3321 goto proc_cache_fail;
3322 #endif
3323 return 0;
3324
3325 #ifdef CONFIG_PROC_FS
3326 proc_cache_fail:
3327 remove_proc_entry("ip_mr_vif", net->proc_net);
3328 proc_vif_fail:
3329 ipmr_rules_exit_rtnl(net, &dev_kill_list);
3330 ipmr_rules_exit(net);
3331 #endif
3332 ipmr_rules_fail:
3333 ipmr_notifier_exit(net);
3334 ipmr_notifier_fail:
3335 return err;
3336 }
3337
3338 static void __net_exit ipmr_net_exit(struct net *net)
3339 {
3340 #ifdef CONFIG_PROC_FS
3341 remove_proc_entry("ip_mr_cache", net->proc_net);
3342 remove_proc_entry("ip_mr_vif", net->proc_net);
3343 #endif
3344 ipmr_rules_exit(net);
3345 ipmr_notifier_exit(net);
3346 }
3347
3348 static void __net_exit ipmr_net_exit_rtnl(struct net *net,
3349 struct list_head *dev_kill_list)
3350 {
3351 ipmr_rules_exit_rtnl(net, dev_kill_list);
3352 }
3353
3354 static struct pernet_operations ipmr_net_ops = {
3355 .init = ipmr_net_init,
3356 .exit = ipmr_net_exit,
3357 .exit_rtnl = ipmr_net_exit_rtnl,
3358 };
3359
3360 static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = {
3361 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK,
3362 .dumpit = ipmr_rtm_dumplink, .flags = RTNL_FLAG_DUMP_UNLOCKED},
3363 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE,
3364 .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED},
3365 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE,
3366 .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED},
3367 {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE,
3368 .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute,
3369 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
3370 };
3371
3372 int __init ip_mr_init(void)
3373 {
3374 int err;
3375
3376 mrt_cachep = KMEM_CACHE(mfc_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
3377
3378 err = register_pernet_subsys(&ipmr_net_ops);
3379 if (err)
3380 goto reg_pernet_fail;
3381
3382 err = register_netdevice_notifier(&ip_mr_notifier);
3383 if (err)
3384 goto reg_notif_fail;
3385 #ifdef CONFIG_IP_PIMSM_V2
3386 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
3387 pr_err("%s: can't add PIM protocol\n", __func__);
3388 err = -EAGAIN;
3389 goto add_proto_fail;
3390 }
3391 #endif
3392 rtnl_register_many(ipmr_rtnl_msg_handlers);
3393
3394 return 0;
3395
3396 #ifdef CONFIG_IP_PIMSM_V2
3397 add_proto_fail:
3398 unregister_netdevice_notifier(&ip_mr_notifier);
3399 #endif
3400 reg_notif_fail:
3401 unregister_pernet_subsys(&ipmr_net_ops);
3402 reg_pernet_fail:
3403 kmem_cache_destroy(mrt_cachep);
3404 return err;
3405 }
3406