// SPDX-License-Identifier: GPL-2.0
/* Linux multicast routing support
 * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
 */

#include <linux/rhashtable.h>
#include <linux/mroute_base.h>

/* Sets everything common except 'dev', since that is done under locking */
void vif_device_init(struct vif_device *v,
		     struct net_device *dev,
		     unsigned long rate_limit,
		     unsigned char threshold,
		     unsigned short flags,
		     unsigned short get_iflink_mask)
{
	/* The device pointer itself is published later, under the
	 * appropriate lock; here it is explicitly cleared.
	 */
	RCU_INIT_POINTER(v->dev, NULL);
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->rate_limit = rate_limit;
	v->flags = flags;
	v->threshold = threshold;
	/* VIFs whose flags intersect @get_iflink_mask (tunnel-like
	 * interfaces, per the callers) record the lower device's ifindex;
	 * all others record the device's own ifindex.
	 */
	if (v->flags & get_iflink_mask)
		v->link = dev_get_iflink(dev);
	else
		v->link = dev->ifindex;
}

/* Allocate and initialize a multicast routing table.
 *
 * @id:          table id stored in mrt->id
 * @ops:         family-specific table ops, copied by value into the table
 * @expire_func: callback wired to the unresolved-entry expiry timer
 * @table_set:   family hook invoked once the table is fully initialized
 *
 * Returns the new table, or ERR_PTR(-ENOMEM) / ERR_PTR(err) if the
 * allocation or the rhltable initialization fails.
 */
struct mr_table *
mr_table_alloc(struct net *net, u32 id,
	       struct mr_table_ops *ops,
	       void (*expire_func)(struct timer_list *t),
	       void (*table_set)(struct mr_table *mrt,
				 struct net *net))
{
	struct mr_table *mrt;
	int err;

	mrt = kzalloc_obj(*mrt);
	if (!mrt)
		return ERR_PTR(-ENOMEM);
	mrt->id = id;
	write_pnet(&mrt->net, net);

	mrt->ops = *ops;
	err = rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
	if (err) {
		kfree(mrt);
		return ERR_PTR(err);
	}
	INIT_LIST_HEAD(&mrt->mfc_cache_list);
	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);

	/* -1 means "no register VIF configured yet" */
	mrt->mroute_reg_vif_num = -1;
	table_set(mrt, net);
	return mrt;
}

/* Look up a cache entry matching @hasharg whose incoming VIF is @parent;
 * @parent == -1 matches any incoming VIF.  Walks the rhltable bucket with
 * the _rcu iterator, so callers are expected to be in an RCU read-side
 * critical section.
 */
void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
{
	struct rhlist_head *tmp, *list;
	struct mr_mfc *c;

	list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		if (parent == -1 || parent == c->mfc_parent)
			return c;

	return NULL;
}

/* Look up an "any" entry (keyed by ops.cmparg_any) that forwards out of
 * @vifi.  A ttl below 255 marks the VIF as part of the entry's output
 * set.  RCU read side assumed held, as above.
 */
void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
{
	struct rhlist_head *tmp, *list;
	struct mr_mfc *c;

	list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
			       *mrt->ops.rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode)
		if (c->mfc_un.res.ttls[vifi] < 255)
			return c;

	return NULL;
}

/* Look up a cache entry for @hasharg that forwards out of @vifi.  An
 * entry also matches if a proxy entry for its incoming VIF forwards out
 * of @vifi; failing both, fall back to a plain "any" lookup on @vifi.
 * RCU read side assumed held, as above.
 */
void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
{
	struct rhlist_head *tmp, *list;
	struct mr_mfc *c, *proxy;

	list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
		if (c->mfc_un.res.ttls[vifi] < 255)
			return c;

		/* It's ok if the vifi is part of the static tree */
		proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
		if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
			return c;
	}

	return mr_mfc_find_any_parent(mrt, vifi);
}

#ifdef CONFIG_PROC_FS
/* Return the @pos'th existing VIF in the table (skipping holes), leaving
 * iter->ct at its index, or NULL when @pos is past the end.  Used by the
 * /proc seq_file VIF iterator.
 */
void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

/* seq_file ->next() for the VIF table: advance iter->ct to the next
 * existing VIF, or return NULL at the end of the table.
 */
void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return mr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

/* Return the @pos'th MFC entry, scanning the resolved cache list first
 * and then the unresolved queue.
 *
 * Locking: when an entry is returned from the resolved list, this
 * returns with rcu_read_lock() still held; when it comes from the
 * unresolved queue, it returns with it->lock held instead.  it->cache
 * records which list the entry belongs to so that mr_mfc_seq_next() and
 * the seq ->stop() callback can release the matching lock.
 */
void *mr_mfc_seq_idx(struct net *net,
		     struct mr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mr_mfc *mfc;

	rcu_read_lock();
	it->cache = &mrt->mfc_cache_list;
	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
		if (pos-- == 0)
			return mfc;
	rcu_read_unlock();

	spin_lock_bh(it->lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(it->lock);

	it->cache = NULL;
	return NULL;
}

/* seq_file ->next() for MFC entries.  Continues within the current list,
 * and when the resolved list is exhausted, hands off from the RCU read
 * lock to it->lock and switches to the unresolved queue (mirroring the
 * locking contract of mr_mfc_seq_idx() above).
 */
void *mr_mfc_seq_next(struct seq_file *seq, void *v,
		      loff_t *pos)
{
	struct mr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;
	struct mr_mfc *c = v;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return mr_mfc_seq_idx(net, seq->private, 0);

	/* Still inside the current list? */
	if (c->list.next != it->cache)
		return list_entry(c->list.next, struct mr_mfc, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	/* exhausted cache_array, show unresolved */
	rcu_read_unlock();
	it->cache = &mrt->mfc_unres_queue;

	spin_lock_bh(it->lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mr_mfc, list);

end_of_list:
	spin_unlock_bh(it->lock);
	it->cache = NULL;

	return NULL;
}
#endif

/* Fill @skb/@rtm with the route described by cache entry @c: RTA_IIF
 * (input interface), an RTA_MULTIPATH nest of output hops, RTA_MFC_STATS
 * and RTA_EXPIRES.
 *
 * Returns 1 on success, -ENOENT if @c is unresolved (marked with
 * RTNH_F_UNRESOLVED instead), or -EMSGSIZE if @skb ran out of space.
 */
int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
		   struct mr_mfc *c, struct rtmsg *rtm)
{
	struct net_device *vif_dev;
	struct rta_mfc_stats mfcs;
	struct nlattr *mp_attr;
	struct rtnexthop *nhp;
	unsigned long lastuse;
	int ct;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS) {
		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
		return -ENOENT;
	}

	rcu_read_lock();
	vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev);
	if (vif_dev && nla_put_u32(skb, RTA_IIF, READ_ONCE(vif_dev->ifindex)) < 0) {
		rcu_read_unlock();
		return -EMSGSIZE;
	}
	rcu_read_unlock();

	if (c->mfc_flags & MFC_OFFLOAD)
		rtm->rtm_flags |= RTNH_F_OFFLOAD;

	mp_attr = nla_nest_start_noflag(skb, RTA_MULTIPATH);
	if (!mp_attr)
		return -EMSGSIZE;

	/* One rtnexthop per output VIF (ttl < 255) with a live device */
	rcu_read_lock();
	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		struct vif_device *vif = &mrt->vif_table[ct];

		vif_dev = rcu_dereference(vif->dev);
		if (vif_dev && c->mfc_un.res.ttls[ct] < 255) {

			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (!nhp) {
				rcu_read_unlock();
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = READ_ONCE(vif_dev->ifindex);
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	rcu_read_unlock();

	nla_nest_end(skb, mp_attr);

	/* Report time since last use, clamped to 0 if lastuse is in the
	 * future relative to jiffies (e.g. raced with an update).
	 */
	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;

	mfcs.mfcs_packets = atomic_long_read(&c->mfc_un.res.pkt);
	mfcs.mfcs_bytes = atomic_long_read(&c->mfc_un.res.bytes);
	mfcs.mfcs_wrong_if = atomic_long_read(&c->mfc_un.res.wrong_if);
	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
			      RTA_PAD))
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}

/* Return true if @dev is one of the output devices of cache entry @c.
 * Uses rcu_access_pointer() since the pointer is only compared, never
 * dereferenced.
 */
static bool mr_mfc_uses_dev(const struct mr_table *mrt,
			    const struct mr_mfc *c,
			    const struct net_device *dev)
{
	int ct;

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		const struct net_device *vif_dev;
		const struct vif_device *vif;

		vif = &mrt->vif_table[ct];
		vif_dev = rcu_access_pointer(vif->dev);
		if (vif_dev && c->mfc_un.res.ttls[ct] < 255 &&
		    vif_dev == dev)
			return true;
	}
	return false;
}

/* Dump one table's MFC entries (resolved list first, then the
 * unresolved queue) into @skb via @fill, resuming from the entry index
 * saved in cb->args[1] and honoring @filter (device and filter_set).
 *
 * The resolved list is walked with list_for_each_entry_rcu() under a
 * caller-held RCU read lock or RTNL (hence the lockdep_rtnl_is_held()
 * condition); the unresolved queue is walked under @lock.
 *
 * Returns 0 when the table was fully dumped, or the negative error from
 * @fill when @skb filled up; cb->args[1] is updated either way.
 */
int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
		  struct netlink_callback *cb,
		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags),
		  spinlock_t *lock, struct fib_dump_filter *filter)
{
	unsigned int e = 0, s_e = cb->args[1];
	unsigned int flags = NLM_F_MULTI;
	struct mr_mfc *mfc;
	int err;

	if (filter->filter_set)
		flags |= NLM_F_DUMP_FILTERED;

	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list,
				lockdep_rtnl_is_held()) {
		if (e < s_e)
			goto next_entry;
		if (filter->dev &&
		    !mr_mfc_uses_dev(mrt, mfc, filter->dev))
			goto next_entry;

		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
		if (err < 0)
			goto out;
next_entry:
		e++;
	}

	spin_lock_bh(lock);
	list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
		if (e < s_e)
			goto next_entry2;

		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
		if (err < 0) {
			spin_unlock_bh(lock);
			goto out;
		}
next_entry2:
		e++;
	}
	spin_unlock_bh(lock);
	err = 0;
out:
	cb->args[1] = e;
	return err;
}

/* RTM_GETROUTE dump handler shared by ipmr/ip6mr: walk all tables via
 * @iter, dumping each with mr_table_dump(), resuming from the table
 * index saved in cb->args[0].  Filters requesting a protocol, flags, or
 * a route type other than RTN_MULTICAST match nothing and return early.
 *
 * Returns skb->len per the netlink dump convention.
 */
int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
		     struct mr_table *(*iter)(struct net *net,
					      struct mr_table *mrt),
		     int (*fill)(struct mr_table *mrt,
				 struct sk_buff *skb,
				 u32 portid, u32 seq, struct mr_mfc *c,
				 int cmd, int flags),
		     spinlock_t *lock, struct fib_dump_filter *filter)
{
	unsigned int t = 0, s_t = cb->args[0];
	struct net *net = sock_net(skb->sk);
	struct mr_table *mrt;
	int err;

	/* multicast does not track protocol or have route type other
	 * than RTN_MULTICAST
	 */
	if (filter->filter_set) {
		if (filter->protocol || filter->flags ||
		    (filter->rt_type && filter->rt_type != RTN_MULTICAST))
			return skb->len;
	}

	rcu_read_lock();
	for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
		if (t < s_t)
			goto next_table;

		err = mr_table_dump(mrt, skb, cb, fill, lock, filter);
		if (err < 0)
			break;
		/* restart entry index for the next table */
		cb->args[1] = 0;
next_table:
		t++;
	}
	rcu_read_unlock();

	cb->args[0] = t;

	return skb->len;
}

/* Replay the current multicast state to notifier @nb: first the
 * family's rule tables via @rules_dump, then for every table a
 * FIB_EVENT_VIF_ADD per active VIF and a FIB_EVENT_ENTRY_ADD per
 * resolved MFC entry.  Stops and returns the first error encountered,
 * 0 on success.
 */
int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
	    int (*rules_dump)(struct net *net,
			      struct notifier_block *nb,
			      struct netlink_ext_ack *extack),
	    struct mr_table *(*mr_iter)(struct net *net,
					struct mr_table *mrt),
	    struct netlink_ext_ack *extack)
{
	struct mr_table *mrt;
	int err;

	err = rules_dump(net, nb, extack);
	if (err)
		return err;

	for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
		struct vif_device *v = &mrt->vif_table[0];
		struct net_device *vif_dev;
		struct mr_mfc *mfc;
		int vifi;

		/* Notify on table VIF entries */
		rcu_read_lock();
		for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
			vif_dev = rcu_dereference(v->dev);
			if (!vif_dev)
				continue;

			err = mr_call_vif_notifier(nb, family,
						   FIB_EVENT_VIF_ADD, v,
						   vif_dev, vifi,
						   mrt->id, extack);
			if (err)
				break;
		}
		rcu_read_unlock();

		if (err)
			return err;

		/* Notify on table MFC entries */
		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
			err = mr_call_mfc_notifier(nb, family,
						   FIB_EVENT_ENTRY_ADD,
						   mfc, mrt->id, extack);
			if (err)
				return err;
		}
	}

	return 0;
}