/* xref: /linux/net/ipv4/ipmr_base.c (revision 1666d945b57b5a10bdea2d229b8ac43d2970f5f8) */
1 // SPDX-License-Identifier: GPL-2.0
2 /* Linux multicast routing support
3  * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
4  */
5 
6 #include <linux/rhashtable.h>
7 #include <linux/mroute_base.h>
8 
9 /* Sets everything common except 'dev', since that is done under locking */
10 void vif_device_init(struct vif_device *v,
11 		     struct net_device *dev,
12 		     unsigned long rate_limit,
13 		     unsigned char threshold,
14 		     unsigned short flags,
15 		     unsigned short get_iflink_mask)
16 {
17 	RCU_INIT_POINTER(v->dev, NULL);
18 	v->bytes_in = 0;
19 	v->bytes_out = 0;
20 	v->pkt_in = 0;
21 	v->pkt_out = 0;
22 	v->rate_limit = rate_limit;
23 	v->flags = flags;
24 	v->threshold = threshold;
25 	if (v->flags & get_iflink_mask)
26 		v->link = dev_get_iflink(dev);
27 	else
28 		v->link = dev->ifindex;
29 }
30 
31 struct mr_table *
32 mr_table_alloc(struct net *net, u32 id,
33 	       struct mr_table_ops *ops,
34 	       void (*expire_func)(struct timer_list *t),
35 	       void (*table_set)(struct mr_table *mrt,
36 				 struct net *net))
37 {
38 	struct mr_table *mrt;
39 	int err;
40 
41 	mrt = kzalloc_obj(*mrt);
42 	if (!mrt)
43 		return ERR_PTR(-ENOMEM);
44 	mrt->id = id;
45 	write_pnet(&mrt->net, net);
46 
47 	mrt->ops = *ops;
48 	err = rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
49 	if (err) {
50 		kfree(mrt);
51 		return ERR_PTR(err);
52 	}
53 	INIT_LIST_HEAD(&mrt->mfc_cache_list);
54 	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
55 
56 	timer_setup(&mrt->ipmr_expire_timer, expire_func, 0);
57 
58 	mrt->mroute_reg_vif_num = -1;
59 	table_set(mrt, net);
60 	return mrt;
61 }
62 
63 void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent)
64 {
65 	struct rhlist_head *tmp, *list;
66 	struct mr_mfc *c;
67 
68 	list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
69 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
70 		if (parent == -1 || parent == c->mfc_parent)
71 			return c;
72 
73 	return NULL;
74 }
75 
76 void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi)
77 {
78 	struct rhlist_head *tmp, *list;
79 	struct mr_mfc *c;
80 
81 	list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any,
82 			       *mrt->ops.rht_params);
83 	rhl_for_each_entry_rcu(c, tmp, list, mnode)
84 		if (c->mfc_un.res.ttls[vifi] < 255)
85 			return c;
86 
87 	return NULL;
88 }
89 
/* Find an MFC entry matching @hasharg that forwards out of @vifi.
 *
 * An entry whose own ttl for @vifi is not set is still accepted when a
 * wildcard ("proxy") entry covering the entry's parent vif forwards
 * there.  Falls back to a plain wildcard lookup on @vifi.
 * Caller must hold RCU.  Returns the entry or NULL.
 */
void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
{
	struct rhlist_head *tmp, *list;
	struct mr_mfc *c, *proxy;

	list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
		/* ttl < 255 means @vifi is an output interface of this entry */
		if (c->mfc_un.res.ttls[vifi] < 255)
			return c;

		/* It's ok if the vifi is part of the static tree */
		proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent);
		if (proxy && proxy->mfc_un.res.ttls[vifi] < 255)
			return c;
	}

	return mr_mfc_find_any_parent(mrt, vifi);
}
108 
109 #ifdef CONFIG_PROC_FS
110 void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos)
111 {
112 	struct mr_table *mrt = iter->mrt;
113 
114 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
115 		if (!VIF_EXISTS(mrt, iter->ct))
116 			continue;
117 		if (pos-- == 0)
118 			return &mrt->vif_table[iter->ct];
119 	}
120 	return NULL;
121 }
122 
123 void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
124 {
125 	struct mr_vif_iter *iter = seq->private;
126 	struct net *net = seq_file_net(seq);
127 	struct mr_table *mrt = iter->mrt;
128 
129 	++*pos;
130 	if (v == SEQ_START_TOKEN)
131 		return mr_vif_seq_idx(net, iter, 0);
132 
133 	while (++iter->ct < mrt->maxvif) {
134 		if (!VIF_EXISTS(mrt, iter->ct))
135 			continue;
136 		return &mrt->vif_table[iter->ct];
137 	}
138 	return NULL;
139 }
140 
/* Position the MFC seq iterator on the @pos'th cache entry.
 *
 * Walks the resolved cache first (under the RCU read lock), then the
 * unresolved queue (under it->lock).  NOTE(locking): on a non-NULL
 * return the corresponding lock is still held — presumably released by
 * the seq_file ->next/->stop callbacks based on it->cache; confirm
 * against the per-family seq ops.  On NULL return no lock is held and
 * it->cache is cleared.
 */
void *mr_mfc_seq_idx(struct net *net,
		     struct mr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mr_mfc *mfc;

	rcu_read_lock();
	it->cache = &mrt->mfc_cache_list;
	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
		if (pos-- == 0)
			return mfc;
	rcu_read_unlock();

	/* Not in the resolved cache: continue into the unresolved queue */
	spin_lock_bh(it->lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(it->lock);

	/* it->cache == NULL signals that no lock remains held */
	it->cache = NULL;
	return NULL;
}
164 
/* seq_file ->next callback for the MFC tables.
 *
 * Advances within the list recorded in it->cache; when the resolved
 * cache is exhausted it drops the RCU read lock, switches to the
 * unresolved queue and takes it->lock instead.  On NULL return all
 * locks taken here have been dropped and it->cache is cleared.
 */
void *mr_mfc_seq_next(struct seq_file *seq, void *v,
		      loff_t *pos)
{
	struct mr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;
	struct mr_mfc *c = v;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return mr_mfc_seq_idx(net, seq->private, 0);

	/* more entries remain in the current list */
	if (c->list.next != it->cache)
		return list_entry(c->list.next, struct mr_mfc, list);

	/* already on the unresolved queue: nothing left to show */
	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	/* exhausted cache_array, show unresolved */
	rcu_read_unlock();
	it->cache = &mrt->mfc_unres_queue;

	spin_lock_bh(it->lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mr_mfc, list);

end_of_list:
	spin_unlock_bh(it->lock);
	it->cache = NULL;

	return NULL;
}
198 #endif
199 
/* Fill @skb/@rtm with the routing state of MFC entry @c.
 *
 * Emits RTA_IIF for the input interface, an RTA_MULTIPATH nest with one
 * rtnexthop per output vif (those with a ttl threshold set), plus
 * RTA_MFC_STATS and RTA_EXPIRES.  Returns 1 on success, -ENOENT when @c
 * is still unresolved, -EMSGSIZE when @skb has no room left.
 */
int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
		   struct mr_mfc *c, struct rtmsg *rtm)
{
	struct net_device *vif_dev;
	struct rta_mfc_stats mfcs;
	struct nlattr *mp_attr;
	struct rtnexthop *nhp;
	unsigned long lastuse;
	int ct;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS) {
		rtm->rtm_flags |= RTNH_F_UNRESOLVED;
		return -ENOENT;
	}

	/* vif->dev can change concurrently; pin it under RCU while reading */
	rcu_read_lock();
	vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev);
	if (vif_dev && nla_put_u32(skb, RTA_IIF, READ_ONCE(vif_dev->ifindex)) < 0) {
		rcu_read_unlock();
		return -EMSGSIZE;
	}
	rcu_read_unlock();

	if (c->mfc_flags & MFC_OFFLOAD)
		rtm->rtm_flags |= RTNH_F_OFFLOAD;

	mp_attr = nla_nest_start_noflag(skb, RTA_MULTIPATH);
	if (!mp_attr)
		return -EMSGSIZE;

	rcu_read_lock();
	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		struct vif_device *vif = &mrt->vif_table[ct];

		vif_dev = rcu_dereference(vif->dev);
		/* ttl < 255 means @ct is an output interface of this entry */
		if (vif_dev && c->mfc_un.res.ttls[ct] < 255) {

			nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
			if (!nhp) {
				rcu_read_unlock();
				nla_nest_cancel(skb, mp_attr);
				return -EMSGSIZE;
			}

			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = READ_ONCE(vif_dev->ifindex);
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	rcu_read_unlock();

	nla_nest_end(skb, mp_attr);

	/* Age of the entry; a lastuse in the future is reported as 0 */
	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;

	mfcs.mfcs_packets = atomic_long_read(&c->mfc_un.res.pkt);
	mfcs.mfcs_bytes = atomic_long_read(&c->mfc_un.res.bytes);
	mfcs.mfcs_wrong_if = atomic_long_read(&c->mfc_un.res.wrong_if);
	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
			      RTA_PAD))
		return -EMSGSIZE;

	rtm->rtm_type = RTN_MULTICAST;
	return 1;
}
269 
270 static bool mr_mfc_uses_dev(const struct mr_table *mrt,
271 			    const struct mr_mfc *c,
272 			    const struct net_device *dev)
273 {
274 	int ct;
275 
276 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
277 		const struct net_device *vif_dev;
278 		const struct vif_device *vif;
279 
280 		vif = &mrt->vif_table[ct];
281 		vif_dev = rcu_access_pointer(vif->dev);
282 		if (vif_dev && c->mfc_un.res.ttls[ct] < 255 &&
283 		    vif_dev == dev)
284 			return true;
285 	}
286 	return false;
287 }
288 
/* Dump the MFC entries of @mrt into @skb for an RTM_GETROUTE dump.
 *
 * Walks the resolved cache first, then the unresolved queue under
 * @lock, skipping the first cb->args[1] entries so an interrupted dump
 * can resume.  @filter may restrict the dump to entries that use a
 * given device.  Returns 0 when complete, or the negative error from
 * @fill (with cb->args[1] updated for resumption).
 * NOTE(review): the RCU list walk is conditioned on
 * lockdep_rtnl_is_held(), so callers presumably hold RTNL or the RCU
 * read lock — confirm at the call sites.
 */
int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
		  struct netlink_callback *cb,
		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
			      u32 portid, u32 seq, struct mr_mfc *c,
			      int cmd, int flags),
		  spinlock_t *lock, struct fib_dump_filter *filter)
{
	unsigned int e = 0, s_e = cb->args[1];
	unsigned int flags = NLM_F_MULTI;
	struct mr_mfc *mfc;
	int err;

	if (filter->filter_set)
		flags |= NLM_F_DUMP_FILTERED;

	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list,
				lockdep_rtnl_is_held()) {
		/* skip entries already sent in a previous pass */
		if (e < s_e)
			goto next_entry;
		if (filter->dev &&
		    !mr_mfc_uses_dev(mrt, mfc, filter->dev))
			goto next_entry;

		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
		if (err < 0)
			goto out;
next_entry:
		e++;
	}

	/* unresolved entries are protected by @lock, not RCU */
	spin_lock_bh(lock);
	list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
		if (e < s_e)
			goto next_entry2;

		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
		if (err < 0) {
			spin_unlock_bh(lock);
			goto out;
		}
next_entry2:
		e++;
	}
	spin_unlock_bh(lock);
	err = 0;
out:
	/* remember how far we got so the next dump call can resume */
	cb->args[1] = e;
	return err;
}
340 
341 int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
342 		     struct mr_table *(*iter)(struct net *net,
343 					      struct mr_table *mrt),
344 		     int (*fill)(struct mr_table *mrt,
345 				 struct sk_buff *skb,
346 				 u32 portid, u32 seq, struct mr_mfc *c,
347 				 int cmd, int flags),
348 		     spinlock_t *lock, struct fib_dump_filter *filter)
349 {
350 	unsigned int t = 0, s_t = cb->args[0];
351 	struct net *net = sock_net(skb->sk);
352 	struct mr_table *mrt;
353 	int err;
354 
355 	/* multicast does not track protocol or have route type other
356 	 * than RTN_MULTICAST
357 	 */
358 	if (filter->filter_set) {
359 		if (filter->protocol || filter->flags ||
360 		    (filter->rt_type && filter->rt_type != RTN_MULTICAST))
361 			return skb->len;
362 	}
363 
364 	rcu_read_lock();
365 	for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
366 		if (t < s_t)
367 			goto next_table;
368 
369 		err = mr_table_dump(mrt, skb, cb, fill, lock, filter);
370 		if (err < 0)
371 			break;
372 		cb->args[1] = 0;
373 next_table:
374 		t++;
375 	}
376 	rcu_read_unlock();
377 
378 	cb->args[0] = t;
379 
380 	return skb->len;
381 }
382 
/* Replay the multicast routing state of @net to FIB notifier @nb.
 *
 * First replays the policy rules via @rules_dump, then for each table
 * (walked via @mr_iter) emits FIB_EVENT_VIF_ADD for every vif with a
 * device attached (under RCU) and FIB_EVENT_ENTRY_ADD for every
 * resolved MFC entry.  Returns 0 on success or the first error.
 * NOTE(review): the MFC list is traversed with
 * list_for_each_entry_rcu() without an explicit rcu_read_lock() here —
 * presumably the caller serialises against table changes (e.g. RTNL);
 * confirm.
 */
int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
	    int (*rules_dump)(struct net *net,
			      struct notifier_block *nb,
			      struct netlink_ext_ack *extack),
	    struct mr_table *(*mr_iter)(struct net *net,
					struct mr_table *mrt),
	    struct netlink_ext_ack *extack)
{
	struct mr_table *mrt;
	int err;

	err = rules_dump(net, nb, extack);
	if (err)
		return err;

	for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
		struct vif_device *v = &mrt->vif_table[0];
		struct net_device *vif_dev;
		struct mr_mfc *mfc;
		int vifi;

		/* Notify on table VIF entries */
		rcu_read_lock();
		for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
			vif_dev = rcu_dereference(v->dev);
			/* vif slots without a device are not announced */
			if (!vif_dev)
				continue;

			err = mr_call_vif_notifier(nb, family,
						   FIB_EVENT_VIF_ADD, v,
						   vif_dev, vifi,
						   mrt->id, extack);
			if (err)
				break;
		}
		rcu_read_unlock();

		if (err)
			return err;

		/* Notify on table MFC entries */
		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
			err = mr_call_mfc_notifier(nb, family,
						   FIB_EVENT_ENTRY_ADD,
						   mfc, mrt->id, extack);
			if (err)
				return err;
		}
	}

	return 0;
}
435