1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (c) 2007-2014 Nicira, Inc.
4 */
5
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8 #include <linux/init.h>
9 #include <linux/module.h>
10 #include <linux/if_arp.h>
11 #include <linux/if_vlan.h>
12 #include <linux/in.h>
13 #include <linux/ip.h>
14 #include <linux/jhash.h>
15 #include <linux/delay.h>
16 #include <linux/time.h>
17 #include <linux/etherdevice.h>
18 #include <linux/kernel.h>
19 #include <linux/kthread.h>
20 #include <linux/mutex.h>
21 #include <linux/percpu.h>
22 #include <linux/rcupdate.h>
23 #include <linux/tcp.h>
24 #include <linux/udp.h>
25 #include <linux/ethtool.h>
26 #include <linux/wait.h>
27 #include <asm/div64.h>
28 #include <linux/highmem.h>
29 #include <linux/netfilter_bridge.h>
30 #include <linux/netfilter_ipv4.h>
31 #include <linux/inetdevice.h>
32 #include <linux/list.h>
33 #include <linux/openvswitch.h>
34 #include <linux/rculist.h>
35 #include <linux/dmi.h>
36 #include <net/genetlink.h>
37 #include <net/gso.h>
38 #include <net/net_namespace.h>
39 #include <net/netns/generic.h>
40 #include <net/pkt_cls.h>
41
42 #include "datapath.h"
43 #include "drop.h"
44 #include "flow.h"
45 #include "flow_table.h"
46 #include "flow_netlink.h"
47 #include "meter.h"
48 #include "openvswitch_trace.h"
49 #include "vport-internal_dev.h"
50 #include "vport-netdev.h"
51
52 unsigned int ovs_net_id __read_mostly;
53
54 static struct genl_family dp_packet_genl_family;
55 static struct genl_family dp_flow_genl_family;
56 static struct genl_family dp_datapath_genl_family;
57
58 static const struct nla_policy flow_policy[];
59
60 static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
61 .name = OVS_FLOW_MCGROUP,
62 };
63
64 static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
65 .name = OVS_DATAPATH_MCGROUP,
66 };
67
68 static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
69 .name = OVS_VPORT_MCGROUP,
70 };
71
72 /* Check if we need to build a reply message.
73 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
74 static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
75 unsigned int group)
76 {
77 return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
78 genl_has_listeners(family, genl_info_net(info), group);
79 }
80
81 static void ovs_notify(struct genl_family *family,
82 struct sk_buff *skb, struct genl_info *info)
83 {
84 genl_notify(family, skb, info, 0, GFP_KERNEL);
85 }
86
87 /**
88 * DOC: Locking:
89 *
90 * All writes, e.g. writes to device state (add/remove datapath, port, set
91 * operations on vports, etc.) and writes to other state (flow table
92 * modifications, setting miscellaneous datapath parameters, etc.), are
93 * protected by ovs_lock.
94 *
95 * Reads are protected by RCU.
96 *
97 * There are a few special cases (mostly stats) that have their own
98 * synchronization but they nest under all of the above and don't interact with
99 * each other.
100 *
101 * The RTNL lock nests inside ovs_mutex.
102 */
103
104 static DEFINE_MUTEX(ovs_mutex);
105
106 void ovs_lock(void)
107 {
108 mutex_lock(&ovs_mutex);
109 }
110
111 void ovs_unlock(void)
112 {
113 mutex_unlock(&ovs_mutex);
114 }
115
116 #ifdef CONFIG_LOCKDEP
117 int lockdep_ovsl_is_held(void)
118 {
119 if (debug_locks)
120 return lockdep_is_held(&ovs_mutex);
121 else
122 return 1;
123 }
124 #endif
125
126 static struct vport *new_vport(const struct vport_parms *);
127 static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
128 const struct sw_flow_key *,
129 const struct dp_upcall_info *,
130 uint32_t cutlen);
131 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
132 const struct sw_flow_key *,
133 const struct dp_upcall_info *,
134 uint32_t cutlen);
135
136 static void ovs_dp_masks_rebalance(struct work_struct *work);
137
138 static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
139
140 /* Must be called with rcu_read_lock or ovs_mutex. */
141 const char *ovs_dp_name(const struct datapath *dp)
142 {
143 struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
144 return ovs_vport_name(vport);
145 }
146
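/* Return the ifindex of the datapath's local (OVSP_LOCAL) port, or 0 if
 * the datapath has no local port.
 */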
147 static int get_dpifindex(const struct datapath *dp)
148 {
149 struct vport *local;
150 int ifindex;
151
152 rcu_read_lock();
153
154 local = ovs_vport_rcu(dp, OVSP_LOCAL);
155 if (local)
156 ifindex = local->dev->ifindex;
157 else
158 ifindex = 0;
159
160 rcu_read_unlock();
161
162 return ifindex;
163 }
164
165 static void destroy_dp_rcu(struct rcu_head *rcu)
166 {
167 struct datapath *dp = container_of(rcu, struct datapath, rcu);
168
169 ovs_flow_tbl_destroy(&dp->table);
170 free_percpu(dp->stats_percpu);
171 kfree(dp->ports);
172 ovs_meters_exit(dp);
173 kfree(rcu_dereference_raw(dp->upcall_portids));
174 kfree(dp);
175 }
176
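/* Map a port number to its bucket in the datapath's vport hash table. */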
177 static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
178 u16 port_no)
179 {
180 return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
181 }
182
183 /* Called with ovs_mutex or RCU read lock. */
184 struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
185 {
186 struct vport *vport;
187 struct hlist_head *head;
188
189 head = vport_hash_bucket(dp, port_no);
190 hlist_for_each_entry_rcu(vport, head, dp_hash_node,
191 lockdep_ovsl_is_held()) {
192 if (vport->port_no == port_no)
193 return vport;
194 }
195 return NULL;
196 }
197
198 /* Called with ovs_mutex. */
199 static struct vport *new_vport(const struct vport_parms *parms)
200 {
201 struct vport *vport;
202
203 vport = ovs_vport_add(parms);
204 if (!IS_ERR(vport)) {
205 struct datapath *dp = parms->dp;
206 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
207
208 hlist_add_head_rcu(&vport->dp_hash_node, head);
209 }
210 return vport;
211 }
212
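/* Account an upcall attempt for MISS/ACTION commands in the input vport's
 * per-CPU upcall success/failure counters.
 */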
213 static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
214 const struct dp_upcall_info *upcall_info,
215 bool upcall_result)
216 {
217 struct vport *p = OVS_CB(skb)->input_vport;
218 struct vport_upcall_stats_percpu *stats;
219
220 if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
221 upcall_info->cmd != OVS_PACKET_CMD_ACTION)
222 return;
223
224 stats = this_cpu_ptr(p->upcall_stats);
225 u64_stats_update_begin(&stats->syncp);
226 if (upcall_result)
227 u64_stats_inc(&stats->n_success);
228 else
229 u64_stats_inc(&stats->n_fail);
230 u64_stats_update_end(&stats->syncp);
231 }
232
233 void ovs_dp_detach_port(struct vport *p)
234 {
235 ASSERT_OVSL();
236
237 /* First drop references to device. */
238 hlist_del_rcu(&p->dp_hash_node);
239
240 /* Then destroy it. */
241 ovs_vport_del(p);
242 }
243
244 /* Must be called with rcu_read_lock. */
245 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
246 {
247 struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
248 const struct vport *p = OVS_CB(skb)->input_vport;
249 struct datapath *dp = p->dp;
250 struct sw_flow *flow;
251 struct sw_flow_actions *sf_acts;
252 struct dp_stats_percpu *stats;
253 bool ovs_pcpu_locked = false;
254 u64 *stats_counter;
255 u32 n_mask_hit;
256 u32 n_cache_hit;
257 int error;
258
259 stats = this_cpu_ptr(dp->stats_percpu);
260
261 /* Look up flow. */
262 flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
263 &n_mask_hit, &n_cache_hit);
264 if (unlikely(!flow)) {
265 struct dp_upcall_info upcall;
266
267 memset(&upcall, 0, sizeof(upcall));
268 upcall.cmd = OVS_PACKET_CMD_MISS;
269
270 if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
271 upcall.portid =
272 ovs_dp_get_upcall_portid(dp, smp_processor_id());
273 else
274 upcall.portid = ovs_vport_find_upcall_portid(p, skb);
275
276 upcall.mru = OVS_CB(skb)->mru;
277 error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
278 switch (error) {
279 case 0:
280 case -EAGAIN:
281 case -ERESTARTSYS:
282 case -EINTR:
283 consume_skb(skb);
284 break;
285 default:
286 kfree_skb(skb);
287 break;
288 }
289 stats_counter = &stats->n_missed;
290 goto out;
291 }
292
293 ovs_flow_stats_update(flow, key->tp.flags, skb);
294 sf_acts = rcu_dereference(flow->sf_acts);
295 /* This path can be invoked recursively: Use the current task to
296 * identify recursive invocation - the lock must be acquired only once.
297 * Even with disabled bottom halves this can be preempted on PREEMPT_RT.
298 * Limit the locking to RT to avoid assigning `owner' if it can be
299 * avoided.
300 */
301 if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
302 local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
303 ovs_pcpu->owner = current;
304 ovs_pcpu_locked = true;
305 }
306
307 error = ovs_execute_actions(dp, skb, sf_acts, key);
308 if (unlikely(error))
309 net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
310 ovs_dp_name(dp), error);
311 if (ovs_pcpu_locked) {
312 ovs_pcpu->owner = NULL;
313 local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
314 }
315
316 stats_counter = &stats->n_hit;
317
318 out:
319 /* Update datapath statistics. */
320 u64_stats_update_begin(&stats->syncp);
321 (*stats_counter)++;
322 stats->n_mask_hit += n_mask_hit;
323 stats->n_cache_hit += n_cache_hit;
324 u64_stats_update_end(&stats->syncp);
325 }
326
327 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
328 const struct sw_flow_key *key,
329 const struct dp_upcall_info *upcall_info,
330 uint32_t cutlen)
331 {
332 struct dp_stats_percpu *stats;
333 int err;
334
335 if (trace_ovs_dp_upcall_enabled())
336 trace_ovs_dp_upcall(dp, skb, key, upcall_info);
337
338 if (upcall_info->portid == 0) {
339 err = -ENOTCONN;
340 goto err;
341 }
342
343 if (!skb_is_gso(skb))
344 err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
345 else
346 err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
347
348 ovs_vport_update_upcall_stats(skb, upcall_info, !err);
349 if (err)
350 goto err;
351
352 return 0;
353
354 err:
355 stats = this_cpu_ptr(dp->stats_percpu);
356
357 u64_stats_update_begin(&stats->syncp);
358 stats->n_lost++;
359 u64_stats_update_end(&stats->syncp);
360
361 return err;
362 }
363
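/* Software-segment a GSO skb and queue each resulting segment to userspace
 * as a separate upcall.  For UDP fragmentation, segments after the first are
 * re-keyed as OVS_FRAG_TYPE_LATER.
 */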
364 static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
365 const struct sw_flow_key *key,
366 const struct dp_upcall_info *upcall_info,
367 uint32_t cutlen)
368 {
369 unsigned int gso_type = skb_shinfo(skb)->gso_type;
370 struct sw_flow_key later_key;
371 struct sk_buff *segs, *nskb;
372 int err;
373
374 BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
375 segs = __skb_gso_segment(skb, NETIF_F_SG, false);
376 if (IS_ERR(segs))
377 return PTR_ERR(segs);
378 if (segs == NULL)
379 return -EINVAL;
380
381 if (gso_type & SKB_GSO_UDP) {
382 /* The initial flow key extracted by ovs_flow_key_extract()
383 * in this case is for a first fragment, so we need to
384 * properly mark later fragments.
385 */
386 later_key = *key;
387 later_key.ip.frag = OVS_FRAG_TYPE_LATER;
388 }
389
390 /* Queue all of the segments. */
391 skb_list_walk_safe(segs, skb, nskb) {
392 if (gso_type & SKB_GSO_UDP && skb != segs)
393 key = &later_key;
394
395 err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
396 if (err)
397 break;
398
399 }
400
401 /* Free all of the segments. */
402 skb_list_walk_safe(segs, skb, nskb) {
403 if (err)
404 kfree_skb(skb);
405 else
406 consume_skb(skb);
407 }
408 return err;
409 }
410
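/* Upper bound on the netlink message size needed for an upcall carrying
 * 'hdrlen' bytes of packet data plus the optional attributes requested in
 * 'upcall_info'.
 */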
411 static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
412 unsigned int hdrlen, int actions_attrlen)
413 {
414 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
415 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
416 + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
417 + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
418 + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
419
420 /* OVS_PACKET_ATTR_USERDATA */
421 if (upcall_info->userdata)
422 size += NLA_ALIGN(upcall_info->userdata->nla_len);
423
424 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
425 if (upcall_info->egress_tun_info)
426 size += nla_total_size(ovs_tun_key_attr_size());
427
428 /* OVS_PACKET_ATTR_ACTIONS */
429 if (upcall_info->actions_len)
430 size += nla_total_size(actions_attrlen);
431
432 /* OVS_PACKET_ATTR_MRU */
433 if (upcall_info->mru)
434 size += nla_total_size(sizeof(upcall_info->mru));
435
436 return size;
437 }
438
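/* Zero-pad the trailing packet attribute up to NLA_ALIGNTO unless userspace
 * negotiated OVS_DP_F_UNALIGNED and can handle unaligned Netlink messages.
 */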
439 static void pad_packet(struct datapath *dp, struct sk_buff *skb)
440 {
441 if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
442 size_t plen = NLA_ALIGN(skb->len) - skb->len;
443
444 if (plen > 0)
445 skb_put_zero(skb, plen);
446 }
447 }
448
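/* Build a single OVS_PACKET netlink message for 'skb' (flow key, optional
 * userdata, tunnel info, actions, MRU, hash and the packet data itself) and
 * unicast it to the socket given by upcall_info->portid.
 */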
449 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
450 const struct sw_flow_key *key,
451 const struct dp_upcall_info *upcall_info,
452 uint32_t cutlen)
453 {
454 struct ovs_header *upcall;
455 struct sk_buff *nskb = NULL;
456 struct sk_buff *user_skb = NULL; /* to be queued to userspace */
457 struct nlattr *nla;
458 size_t len;
459 unsigned int hlen;
460 int err, dp_ifindex;
461 u64 hash;
462
463 dp_ifindex = get_dpifindex(dp);
464 if (!dp_ifindex)
465 return -ENODEV;
466
467 if (skb_vlan_tag_present(skb)) {
468 nskb = skb_clone(skb, GFP_ATOMIC);
469 if (!nskb)
470 return -ENOMEM;
471
472 nskb = __vlan_hwaccel_push_inside(nskb);
473 if (!nskb)
474 return -ENOMEM;
475
476 skb = nskb;
477 }
478
479 if (nla_attr_size(skb->len) > USHRT_MAX) {
480 err = -EFBIG;
481 goto out;
482 }
483
484 /* Complete checksum if needed */
485 if (skb->ip_summed == CHECKSUM_PARTIAL &&
486 (err = skb_csum_hwoffload_help(skb, 0)))
487 goto out;
488
489 /* Older versions of OVS user space enforce alignment of the last
490 * Netlink attribute to NLA_ALIGNTO which would require extensive
491 * padding logic. Only perform zerocopy if padding is not required.
492 */
493 if (dp->user_features & OVS_DP_F_UNALIGNED)
494 hlen = skb_zerocopy_headlen(skb);
495 else
496 hlen = skb->len;
497
498 len = upcall_msg_size(upcall_info, hlen - cutlen,
499 OVS_CB(skb)->acts_origlen);
500 user_skb = genlmsg_new(len, GFP_ATOMIC);
501 if (!user_skb) {
502 err = -ENOMEM;
503 goto out;
504 }
505
506 upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
507 0, upcall_info->cmd);
508 if (!upcall) {
509 err = -EINVAL;
510 goto out;
511 }
512 upcall->dp_ifindex = dp_ifindex;
513
514 err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
515 if (err)
516 goto out;
517
518 if (upcall_info->userdata)
519 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
520 nla_len(upcall_info->userdata),
521 nla_data(upcall_info->userdata));
522
523 if (upcall_info->egress_tun_info) {
524 nla = nla_nest_start_noflag(user_skb,
525 OVS_PACKET_ATTR_EGRESS_TUN_KEY);
526 if (!nla) {
527 err = -EMSGSIZE;
528 goto out;
529 }
530 err = ovs_nla_put_tunnel_info(user_skb,
531 upcall_info->egress_tun_info);
532 if (err)
533 goto out;
534
535 nla_nest_end(user_skb, nla);
536 }
537
538 if (upcall_info->actions_len) {
539 nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
540 if (!nla) {
541 err = -EMSGSIZE;
542 goto out;
543 }
544 err = ovs_nla_put_actions(upcall_info->actions,
545 upcall_info->actions_len,
546 user_skb);
547 if (!err)
548 nla_nest_end(user_skb, nla);
549 else
550 nla_nest_cancel(user_skb, nla);
551 }
552
553 /* Add OVS_PACKET_ATTR_MRU */
554 if (upcall_info->mru &&
555 nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
556 err = -ENOBUFS;
557 goto out;
558 }
559
560 /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
561 if (cutlen > 0 &&
562 nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
563 err = -ENOBUFS;
564 goto out;
565 }
566
567 /* Add OVS_PACKET_ATTR_HASH */
568 hash = skb_get_hash_raw(skb);
569 if (skb->sw_hash)
570 hash |= OVS_PACKET_HASH_SW_BIT;
571
572 if (skb->l4_hash)
573 hash |= OVS_PACKET_HASH_L4_BIT;
574
575 if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof(u64), &hash)) {
576 err = -ENOBUFS;
577 goto out;
578 }
579
580 /* Only reserve room for attribute header, packet data is added
581 * in skb_zerocopy() */
582 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
583 err = -ENOBUFS;
584 goto out;
585 }
586 nla->nla_len = nla_attr_size(skb->len - cutlen);
587
588 err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
589 if (err)
590 goto out;
591
592 /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
593 pad_packet(dp, user_skb);
594
595 ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
596
597 err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
598 user_skb = NULL;
599 out:
600 if (err)
601 skb_tx_error(skb);
602 consume_skb(user_skb);
603 consume_skb(nskb);
604
605 return err;
606 }
607
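/* OVS_PACKET_CMD_EXECUTE handler: re-inject a userspace-supplied packet into
 * the datapath and run the supplied actions against it.
 */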
608 static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
609 {
610 struct ovs_header *ovs_header = genl_info_userhdr(info);
611 struct net *net = sock_net(skb->sk);
612 struct nlattr **a = info->attrs;
613 struct sw_flow_actions *acts;
614 struct sk_buff *packet;
615 struct sw_flow *flow;
616 struct sw_flow_actions *sf_acts;
617 struct datapath *dp;
618 struct vport *input_vport;
619 u16 mru = 0;
620 u64 hash;
621 int len;
622 int err;
623 bool log = !a[OVS_PACKET_ATTR_PROBE];
624
625 err = -EINVAL;
626 if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
627 !a[OVS_PACKET_ATTR_ACTIONS])
628 goto err;
629
630 len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
631 packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
632 err = -ENOMEM;
633 if (!packet)
634 goto err;
635 skb_reserve(packet, NET_IP_ALIGN);
636
637 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
638
639 /* Set packet's mru */
640 if (a[OVS_PACKET_ATTR_MRU]) {
641 mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
642 packet->ignore_df = 1;
643 }
644 OVS_CB(packet)->mru = mru;
645
646 if (a[OVS_PACKET_ATTR_HASH]) {
647 hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
648
649 __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
650 !!(hash & OVS_PACKET_HASH_SW_BIT),
651 !!(hash & OVS_PACKET_HASH_L4_BIT));
652 }
653
654 /* Build an sw_flow for sending this packet. */
655 flow = ovs_flow_alloc();
656 err = PTR_ERR(flow);
657 if (IS_ERR(flow))
658 goto err_kfree_skb;
659
660 err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
661 packet, &flow->key, log);
662 if (err)
663 goto err_flow_free;
664
665 err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
666 &flow->key, &acts, log);
667 if (err)
668 goto err_flow_free;
669
670 rcu_assign_pointer(flow->sf_acts, acts);
671 packet->priority = flow->key.phy.priority;
672 packet->mark = flow->key.phy.skb_mark;
673
674 rcu_read_lock();
675 dp = get_dp_rcu(net, ovs_header->dp_ifindex);
676 err = -ENODEV;
677 if (!dp)
678 goto err_unlock;
679
680 input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
681 if (!input_vport)
682 input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
683
684 if (!input_vport)
685 goto err_unlock;
686
687 packet->dev = input_vport->dev;
688 OVS_CB(packet)->input_vport = input_vport;
689 sf_acts = rcu_dereference(flow->sf_acts);
690
691 local_bh_disable();
692 local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
693 if (IS_ENABLED(CONFIG_PREEMPT_RT))
694 this_cpu_write(ovs_pcpu_storage->owner, current);
695 err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
696 if (IS_ENABLED(CONFIG_PREEMPT_RT))
697 this_cpu_write(ovs_pcpu_storage->owner, NULL);
698 local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
699 local_bh_enable();
700 rcu_read_unlock();
701
702 ovs_flow_free(flow, false);
703 return err;
704
705 err_unlock:
706 rcu_read_unlock();
707 err_flow_free:
708 ovs_flow_free(flow, false);
709 err_kfree_skb:
710 kfree_skb(packet);
711 err:
712 return err;
713 }
714
715 static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
716 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
717 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
718 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
719 [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
720 [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
721 [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
722 };
723
724 static const struct genl_small_ops dp_packet_genl_ops[] = {
725 { .cmd = OVS_PACKET_CMD_EXECUTE,
726 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
727 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
728 .doit = ovs_packet_cmd_execute
729 }
730 };
731
732 static struct genl_family dp_packet_genl_family __ro_after_init = {
733 .hdrsize = sizeof(struct ovs_header),
734 .name = OVS_PACKET_FAMILY,
735 .version = OVS_PACKET_VERSION,
736 .maxattr = OVS_PACKET_ATTR_MAX,
737 .policy = packet_policy,
738 .netnsok = true,
739 .parallel_ops = true,
740 .small_ops = dp_packet_genl_ops,
741 .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
742 .resv_start_op = OVS_PACKET_CMD_EXECUTE + 1,
743 .module = THIS_MODULE,
744 };
745
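/* Aggregate the per-CPU datapath statistics into 'stats' and 'mega_stats',
 * using the u64_stats sequence counter to obtain consistent snapshots.
 */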
746 static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
747 struct ovs_dp_megaflow_stats *mega_stats)
748 {
749 int i;
750
751 memset(mega_stats, 0, sizeof(*mega_stats));
752
753 stats->n_flows = ovs_flow_tbl_count(&dp->table);
754 mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
755
756 stats->n_hit = stats->n_missed = stats->n_lost = 0;
757
758 for_each_possible_cpu(i) {
759 const struct dp_stats_percpu *percpu_stats;
760 struct dp_stats_percpu local_stats;
761 unsigned int start;
762
763 percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
764
765 do {
766 start = u64_stats_fetch_begin(&percpu_stats->syncp);
767 local_stats = *percpu_stats;
768 } while (u64_stats_fetch_retry(&percpu_stats->syncp, start));
769
770 stats->n_hit += local_stats.n_hit;
771 stats->n_missed += local_stats.n_missed;
772 stats->n_lost += local_stats.n_lost;
773 mega_stats->n_mask_hit += local_stats.n_mask_hit;
774 mega_stats->n_cache_hit += local_stats.n_cache_hit;
775 }
776 }
777
778 static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
779 {
780 return ovs_identifier_is_ufid(sfid) &&
781 !(ufid_flags & OVS_UFID_F_OMIT_KEY);
782 }
783
784 static bool should_fill_mask(uint32_t ufid_flags)
785 {
786 return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
787 }
788
789 static bool should_fill_actions(uint32_t ufid_flags)
790 {
791 return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
792 }
793
794 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
795 const struct sw_flow_id *sfid,
796 uint32_t ufid_flags)
797 {
798 size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
799
800 /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
801 * see ovs_nla_put_identifier()
802 */
803 if (sfid && ovs_identifier_is_ufid(sfid))
804 len += nla_total_size(sfid->ufid_len);
805 else
806 len += nla_total_size(ovs_key_attr_size());
807
808 /* OVS_FLOW_ATTR_KEY */
809 if (!sfid || should_fill_key(sfid, ufid_flags))
810 len += nla_total_size(ovs_key_attr_size());
811
812 /* OVS_FLOW_ATTR_MASK */
813 if (should_fill_mask(ufid_flags))
814 len += nla_total_size(ovs_key_attr_size());
815
816 /* OVS_FLOW_ATTR_ACTIONS */
817 if (should_fill_actions(ufid_flags))
818 len += nla_total_size(acts->orig_len);
819
820 return len
821 + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
822 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
823 + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
824 }
825
826 /* Called with ovs_mutex or RCU read lock. */
827 static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
828 struct sk_buff *skb)
829 {
830 struct ovs_flow_stats stats;
831 __be16 tcp_flags;
832 unsigned long used;
833
834 ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
835
836 if (used &&
837 nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
838 OVS_FLOW_ATTR_PAD))
839 return -EMSGSIZE;
840
841 if (stats.n_packets &&
842 nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
843 sizeof(struct ovs_flow_stats), &stats,
844 OVS_FLOW_ATTR_PAD))
845 return -EMSGSIZE;
846
847 if ((u8)ntohs(tcp_flags) &&
848 nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
849 return -EMSGSIZE;
850
851 return 0;
852 }
853
854 /* Called with ovs_mutex or RCU read lock. */
855 static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
856 struct sk_buff *skb, int skb_orig_len)
857 {
858 struct nlattr *start;
859 int err;
860
861 /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
862 * this is the first flow to be dumped into 'skb'. This is unusual for
863 * Netlink but individual action lists can be longer than
864 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
865 * The userspace caller can always fetch the actions separately if it
866 * really wants them. (Most userspace callers in fact don't care.)
867 *
868 * This can only fail for dump operations because the skb is always
869 * properly sized for single flows.
870 */
871 start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
872 if (start) {
873 const struct sw_flow_actions *sf_acts;
874
875 sf_acts = rcu_dereference_ovsl(flow->sf_acts);
876 err = ovs_nla_put_actions(sf_acts->actions,
877 sf_acts->actions_len, skb);
878
879 if (!err)
880 nla_nest_end(skb, start);
881 else {
882 if (skb_orig_len)
883 return err;
884
885 nla_nest_cancel(skb, start);
886 }
887 } else if (skb_orig_len) {
888 return -EMSGSIZE;
889 }
890
891 return 0;
892 }
893
894 /* Called with ovs_mutex or RCU read lock. */
895 static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
896 struct sk_buff *skb, u32 portid,
897 u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
898 {
899 const int skb_orig_len = skb->len;
900 struct ovs_header *ovs_header;
901 int err;
902
903 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
904 flags, cmd);
905 if (!ovs_header)
906 return -EMSGSIZE;
907
908 ovs_header->dp_ifindex = dp_ifindex;
909
910 err = ovs_nla_put_identifier(flow, skb);
911 if (err)
912 goto error;
913
914 if (should_fill_key(&flow->id, ufid_flags)) {
915 err = ovs_nla_put_masked_key(flow, skb);
916 if (err)
917 goto error;
918 }
919
920 if (should_fill_mask(ufid_flags)) {
921 err = ovs_nla_put_mask(flow, skb);
922 if (err)
923 goto error;
924 }
925
926 err = ovs_flow_cmd_fill_stats(flow, skb);
927 if (err)
928 goto error;
929
930 if (should_fill_actions(ufid_flags)) {
931 err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
932 if (err)
933 goto error;
934 }
935
936 genlmsg_end(skb, ovs_header);
937 return 0;
938
939 error:
940 genlmsg_cancel(skb, ovs_header);
941 return err;
942 }
943
944 /* May not be called with RCU read lock. */
945 static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
946 const struct sw_flow_id *sfid,
947 struct genl_info *info,
948 bool always,
949 uint32_t ufid_flags)
950 {
951 struct sk_buff *skb;
952 size_t len;
953
954 if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
955 return NULL;
956
957 len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
958 skb = genlmsg_new(len, GFP_KERNEL);
959 if (!skb)
960 return ERR_PTR(-ENOMEM);
961
962 return skb;
963 }
964
965 /* Called with ovs_mutex. */
966 static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
967 int dp_ifindex,
968 struct genl_info *info, u8 cmd,
969 bool always, u32 ufid_flags)
970 {
971 struct sk_buff *skb;
972 int retval;
973
974 skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
975 &flow->id, info, always, ufid_flags);
976 if (IS_ERR_OR_NULL(skb))
977 return skb;
978
979 retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
980 info->snd_portid, info->snd_seq, 0,
981 cmd, ufid_flags);
982 if (WARN_ON_ONCE(retval < 0)) {
983 kfree_skb(skb);
984 skb = ERR_PTR(retval);
985 }
986 return skb;
987 }
988
989 static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
990 {
991 struct net *net = sock_net(skb->sk);
992 struct nlattr **a = info->attrs;
993 struct ovs_header *ovs_header = genl_info_userhdr(info);
994 struct sw_flow *flow = NULL, *new_flow;
995 struct sw_flow_mask mask;
996 struct sk_buff *reply;
997 struct datapath *dp;
998 struct sw_flow_key *key;
999 struct sw_flow_actions *acts;
1000 struct sw_flow_match match;
1001 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1002 int error;
1003 bool log = !a[OVS_FLOW_ATTR_PROBE];
1004
1005 /* Must have key and actions. */
1006 error = -EINVAL;
1007 if (!a[OVS_FLOW_ATTR_KEY]) {
1008 OVS_NLERR(log, "Flow key attr not present in new flow.");
1009 goto error;
1010 }
1011 if (!a[OVS_FLOW_ATTR_ACTIONS]) {
1012 OVS_NLERR(log, "Flow actions attr not present in new flow.");
1013 goto error;
1014 }
1015
1016 /* Most of the time we need to allocate a new flow, do it before
1017 * locking.
1018 */
1019 new_flow = ovs_flow_alloc();
1020 if (IS_ERR(new_flow)) {
1021 error = PTR_ERR(new_flow);
1022 goto error;
1023 }
1024
1025 /* Extract key. */
1026 key = kzalloc(sizeof(*key), GFP_KERNEL);
1027 if (!key) {
1028 error = -ENOMEM;
1029 goto err_kfree_flow;
1030 }
1031
1032 ovs_match_init(&match, key, false, &mask);
1033 error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1034 a[OVS_FLOW_ATTR_MASK], log);
1035 if (error)
1036 goto err_kfree_key;
1037
1038 ovs_flow_mask_key(&new_flow->key, key, true, &mask);
1039
1040 /* Extract flow identifier. */
1041 error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
1042 key, log);
1043 if (error)
1044 goto err_kfree_key;
1045
1046 /* Validate actions. */
1047 error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
1048 &new_flow->key, &acts, log);
1049 if (error) {
1050 OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
1051 goto err_kfree_key;
1052 }
1053
1054 reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
1055 ufid_flags);
1056 if (IS_ERR(reply)) {
1057 error = PTR_ERR(reply);
1058 goto err_kfree_acts;
1059 }
1060
1061 ovs_lock();
1062 dp = get_dp(net, ovs_header->dp_ifindex);
1063 if (unlikely(!dp)) {
1064 error = -ENODEV;
1065 goto err_unlock_ovs;
1066 }
1067
1068 /* Check if this is a duplicate flow */
1069 if (ovs_identifier_is_ufid(&new_flow->id))
1070 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
1071 if (!flow)
1072 flow = ovs_flow_tbl_lookup(&dp->table, key);
1073 if (likely(!flow)) {
1074 rcu_assign_pointer(new_flow->sf_acts, acts);
1075
1076 /* Put flow in bucket. */
1077 error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
1078 if (unlikely(error)) {
1079 acts = NULL;
1080 goto err_unlock_ovs;
1081 }
1082
1083 if (unlikely(reply)) {
1084 error = ovs_flow_cmd_fill_info(new_flow,
1085 ovs_header->dp_ifindex,
1086 reply, info->snd_portid,
1087 info->snd_seq, 0,
1088 OVS_FLOW_CMD_NEW,
1089 ufid_flags);
1090 BUG_ON(error < 0);
1091 }
1092 ovs_unlock();
1093 } else {
1094 struct sw_flow_actions *old_acts;
1095
1096 /* Bail out if we're not allowed to modify an existing flow.
1097 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
1098 * because Generic Netlink treats the latter as a dump
1099 * request. We also accept NLM_F_EXCL in case that bug ever
1100 * gets fixed.
1101 */
1102 if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
1103 | NLM_F_EXCL))) {
1104 error = -EEXIST;
1105 goto err_unlock_ovs;
1106 }
1107 /* The flow identifier has to be the same for flow updates.
1108 * Look for any overlapping flow.
1109 */
1110 if (unlikely(!ovs_flow_cmp(flow, &match))) {
1111 if (ovs_identifier_is_key(&flow->id))
1112 flow = ovs_flow_tbl_lookup_exact(&dp->table,
1113 &match);
1114 else /* UFID matches but key is different */
1115 flow = NULL;
1116 if (!flow) {
1117 error = -ENOENT;
1118 goto err_unlock_ovs;
1119 }
1120 }
1121 /* Update actions. */
1122 old_acts = ovsl_dereference(flow->sf_acts);
1123 rcu_assign_pointer(flow->sf_acts, acts);
1124
1125 if (unlikely(reply)) {
1126 error = ovs_flow_cmd_fill_info(flow,
1127 ovs_header->dp_ifindex,
1128 reply, info->snd_portid,
1129 info->snd_seq, 0,
1130 OVS_FLOW_CMD_NEW,
1131 ufid_flags);
1132 BUG_ON(error < 0);
1133 }
1134 ovs_unlock();
1135
1136 ovs_nla_free_flow_actions_rcu(old_acts);
1137 ovs_flow_free(new_flow, false);
1138 }
1139
1140 if (reply)
1141 ovs_notify(&dp_flow_genl_family, reply, info);
1142
1143 kfree(key);
1144 return 0;
1145
1146 err_unlock_ovs:
1147 ovs_unlock();
1148 kfree_skb(reply);
1149 err_kfree_acts:
1150 ovs_nla_free_flow_actions(acts);
1151 err_kfree_key:
1152 kfree(key);
1153 err_kfree_flow:
1154 ovs_flow_free(new_flow, false);
1155 error:
1156 return error;
1157 }
1158
1159 /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
1160 static noinline_for_stack
1161 struct sw_flow_actions *get_flow_actions(struct net *net,
1162 const struct nlattr *a,
1163 const struct sw_flow_key *key,
1164 const struct sw_flow_mask *mask,
1165 bool log)
1166 {
1167 struct sw_flow_actions *acts;
1168 struct sw_flow_key masked_key;
1169 int error;
1170
1171 ovs_flow_mask_key(&masked_key, key, true, mask);
1172 error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
1173 if (error) {
1174 OVS_NLERR(log,
1175 "Actions may not be safe on all matching packets");
1176 return ERR_PTR(error);
1177 }
1178
1179 return acts;
1180 }
1181
1182 /* Factor out match-init and action-copy to avoid
1183 * "Wframe-larger-than=1024" warning. Because the mask is
1184 * only used to get the actions, we use a separate function
1185 * to save some stack space.
1186 *
1187 * If neither the key nor the actions attribute is present,
1188 * we return 0 directly and, in that case, the caller will
1189 * not use the match either. If the actions attribute is
1190 * present, we try to get the actions and save them to *acts.
1191 * Before returning from the function, we reset the
1192 * match->mask pointer, because we should not return a match
1193 * object with a dangling reference to the mask.
1194 */
1195 static noinline_for_stack int
1196 ovs_nla_init_match_and_action(struct net *net,
1197 struct sw_flow_match *match,
1198 struct sw_flow_key *key,
1199 struct nlattr **a,
1200 struct sw_flow_actions **acts,
1201 bool log)
1202 {
1203 struct sw_flow_mask mask;
1204 int error = 0;
1205
1206 if (a[OVS_FLOW_ATTR_KEY]) {
1207 ovs_match_init(match, key, true, &mask);
1208 error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
1209 a[OVS_FLOW_ATTR_MASK], log);
1210 if (error)
1211 goto error;
1212 }
1213
1214 if (a[OVS_FLOW_ATTR_ACTIONS]) {
1215 if (!a[OVS_FLOW_ATTR_KEY]) {
1216 OVS_NLERR(log,
1217 "Flow key attribute not present in set flow.");
1218 error = -EINVAL;
1219 goto error;
1220 }
1221
1222 *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
1223 &mask, log);
1224 if (IS_ERR(*acts)) {
1225 error = PTR_ERR(*acts);
1226 goto error;
1227 }
1228 }
1229
1230 /* On success, error is 0. */
1231 error:
1232 match->mask = NULL;
1233 return error;
1234 }
1235
1236 static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
1237 {
1238 struct net *net = sock_net(skb->sk);
1239 struct nlattr **a = info->attrs;
1240 struct ovs_header *ovs_header = genl_info_userhdr(info);
1241 struct sw_flow_key key;
1242 struct sw_flow *flow;
1243 struct sk_buff *reply = NULL;
1244 struct datapath *dp;
1245 struct sw_flow_actions *old_acts = NULL, *acts = NULL;
1246 struct sw_flow_match match;
1247 struct sw_flow_id sfid;
1248 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1249 int error = 0;
1250 bool log = !a[OVS_FLOW_ATTR_PROBE];
1251 bool ufid_present;
1252
1253 ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
1254 if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
1255 OVS_NLERR(log,
1256 "Flow set message rejected, Key attribute missing.");
1257 return -EINVAL;
1258 }
1259
1260 error = ovs_nla_init_match_and_action(net, &match, &key, a,
1261 &acts, log);
1262 if (error)
1263 goto error;
1264
1265 if (acts) {
1266 /* Can allocate before locking if have acts. */
1267 reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
1268 ufid_flags);
1269 if (IS_ERR(reply)) {
1270 error = PTR_ERR(reply);
1271 goto err_kfree_acts;
1272 }
1273 }
1274
1275 ovs_lock();
1276 dp = get_dp(net, ovs_header->dp_ifindex);
1277 if (unlikely(!dp)) {
1278 error = -ENODEV;
1279 goto err_unlock_ovs;
1280 }
1281 /* Check that the flow exists. */
1282 if (ufid_present)
1283 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
1284 else
1285 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1286 if (unlikely(!flow)) {
1287 error = -ENOENT;
1288 goto err_unlock_ovs;
1289 }
1290
1291 /* Update actions, if present. */
1292 if (likely(acts)) {
1293 old_acts = ovsl_dereference(flow->sf_acts);
1294 rcu_assign_pointer(flow->sf_acts, acts);
1295
1296 if (unlikely(reply)) {
1297 error = ovs_flow_cmd_fill_info(flow,
1298 ovs_header->dp_ifindex,
1299 reply, info->snd_portid,
1300 info->snd_seq, 0,
1301 OVS_FLOW_CMD_SET,
1302 ufid_flags);
1303 BUG_ON(error < 0);
1304 }
1305 } else {
1306 /* Could not alloc without acts before locking. */
1307 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
1308 info, OVS_FLOW_CMD_SET, false,
1309 ufid_flags);
1310
1311 if (IS_ERR(reply)) {
1312 error = PTR_ERR(reply);
1313 goto err_unlock_ovs;
1314 }
1315 }
1316
1317 /* Clear stats. */
1318 if (a[OVS_FLOW_ATTR_CLEAR])
1319 ovs_flow_stats_clear(flow);
1320 ovs_unlock();
1321
1322 if (reply)
1323 ovs_notify(&dp_flow_genl_family, reply, info);
1324 if (old_acts)
1325 ovs_nla_free_flow_actions_rcu(old_acts);
1326
1327 return 0;
1328
1329 err_unlock_ovs:
1330 ovs_unlock();
1331 kfree_skb(reply);
1332 err_kfree_acts:
1333 ovs_nla_free_flow_actions(acts);
1334 error:
1335 return error;
1336 }
1337
1338 static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1339 {
1340 struct nlattr **a = info->attrs;
1341 struct ovs_header *ovs_header = genl_info_userhdr(info);
1342 struct net *net = sock_net(skb->sk);
1343 struct sw_flow_key key;
1344 struct sk_buff *reply;
1345 struct sw_flow *flow;
1346 struct datapath *dp;
1347 struct sw_flow_match match;
1348 struct sw_flow_id ufid;
1349 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1350 int err = 0;
1351 bool log = !a[OVS_FLOW_ATTR_PROBE];
1352 bool ufid_present;
1353
1354 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1355 if (a[OVS_FLOW_ATTR_KEY]) {
1356 ovs_match_init(&match, &key, true, NULL);
1357 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
1358 log);
1359 } else if (!ufid_present) {
1360 OVS_NLERR(log,
1361 "Flow get message rejected, Key attribute missing.");
1362 err = -EINVAL;
1363 }
1364 if (err)
1365 return err;
1366
1367 ovs_lock();
1368 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1369 if (!dp) {
1370 err = -ENODEV;
1371 goto unlock;
1372 }
1373
1374 if (ufid_present)
1375 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1376 else
1377 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1378 if (!flow) {
1379 err = -ENOENT;
1380 goto unlock;
1381 }
1382
1383 reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
1384 OVS_FLOW_CMD_GET, true, ufid_flags);
1385 if (IS_ERR(reply)) {
1386 err = PTR_ERR(reply);
1387 goto unlock;
1388 }
1389
1390 ovs_unlock();
1391 return genlmsg_reply(reply, info);
1392 unlock:
1393 ovs_unlock();
1394 return err;
1395 }
1396
1397 static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1398 {
1399 struct nlattr **a = info->attrs;
1400 struct ovs_header *ovs_header = genl_info_userhdr(info);
1401 struct net *net = sock_net(skb->sk);
1402 struct sw_flow_key key;
1403 struct sk_buff *reply;
1404 struct sw_flow *flow = NULL;
1405 struct datapath *dp;
1406 struct sw_flow_match match;
1407 struct sw_flow_id ufid;
1408 u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1409 int err;
1410 bool log = !a[OVS_FLOW_ATTR_PROBE];
1411 bool ufid_present;
1412
1413 ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
1414 if (a[OVS_FLOW_ATTR_KEY]) {
1415 ovs_match_init(&match, &key, true, NULL);
1416 err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
1417 NULL, log);
1418 if (unlikely(err))
1419 return err;
1420 }
1421
1422 ovs_lock();
1423 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1424 if (unlikely(!dp)) {
1425 err = -ENODEV;
1426 goto unlock;
1427 }
1428
1429 if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
1430 err = ovs_flow_tbl_flush(&dp->table);
1431 goto unlock;
1432 }
1433
1434 if (ufid_present)
1435 flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
1436 else
1437 flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
1438 if (unlikely(!flow)) {
1439 err = -ENOENT;
1440 goto unlock;
1441 }
1442
1443 ovs_flow_tbl_remove(&dp->table, flow);
1444 ovs_unlock();
1445
1446 reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
1447 &flow->id, info, false, ufid_flags);
1448 if (likely(reply)) {
1449 if (!IS_ERR(reply)) {
1450 rcu_read_lock(); /*To keep RCU checker happy. */
1451 err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
1452 reply, info->snd_portid,
1453 info->snd_seq, 0,
1454 OVS_FLOW_CMD_DEL,
1455 ufid_flags);
1456 rcu_read_unlock();
1457 if (WARN_ON_ONCE(err < 0)) {
1458 kfree_skb(reply);
1459 goto out_free;
1460 }
1461
1462 ovs_notify(&dp_flow_genl_family, reply, info);
1463 } else {
1464 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
1465 PTR_ERR(reply));
1466 }
1467 }
1468
1469 out_free:
1470 ovs_flow_free(flow, true);
1471 return 0;
1472 unlock:
1473 ovs_unlock();
1474 return err;
1475 }
1476
1477 static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1478 {
1479 struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1480 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1481 struct table_instance *ti;
1482 struct datapath *dp;
1483 u32 ufid_flags;
1484 int err;
1485
1486 err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
1487 OVS_FLOW_ATTR_MAX, flow_policy, NULL);
1488 if (err)
1489 return err;
1490 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1491
1492 rcu_read_lock();
1493 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1494 if (!dp) {
1495 rcu_read_unlock();
1496 return -ENODEV;
1497 }
1498
1499 ti = rcu_dereference(dp->table.ti);
1500 for (;;) {
1501 struct sw_flow *flow;
1502 u32 bucket, obj;
1503
1504 bucket = cb->args[0];
1505 obj = cb->args[1];
1506 flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1507 if (!flow)
1508 break;
1509
1510 if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1511 NETLINK_CB(cb->skb).portid,
1512 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1513 OVS_FLOW_CMD_GET, ufid_flags) < 0)
1514 break;
1515
1516 cb->args[0] = bucket;
1517 cb->args[1] = obj;
1518 }
1519 rcu_read_unlock();
1520 return skb->len;
1521 }
1522
1523 static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
1524 [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
1525 [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
1526 [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
1527 [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
1528 [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
1529 [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
1530 [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
1531 };
1532
1533 static const struct genl_small_ops dp_flow_genl_ops[] = {
1534 { .cmd = OVS_FLOW_CMD_NEW,
1535 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1536 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1537 .doit = ovs_flow_cmd_new
1538 },
1539 { .cmd = OVS_FLOW_CMD_DEL,
1540 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1541 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1542 .doit = ovs_flow_cmd_del
1543 },
1544 { .cmd = OVS_FLOW_CMD_GET,
1545 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1546 .flags = 0, /* OK for unprivileged users. */
1547 .doit = ovs_flow_cmd_get,
1548 .dumpit = ovs_flow_cmd_dump
1549 },
1550 { .cmd = OVS_FLOW_CMD_SET,
1551 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1552 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1553 .doit = ovs_flow_cmd_set,
1554 },
1555 };
1556
1557 static struct genl_family dp_flow_genl_family __ro_after_init = {
1558 .hdrsize = sizeof(struct ovs_header),
1559 .name = OVS_FLOW_FAMILY,
1560 .version = OVS_FLOW_VERSION,
1561 .maxattr = OVS_FLOW_ATTR_MAX,
1562 .policy = flow_policy,
1563 .netnsok = true,
1564 .parallel_ops = true,
1565 .small_ops = dp_flow_genl_ops,
1566 .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
1567 .resv_start_op = OVS_FLOW_CMD_SET + 1,
1568 .mcgrps = &ovs_dp_flow_multicast_group,
1569 .n_mcgrps = 1,
1570 .module = THIS_MODULE,
1571 };
1572
1573 static size_t ovs_dp_cmd_msg_size(void)
1574 {
1575 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1576
1577 msgsize += nla_total_size(IFNAMSIZ);
1578 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1579 msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1580 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1581 msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
1582 msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */
1583
1584 return msgsize;
1585 }
1586
1587 /* Called with ovs_mutex. */
1588 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1589 u32 portid, u32 seq, u32 flags, u8 cmd)
1590 {
1591 struct ovs_header *ovs_header;
1592 struct ovs_dp_stats dp_stats;
1593 struct ovs_dp_megaflow_stats dp_megaflow_stats;
1594 struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
1595 int err, pids_len;
1596
1597 ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1598 flags, cmd);
1599 if (!ovs_header)
1600 goto error;
1601
1602 ovs_header->dp_ifindex = get_dpifindex(dp);
1603
1604 err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1605 if (err)
1606 goto nla_put_failure;
1607
1608 get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1609 if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1610 &dp_stats, OVS_DP_ATTR_PAD))
1611 goto nla_put_failure;
1612
1613 if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1614 sizeof(struct ovs_dp_megaflow_stats),
1615 &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1616 goto nla_put_failure;
1617
1618 if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1619 goto nla_put_failure;
1620
1621 if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
1622 ovs_flow_tbl_masks_cache_size(&dp->table)))
1623 goto nla_put_failure;
1624
1625 if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
1626 pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
1627 if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
1628 goto nla_put_failure;
1629 }
1630
1631 genlmsg_end(skb, ovs_header);
1632 return 0;
1633
1634 nla_put_failure:
1635 genlmsg_cancel(skb, ovs_header);
1636 error:
1637 return -EMSGSIZE;
1638 }
1639
1640 static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1641 {
1642 return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1643 }
1644
1645 /* Called with rcu_read_lock or ovs_mutex. */
1646 static struct datapath *lookup_datapath(struct net *net,
1647 const struct ovs_header *ovs_header,
1648 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1649 {
1650 struct datapath *dp;
1651
1652 if (!a[OVS_DP_ATTR_NAME])
1653 dp = get_dp(net, ovs_header->dp_ifindex);
1654 else {
1655 struct vport *vport;
1656
1657 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1658 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1659 }
1660 return dp ? dp : ERR_PTR(-ENODEV);
1661 }
1662
1663 static void ovs_dp_reset_user_features(struct sk_buff *skb,
1664 struct genl_info *info)
1665 {
1666 struct datapath *dp;
1667
1668 dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
1669 info->attrs);
1670 if (IS_ERR(dp))
1671 return;
1672
1673 pr_warn("%s: Dropping previously announced user features\n",
1674 ovs_dp_name(dp));
1675 dp->user_features = 0;
1676 }
1677
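/* Replace the datapath's array of per-CPU upcall netlink port IDs.  Called
 * with ovs_mutex; the old array is freed after an RCU grace period.
 */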
1678 static int ovs_dp_set_upcall_portids(struct datapath *dp,
1679 const struct nlattr *ids)
1680 {
1681 struct dp_nlsk_pids *old, *dp_nlsk_pids;
1682
1683 if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
1684 return -EINVAL;
1685
1686 old = ovsl_dereference(dp->upcall_portids);
1687
1688 dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
1689 GFP_KERNEL);
1690 if (!dp_nlsk_pids)
1691 return -ENOMEM;
1692
1693 dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
1694 nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
1695
1696 rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
1697
1698 kfree_rcu(old, rcu);
1699
1700 return 0;
1701 }
1702
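/* Must be called with rcu_read_lock.
 *
 * Return the upcall netlink port ID configured for 'cpu_id', or 0 if no
 * per-CPU port IDs are set.
 */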
1703 u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
1704 {
1705 struct dp_nlsk_pids *dp_nlsk_pids;
1706
1707 dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
1708
1709 if (dp_nlsk_pids) {
1710 if (cpu_id < dp_nlsk_pids->n_pids) {
1711 return dp_nlsk_pids->pids[cpu_id];
1712 } else if (dp_nlsk_pids->n_pids > 0 &&
1713 cpu_id >= dp_nlsk_pids->n_pids) {
1714 /* If the number of netlink PIDs does not match the
1715 * number of CPUs as seen by the kernel, log this and
1716 * fall back to the socket chosen by cpu_id modulo the
1717 * number of PIDs, in order to not drop packets.
1718 */
1719 pr_info_ratelimited("cpu_id mismatch with handler threads");
1720 return dp_nlsk_pids->pids[cpu_id %
1721 dp_nlsk_pids->n_pids];
1722 } else {
1723 return 0;
1724 }
1725 } else {
1726 return 0;
1727 }
1728 }
1729
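/* Apply OVS_DP_ATTR_* changes (user features, masks cache size, per-CPU
 * upcall port IDs) to a datapath.  Called with ovs_mutex.
 */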
1730 static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1731 {
1732 u32 user_features = 0, old_features = dp->user_features;
1733 int err;
1734
1735 if (a[OVS_DP_ATTR_USER_FEATURES]) {
1736 user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1737
1738 if (user_features & ~(OVS_DP_F_VPORT_PIDS |
1739 OVS_DP_F_UNALIGNED |
1740 OVS_DP_F_TC_RECIRC_SHARING |
1741 OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
1742 return -EOPNOTSUPP;
1743
1744 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1745 if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
1746 return -EOPNOTSUPP;
1747 #endif
1748 }
1749
1750 if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
1751 int err;
1752 u32 cache_size;
1753
1754 cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
1755 err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
1756 if (err)
1757 return err;
1758 }
1759
1760 dp->user_features = user_features;
1761
1762 if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
1763 a[OVS_DP_ATTR_PER_CPU_PIDS]) {
1764 /* Upcall Netlink Port IDs have been updated */
1765 err = ovs_dp_set_upcall_portids(dp,
1766 a[OVS_DP_ATTR_PER_CPU_PIDS]);
1767 if (err)
1768 return err;
1769 }
1770
1771 if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1772 !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
1773 tc_skb_ext_tc_enable();
1774 else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1775 (old_features & OVS_DP_F_TC_RECIRC_SHARING))
1776 tc_skb_ext_tc_disable();
1777
1778 return 0;
1779 }
1780
1781 static int ovs_dp_stats_init(struct datapath *dp)
1782 {
1783 dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1784 if (!dp->stats_percpu)
1785 return -ENOMEM;
1786
1787 return 0;
1788 }
1789
1790 static int ovs_dp_vport_init(struct datapath *dp)
1791 {
1792 int i;
1793
1794 dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1795 sizeof(struct hlist_head),
1796 GFP_KERNEL);
1797 if (!dp->ports)
1798 return -ENOMEM;
1799
1800 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1801 INIT_HLIST_HEAD(&dp->ports[i]);
1802
1803 return 0;
1804 }
1805
1806 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1807 {
1808 struct nlattr **a = info->attrs;
1809 struct vport_parms parms;
1810 struct sk_buff *reply;
1811 struct datapath *dp;
1812 struct vport *vport;
1813 struct ovs_net *ovs_net;
1814 int err;
1815
1816 err = -EINVAL;
1817 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1818 goto err;
1819
1820 reply = ovs_dp_cmd_alloc_info();
1821 if (!reply)
1822 return -ENOMEM;
1823
1824 err = -ENOMEM;
1825 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1826 if (dp == NULL)
1827 goto err_destroy_reply;
1828
1829 ovs_dp_set_net(dp, sock_net(skb->sk));
1830
1831 /* Allocate table. */
1832 err = ovs_flow_tbl_init(&dp->table);
1833 if (err)
1834 goto err_destroy_dp;
1835
1836 err = ovs_dp_stats_init(dp);
1837 if (err)
1838 goto err_destroy_table;
1839
1840 err = ovs_dp_vport_init(dp);
1841 if (err)
1842 goto err_destroy_stats;
1843
1844 err = ovs_meters_init(dp);
1845 if (err)
1846 goto err_destroy_ports;
1847
1848 /* Set up our datapath device. */
1849 parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1850 parms.type = OVS_VPORT_TYPE_INTERNAL;
1851 parms.options = NULL;
1852 parms.dp = dp;
1853 parms.port_no = OVSP_LOCAL;
1854 parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1855 parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0);
1856
1857 /* So far only local changes have been made, now need the lock. */
1858 ovs_lock();
1859
1860 err = ovs_dp_change(dp, a);
1861 if (err)
1862 goto err_unlock_and_destroy_meters;
1863
1864 vport = new_vport(&parms);
1865 if (IS_ERR(vport)) {
1866 err = PTR_ERR(vport);
1867 if (err == -EBUSY)
1868 err = -EEXIST;
1869
1870 if (err == -EEXIST) {
1871 /* An outdated user space instance that does not understand
1872 * the concept of user_features has attempted to create a new
1873 * datapath and is likely to reuse it. Drop all user features.
1874 */
1875 if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1876 ovs_dp_reset_user_features(skb, info);
1877 }
1878
1879 goto err_destroy_portids;
1880 }
1881
1882 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1883 info->snd_seq, 0, OVS_DP_CMD_NEW);
1884 BUG_ON(err < 0);
1885
1886 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1887 list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1888
1889 ovs_unlock();
1890
1891 ovs_notify(&dp_datapath_genl_family, reply, info);
1892 return 0;
1893
1894 err_destroy_portids:
1895 kfree(rcu_dereference_raw(dp->upcall_portids));
1896 err_unlock_and_destroy_meters:
1897 ovs_unlock();
1898 ovs_meters_exit(dp);
1899 err_destroy_ports:
1900 kfree(dp->ports);
1901 err_destroy_stats:
1902 free_percpu(dp->stats_percpu);
1903 err_destroy_table:
1904 ovs_flow_tbl_destroy(&dp->table);
1905 err_destroy_dp:
1906 kfree(dp);
1907 err_destroy_reply:
1908 kfree_skb(reply);
1909 err:
1910 return err;
1911 }
1912
1913 /* Called with ovs_mutex. */
1914 static void __dp_destroy(struct datapath *dp)
1915 {
1916 struct flow_table *table = &dp->table;
1917 int i;
1918
1919 if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
1920 tc_skb_ext_tc_disable();
1921
1922 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1923 struct vport *vport;
1924 struct hlist_node *n;
1925
1926 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1927 if (vport->port_no != OVSP_LOCAL)
1928 ovs_dp_detach_port(vport);
1929 }
1930
1931 list_del_rcu(&dp->list_node);
1932
1933 	/* OVSP_LOCAL is the datapath's internal port. Make sure all other ports
1934 	 * in the datapath are destroyed before the datapath itself is freed.
1935 	 */
1936 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1937
1938 	/* Flush the sw_flows from the tables. The RCU callback only releases
1939 	 * resources such as the dp, ports and tables; flushing them here avoids
1940 	 * issues such as RCU usage warnings.
1941 	 */
1942 table_instance_flow_flush(table, ovsl_dereference(table->ti),
1943 ovsl_dereference(table->ufid_ti));
1944
1945 /* RCU destroy the ports, meters and flow tables. */
1946 call_rcu(&dp->rcu, destroy_dp_rcu);
1947 }
1948
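/* OVS_DP_CMD_DEL: look up the datapath, build the reply while it still
 * exists, then destroy it under ovs_lock and notify listeners.
 */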
1949 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1950 {
1951 struct sk_buff *reply;
1952 struct datapath *dp;
1953 int err;
1954
1955 reply = ovs_dp_cmd_alloc_info();
1956 if (!reply)
1957 return -ENOMEM;
1958
1959 ovs_lock();
1960 dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
1961 info->attrs);
1962 err = PTR_ERR(dp);
1963 if (IS_ERR(dp))
1964 goto err_unlock_free;
1965
1966 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1967 info->snd_seq, 0, OVS_DP_CMD_DEL);
1968 BUG_ON(err < 0);
1969
1970 __dp_destroy(dp);
1971 ovs_unlock();
1972
1973 ovs_notify(&dp_datapath_genl_family, reply, info);
1974
1975 return 0;
1976
1977 err_unlock_free:
1978 ovs_unlock();
1979 kfree_skb(reply);
1980 return err;
1981 }
1982
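/* OVS_DP_CMD_SET: apply attribute changes (e.g. user features or the
 * masks cache size) to an existing datapath and notify listeners.
 */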
1983 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1984 {
1985 struct sk_buff *reply;
1986 struct datapath *dp;
1987 int err;
1988
1989 reply = ovs_dp_cmd_alloc_info();
1990 if (!reply)
1991 return -ENOMEM;
1992
1993 ovs_lock();
1994 dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
1995 info->attrs);
1996 err = PTR_ERR(dp);
1997 if (IS_ERR(dp))
1998 goto err_unlock_free;
1999
2000 err = ovs_dp_change(dp, info->attrs);
2001 if (err)
2002 goto err_unlock_free;
2003
2004 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
2005 info->snd_seq, 0, OVS_DP_CMD_SET);
2006 BUG_ON(err < 0);
2007
2008 ovs_unlock();
2009 ovs_notify(&dp_datapath_genl_family, reply, info);
2010
2011 return 0;
2012
2013 err_unlock_free:
2014 ovs_unlock();
2015 kfree_skb(reply);
2016 return err;
2017 }
2018
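/* OVS_DP_CMD_GET: reply with the attributes of a single datapath. */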
2019 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
2020 {
2021 struct sk_buff *reply;
2022 struct datapath *dp;
2023 int err;
2024
2025 reply = ovs_dp_cmd_alloc_info();
2026 if (!reply)
2027 return -ENOMEM;
2028
2029 ovs_lock();
2030 dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
2031 info->attrs);
2032 if (IS_ERR(dp)) {
2033 err = PTR_ERR(dp);
2034 goto err_unlock_free;
2035 }
2036 err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
2037 info->snd_seq, 0, OVS_DP_CMD_GET);
2038 BUG_ON(err < 0);
2039 ovs_unlock();
2040
2041 return genlmsg_reply(reply, info);
2042
2043 err_unlock_free:
2044 ovs_unlock();
2045 kfree_skb(reply);
2046 return err;
2047 }
2048
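/* Dump all datapaths in the caller's network namespace, resuming from the
 * index saved in cb->args[0].
 */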
2049 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2050 {
2051 struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
2052 struct datapath *dp;
2053 int skip = cb->args[0];
2054 int i = 0;
2055
2056 ovs_lock();
2057 list_for_each_entry(dp, &ovs_net->dps, list_node) {
2058 if (i >= skip &&
2059 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
2060 cb->nlh->nlmsg_seq, NLM_F_MULTI,
2061 OVS_DP_CMD_GET) < 0)
2062 break;
2063 i++;
2064 }
2065 ovs_unlock();
2066
2067 cb->args[0] = i;
2068
2069 return skb->len;
2070 }
2071
2072 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
2073 [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2074 [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2075 [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
2076 [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
2077 PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
2078 [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
2079 };
2080
2081 static const struct genl_small_ops dp_datapath_genl_ops[] = {
2082 { .cmd = OVS_DP_CMD_NEW,
2083 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2084 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2085 .doit = ovs_dp_cmd_new
2086 },
2087 { .cmd = OVS_DP_CMD_DEL,
2088 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2089 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2090 .doit = ovs_dp_cmd_del
2091 },
2092 { .cmd = OVS_DP_CMD_GET,
2093 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2094 .flags = 0, /* OK for unprivileged users. */
2095 .doit = ovs_dp_cmd_get,
2096 .dumpit = ovs_dp_cmd_dump
2097 },
2098 { .cmd = OVS_DP_CMD_SET,
2099 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2100 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2101 .doit = ovs_dp_cmd_set,
2102 },
2103 };
2104
2105 static struct genl_family dp_datapath_genl_family __ro_after_init = {
2106 .hdrsize = sizeof(struct ovs_header),
2107 .name = OVS_DATAPATH_FAMILY,
2108 .version = OVS_DATAPATH_VERSION,
2109 .maxattr = OVS_DP_ATTR_MAX,
2110 .policy = datapath_policy,
2111 .netnsok = true,
2112 .parallel_ops = true,
2113 .small_ops = dp_datapath_genl_ops,
2114 .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
2115 .resv_start_op = OVS_DP_CMD_SET + 1,
2116 .mcgrps = &ovs_dp_datapath_multicast_group,
2117 .n_mcgrps = 1,
2118 .module = THIS_MODULE,
2119 };
2120
2121 /* Called with ovs_mutex or RCU read lock. */
2122 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
2123 struct net *net, u32 portid, u32 seq,
2124 u32 flags, u8 cmd, gfp_t gfp)
2125 {
2126 struct ovs_header *ovs_header;
2127 struct ovs_vport_stats vport_stats;
2128 struct net *net_vport;
2129 int err;
2130
2131 ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
2132 flags, cmd);
2133 if (!ovs_header)
2134 return -EMSGSIZE;
2135
2136 ovs_header->dp_ifindex = get_dpifindex(vport->dp);
2137
2138 if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
2139 nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
2140 nla_put_string(skb, OVS_VPORT_ATTR_NAME,
2141 ovs_vport_name(vport)) ||
2142 nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
2143 goto nla_put_failure;
2144
2145 rcu_read_lock();
2146 net_vport = dev_net_rcu(vport->dev);
2147 if (!net_eq(net, net_vport)) {
2148 int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC);
2149
2150 if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
2151 goto nla_put_failure_unlock;
2152 }
2153 rcu_read_unlock();
2154
2155 ovs_vport_get_stats(vport, &vport_stats);
2156 if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
2157 sizeof(struct ovs_vport_stats), &vport_stats,
2158 OVS_VPORT_ATTR_PAD))
2159 goto nla_put_failure;
2160
2161 if (ovs_vport_get_upcall_stats(vport, skb))
2162 goto nla_put_failure;
2163
2164 if (ovs_vport_get_upcall_portids(vport, skb))
2165 goto nla_put_failure;
2166
2167 err = ovs_vport_get_options(vport, skb);
2168 if (err == -EMSGSIZE)
2169 goto error;
2170
2171 genlmsg_end(skb, ovs_header);
2172 return 0;
2173
2174 nla_put_failure_unlock:
2175 rcu_read_unlock();
2176 nla_put_failure:
2177 err = -EMSGSIZE;
2178 error:
2179 genlmsg_cancel(skb, ovs_header);
2180 return err;
2181 }
2182
2183 static struct sk_buff *ovs_vport_cmd_alloc_info(void)
2184 {
2185 return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2186 }
2187
2188 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
2189 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2190 u32 portid, u32 seq, u8 cmd)
2191 {
2192 struct sk_buff *skb;
2193 int retval;
2194
2195 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2196 if (!skb)
2197 return ERR_PTR(-ENOMEM);
2198
2199 retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
2200 GFP_KERNEL);
2201 BUG_ON(retval < 0);
2202
2203 return skb;
2204 }
2205
2206 /* Called with ovs_mutex or RCU read lock. */
2207 static struct vport *lookup_vport(struct net *net,
2208 const struct ovs_header *ovs_header,
2209 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
2210 {
2211 struct datapath *dp;
2212 struct vport *vport;
2213
2214 if (a[OVS_VPORT_ATTR_IFINDEX])
2215 return ERR_PTR(-EOPNOTSUPP);
2216 if (a[OVS_VPORT_ATTR_NAME]) {
2217 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
2218 if (!vport)
2219 return ERR_PTR(-ENODEV);
2220 if (ovs_header->dp_ifindex &&
2221 ovs_header->dp_ifindex != get_dpifindex(vport->dp))
2222 return ERR_PTR(-ENODEV);
2223 return vport;
2224 } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
2225 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
2226
2227 if (port_no >= DP_MAX_PORTS)
2228 return ERR_PTR(-EFBIG);
2229
2230 dp = get_dp(net, ovs_header->dp_ifindex);
2231 if (!dp)
2232 return ERR_PTR(-ENODEV);
2233
2234 vport = ovs_vport_ovsl_rcu(dp, port_no);
2235 if (!vport)
2236 return ERR_PTR(-ENODEV);
2237 return vport;
2238 } else
2239 return ERR_PTR(-EINVAL);
2240
2241 }
2242
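/* Return the largest forwarding headroom required by any device attached
 * to this datapath's vports.
 */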
2243 static unsigned int ovs_get_max_headroom(struct datapath *dp)
2244 {
2245 unsigned int dev_headroom, max_headroom = 0;
2246 struct net_device *dev;
2247 struct vport *vport;
2248 int i;
2249
2250 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2251 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2252 lockdep_ovsl_is_held()) {
2253 dev = vport->dev;
2254 dev_headroom = netdev_get_fwd_headroom(dev);
2255 if (dev_headroom > max_headroom)
2256 max_headroom = dev_headroom;
2257 }
2258 }
2259
2260 return max_headroom;
2261 }
2262
2263 /* Called with ovs_mutex */
2264 static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
2265 {
2266 struct vport *vport;
2267 int i;
2268
2269 dp->max_headroom = new_headroom;
2270 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2271 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2272 lockdep_ovsl_is_held())
2273 netdev_set_rx_headroom(vport->dev, new_headroom);
2274 }
2275 }
2276
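/* OVS_VPORT_CMD_NEW: validate the request, pick a free port number if none
 * was supplied, create the vport, adjust the datapath's rx headroom and
 * notify listeners.
 */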
2277 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
2278 {
2279 struct nlattr **a = info->attrs;
2280 struct ovs_header *ovs_header = genl_info_userhdr(info);
2281 struct vport_parms parms;
2282 struct sk_buff *reply;
2283 struct vport *vport;
2284 struct datapath *dp;
2285 unsigned int new_headroom;
2286 u32 port_no;
2287 int err;
2288
2289 if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
2290 !a[OVS_VPORT_ATTR_UPCALL_PID])
2291 return -EINVAL;
2292
2293 parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2294
2295 if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
2296 return -EOPNOTSUPP;
2297
2298 port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0);
2299 if (port_no >= DP_MAX_PORTS)
2300 return -EFBIG;
2301
2302 reply = ovs_vport_cmd_alloc_info();
2303 if (!reply)
2304 return -ENOMEM;
2305
2306 ovs_lock();
2307 restart:
2308 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2309 err = -ENODEV;
2310 if (!dp)
2311 goto exit_unlock_free;
2312
2313 if (port_no) {
2314 vport = ovs_vport_ovsl(dp, port_no);
2315 err = -EBUSY;
2316 if (vport)
2317 goto exit_unlock_free;
2318 } else {
2319 for (port_no = 1; ; port_no++) {
2320 if (port_no >= DP_MAX_PORTS) {
2321 err = -EFBIG;
2322 goto exit_unlock_free;
2323 }
2324 vport = ovs_vport_ovsl(dp, port_no);
2325 if (!vport)
2326 break;
2327 }
2328 }
2329
2330 parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2331 parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2332 parms.dp = dp;
2333 parms.port_no = port_no;
2334 parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2335 parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX],
2336 0);
2337
2338 vport = new_vport(&parms);
2339 err = PTR_ERR(vport);
2340 if (IS_ERR(vport)) {
2341 if (err == -EAGAIN)
2342 goto restart;
2343 goto exit_unlock_free;
2344 }
2345
2346 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2347 info->snd_portid, info->snd_seq, 0,
2348 OVS_VPORT_CMD_NEW, GFP_KERNEL);
2349
2350 new_headroom = netdev_get_fwd_headroom(vport->dev);
2351
2352 if (new_headroom > dp->max_headroom)
2353 ovs_update_headroom(dp, new_headroom);
2354 else
2355 netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2356
2357 BUG_ON(err < 0);
2358 ovs_unlock();
2359
2360 ovs_notify(&dp_vport_genl_family, reply, info);
2361 return 0;
2362
2363 exit_unlock_free:
2364 ovs_unlock();
2365 kfree_skb(reply);
2366 return err;
2367 }
2368
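/* OVS_VPORT_CMD_SET: update a vport's options and/or upcall port IDs; the
 * vport type itself cannot be changed.
 */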
2369 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2370 {
2371 struct nlattr **a = info->attrs;
2372 struct sk_buff *reply;
2373 struct vport *vport;
2374 int err;
2375
2376 reply = ovs_vport_cmd_alloc_info();
2377 if (!reply)
2378 return -ENOMEM;
2379
2380 ovs_lock();
2381 vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
2382 err = PTR_ERR(vport);
2383 if (IS_ERR(vport))
2384 goto exit_unlock_free;
2385
2386 if (a[OVS_VPORT_ATTR_TYPE] &&
2387 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2388 err = -EINVAL;
2389 goto exit_unlock_free;
2390 }
2391
2392 if (a[OVS_VPORT_ATTR_OPTIONS]) {
2393 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2394 if (err)
2395 goto exit_unlock_free;
2396 }
2397
2398
2399 if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2400 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2401
2402 err = ovs_vport_set_upcall_portids(vport, ids);
2403 if (err)
2404 goto exit_unlock_free;
2405 }
2406
2407 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2408 info->snd_portid, info->snd_seq, 0,
2409 OVS_VPORT_CMD_SET, GFP_KERNEL);
2410 BUG_ON(err < 0);
2411
2412 ovs_unlock();
2413 ovs_notify(&dp_vport_genl_family, reply, info);
2414 return 0;
2415
2416 exit_unlock_free:
2417 ovs_unlock();
2418 kfree_skb(reply);
2419 return err;
2420 }
2421
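/* OVS_VPORT_CMD_DEL: detach a non-local vport, recomputing the datapath
 * headroom if the removed device defined the current maximum.
 */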
2422 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2423 {
2424 bool update_headroom = false;
2425 struct nlattr **a = info->attrs;
2426 struct sk_buff *reply;
2427 struct datapath *dp;
2428 struct vport *vport;
2429 unsigned int new_headroom;
2430 int err;
2431
2432 reply = ovs_vport_cmd_alloc_info();
2433 if (!reply)
2434 return -ENOMEM;
2435
2436 ovs_lock();
2437 vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
2438 err = PTR_ERR(vport);
2439 if (IS_ERR(vport))
2440 goto exit_unlock_free;
2441
2442 if (vport->port_no == OVSP_LOCAL) {
2443 err = -EINVAL;
2444 goto exit_unlock_free;
2445 }
2446
2447 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2448 info->snd_portid, info->snd_seq, 0,
2449 OVS_VPORT_CMD_DEL, GFP_KERNEL);
2450 BUG_ON(err < 0);
2451
2452 	/* The vport deletion may trigger a datapath headroom update. */
2453 dp = vport->dp;
2454 if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2455 update_headroom = true;
2456
2457 netdev_reset_rx_headroom(vport->dev);
2458 ovs_dp_detach_port(vport);
2459
2460 if (update_headroom) {
2461 new_headroom = ovs_get_max_headroom(dp);
2462
2463 if (new_headroom < dp->max_headroom)
2464 ovs_update_headroom(dp, new_headroom);
2465 }
2466 ovs_unlock();
2467
2468 ovs_notify(&dp_vport_genl_family, reply, info);
2469 return 0;
2470
2471 exit_unlock_free:
2472 ovs_unlock();
2473 kfree_skb(reply);
2474 return err;
2475 }
2476
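/* OVS_VPORT_CMD_GET: look up a vport under the RCU read lock and reply
 * with its attributes.
 */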
2477 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2478 {
2479 struct nlattr **a = info->attrs;
2480 struct ovs_header *ovs_header = genl_info_userhdr(info);
2481 struct sk_buff *reply;
2482 struct vport *vport;
2483 int err;
2484
2485 reply = ovs_vport_cmd_alloc_info();
2486 if (!reply)
2487 return -ENOMEM;
2488
2489 rcu_read_lock();
2490 vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2491 err = PTR_ERR(vport);
2492 if (IS_ERR(vport))
2493 goto exit_unlock_free;
2494 err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2495 info->snd_portid, info->snd_seq, 0,
2496 OVS_VPORT_CMD_GET, GFP_ATOMIC);
2497 BUG_ON(err < 0);
2498 rcu_read_unlock();
2499
2500 return genlmsg_reply(reply, info);
2501
2502 exit_unlock_free:
2503 rcu_read_unlock();
2504 kfree_skb(reply);
2505 return err;
2506 }
2507
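/* Dump all vports of one datapath, resuming from the hash bucket and
 * offset saved in cb->args[].
 */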
2508 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2509 {
2510 struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2511 struct datapath *dp;
2512 int bucket = cb->args[0], skip = cb->args[1];
2513 int i, j = 0;
2514
2515 rcu_read_lock();
2516 dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2517 if (!dp) {
2518 rcu_read_unlock();
2519 return -ENODEV;
2520 }
2521 for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2522 struct vport *vport;
2523
2524 j = 0;
2525 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2526 if (j >= skip &&
2527 ovs_vport_cmd_fill_info(vport, skb,
2528 sock_net(skb->sk),
2529 NETLINK_CB(cb->skb).portid,
2530 cb->nlh->nlmsg_seq,
2531 NLM_F_MULTI,
2532 OVS_VPORT_CMD_GET,
2533 GFP_ATOMIC) < 0)
2534 goto out;
2535
2536 j++;
2537 }
2538 skip = 0;
2539 }
2540 out:
2541 rcu_read_unlock();
2542
2543 cb->args[0] = i;
2544 cb->args[1] = j;
2545
2546 return skb->len;
2547 }
2548
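/* Periodic work that rebalances the flow masks of every datapath in this
 * namespace and then reschedules itself.
 */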
2549 static void ovs_dp_masks_rebalance(struct work_struct *work)
2550 {
2551 struct ovs_net *ovs_net = container_of(work, struct ovs_net,
2552 masks_rebalance.work);
2553 struct datapath *dp;
2554
2555 ovs_lock();
2556
2557 list_for_each_entry(dp, &ovs_net->dps, list_node)
2558 ovs_flow_masks_rebalance(&dp->table);
2559
2560 ovs_unlock();
2561
2562 schedule_delayed_work(&ovs_net->masks_rebalance,
2563 msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2564 }
2565
2566 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2567 [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2568 [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2569 [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2570 [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2571 [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
2572 [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2573 [OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
2574 [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2575 [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
2576 };
2577
2578 static const struct genl_small_ops dp_vport_genl_ops[] = {
2579 { .cmd = OVS_VPORT_CMD_NEW,
2580 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2581 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2582 .doit = ovs_vport_cmd_new
2583 },
2584 { .cmd = OVS_VPORT_CMD_DEL,
2585 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2586 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2587 .doit = ovs_vport_cmd_del
2588 },
2589 { .cmd = OVS_VPORT_CMD_GET,
2590 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2591 .flags = 0, /* OK for unprivileged users. */
2592 .doit = ovs_vport_cmd_get,
2593 .dumpit = ovs_vport_cmd_dump
2594 },
2595 { .cmd = OVS_VPORT_CMD_SET,
2596 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2597 .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2598 .doit = ovs_vport_cmd_set,
2599 },
2600 };
2601
2602 struct genl_family dp_vport_genl_family __ro_after_init = {
2603 .hdrsize = sizeof(struct ovs_header),
2604 .name = OVS_VPORT_FAMILY,
2605 .version = OVS_VPORT_VERSION,
2606 .maxattr = OVS_VPORT_ATTR_MAX,
2607 .policy = vport_policy,
2608 .netnsok = true,
2609 .parallel_ops = true,
2610 .small_ops = dp_vport_genl_ops,
2611 .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
2612 .resv_start_op = OVS_VPORT_CMD_SET + 1,
2613 .mcgrps = &ovs_dp_vport_multicast_group,
2614 .n_mcgrps = 1,
2615 .module = THIS_MODULE,
2616 };
2617
2618 static struct genl_family * const dp_genl_families[] = {
2619 &dp_datapath_genl_family,
2620 &dp_vport_genl_family,
2621 &dp_flow_genl_family,
2622 &dp_packet_genl_family,
2623 &dp_meter_genl_family,
2624 #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
2625 &dp_ct_limit_genl_family,
2626 #endif
2627 };
2628
2629 static void dp_unregister_genl(int n_families)
2630 {
2631 int i;
2632
2633 for (i = 0; i < n_families; i++)
2634 genl_unregister_family(dp_genl_families[i]);
2635 }
2636
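/* Register all Open vSwitch generic netlink families, unregistering any
 * already-registered ones on failure.
 */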
2637 static int __init dp_register_genl(void)
2638 {
2639 int err;
2640 int i;
2641
2642 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2643
2644 err = genl_register_family(dp_genl_families[i]);
2645 if (err)
2646 goto error;
2647 }
2648
2649 return 0;
2650
2651 error:
2652 dp_unregister_genl(i);
2653 return err;
2654 }
2655
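/* Per-netns init: set up the datapath list and the notifier/rebalance work
 * items, initialize conntrack support and kick off the first masks
 * rebalance.
 */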
2656 static int __net_init ovs_init_net(struct net *net)
2657 {
2658 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2659 int err;
2660
2661 INIT_LIST_HEAD(&ovs_net->dps);
2662 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2663 INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
2664
2665 err = ovs_ct_init(net);
2666 if (err)
2667 return err;
2668
2669 schedule_delayed_work(&ovs_net->masks_rebalance,
2670 msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2671 return 0;
2672 }
2673
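/* Collect on @head the internal-device vports of @net whose underlying
 * device lives in the exiting namespace @dnet.
 */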
2674 static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2675 struct list_head *head)
2676 {
2677 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2678 struct datapath *dp;
2679
2680 list_for_each_entry(dp, &ovs_net->dps, list_node) {
2681 int i;
2682
2683 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2684 struct vport *vport;
2685
2686 hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2687 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2688 continue;
2689
2690 if (dev_net(vport->dev) == dnet)
2691 list_add(&vport->detach_list, head);
2692 }
2693 }
2694 }
2695 }
2696
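/* Per-netns cleanup: destroy every datapath in the exiting namespace and
 * detach internal vports from other namespaces whose devices live here.
 */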
2697 static void __net_exit ovs_exit_net(struct net *dnet)
2698 {
2699 struct datapath *dp, *dp_next;
2700 struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2701 struct vport *vport, *vport_next;
2702 struct net *net;
2703 LIST_HEAD(head);
2704
2705 ovs_lock();
2706
2707 ovs_ct_exit(dnet);
2708
2709 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2710 __dp_destroy(dp);
2711
2712 down_read(&net_rwsem);
2713 for_each_net(net)
2714 list_vports_from_net(net, dnet, &head);
2715 up_read(&net_rwsem);
2716
2717 	/* Detach all vports from the given namespace. */
2718 list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2719 list_del(&vport->detach_list);
2720 ovs_dp_detach_port(vport);
2721 }
2722
2723 ovs_unlock();
2724
2725 cancel_delayed_work_sync(&ovs_net->masks_rebalance);
2726 cancel_work_sync(&ovs_net->dp_notify_work);
2727 }
2728
2729 static struct pernet_operations ovs_net_ops = {
2730 .init = ovs_init_net,
2731 .exit = ovs_exit_net,
2732 .id = &ovs_net_id,
2733 .size = sizeof(struct ovs_net),
2734 };
2735
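/* Table of OVS-specific drop reason names, indexed by the reason value
 * with the subsystem bits masked off.
 */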
2736 static const char * const ovs_drop_reasons[] = {
2737 #define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x),
2738 OVS_DROP_REASONS(S)
2739 #undef S
2740 };
2741
2742 static struct drop_reason_list drop_reason_list_ovs = {
2743 .reasons = ovs_drop_reasons,
2744 .n_reasons = ARRAY_SIZE(ovs_drop_reasons),
2745 };
2746
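/* Allocate the module's per-CPU storage and initialize each CPU's BH
 * local lock.
 */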
2747 static int __init ovs_alloc_percpu_storage(void)
2748 {
2749 unsigned int cpu;
2750
2751 ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
2752 if (!ovs_pcpu_storage)
2753 return -ENOMEM;
2754
2755 for_each_possible_cpu(cpu) {
2756 struct ovs_pcpu_storage *ovs_pcpu;
2757
2758 ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
2759 local_lock_init(&ovs_pcpu->bh_lock);
2760 }
2761 return 0;
2762 }
2763
2764 static void ovs_free_percpu_storage(void)
2765 {
2766 free_percpu(ovs_pcpu_storage);
2767 }
2768
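/* Module init: allocate per-CPU storage, register the internal-device rtnl
 * link ops, the flow and vport subsystems, pernet operations, the netdev
 * notifier and the generic netlink families, then register the OVS drop
 * reason subsystem.
 */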
2769 static int __init dp_init(void)
2770 {
2771 int err;
2772
2773 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
2774 sizeof_field(struct sk_buff, cb));
2775
2776 pr_info("Open vSwitch switching datapath\n");
2777
2778 err = ovs_alloc_percpu_storage();
2779 if (err)
2780 goto error;
2781
2782 err = ovs_internal_dev_rtnl_link_register();
2783 if (err)
2784 goto error;
2785
2786 err = ovs_flow_init();
2787 if (err)
2788 goto error_unreg_rtnl_link;
2789
2790 err = ovs_vport_init();
2791 if (err)
2792 goto error_flow_exit;
2793
2794 err = register_pernet_device(&ovs_net_ops);
2795 if (err)
2796 goto error_vport_exit;
2797
2798 err = register_netdevice_notifier(&ovs_dp_device_notifier);
2799 if (err)
2800 goto error_netns_exit;
2801
2802 err = ovs_netdev_init();
2803 if (err)
2804 goto error_unreg_notifier;
2805
2806 err = dp_register_genl();
2807 if (err < 0)
2808 goto error_unreg_netdev;
2809
2810 drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
2811 &drop_reason_list_ovs);
2812
2813 return 0;
2814
2815 error_unreg_netdev:
2816 ovs_netdev_exit();
2817 error_unreg_notifier:
2818 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2819 error_netns_exit:
2820 unregister_pernet_device(&ovs_net_ops);
2821 error_vport_exit:
2822 ovs_vport_exit();
2823 error_flow_exit:
2824 ovs_flow_exit();
2825 error_unreg_rtnl_link:
2826 ovs_internal_dev_rtnl_link_unregister();
2827 error:
2828 ovs_free_percpu_storage();
2829 return err;
2830 }
2831
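/* Module exit: tear down everything set up in dp_init(), using
 * rcu_barrier() to wait for outstanding RCU callbacks before freeing the
 * flow and vport caches.
 */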
2832 static void dp_cleanup(void)
2833 {
2834 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2835 ovs_netdev_exit();
2836 unregister_netdevice_notifier(&ovs_dp_device_notifier);
2837 unregister_pernet_device(&ovs_net_ops);
2838 drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
2839 rcu_barrier();
2840 ovs_vport_exit();
2841 ovs_flow_exit();
2842 ovs_internal_dev_rtnl_link_unregister();
2843 ovs_free_percpu_storage();
2844 }
2845
2846 module_init(dp_init);
2847 module_exit(dp_cleanup);
2848
2849 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2850 MODULE_LICENSE("GPL");
2851 MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2852 MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2853 MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2854 MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2855 MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
2856 MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
2857