/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2018 Mellanox Technologies. */

#include <net/inet_ecn.h>
#include <net/vxlan.h>
#include <net/gre.h>
#include <net/geneve.h>
#include <net/bareudp.h>
#include "en/tc_tun.h"
#include "en/tc_priv.h"
#include "en_tc.h"
#include "rep/tc.h"
#include "rep/neigh.h"
#include "lag/lag.h"
#include "lag/mp.h"

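/* Result of a tunnel route lookup. route_dev and n (when set) hold
 * references taken by the lookup helpers below; they must be released
 * with mlx5e_tc_tun_route_attr_cleanup() or the *_put() wrappers.
 */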
struct mlx5e_tc_tun_route_attr {
	struct net_device *out_dev;
	struct net_device *route_dev;
	union {
		struct flowi4 fl4;
		struct flowi6 fl6;
	} fl;
	struct neighbour *n;
	u8 ttl;
};

#define TC_TUN_ROUTE_ATTR_INIT(name) struct mlx5e_tc_tun_route_attr name = {}

static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr)
{
	if (attr->n)
		neigh_release(attr->n);
	if (attr->route_dev)
		dev_put(attr->route_dev);
}

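/* Map a tunnel net_device to the matching mlx5e tunnel type descriptor,
 * or NULL if the device kind is not offloadable.
 */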
struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
{
	if (netif_is_vxlan(tunnel_dev))
		return &vxlan_tunnel;
	else if (netif_is_geneve(tunnel_dev))
		return &geneve_tunnel;
	else if (netif_is_gretap(tunnel_dev) ||
		 netif_is_ip6gretap(tunnel_dev))
		return &gre_tunnel;
	else if (netif_is_bareudp(tunnel_dev))
		return &mplsoudp_tunnel;
	else
		return NULL;
}

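/* Resolve the device the route actually egresses through (route_dev) and
 * the eswitch device the encapsulated traffic should be forwarded to
 * (out_dev). When the egress device sits on a different HW e-switch, or
 * is a LAG bond, a VLAN upper or an OVS internal port, the uplink
 * representor is used as out_dev instead.
 */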
static int get_route_and_out_devs(struct mlx5e_priv *priv,
				  struct net_device *dev,
				  struct net_device **route_dev,
				  struct net_device **out_dev)
{
	struct net_device *uplink_dev, *uplink_upper, *real_dev;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	bool dst_is_lag_dev;

	real_dev = is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : dev;
	uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);

	rcu_read_lock();
	uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
	/* mlx5_lag_is_sriov() is a blocking function which can't be called
	 * while holding the RCU read lock, so take a reference on the
	 * net_device for correctness' sake.
	 */
	if (uplink_upper)
		dev_hold(uplink_upper);
	rcu_read_unlock();

	dst_is_lag_dev = (uplink_upper &&
			  netif_is_lag_master(uplink_upper) &&
			  real_dev == uplink_upper &&
			  mlx5_lag_is_sriov(priv->mdev));
	if (uplink_upper)
		dev_put(uplink_upper);

	/* if the egress device isn't on the same HW e-switch or
	 * it's a LAG device, use the uplink
	 */
	*route_dev = dev;
	if (!netdev_port_same_parent_id(priv->netdev, real_dev) ||
	    dst_is_lag_dev || is_vlan_dev(*route_dev) ||
	    netif_is_ovs_master(*route_dev))
		*out_dev = uplink_dev;
	else if (mlx5e_eswitch_rep(dev) &&
		 mlx5e_is_valid_eswitch_fwd_dev(priv, dev))
		*out_dev = *route_dev;
	else
		return -EOPNOTSUPP;

	if (!mlx5e_eswitch_uplink_rep(*out_dev))
		return -EOPNOTSUPP;

	if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev &&
	    !mlx5_lag_is_mpesw(priv->mdev))
		return -EOPNOTSUPP;

	return 0;
}

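/* Perform the IPv4 route and neighbour lookup for a tunnel. The output
 * interface is pinned to the uplink when multipath LAG is active,
 * otherwise to the tunnel's remote ifindex when the tunnel type exposes
 * one. Non-unicast routes are rejected, and under multipath an IPv4
 * gateway is required. On success, references on route_dev and the
 * neighbour are held until mlx5e_route_lookup_ipv4_put().
 */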
static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv,
				       struct net_device *dev,
				       struct mlx5e_tc_tun_route_attr *attr)
{
	struct net_device *route_dev;
	struct net_device *out_dev;
	struct neighbour *n;
	struct rtable *rt;

#if IS_ENABLED(CONFIG_INET)
	struct mlx5_core_dev *mdev = priv->mdev;
	struct net_device *uplink_dev;
	int ret;

	if (mlx5_lag_is_multipath(mdev)) {
		struct mlx5_eswitch *esw = mdev->priv.eswitch;

		uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
		attr->fl.fl4.flowi4_oif = uplink_dev->ifindex;
	} else {
		struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);

		if (tunnel && tunnel->get_remote_ifindex)
			attr->fl.fl4.flowi4_oif = tunnel->get_remote_ifindex(dev);
	}

	rt = ip_route_output_key(dev_net(dev), &attr->fl.fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	if (rt->rt_type != RTN_UNICAST) {
		ret = -ENETUNREACH;
		goto err_rt_release;
	}

	if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
		ret = -ENETUNREACH;
		goto err_rt_release;
	}
#else
	return -EOPNOTSUPP;
#endif

	ret = get_route_and_out_devs(priv, rt->dst.dev, &route_dev, &out_dev);
	if (ret < 0)
		goto err_rt_release;
	dev_hold(route_dev);

	if (!attr->ttl)
		attr->ttl = ip4_dst_hoplimit(&rt->dst);
	n = dst_neigh_lookup(&rt->dst, &attr->fl.fl4.daddr);
	if (!n) {
		ret = -ENOMEM;
		goto err_dev_release;
	}

	ip_rt_put(rt);
	attr->route_dev = route_dev;
	attr->out_dev = out_dev;
	attr->n = n;
	return 0;

err_dev_release:
	dev_put(route_dev);
err_rt_release:
	ip_rt_put(rt);
	return ret;
}

static void mlx5e_route_lookup_ipv4_put(struct mlx5e_tc_tun_route_attr *attr)
{
	mlx5e_tc_tun_route_attr_cleanup(attr);
}

static const char *mlx5e_netdev_kind(struct net_device *dev)
{
	if (dev->rtnl_link_ops)
		return dev->rtnl_link_ops->kind;
	else
		return "unknown";
}

static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto,
				      struct mlx5e_encap_entry *e)
{
	if (!e->tunnel) {
		pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n");
		return -EOPNOTSUPP;
	}

	return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e);
}

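/* Write the outer Ethernet header (plus a VLAN tag when route_dev is a
 * VLAN upper) into buf and return a pointer to where the IP header
 * should start.
 */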
static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev,
			     struct mlx5e_encap_entry *e,
			     u16 proto)
{
	struct ethhdr *eth = (struct ethhdr *)buf;
	char *ip;

	ether_addr_copy(eth->h_dest, e->h_dest);
	ether_addr_copy(eth->h_source, dev->dev_addr);
	if (is_vlan_dev(dev)) {
		struct vlan_hdr *vlan = (struct vlan_hdr *)
					((char *)eth + ETH_HLEN);
		ip = (char *)vlan + VLAN_HLEN;
		eth->h_proto = vlan_dev_vlan_proto(dev);
		vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev));
		vlan->h_vlan_encapsulated_proto = htons(proto);
	} else {
		eth->h_proto = htons(proto);
		ip = (char *)eth + ETH_HLEN;
	}

	return ip;
}

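/* Build the complete IPv4 encapsulation header for a new encap entry and,
 * when the destination neighbour is already valid, allocate the HW packet
 * reformat object so the entry can be used right away. With VXLAN, for
 * example, the generated header is outer Ethernet (optionally VLAN
 * tagged), IPv4, UDP and VXLAN. If the neighbour is not yet valid, the
 * entry stays attached to neigh update events and is made valid once
 * resolution completes.
 */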
int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
				    struct net_device *mirred_dev,
				    struct mlx5e_encap_entry *e)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_neigh m_neigh = {};
	TC_TUN_ROUTE_ATTR_INIT(attr);
	int ipv4_encap_size;
	char *encap_header;
	struct iphdr *ip;
	u8 nud_state;
	int err;

	/* add the IP fields */
	attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
	attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
	attr.fl.fl4.saddr = tun_key->u.ipv4.src;
	attr.ttl = tun_key->ttl;

	err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
	if (err)
		return err;

	ipv4_encap_size =
		(is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
		sizeof(struct iphdr) +
		e->tunnel->calc_hlen(e);

	if (max_encap_size < ipv4_encap_size) {
		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
			       ipv4_encap_size, max_encap_size);
		err = -EOPNOTSUPP;
		goto release_neigh;
	}

	encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
	if (!encap_header) {
		err = -ENOMEM;
		goto release_neigh;
	}

	m_neigh.family = attr.n->ops->family;
	memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
	e->out_dev = attr.out_dev;
	e->route_dev_ifindex = attr.route_dev->ifindex;

	/* It's important to add the neigh to the hash table before checking
	 * the neigh validity state. That way, if we get a notification when
	 * the neigh changes its validity state, we will find the relevant
	 * neigh in the hash.
	 */
	err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
	if (err)
		goto free_encap;

	read_lock_bh(&attr.n->lock);
	nud_state = attr.n->nud_state;
	ether_addr_copy(e->h_dest, attr.n->ha);
	read_unlock_bh(&attr.n->lock);

	/* add ethernet header */
	ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
					     ETH_P_IP);

	/* add ip header */
	ip->tos = tun_key->tos;
	ip->version = 0x4;
	ip->ihl = 0x5;
	ip->ttl = attr.ttl;
	ip->daddr = attr.fl.fl4.daddr;
	ip->saddr = attr.fl.fl4.saddr;

	/* add tunneling protocol header */
	err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
					 &ip->protocol, e);
	if (err)
		goto destroy_neigh_entry;

	e->encap_size = ipv4_encap_size;
	e->encap_header = encap_header;
	encap_header = NULL;

	if (!(nud_state & NUD_VALID)) {
		neigh_event_send(attr.n, NULL);
		/* the encap entry will be made valid on a neigh update event
		 * and not used before that.
		 */
		goto release_neigh;
	}

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		err = PTR_ERR(e->pkt_reformat);
		goto destroy_neigh_entry;
	}

	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
	mlx5e_route_lookup_ipv4_put(&attr);
	return err;

destroy_neigh_entry:
	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
free_encap:
	kfree(encap_header);
release_neigh:
	mlx5e_route_lookup_ipv4_put(&attr);
	return err;
}

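/* Rebuild the IPv4 encapsulation header for an existing encap entry after
 * a route or neighbour change. The flow mirrors the create path above,
 * except that the entry is already attached to its nhe, so the cached
 * header and packet reformat object are refreshed in place.
 */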
int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv,
				    struct net_device *mirred_dev,
				    struct mlx5e_encap_entry *e)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
	struct mlx5_pkt_reformat_params reformat_params;
	TC_TUN_ROUTE_ATTR_INIT(attr);
	int ipv4_encap_size;
	char *encap_header;
	struct iphdr *ip;
	u8 nud_state;
	int err;

	/* add the IP fields */
	attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK;
	attr.fl.fl4.daddr = tun_key->u.ipv4.dst;
	attr.fl.fl4.saddr = tun_key->u.ipv4.src;
	attr.ttl = tun_key->ttl;

	err = mlx5e_route_lookup_ipv4_get(priv, mirred_dev, &attr);
	if (err)
		return err;

	ipv4_encap_size =
		(is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
		sizeof(struct iphdr) +
		e->tunnel->calc_hlen(e);

	if (max_encap_size < ipv4_encap_size) {
		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
			       ipv4_encap_size, max_encap_size);
		err = -EOPNOTSUPP;
		goto release_neigh;
	}

	encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
	if (!encap_header) {
		err = -ENOMEM;
		goto release_neigh;
	}

	e->route_dev_ifindex = attr.route_dev->ifindex;

	read_lock_bh(&attr.n->lock);
	nud_state = attr.n->nud_state;
	ether_addr_copy(e->h_dest, attr.n->ha);
	WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
	read_unlock_bh(&attr.n->lock);

	/* add ethernet header */
	ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
					     ETH_P_IP);

	/* add ip header */
	ip->tos = tun_key->tos;
	ip->version = 0x4;
	ip->ihl = 0x5;
	ip->ttl = attr.ttl;
	ip->daddr = attr.fl.fl4.daddr;
	ip->saddr = attr.fl.fl4.saddr;

	/* add tunneling protocol header */
	err = mlx5e_gen_ip_tunnel_header((char *)ip + sizeof(struct iphdr),
					 &ip->protocol, e);
	if (err)
		goto free_encap;

	e->encap_size = ipv4_encap_size;
	kfree(e->encap_header);
	e->encap_header = encap_header;
	encap_header = NULL;

	if (!(nud_state & NUD_VALID)) {
		neigh_event_send(attr.n, NULL);
		/* the encap entry will be made valid on a neigh update event
		 * and not used before that.
		 */
		goto release_neigh;
	}

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		err = PTR_ERR(e->pkt_reformat);
		goto free_encap;
	}

	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
	mlx5e_route_lookup_ipv4_put(&attr);
	return err;

free_encap:
	kfree(encap_header);
release_neigh:
	mlx5e_route_lookup_ipv4_put(&attr);
	return err;
}

#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
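/* IPv6 counterpart of mlx5e_route_lookup_ipv4_get(): resolve the dst,
 * devices and neighbour for the tunnel destination, taking references
 * that are dropped by mlx5e_route_lookup_ipv6_put().
 */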
static int mlx5e_route_lookup_ipv6_get(struct mlx5e_priv *priv,
				       struct net_device *dev,
				       struct mlx5e_tc_tun_route_attr *attr)
{
	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(dev);
	struct net_device *route_dev;
	struct net_device *out_dev;
	struct dst_entry *dst;
	struct neighbour *n;
	int ret;

	if (tunnel && tunnel->get_remote_ifindex)
		attr->fl.fl6.flowi6_oif = tunnel->get_remote_ifindex(dev);
	dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(dev), NULL, &attr->fl.fl6,
					      NULL);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	if (!attr->ttl)
		attr->ttl = ip6_dst_hoplimit(dst);

	ret = get_route_and_out_devs(priv, dst->dev, &route_dev, &out_dev);
	if (ret < 0)
		goto err_dst_release;

	dev_hold(route_dev);
	n = dst_neigh_lookup(dst, &attr->fl.fl6.daddr);
	if (!n) {
		ret = -ENOMEM;
		goto err_dev_release;
	}

	dst_release(dst);
	attr->out_dev = out_dev;
	attr->route_dev = route_dev;
	attr->n = n;
	return 0;

err_dev_release:
	dev_put(route_dev);
err_dst_release:
	dst_release(dst);
	return ret;
}

static void mlx5e_route_lookup_ipv6_put(struct mlx5e_tc_tun_route_attr *attr)
{
	mlx5e_tc_tun_route_attr_cleanup(attr);
}

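/* IPv6 counterpart of mlx5e_tc_tun_create_header_ipv4(): build the outer
 * Ethernet/IPv6/tunnel header for a new encap entry and allocate the
 * packet reformat object once the neighbour is valid. The hardware fills
 * in the IPv6 payload length.
 */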
int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
				    struct net_device *mirred_dev,
				    struct mlx5e_encap_entry *e)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
	struct mlx5_pkt_reformat_params reformat_params;
	struct mlx5e_neigh m_neigh = {};
	TC_TUN_ROUTE_ATTR_INIT(attr);
	struct ipv6hdr *ip6h;
	int ipv6_encap_size;
	char *encap_header;
	u8 nud_state;
	int err;

	attr.ttl = tun_key->ttl;
	attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
	attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
	attr.fl.fl6.saddr = tun_key->u.ipv6.src;

	err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
	if (err)
		return err;

	ipv6_encap_size =
		(is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
		sizeof(struct ipv6hdr) +
		e->tunnel->calc_hlen(e);

	if (max_encap_size < ipv6_encap_size) {
		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
			       ipv6_encap_size, max_encap_size);
		err = -EOPNOTSUPP;
		goto release_neigh;
	}

	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
	if (!encap_header) {
		err = -ENOMEM;
		goto release_neigh;
	}

	m_neigh.family = attr.n->ops->family;
	memcpy(&m_neigh.dst_ip, attr.n->primary_key, attr.n->tbl->key_len);
	e->out_dev = attr.out_dev;
	e->route_dev_ifindex = attr.route_dev->ifindex;

	/* It's important to add the neigh to the hash table before checking
	 * the neigh validity state. That way, if we get a notification when
	 * the neigh changes its validity state, we will find the relevant
	 * neigh in the hash.
	 */
	err = mlx5e_rep_encap_entry_attach(netdev_priv(attr.out_dev), e, &m_neigh, attr.n->dev);
	if (err)
		goto free_encap;

	read_lock_bh(&attr.n->lock);
	nud_state = attr.n->nud_state;
	ether_addr_copy(e->h_dest, attr.n->ha);
	read_unlock_bh(&attr.n->lock);

	/* add ethernet header */
	ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
						 ETH_P_IPV6);

	/* add ip header */
	ip6_flow_hdr(ip6h, tun_key->tos, 0);
	/* the HW fills in the ipv6 payload len */
	ip6h->hop_limit = attr.ttl;
	ip6h->daddr = attr.fl.fl6.daddr;
	ip6h->saddr = attr.fl.fl6.saddr;

	/* add tunneling protocol header */
	err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
					 &ip6h->nexthdr, e);
	if (err)
		goto destroy_neigh_entry;

	e->encap_size = ipv6_encap_size;
	e->encap_header = encap_header;
	encap_header = NULL;

	if (!(nud_state & NUD_VALID)) {
		neigh_event_send(attr.n, NULL);
		/* the encap entry will be made valid on a neigh update event
		 * and not used before that.
		 */
		goto release_neigh;
	}

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		err = PTR_ERR(e->pkt_reformat);
		goto destroy_neigh_entry;
	}

	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
	mlx5e_route_lookup_ipv6_put(&attr);
	return err;

destroy_neigh_entry:
	mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
free_encap:
	kfree(encap_header);
release_neigh:
	mlx5e_route_lookup_ipv6_put(&attr);
	return err;
}

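/* IPv6 counterpart of mlx5e_tc_tun_update_header_ipv4(): rebuild the
 * encapsulation header and packet reformat object of an existing encap
 * entry after a route or neighbour change.
 */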
int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
				    struct net_device *mirred_dev,
				    struct mlx5e_encap_entry *e)
{
	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
	const struct ip_tunnel_key *tun_key = &e->tun_info->key;
	struct mlx5_pkt_reformat_params reformat_params;
	TC_TUN_ROUTE_ATTR_INIT(attr);
	struct ipv6hdr *ip6h;
	int ipv6_encap_size;
	char *encap_header;
	u8 nud_state;
	int err;

	attr.ttl = tun_key->ttl;

	attr.fl.fl6.flowlabel = ip6_make_flowinfo(tun_key->tos, tun_key->label);
	attr.fl.fl6.daddr = tun_key->u.ipv6.dst;
	attr.fl.fl6.saddr = tun_key->u.ipv6.src;

	err = mlx5e_route_lookup_ipv6_get(priv, mirred_dev, &attr);
	if (err)
		return err;

	ipv6_encap_size =
		(is_vlan_dev(attr.route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) +
		sizeof(struct ipv6hdr) +
		e->tunnel->calc_hlen(e);

	if (max_encap_size < ipv6_encap_size) {
		mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
			       ipv6_encap_size, max_encap_size);
		err = -EOPNOTSUPP;
		goto release_neigh;
	}

	encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
	if (!encap_header) {
		err = -ENOMEM;
		goto release_neigh;
	}

	e->route_dev_ifindex = attr.route_dev->ifindex;

	read_lock_bh(&attr.n->lock);
	nud_state = attr.n->nud_state;
	ether_addr_copy(e->h_dest, attr.n->ha);
	WRITE_ONCE(e->nhe->neigh_dev, attr.n->dev);
	read_unlock_bh(&attr.n->lock);

	/* add ethernet header */
	ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, attr.route_dev, e,
						 ETH_P_IPV6);

	/* add ip header */
	ip6_flow_hdr(ip6h, tun_key->tos, 0);
	/* the HW fills in the ipv6 payload len */
	ip6h->hop_limit = attr.ttl;
	ip6h->daddr = attr.fl.fl6.daddr;
	ip6h->saddr = attr.fl.fl6.saddr;

	/* add tunneling protocol header */
	err = mlx5e_gen_ip_tunnel_header((char *)ip6h + sizeof(struct ipv6hdr),
					 &ip6h->nexthdr, e);
	if (err)
		goto free_encap;

	e->encap_size = ipv6_encap_size;
	kfree(e->encap_header);
	e->encap_header = encap_header;
	encap_header = NULL;

	if (!(nud_state & NUD_VALID)) {
		neigh_event_send(attr.n, NULL);
		/* the encap entry will be made valid on a neigh update event
		 * and not used before that.
		 */
		goto release_neigh;
	}

	memset(&reformat_params, 0, sizeof(reformat_params));
	reformat_params.type = e->reformat_type;
	reformat_params.size = e->encap_size;
	reformat_params.data = e->encap_header;
	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, &reformat_params,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		err = PTR_ERR(e->pkt_reformat);
		goto free_encap;
	}

	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(netdev_priv(attr.out_dev));
	mlx5e_route_lookup_ipv6_put(&attr);
	return err;

free_encap:
	kfree(encap_header);
release_neigh:
	mlx5e_route_lookup_ipv6_put(&attr);
	return err;
}
#endif

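/* Decap path: look up the route for the tunnel's reversed (underlay)
 * addresses to find where decapsulated traffic is received. If the route
 * device is an mlx5e VF netdev on the same eswitch, record its vport in
 * the RX tunnel attributes; if it is an OVS internal port, take an
 * internal port mapping instead.
 */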
int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
			      struct mlx5_flow_spec *spec,
			      struct mlx5_flow_attr *flow_attr,
			      struct net_device *filter_dev)
{
	struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_int_port *int_port;
	TC_TUN_ROUTE_ATTR_INIT(attr);
	u16 vport_num;
	int err = 0;

	if (flow_attr->tun_ip_version == 4) {
		/* Addresses are swapped for decap */
		attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
		attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
		err = mlx5e_route_lookup_ipv4_get(priv, filter_dev, &attr);
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (flow_attr->tun_ip_version == 6) {
		/* Addresses are swapped for decap */
		attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
		attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
		err = mlx5e_route_lookup_ipv6_get(priv, filter_dev, &attr);
	}
#endif
	else
		return 0;

	if (err)
		return err;

	if (attr.route_dev->netdev_ops == &mlx5e_netdev_ops &&
	    mlx5e_tc_is_vf_tunnel(attr.out_dev, attr.route_dev)) {
		err = mlx5e_tc_query_route_vport(attr.out_dev, attr.route_dev, &vport_num);
		if (err)
			goto out;

		esw_attr->rx_tun_attr->decap_vport = vport_num;
	} else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) {
		int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
						 attr.route_dev->ifindex,
						 MLX5E_TC_INT_PORT_INGRESS);
		if (IS_ERR(int_port)) {
			err = PTR_ERR(int_port);
			goto out;
		}
		esw_attr->int_port = int_port;
	}

out:
	if (flow_attr->tun_ip_version == 4)
		mlx5e_route_lookup_ipv4_put(&attr);
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (flow_attr->tun_ip_version == 6)
		mlx5e_route_lookup_ipv6_put(&attr);
#endif
	return err;
}

bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv,
				    struct net_device *netdev)
{
	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev);

	if (tunnel && tunnel->can_offload(priv))
		return true;
	else
		return false;
}

int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev,
				 struct mlx5e_priv *priv,
				 struct mlx5e_encap_entry *e,
				 struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev);

	if (!tunnel) {
		e->reformat_type = -1;
		return -EOPNOTSUPP;
	}

	return tunnel->init_encap_attr(tunnel_dev, priv, e, extack);
}

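/* Translate the tc flower enc_* matches of a decap rule into the mlx5
 * outer-header match spec: tunnel-specific UDP port and tunnel key
 * parsing first, then enc control, IPv4/IPv6 addresses and IP tos/ttl.
 * IP fragments are always left to software.
 */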
int mlx5e_tc_tun_parse(struct net_device *filter_dev,
		       struct mlx5e_priv *priv,
		       struct mlx5_flow_spec *spec,
		       struct flow_cls_offload *f,
		       u8 *match_level)
{
	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	struct netlink_ext_ack *extack = f->common.extack;
	int err = 0;

	if (!tunnel) {
		netdev_warn(priv->netdev,
			    "decapsulation offload is not supported for %s net device\n",
			    mlx5e_netdev_kind(filter_dev));
		err = -EOPNOTSUPP;
		goto out;
	}

	*match_level = tunnel->match_level;

	if (tunnel->parse_udp_ports) {
		err = tunnel->parse_udp_ports(priv, spec, f,
					      headers_c, headers_v);
		if (err)
			goto out;
	}

	if (tunnel->parse_tunnel) {
		err = tunnel->parse_tunnel(priv, spec, f,
					   headers_c, headers_v);
		if (err)
			goto out;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
		struct flow_dissector_key_basic key_basic = {};
		struct flow_dissector_key_basic mask_basic = {
			.n_proto = htons(0xFFFF),
		};
		struct flow_match_basic match_basic = {
			.key = &key_basic, .mask = &mask_basic,
		};
		struct flow_match_control match;
		u16 addr_type;

		flow_rule_match_enc_control(rule, &match);
		addr_type = match.key->addr_type;

		if (flow_rule_has_enc_control_flags(match.mask->flags,
						    extack)) {
			err = -EOPNOTSUPP;
			goto out;
		}

		/* Tunnel addr_type uses the same dissector key IDs as the
		 * non-tunnel one.
		 */
		if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
			struct flow_match_ipv4_addrs match;

			flow_rule_match_enc_ipv4_addrs(rule, &match);
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
				 ntohl(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 src_ipv4_src_ipv6.ipv4_layout.ipv4,
				 ntohl(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
				 ntohl(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
				 ntohl(match.key->dst));

			key_basic.n_proto = htons(ETH_P_IP);
			mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
					       headers_c, headers_v);
		} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
			struct flow_match_ipv6_addrs match;

			flow_rule_match_enc_ipv6_addrs(rule, &match);
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
			       &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
								   ipv6));
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
			       &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
								  ipv6));

			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
			       &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
								   ipv6));
			memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
			       &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
								  ipv6));

			key_basic.n_proto = htons(ETH_P_IPV6);
			mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
					       headers_c, headers_v);
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
		struct flow_match_ip match;

		flow_rule_match_enc_ip(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
			 match.mask->tos & 0x3);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
			 match.key->tos & 0x3);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
			 match.mask->tos >> 2);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
			 match.key->tos >> 2);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
			 match.mask->ttl);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
			 match.key->ttl);

		if (match.mask->ttl &&
		    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
						ft_field_support.outer_ipv4_ttl)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Matching on TTL is not supported");
			err = -EOPNOTSUPP;
			goto out;
		}
	}

	/* let software handle IP fragments */
	MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

	return 0;

out:
	return err;
}

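/* Common UDP tunnel port parsing: the rule must match the full outer UDP
 * destination port; matching the outer source port is allowed but rarely
 * useful, since the hardware generates it on encap.
 */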
int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv,
				 struct mlx5_flow_spec *spec,
				 struct flow_cls_offload *f,
				 void *headers_c,
				 void *headers_v)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct flow_match_ports enc_ports;

	/* The full UDP dst port must be given */

	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "UDP tunnel decap filter must include enc_dst_port condition");
		netdev_warn(priv->netdev,
			    "UDP tunnel decap filter must include enc_dst_port condition\n");
		return -EOPNOTSUPP;
	}

	flow_rule_match_enc_ports(rule, &enc_ports);

	if (memchr_inv(&enc_ports.mask->dst, 0xff,
		       sizeof(enc_ports.mask->dst))) {
		NL_SET_ERR_MSG_MOD(extack,
				   "UDP tunnel decap filter must match enc_dst_port fully");
		netdev_warn(priv->netdev,
			    "UDP tunnel decap filter must match enc_dst_port fully\n");
		return -EOPNOTSUPP;
	}

	/* match on UDP protocol and dst port number */

	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);

	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
		 ntohs(enc_ports.mask->dst));
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
		 ntohs(enc_ports.key->dst));

	/* The UDP src port on the outer header is generated by HW, so it is
	 * probably a bad idea to request matching it. Nonetheless, it is
	 * allowed.
	 */

	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
		 ntohs(enc_ports.mask->src));
	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
		 ntohs(enc_ports.key->src));

	return 0;
}