xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c (revision 896d8946da97332d4dc80fa1937d8dd6b1c35ad4)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3 
4 #include <net/fib_notifier.h>
5 #include <net/nexthop.h>
6 #include <net/ip_tunnels.h>
7 #include "tc_tun_encap.h"
8 #include "fs_core.h"
9 #include "en_tc.h"
10 #include "tc_tun.h"
11 #include "rep/tc.h"
12 #include "diag/en_tc_tracepoint.h"
13 
14 enum {
15 	MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
16 };
17 
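/* Set up forwarding to an internal (OVS master) port when the encap route
 * device is an OVS bridge. Matching on the uplink with encap + forward-to-table
 * actions is rejected in firmware (DMFS) steering mode.
 */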
18 static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv,
19 				     struct mlx5_flow_attr *attr,
20 				     struct mlx5e_encap_entry *e,
21 				     int out_index)
22 {
23 	struct net_device *route_dev;
24 	int err = 0;
25 
26 	route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex);
27 
28 	if (!route_dev || !netif_is_ovs_master(route_dev))
29 		goto out;
30 
31 	if (priv->mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS &&
32 	    mlx5e_eswitch_uplink_rep(attr->parse_attr->filter_dev) &&
33 	    (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) {
34 		mlx5_core_warn(priv->mdev,
35 			       "Matching on external port with encap + fwd to table actions is not allowed for firmware steering\n");
36 		err = -EINVAL;
37 		goto out;
38 	}
39 
40 	err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex,
41 						MLX5E_TC_INT_PORT_EGRESS,
42 						&attr->action, out_index);
43 
44 out:
45 	if (route_dev)
46 		dev_put(route_dev);
47 
48 	return err;
49 }
50 
51 struct mlx5e_route_key {
52 	int ip_version;
53 	union {
54 		__be32 v4;
55 		struct in6_addr v6;
56 	} endpoint_ip;
57 };
58 
59 struct mlx5e_route_entry {
60 	struct mlx5e_route_key key;
61 	struct list_head encap_entries;
62 	struct list_head decap_flows;
63 	u32 flags;
64 	struct hlist_node hlist;
65 	refcount_t refcnt;
66 	int tunnel_dev_index;
67 	struct rcu_head rcu;
68 };
69 
70 struct mlx5e_tc_tun_encap {
71 	struct mlx5e_priv *priv;
72 	struct notifier_block fib_nb;
73 	spinlock_t route_lock; /* protects route_tbl */
74 	unsigned long route_tbl_last_update;
75 	DECLARE_HASHTABLE(route_tbl, 8);
76 };
77 
78 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
79 {
80 	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
81 }
82 
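/* Allocate rx_tun_attr for a tunnel decap flow and copy the outer source and
 * destination IP match from the flow spec. TUN_RX is only flagged when both
 * addresses are present, since both are required to establish routing.
 */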
83 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
84 			     struct mlx5_flow_spec *spec)
85 {
86 	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
87 	struct mlx5_rx_tun_attr *tun_attr;
88 	void *daddr, *saddr;
89 	u8 ip_version;
90 
91 	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
92 	if (!tun_attr)
93 		return -ENOMEM;
94 
95 	esw_attr->rx_tun_attr = tun_attr;
96 	ip_version = mlx5e_tc_get_ip_version(spec, true);
97 
98 	if (ip_version == 4) {
99 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
100 				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
101 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
102 				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
103 		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
104 		tun_attr->src_ip.v4 = *(__be32 *)saddr;
105 		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
106 			return 0;
107 	}
108 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
109 	else if (ip_version == 6) {
110 		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
111 
112 		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
113 				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
114 		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
115 				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
116 		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
117 		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
118 		if (ipv6_addr_any(&tun_attr->dst_ip.v6) ||
119 		    ipv6_addr_any(&tun_attr->src_ip.v6))
120 			return 0;
121 	}
122 #endif
123 	/* Only set the flag if both src and dst ip addresses exist. They are
124 	 * required to establish routing.
125 	 */
126 	flow_flag_set(flow, TUN_RX);
127 	flow->attr->tun_ip_version = ip_version;
128 	return 0;
129 }
130 
131 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
132 {
133 	bool all_flow_encaps_valid = true;
134 	int i;
135 
136 	/* Flow can be associated with multiple encap entries.
137 	 * Before offloading the flow verify that all of them have
138 	 * a valid neighbour.
139 	 */
140 	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
141 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
142 			continue;
143 		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
144 			all_flow_encaps_valid = false;
145 			break;
146 		}
147 	}
148 
149 	return all_flow_encaps_valid;
150 }
151 
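/* Offload the cached encapsulation header and move the flows attached to this
 * encap entry from their slow path rules to encap (fast path) rules. Flows
 * that still have other unresolved encap destinations stay on the slow path.
 */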
152 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
153 			      struct mlx5e_encap_entry *e,
154 			      struct list_head *flow_list)
155 {
156 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
157 	struct mlx5_pkt_reformat_params reformat_params;
158 	struct mlx5_esw_flow_attr *esw_attr;
159 	struct mlx5_flow_handle *rule;
160 	struct mlx5_flow_attr *attr;
161 	struct mlx5_flow_spec *spec;
162 	struct mlx5e_tc_flow *flow;
163 	int err;
164 
165 	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
166 		return;
167 
168 	memset(&reformat_params, 0, sizeof(reformat_params));
169 	reformat_params.type = e->reformat_type;
170 	reformat_params.size = e->encap_size;
171 	reformat_params.data = e->encap_header;
172 	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
173 						     &reformat_params,
174 						     MLX5_FLOW_NAMESPACE_FDB);
175 	if (IS_ERR(e->pkt_reformat)) {
176 		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
177 			       PTR_ERR(e->pkt_reformat));
178 		return;
179 	}
180 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
181 	mlx5e_rep_queue_neigh_stats_work(priv);
182 
183 	list_for_each_entry(flow, flow_list, tmp_list) {
184 		if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
185 			continue;
186 
187 		spec = &flow->attr->parse_attr->spec;
188 
189 		attr = mlx5e_tc_get_encap_attr(flow);
190 		esw_attr = attr->esw_attr;
191 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
192 		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
193 
194 		/* Do not offload flows with unresolved neighbors */
195 		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
196 			continue;
197 
198 		err = mlx5e_tc_offload_flow_post_acts(flow);
199 		if (err) {
200 			mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
201 				       err);
202 			continue;
203 		}
204 
205 		/* update from slow path rule to encap rule */
206 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
207 		if (IS_ERR(rule)) {
208 			mlx5e_tc_unoffload_flow_post_acts(flow);
209 			err = PTR_ERR(rule);
210 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
211 				       err);
212 			continue;
213 		}
214 
215 		mlx5e_tc_unoffload_from_slow_path(esw, flow);
216 		flow->rule[0] = rule;
217 		/* was unset when slow path rule removed */
218 		flow_flag_set(flow, OFFLOADED);
219 	}
220 }
221 
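/* Move all offloaded flows attached to this encap entry back to the slow path,
 * mark their encap destination as invalid and release the entry's packet
 * reformat.
 */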
222 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
223 			      struct mlx5e_encap_entry *e,
224 			      struct list_head *flow_list)
225 {
226 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
227 	struct mlx5_esw_flow_attr *esw_attr;
228 	struct mlx5_flow_handle *rule;
229 	struct mlx5_flow_attr *attr;
230 	struct mlx5_flow_spec *spec;
231 	struct mlx5e_tc_flow *flow;
232 	int err;
233 
234 	list_for_each_entry(flow, flow_list, tmp_list) {
235 		if (!mlx5e_is_offloaded_flow(flow))
236 			continue;
237 
238 		attr = mlx5e_tc_get_encap_attr(flow);
239 		esw_attr = attr->esw_attr;
240 		/* mark the flow's encap dest as non-valid */
241 		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
242 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
243 
244 	/* Clear pkt_reformat before checking the slow path flag: a flow
245 	 * that already has the slow path flag set (e.g. from a previous
246 	 * iteration) still needs its pkt_reformat cleared.
247 	 */
248 		if (flow_flag_test(flow, SLOW))
249 			continue;
250 
251 		/* update from encap rule to slow path rule */
252 		spec = &flow->attr->parse_attr->spec;
253 		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
254 
255 		if (IS_ERR(rule)) {
256 			err = PTR_ERR(rule);
257 			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
258 				       err);
259 			continue;
260 		}
261 
262 		mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
263 		mlx5e_tc_unoffload_flow_post_acts(flow);
264 		flow->rule[0] = rule;
265 		/* was unset when fast path rule removed */
266 		flow_flag_set(flow, OFFLOADED);
267 	}
268 
269 	/* we know that the encap is valid */
270 	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
271 	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
272 	e->pkt_reformat = NULL;
273 }
274 
275 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
276 				struct list_head *flow_list,
277 				int index)
278 {
279 	if (IS_ERR(mlx5e_flow_get(flow))) {
280 		/* Flow is being deleted concurrently. Wait for it to be
281 		 * unoffloaded from hardware, otherwise deleting encap will
282 		 * fail.
283 		 */
284 		wait_for_completion(&flow->del_hw_done);
285 		return;
286 	}
287 	wait_for_completion(&flow->init_done);
288 
289 	flow->tmp_entry_index = index;
290 	list_add(&flow->tmp_list, flow_list);
291 }
292 
293 /* Takes reference to all flows attached to encap and adds the flows to
294  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
295  */
296 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
297 {
298 	struct encap_flow_item *efi;
299 	struct mlx5e_tc_flow *flow;
300 
301 	list_for_each_entry(efi, &e->flows, list) {
302 		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
303 		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
304 	}
305 }
306 
307 /* Takes reference to all flows attached to route and adds the flows to
308  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
309  */
310 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
311 					     struct list_head *flow_list)
312 {
313 	struct mlx5e_tc_flow *flow;
314 
315 	list_for_each_entry(flow, &r->decap_flows, decap_routes)
316 		mlx5e_take_tmp_flow(flow, flow_list, 0);
317 }
318 
319 typedef bool (match_cb)(struct mlx5e_encap_entry *);
320 
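/* Walk the nhe encap list under RCU, taking a reference on each candidate and
 * releasing the previously returned entry, until an entry that satisfies the
 * match callback is found. Waits for each candidate to finish initialization.
 */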
321 static struct mlx5e_encap_entry *
322 mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
323 			      struct mlx5e_encap_entry *e,
324 			      match_cb match)
325 {
326 	struct mlx5e_encap_entry *next = NULL;
327 
328 retry:
329 	rcu_read_lock();
330 
331 	/* find encap with non-zero reference counter value */
332 	for (next = e ?
333 		     list_next_or_null_rcu(&nhe->encap_list,
334 					   &e->encap_list,
335 					   struct mlx5e_encap_entry,
336 					   encap_list) :
337 		     list_first_or_null_rcu(&nhe->encap_list,
338 					    struct mlx5e_encap_entry,
339 					    encap_list);
340 	     next;
341 	     next = list_next_or_null_rcu(&nhe->encap_list,
342 					  &next->encap_list,
343 					  struct mlx5e_encap_entry,
344 					  encap_list))
345 		if (mlx5e_encap_take(next))
346 			break;
347 
348 	rcu_read_unlock();
349 
350 	/* release starting encap */
351 	if (e)
352 		mlx5e_encap_put(netdev_priv(e->out_dev), e);
353 	if (!next)
354 		return next;
355 
356 	/* wait for encap to be fully initialized */
357 	wait_for_completion(&next->res_ready);
358 	/* continue searching if encap entry is not in valid state after completion */
359 	if (!match(next)) {
360 		e = next;
361 		goto retry;
362 	}
363 
364 	return next;
365 }
366 
367 static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
368 {
369 	return e->flags & MLX5_ENCAP_ENTRY_VALID;
370 }
371 
372 static struct mlx5e_encap_entry *
373 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
374 			   struct mlx5e_encap_entry *e)
375 {
376 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
377 }
378 
379 static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
380 {
381 	return e->compl_result >= 0;
382 }
383 
384 struct mlx5e_encap_entry *
385 mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
386 			  struct mlx5e_encap_entry *e)
387 {
388 	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
389 }
390 
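/* Check whether any offloaded flow using this neighbour has seen traffic since
 * the last report (flow counter lastuse) and, if so, kick the neighbour state
 * machine with neigh_event_send() to signal that it is still in use.
 */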
391 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
392 {
393 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
394 	struct mlx5e_encap_entry *e = NULL;
395 	struct mlx5e_tc_flow *flow;
396 	struct mlx5_fc *counter;
397 	struct neigh_table *tbl;
398 	bool neigh_used = false;
399 	struct neighbour *n;
400 	u64 lastuse;
401 
402 	if (m_neigh->family == AF_INET)
403 		tbl = &arp_tbl;
404 #if IS_ENABLED(CONFIG_IPV6)
405 	else if (m_neigh->family == AF_INET6)
406 		tbl = ipv6_stub->nd_tbl;
407 #endif
408 	else
409 		return;
410 
411 	/* mlx5e_get_next_valid_encap() releases previous encap before returning
412 	 * next one.
413 	 */
414 	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
415 		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
416 		struct encap_flow_item *efi, *tmp;
417 		struct mlx5_eswitch *esw;
418 		LIST_HEAD(flow_list);
419 
420 		esw = priv->mdev->priv.eswitch;
421 		mutex_lock(&esw->offloads.encap_tbl_lock);
422 		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
423 			flow = container_of(efi, struct mlx5e_tc_flow,
424 					    encaps[efi->index]);
425 			if (IS_ERR(mlx5e_flow_get(flow)))
426 				continue;
427 			list_add(&flow->tmp_list, &flow_list);
428 
429 			if (mlx5e_is_offloaded_flow(flow)) {
430 				counter = mlx5e_tc_get_counter(flow);
431 				lastuse = mlx5_fc_query_lastuse(counter);
432 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
433 					neigh_used = true;
434 					break;
435 				}
436 			}
437 		}
438 		mutex_unlock(&esw->offloads.encap_tbl_lock);
439 
440 		mlx5e_put_flow_list(priv, &flow_list);
441 		if (neigh_used) {
442 			/* release current encap before breaking the loop */
443 			mlx5e_encap_put(priv, e);
444 			break;
445 		}
446 	}
447 
448 	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
449 
450 	if (neigh_used) {
451 		nhe->reported_lastuse = jiffies;
452 
453 		/* find the relevant neigh according to the cached device and
454 		 * dst ip pair
455 		 */
456 		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
457 		if (!n)
458 			return;
459 
460 		neigh_event_send(n, NULL);
461 		neigh_release(n);
462 	}
463 }
464 
465 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
466 {
467 	WARN_ON(!list_empty(&e->flows));
468 
469 	if (e->compl_result > 0) {
470 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
471 
472 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
473 			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
474 	}
475 
476 	kfree(e->tun_info);
477 	kfree(e->encap_header);
478 	kfree_rcu(e, rcu);
479 }
480 
481 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
482 				struct mlx5e_decap_entry *d)
483 {
484 	WARN_ON(!list_empty(&d->flows));
485 
486 	if (!d->compl_result)
487 		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
488 
489 	kfree_rcu(d, rcu);
490 }
491 
492 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
493 {
494 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
495 
496 	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
497 		return;
498 	list_del(&e->route_list);
499 	hash_del_rcu(&e->encap_hlist);
500 	mutex_unlock(&esw->offloads.encap_tbl_lock);
501 
502 	mlx5e_encap_dealloc(priv, e);
503 }
504 
505 static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
506 {
507 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
508 
509 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
510 
511 	if (!refcount_dec_and_test(&e->refcnt))
512 		return;
513 	list_del(&e->route_list);
514 	hash_del_rcu(&e->encap_hlist);
515 	mlx5e_encap_dealloc(priv, e);
516 }
517 
518 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
519 {
520 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
521 
522 	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
523 		return;
524 	hash_del_rcu(&d->hlist);
525 	mutex_unlock(&esw->offloads.decap_tbl_lock);
526 
527 	mlx5e_decap_dealloc(priv, d);
528 }
529 
530 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
531 				     struct mlx5e_tc_flow *flow,
532 				     int out_index);
533 
534 void mlx5e_detach_encap(struct mlx5e_priv *priv,
535 			struct mlx5e_tc_flow *flow,
536 			struct mlx5_flow_attr *attr,
537 			int out_index)
538 {
539 	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
540 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
541 
542 	if (!mlx5e_is_eswitch_flow(flow))
543 		return;
544 
545 	if (attr->esw_attr->dests[out_index].flags &
546 	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
547 		mlx5e_detach_encap_route(priv, flow, out_index);
548 
549 	/* flow wasn't fully initialized */
550 	if (!e)
551 		return;
552 
553 	mutex_lock(&esw->offloads.encap_tbl_lock);
554 	list_del(&flow->encaps[out_index].list);
555 	flow->encaps[out_index].e = NULL;
556 	if (!refcount_dec_and_test(&e->refcnt)) {
557 		mutex_unlock(&esw->offloads.encap_tbl_lock);
558 		return;
559 	}
560 	list_del(&e->route_list);
561 	hash_del_rcu(&e->encap_hlist);
562 	mutex_unlock(&esw->offloads.encap_tbl_lock);
563 
564 	mlx5e_encap_dealloc(priv, e);
565 }
566 
567 void mlx5e_detach_decap(struct mlx5e_priv *priv,
568 			struct mlx5e_tc_flow *flow)
569 {
570 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
571 	struct mlx5e_decap_entry *d = flow->decap_reformat;
572 
573 	if (!d)
574 		return;
575 
576 	mutex_lock(&esw->offloads.decap_tbl_lock);
577 	list_del(&flow->l3_to_l2_reformat);
578 	flow->decap_reformat = NULL;
579 
580 	if (!refcount_dec_and_test(&d->refcnt)) {
581 		mutex_unlock(&esw->offloads.decap_tbl_lock);
582 		return;
583 	}
584 	hash_del_rcu(&d->hlist);
585 	mutex_unlock(&esw->offloads.decap_tbl_lock);
586 
587 	mlx5e_decap_dealloc(priv, d);
588 }
589 
590 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
591 					   struct mlx5e_encap_key *b)
592 {
593 	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
594 		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
595 }
596 
597 bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
598 					   struct mlx5e_encap_key *b,
599 					   u32 tun_type)
600 {
601 	struct ip_tunnel_info *a_info;
602 	struct ip_tunnel_info *b_info;
603 	bool a_has_opts, b_has_opts;
604 
605 	if (!mlx5e_tc_tun_encap_info_equal_generic(a, b))
606 		return false;
607 
608 	a_has_opts = test_bit(tun_type, a->ip_tun_key->tun_flags);
609 	b_has_opts = test_bit(tun_type, b->ip_tun_key->tun_flags);
610 
611 	/* keys are equal when both don't have any options attached */
612 	if (!a_has_opts && !b_has_opts)
613 		return true;
614 
615 	if (a_has_opts != b_has_opts)
616 		return false;
617 
618 	/* options stored in memory next to ip_tunnel_info struct */
619 	a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key);
620 	b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
621 
622 	return a_info->options_len == b_info->options_len &&
623 	       !memcmp(ip_tunnel_info_opts(a_info),
624 		       ip_tunnel_info_opts(b_info),
625 		       a_info->options_len);
626 }
627 
628 static int cmp_decap_info(struct mlx5e_decap_key *a,
629 			  struct mlx5e_decap_key *b)
630 {
631 	return memcmp(&a->key, &b->key, sizeof(b->key));
632 }
633 
634 static int hash_encap_info(struct mlx5e_encap_key *key)
635 {
636 	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
637 		     key->tc_tunnel->tunnel_type);
638 }
639 
640 static int hash_decap_info(struct mlx5e_decap_key *key)
641 {
642 	return jhash(&key->key, sizeof(key->key), 0);
643 }
644 
645 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
646 {
647 	return refcount_inc_not_zero(&e->refcnt);
648 }
649 
650 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
651 {
652 	return refcount_inc_not_zero(&e->refcnt);
653 }
654 
655 static struct mlx5e_encap_entry *
656 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
657 		uintptr_t hash_key)
658 {
659 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
660 	struct mlx5e_encap_key e_key;
661 	struct mlx5e_encap_entry *e;
662 
663 	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
664 				   encap_hlist, hash_key) {
665 		e_key.ip_tun_key = &e->tun_info->key;
666 		e_key.tc_tunnel = e->tunnel;
667 		if (e->tunnel->encap_info_equal(&e_key, key) &&
668 		    mlx5e_encap_take(e))
669 			return e;
670 	}
671 
672 	return NULL;
673 }
674 
675 static struct mlx5e_decap_entry *
676 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
677 		uintptr_t hash_key)
678 {
679 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
680 	struct mlx5e_decap_key r_key;
681 	struct mlx5e_decap_entry *e;
682 
683 	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
684 				   hlist, hash_key) {
685 		r_key = e->key;
686 		if (!cmp_decap_info(&r_key, key) &&
687 		    mlx5e_decap_take(e))
688 			return e;
689 	}
690 	return NULL;
691 }
692 
693 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
694 {
695 	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
696 
697 	return kmemdup(tun_info, tun_size, GFP_KERNEL);
698 }
699 
700 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
701 				      struct mlx5e_tc_flow *flow,
702 				      int out_index,
703 				      struct mlx5e_encap_entry *e,
704 				      struct netlink_ext_ack *extack)
705 {
706 	int i;
707 
708 	for (i = 0; i < out_index; i++) {
709 		if (flow->encaps[i].e != e)
710 			continue;
711 		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
712 		netdev_err(priv->netdev, "can't duplicate encap action\n");
713 		return true;
714 	}
715 
716 	return false;
717 }
718 
719 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
720 			       struct mlx5_flow_attr *attr,
721 			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
722 			       struct net_device *out_dev,
723 			       int route_dev_ifindex,
724 			       int out_index)
725 {
726 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
727 	struct net_device *route_dev;
728 	u16 vport_num;
729 	int err = 0;
730 	u32 data;
731 
732 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
733 
734 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
735 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
736 		goto out;
737 
738 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
739 	if (err)
740 		goto out;
741 
742 	attr->dest_chain = 0;
743 	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
744 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
745 	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
746 						       vport_num);
747 	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
748 						   MLX5_FLOW_NAMESPACE_FDB,
749 						   VPORT_TO_REG, data);
750 	if (err >= 0) {
751 		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
752 		err = 0;
753 	}
754 
755 out:
756 	if (route_dev)
757 		dev_put(route_dev);
758 	return err;
759 }
760 
761 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
762 				  struct mlx5_esw_flow_attr *attr,
763 				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
764 				  struct net_device *out_dev,
765 				  int route_dev_ifindex,
766 				  int out_index)
767 {
768 	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
769 	struct net_device *route_dev;
770 	u16 vport_num;
771 	int err = 0;
772 	u32 data;
773 
774 	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
775 
776 	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
777 	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
778 		err = -ENODEV;
779 		goto out;
780 	}
781 
782 	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
783 	if (err)
784 		goto out;
785 
786 	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
787 						       vport_num);
788 	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
789 
790 out:
791 	if (route_dev)
792 		dev_put(route_dev);
793 	return err;
794 }
795 
796 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
797 {
798 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
799 	struct mlx5_rep_uplink_priv *uplink_priv;
800 	struct mlx5e_rep_priv *uplink_rpriv;
801 	struct mlx5e_tc_tun_encap *encap;
802 	unsigned int ret;
803 
804 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
805 	uplink_priv = &uplink_rpriv->uplink_priv;
806 	encap = uplink_priv->encap;
807 
808 	spin_lock_bh(&encap->route_lock);
809 	ret = encap->route_tbl_last_update;
810 	spin_unlock_bh(&encap->route_lock);
811 	return ret;
812 }
813 
814 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
815 				    struct mlx5e_tc_flow *flow,
816 				    struct mlx5_flow_attr *attr,
817 				    struct mlx5e_encap_entry *e,
818 				    bool new_encap_entry,
819 				    unsigned long tbl_time_before,
820 				    int out_index);
821 
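/* Find or create an encap entry for the tunnel described by the flow's
 * tun_info and attach the flow to it. If the entry is valid its packet
 * reformat is used immediately, otherwise the flow is marked for the slow
 * path. Must be called with encap_tbl_lock held.
 */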
822 int mlx5e_attach_encap(struct mlx5e_priv *priv,
823 		       struct mlx5e_tc_flow *flow,
824 		       struct mlx5_flow_attr *attr,
825 		       struct net_device *mirred_dev,
826 		       int out_index,
827 		       struct netlink_ext_ack *extack,
828 		       struct net_device **encap_dev)
829 {
830 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
831 	struct mlx5e_tc_flow_parse_attr *parse_attr;
832 	const struct ip_tunnel_info *tun_info;
833 	const struct mlx5e_mpls_info *mpls_info;
834 	unsigned long tbl_time_before = 0;
835 	struct mlx5e_encap_entry *e;
836 	struct mlx5e_encap_key key;
837 	bool entry_created = false;
838 	unsigned short family;
839 	uintptr_t hash_key;
840 	int err = 0;
841 
842 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
843 
844 	parse_attr = attr->parse_attr;
845 	tun_info = parse_attr->tun_info[out_index];
846 	mpls_info = &parse_attr->mpls_info[out_index];
847 	family = ip_tunnel_info_af(tun_info);
848 	key.ip_tun_key = &tun_info->key;
849 	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
850 	if (!key.tc_tunnel) {
851 		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
852 		return -EOPNOTSUPP;
853 	}
854 
855 	hash_key = hash_encap_info(&key);
856 
857 	e = mlx5e_encap_get(priv, &key, hash_key);
858 
859 	/* must verify if encap is valid or not */
860 	if (e) {
861 		/* Check that entry was not already attached to this flow */
862 		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
863 			err = -EOPNOTSUPP;
864 			goto out_err;
865 		}
866 
867 		goto attach_flow;
868 	}
869 
870 	e = kzalloc(sizeof(*e), GFP_KERNEL);
871 	if (!e) {
872 		err = -ENOMEM;
873 		goto out_err;
874 	}
875 
876 	refcount_set(&e->refcnt, 1);
877 	init_completion(&e->res_ready);
878 	entry_created = true;
879 	INIT_LIST_HEAD(&e->route_list);
880 
881 	tun_info = mlx5e_dup_tun_info(tun_info);
882 	if (!tun_info) {
883 		err = -ENOMEM;
884 		goto out_err_init;
885 	}
886 	e->tun_info = tun_info;
887 	memcpy(&e->mpls_info, mpls_info, sizeof(*mpls_info));
888 	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
889 	if (err)
890 		goto out_err_init;
891 
892 	INIT_LIST_HEAD(&e->flows);
893 	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
894 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
895 
896 	if (family == AF_INET)
897 		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
898 	else if (family == AF_INET6)
899 		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
900 
901 	complete_all(&e->res_ready);
902 	if (err) {
903 		e->compl_result = err;
904 		goto out_err;
905 	}
906 	e->compl_result = 1;
907 
908 attach_flow:
909 	err = mlx5e_attach_encap_route(priv, flow, attr, e, entry_created,
910 				       tbl_time_before, out_index);
911 	if (err)
912 		goto out_err;
913 
914 	err = mlx5e_set_int_port_tunnel(priv, attr, e, out_index);
915 	if (err == -EOPNOTSUPP) {
916 		/* If device doesn't support int port offload,
917 		 * redirect to uplink vport.
918 		 */
919 		mlx5_core_dbg(priv->mdev, "attaching int port as encap dev not supported, using uplink\n");
920 		err = 0;
921 	} else if (err) {
922 		goto out_err;
923 	}
924 
925 	flow->encaps[out_index].e = e;
926 	list_add(&flow->encaps[out_index].list, &e->flows);
927 	flow->encaps[out_index].index = out_index;
928 	*encap_dev = e->out_dev;
929 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
930 		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
931 		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
932 	} else {
933 		flow_flag_set(flow, SLOW);
934 	}
935 
936 	return err;
937 
938 out_err:
939 	if (e)
940 		mlx5e_encap_put_locked(priv, e);
941 	return err;
942 
943 out_err_init:
944 	kfree(tun_info);
945 	kfree(e);
946 	return err;
947 }
948 
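/* Find or create an L3-tunnel-to-L2 decap reformat entry matching the flow's
 * reconstructed Ethernet header and attach the flow to it.
 */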
949 int mlx5e_attach_decap(struct mlx5e_priv *priv,
950 		       struct mlx5e_tc_flow *flow,
951 		       struct netlink_ext_ack *extack)
952 {
953 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
954 	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
955 	struct mlx5_pkt_reformat_params reformat_params;
956 	struct mlx5e_decap_entry *d;
957 	struct mlx5e_decap_key key;
958 	uintptr_t hash_key;
959 	int err = 0;
960 
961 	if (sizeof(attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
962 		NL_SET_ERR_MSG_MOD(extack,
963 				   "encap header larger than max supported");
964 		return -EOPNOTSUPP;
965 	}
966 
967 	key.key = attr->eth;
968 	hash_key = hash_decap_info(&key);
969 	mutex_lock(&esw->offloads.decap_tbl_lock);
970 	d = mlx5e_decap_get(priv, &key, hash_key);
971 	if (d) {
972 		mutex_unlock(&esw->offloads.decap_tbl_lock);
973 		wait_for_completion(&d->res_ready);
974 		mutex_lock(&esw->offloads.decap_tbl_lock);
975 		if (d->compl_result) {
976 			err = -EREMOTEIO;
977 			goto out_free;
978 		}
979 		goto found;
980 	}
981 
982 	d = kzalloc(sizeof(*d), GFP_KERNEL);
983 	if (!d) {
984 		err = -ENOMEM;
985 		goto out_err;
986 	}
987 
988 	d->key = key;
989 	refcount_set(&d->refcnt, 1);
990 	init_completion(&d->res_ready);
991 	INIT_LIST_HEAD(&d->flows);
992 	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
993 	mutex_unlock(&esw->offloads.decap_tbl_lock);
994 
995 	memset(&reformat_params, 0, sizeof(reformat_params));
996 	reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
997 	reformat_params.size = sizeof(attr->eth);
998 	reformat_params.data = &attr->eth;
999 	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
1000 						     &reformat_params,
1001 						     MLX5_FLOW_NAMESPACE_FDB);
1002 	if (IS_ERR(d->pkt_reformat)) {
1003 		err = PTR_ERR(d->pkt_reformat);
1004 		d->compl_result = err;
1005 	}
1006 	mutex_lock(&esw->offloads.decap_tbl_lock);
1007 	complete_all(&d->res_ready);
1008 	if (err)
1009 		goto out_free;
1010 
1011 found:
1012 	flow->decap_reformat = d;
1013 	attr->decap_pkt_reformat = d->pkt_reformat;
1014 	list_add(&flow->l3_to_l2_reformat, &d->flows);
1015 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1016 	return 0;
1017 
1018 out_free:
1019 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1020 	mlx5e_decap_put(priv, d);
1021 	return err;
1022 
1023 out_err:
1024 	mutex_unlock(&esw->offloads.decap_tbl_lock);
1025 	return err;
1026 }
1027 
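/* Attach encap entries for every destination of the flow that requires
 * encapsulation and fill in the corresponding egress vports. *vf_tun is set
 * when a destination tunnels through a VF and needs source port rewrite.
 */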
1028 int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
1029 				 struct mlx5e_tc_flow *flow,
1030 				 struct mlx5_flow_attr *attr,
1031 				 struct netlink_ext_ack *extack,
1032 				 bool *vf_tun)
1033 {
1034 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1035 	struct mlx5_esw_flow_attr *esw_attr;
1036 	struct net_device *encap_dev = NULL;
1037 	struct mlx5e_rep_priv *rpriv;
1038 	struct mlx5e_priv *out_priv;
1039 	struct mlx5_eswitch *esw;
1040 	int out_index;
1041 	int err = 0;
1042 
1043 	parse_attr = attr->parse_attr;
1044 	esw_attr = attr->esw_attr;
1045 	*vf_tun = false;
1046 
1047 	esw = priv->mdev->priv.eswitch;
1048 	mutex_lock(&esw->offloads.encap_tbl_lock);
1049 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1050 		struct net_device *out_dev;
1051 		int mirred_ifindex;
1052 
1053 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1054 			continue;
1055 
1056 		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1057 		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1058 		if (!out_dev) {
1059 			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1060 			err = -ENODEV;
1061 			goto out;
1062 		}
1063 		err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1064 					 extack, &encap_dev);
1065 		dev_put(out_dev);
1066 		if (err)
1067 			goto out;
1068 
1069 		if (esw_attr->dests[out_index].flags &
1070 		    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1071 		    !esw_attr->dest_int_port)
1072 			*vf_tun = true;
1073 
1074 		out_priv = netdev_priv(encap_dev);
1075 		rpriv = out_priv->ppriv;
1076 		esw_attr->dests[out_index].vport_valid = true;
1077 		esw_attr->dests[out_index].vport = rpriv->rep->vport;
1078 		esw_attr->dests[out_index].mdev = out_priv->mdev;
1079 	}
1080 
1081 	if (*vf_tun && esw_attr->out_count > 1) {
1082 		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1083 		err = -EOPNOTSUPP;
1084 		goto out;
1085 	}
1086 
1087 out:
1088 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1089 	return err;
1090 }
1091 
1092 void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv,
1093 				    struct mlx5e_tc_flow *flow,
1094 				    struct mlx5_flow_attr *attr)
1095 {
1096 	struct mlx5_esw_flow_attr *esw_attr;
1097 	int out_index;
1098 
1099 	if (!mlx5e_is_eswitch_flow(flow))
1100 		return;
1101 
1102 	esw_attr = attr->esw_attr;
1103 
1104 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1105 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1106 			continue;
1107 
1108 		mlx5e_detach_encap(flow->priv, flow, attr, out_index);
1109 		kfree(attr->parse_attr->tun_info[out_index]);
1110 	}
1111 }
1112 
1113 static int cmp_route_info(struct mlx5e_route_key *a,
1114 			  struct mlx5e_route_key *b)
1115 {
1116 	if (a->ip_version == 4 && b->ip_version == 4)
1117 		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
1118 			      sizeof(a->endpoint_ip.v4));
1119 	else if (a->ip_version == 6 && b->ip_version == 6)
1120 		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
1121 			      sizeof(a->endpoint_ip.v6));
1122 	return 1;
1123 }
1124 
1125 static u32 hash_route_info(struct mlx5e_route_key *key)
1126 {
1127 	if (key->ip_version == 4)
1128 		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
1129 	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
1130 }
1131 
1132 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
1133 				struct mlx5e_route_entry *r)
1134 {
1135 	WARN_ON(!list_empty(&r->decap_flows));
1136 	WARN_ON(!list_empty(&r->encap_entries));
1137 
1138 	kfree_rcu(r, rcu);
1139 }
1140 
1141 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1142 {
1143 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1144 
1145 	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
1146 		return;
1147 
1148 	hash_del_rcu(&r->hlist);
1149 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1150 
1151 	mlx5e_route_dealloc(priv, r);
1152 }
1153 
1154 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
1155 {
1156 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1157 
1158 	lockdep_assert_held(&esw->offloads.encap_tbl_lock);
1159 
1160 	if (!refcount_dec_and_test(&r->refcnt))
1161 		return;
1162 	hash_del_rcu(&r->hlist);
1163 	mlx5e_route_dealloc(priv, r);
1164 }
1165 
1166 static struct mlx5e_route_entry *
1167 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
1168 		u32 hash_key)
1169 {
1170 	struct mlx5e_route_key r_key;
1171 	struct mlx5e_route_entry *r;
1172 
1173 	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
1174 		r_key = r->key;
1175 		if (!cmp_route_info(&r_key, key) &&
1176 		    refcount_inc_not_zero(&r->refcnt))
1177 			return r;
1178 	}
1179 	return NULL;
1180 }
1181 
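/* Find an existing route entry for the given endpoint IP or create a new one.
 * Records the route table timestamp so callers can detect FIB updates that
 * raced with entry creation.
 */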
1182 static struct mlx5e_route_entry *
1183 mlx5e_route_get_create(struct mlx5e_priv *priv,
1184 		       struct mlx5e_route_key *key,
1185 		       int tunnel_dev_index,
1186 		       unsigned long *route_tbl_change_time)
1187 {
1188 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1189 	struct mlx5_rep_uplink_priv *uplink_priv;
1190 	struct mlx5e_rep_priv *uplink_rpriv;
1191 	struct mlx5e_tc_tun_encap *encap;
1192 	struct mlx5e_route_entry *r;
1193 	u32 hash_key;
1194 
1195 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1196 	uplink_priv = &uplink_rpriv->uplink_priv;
1197 	encap = uplink_priv->encap;
1198 
1199 	hash_key = hash_route_info(key);
1200 	spin_lock_bh(&encap->route_lock);
1201 	r = mlx5e_route_get(encap, key, hash_key);
1202 	spin_unlock_bh(&encap->route_lock);
1203 	if (r) {
1204 		if (!mlx5e_route_entry_valid(r)) {
1205 			mlx5e_route_put_locked(priv, r);
1206 			return ERR_PTR(-EINVAL);
1207 		}
1208 		return r;
1209 	}
1210 
1211 	r = kzalloc(sizeof(*r), GFP_KERNEL);
1212 	if (!r)
1213 		return ERR_PTR(-ENOMEM);
1214 
1215 	r->key = *key;
1216 	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1217 	r->tunnel_dev_index = tunnel_dev_index;
1218 	refcount_set(&r->refcnt, 1);
1219 	INIT_LIST_HEAD(&r->decap_flows);
1220 	INIT_LIST_HEAD(&r->encap_entries);
1221 
1222 	spin_lock_bh(&encap->route_lock);
1223 	*route_tbl_change_time = encap->route_tbl_last_update;
1224 	hash_add(encap->route_tbl, &r->hlist, hash_key);
1225 	spin_unlock_bh(&encap->route_lock);
1226 
1227 	return r;
1228 }
1229 
1230 static struct mlx5e_route_entry *
1231 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1232 {
1233 	u32 hash_key = hash_route_info(key);
1234 	struct mlx5e_route_entry *r;
1235 
1236 	spin_lock_bh(&encap->route_lock);
1237 	encap->route_tbl_last_update = jiffies;
1238 	r = mlx5e_route_get(encap, key, hash_key);
1239 	spin_unlock_bh(&encap->route_lock);
1240 
1241 	return r;
1242 }
1243 
1244 struct mlx5e_tc_fib_event_data {
1245 	struct work_struct work;
1246 	unsigned long event;
1247 	struct mlx5e_route_entry *r;
1248 	struct net_device *ul_dev;
1249 };
1250 
1251 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1252 static struct mlx5e_tc_fib_event_data *
1253 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1254 {
1255 	struct mlx5e_tc_fib_event_data *fib_work;
1256 
1257 	fib_work = kzalloc(sizeof(*fib_work), flags);
1258 	if (WARN_ON(!fib_work))
1259 		return NULL;
1260 
1261 	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1262 	fib_work->event = event;
1263 	fib_work->ul_dev = ul_dev;
1264 
1265 	return fib_work;
1266 }
1267 
1268 static int
1269 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1270 			   struct mlx5e_route_entry *r,
1271 			   unsigned long event)
1272 {
1273 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1274 	struct mlx5e_tc_fib_event_data *fib_work;
1275 	struct mlx5e_rep_priv *uplink_rpriv;
1276 	struct net_device *ul_dev;
1277 
1278 	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1279 	ul_dev = uplink_rpriv->netdev;
1280 
1281 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1282 	if (!fib_work)
1283 		return -ENOMEM;
1284 
1285 	dev_hold(ul_dev);
1286 	refcount_inc(&r->refcnt);
1287 	fib_work->r = r;
1288 	queue_work(priv->wq, &fib_work->work);
1289 
1290 	return 0;
1291 }
1292 
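/* Look up the tunnel route for a decap flow and attach the flow to the
 * matching route entry so it can be updated on FIB changes. Schedules a route
 * update if the routing table changed while the entry was being created.
 */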
1293 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1294 			     struct mlx5e_tc_flow *flow)
1295 {
1296 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1297 	unsigned long tbl_time_before, tbl_time_after;
1298 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1299 	struct mlx5_flow_attr *attr = flow->attr;
1300 	struct mlx5_esw_flow_attr *esw_attr;
1301 	struct mlx5e_route_entry *r;
1302 	struct mlx5e_route_key key;
1303 	int err = 0;
1304 
1305 	esw_attr = attr->esw_attr;
1306 	parse_attr = attr->parse_attr;
1307 	mutex_lock(&esw->offloads.encap_tbl_lock);
1308 	if (!esw_attr->rx_tun_attr)
1309 		goto out;
1310 
1311 	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1312 	tbl_time_after = tbl_time_before;
1313 	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr, parse_attr->filter_dev);
1314 	if (err || !esw_attr->rx_tun_attr->decap_vport)
1315 		goto out;
1316 
1317 	key.ip_version = attr->tun_ip_version;
1318 	if (key.ip_version == 4)
1319 		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1320 	else
1321 		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1322 
1323 	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1324 				   &tbl_time_after);
1325 	if (IS_ERR(r)) {
1326 		err = PTR_ERR(r);
1327 		goto out;
1328 	}
1329 	/* Routing changed concurrently. FIB event handler might have missed new
1330 	 * entry, schedule update.
1331 	 */
1332 	if (tbl_time_before != tbl_time_after) {
1333 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1334 		if (err) {
1335 			mlx5e_route_put_locked(priv, r);
1336 			goto out;
1337 		}
1338 	}
1339 
1340 	flow->decap_route = r;
1341 	list_add(&flow->decap_routes, &r->decap_flows);
1342 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1343 	return 0;
1344 
1345 out:
1346 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1347 	return err;
1348 }
1349 
1350 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1351 				    struct mlx5e_tc_flow *flow,
1352 				    struct mlx5_flow_attr *attr,
1353 				    struct mlx5e_encap_entry *e,
1354 				    bool new_encap_entry,
1355 				    unsigned long tbl_time_before,
1356 				    int out_index)
1357 {
1358 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1359 	unsigned long tbl_time_after = tbl_time_before;
1360 	struct mlx5e_tc_flow_parse_attr *parse_attr;
1361 	const struct ip_tunnel_info *tun_info;
1362 	struct mlx5_esw_flow_attr *esw_attr;
1363 	struct mlx5e_route_entry *r;
1364 	struct mlx5e_route_key key;
1365 	unsigned short family;
1366 	int err = 0;
1367 
1368 	esw_attr = attr->esw_attr;
1369 	parse_attr = attr->parse_attr;
1370 	tun_info = parse_attr->tun_info[out_index];
1371 	family = ip_tunnel_info_af(tun_info);
1372 
1373 	if (family == AF_INET) {
1374 		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1375 		key.ip_version = 4;
1376 	} else if (family == AF_INET6) {
1377 		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1378 		key.ip_version = 6;
1379 	}
1380 
1381 	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1382 				  e->route_dev_ifindex, out_index);
1383 	if (err || !(esw_attr->dests[out_index].flags &
1384 		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1385 		return err;
1386 
1387 	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1388 				   &tbl_time_after);
1389 	if (IS_ERR(r))
1390 		return PTR_ERR(r);
1391 	/* Routing changed concurrently. FIB event handler might have missed new
1392 	 * entry, schedule update.
1393 	 */
1394 	if (tbl_time_before != tbl_time_after) {
1395 		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1396 		if (err) {
1397 			mlx5e_route_put_locked(priv, r);
1398 			return err;
1399 		}
1400 	}
1401 
1402 	flow->encap_routes[out_index].r = r;
1403 	if (new_encap_entry)
1404 		list_add(&e->route_list, &r->encap_entries);
1405 	flow->encap_routes[out_index].index = out_index;
1406 	return 0;
1407 }
1408 
1409 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1410 			      struct mlx5e_tc_flow *flow)
1411 {
1412 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1413 	struct mlx5e_route_entry *r = flow->decap_route;
1414 
1415 	if (!r)
1416 		return;
1417 
1418 	mutex_lock(&esw->offloads.encap_tbl_lock);
1419 	list_del(&flow->decap_routes);
1420 	flow->decap_route = NULL;
1421 
1422 	if (!refcount_dec_and_test(&r->refcnt)) {
1423 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1424 		return;
1425 	}
1426 	hash_del_rcu(&r->hlist);
1427 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1428 
1429 	mlx5e_route_dealloc(priv, r);
1430 }
1431 
1432 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1433 				     struct mlx5e_tc_flow *flow,
1434 				     int out_index)
1435 {
1436 	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1437 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1438 	struct mlx5e_encap_entry *e, *tmp;
1439 
1440 	if (!r)
1441 		return;
1442 
1443 	mutex_lock(&esw->offloads.encap_tbl_lock);
1444 	flow->encap_routes[out_index].r = NULL;
1445 
1446 	if (!refcount_dec_and_test(&r->refcnt)) {
1447 		mutex_unlock(&esw->offloads.encap_tbl_lock);
1448 		return;
1449 	}
1450 	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1451 		list_del_init(&e->route_list);
1452 	hash_del_rcu(&r->hlist);
1453 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1454 
1455 	mlx5e_route_dealloc(priv, r);
1456 }
1457 
1458 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1459 				   struct mlx5e_encap_entry *e,
1460 				   struct list_head *encap_flows)
1461 {
1462 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1463 	struct mlx5e_tc_flow *flow;
1464 
1465 	list_for_each_entry(flow, encap_flows, tmp_list) {
1466 		struct mlx5_esw_flow_attr *esw_attr;
1467 		struct mlx5_flow_attr *attr;
1468 
1469 		if (!mlx5e_is_offloaded_flow(flow))
1470 			continue;
1471 
1472 		attr = mlx5e_tc_get_encap_attr(flow);
1473 		esw_attr = attr->esw_attr;
1474 
1475 		if (flow_flag_test(flow, SLOW)) {
1476 			mlx5e_tc_unoffload_from_slow_path(esw, flow);
1477 		} else {
1478 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1479 			mlx5e_tc_unoffload_flow_post_acts(flow);
1480 		}
1481 
1482 		mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1483 		attr->modify_hdr = NULL;
1484 
1485 		esw_attr->dests[flow->tmp_entry_index].flags &=
1486 			~MLX5_ESW_DEST_ENCAP_VALID;
1487 		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1488 	}
1489 
1490 	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1491 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1492 		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1493 		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1494 		e->pkt_reformat = NULL;
1495 	}
1496 }
1497 
1498 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1499 				  struct net_device *tunnel_dev,
1500 				  struct mlx5e_encap_entry *e,
1501 				  struct list_head *encap_flows)
1502 {
1503 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1504 	struct mlx5e_tc_flow *flow;
1505 	int err;
1506 
1507 	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1508 		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1509 		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1510 	if (err)
1511 		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1512 	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1513 
1514 	list_for_each_entry(flow, encap_flows, tmp_list) {
1515 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1516 		struct mlx5_esw_flow_attr *esw_attr;
1517 		struct mlx5_flow_handle *rule;
1518 		struct mlx5_flow_attr *attr;
1519 		struct mlx5_flow_spec *spec;
1520 
1521 		if (flow_flag_test(flow, FAILED))
1522 			continue;
1523 
1524 		spec = &flow->attr->parse_attr->spec;
1525 
1526 		attr = mlx5e_tc_get_encap_attr(flow);
1527 		esw_attr = attr->esw_attr;
1528 		parse_attr = attr->parse_attr;
1529 
1530 		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1531 					     e->out_dev, e->route_dev_ifindex,
1532 					     flow->tmp_entry_index);
1533 		if (err) {
1534 			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1535 			continue;
1536 		}
1537 
1538 		err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1539 		if (err) {
1540 			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1541 				       err);
1542 			continue;
1543 		}
1544 
1545 		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1546 			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1547 			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1548 			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1549 				goto offload_to_slow_path;
1550 
1551 			err = mlx5e_tc_offload_flow_post_acts(flow);
1552 			if (err) {
1553 				mlx5_core_warn(priv->mdev, "Failed to update flow post acts, %d\n",
1554 					       err);
1555 				goto offload_to_slow_path;
1556 			}
1557 
1558 			/* update from slow path rule to encap rule */
1559 			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, flow->attr);
1560 			if (IS_ERR(rule)) {
1561 				mlx5e_tc_unoffload_flow_post_acts(flow);
1562 				err = PTR_ERR(rule);
1563 				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1564 					       err);
1565 			} else {
1566 				flow->rule[0] = rule;
1567 			}
1568 		} else {
1569 offload_to_slow_path:
1570 			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1571 			/* mark the flow's encap dest as non-valid */
1572 			esw_attr->dests[flow->tmp_entry_index].flags &=
1573 				~MLX5_ESW_DEST_ENCAP_VALID;
1574 
1575 			if (IS_ERR(rule)) {
1576 				err = PTR_ERR(rule);
1577 				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1578 					       err);
1579 			} else {
1580 				flow->rule[0] = rule;
1581 			}
1582 		}
1583 		flow_flag_set(flow, OFFLOADED);
1584 	}
1585 }
1586 
1587 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1588 				     struct mlx5e_route_entry *r,
1589 				     struct list_head *flow_list,
1590 				     bool replace)
1591 {
1592 	struct net_device *tunnel_dev;
1593 	struct mlx5e_encap_entry *e;
1594 
1595 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1596 	if (!tunnel_dev)
1597 		return -ENODEV;
1598 
1599 	list_for_each_entry(e, &r->encap_entries, route_list) {
1600 		LIST_HEAD(encap_flows);
1601 
1602 		mlx5e_take_all_encap_flows(e, &encap_flows);
1603 		if (list_empty(&encap_flows))
1604 			continue;
1605 
1606 		if (mlx5e_route_entry_valid(r))
1607 			mlx5e_invalidate_encap(priv, e, &encap_flows);
1608 
1609 		if (!replace) {
1610 			list_splice(&encap_flows, flow_list);
1611 			continue;
1612 		}
1613 
1614 		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1615 		list_splice(&encap_flows, flow_list);
1616 	}
1617 
1618 	return 0;
1619 }
1620 
1621 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1622 				      struct list_head *flow_list)
1623 {
1624 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1625 	struct mlx5e_tc_flow *flow;
1626 
1627 	list_for_each_entry(flow, flow_list, tmp_list)
1628 		if (mlx5e_is_offloaded_flow(flow))
1629 			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1630 }
1631 
1632 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1633 				  struct list_head *decap_flows)
1634 {
1635 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1636 	struct mlx5e_tc_flow *flow;
1637 
1638 	list_for_each_entry(flow, decap_flows, tmp_list) {
1639 		struct mlx5e_tc_flow_parse_attr *parse_attr;
1640 		struct mlx5_flow_attr *attr = flow->attr;
1641 		struct mlx5_flow_handle *rule;
1642 		struct mlx5_flow_spec *spec;
1643 		int err;
1644 
1645 		if (flow_flag_test(flow, FAILED))
1646 			continue;
1647 
1648 		parse_attr = attr->parse_attr;
1649 		spec = &parse_attr->spec;
1650 		err = mlx5e_tc_tun_route_lookup(priv, spec, attr, parse_attr->filter_dev);
1651 		if (err) {
1652 			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1653 				       err);
1654 			continue;
1655 		}
1656 
1657 		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1658 		if (IS_ERR(rule)) {
1659 			err = PTR_ERR(rule);
1660 			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1661 				       err);
1662 		} else {
1663 			flow->rule[0] = rule;
1664 			flow_flag_set(flow, OFFLOADED);
1665 		}
1666 	}
1667 }
1668 
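/* Take all decap flows of route entry @r, unoffload them if the route entry
 * was valid, re-offload them when the route is being replaced, and splice
 * them onto @flow_list for the caller to release.
 */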
1669 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1670 					  struct mlx5e_route_entry *r,
1671 					  struct list_head *flow_list,
1672 					  bool replace)
1673 {
1674 	struct net_device *tunnel_dev;
1675 	LIST_HEAD(decap_flows);
1676 
1677 	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1678 	if (!tunnel_dev)
1679 		return -ENODEV;
1680 
1681 	mlx5e_take_all_route_decap_flows(r, &decap_flows);
1682 	if (mlx5e_route_entry_valid(r))
1683 		mlx5e_unoffload_flow_list(priv, &decap_flows);
1684 	if (replace)
1685 		mlx5e_reoffload_decap(priv, &decap_flows);
1686 
1687 	list_splice(&decap_flows, flow_list);
1688 
1689 	return 0;
1690 }
1691 
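/* Work item handling a FIB ENTRY_REPLACE/ENTRY_DEL event for a cached route
 * entry: update the encap and decap flows that depend on the route and mark
 * the route entry valid on replace. Runs under rtnl_lock and
 * encap_tbl_lock to synchronize with concurrent neigh and encap updates,
 * then releases the flow list, the route entry and the uplink dev reference.
 */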
1692 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1693 {
1694 	struct mlx5e_tc_fib_event_data *event_data =
1695 		container_of(work, struct mlx5e_tc_fib_event_data, work);
1696 	struct net_device *ul_dev = event_data->ul_dev;
1697 	struct mlx5e_priv *priv = netdev_priv(ul_dev);
1698 	struct mlx5e_route_entry *r = event_data->r;
1699 	struct mlx5_eswitch *esw;
1700 	LIST_HEAD(flow_list);
1701 	bool replace;
1702 	int err;
1703 
1704 	/* sync with concurrent neigh updates */
1705 	rtnl_lock();
1706 	esw = priv->mdev->priv.eswitch;
1707 	mutex_lock(&esw->offloads.encap_tbl_lock);
1708 	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1709 
1710 	if (!mlx5e_route_entry_valid(r) && !replace)
1711 		goto out;
1712 
1713 	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1714 	if (err)
1715 		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1716 			       err);
1717 
1718 	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1719 	if (err)
1720 		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1721 			       err);
1722 
1723 	if (replace)
1724 		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1725 out:
1726 	mutex_unlock(&esw->offloads.encap_tbl_lock);
1727 	rtnl_unlock();
1728 
1729 	mlx5e_put_flow_list(priv, &flow_list);
1730 	mlx5e_route_put(priv, event_data->r);
1731 	dev_put(event_data->ul_dev);
1732 	kfree(event_data);
1733 }
1734 
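/* Prepare fib work for an IPv4 route event. Only /32 routes without a
 * nexthop object that egress a mlx5e netdev are of interest. Returns NULL
 * when the event is irrelevant or no cached route entry matches, and an
 * ERR_PTR on allocation failure. Runs in atomic (notifier) context, so the
 * route entry reference taken here is released later by the work item.
 */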
1735 static struct mlx5e_tc_fib_event_data *
1736 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1737 			 struct net_device *ul_dev,
1738 			 struct mlx5e_tc_tun_encap *encap,
1739 			 unsigned long event,
1740 			 struct fib_notifier_info *info)
1741 {
1742 	struct fib_entry_notifier_info *fen_info;
1743 	struct mlx5e_tc_fib_event_data *fib_work;
1744 	struct mlx5e_route_entry *r;
1745 	struct mlx5e_route_key key;
1746 	struct net_device *fib_dev;
1747 
1748 	fen_info = container_of(info, struct fib_entry_notifier_info, info);
1749 	if (fen_info->fi->nh)
1750 		return NULL;
1751 	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1752 	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1753 	    fen_info->dst_len != 32)
1754 		return NULL;
1755 
1756 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1757 	if (!fib_work)
1758 		return ERR_PTR(-ENOMEM);
1759 
1760 	key.endpoint_ip.v4 = htonl(fen_info->dst);
1761 	key.ip_version = 4;
1762 
1763 	/* Can't fail after this point because releasing reference to r
1764 	 * requires obtaining sleeping mutex which we can't do in atomic
1765 	 * context.
1766 	 */
1767 	r = mlx5e_route_lookup_for_update(encap, &key);
1768 	if (!r)
1769 		goto out;
1770 	fib_work->r = r;
1771 	dev_hold(ul_dev);
1772 
1773 	return fib_work;
1774 
1775 out:
1776 	kfree(fib_work);
1777 	return NULL;
1778 }
1779 
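/* IPv6 counterpart of mlx5e_init_fib_work_ipv4(): only /128 routes that
 * egress a mlx5e netdev are considered.
 */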
1780 static struct mlx5e_tc_fib_event_data *
1781 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1782 			 struct net_device *ul_dev,
1783 			 struct mlx5e_tc_tun_encap *encap,
1784 			 unsigned long event,
1785 			 struct fib_notifier_info *info)
1786 {
1787 	struct fib6_entry_notifier_info *fen_info;
1788 	struct mlx5e_tc_fib_event_data *fib_work;
1789 	struct mlx5e_route_entry *r;
1790 	struct mlx5e_route_key key;
1791 	struct net_device *fib_dev;
1792 
1793 	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1794 	fib_dev = fib6_info_nh_dev(fen_info->rt);
1795 	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1796 	    fen_info->rt->fib6_dst.plen != 128)
1797 		return NULL;
1798 
1799 	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1800 	if (!fib_work)
1801 		return ERR_PTR(-ENOMEM);
1802 
1803 	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1804 	       sizeof(fen_info->rt->fib6_dst.addr));
1805 	key.ip_version = 6;
1806 
1807 	/* Can't fail after this point because releasing reference to r
1808 	 * requires obtaining sleeping mutex which we can't do in atomic
1809 	 * context.
1810 	 */
1811 	r = mlx5e_route_lookup_for_update(encap, &key);
1812 	if (!r)
1813 		goto out;
1814 	fib_work->r = r;
1815 	dev_hold(ul_dev);
1816 
1817 	return fib_work;
1818 
1819 out:
1820 	kfree(fib_work);
1821 	return NULL;
1822 }
1823 
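/* FIB notifier callback. For route replace/delete events, prepare a work
 * item in atomic context and queue it on the driver workqueue; all other
 * events and address families are ignored.
 */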
1824 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1825 {
1826 	struct mlx5e_tc_fib_event_data *fib_work;
1827 	struct fib_notifier_info *info = ptr;
1828 	struct mlx5e_tc_tun_encap *encap;
1829 	struct net_device *ul_dev;
1830 	struct mlx5e_priv *priv;
1831 
1832 	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1833 	priv = encap->priv;
1834 	ul_dev = priv->netdev;
1835 	priv = netdev_priv(ul_dev);
1836 
1837 	switch (event) {
1838 	case FIB_EVENT_ENTRY_REPLACE:
1839 	case FIB_EVENT_ENTRY_DEL:
1840 		if (info->family == AF_INET)
1841 			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1842 		else if (info->family == AF_INET6)
1843 			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1844 		else
1845 			return NOTIFY_DONE;
1846 
1847 		if (!IS_ERR_OR_NULL(fib_work)) {
1848 			queue_work(priv->wq, &fib_work->work);
1849 		} else if (IS_ERR(fib_work)) {
1850 			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1851 			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1852 				       PTR_ERR(fib_work));
1853 		}
1854 
1855 		break;
1856 	default:
1857 		return NOTIFY_DONE;
1858 	}
1859 
1860 	return NOTIFY_DONE;
1861 }
1862 
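/* Allocate the tunnel encap context for @priv, initialize the route table
 * and register the FIB notifier. Returns an ERR_PTR on failure.
 */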
1863 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1864 {
1865 	struct mlx5e_tc_tun_encap *encap;
1866 	int err;
1867 
1868 	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1869 	if (!encap)
1870 		return ERR_PTR(-ENOMEM);
1871 
1872 	encap->priv = priv;
1873 	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1874 	spin_lock_init(&encap->route_lock);
1875 	hash_init(encap->route_tbl);
1876 	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1877 				    NULL, NULL);
1878 	if (err) {
1879 		kvfree(encap);
1880 		return ERR_PTR(err);
1881 	}
1882 
1883 	return encap;
1884 }
1885 
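/* Unregister the FIB notifier, flush any pending fib event works and free
 * the tunnel encap context.
 */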
1886 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1887 {
1888 	if (!encap)
1889 		return;
1890 
1891 	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1892 	flush_workqueue(encap->priv->wq); /* flush fib event works */
1893 	kvfree(encap);
1894 }
1895