xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c (revision 68993ced0f618e36cf33388f1e50223e5e6e78cc)
1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  */
33 
34 #include <crypto/internal/geniv.h>
35 #include <crypto/aead.h>
36 #include <linux/inetdevice.h>
37 #include <linux/netdevice.h>
38 #include <net/netevent.h>
39 
40 #include "en.h"
41 #include "eswitch.h"
42 #include "ipsec.h"
43 #include "ipsec_rxtx.h"
44 #include "en_rep.h"
45 
/* Delay (1000 ms, in jiffies) used whenever the per-SA software limits
 * delayed work is queued or re-queued.
 */
#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
/* XArray mark set on sadb entries added with tunnel mode allowed; the
 * netevent notifier iterates only over entries carrying this mark.
 */
#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
48 
to_ipsec_sa_entry(struct xfrm_state * x)49 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
50 {
51 	return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
52 }
53 
to_ipsec_pol_entry(struct xfrm_policy * x)54 static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
55 {
56 	return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
57 }
58 
mlx5e_ipsec_handle_sw_limits(struct work_struct * _work)59 static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work)
60 {
61 	struct mlx5e_ipsec_dwork *dwork =
62 		container_of(_work, struct mlx5e_ipsec_dwork, dwork.work);
63 	struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
64 	struct xfrm_state *x = sa_entry->x;
65 
66 	if (sa_entry->attrs.drop)
67 		return;
68 
69 	spin_lock_bh(&x->lock);
70 	if (x->km.state == XFRM_STATE_EXPIRED) {
71 		sa_entry->attrs.drop = true;
72 		spin_unlock_bh(&x->lock);
73 
74 		mlx5e_accel_ipsec_fs_modify(sa_entry);
75 		return;
76 	}
77 
78 	if (x->km.state != XFRM_STATE_VALID) {
79 		spin_unlock_bh(&x->lock);
80 		return;
81 	}
82 
83 	xfrm_state_check_expire(x);
84 	spin_unlock_bh(&x->lock);
85 
86 	queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
87 			   MLX5_IPSEC_RESCHED);
88 }
89 
mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry * sa_entry)90 static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
91 {
92 	struct xfrm_state *x = sa_entry->x;
93 	u32 seq_bottom = 0;
94 	u32 esn, esn_msb;
95 	u8 overlap;
96 
97 	switch (x->xso.dir) {
98 	case XFRM_DEV_OFFLOAD_IN:
99 		esn = x->replay_esn->seq;
100 		esn_msb = x->replay_esn->seq_hi;
101 		break;
102 	case XFRM_DEV_OFFLOAD_OUT:
103 		esn = x->replay_esn->oseq;
104 		esn_msb = x->replay_esn->oseq_hi;
105 		break;
106 	default:
107 		WARN_ON(true);
108 		return false;
109 	}
110 
111 	overlap = sa_entry->esn_state.overlap;
112 
113 	if (!x->replay_esn->replay_window) {
114 		seq_bottom = esn;
115 	} else {
116 		if (esn >= x->replay_esn->replay_window)
117 			seq_bottom = esn - x->replay_esn->replay_window + 1;
118 
119 		if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
120 			esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
121 	}
122 
123 	if (sa_entry->esn_state.esn_msb)
124 		sa_entry->esn_state.esn = esn;
125 	else
126 		/* According to RFC4303, section "3.3.3. Sequence Number Generation",
127 		 * the first packet sent using a given SA will contain a sequence
128 		 * number of 1.
129 		 */
130 		sa_entry->esn_state.esn = max_t(u32, esn, 1);
131 	sa_entry->esn_state.esn_msb = esn_msb;
132 
133 	if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
134 		sa_entry->esn_state.overlap = 0;
135 		return true;
136 	} else if (unlikely(!overlap &&
137 			    (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
138 		sa_entry->esn_state.overlap = 1;
139 		return true;
140 	}
141 
142 	return false;
143 }
144 
/*
 * Translate the xfrm packet lifetime limits into the values stored in
 * attrs->lft: a 32-bit counter start value plus a number of 2^31 rounds for
 * the hard limit, and a 32-bit interrupt value plus a number of rounds for
 * the soft limit.
 *
 * When the soft packet limit is XFRM_INF the limits are copied verbatim and
 * nothing else is computed.
 */
static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
				    struct mlx5_accel_esp_xfrm_attrs *attrs)
{
	struct xfrm_state *x = sa_entry->x;
	s64 start_value, n;

	attrs->lft.hard_packet_limit = x->lft.hard_packet_limit;
	attrs->lft.soft_packet_limit = x->lft.soft_packet_limit;
	if (x->lft.soft_packet_limit == XFRM_INF)
		return;

	/* Compute hard limit initial value and number of rounds.
	 *
	 * The counting pattern of hardware counter goes:
	 *                value  -> 2^31-1
	 *      2^31  | (2^31-1) -> 2^31-1
	 *      2^31  | (2^31-1) -> 2^31-1
	 *      [..]
	 *      2^31  | (2^31-1) -> 0
	 *
	 * The pattern is created by using an ASO operation to atomically set
	 * bit 31 after the down counter clears bit 31. This is effectively an
	 * atomic addition of 2**31 to the counter.
	 *
	 * We wish to configure the counter, within the above pattern, so that
	 * when it reaches 0, it has hit the hard limit. This is defined by this
	 * system of equations:
	 *
	 *      hard_limit == start_value + n * 2^31
	 *      n >= 0
	 *      start_value < 2^32, start_value >= 0
	 *
	 * These equations are not single-solution, there are often two choices:
	 *      hard_limit == start_value + n * 2^31
	 *      hard_limit == (start_value+2^31) + (n-1) * 2^31
	 *
	 * The algorithm selects the solution that keeps the counter value
	 * above 2^31 until the final iteration.
	 */

	/* Start by estimating n and compute start_value.
	 * Note: this start_value is overwritten below before it is ever read.
	 */
	n = attrs->lft.hard_packet_limit / BIT_ULL(31);
	start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);

	/* Choose the best of the two solutions: */
	if (n >= 1)
		n -= 1;

	/* Computed values solve the system of equations: */
	start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);

	/* The best solution means: when there are multiple iterations we must
	 * start above 2^31 and count down to 2**31 to get the interrupt.
	 */
	attrs->lft.hard_packet_limit = lower_32_bits(start_value);
	attrs->lft.numb_rounds_hard = (u64)n;

	/* Compute soft limit initial value and number of rounds.
	 *
	 * The soft_limit is achieved by adjusting the counter's
	 * interrupt_value. This is embedded in the counting pattern created by
	 * hard packet calculations above.
	 *
	 * We wish to compute the interrupt_value for the soft_limit. This is
	 * defined by this system of equations:
	 *
	 *      soft_limit == start_value - soft_value + n * 2^31
	 *      n >= 0
	 *      soft_value < 2^32, soft_value >= 0
	 *      for n == 0 start_value > soft_value
	 *
	 * As with the hard limit computation above, the equations are not
	 * single-solution.
	 * The algorithm selects the solution that has:
	 *      2^30 <= soft_limit < 2^31 + 2^30
	 * for the interior iterations, which guarantees a large guard band
	 * around the counter hard limit and next interrupt.
	 *
	 * From here on, start_value is reused to hold the soft value and
	 * attrs->lft.hard_packet_limit already holds the 32-bit hard start
	 * value written above, not the original xfrm hard limit.
	 */

	/* Start by estimating n and compute soft_value.
	 * NOTE(review): the division has an unsigned long long divisor, so it
	 * is presumably carried out unsigned and the quotient stored in n
	 * cannot be negative, even when the soft limit is smaller than the
	 * hard start value (the subtraction would wrap instead) - confirm.
	 */
	n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31);
	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) -
		      x->lft.soft_packet_limit;

	/* Compare against constraints and adjust n.
	 * NOTE(review): assuming BIT_ULL() yields an unsigned long long, the
	 * s64 start_value is converted to unsigned for the ">=" comparison, so
	 * a negative start_value would take the "n -= 1" branch and the
	 * "start_value < 0" branch looks unreachable - confirm intent.
	 */
	if (n < 0)
		n = 0;
	else if (start_value >= BIT_ULL(32))
		n -= 1;
	else if (start_value < 0)
		n += 1;

	/* Choose the best of the two solutions: */
	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
	if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30))
		n += 1;

	/* Note that the upper limit of soft_value happens naturally because we
	 * always select the lowest soft_value.
	 */

	/* Computed values solve the system of equations: */
	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;

	/* The best solution means: when there are multiple iterations we must
	 * not fall below 2^30 as that would get too close to the false
	 * hard_limit and when we reach an interior iteration for soft_limit it
	 * has to be far away from 2**32-1 which is the counter reset point
	 * after the +2^31 to accommodate latency.
	 */
	attrs->lft.soft_packet_limit = lower_32_bits(start_value);
	attrs->lft.numb_rounds_soft = (u64)n;
}
257 
mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry * sa_entry,struct mlx5_accel_esp_xfrm_attrs * attrs)258 static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
259 				  struct mlx5_accel_esp_xfrm_attrs *attrs)
260 {
261 	struct mlx5e_ipsec_addr *addrs = &attrs->addrs;
262 	struct net_device *netdev = sa_entry->dev;
263 	struct xfrm_state *x = sa_entry->x;
264 	struct dst_entry *rt_dst_entry;
265 	struct flowi4 fl4 = {};
266 	struct flowi6 fl6 = {};
267 	struct neighbour *n;
268 	u8 addr[ETH_ALEN];
269 	struct rtable *rt;
270 	const void *pkey;
271 	u8 *dst, *src;
272 
273 	if (attrs->mode != XFRM_MODE_TUNNEL ||
274 	    attrs->type != XFRM_DEV_OFFLOAD_PACKET)
275 		return;
276 
277 	ether_addr_copy(addr, netdev->dev_addr);
278 	switch (attrs->dir) {
279 	case XFRM_DEV_OFFLOAD_IN:
280 		src = attrs->dmac;
281 		dst = attrs->smac;
282 
283 		switch (addrs->family) {
284 		case AF_INET:
285 			fl4.flowi4_proto = x->sel.proto;
286 			fl4.daddr = addrs->saddr.a4;
287 			fl4.saddr = addrs->daddr.a4;
288 			pkey = &addrs->saddr.a4;
289 			break;
290 		case AF_INET6:
291 			fl6.flowi6_proto = x->sel.proto;
292 			memcpy(fl6.daddr.s6_addr32, addrs->saddr.a6, 16);
293 			memcpy(fl6.saddr.s6_addr32, addrs->daddr.a6, 16);
294 			pkey = &addrs->saddr.a6;
295 			break;
296 		default:
297 			return;
298 		}
299 		break;
300 	case XFRM_DEV_OFFLOAD_OUT:
301 		src = attrs->smac;
302 		dst = attrs->dmac;
303 		switch (addrs->family) {
304 		case AF_INET:
305 			fl4.flowi4_proto = x->sel.proto;
306 			fl4.daddr = addrs->daddr.a4;
307 			fl4.saddr = addrs->saddr.a4;
308 			pkey = &addrs->daddr.a4;
309 			break;
310 		case AF_INET6:
311 			fl6.flowi6_proto = x->sel.proto;
312 			memcpy(fl6.daddr.s6_addr32, addrs->daddr.a6, 16);
313 			memcpy(fl6.saddr.s6_addr32, addrs->saddr.a6, 16);
314 			pkey = &addrs->daddr.a6;
315 			break;
316 		default:
317 			return;
318 		}
319 		break;
320 	default:
321 		return;
322 	}
323 
324 	ether_addr_copy(src, addr);
325 
326 	/* Destination can refer to a routed network, so perform FIB lookup
327 	 * to resolve nexthop and get its MAC. Neighbour resolution is used as
328 	 * fallback.
329 	 */
330 	switch (addrs->family) {
331 	case AF_INET:
332 		rt = ip_route_output_key(dev_net(netdev), &fl4);
333 		if (IS_ERR(rt))
334 			goto neigh;
335 
336 		if (rt->rt_type != RTN_UNICAST) {
337 			ip_rt_put(rt);
338 			goto neigh;
339 		}
340 		rt_dst_entry = &rt->dst;
341 		break;
342 	case AF_INET6:
343 		if (!IS_ENABLED(CONFIG_IPV6) ||
344 		    ip6_dst_lookup(dev_net(netdev), NULL, &rt_dst_entry, &fl6))
345 			goto neigh;
346 		break;
347 	default:
348 		return;
349 	}
350 
351 	n = dst_neigh_lookup(rt_dst_entry, pkey);
352 	if (!n) {
353 		dst_release(rt_dst_entry);
354 		goto neigh;
355 	}
356 
357 	neigh_ha_snapshot(addr, n, netdev);
358 	ether_addr_copy(dst, addr);
359 	if (attrs->dir == XFRM_DEV_OFFLOAD_OUT &&
360 	    is_zero_ether_addr(addr))
361 		neigh_event_send(n, NULL);
362 	dst_release(rt_dst_entry);
363 	neigh_release(n);
364 	return;
365 
366 neigh:
367 	n = neigh_lookup(&arp_tbl, pkey, netdev);
368 	if (!n) {
369 		n = neigh_create(&arp_tbl, pkey, netdev);
370 		if (IS_ERR(n))
371 			return;
372 		neigh_event_send(n, NULL);
373 		attrs->drop = true;
374 	} else {
375 		neigh_ha_snapshot(addr, n, netdev);
376 		ether_addr_copy(dst, addr);
377 	}
378 	neigh_release(n);
379 }
380 
mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr * addrs)381 static void mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr *addrs)
382 {
383 	/*
384 	 * State doesn't have subnet prefixes in outer headers.
385 	 * The match is performed for exaxt source/destination addresses.
386 	 */
387 	memset(addrs->smask.m6, 0xFF, sizeof(__be32) * 4);
388 	memset(addrs->dmask.m6, 0xFF, sizeof(__be32) * 4);
389 }
390 
mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry * sa_entry,struct mlx5_accel_esp_xfrm_attrs * attrs)391 void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
392 					struct mlx5_accel_esp_xfrm_attrs *attrs)
393 {
394 	struct xfrm_state *x = sa_entry->x;
395 	struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
396 	struct aead_geniv_ctx *geniv_ctx;
397 	struct crypto_aead *aead;
398 	unsigned int crypto_data_len, key_len;
399 	int ivsize;
400 
401 	memset(attrs, 0, sizeof(*attrs));
402 
403 	/* key */
404 	crypto_data_len = (x->aead->alg_key_len + 7) / 8;
405 	key_len = crypto_data_len - 4; /* 4 bytes salt at end */
406 
407 	memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
408 	aes_gcm->key_len = key_len * 8;
409 
410 	/* salt and seq_iv */
411 	aead = x->data;
412 	geniv_ctx = crypto_aead_ctx(aead);
413 	ivsize = crypto_aead_ivsize(aead);
414 	memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
415 	memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
416 	       sizeof(aes_gcm->salt));
417 
418 	attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */
419 
420 	/* iv len */
421 	aes_gcm->icv_len = x->aead->alg_icv_len;
422 
423 	attrs->dir = x->xso.dir;
424 
425 	/* esn */
426 	if (x->props.flags & XFRM_STATE_ESN) {
427 		attrs->replay_esn.trigger = true;
428 		attrs->replay_esn.esn = sa_entry->esn_state.esn;
429 		attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
430 		attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
431 		if (attrs->dir == XFRM_DEV_OFFLOAD_OUT ||
432 		    x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
433 			goto skip_replay_window;
434 
435 		switch (x->replay_esn->replay_window) {
436 		case 32:
437 			attrs->replay_esn.replay_window =
438 				MLX5_IPSEC_ASO_REPLAY_WIN_32BIT;
439 			break;
440 		case 64:
441 			attrs->replay_esn.replay_window =
442 				MLX5_IPSEC_ASO_REPLAY_WIN_64BIT;
443 			break;
444 		case 128:
445 			attrs->replay_esn.replay_window =
446 				MLX5_IPSEC_ASO_REPLAY_WIN_128BIT;
447 			break;
448 		case 256:
449 			attrs->replay_esn.replay_window =
450 				MLX5_IPSEC_ASO_REPLAY_WIN_256BIT;
451 			break;
452 		default:
453 			WARN_ON(true);
454 			return;
455 		}
456 	}
457 
458 skip_replay_window:
459 	/* spi */
460 	attrs->spi = be32_to_cpu(x->id.spi);
461 
462 	/* source , destination ips */
463 	memcpy(&attrs->addrs.saddr, x->props.saddr.a6,
464 	       sizeof(attrs->addrs.saddr));
465 	memcpy(&attrs->addrs.daddr, x->id.daddr.a6, sizeof(attrs->addrs.daddr));
466 	attrs->addrs.family = x->props.family;
467 	mlx5e_ipsec_state_mask(&attrs->addrs);
468 	attrs->type = x->xso.type;
469 	attrs->reqid = x->props.reqid;
470 	attrs->upspec.dport = ntohs(x->sel.dport);
471 	attrs->upspec.dport_mask = ntohs(x->sel.dport_mask);
472 	attrs->upspec.sport = ntohs(x->sel.sport);
473 	attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
474 	attrs->upspec.proto = x->sel.proto;
475 	attrs->mode = x->props.mode;
476 
477 	mlx5e_ipsec_init_limits(sa_entry, attrs);
478 	mlx5e_ipsec_init_macs(sa_entry, attrs);
479 
480 	if (x->encap) {
481 		attrs->encap = true;
482 		attrs->sport = x->encap->encap_sport;
483 		attrs->dport = x->encap->encap_dport;
484 	}
485 }
486 
mlx5e_xfrm_validate_state(struct mlx5_core_dev * mdev,struct xfrm_state * x,struct netlink_ext_ack * extack)487 static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
488 				     struct xfrm_state *x,
489 				     struct netlink_ext_ack *extack)
490 {
491 	if (x->props.aalgo != SADB_AALG_NONE) {
492 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states");
493 		return -EINVAL;
494 	}
495 	if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
496 		NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded");
497 		return -EINVAL;
498 	}
499 	if (x->props.calgo != SADB_X_CALG_NONE) {
500 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states");
501 		return -EINVAL;
502 	}
503 	if (x->props.flags & XFRM_STATE_ESN &&
504 	    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) {
505 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states");
506 		return -EINVAL;
507 	}
508 	if (x->props.family != AF_INET &&
509 	    x->props.family != AF_INET6) {
510 		NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded");
511 		return -EINVAL;
512 	}
513 	if (x->id.proto != IPPROTO_ESP) {
514 		NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded");
515 		return -EINVAL;
516 	}
517 	if (x->encap) {
518 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) {
519 			NL_SET_ERR_MSG_MOD(extack,
520 					   "Encapsulation is not supported");
521 			return -EINVAL;
522 		}
523 
524 		if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
525 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported");
526 			return -EINVAL;
527 		}
528 
529 		if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) {
530 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only");
531 			return -EINVAL;
532 		}
533 
534 		if (x->props.mode != XFRM_MODE_TRANSPORT) {
535 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only");
536 			return -EINVAL;
537 		}
538 	}
539 	if (!x->aead) {
540 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead");
541 		return -EINVAL;
542 	}
543 	if (x->aead->alg_icv_len != 128) {
544 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit");
545 		return -EINVAL;
546 	}
547 	if ((x->aead->alg_key_len != 128 + 32) &&
548 	    (x->aead->alg_key_len != 256 + 32)) {
549 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit");
550 		return -EINVAL;
551 	}
552 	if (x->tfcpad) {
553 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding");
554 		return -EINVAL;
555 	}
556 	if (!x->geniv) {
557 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv");
558 		return -EINVAL;
559 	}
560 	if (strcmp(x->geniv, "seqiv")) {
561 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv");
562 		return -EINVAL;
563 	}
564 
565 	if (x->sel.proto != IPPROTO_IP && x->sel.proto != IPPROTO_UDP &&
566 	    x->sel.proto != IPPROTO_TCP) {
567 		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
568 		return -EINVAL;
569 	}
570 
571 	if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
572 		NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
573 		return -EINVAL;
574 	}
575 
576 	switch (x->xso.type) {
577 	case XFRM_DEV_OFFLOAD_CRYPTO:
578 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
579 			NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported");
580 			return -EINVAL;
581 		}
582 
583 		break;
584 	case XFRM_DEV_OFFLOAD_PACKET:
585 		if (!(mlx5_ipsec_device_caps(mdev) &
586 		      MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
587 			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
588 			return -EINVAL;
589 		}
590 
591 		if (x->props.mode == XFRM_MODE_TUNNEL &&
592 		    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
593 			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
594 			return -EINVAL;
595 		}
596 
597 		if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN &&
598 		    x->replay_esn->replay_window != 32 &&
599 		    x->replay_esn->replay_window != 64 &&
600 		    x->replay_esn->replay_window != 128 &&
601 		    x->replay_esn->replay_window != 256) {
602 			NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size");
603 			return -EINVAL;
604 		}
605 
606 		if (!x->props.reqid) {
607 			NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid");
608 			return -EINVAL;
609 		}
610 
611 		if (x->lft.soft_byte_limit >= x->lft.hard_byte_limit &&
612 		    x->lft.hard_byte_limit != XFRM_INF) {
613 			/* XFRM stack doesn't prevent such configuration :(. */
614 			NL_SET_ERR_MSG_MOD(extack, "Hard byte limit must be greater than soft one");
615 			return -EINVAL;
616 		}
617 
618 		if (!x->lft.soft_byte_limit || !x->lft.hard_byte_limit) {
619 			NL_SET_ERR_MSG_MOD(extack, "Soft/hard byte limits can't be 0");
620 			return -EINVAL;
621 		}
622 
623 		if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit &&
624 		    x->lft.hard_packet_limit != XFRM_INF) {
625 			/* XFRM stack doesn't prevent such configuration :(. */
626 			NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one");
627 			return -EINVAL;
628 		}
629 
630 		if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) {
631 			NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0");
632 			return -EINVAL;
633 		}
634 		break;
635 	default:
636 		NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
637 		return -EINVAL;
638 	}
639 	return 0;
640 }
641 
mlx5e_ipsec_modify_state(struct work_struct * _work)642 static void mlx5e_ipsec_modify_state(struct work_struct *_work)
643 {
644 	struct mlx5e_ipsec_work *work =
645 		container_of(_work, struct mlx5e_ipsec_work, work);
646 	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
647 	struct mlx5_accel_esp_xfrm_attrs *attrs;
648 
649 	attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs;
650 
651 	mlx5_accel_esp_modify_xfrm(sa_entry, attrs);
652 }
653 
mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry * sa_entry)654 static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
655 {
656 	struct xfrm_state *x = sa_entry->x;
657 
658 	if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO ||
659 	    x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
660 		return;
661 
662 	if (x->props.flags & XFRM_STATE_ESN) {
663 		sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn;
664 		return;
665 	}
666 
667 	sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
668 }
669 
mlx5e_ipsec_handle_netdev_event(struct work_struct * _work)670 static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
671 {
672 	struct mlx5e_ipsec_work *work =
673 		container_of(_work, struct mlx5e_ipsec_work, work);
674 	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
675 	struct mlx5e_ipsec_netevent_data *data = work->data;
676 	struct mlx5_accel_esp_xfrm_attrs *attrs;
677 
678 	attrs = &sa_entry->attrs;
679 
680 	switch (attrs->dir) {
681 	case XFRM_DEV_OFFLOAD_IN:
682 		ether_addr_copy(attrs->smac, data->addr);
683 		break;
684 	case XFRM_DEV_OFFLOAD_OUT:
685 		ether_addr_copy(attrs->dmac, data->addr);
686 		break;
687 	default:
688 		WARN_ON_ONCE(true);
689 	}
690 	attrs->drop = false;
691 	mlx5e_accel_ipsec_fs_modify(sa_entry);
692 }
693 
mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry * sa_entry)694 static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
695 {
696 	struct xfrm_state *x = sa_entry->x;
697 	struct mlx5e_ipsec_work *work;
698 	void *data = NULL;
699 
700 	switch (x->xso.type) {
701 	case XFRM_DEV_OFFLOAD_CRYPTO:
702 		if (!(x->props.flags & XFRM_STATE_ESN))
703 			return 0;
704 		break;
705 	case XFRM_DEV_OFFLOAD_PACKET:
706 		if (x->props.mode != XFRM_MODE_TUNNEL)
707 			return 0;
708 		break;
709 	default:
710 		break;
711 	}
712 
713 	work = kzalloc_obj(*work);
714 	if (!work)
715 		return -ENOMEM;
716 
717 	switch (x->xso.type) {
718 	case XFRM_DEV_OFFLOAD_CRYPTO:
719 		data = kzalloc_obj(*sa_entry);
720 		if (!data)
721 			goto free_work;
722 
723 		INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
724 		break;
725 	case XFRM_DEV_OFFLOAD_PACKET:
726 		data = kzalloc_obj(struct mlx5e_ipsec_netevent_data);
727 		if (!data)
728 			goto free_work;
729 
730 		INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
731 		break;
732 	default:
733 		break;
734 	}
735 
736 	work->data = data;
737 	work->sa_entry = sa_entry;
738 	sa_entry->work = work;
739 	return 0;
740 
741 free_work:
742 	kfree(work);
743 	return -ENOMEM;
744 }
745 
mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry * sa_entry)746 static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
747 {
748 	struct xfrm_state *x = sa_entry->x;
749 	struct mlx5e_ipsec_dwork *dwork;
750 
751 	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
752 		return 0;
753 
754 	if (x->lft.soft_packet_limit == XFRM_INF &&
755 	    x->lft.hard_packet_limit == XFRM_INF &&
756 	    x->lft.soft_byte_limit == XFRM_INF &&
757 	    x->lft.hard_byte_limit == XFRM_INF)
758 		return 0;
759 
760 	dwork = kzalloc_obj(*dwork);
761 	if (!dwork)
762 		return -ENOMEM;
763 
764 	dwork->sa_entry = sa_entry;
765 	INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_sw_limits);
766 	sa_entry->dwork = dwork;
767 	return 0;
768 }
769 
mlx5e_xfrm_add_state(struct net_device * dev,struct xfrm_state * x,struct netlink_ext_ack * extack)770 static int mlx5e_xfrm_add_state(struct net_device *dev,
771 				struct xfrm_state *x,
772 				struct netlink_ext_ack *extack)
773 {
774 	struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
775 	bool allow_tunnel_mode = false;
776 	struct mlx5e_ipsec *ipsec;
777 	struct mlx5e_priv *priv;
778 	gfp_t gfp;
779 	int err;
780 
781 	priv = netdev_priv(dev);
782 	if (!priv->ipsec)
783 		return -EOPNOTSUPP;
784 
785 	ipsec = priv->ipsec;
786 	gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL;
787 	sa_entry = kzalloc_obj(*sa_entry, gfp);
788 	if (!sa_entry)
789 		return -ENOMEM;
790 
791 	sa_entry->x = x;
792 	sa_entry->dev = dev;
793 	sa_entry->ipsec = ipsec;
794 	/* Check if this SA is originated from acquire flow temporary SA */
795 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) {
796 		x->xso.offload_handle = (unsigned long)sa_entry;
797 		return 0;
798 	}
799 
800 	err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
801 	if (err)
802 		goto err_xfrm;
803 
804 	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
805 		err = -EBUSY;
806 		goto err_xfrm;
807 	}
808 
809 	err = mlx5_eswitch_block_mode(priv->mdev);
810 	if (err)
811 		goto unblock_ipsec;
812 
813 	if (x->props.mode == XFRM_MODE_TUNNEL &&
814 	    x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
815 		allow_tunnel_mode = mlx5e_ipsec_fs_tunnel_allowed(sa_entry);
816 		if (!allow_tunnel_mode) {
817 			NL_SET_ERR_MSG_MOD(extack,
818 					   "Packet offload tunnel mode is disabled due to encap settings");
819 			err = -EINVAL;
820 			goto unblock_mode;
821 		}
822 	}
823 
824 	/* check esn */
825 	if (x->props.flags & XFRM_STATE_ESN)
826 		mlx5e_ipsec_update_esn_state(sa_entry);
827 	else
828 		/* According to RFC4303, section "3.3.3. Sequence Number Generation",
829 		 * the first packet sent using a given SA will contain a sequence
830 		 * number of 1.
831 		 */
832 		sa_entry->esn_state.esn = 1;
833 
834 	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
835 
836 	err = mlx5_ipsec_create_work(sa_entry);
837 	if (err)
838 		goto unblock_encap;
839 
840 	err = mlx5e_ipsec_create_dwork(sa_entry);
841 	if (err)
842 		goto release_work;
843 
844 	/* create hw context */
845 	err = mlx5_ipsec_create_sa_ctx(sa_entry);
846 	if (err)
847 		goto release_dwork;
848 
849 	err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
850 	if (err)
851 		goto err_hw_ctx;
852 
853 	/* We use *_bh() variant because xfrm_timer_handler(), which runs
854 	 * in softirq context, can reach our state delete logic and we need
855 	 * xa_erase_bh() there.
856 	 */
857 	err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry,
858 			   GFP_KERNEL);
859 	if (err)
860 		goto err_add_rule;
861 
862 	mlx5e_ipsec_set_esn_ops(sa_entry);
863 
864 	if (sa_entry->dwork)
865 		queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
866 				   MLX5_IPSEC_RESCHED);
867 
868 	if (allow_tunnel_mode) {
869 		xa_lock_bh(&ipsec->sadb);
870 		__xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
871 			      MLX5E_IPSEC_TUNNEL_SA);
872 		xa_unlock_bh(&ipsec->sadb);
873 	}
874 
875 	x->xso.offload_handle = (unsigned long)sa_entry;
876 	if (allow_tunnel_mode)
877 		mlx5_eswitch_unblock_encap(priv->mdev);
878 
879 	mlx5_eswitch_unblock_mode(priv->mdev);
880 
881 	return 0;
882 
883 err_add_rule:
884 	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
885 err_hw_ctx:
886 	mlx5_ipsec_free_sa_ctx(sa_entry);
887 release_dwork:
888 	kfree(sa_entry->dwork);
889 release_work:
890 	if (sa_entry->work)
891 		kfree(sa_entry->work->data);
892 	kfree(sa_entry->work);
893 unblock_encap:
894 	if (allow_tunnel_mode)
895 		mlx5_eswitch_unblock_encap(priv->mdev);
896 unblock_mode:
897 	mlx5_eswitch_unblock_mode(priv->mdev);
898 unblock_ipsec:
899 	mlx5_eswitch_unblock_ipsec(priv->mdev);
900 err_xfrm:
901 	kfree(sa_entry);
902 	NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
903 	return err;
904 }
905 
mlx5e_xfrm_del_state(struct net_device * dev,struct xfrm_state * x)906 static void mlx5e_xfrm_del_state(struct net_device *dev, struct xfrm_state *x)
907 {
908 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
909 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
910 	struct mlx5e_ipsec_sa_entry *old;
911 
912 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
913 		return;
914 
915 	old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
916 	WARN_ON(old != sa_entry);
917 }
918 
mlx5e_xfrm_free_state(struct net_device * dev,struct xfrm_state * x)919 static void mlx5e_xfrm_free_state(struct net_device *dev, struct xfrm_state *x)
920 {
921 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
922 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
923 
924 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
925 		goto sa_entry_free;
926 
927 	if (sa_entry->work)
928 		cancel_work_sync(&sa_entry->work->work);
929 
930 	if (sa_entry->dwork)
931 		cancel_delayed_work_sync(&sa_entry->dwork->dwork);
932 
933 	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
934 	mlx5_ipsec_free_sa_ctx(sa_entry);
935 	kfree(sa_entry->dwork);
936 	if (sa_entry->work)
937 		kfree(sa_entry->work->data);
938 	kfree(sa_entry->work);
939 	mlx5_eswitch_unblock_ipsec(ipsec->mdev);
940 sa_entry_free:
941 	kfree(sa_entry);
942 }
943 
mlx5e_ipsec_netevent_event(struct notifier_block * nb,unsigned long event,void * ptr)944 static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
945 				      unsigned long event, void *ptr)
946 {
947 	struct mlx5_accel_esp_xfrm_attrs *attrs;
948 	struct mlx5e_ipsec_netevent_data *data;
949 	struct mlx5e_ipsec_sa_entry *sa_entry;
950 	struct mlx5e_ipsec *ipsec;
951 	struct neighbour *n = ptr;
952 	unsigned long idx;
953 
954 	if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
955 		return NOTIFY_DONE;
956 
957 	ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
958 	xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
959 		attrs = &sa_entry->attrs;
960 
961 		if (attrs->addrs.family == AF_INET) {
962 			if (!neigh_key_eq32(n, &attrs->addrs.saddr.a4) &&
963 			    !neigh_key_eq32(n, &attrs->addrs.daddr.a4))
964 				continue;
965 		} else {
966 			if (!neigh_key_eq128(n, &attrs->addrs.saddr.a4) &&
967 			    !neigh_key_eq128(n, &attrs->addrs.daddr.a4))
968 				continue;
969 		}
970 
971 		data = sa_entry->work->data;
972 
973 		neigh_ha_snapshot(data->addr, n, sa_entry->dev);
974 		queue_work(ipsec->wq, &sa_entry->work->work);
975 	}
976 
977 	return NOTIFY_DONE;
978 }
979 
mlx5e_ipsec_init(struct mlx5e_priv * priv)980 void mlx5e_ipsec_init(struct mlx5e_priv *priv)
981 {
982 	struct mlx5e_ipsec *ipsec;
983 	int ret = -ENOMEM;
984 
985 	if (!mlx5_ipsec_device_caps(priv->mdev)) {
986 		netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
987 		return;
988 	}
989 
990 	ipsec = kzalloc_obj(*ipsec);
991 	if (!ipsec)
992 		return;
993 
994 	xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC);
995 	ipsec->mdev = priv->mdev;
996 	init_completion(&ipsec->comp);
997 	ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0,
998 				    priv->netdev->name);
999 	if (!ipsec->wq)
1000 		goto err_wq;
1001 
1002 	if (mlx5_ipsec_device_caps(priv->mdev) &
1003 	    MLX5_IPSEC_CAP_PACKET_OFFLOAD) {
1004 		ret = mlx5e_ipsec_aso_init(ipsec);
1005 		if (ret)
1006 			goto err_aso;
1007 	}
1008 
1009 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
1010 		ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
1011 		ret = register_netevent_notifier(&ipsec->netevent_nb);
1012 		if (ret)
1013 			goto clear_aso;
1014 	}
1015 
1016 	ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv);
1017 	ret = mlx5e_accel_ipsec_fs_init(ipsec, &priv->devcom);
1018 	if (ret)
1019 		goto err_fs_init;
1020 
1021 	ipsec->fs = priv->fs;
1022 	priv->ipsec = ipsec;
1023 	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
1024 	return;
1025 
1026 err_fs_init:
1027 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
1028 		unregister_netevent_notifier(&ipsec->netevent_nb);
1029 clear_aso:
1030 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
1031 		mlx5e_ipsec_aso_cleanup(ipsec);
1032 err_aso:
1033 	destroy_workqueue(ipsec->wq);
1034 err_wq:
1035 	kfree(ipsec);
1036 	mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret);
1037 	return;
1038 }
1039 
mlx5e_ipsec_cleanup(struct mlx5e_priv * priv)1040 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
1041 {
1042 	struct mlx5e_ipsec *ipsec = priv->ipsec;
1043 
1044 	if (!ipsec)
1045 		return;
1046 
1047 	mlx5e_accel_ipsec_fs_cleanup(ipsec);
1048 	if (ipsec->netevent_nb.notifier_call) {
1049 		unregister_netevent_notifier(&ipsec->netevent_nb);
1050 		ipsec->netevent_nb.notifier_call = NULL;
1051 	}
1052 	if (ipsec->aso)
1053 		mlx5e_ipsec_aso_cleanup(ipsec);
1054 	destroy_workqueue(ipsec->wq);
1055 	kfree(ipsec);
1056 	priv->ipsec = NULL;
1057 }
1058 
mlx5e_xfrm_advance_esn_state(struct xfrm_state * x)1059 static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
1060 {
1061 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
1062 	struct mlx5e_ipsec_work *work = sa_entry->work;
1063 	struct mlx5e_ipsec_sa_entry *sa_entry_shadow;
1064 	bool need_update;
1065 
1066 	need_update = mlx5e_ipsec_update_esn_state(sa_entry);
1067 	if (!need_update)
1068 		return;
1069 
1070 	sa_entry_shadow = work->data;
1071 	memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow));
1072 	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs);
1073 	queue_work(sa_entry->ipsec->wq, &work->work);
1074 }
1075 
mlx5e_xfrm_update_stats(struct xfrm_state * x)1076 static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
1077 {
1078 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
1079 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
1080 	struct net *net = dev_net(x->xso.dev);
1081 	u64 trailer_packets = 0, trailer_bytes = 0;
1082 	u64 replay_packets = 0, replay_bytes = 0;
1083 	u64 auth_packets = 0, auth_bytes = 0;
1084 	u64 success_packets, success_bytes;
1085 	u64 packets, bytes, lastuse;
1086 	size_t headers;
1087 
1088 	lockdep_assert(lockdep_is_held(&x->lock) ||
1089 		       lockdep_is_held(&net->xfrm.xfrm_cfg_mutex) ||
1090 		       lockdep_is_held(&net->xfrm.xfrm_state_lock));
1091 
1092 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
1093 		return;
1094 
1095 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
1096 		mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes,
1097 				     &auth_packets, &lastuse);
1098 		x->stats.integrity_failed += auth_packets;
1099 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets);
1100 
1101 		mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes,
1102 				     &trailer_packets, &lastuse);
1103 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets);
1104 	}
1105 
1106 	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
1107 		return;
1108 
1109 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
1110 		mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes,
1111 				     &replay_packets, &lastuse);
1112 		x->stats.replay += replay_packets;
1113 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets);
1114 	}
1115 
1116 	mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
1117 	success_packets = packets - auth_packets - trailer_packets - replay_packets;
1118 	x->curlft.packets += success_packets;
1119 	/* NIC counts all bytes passed through flow steering and doesn't have
1120 	 * an ability to count payload data size which is needed for SA.
1121 	 *
1122 	 * To overcome HW limitestion, let's approximate the payload size
1123 	 * by removing always available headers.
1124 	 */
1125 	headers = sizeof(struct ethhdr);
1126 	if (sa_entry->attrs.addrs.family == AF_INET)
1127 		headers += sizeof(struct iphdr);
1128 	else
1129 		headers += sizeof(struct ipv6hdr);
1130 
1131 	success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes;
1132 	x->curlft.bytes += success_bytes - headers * success_packets;
1133 }
1134 
word_to_mask(int prefix)1135 static __be32 word_to_mask(int prefix)
1136 {
1137 	if (prefix < 0)
1138 		return 0;
1139 
1140 	if (!prefix || prefix > 31)
1141 		return cpu_to_be32(0xFFFFFFFF);
1142 
1143 	return cpu_to_be32(((1U << prefix) - 1) << (32 - prefix));
1144 }
1145 
mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr * addrs,struct xfrm_selector * sel)1146 static void mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr *addrs,
1147 				    struct xfrm_selector *sel)
1148 {
1149 	int i;
1150 
1151 	if (addrs->family == AF_INET) {
1152 		addrs->smask.m4 = word_to_mask(sel->prefixlen_s);
1153 		addrs->saddr.a4 &= addrs->smask.m4;
1154 		addrs->dmask.m4 = word_to_mask(sel->prefixlen_d);
1155 		addrs->daddr.a4 &= addrs->dmask.m4;
1156 		return;
1157 	}
1158 
1159 	for (i = 0; i < 4; i++) {
1160 		if (sel->prefixlen_s != 32 * i)
1161 			addrs->smask.m6[i] =
1162 				word_to_mask(sel->prefixlen_s - 32 * i);
1163 		addrs->saddr.a6[i] &= addrs->smask.m6[i];
1164 
1165 		if (sel->prefixlen_d != 32 * i)
1166 			addrs->dmask.m6[i] =
1167 				word_to_mask(sel->prefixlen_d - 32 * i);
1168 		addrs->daddr.a6[i] &= addrs->dmask.m6[i];
1169 	}
1170 }
1171 
mlx5e_xfrm_validate_policy(struct mlx5_core_dev * mdev,struct xfrm_policy * x,struct netlink_ext_ack * extack)1172 static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
1173 				      struct xfrm_policy *x,
1174 				      struct netlink_ext_ack *extack)
1175 {
1176 	struct xfrm_selector *sel = &x->selector;
1177 
1178 	if (x->type != XFRM_POLICY_TYPE_MAIN) {
1179 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types");
1180 		return -EINVAL;
1181 	}
1182 
1183 	/* Please pay attention that we support only one template */
1184 	if (x->xfrm_nr > 1) {
1185 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template");
1186 		return -EINVAL;
1187 	}
1188 
1189 	if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN &&
1190 	    x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) {
1191 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy");
1192 		return -EINVAL;
1193 	}
1194 
1195 	if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP &&
1196 	    addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) {
1197 		NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0");
1198 		return -EINVAL;
1199 	}
1200 
1201 	if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) {
1202 		NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
1203 		return -EINVAL;
1204 	}
1205 
1206 	if (x->selector.proto != IPPROTO_IP &&
1207 	    x->selector.proto != IPPROTO_UDP &&
1208 	    x->selector.proto != IPPROTO_TCP) {
1209 		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
1210 		return -EINVAL;
1211 	}
1212 
1213 	if (x->priority) {
1214 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) {
1215 			NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority");
1216 			return -EINVAL;
1217 		}
1218 
1219 		if (x->priority == U32_MAX) {
1220 			NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority");
1221 			return -EINVAL;
1222 		}
1223 	}
1224 
1225 	if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET &&
1226 	    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
1227 		NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
1228 		return -EINVAL;
1229 	}
1230 
1231 	return 0;
1232 }
1233 
1234 static void
mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry * pol_entry,struct mlx5_accel_pol_xfrm_attrs * attrs)1235 mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
1236 				  struct mlx5_accel_pol_xfrm_attrs *attrs)
1237 {
1238 	struct xfrm_policy *x = pol_entry->x;
1239 	struct xfrm_selector *sel;
1240 
1241 	sel = &x->selector;
1242 	memset(attrs, 0, sizeof(*attrs));
1243 
1244 	memcpy(&attrs->addrs.saddr, sel->saddr.a6, sizeof(attrs->addrs.saddr));
1245 	memcpy(&attrs->addrs.daddr, sel->daddr.a6, sizeof(attrs->addrs.daddr));
1246 	attrs->addrs.family = sel->family;
1247 	mlx5e_ipsec_policy_mask(&attrs->addrs, sel);
1248 	attrs->dir = x->xdo.dir;
1249 	attrs->action = x->action;
1250 	attrs->type = XFRM_DEV_OFFLOAD_PACKET;
1251 	attrs->reqid = x->xfrm_vec[0].reqid;
1252 	attrs->upspec.dport = ntohs(sel->dport);
1253 	attrs->upspec.dport_mask = ntohs(sel->dport_mask);
1254 	attrs->upspec.sport = ntohs(sel->sport);
1255 	attrs->upspec.sport_mask = ntohs(sel->sport_mask);
1256 	attrs->upspec.proto = sel->proto;
1257 	attrs->prio = x->priority;
1258 }
1259 
mlx5e_xfrm_add_policy(struct xfrm_policy * x,struct netlink_ext_ack * extack)1260 static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
1261 				 struct netlink_ext_ack *extack)
1262 {
1263 	struct net_device *netdev = x->xdo.dev;
1264 	struct mlx5e_ipsec_pol_entry *pol_entry;
1265 	struct mlx5e_priv *priv;
1266 	int err;
1267 
1268 	priv = netdev_priv(netdev);
1269 	if (!priv->ipsec) {
1270 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload");
1271 		return -EOPNOTSUPP;
1272 	}
1273 
1274 	err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack);
1275 	if (err)
1276 		return err;
1277 
1278 	pol_entry = kzalloc_obj(*pol_entry);
1279 	if (!pol_entry)
1280 		return -ENOMEM;
1281 
1282 	pol_entry->x = x;
1283 	pol_entry->ipsec = priv->ipsec;
1284 
1285 	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
1286 		err = -EBUSY;
1287 		goto ipsec_busy;
1288 	}
1289 
1290 	mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs);
1291 	err = mlx5e_accel_ipsec_fs_add_pol(pol_entry);
1292 	if (err)
1293 		goto err_fs;
1294 
1295 	x->xdo.offload_handle = (unsigned long)pol_entry;
1296 	return 0;
1297 
1298 err_fs:
1299 	mlx5_eswitch_unblock_ipsec(priv->mdev);
1300 ipsec_busy:
1301 	kfree(pol_entry);
1302 	NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
1303 	return err;
1304 }
1305 
mlx5e_xfrm_del_policy(struct xfrm_policy * x)1306 static void mlx5e_xfrm_del_policy(struct xfrm_policy *x)
1307 {
1308 	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
1309 
1310 	mlx5e_accel_ipsec_fs_del_pol(pol_entry);
1311 	mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev);
1312 }
1313 
/* Policy-free callback (.xdo_dev_policy_free): free the driver policy entry
 * associated with @x.
 */
static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
{
	kfree(to_ipsec_pol_entry(x));
}
1320 
1321 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
1322 	.xdo_dev_state_add	= mlx5e_xfrm_add_state,
1323 	.xdo_dev_state_delete	= mlx5e_xfrm_del_state,
1324 	.xdo_dev_state_free	= mlx5e_xfrm_free_state,
1325 	.xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
1326 
1327 	.xdo_dev_state_update_stats = mlx5e_xfrm_update_stats,
1328 	.xdo_dev_policy_add = mlx5e_xfrm_add_policy,
1329 	.xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
1330 	.xdo_dev_policy_free = mlx5e_xfrm_free_policy,
1331 };
1332 
mlx5e_ipsec_build_netdev(struct mlx5e_priv * priv)1333 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
1334 {
1335 	struct mlx5_core_dev *mdev = priv->mdev;
1336 	struct net_device *netdev = priv->netdev;
1337 
1338 	if (!mlx5_ipsec_device_caps(mdev))
1339 		return;
1340 
1341 	mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
1342 
1343 	netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
1344 	netdev->features |= NETIF_F_HW_ESP;
1345 	netdev->hw_enc_features |= NETIF_F_HW_ESP;
1346 
1347 	if (!MLX5_CAP_ETH(mdev, swp_csum)) {
1348 		mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
1349 		return;
1350 	}
1351 
1352 	netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
1353 	netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
1354 
1355 	if (!MLX5_CAP_ETH(mdev, swp_lso)) {
1356 		mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
1357 		return;
1358 	}
1359 
1360 	netdev->gso_partial_features |= NETIF_F_GSO_ESP;
1361 	mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
1362 	netdev->features |= NETIF_F_GSO_ESP;
1363 	netdev->hw_features |= NETIF_F_GSO_ESP;
1364 	netdev->hw_enc_features |= NETIF_F_GSO_ESP;
1365 }
1366