1 /*
2  * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  */
33 
34 #include <crypto/internal/geniv.h>
35 #include <crypto/aead.h>
36 #include <linux/inetdevice.h>
37 #include <linux/netdevice.h>
38 #include <net/netevent.h>
39 #include <net/ipv6_stubs.h>
40 
41 #include "en.h"
42 #include "eswitch.h"
43 #include "ipsec.h"
44 #include "ipsec_rxtx.h"
45 #include "en_rep.h"
46 
47 #define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
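/* xarray mark used to tag tunnel mode packet offload SAs in the SADB,
 * so the netevent handler can walk only those entries.
 */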
48 #define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
49 
50 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
51 {
52 	return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
53 }
54 
55 static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
56 {
57 	return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
58 }
59 
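/* Delayed work that periodically re-runs the software lifetime check on
 * the xfrm state. Once the state has expired, the SA is switched to drop
 * mode in hardware and the work is not rescheduled.
 */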
60 static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work)
61 {
62 	struct mlx5e_ipsec_dwork *dwork =
63 		container_of(_work, struct mlx5e_ipsec_dwork, dwork.work);
64 	struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
65 	struct xfrm_state *x = sa_entry->x;
66 
67 	if (sa_entry->attrs.drop)
68 		return;
69 
70 	spin_lock_bh(&x->lock);
71 	if (x->km.state == XFRM_STATE_EXPIRED) {
72 		sa_entry->attrs.drop = true;
73 		spin_unlock_bh(&x->lock);
74 
75 		mlx5e_accel_ipsec_fs_modify(sa_entry);
76 		return;
77 	}
78 
79 	if (x->km.state != XFRM_STATE_VALID) {
80 		spin_unlock_bh(&x->lock);
81 		return;
82 	}
83 
84 	xfrm_state_check_expire(x);
85 	spin_unlock_bh(&x->lock);
86 
87 	queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
88 			   MLX5_IPSEC_RESCHED);
89 }
90 
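/* Snapshot the current ESN state (low/high sequence numbers) from the
 * xfrm state and track whether the lower 32 bits crossed the window
 * midpoint. Returns true when the overlap flag flips, i.e. when the
 * hardware context needs to be updated.
 */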
91 static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
92 {
93 	struct xfrm_state *x = sa_entry->x;
94 	u32 seq_bottom = 0;
95 	u32 esn, esn_msb;
96 	u8 overlap;
97 
98 	switch (x->xso.dir) {
99 	case XFRM_DEV_OFFLOAD_IN:
100 		esn = x->replay_esn->seq;
101 		esn_msb = x->replay_esn->seq_hi;
102 		break;
103 	case XFRM_DEV_OFFLOAD_OUT:
104 		esn = x->replay_esn->oseq;
105 		esn_msb = x->replay_esn->oseq_hi;
106 		break;
107 	default:
108 		WARN_ON(true);
109 		return false;
110 	}
111 
112 	overlap = sa_entry->esn_state.overlap;
113 
114 	if (!x->replay_esn->replay_window) {
115 		seq_bottom = esn;
116 	} else {
117 		if (esn >= x->replay_esn->replay_window)
118 			seq_bottom = esn - x->replay_esn->replay_window + 1;
119 
120 		if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
121 			esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
122 	}
123 
124 	if (sa_entry->esn_state.esn_msb)
125 		sa_entry->esn_state.esn = esn;
126 	else
127 		/* According to RFC4303, section "3.3.3. Sequence Number Generation",
128 		 * the first packet sent using a given SA will contain a sequence
129 		 * number of 1.
130 		 */
131 		sa_entry->esn_state.esn = max_t(u32, esn, 1);
132 	sa_entry->esn_state.esn_msb = esn_msb;
133 
134 	if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
135 		sa_entry->esn_state.overlap = 0;
136 		return true;
137 	} else if (unlikely(!overlap &&
138 			    (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
139 		sa_entry->esn_state.overlap = 1;
140 		return true;
141 	}
142 
143 	return false;
144 }
145 
146 static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
147 				    struct mlx5_accel_esp_xfrm_attrs *attrs)
148 {
149 	struct xfrm_state *x = sa_entry->x;
150 	s64 start_value, n;
151 
152 	attrs->lft.hard_packet_limit = x->lft.hard_packet_limit;
153 	attrs->lft.soft_packet_limit = x->lft.soft_packet_limit;
154 	if (x->lft.soft_packet_limit == XFRM_INF)
155 		return;
156 
157 	/* Compute hard limit initial value and number of rounds.
158 	 *
159 	 * The counting pattern of hardware counter goes:
160 	 *                value  -> 2^31-1
161 	 *      2^31  | (2^31-1) -> 2^31-1
162 	 *      2^31  | (2^31-1) -> 2^31-1
163 	 *      [..]
164 	 *      2^31  | (2^31-1) -> 0
165 	 *
166 	 * The pattern is created by using an ASO operation to atomically set
167 	 * bit 31 after the down counter clears bit 31. This is effectively an
168 	 * atomic addition of 2**31 to the counter.
169 	 *
170 	 * We wish to configure the counter, within the above pattern, so that
171 	 * when it reaches 0, it has hit the hard limit. This is defined by this
172 	 * system of equations:
173 	 *
174 	 *      hard_limit == start_value + n * 2^31
175 	 *      n >= 0
176 	 *      start_value < 2^32, start_value >= 0
177 	 *
178 	 * These equations do not have a single solution; there are often two choices:
179 	 *      hard_limit == start_value + n * 2^31
180 	 *      hard_limit == (start_value+2^31) + (n-1) * 2^31
181 	 *
182 	 * The algorithm selects the solution that keeps the counter value
183 	 * above 2^31 until the final iteration.
184 	 */
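	/* Illustrative example (hypothetical numbers): for
	 * hard_packet_limit == 5 * 2^31 + 100, n is first estimated as 5 and
	 * then reduced to 4, giving start_value == 2^31 + 100. The counter
	 * counts down from 2^31 + 100 and then performs 4 more rounds of
	 * 2^31 before reaching 0.
	 */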
185 
186 	/* Start by estimating n and compute start_value */
187 	n = attrs->lft.hard_packet_limit / BIT_ULL(31);
188 	start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
189 
190 	/* Choose the best of the two solutions: */
191 	if (n >= 1)
192 		n -= 1;
193 
194 	/* Computed values solve the system of equations: */
195 	start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
196 
197 	/* The best solution means: when there are multiple iterations we must
198 	 * start above 2^31 and count down to 2**31 to get the interrupt.
199 	 */
200 	attrs->lft.hard_packet_limit = lower_32_bits(start_value);
201 	attrs->lft.numb_rounds_hard = (u64)n;
202 
203 	/* Compute soft limit initial value and number of rounds.
204 	 *
205 	 * The soft_limit is achieved by adjusting the counter's
206 	 * interrupt_value. This is embedded in the counting pattern created by
207 	 * hard packet calculations above.
208 	 *
209 	 * We wish to compute the interrupt_value for the soft_limit. This is
210 	 * defined by this system of equations:
211 	 *
212 	 *      soft_limit == start_value - soft_value + n * 2^31
213 	 *      n >= 0
214 	 *      soft_value < 2^32, soft_value >= 0
215 	 *      for n == 0 start_value > soft_value
216 	 *
217 	 * As with compute_hard_n_value() the equations are not single-solution.
218 	 * The algorithm selects the solution that has:
219 	 *      2^30 <= soft_limit < 2^31 + 2^30
220 	 * for the interior iterations, which guarantees a large guard band
221 	 * around the counter hard limit and next interrupt.
222 	 */
223 
224 	/* Start by estimating n and compute soft_value */
225 	n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31);
226 	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) -
227 		      x->lft.soft_packet_limit;
228 
229 	/* Compare against constraints and adjust n */
230 	if (n < 0)
231 		n = 0;
232 	else if (start_value >= BIT_ULL(32))
233 		n -= 1;
234 	else if (start_value < 0)
235 		n += 1;
236 
237 	/* Choose the best of the two solutions: */
238 	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
239 	if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30))
240 		n += 1;
241 
242 	/* Note that the upper limit of soft_value happens naturally because we
243 	 * always select the lowest soft_value.
244 	 */
245 
246 	/* Computed values solve the system of equations: */
247 	start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
248 
249 	/* The best solution means: when there are multiple iterations we must
250 	 * not fall below 2^30 as that would get too close to the false
251 	 * hard_limit and when we reach an interior iteration for soft_limit it
252 	 * has to be far away from 2**32-1 which is the counter reset point
253 	 * after the +2^31 to accommodate latency.
254 	 */
255 	attrs->lft.soft_packet_limit = lower_32_bits(start_value);
256 	attrs->lft.numb_rounds_soft = (u64)n;
257 }
258 
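/* For tunnel mode packet offload, fill in the outer source/destination
 * MAC addresses: the local MAC is queried from the device and the remote
 * MAC is resolved through a FIB lookup, with plain neighbour resolution
 * as a fallback.
 */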
259 static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
260 				  struct mlx5_accel_esp_xfrm_attrs *attrs)
261 {
262 	struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
263 	struct mlx5e_ipsec_addr *addrs = &attrs->addrs;
264 	struct net_device *netdev = sa_entry->dev;
265 	struct xfrm_state *x = sa_entry->x;
266 	struct dst_entry *rt_dst_entry;
267 	struct flowi4 fl4 = {};
268 	struct flowi6 fl6 = {};
269 	struct neighbour *n;
270 	u8 addr[ETH_ALEN];
271 	struct rtable *rt;
272 	const void *pkey;
273 	u8 *dst, *src;
274 
275 	if (attrs->mode != XFRM_MODE_TUNNEL ||
276 	    attrs->type != XFRM_DEV_OFFLOAD_PACKET)
277 		return;
278 
279 	mlx5_query_mac_address(mdev, addr);
280 	switch (attrs->dir) {
281 	case XFRM_DEV_OFFLOAD_IN:
282 		src = attrs->dmac;
283 		dst = attrs->smac;
284 
285 		switch (addrs->family) {
286 		case AF_INET:
287 			fl4.flowi4_proto = x->sel.proto;
288 			fl4.daddr = addrs->saddr.a4;
289 			fl4.saddr = addrs->daddr.a4;
290 			pkey = &addrs->saddr.a4;
291 			break;
292 		case AF_INET6:
293 			fl6.flowi6_proto = x->sel.proto;
294 			memcpy(fl6.daddr.s6_addr32, addrs->saddr.a6, 16);
295 			memcpy(fl6.saddr.s6_addr32, addrs->daddr.a6, 16);
296 			pkey = &addrs->saddr.a6;
297 			break;
298 		default:
299 			return;
300 		}
301 		break;
302 	case XFRM_DEV_OFFLOAD_OUT:
303 		src = attrs->smac;
304 		dst = attrs->dmac;
305 		switch (addrs->family) {
306 		case AF_INET:
307 			fl4.flowi4_proto = x->sel.proto;
308 			fl4.daddr = addrs->daddr.a4;
309 			fl4.saddr = addrs->saddr.a4;
310 			pkey = &addrs->daddr.a4;
311 			break;
312 		case AF_INET6:
313 			fl6.flowi6_proto = x->sel.proto;
314 			memcpy(fl6.daddr.s6_addr32, addrs->daddr.a6, 16);
315 			memcpy(fl6.saddr.s6_addr32, addrs->saddr.a6, 16);
316 			pkey = &addrs->daddr.a6;
317 			break;
318 		default:
319 			return;
320 		}
321 		break;
322 	default:
323 		return;
324 	}
325 
326 	ether_addr_copy(src, addr);
327 
328 	/* Destination can refer to a routed network, so perform FIB lookup
329 	 * to resolve nexthop and get its MAC. Neighbour resolution is used as
330 	 * fallback.
331 	 */
332 	switch (addrs->family) {
333 	case AF_INET:
334 		rt = ip_route_output_key(dev_net(netdev), &fl4);
335 		if (IS_ERR(rt))
336 			goto neigh;
337 
338 		if (rt->rt_type != RTN_UNICAST) {
339 			ip_rt_put(rt);
340 			goto neigh;
341 		}
342 		rt_dst_entry = &rt->dst;
343 		break;
344 	case AF_INET6:
345 		if (!IS_ENABLED(CONFIG_IPV6) ||
346 		    ip6_dst_lookup(dev_net(netdev), NULL, &rt_dst_entry, &fl6))
347 			goto neigh;
348 		break;
349 	default:
350 		return;
351 	}
352 
353 	n = dst_neigh_lookup(rt_dst_entry, pkey);
354 	if (!n) {
355 		dst_release(rt_dst_entry);
356 		goto neigh;
357 	}
358 
359 	neigh_ha_snapshot(addr, n, netdev);
360 	ether_addr_copy(dst, addr);
361 	if (attrs->dir == XFRM_DEV_OFFLOAD_OUT &&
362 	    is_zero_ether_addr(addr))
363 		neigh_event_send(n, NULL);
364 	dst_release(rt_dst_entry);
365 	neigh_release(n);
366 	return;
367 
368 neigh:
369 	n = neigh_lookup(&arp_tbl, pkey, netdev);
370 	if (!n) {
371 		n = neigh_create(&arp_tbl, pkey, netdev);
372 		if (IS_ERR(n))
373 			return;
374 		neigh_event_send(n, NULL);
375 		attrs->drop = true;
376 	} else {
377 		neigh_ha_snapshot(addr, n, netdev);
378 		ether_addr_copy(dst, addr);
379 	}
380 	neigh_release(n);
381 }
382 
383 static void mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr *addrs)
384 {
385 	/*
386 	 * State doesn't have subnet prefixes in outer headers.
387 	 * The match is performed for exact source/destination addresses.
388 	 */
389 	memset(addrs->smask.m6, 0xFF, sizeof(__be32) * 4);
390 	memset(addrs->dmask.m6, 0xFF, sizeof(__be32) * 4);
391 }
392 
393 void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
394 					struct mlx5_accel_esp_xfrm_attrs *attrs)
395 {
396 	struct xfrm_state *x = sa_entry->x;
397 	struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
398 	struct aead_geniv_ctx *geniv_ctx;
399 	struct crypto_aead *aead;
400 	unsigned int crypto_data_len, key_len;
401 	int ivsize;
402 
403 	memset(attrs, 0, sizeof(*attrs));
404 
405 	/* key */
406 	crypto_data_len = (x->aead->alg_key_len + 7) / 8;
407 	key_len = crypto_data_len - 4; /* 4 bytes salt at end */
408 
409 	memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
410 	aes_gcm->key_len = key_len * 8;
411 
412 	/* salt and seq_iv */
413 	aead = x->data;
414 	geniv_ctx = crypto_aead_ctx(aead);
415 	ivsize = crypto_aead_ivsize(aead);
416 	memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
417 	memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
418 	       sizeof(aes_gcm->salt));
419 
420 	attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */
421 
422 	/* iv len */
423 	aes_gcm->icv_len = x->aead->alg_icv_len;
424 
425 	attrs->dir = x->xso.dir;
426 
427 	/* esn */
428 	if (x->props.flags & XFRM_STATE_ESN) {
429 		attrs->replay_esn.trigger = true;
430 		attrs->replay_esn.esn = sa_entry->esn_state.esn;
431 		attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
432 		attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
433 		if (attrs->dir == XFRM_DEV_OFFLOAD_OUT ||
434 		    x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
435 			goto skip_replay_window;
436 
437 		switch (x->replay_esn->replay_window) {
438 		case 32:
439 			attrs->replay_esn.replay_window =
440 				MLX5_IPSEC_ASO_REPLAY_WIN_32BIT;
441 			break;
442 		case 64:
443 			attrs->replay_esn.replay_window =
444 				MLX5_IPSEC_ASO_REPLAY_WIN_64BIT;
445 			break;
446 		case 128:
447 			attrs->replay_esn.replay_window =
448 				MLX5_IPSEC_ASO_REPLAY_WIN_128BIT;
449 			break;
450 		case 256:
451 			attrs->replay_esn.replay_window =
452 				MLX5_IPSEC_ASO_REPLAY_WIN_256BIT;
453 			break;
454 		default:
455 			WARN_ON(true);
456 			return;
457 		}
458 	}
459 
460 skip_replay_window:
461 	/* spi */
462 	attrs->spi = be32_to_cpu(x->id.spi);
463 
464 	/* source, destination IPs */
465 	memcpy(&attrs->addrs.saddr, x->props.saddr.a6,
466 	       sizeof(attrs->addrs.saddr));
467 	memcpy(&attrs->addrs.daddr, x->id.daddr.a6, sizeof(attrs->addrs.daddr));
468 	attrs->addrs.family = x->props.family;
469 	mlx5e_ipsec_state_mask(&attrs->addrs);
470 	attrs->type = x->xso.type;
471 	attrs->reqid = x->props.reqid;
472 	attrs->upspec.dport = ntohs(x->sel.dport);
473 	attrs->upspec.dport_mask = ntohs(x->sel.dport_mask);
474 	attrs->upspec.sport = ntohs(x->sel.sport);
475 	attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
476 	attrs->upspec.proto = x->sel.proto;
477 	attrs->mode = x->props.mode;
478 
479 	mlx5e_ipsec_init_limits(sa_entry, attrs);
480 	mlx5e_ipsec_init_macs(sa_entry, attrs);
481 
482 	if (x->encap) {
483 		attrs->encap = true;
484 		attrs->sport = x->encap->encap_sport;
485 		attrs->dport = x->encap->encap_dport;
486 	}
487 }
488 
489 static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
490 				     struct xfrm_state *x,
491 				     struct netlink_ext_ack *extack)
492 {
493 	if (x->props.aalgo != SADB_AALG_NONE) {
494 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states");
495 		return -EINVAL;
496 	}
497 	if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
498 		NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded");
499 		return -EINVAL;
500 	}
501 	if (x->props.calgo != SADB_X_CALG_NONE) {
502 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states");
503 		return -EINVAL;
504 	}
505 	if (x->props.flags & XFRM_STATE_ESN &&
506 	    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) {
507 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states");
508 		return -EINVAL;
509 	}
510 	if (x->props.family != AF_INET &&
511 	    x->props.family != AF_INET6) {
512 		NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded");
513 		return -EINVAL;
514 	}
515 	if (x->id.proto != IPPROTO_ESP) {
516 		NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded");
517 		return -EINVAL;
518 	}
519 	if (x->encap) {
520 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) {
521 			NL_SET_ERR_MSG_MOD(extack,
522 					   "Encapsulation is not supported");
523 			return -EINVAL;
524 		}
525 
526 		if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
527 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported");
528 			return -EINVAL;
529 		}
530 
531 		if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) {
532 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only");
533 			return -EINVAL;
534 		}
535 
536 		if (x->props.mode != XFRM_MODE_TRANSPORT) {
537 			NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only");
538 			return -EINVAL;
539 		}
540 	}
541 	if (!x->aead) {
542 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead");
543 		return -EINVAL;
544 	}
545 	if (x->aead->alg_icv_len != 128) {
546 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit");
547 		return -EINVAL;
548 	}
549 	if ((x->aead->alg_key_len != 128 + 32) &&
550 	    (x->aead->alg_key_len != 256 + 32)) {
551 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit");
552 		return -EINVAL;
553 	}
554 	if (x->tfcpad) {
555 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding");
556 		return -EINVAL;
557 	}
558 	if (!x->geniv) {
559 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv");
560 		return -EINVAL;
561 	}
562 	if (strcmp(x->geniv, "seqiv")) {
563 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv");
564 		return -EINVAL;
565 	}
566 
567 	if (x->sel.proto != IPPROTO_IP && x->sel.proto != IPPROTO_UDP &&
568 	    x->sel.proto != IPPROTO_TCP) {
569 		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
570 		return -EINVAL;
571 	}
572 
573 	if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
574 		NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
575 		return -EINVAL;
576 	}
577 
578 	switch (x->xso.type) {
579 	case XFRM_DEV_OFFLOAD_CRYPTO:
580 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
581 			NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported");
582 			return -EINVAL;
583 		}
584 
585 		break;
586 	case XFRM_DEV_OFFLOAD_PACKET:
587 		if (!(mlx5_ipsec_device_caps(mdev) &
588 		      MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
589 			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
590 			return -EINVAL;
591 		}
592 
593 		if (x->props.mode == XFRM_MODE_TUNNEL &&
594 		    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
595 			NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
596 			return -EINVAL;
597 		}
598 
599 		if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN &&
600 		    x->replay_esn->replay_window != 32 &&
601 		    x->replay_esn->replay_window != 64 &&
602 		    x->replay_esn->replay_window != 128 &&
603 		    x->replay_esn->replay_window != 256) {
604 			NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size");
605 			return -EINVAL;
606 		}
607 
608 		if (!x->props.reqid) {
609 			NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid");
610 			return -EINVAL;
611 		}
612 
613 		if (x->lft.soft_byte_limit >= x->lft.hard_byte_limit &&
614 		    x->lft.hard_byte_limit != XFRM_INF) {
615 			/* XFRM stack doesn't prevent such configuration :(. */
616 			NL_SET_ERR_MSG_MOD(extack, "Hard byte limit must be greater than soft one");
617 			return -EINVAL;
618 		}
619 
620 		if (!x->lft.soft_byte_limit || !x->lft.hard_byte_limit) {
621 			NL_SET_ERR_MSG_MOD(extack, "Soft/hard byte limits can't be 0");
622 			return -EINVAL;
623 		}
624 
625 		if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit &&
626 		    x->lft.hard_packet_limit != XFRM_INF) {
627 			/* XFRM stack doesn't prevent such configuration :(. */
628 			NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one");
629 			return -EINVAL;
630 		}
631 
632 		if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) {
633 			NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0");
634 			return -EINVAL;
635 		}
636 		break;
637 	default:
638 		NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
639 		return -EINVAL;
640 	}
641 	return 0;
642 }
643 
644 static void mlx5e_ipsec_modify_state(struct work_struct *_work)
645 {
646 	struct mlx5e_ipsec_work *work =
647 		container_of(_work, struct mlx5e_ipsec_work, work);
648 	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
649 	struct mlx5_accel_esp_xfrm_attrs *attrs;
650 
651 	attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs;
652 
653 	mlx5_accel_esp_modify_xfrm(sa_entry, attrs);
654 }
655 
656 static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
657 {
658 	struct xfrm_state *x = sa_entry->x;
659 
660 	if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO ||
661 	    x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
662 		return;
663 
664 	if (x->props.flags & XFRM_STATE_ESN) {
665 		sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn;
666 		return;
667 	}
668 
669 	sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
670 }
671 
672 static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
673 {
674 	struct mlx5e_ipsec_work *work =
675 		container_of(_work, struct mlx5e_ipsec_work, work);
676 	struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
677 	struct mlx5e_ipsec_netevent_data *data = work->data;
678 	struct mlx5_accel_esp_xfrm_attrs *attrs;
679 
680 	attrs = &sa_entry->attrs;
681 
682 	switch (attrs->dir) {
683 	case XFRM_DEV_OFFLOAD_IN:
684 		ether_addr_copy(attrs->smac, data->addr);
685 		break;
686 	case XFRM_DEV_OFFLOAD_OUT:
687 		ether_addr_copy(attrs->dmac, data->addr);
688 		break;
689 	default:
690 		WARN_ON_ONCE(true);
691 	}
692 	attrs->drop = false;
693 	mlx5e_accel_ipsec_fs_modify(sa_entry);
694 }
695 
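/* Allocate the per-SA work item: for ESN crypto offload it carries a
 * shadow SA entry used to push updated ESN attributes to hardware; for
 * tunnel mode packet offload it carries the neighbour data used to
 * refresh the MAC addresses on netevents.
 */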
696 static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
697 {
698 	struct xfrm_state *x = sa_entry->x;
699 	struct mlx5e_ipsec_work *work;
700 	void *data = NULL;
701 
702 	switch (x->xso.type) {
703 	case XFRM_DEV_OFFLOAD_CRYPTO:
704 		if (!(x->props.flags & XFRM_STATE_ESN))
705 			return 0;
706 		break;
707 	case XFRM_DEV_OFFLOAD_PACKET:
708 		if (x->props.mode != XFRM_MODE_TUNNEL)
709 			return 0;
710 		break;
711 	default:
712 		break;
713 	}
714 
715 	work = kzalloc(sizeof(*work), GFP_KERNEL);
716 	if (!work)
717 		return -ENOMEM;
718 
719 	switch (x->xso.type) {
720 	case XFRM_DEV_OFFLOAD_CRYPTO:
721 		data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
722 		if (!data)
723 			goto free_work;
724 
725 		INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
726 		break;
727 	case XFRM_DEV_OFFLOAD_PACKET:
728 		data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
729 			       GFP_KERNEL);
730 		if (!data)
731 			goto free_work;
732 
733 		INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
734 		break;
735 	default:
736 		break;
737 	}
738 
739 	work->data = data;
740 	work->sa_entry = sa_entry;
741 	sa_entry->work = work;
742 	return 0;
743 
744 free_work:
745 	kfree(work);
746 	return -ENOMEM;
747 }
748 
749 static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
750 {
751 	struct xfrm_state *x = sa_entry->x;
752 	struct mlx5e_ipsec_dwork *dwork;
753 
754 	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
755 		return 0;
756 
757 	if (x->lft.soft_packet_limit == XFRM_INF &&
758 	    x->lft.hard_packet_limit == XFRM_INF &&
759 	    x->lft.soft_byte_limit == XFRM_INF &&
760 	    x->lft.hard_byte_limit == XFRM_INF)
761 		return 0;
762 
763 	dwork = kzalloc(sizeof(*dwork), GFP_KERNEL);
764 	if (!dwork)
765 		return -ENOMEM;
766 
767 	dwork->sa_entry = sa_entry;
768 	INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_sw_limits);
769 	sa_entry->dwork = dwork;
770 	return 0;
771 }
772 
773 static int mlx5e_xfrm_add_state(struct net_device *dev,
774 				struct xfrm_state *x,
775 				struct netlink_ext_ack *extack)
776 {
777 	struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
778 	bool allow_tunnel_mode = false;
779 	struct mlx5e_ipsec *ipsec;
780 	struct mlx5e_priv *priv;
781 	gfp_t gfp;
782 	int err;
783 
784 	priv = netdev_priv(dev);
785 	if (!priv->ipsec)
786 		return -EOPNOTSUPP;
787 
788 	ipsec = priv->ipsec;
789 	gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL;
790 	sa_entry = kzalloc(sizeof(*sa_entry), gfp);
791 	if (!sa_entry)
792 		return -ENOMEM;
793 
794 	sa_entry->x = x;
795 	sa_entry->dev = dev;
796 	sa_entry->ipsec = ipsec;
797 	/* Check if this SA originated from an acquire flow temporary SA */
798 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
799 		goto out;
800 
801 	err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
802 	if (err)
803 		goto err_xfrm;
804 
805 	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
806 		err = -EBUSY;
807 		goto err_xfrm;
808 	}
809 
810 	err = mlx5_eswitch_block_mode(priv->mdev);
811 	if (err)
812 		goto unblock_ipsec;
813 
814 	if (x->props.mode == XFRM_MODE_TUNNEL &&
815 	    x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
816 		allow_tunnel_mode = mlx5e_ipsec_fs_tunnel_allowed(sa_entry);
817 		if (!allow_tunnel_mode) {
818 			NL_SET_ERR_MSG_MOD(extack,
819 					   "Packet offload tunnel mode is disabled due to encap settings");
820 			err = -EINVAL;
821 			goto unblock_mode;
822 		}
823 	}
824 
825 	/* check esn */
826 	if (x->props.flags & XFRM_STATE_ESN)
827 		mlx5e_ipsec_update_esn_state(sa_entry);
828 	else
829 		/* According to RFC4303, section "3.3.3. Sequence Number Generation",
830 		 * the first packet sent using a given SA will contain a sequence
831 		 * number of 1.
832 		 */
833 		sa_entry->esn_state.esn = 1;
834 
835 	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
836 
837 	err = mlx5_ipsec_create_work(sa_entry);
838 	if (err)
839 		goto unblock_encap;
840 
841 	err = mlx5e_ipsec_create_dwork(sa_entry);
842 	if (err)
843 		goto release_work;
844 
845 	/* create hw context */
846 	err = mlx5_ipsec_create_sa_ctx(sa_entry);
847 	if (err)
848 		goto release_dwork;
849 
850 	err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
851 	if (err)
852 		goto err_hw_ctx;
853 
854 	/* We use *_bh() variant because xfrm_timer_handler(), which runs
855 	 * in softirq context, can reach our state delete logic and we need
856 	 * xa_erase_bh() there.
857 	 */
858 	err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry,
859 			   GFP_KERNEL);
860 	if (err)
861 		goto err_add_rule;
862 
863 	mlx5e_ipsec_set_esn_ops(sa_entry);
864 
865 	if (sa_entry->dwork)
866 		queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
867 				   MLX5_IPSEC_RESCHED);
868 
869 	if (allow_tunnel_mode) {
870 		xa_lock_bh(&ipsec->sadb);
871 		__xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
872 			      MLX5E_IPSEC_TUNNEL_SA);
873 		xa_unlock_bh(&ipsec->sadb);
874 	}
875 
876 out:
877 	x->xso.offload_handle = (unsigned long)sa_entry;
878 	if (allow_tunnel_mode)
879 		mlx5_eswitch_unblock_encap(priv->mdev);
880 
881 	mlx5_eswitch_unblock_mode(priv->mdev);
882 
883 	return 0;
884 
885 err_add_rule:
886 	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
887 err_hw_ctx:
888 	mlx5_ipsec_free_sa_ctx(sa_entry);
889 release_dwork:
890 	kfree(sa_entry->dwork);
891 release_work:
892 	if (sa_entry->work)
893 		kfree(sa_entry->work->data);
894 	kfree(sa_entry->work);
895 unblock_encap:
896 	if (allow_tunnel_mode)
897 		mlx5_eswitch_unblock_encap(priv->mdev);
898 unblock_mode:
899 	mlx5_eswitch_unblock_mode(priv->mdev);
900 unblock_ipsec:
901 	mlx5_eswitch_unblock_ipsec(priv->mdev);
902 err_xfrm:
903 	kfree(sa_entry);
904 	NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
905 	return err;
906 }
907 
908 static void mlx5e_xfrm_del_state(struct net_device *dev, struct xfrm_state *x)
909 {
910 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
911 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
912 	struct mlx5e_ipsec_sa_entry *old;
913 
914 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
915 		return;
916 
917 	old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
918 	WARN_ON(old != sa_entry);
919 }
920 
921 static void mlx5e_xfrm_free_state(struct net_device *dev, struct xfrm_state *x)
922 {
923 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
924 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
925 
926 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
927 		goto sa_entry_free;
928 
929 	if (sa_entry->work)
930 		cancel_work_sync(&sa_entry->work->work);
931 
932 	if (sa_entry->dwork)
933 		cancel_delayed_work_sync(&sa_entry->dwork->dwork);
934 
935 	mlx5e_accel_ipsec_fs_del_rule(sa_entry);
936 	mlx5_ipsec_free_sa_ctx(sa_entry);
937 	kfree(sa_entry->dwork);
938 	if (sa_entry->work)
939 		kfree(sa_entry->work->data);
940 	kfree(sa_entry->work);
941 	mlx5_eswitch_unblock_ipsec(ipsec->mdev);
942 sa_entry_free:
943 	kfree(sa_entry);
944 }
945 
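/* Neighbour update handler: when a neighbour matching the outer source
 * or destination address of a tunnel mode SA becomes valid, snapshot its
 * MAC address and queue work to update the SA attributes in hardware.
 */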
946 static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
947 				      unsigned long event, void *ptr)
948 {
949 	struct mlx5_accel_esp_xfrm_attrs *attrs;
950 	struct mlx5e_ipsec_netevent_data *data;
951 	struct mlx5e_ipsec_sa_entry *sa_entry;
952 	struct mlx5e_ipsec *ipsec;
953 	struct neighbour *n = ptr;
954 	unsigned long idx;
955 
956 	if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
957 		return NOTIFY_DONE;
958 
959 	ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
960 	xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
961 		attrs = &sa_entry->attrs;
962 
963 		if (attrs->addrs.family == AF_INET) {
964 			if (!neigh_key_eq32(n, &attrs->addrs.saddr.a4) &&
965 			    !neigh_key_eq32(n, &attrs->addrs.daddr.a4))
966 				continue;
967 		} else {
968 			if (!neigh_key_eq128(n, &attrs->addrs.saddr.a4) &&
969 			    !neigh_key_eq128(n, &attrs->addrs.daddr.a4))
970 				continue;
971 		}
972 
973 		data = sa_entry->work->data;
974 
975 		neigh_ha_snapshot(data->addr, n, sa_entry->dev);
976 		queue_work(ipsec->wq, &sa_entry->work->work);
977 	}
978 
979 	return NOTIFY_DONE;
980 }
981 
982 void mlx5e_ipsec_init(struct mlx5e_priv *priv)
983 {
984 	struct mlx5e_ipsec *ipsec;
985 	int ret = -ENOMEM;
986 
987 	if (!mlx5_ipsec_device_caps(priv->mdev)) {
988 		netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
989 		return;
990 	}
991 
992 	ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
993 	if (!ipsec)
994 		return;
995 
996 	xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC);
997 	ipsec->mdev = priv->mdev;
998 	init_completion(&ipsec->comp);
999 	ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0,
1000 				    priv->netdev->name);
1001 	if (!ipsec->wq)
1002 		goto err_wq;
1003 
1004 	if (mlx5_ipsec_device_caps(priv->mdev) &
1005 	    MLX5_IPSEC_CAP_PACKET_OFFLOAD) {
1006 		ret = mlx5e_ipsec_aso_init(ipsec);
1007 		if (ret)
1008 			goto err_aso;
1009 	}
1010 
1011 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
1012 		ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
1013 		ret = register_netevent_notifier(&ipsec->netevent_nb);
1014 		if (ret)
1015 			goto clear_aso;
1016 	}
1017 
1018 	ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv);
1019 	ret = mlx5e_accel_ipsec_fs_init(ipsec, &priv->devcom);
1020 	if (ret)
1021 		goto err_fs_init;
1022 
1023 	ipsec->fs = priv->fs;
1024 	priv->ipsec = ipsec;
1025 	netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
1026 	return;
1027 
1028 err_fs_init:
1029 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
1030 		unregister_netevent_notifier(&ipsec->netevent_nb);
1031 clear_aso:
1032 	if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
1033 		mlx5e_ipsec_aso_cleanup(ipsec);
1034 err_aso:
1035 	destroy_workqueue(ipsec->wq);
1036 err_wq:
1037 	kfree(ipsec);
1038 	mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret);
1039 	return;
1040 }
1041 
1042 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
1043 {
1044 	struct mlx5e_ipsec *ipsec = priv->ipsec;
1045 
1046 	if (!ipsec)
1047 		return;
1048 
1049 	mlx5e_accel_ipsec_fs_cleanup(ipsec);
1050 	if (ipsec->netevent_nb.notifier_call) {
1051 		unregister_netevent_notifier(&ipsec->netevent_nb);
1052 		ipsec->netevent_nb.notifier_call = NULL;
1053 	}
1054 	if (ipsec->aso)
1055 		mlx5e_ipsec_aso_cleanup(ipsec);
1056 	destroy_workqueue(ipsec->wq);
1057 	kfree(ipsec);
1058 	priv->ipsec = NULL;
1059 }
1060 
1061 static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
1062 {
1063 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
1064 	struct mlx5e_ipsec_work *work = sa_entry->work;
1065 	struct mlx5e_ipsec_sa_entry *sa_entry_shadow;
1066 	bool need_update;
1067 
1068 	need_update = mlx5e_ipsec_update_esn_state(sa_entry);
1069 	if (!need_update)
1070 		return;
1071 
1072 	sa_entry_shadow = work->data;
1073 	memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow));
1074 	mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs);
1075 	queue_work(sa_entry->ipsec->wq, &work->work);
1076 }
1077 
1078 static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
1079 {
1080 	struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
1081 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
1082 	struct net *net = dev_net(x->xso.dev);
1083 	u64 trailer_packets = 0, trailer_bytes = 0;
1084 	u64 replay_packets = 0, replay_bytes = 0;
1085 	u64 auth_packets = 0, auth_bytes = 0;
1086 	u64 success_packets, success_bytes;
1087 	u64 packets, bytes, lastuse;
1088 	size_t headers;
1089 
1090 	lockdep_assert(lockdep_is_held(&x->lock) ||
1091 		       lockdep_is_held(&net->xfrm.xfrm_cfg_mutex) ||
1092 		       lockdep_is_held(&net->xfrm.xfrm_state_lock));
1093 
1094 	if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
1095 		return;
1096 
1097 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
1098 		mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes,
1099 				     &auth_packets, &lastuse);
1100 		x->stats.integrity_failed += auth_packets;
1101 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets);
1102 
1103 		mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes,
1104 				     &trailer_packets, &lastuse);
1105 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets);
1106 	}
1107 
1108 	if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
1109 		return;
1110 
1111 	if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
1112 		mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes,
1113 				     &replay_packets, &lastuse);
1114 		x->stats.replay += replay_packets;
1115 		XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets);
1116 	}
1117 
1118 	mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
1119 	success_packets = packets - auth_packets - trailer_packets - replay_packets;
1120 	x->curlft.packets += success_packets;
1121 	/* NIC counts all bytes passed through flow steering and doesn't have
1122 	 * the ability to count only the payload size, which is what the SA needs.
1123 	 *
1124 	 * To overcome this HW limitation, let's approximate the payload size
1125 	 * by removing the always-present headers.
1126 	 */
1127 	headers = sizeof(struct ethhdr);
1128 	if (sa_entry->attrs.addrs.family == AF_INET)
1129 		headers += sizeof(struct iphdr);
1130 	else
1131 		headers += sizeof(struct ipv6hdr);
1132 
1133 	success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes;
1134 	x->curlft.bytes += success_bytes - headers * success_packets;
1135 }
1136 
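/* Convert a per-word prefix length into a big-endian 32-bit mask: negative
 * lengths yield an empty mask, lengths of 0 or above 31 yield a full mask,
 * otherwise the top 'prefix' bits are set.
 */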
1137 static __be32 word_to_mask(int prefix)
1138 {
1139 	if (prefix < 0)
1140 		return 0;
1141 
1142 	if (!prefix || prefix > 31)
1143 		return cpu_to_be32(0xFFFFFFFF);
1144 
1145 	return cpu_to_be32(((1U << prefix) - 1) << (32 - prefix));
1146 }
1147 
1148 static void mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr *addrs,
1149 				    struct xfrm_selector *sel)
1150 {
1151 	int i;
1152 
1153 	if (addrs->family == AF_INET) {
1154 		addrs->smask.m4 = word_to_mask(sel->prefixlen_s);
1155 		addrs->saddr.a4 &= addrs->smask.m4;
1156 		addrs->dmask.m4 = word_to_mask(sel->prefixlen_d);
1157 		addrs->daddr.a4 &= addrs->dmask.m4;
1158 		return;
1159 	}
1160 
1161 	for (i = 0; i < 4; i++) {
1162 		if (sel->prefixlen_s != 32 * i)
1163 			addrs->smask.m6[i] =
1164 				word_to_mask(sel->prefixlen_s - 32 * i);
1165 		addrs->saddr.a6[i] &= addrs->smask.m6[i];
1166 
1167 		if (sel->prefixlen_d != 32 * i)
1168 			addrs->dmask.m6[i] =
1169 				word_to_mask(sel->prefixlen_d - 32 * i);
1170 		addrs->daddr.a6[i] &= addrs->dmask.m6[i];
1171 	}
1172 }
1173 
1174 static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
1175 				      struct xfrm_policy *x,
1176 				      struct netlink_ext_ack *extack)
1177 {
1178 	struct xfrm_selector *sel = &x->selector;
1179 
1180 	if (x->type != XFRM_POLICY_TYPE_MAIN) {
1181 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types");
1182 		return -EINVAL;
1183 	}
1184 
1185 	/* Note that only one template is supported */
1186 	if (x->xfrm_nr > 1) {
1187 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template");
1188 		return -EINVAL;
1189 	}
1190 
1191 	if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN &&
1192 	    x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) {
1193 		NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy");
1194 		return -EINVAL;
1195 	}
1196 
1197 	if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP &&
1198 	    addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) {
1199 		NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0");
1200 		return -EINVAL;
1201 	}
1202 
1203 	if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) {
1204 		NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
1205 		return -EINVAL;
1206 	}
1207 
1208 	if (x->selector.proto != IPPROTO_IP &&
1209 	    x->selector.proto != IPPROTO_UDP &&
1210 	    x->selector.proto != IPPROTO_TCP) {
1211 		NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
1212 		return -EINVAL;
1213 	}
1214 
1215 	if (x->priority) {
1216 		if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) {
1217 			NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority");
1218 			return -EINVAL;
1219 		}
1220 
1221 		if (x->priority == U32_MAX) {
1222 			NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority");
1223 			return -EINVAL;
1224 		}
1225 	}
1226 
1227 	if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET &&
1228 	    !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
1229 		NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
1230 		return -EINVAL;
1231 	}
1232 
1233 	return 0;
1234 }
1235 
1236 static void
1237 mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
1238 				  struct mlx5_accel_pol_xfrm_attrs *attrs)
1239 {
1240 	struct xfrm_policy *x = pol_entry->x;
1241 	struct xfrm_selector *sel;
1242 
1243 	sel = &x->selector;
1244 	memset(attrs, 0, sizeof(*attrs));
1245 
1246 	memcpy(&attrs->addrs.saddr, sel->saddr.a6, sizeof(attrs->addrs.saddr));
1247 	memcpy(&attrs->addrs.daddr, sel->daddr.a6, sizeof(attrs->addrs.daddr));
1248 	attrs->addrs.family = sel->family;
1249 	mlx5e_ipsec_policy_mask(&attrs->addrs, sel);
1250 	attrs->dir = x->xdo.dir;
1251 	attrs->action = x->action;
1252 	attrs->type = XFRM_DEV_OFFLOAD_PACKET;
1253 	attrs->reqid = x->xfrm_vec[0].reqid;
1254 	attrs->upspec.dport = ntohs(sel->dport);
1255 	attrs->upspec.dport_mask = ntohs(sel->dport_mask);
1256 	attrs->upspec.sport = ntohs(sel->sport);
1257 	attrs->upspec.sport_mask = ntohs(sel->sport_mask);
1258 	attrs->upspec.proto = sel->proto;
1259 	attrs->prio = x->priority;
1260 }
1261 
1262 static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
1263 				 struct netlink_ext_ack *extack)
1264 {
1265 	struct net_device *netdev = x->xdo.dev;
1266 	struct mlx5e_ipsec_pol_entry *pol_entry;
1267 	struct mlx5e_priv *priv;
1268 	int err;
1269 
1270 	priv = netdev_priv(netdev);
1271 	if (!priv->ipsec) {
1272 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload");
1273 		return -EOPNOTSUPP;
1274 	}
1275 
1276 	err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack);
1277 	if (err)
1278 		return err;
1279 
1280 	pol_entry = kzalloc(sizeof(*pol_entry), GFP_KERNEL);
1281 	if (!pol_entry)
1282 		return -ENOMEM;
1283 
1284 	pol_entry->x = x;
1285 	pol_entry->ipsec = priv->ipsec;
1286 
1287 	if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
1288 		err = -EBUSY;
1289 		goto ipsec_busy;
1290 	}
1291 
1292 	mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs);
1293 	err = mlx5e_accel_ipsec_fs_add_pol(pol_entry);
1294 	if (err)
1295 		goto err_fs;
1296 
1297 	x->xdo.offload_handle = (unsigned long)pol_entry;
1298 	return 0;
1299 
1300 err_fs:
1301 	mlx5_eswitch_unblock_ipsec(priv->mdev);
1302 ipsec_busy:
1303 	kfree(pol_entry);
1304 	NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
1305 	return err;
1306 }
1307 
1308 static void mlx5e_xfrm_del_policy(struct xfrm_policy *x)
1309 {
1310 	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
1311 
1312 	mlx5e_accel_ipsec_fs_del_pol(pol_entry);
1313 	mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev);
1314 }
1315 
1316 static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
1317 {
1318 	struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
1319 
1320 	kfree(pol_entry);
1321 }
1322 
1323 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
1324 	.xdo_dev_state_add	= mlx5e_xfrm_add_state,
1325 	.xdo_dev_state_delete	= mlx5e_xfrm_del_state,
1326 	.xdo_dev_state_free	= mlx5e_xfrm_free_state,
1327 	.xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
1328 
1329 	.xdo_dev_state_update_stats = mlx5e_xfrm_update_stats,
1330 	.xdo_dev_policy_add = mlx5e_xfrm_add_policy,
1331 	.xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
1332 	.xdo_dev_policy_free = mlx5e_xfrm_free_policy,
1333 };
1334 
1335 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
1336 {
1337 	struct mlx5_core_dev *mdev = priv->mdev;
1338 	struct net_device *netdev = priv->netdev;
1339 
1340 	if (!mlx5_ipsec_device_caps(mdev))
1341 		return;
1342 
1343 	mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
1344 
1345 	netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
1346 	netdev->features |= NETIF_F_HW_ESP;
1347 	netdev->hw_enc_features |= NETIF_F_HW_ESP;
1348 
1349 	if (!MLX5_CAP_ETH(mdev, swp_csum)) {
1350 		mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
1351 		return;
1352 	}
1353 
1354 	netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
1355 	netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
1356 
1357 	if (!MLX5_CAP_ETH(mdev, swp_lso)) {
1358 		mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
1359 		return;
1360 	}
1361 
1362 	netdev->gso_partial_features |= NETIF_F_GSO_ESP;
1363 	mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
1364 	netdev->features |= NETIF_F_GSO_ESP;
1365 	netdev->hw_features |= NETIF_F_GSO_ESP;
1366 	netdev->hw_enc_features |= NETIF_F_GSO_ESP;
1367 }
1368