1 /*
2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33
34 #include <crypto/internal/geniv.h>
35 #include <crypto/aead.h>
36 #include <linux/inetdevice.h>
37 #include <linux/netdevice.h>
38 #include <net/netevent.h>
39 #include <net/ipv6_stubs.h>
40
41 #include "en.h"
42 #include "eswitch.h"
43 #include "ipsec.h"
44 #include "ipsec_rxtx.h"
45 #include "en_rep.h"
46
/* Delay, in jiffies (1000 ms), used whenever the per-SA software-limits
 * delayed work is queued or re-queued.
 */
#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
/* XArray mark set on sadb entries of packet-offload tunnel-mode SAs; the
 * netevent notifier in this file iterates only over entries carrying it.
 */
#define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
49
to_ipsec_sa_entry(struct xfrm_state * x)50 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
51 {
52 return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
53 }
54
to_ipsec_pol_entry(struct xfrm_policy * x)55 static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
56 {
57 return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
58 }
59
mlx5e_ipsec_handle_sw_limits(struct work_struct * _work)60 static void mlx5e_ipsec_handle_sw_limits(struct work_struct *_work)
61 {
62 struct mlx5e_ipsec_dwork *dwork =
63 container_of(_work, struct mlx5e_ipsec_dwork, dwork.work);
64 struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
65 struct xfrm_state *x = sa_entry->x;
66
67 if (sa_entry->attrs.drop)
68 return;
69
70 spin_lock_bh(&x->lock);
71 if (x->km.state == XFRM_STATE_EXPIRED) {
72 sa_entry->attrs.drop = true;
73 spin_unlock_bh(&x->lock);
74
75 mlx5e_accel_ipsec_fs_modify(sa_entry);
76 return;
77 }
78
79 if (x->km.state != XFRM_STATE_VALID) {
80 spin_unlock_bh(&x->lock);
81 return;
82 }
83
84 xfrm_state_check_expire(x);
85 spin_unlock_bh(&x->lock);
86
87 queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
88 MLX5_IPSEC_RESCHED);
89 }
90
mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry * sa_entry)91 static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
92 {
93 struct xfrm_state *x = sa_entry->x;
94 u32 seq_bottom = 0;
95 u32 esn, esn_msb;
96 u8 overlap;
97
98 switch (x->xso.dir) {
99 case XFRM_DEV_OFFLOAD_IN:
100 esn = x->replay_esn->seq;
101 esn_msb = x->replay_esn->seq_hi;
102 break;
103 case XFRM_DEV_OFFLOAD_OUT:
104 esn = x->replay_esn->oseq;
105 esn_msb = x->replay_esn->oseq_hi;
106 break;
107 default:
108 WARN_ON(true);
109 return false;
110 }
111
112 overlap = sa_entry->esn_state.overlap;
113
114 if (!x->replay_esn->replay_window) {
115 seq_bottom = esn;
116 } else {
117 if (esn >= x->replay_esn->replay_window)
118 seq_bottom = esn - x->replay_esn->replay_window + 1;
119
120 if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
121 esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
122 }
123
124 if (sa_entry->esn_state.esn_msb)
125 sa_entry->esn_state.esn = esn;
126 else
127 /* According to RFC4303, section "3.3.3. Sequence Number Generation",
128 * the first packet sent using a given SA will contain a sequence
129 * number of 1.
130 */
131 sa_entry->esn_state.esn = max_t(u32, esn, 1);
132 sa_entry->esn_state.esn_msb = esn_msb;
133
134 if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
135 sa_entry->esn_state.overlap = 0;
136 return true;
137 } else if (unlikely(!overlap &&
138 (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
139 sa_entry->esn_state.overlap = 1;
140 return true;
141 }
142
143 return false;
144 }
145
mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry * sa_entry,struct mlx5_accel_esp_xfrm_attrs * attrs)146 static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
147 struct mlx5_accel_esp_xfrm_attrs *attrs)
148 {
149 struct xfrm_state *x = sa_entry->x;
150 s64 start_value, n;
151
152 attrs->lft.hard_packet_limit = x->lft.hard_packet_limit;
153 attrs->lft.soft_packet_limit = x->lft.soft_packet_limit;
154 if (x->lft.soft_packet_limit == XFRM_INF)
155 return;
156
157 /* Compute hard limit initial value and number of rounds.
158 *
159 * The counting pattern of hardware counter goes:
160 * value -> 2^31-1
161 * 2^31 | (2^31-1) -> 2^31-1
162 * 2^31 | (2^31-1) -> 2^31-1
163 * [..]
164 * 2^31 | (2^31-1) -> 0
165 *
166 * The pattern is created by using an ASO operation to atomically set
167 * bit 31 after the down counter clears bit 31. This is effectively an
168 * atomic addition of 2**31 to the counter.
169 *
170 * We wish to configure the counter, within the above pattern, so that
171 * when it reaches 0, it has hit the hard limit. This is defined by this
172 * system of equations:
173 *
174 * hard_limit == start_value + n * 2^31
175 * n >= 0
176 * start_value < 2^32, start_value >= 0
177 *
178 * These equations are not single-solution, there are often two choices:
179 * hard_limit == start_value + n * 2^31
180 * hard_limit == (start_value+2^31) + (n-1) * 2^31
181 *
182 * The algorithm selects the solution that keeps the counter value
183 * above 2^31 until the final iteration.
184 */
185
186 /* Start by estimating n and compute start_value */
187 n = attrs->lft.hard_packet_limit / BIT_ULL(31);
188 start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
189
190 /* Choose the best of the two solutions: */
191 if (n >= 1)
192 n -= 1;
193
194 /* Computed values solve the system of equations: */
195 start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
196
197 /* The best solution means: when there are multiple iterations we must
198 * start above 2^31 and count down to 2**31 to get the interrupt.
199 */
200 attrs->lft.hard_packet_limit = lower_32_bits(start_value);
201 attrs->lft.numb_rounds_hard = (u64)n;
202
203 /* Compute soft limit initial value and number of rounds.
204 *
205 * The soft_limit is achieved by adjusting the counter's
206 * interrupt_value. This is embedded in the counting pattern created by
207 * hard packet calculations above.
208 *
209 * We wish to compute the interrupt_value for the soft_limit. This is
210 * defined by this system of equations:
211 *
212 * soft_limit == start_value - soft_value + n * 2^31
213 * n >= 0
214 * soft_value < 2^32, soft_value >= 0
215 * for n == 0 start_value > soft_value
216 *
217 * As with compute_hard_n_value() the equations are not single-solution.
218 * The algorithm selects the solution that has:
219 * 2^30 <= soft_limit < 2^31 + 2^30
220 * for the interior iterations, which guarantees a large guard band
221 * around the counter hard limit and next interrupt.
222 */
223
224 /* Start by estimating n and compute soft_value */
225 n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31);
226 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) -
227 x->lft.soft_packet_limit;
228
229 /* Compare against constraints and adjust n */
230 if (n < 0)
231 n = 0;
232 else if (start_value >= BIT_ULL(32))
233 n -= 1;
234 else if (start_value < 0)
235 n += 1;
236
237 /* Choose the best of the two solutions: */
238 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
239 if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30))
240 n += 1;
241
242 /* Note that the upper limit of soft_value happens naturally because we
243 * always select the lowest soft_value.
244 */
245
246 /* Computed values solve the system of equations: */
247 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
248
249 /* The best solution means: when there are multiple iterations we must
250 * not fall below 2^30 as that would get too close to the false
251 * hard_limit and when we reach an interior iteration for soft_limit it
252 * has to be far away from 2**32-1 which is the counter reset point
253 * after the +2^31 to accommodate latency.
254 */
255 attrs->lft.soft_packet_limit = lower_32_bits(start_value);
256 attrs->lft.numb_rounds_soft = (u64)n;
257 }
258
mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry * sa_entry,struct mlx5_accel_esp_xfrm_attrs * attrs)259 static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
260 struct mlx5_accel_esp_xfrm_attrs *attrs)
261 {
262 struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
263 struct mlx5e_ipsec_addr *addrs = &attrs->addrs;
264 struct net_device *netdev = sa_entry->dev;
265 struct xfrm_state *x = sa_entry->x;
266 struct dst_entry *rt_dst_entry;
267 struct flowi4 fl4 = {};
268 struct flowi6 fl6 = {};
269 struct neighbour *n;
270 u8 addr[ETH_ALEN];
271 struct rtable *rt;
272 const void *pkey;
273 u8 *dst, *src;
274
275 if (attrs->mode != XFRM_MODE_TUNNEL ||
276 attrs->type != XFRM_DEV_OFFLOAD_PACKET)
277 return;
278
279 mlx5_query_mac_address(mdev, addr);
280 switch (attrs->dir) {
281 case XFRM_DEV_OFFLOAD_IN:
282 src = attrs->dmac;
283 dst = attrs->smac;
284
285 switch (addrs->family) {
286 case AF_INET:
287 fl4.flowi4_proto = x->sel.proto;
288 fl4.daddr = addrs->saddr.a4;
289 fl4.saddr = addrs->daddr.a4;
290 pkey = &addrs->saddr.a4;
291 break;
292 case AF_INET6:
293 fl6.flowi6_proto = x->sel.proto;
294 memcpy(fl6.daddr.s6_addr32, addrs->saddr.a6, 16);
295 memcpy(fl6.saddr.s6_addr32, addrs->daddr.a6, 16);
296 pkey = &addrs->saddr.a6;
297 break;
298 default:
299 return;
300 }
301 break;
302 case XFRM_DEV_OFFLOAD_OUT:
303 src = attrs->smac;
304 dst = attrs->dmac;
305 switch (addrs->family) {
306 case AF_INET:
307 fl4.flowi4_proto = x->sel.proto;
308 fl4.daddr = addrs->daddr.a4;
309 fl4.saddr = addrs->saddr.a4;
310 pkey = &addrs->daddr.a4;
311 break;
312 case AF_INET6:
313 fl6.flowi6_proto = x->sel.proto;
314 memcpy(fl6.daddr.s6_addr32, addrs->daddr.a6, 16);
315 memcpy(fl6.saddr.s6_addr32, addrs->saddr.a6, 16);
316 pkey = &addrs->daddr.a6;
317 break;
318 default:
319 return;
320 }
321 break;
322 default:
323 return;
324 }
325
326 ether_addr_copy(src, addr);
327
328 /* Destination can refer to a routed network, so perform FIB lookup
329 * to resolve nexthop and get its MAC. Neighbour resolution is used as
330 * fallback.
331 */
332 switch (addrs->family) {
333 case AF_INET:
334 rt = ip_route_output_key(dev_net(netdev), &fl4);
335 if (IS_ERR(rt))
336 goto neigh;
337
338 if (rt->rt_type != RTN_UNICAST) {
339 ip_rt_put(rt);
340 goto neigh;
341 }
342 rt_dst_entry = &rt->dst;
343 break;
344 case AF_INET6:
345 rt_dst_entry = ipv6_stub->ipv6_dst_lookup_flow(
346 dev_net(netdev), NULL, &fl6, NULL);
347 if (IS_ERR(rt_dst_entry))
348 goto neigh;
349 break;
350 default:
351 return;
352 }
353
354 n = dst_neigh_lookup(rt_dst_entry, pkey);
355 if (!n) {
356 dst_release(rt_dst_entry);
357 goto neigh;
358 }
359
360 neigh_ha_snapshot(addr, n, netdev);
361 ether_addr_copy(dst, addr);
362 dst_release(rt_dst_entry);
363 neigh_release(n);
364 return;
365
366 neigh:
367 n = neigh_lookup(&arp_tbl, pkey, netdev);
368 if (!n) {
369 n = neigh_create(&arp_tbl, pkey, netdev);
370 if (IS_ERR(n))
371 return;
372 neigh_event_send(n, NULL);
373 attrs->drop = true;
374 } else {
375 neigh_ha_snapshot(addr, n, netdev);
376 ether_addr_copy(dst, addr);
377 }
378 neigh_release(n);
379 }
380
mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr * addrs)381 static void mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr *addrs)
382 {
383 /*
384 * State doesn't have subnet prefixes in outer headers.
385 * The match is performed for exaxt source/destination addresses.
386 */
387 memset(addrs->smask.m6, 0xFF, sizeof(__be32) * 4);
388 memset(addrs->dmask.m6, 0xFF, sizeof(__be32) * 4);
389 }
390
mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry * sa_entry,struct mlx5_accel_esp_xfrm_attrs * attrs)391 void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
392 struct mlx5_accel_esp_xfrm_attrs *attrs)
393 {
394 struct xfrm_state *x = sa_entry->x;
395 struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
396 struct aead_geniv_ctx *geniv_ctx;
397 struct crypto_aead *aead;
398 unsigned int crypto_data_len, key_len;
399 int ivsize;
400
401 memset(attrs, 0, sizeof(*attrs));
402
403 /* key */
404 crypto_data_len = (x->aead->alg_key_len + 7) / 8;
405 key_len = crypto_data_len - 4; /* 4 bytes salt at end */
406
407 memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
408 aes_gcm->key_len = key_len * 8;
409
410 /* salt and seq_iv */
411 aead = x->data;
412 geniv_ctx = crypto_aead_ctx(aead);
413 ivsize = crypto_aead_ivsize(aead);
414 memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
415 memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
416 sizeof(aes_gcm->salt));
417
418 attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */
419
420 /* iv len */
421 aes_gcm->icv_len = x->aead->alg_icv_len;
422
423 attrs->dir = x->xso.dir;
424
425 /* esn */
426 if (x->props.flags & XFRM_STATE_ESN) {
427 attrs->replay_esn.trigger = true;
428 attrs->replay_esn.esn = sa_entry->esn_state.esn;
429 attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
430 attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
431 if (attrs->dir == XFRM_DEV_OFFLOAD_OUT)
432 goto skip_replay_window;
433
434 switch (x->replay_esn->replay_window) {
435 case 32:
436 attrs->replay_esn.replay_window =
437 MLX5_IPSEC_ASO_REPLAY_WIN_32BIT;
438 break;
439 case 64:
440 attrs->replay_esn.replay_window =
441 MLX5_IPSEC_ASO_REPLAY_WIN_64BIT;
442 break;
443 case 128:
444 attrs->replay_esn.replay_window =
445 MLX5_IPSEC_ASO_REPLAY_WIN_128BIT;
446 break;
447 case 256:
448 attrs->replay_esn.replay_window =
449 MLX5_IPSEC_ASO_REPLAY_WIN_256BIT;
450 break;
451 default:
452 WARN_ON(true);
453 return;
454 }
455 }
456
457 skip_replay_window:
458 /* spi */
459 attrs->spi = be32_to_cpu(x->id.spi);
460
461 /* source , destination ips */
462 memcpy(&attrs->addrs.saddr, x->props.saddr.a6,
463 sizeof(attrs->addrs.saddr));
464 memcpy(&attrs->addrs.daddr, x->id.daddr.a6, sizeof(attrs->addrs.daddr));
465 attrs->addrs.family = x->props.family;
466 mlx5e_ipsec_state_mask(&attrs->addrs);
467 attrs->type = x->xso.type;
468 attrs->reqid = x->props.reqid;
469 attrs->upspec.dport = ntohs(x->sel.dport);
470 attrs->upspec.dport_mask = ntohs(x->sel.dport_mask);
471 attrs->upspec.sport = ntohs(x->sel.sport);
472 attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
473 attrs->upspec.proto = x->sel.proto;
474 attrs->mode = x->props.mode;
475
476 mlx5e_ipsec_init_limits(sa_entry, attrs);
477 mlx5e_ipsec_init_macs(sa_entry, attrs);
478
479 if (x->encap) {
480 attrs->encap = true;
481 attrs->sport = x->encap->encap_sport;
482 attrs->dport = x->encap->encap_dport;
483 }
484 }
485
mlx5e_xfrm_validate_state(struct mlx5_core_dev * mdev,struct xfrm_state * x,struct netlink_ext_ack * extack)486 static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
487 struct xfrm_state *x,
488 struct netlink_ext_ack *extack)
489 {
490 if (x->props.aalgo != SADB_AALG_NONE) {
491 NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states");
492 return -EINVAL;
493 }
494 if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
495 NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded");
496 return -EINVAL;
497 }
498 if (x->props.calgo != SADB_X_CALG_NONE) {
499 NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states");
500 return -EINVAL;
501 }
502 if (x->props.flags & XFRM_STATE_ESN &&
503 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) {
504 NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states");
505 return -EINVAL;
506 }
507 if (x->props.family != AF_INET &&
508 x->props.family != AF_INET6) {
509 NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded");
510 return -EINVAL;
511 }
512 if (x->id.proto != IPPROTO_ESP) {
513 NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded");
514 return -EINVAL;
515 }
516 if (x->encap) {
517 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) {
518 NL_SET_ERR_MSG_MOD(extack,
519 "Encapsulation is not supported");
520 return -EINVAL;
521 }
522
523 if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
524 NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported");
525 return -EINVAL;
526 }
527
528 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) {
529 NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only");
530 return -EINVAL;
531 }
532
533 if (x->props.mode != XFRM_MODE_TRANSPORT) {
534 NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only");
535 return -EINVAL;
536 }
537 }
538 if (!x->aead) {
539 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead");
540 return -EINVAL;
541 }
542 if (x->aead->alg_icv_len != 128) {
543 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit");
544 return -EINVAL;
545 }
546 if ((x->aead->alg_key_len != 128 + 32) &&
547 (x->aead->alg_key_len != 256 + 32)) {
548 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit");
549 return -EINVAL;
550 }
551 if (x->tfcpad) {
552 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding");
553 return -EINVAL;
554 }
555 if (!x->geniv) {
556 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv");
557 return -EINVAL;
558 }
559 if (strcmp(x->geniv, "seqiv")) {
560 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv");
561 return -EINVAL;
562 }
563
564 if (x->sel.proto != IPPROTO_IP && x->sel.proto != IPPROTO_UDP &&
565 x->sel.proto != IPPROTO_TCP) {
566 NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
567 return -EINVAL;
568 }
569
570 if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
571 NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
572 return -EINVAL;
573 }
574
575 switch (x->xso.type) {
576 case XFRM_DEV_OFFLOAD_CRYPTO:
577 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
578 NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported");
579 return -EINVAL;
580 }
581
582 break;
583 case XFRM_DEV_OFFLOAD_PACKET:
584 if (!(mlx5_ipsec_device_caps(mdev) &
585 MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
586 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
587 return -EINVAL;
588 }
589
590 if (x->props.mode == XFRM_MODE_TUNNEL &&
591 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
592 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
593 return -EINVAL;
594 }
595
596 if (x->replay_esn && x->xso.dir == XFRM_DEV_OFFLOAD_IN &&
597 x->replay_esn->replay_window != 32 &&
598 x->replay_esn->replay_window != 64 &&
599 x->replay_esn->replay_window != 128 &&
600 x->replay_esn->replay_window != 256) {
601 NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size");
602 return -EINVAL;
603 }
604
605 if (!x->props.reqid) {
606 NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid");
607 return -EINVAL;
608 }
609
610 if (x->lft.soft_byte_limit >= x->lft.hard_byte_limit &&
611 x->lft.hard_byte_limit != XFRM_INF) {
612 /* XFRM stack doesn't prevent such configuration :(. */
613 NL_SET_ERR_MSG_MOD(extack, "Hard byte limit must be greater than soft one");
614 return -EINVAL;
615 }
616
617 if (!x->lft.soft_byte_limit || !x->lft.hard_byte_limit) {
618 NL_SET_ERR_MSG_MOD(extack, "Soft/hard byte limits can't be 0");
619 return -EINVAL;
620 }
621
622 if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit &&
623 x->lft.hard_packet_limit != XFRM_INF) {
624 /* XFRM stack doesn't prevent such configuration :(. */
625 NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one");
626 return -EINVAL;
627 }
628
629 if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) {
630 NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0");
631 return -EINVAL;
632 }
633 break;
634 default:
635 NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
636 return -EINVAL;
637 }
638 return 0;
639 }
640
mlx5e_ipsec_modify_state(struct work_struct * _work)641 static void mlx5e_ipsec_modify_state(struct work_struct *_work)
642 {
643 struct mlx5e_ipsec_work *work =
644 container_of(_work, struct mlx5e_ipsec_work, work);
645 struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
646 struct mlx5_accel_esp_xfrm_attrs *attrs;
647
648 attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs;
649
650 mlx5_accel_esp_modify_xfrm(sa_entry, attrs);
651 }
652
mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry * sa_entry)653 static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
654 {
655 struct xfrm_state *x = sa_entry->x;
656
657 if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO ||
658 x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
659 return;
660
661 if (x->props.flags & XFRM_STATE_ESN) {
662 sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn;
663 return;
664 }
665
666 sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
667 }
668
mlx5e_ipsec_handle_netdev_event(struct work_struct * _work)669 static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
670 {
671 struct mlx5e_ipsec_work *work =
672 container_of(_work, struct mlx5e_ipsec_work, work);
673 struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
674 struct mlx5e_ipsec_netevent_data *data = work->data;
675 struct mlx5_accel_esp_xfrm_attrs *attrs;
676
677 attrs = &sa_entry->attrs;
678
679 switch (attrs->dir) {
680 case XFRM_DEV_OFFLOAD_IN:
681 ether_addr_copy(attrs->smac, data->addr);
682 break;
683 case XFRM_DEV_OFFLOAD_OUT:
684 ether_addr_copy(attrs->dmac, data->addr);
685 break;
686 default:
687 WARN_ON_ONCE(true);
688 }
689 attrs->drop = false;
690 mlx5e_accel_ipsec_fs_modify(sa_entry);
691 }
692
mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry * sa_entry)693 static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
694 {
695 struct xfrm_state *x = sa_entry->x;
696 struct mlx5e_ipsec_work *work;
697 void *data = NULL;
698
699 switch (x->xso.type) {
700 case XFRM_DEV_OFFLOAD_CRYPTO:
701 if (!(x->props.flags & XFRM_STATE_ESN))
702 return 0;
703 break;
704 case XFRM_DEV_OFFLOAD_PACKET:
705 if (x->props.mode != XFRM_MODE_TUNNEL)
706 return 0;
707 break;
708 default:
709 break;
710 }
711
712 work = kzalloc(sizeof(*work), GFP_KERNEL);
713 if (!work)
714 return -ENOMEM;
715
716 switch (x->xso.type) {
717 case XFRM_DEV_OFFLOAD_CRYPTO:
718 data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
719 if (!data)
720 goto free_work;
721
722 INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
723 break;
724 case XFRM_DEV_OFFLOAD_PACKET:
725 data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
726 GFP_KERNEL);
727 if (!data)
728 goto free_work;
729
730 INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
731 break;
732 default:
733 break;
734 }
735
736 work->data = data;
737 work->sa_entry = sa_entry;
738 sa_entry->work = work;
739 return 0;
740
741 free_work:
742 kfree(work);
743 return -ENOMEM;
744 }
745
mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry * sa_entry)746 static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
747 {
748 struct xfrm_state *x = sa_entry->x;
749 struct mlx5e_ipsec_dwork *dwork;
750
751 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
752 return 0;
753
754 if (x->lft.soft_packet_limit == XFRM_INF &&
755 x->lft.hard_packet_limit == XFRM_INF &&
756 x->lft.soft_byte_limit == XFRM_INF &&
757 x->lft.hard_byte_limit == XFRM_INF)
758 return 0;
759
760 dwork = kzalloc(sizeof(*dwork), GFP_KERNEL);
761 if (!dwork)
762 return -ENOMEM;
763
764 dwork->sa_entry = sa_entry;
765 INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_sw_limits);
766 sa_entry->dwork = dwork;
767 return 0;
768 }
769
mlx5e_xfrm_add_state(struct net_device * dev,struct xfrm_state * x,struct netlink_ext_ack * extack)770 static int mlx5e_xfrm_add_state(struct net_device *dev,
771 struct xfrm_state *x,
772 struct netlink_ext_ack *extack)
773 {
774 struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
775 bool allow_tunnel_mode = false;
776 struct mlx5e_ipsec *ipsec;
777 struct mlx5e_priv *priv;
778 gfp_t gfp;
779 int err;
780
781 priv = netdev_priv(dev);
782 if (!priv->ipsec)
783 return -EOPNOTSUPP;
784
785 ipsec = priv->ipsec;
786 gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL;
787 sa_entry = kzalloc(sizeof(*sa_entry), gfp);
788 if (!sa_entry)
789 return -ENOMEM;
790
791 sa_entry->x = x;
792 sa_entry->dev = dev;
793 sa_entry->ipsec = ipsec;
794 /* Check if this SA is originated from acquire flow temporary SA */
795 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
796 goto out;
797
798 err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
799 if (err)
800 goto err_xfrm;
801
802 if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
803 err = -EBUSY;
804 goto err_xfrm;
805 }
806
807 if (mlx5_eswitch_block_mode(priv->mdev))
808 goto unblock_ipsec;
809
810 if (x->props.mode == XFRM_MODE_TUNNEL &&
811 x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
812 allow_tunnel_mode = mlx5e_ipsec_fs_tunnel_allowed(sa_entry);
813 if (!allow_tunnel_mode) {
814 NL_SET_ERR_MSG_MOD(extack,
815 "Packet offload tunnel mode is disabled due to encap settings");
816 err = -EINVAL;
817 goto unblock_mode;
818 }
819 }
820
821 /* check esn */
822 if (x->props.flags & XFRM_STATE_ESN)
823 mlx5e_ipsec_update_esn_state(sa_entry);
824 else
825 /* According to RFC4303, section "3.3.3. Sequence Number Generation",
826 * the first packet sent using a given SA will contain a sequence
827 * number of 1.
828 */
829 sa_entry->esn_state.esn = 1;
830
831 mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
832
833 err = mlx5_ipsec_create_work(sa_entry);
834 if (err)
835 goto unblock_encap;
836
837 err = mlx5e_ipsec_create_dwork(sa_entry);
838 if (err)
839 goto release_work;
840
841 /* create hw context */
842 err = mlx5_ipsec_create_sa_ctx(sa_entry);
843 if (err)
844 goto release_dwork;
845
846 err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
847 if (err)
848 goto err_hw_ctx;
849
850 /* We use *_bh() variant because xfrm_timer_handler(), which runs
851 * in softirq context, can reach our state delete logic and we need
852 * xa_erase_bh() there.
853 */
854 err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry,
855 GFP_KERNEL);
856 if (err)
857 goto err_add_rule;
858
859 mlx5e_ipsec_set_esn_ops(sa_entry);
860
861 if (sa_entry->dwork)
862 queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
863 MLX5_IPSEC_RESCHED);
864
865 if (allow_tunnel_mode) {
866 xa_lock_bh(&ipsec->sadb);
867 __xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
868 MLX5E_IPSEC_TUNNEL_SA);
869 xa_unlock_bh(&ipsec->sadb);
870 }
871
872 out:
873 x->xso.offload_handle = (unsigned long)sa_entry;
874 if (allow_tunnel_mode)
875 mlx5_eswitch_unblock_encap(priv->mdev);
876
877 mlx5_eswitch_unblock_mode(priv->mdev);
878
879 return 0;
880
881 err_add_rule:
882 mlx5e_accel_ipsec_fs_del_rule(sa_entry);
883 err_hw_ctx:
884 mlx5_ipsec_free_sa_ctx(sa_entry);
885 release_dwork:
886 kfree(sa_entry->dwork);
887 release_work:
888 if (sa_entry->work)
889 kfree(sa_entry->work->data);
890 kfree(sa_entry->work);
891 unblock_encap:
892 if (allow_tunnel_mode)
893 mlx5_eswitch_unblock_encap(priv->mdev);
894 unblock_mode:
895 mlx5_eswitch_unblock_mode(priv->mdev);
896 unblock_ipsec:
897 mlx5_eswitch_unblock_ipsec(priv->mdev);
898 err_xfrm:
899 kfree(sa_entry);
900 NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
901 return err;
902 }
903
mlx5e_xfrm_del_state(struct net_device * dev,struct xfrm_state * x)904 static void mlx5e_xfrm_del_state(struct net_device *dev, struct xfrm_state *x)
905 {
906 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
907 struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
908 struct mlx5e_ipsec_sa_entry *old;
909
910 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
911 return;
912
913 old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
914 WARN_ON(old != sa_entry);
915 }
916
mlx5e_xfrm_free_state(struct net_device * dev,struct xfrm_state * x)917 static void mlx5e_xfrm_free_state(struct net_device *dev, struct xfrm_state *x)
918 {
919 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
920 struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
921
922 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
923 goto sa_entry_free;
924
925 if (sa_entry->work)
926 cancel_work_sync(&sa_entry->work->work);
927
928 if (sa_entry->dwork)
929 cancel_delayed_work_sync(&sa_entry->dwork->dwork);
930
931 mlx5e_accel_ipsec_fs_del_rule(sa_entry);
932 mlx5_ipsec_free_sa_ctx(sa_entry);
933 kfree(sa_entry->dwork);
934 if (sa_entry->work)
935 kfree(sa_entry->work->data);
936 kfree(sa_entry->work);
937 mlx5_eswitch_unblock_ipsec(ipsec->mdev);
938 sa_entry_free:
939 kfree(sa_entry);
940 }
941
mlx5e_ipsec_netevent_event(struct notifier_block * nb,unsigned long event,void * ptr)942 static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
943 unsigned long event, void *ptr)
944 {
945 struct mlx5_accel_esp_xfrm_attrs *attrs;
946 struct mlx5e_ipsec_netevent_data *data;
947 struct mlx5e_ipsec_sa_entry *sa_entry;
948 struct mlx5e_ipsec *ipsec;
949 struct neighbour *n = ptr;
950 unsigned long idx;
951
952 if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
953 return NOTIFY_DONE;
954
955 ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
956 xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
957 attrs = &sa_entry->attrs;
958
959 if (attrs->addrs.family == AF_INET) {
960 if (!neigh_key_eq32(n, &attrs->addrs.saddr.a4) &&
961 !neigh_key_eq32(n, &attrs->addrs.daddr.a4))
962 continue;
963 } else {
964 if (!neigh_key_eq128(n, &attrs->addrs.saddr.a4) &&
965 !neigh_key_eq128(n, &attrs->addrs.daddr.a4))
966 continue;
967 }
968
969 data = sa_entry->work->data;
970
971 neigh_ha_snapshot(data->addr, n, sa_entry->dev);
972 queue_work(ipsec->wq, &sa_entry->work->work);
973 }
974
975 return NOTIFY_DONE;
976 }
977
mlx5e_ipsec_init(struct mlx5e_priv * priv)978 void mlx5e_ipsec_init(struct mlx5e_priv *priv)
979 {
980 struct mlx5e_ipsec *ipsec;
981 int ret = -ENOMEM;
982
983 if (!mlx5_ipsec_device_caps(priv->mdev)) {
984 netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
985 return;
986 }
987
988 ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
989 if (!ipsec)
990 return;
991
992 xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC);
993 ipsec->mdev = priv->mdev;
994 init_completion(&ipsec->comp);
995 ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0,
996 priv->netdev->name);
997 if (!ipsec->wq)
998 goto err_wq;
999
1000 if (mlx5_ipsec_device_caps(priv->mdev) &
1001 MLX5_IPSEC_CAP_PACKET_OFFLOAD) {
1002 ret = mlx5e_ipsec_aso_init(ipsec);
1003 if (ret)
1004 goto err_aso;
1005 }
1006
1007 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
1008 ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
1009 ret = register_netevent_notifier(&ipsec->netevent_nb);
1010 if (ret)
1011 goto clear_aso;
1012 }
1013
1014 ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv);
1015 ret = mlx5e_accel_ipsec_fs_init(ipsec, &priv->devcom);
1016 if (ret)
1017 goto err_fs_init;
1018
1019 ipsec->fs = priv->fs;
1020 priv->ipsec = ipsec;
1021 netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
1022 return;
1023
1024 err_fs_init:
1025 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
1026 unregister_netevent_notifier(&ipsec->netevent_nb);
1027 clear_aso:
1028 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
1029 mlx5e_ipsec_aso_cleanup(ipsec);
1030 err_aso:
1031 destroy_workqueue(ipsec->wq);
1032 err_wq:
1033 kfree(ipsec);
1034 mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret);
1035 return;
1036 }
1037
mlx5e_ipsec_cleanup(struct mlx5e_priv * priv)1038 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
1039 {
1040 struct mlx5e_ipsec *ipsec = priv->ipsec;
1041
1042 if (!ipsec)
1043 return;
1044
1045 mlx5e_accel_ipsec_fs_cleanup(ipsec);
1046 if (ipsec->netevent_nb.notifier_call) {
1047 unregister_netevent_notifier(&ipsec->netevent_nb);
1048 ipsec->netevent_nb.notifier_call = NULL;
1049 }
1050 if (ipsec->aso)
1051 mlx5e_ipsec_aso_cleanup(ipsec);
1052 destroy_workqueue(ipsec->wq);
1053 kfree(ipsec);
1054 priv->ipsec = NULL;
1055 }
1056
mlx5e_xfrm_advance_esn_state(struct xfrm_state * x)1057 static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
1058 {
1059 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
1060 struct mlx5e_ipsec_work *work = sa_entry->work;
1061 struct mlx5e_ipsec_sa_entry *sa_entry_shadow;
1062 bool need_update;
1063
1064 need_update = mlx5e_ipsec_update_esn_state(sa_entry);
1065 if (!need_update)
1066 return;
1067
1068 sa_entry_shadow = work->data;
1069 memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow));
1070 mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs);
1071 queue_work(sa_entry->ipsec->wq, &work->work);
1072 }
1073
mlx5e_xfrm_update_stats(struct xfrm_state * x)1074 static void mlx5e_xfrm_update_stats(struct xfrm_state *x)
1075 {
1076 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
1077 struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
1078 struct net *net = dev_net(x->xso.dev);
1079 u64 trailer_packets = 0, trailer_bytes = 0;
1080 u64 replay_packets = 0, replay_bytes = 0;
1081 u64 auth_packets = 0, auth_bytes = 0;
1082 u64 success_packets, success_bytes;
1083 u64 packets, bytes, lastuse;
1084 size_t headers;
1085
1086 lockdep_assert(lockdep_is_held(&x->lock) ||
1087 lockdep_is_held(&net->xfrm.xfrm_cfg_mutex) ||
1088 lockdep_is_held(&net->xfrm.xfrm_state_lock));
1089
1090 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
1091 return;
1092
1093 if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
1094 mlx5_fc_query_cached(ipsec_rule->auth.fc, &auth_bytes,
1095 &auth_packets, &lastuse);
1096 x->stats.integrity_failed += auth_packets;
1097 XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR, auth_packets);
1098
1099 mlx5_fc_query_cached(ipsec_rule->trailer.fc, &trailer_bytes,
1100 &trailer_packets, &lastuse);
1101 XFRM_ADD_STATS(net, LINUX_MIB_XFRMINHDRERROR, trailer_packets);
1102 }
1103
1104 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
1105 return;
1106
1107 if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) {
1108 mlx5_fc_query_cached(ipsec_rule->replay.fc, &replay_bytes,
1109 &replay_packets, &lastuse);
1110 x->stats.replay += replay_packets;
1111 XFRM_ADD_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR, replay_packets);
1112 }
1113
1114 mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
1115 success_packets = packets - auth_packets - trailer_packets - replay_packets;
1116 x->curlft.packets += success_packets;
1117 /* NIC counts all bytes passed through flow steering and doesn't have
1118 * an ability to count payload data size which is needed for SA.
1119 *
1120 * To overcome HW limitestion, let's approximate the payload size
1121 * by removing always available headers.
1122 */
1123 headers = sizeof(struct ethhdr);
1124 if (sa_entry->attrs.addrs.family == AF_INET)
1125 headers += sizeof(struct iphdr);
1126 else
1127 headers += sizeof(struct ipv6hdr);
1128
1129 success_bytes = bytes - auth_bytes - trailer_bytes - replay_bytes;
1130 x->curlft.bytes += success_bytes - headers * success_packets;
1131 }
1132
word_to_mask(int prefix)1133 static __be32 word_to_mask(int prefix)
1134 {
1135 if (prefix < 0)
1136 return 0;
1137
1138 if (!prefix || prefix > 31)
1139 return cpu_to_be32(0xFFFFFFFF);
1140
1141 return cpu_to_be32(((1U << prefix) - 1) << (32 - prefix));
1142 }
1143
mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr * addrs,struct xfrm_selector * sel)1144 static void mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr *addrs,
1145 struct xfrm_selector *sel)
1146 {
1147 int i;
1148
1149 if (addrs->family == AF_INET) {
1150 addrs->smask.m4 = word_to_mask(sel->prefixlen_s);
1151 addrs->saddr.a4 &= addrs->smask.m4;
1152 addrs->dmask.m4 = word_to_mask(sel->prefixlen_d);
1153 addrs->daddr.a4 &= addrs->dmask.m4;
1154 return;
1155 }
1156
1157 for (i = 0; i < 4; i++) {
1158 if (sel->prefixlen_s != 32 * i)
1159 addrs->smask.m6[i] =
1160 word_to_mask(sel->prefixlen_s - 32 * i);
1161 addrs->saddr.a6[i] &= addrs->smask.m6[i];
1162
1163 if (sel->prefixlen_d != 32 * i)
1164 addrs->dmask.m6[i] =
1165 word_to_mask(sel->prefixlen_d - 32 * i);
1166 addrs->daddr.a6[i] &= addrs->dmask.m6[i];
1167 }
1168 }
1169
mlx5e_xfrm_validate_policy(struct mlx5_core_dev * mdev,struct xfrm_policy * x,struct netlink_ext_ack * extack)1170 static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
1171 struct xfrm_policy *x,
1172 struct netlink_ext_ack *extack)
1173 {
1174 struct xfrm_selector *sel = &x->selector;
1175
1176 if (x->type != XFRM_POLICY_TYPE_MAIN) {
1177 NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types");
1178 return -EINVAL;
1179 }
1180
1181 /* Please pay attention that we support only one template */
1182 if (x->xfrm_nr > 1) {
1183 NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template");
1184 return -EINVAL;
1185 }
1186
1187 if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN &&
1188 x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) {
1189 NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy");
1190 return -EINVAL;
1191 }
1192
1193 if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP &&
1194 addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) {
1195 NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0");
1196 return -EINVAL;
1197 }
1198
1199 if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) {
1200 NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
1201 return -EINVAL;
1202 }
1203
1204 if (x->selector.proto != IPPROTO_IP &&
1205 x->selector.proto != IPPROTO_UDP &&
1206 x->selector.proto != IPPROTO_TCP) {
1207 NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
1208 return -EINVAL;
1209 }
1210
1211 if (x->priority) {
1212 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) {
1213 NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority");
1214 return -EINVAL;
1215 }
1216
1217 if (x->priority == U32_MAX) {
1218 NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority");
1219 return -EINVAL;
1220 }
1221 }
1222
1223 if (x->xdo.type == XFRM_DEV_OFFLOAD_PACKET &&
1224 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
1225 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
1226 return -EINVAL;
1227 }
1228
1229 return 0;
1230 }
1231
1232 static void
mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry * pol_entry,struct mlx5_accel_pol_xfrm_attrs * attrs)1233 mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
1234 struct mlx5_accel_pol_xfrm_attrs *attrs)
1235 {
1236 struct xfrm_policy *x = pol_entry->x;
1237 struct xfrm_selector *sel;
1238
1239 sel = &x->selector;
1240 memset(attrs, 0, sizeof(*attrs));
1241
1242 memcpy(&attrs->addrs.saddr, sel->saddr.a6, sizeof(attrs->addrs.saddr));
1243 memcpy(&attrs->addrs.daddr, sel->daddr.a6, sizeof(attrs->addrs.daddr));
1244 attrs->addrs.family = sel->family;
1245 mlx5e_ipsec_policy_mask(&attrs->addrs, sel);
1246 attrs->dir = x->xdo.dir;
1247 attrs->action = x->action;
1248 attrs->type = XFRM_DEV_OFFLOAD_PACKET;
1249 attrs->reqid = x->xfrm_vec[0].reqid;
1250 attrs->upspec.dport = ntohs(sel->dport);
1251 attrs->upspec.dport_mask = ntohs(sel->dport_mask);
1252 attrs->upspec.sport = ntohs(sel->sport);
1253 attrs->upspec.sport_mask = ntohs(sel->sport_mask);
1254 attrs->upspec.proto = sel->proto;
1255 attrs->prio = x->priority;
1256 }
1257
mlx5e_xfrm_add_policy(struct xfrm_policy * x,struct netlink_ext_ack * extack)1258 static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
1259 struct netlink_ext_ack *extack)
1260 {
1261 struct net_device *netdev = x->xdo.dev;
1262 struct mlx5e_ipsec_pol_entry *pol_entry;
1263 struct mlx5e_priv *priv;
1264 int err;
1265
1266 priv = netdev_priv(netdev);
1267 if (!priv->ipsec) {
1268 NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload");
1269 return -EOPNOTSUPP;
1270 }
1271
1272 err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack);
1273 if (err)
1274 return err;
1275
1276 pol_entry = kzalloc(sizeof(*pol_entry), GFP_KERNEL);
1277 if (!pol_entry)
1278 return -ENOMEM;
1279
1280 pol_entry->x = x;
1281 pol_entry->ipsec = priv->ipsec;
1282
1283 if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
1284 err = -EBUSY;
1285 goto ipsec_busy;
1286 }
1287
1288 mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs);
1289 err = mlx5e_accel_ipsec_fs_add_pol(pol_entry);
1290 if (err)
1291 goto err_fs;
1292
1293 x->xdo.offload_handle = (unsigned long)pol_entry;
1294 return 0;
1295
1296 err_fs:
1297 mlx5_eswitch_unblock_ipsec(priv->mdev);
1298 ipsec_busy:
1299 kfree(pol_entry);
1300 NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
1301 return err;
1302 }
1303
mlx5e_xfrm_del_policy(struct xfrm_policy * x)1304 static void mlx5e_xfrm_del_policy(struct xfrm_policy *x)
1305 {
1306 struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
1307
1308 mlx5e_accel_ipsec_fs_del_pol(pol_entry);
1309 mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev);
1310 }
1311
/*
 * xdo_dev_policy_free callback: free the driver's policy entry
 * associated with @x.
 */
static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
{
	kfree(to_ipsec_pol_entry(x));
}
1318
1319 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
1320 .xdo_dev_state_add = mlx5e_xfrm_add_state,
1321 .xdo_dev_state_delete = mlx5e_xfrm_del_state,
1322 .xdo_dev_state_free = mlx5e_xfrm_free_state,
1323 .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
1324
1325 .xdo_dev_state_update_stats = mlx5e_xfrm_update_stats,
1326 .xdo_dev_policy_add = mlx5e_xfrm_add_policy,
1327 .xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
1328 .xdo_dev_policy_free = mlx5e_xfrm_free_policy,
1329 };
1330
mlx5e_ipsec_build_netdev(struct mlx5e_priv * priv)1331 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
1332 {
1333 struct mlx5_core_dev *mdev = priv->mdev;
1334 struct net_device *netdev = priv->netdev;
1335
1336 if (!mlx5_ipsec_device_caps(mdev))
1337 return;
1338
1339 mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
1340
1341 netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
1342 netdev->features |= NETIF_F_HW_ESP;
1343 netdev->hw_enc_features |= NETIF_F_HW_ESP;
1344
1345 if (!MLX5_CAP_ETH(mdev, swp_csum)) {
1346 mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
1347 return;
1348 }
1349
1350 netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
1351 netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
1352
1353 if (!MLX5_CAP_ETH(mdev, swp_lso)) {
1354 mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
1355 return;
1356 }
1357
1358 netdev->gso_partial_features |= NETIF_F_GSO_ESP;
1359 mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
1360 netdev->features |= NETIF_F_GSO_ESP;
1361 netdev->hw_features |= NETIF_F_GSO_ESP;
1362 netdev->hw_enc_features |= NETIF_F_GSO_ESP;
1363 }
1364