xref: /linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c (revision ec8a42e7343234802b9054874fe01810880289ce)
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
3 
4 #include <linux/netdevice.h>
5 #include <linux/netlink.h>
6 #include <linux/random.h>
7 #include <net/vxlan.h>
8 
9 #include "reg.h"
10 #include "spectrum.h"
11 #include "spectrum_nve.h"
12 
/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B)
 *
 * In the worst case - where we have a VLAN tag on the outer Ethernet
 * header and IPv6 in overlay and underlay - we need to parse 128 bytes
 * (18 + 40 + 8 + 8 + 14 + 40).
 */
#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128
/* Parsing depth programmed while nobody holds an increased parsing depth
 * reference (see mlxsw_sp_nve_parsing_set()).
 */
#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96

/* VxLAN device flags that can be offloaded. A configuration with any other
 * flag set is rejected by mlxsw_sp_nve_vxlan_can_offload().
 */
#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS	(VXLAN_F_UDP_ZERO_CSUM_TX | \
						 VXLAN_F_LEARN)
23 
24 static bool mlxsw_sp_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
25 					   const struct mlxsw_sp_nve_params *params,
26 					   struct netlink_ext_ack *extack)
27 {
28 	struct vxlan_dev *vxlan = netdev_priv(params->dev);
29 	struct vxlan_config *cfg = &vxlan->cfg;
30 
31 	if (cfg->saddr.sa.sa_family != AF_INET) {
32 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported");
33 		return false;
34 	}
35 
36 	if (vxlan_addr_multicast(&cfg->remote_ip)) {
37 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported");
38 		return false;
39 	}
40 
41 	if (vxlan_addr_any(&cfg->saddr)) {
42 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified");
43 		return false;
44 	}
45 
46 	if (cfg->remote_ifindex) {
47 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported");
48 		return false;
49 	}
50 
51 	if (cfg->port_min || cfg->port_max) {
52 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported");
53 		return false;
54 	}
55 
56 	if (cfg->tos != 1) {
57 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit");
58 		return false;
59 	}
60 
61 	if (cfg->flags & VXLAN_F_TTL_INHERIT) {
62 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit");
63 		return false;
64 	}
65 
66 	if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) {
67 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported");
68 		return false;
69 	}
70 
71 	if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS) {
72 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag");
73 		return false;
74 	}
75 
76 	if (cfg->ttl == 0) {
77 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0");
78 		return false;
79 	}
80 
81 	if (cfg->label != 0) {
82 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0");
83 		return false;
84 	}
85 
86 	return true;
87 }
88 
89 static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
90 					    const struct mlxsw_sp_nve_params *params,
91 					    struct netlink_ext_ack *extack)
92 {
93 	if (params->ethertype == ETH_P_8021AD) {
94 		NL_SET_ERR_MSG_MOD(extack, "VxLAN: 802.1ad bridge is not supported with VxLAN");
95 		return false;
96 	}
97 
98 	return mlxsw_sp_nve_vxlan_can_offload(nve, params, extack);
99 }
100 
101 static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
102 				      const struct mlxsw_sp_nve_params *params,
103 				      struct mlxsw_sp_nve_config *config)
104 {
105 	struct vxlan_dev *vxlan = netdev_priv(params->dev);
106 	struct vxlan_config *cfg = &vxlan->cfg;
107 
108 	config->type = MLXSW_SP_NVE_TYPE_VXLAN;
109 	config->ttl = cfg->ttl;
110 	config->flowlabel = cfg->label;
111 	config->learning_en = cfg->flags & VXLAN_F_LEARN ? 1 : 0;
112 	config->ul_tb_id = RT_TABLE_MAIN;
113 	config->ul_proto = MLXSW_SP_L3_PROTO_IPV4;
114 	config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr;
115 	config->udp_dport = cfg->dst_port;
116 	config->ethertype = params->ethertype;
117 }
118 
119 static int __mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
120 				      unsigned int parsing_depth,
121 				      __be16 udp_dport)
122 {
123 	char mprs_pl[MLXSW_REG_MPRS_LEN];
124 
125 	mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport));
126 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
127 }
128 
129 static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
130 				    __be16 udp_dport)
131 {
132 	int parsing_depth = mlxsw_sp->nve->inc_parsing_depth_refs ?
133 				MLXSW_SP_NVE_VXLAN_PARSING_DEPTH :
134 				MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH;
135 
136 	return __mlxsw_sp_nve_parsing_set(mlxsw_sp, parsing_depth, udp_dport);
137 }
138 
139 static int
140 __mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp,
141 				     __be16 udp_dport)
142 {
143 	int err;
144 
145 	mlxsw_sp->nve->inc_parsing_depth_refs++;
146 
147 	err = mlxsw_sp_nve_parsing_set(mlxsw_sp, udp_dport);
148 	if (err)
149 		goto err_nve_parsing_set;
150 	return 0;
151 
152 err_nve_parsing_set:
153 	mlxsw_sp->nve->inc_parsing_depth_refs--;
154 	return err;
155 }
156 
157 static void
158 __mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp,
159 				     __be16 udp_dport)
160 {
161 	mlxsw_sp->nve->inc_parsing_depth_refs--;
162 	mlxsw_sp_nve_parsing_set(mlxsw_sp, udp_dport);
163 }
164 
165 int mlxsw_sp_nve_inc_parsing_depth_get(struct mlxsw_sp *mlxsw_sp)
166 {
167 	__be16 udp_dport = mlxsw_sp->nve->config.udp_dport;
168 
169 	return __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, udp_dport);
170 }
171 
172 void mlxsw_sp_nve_inc_parsing_depth_put(struct mlxsw_sp *mlxsw_sp)
173 {
174 	__be16 udp_dport = mlxsw_sp->nve->config.udp_dport;
175 
176 	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, udp_dport);
177 }
178 
179 static void
180 mlxsw_sp_nve_vxlan_config_prepare(char *tngcr_pl,
181 				  const struct mlxsw_sp_nve_config *config)
182 {
183 	u8 udp_sport;
184 
185 	mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true,
186 			     config->ttl);
187 	/* VxLAN driver's default UDP source port range is 32768 (0x8000)
188 	 * to 60999 (0xee47). Set the upper 8 bits of the UDP source port
189 	 * to a random number between 0x80 and 0xee
190 	 */
191 	get_random_bytes(&udp_sport, sizeof(udp_sport));
192 	udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80;
193 	mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport);
194 	mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4));
195 }
196 
197 static int
198 mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp,
199 			       const struct mlxsw_sp_nve_config *config)
200 {
201 	char tngcr_pl[MLXSW_REG_TNGCR_LEN];
202 	u16 ul_vr_id;
203 	int err;
204 
205 	err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id,
206 					  &ul_vr_id);
207 	if (err)
208 		return err;
209 
210 	mlxsw_sp_nve_vxlan_config_prepare(tngcr_pl, config);
211 	mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en);
212 	mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id);
213 
214 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
215 }
216 
217 static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp)
218 {
219 	char tngcr_pl[MLXSW_REG_TNGCR_LEN];
220 
221 	mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0);
222 
223 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
224 }
225 
226 static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp,
227 					unsigned int tunnel_index)
228 {
229 	char rtdp_pl[MLXSW_REG_RTDP_LEN];
230 
231 	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index);
232 
233 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
234 }
235 
236 static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve,
237 				    const struct mlxsw_sp_nve_config *config)
238 {
239 	struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
240 	int err;
241 
242 	err = __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, config->udp_dport);
243 	if (err)
244 		return err;
245 
246 	err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config);
247 	if (err)
248 		goto err_config_set;
249 
250 	err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index);
251 	if (err)
252 		goto err_rtdp_set;
253 
254 	err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id,
255 						config->ul_proto,
256 						&config->ul_sip,
257 						nve->tunnel_index);
258 	if (err)
259 		goto err_promote_decap;
260 
261 	return 0;
262 
263 err_promote_decap:
264 err_rtdp_set:
265 	mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
266 err_config_set:
267 	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
268 	return err;
269 }
270 
271 static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
272 {
273 	struct mlxsw_sp_nve_config *config = &nve->config;
274 	struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
275 
276 	mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
277 					 config->ul_proto, &config->ul_sip);
278 	mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
279 	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
280 }
281 
282 static int
283 mlxsw_sp_nve_vxlan_fdb_replay(const struct net_device *nve_dev, __be32 vni,
284 			      struct netlink_ext_ack *extack)
285 {
286 	if (WARN_ON(!netif_is_vxlan(nve_dev)))
287 		return -EINVAL;
288 	return vxlan_fdb_replay(nve_dev, vni, &mlxsw_sp_switchdev_notifier,
289 				extack);
290 }
291 
292 static void
293 mlxsw_sp_nve_vxlan_clear_offload(const struct net_device *nve_dev, __be32 vni)
294 {
295 	if (WARN_ON(!netif_is_vxlan(nve_dev)))
296 		return;
297 	vxlan_fdb_clear_offload(nve_dev, vni);
298 }
299 
/* VxLAN NVE operations for the mlxsw_sp1 flavour. can_offload, init and
 * fini are sp1-specific; nve_config, fdb_replay and fdb_clear_offload are
 * the same functions used by mlxsw_sp2_nve_vxlan_ops.
 */
const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = {
	.type		= MLXSW_SP_NVE_TYPE_VXLAN,
	.can_offload	= mlxsw_sp1_nve_vxlan_can_offload,
	.nve_config	= mlxsw_sp_nve_vxlan_config,
	.init		= mlxsw_sp1_nve_vxlan_init,
	.fini		= mlxsw_sp1_nve_vxlan_fini,
	.fdb_replay	= mlxsw_sp_nve_vxlan_fdb_replay,
	.fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload,
};
309 
310 static bool mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp,
311 					     bool learning_en)
312 {
313 	char tnpc_pl[MLXSW_REG_TNPC_LEN];
314 
315 	mlxsw_reg_tnpc_pack(tnpc_pl, MLXSW_REG_TUNNEL_PORT_NVE,
316 			    learning_en);
317 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnpc), tnpc_pl);
318 }
319 
320 static int
321 mlxsw_sp2_nve_decap_ethertype_set(struct mlxsw_sp *mlxsw_sp, u16 ethertype)
322 {
323 	char spvid_pl[MLXSW_REG_SPVID_LEN] = {};
324 	u8 sver_type;
325 	int err;
326 
327 	mlxsw_reg_spvid_tport_set(spvid_pl, true);
328 	mlxsw_reg_spvid_local_port_set(spvid_pl,
329 				       MLXSW_REG_TUNNEL_PORT_NVE);
330 	err = mlxsw_sp_ethtype_to_sver_type(ethertype, &sver_type);
331 	if (err)
332 		return err;
333 
334 	mlxsw_reg_spvid_et_vlan_set(spvid_pl, sver_type);
335 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl);
336 }
337 
338 static int
339 mlxsw_sp2_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp,
340 			       const struct mlxsw_sp_nve_config *config)
341 {
342 	char tngcr_pl[MLXSW_REG_TNGCR_LEN];
343 	char spvtr_pl[MLXSW_REG_SPVTR_LEN];
344 	u16 ul_rif_index;
345 	int err;
346 
347 	err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, config->ul_tb_id,
348 					 &ul_rif_index);
349 	if (err)
350 		return err;
351 	mlxsw_sp->nve->ul_rif_index = ul_rif_index;
352 
353 	err = mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, config->learning_en);
354 	if (err)
355 		goto err_vxlan_learning_set;
356 
357 	mlxsw_sp_nve_vxlan_config_prepare(tngcr_pl, config);
358 	mlxsw_reg_tngcr_underlay_rif_set(tngcr_pl, ul_rif_index);
359 
360 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
361 	if (err)
362 		goto err_tngcr_write;
363 
364 	mlxsw_reg_spvtr_pack(spvtr_pl, true, MLXSW_REG_TUNNEL_PORT_NVE,
365 			     MLXSW_REG_SPVTR_IPVID_MODE_ALWAYS_PUSH_VLAN);
366 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvtr), spvtr_pl);
367 	if (err)
368 		goto err_spvtr_write;
369 
370 	err = mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp, config->ethertype);
371 	if (err)
372 		goto err_decap_ethertype_set;
373 
374 	return 0;
375 
376 err_decap_ethertype_set:
377 	mlxsw_reg_spvtr_pack(spvtr_pl, true, MLXSW_REG_TUNNEL_PORT_NVE,
378 			     MLXSW_REG_SPVTR_IPVID_MODE_IEEE_COMPLIANT_PVID);
379 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvtr), spvtr_pl);
380 err_spvtr_write:
381 	mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0);
382 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
383 err_tngcr_write:
384 	mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, false);
385 err_vxlan_learning_set:
386 	mlxsw_sp_router_ul_rif_put(mlxsw_sp, ul_rif_index);
387 	return err;
388 }
389 
390 static void mlxsw_sp2_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp)
391 {
392 	char spvtr_pl[MLXSW_REG_SPVTR_LEN];
393 	char tngcr_pl[MLXSW_REG_TNGCR_LEN];
394 
395 	/* Set default EtherType */
396 	mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp, ETH_P_8021Q);
397 	mlxsw_reg_spvtr_pack(spvtr_pl, true, MLXSW_REG_TUNNEL_PORT_NVE,
398 			     MLXSW_REG_SPVTR_IPVID_MODE_IEEE_COMPLIANT_PVID);
399 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvtr), spvtr_pl);
400 	mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0);
401 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
402 	mlxsw_sp2_nve_vxlan_learning_set(mlxsw_sp, false);
403 	mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->nve->ul_rif_index);
404 }
405 
406 static int mlxsw_sp2_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp,
407 					unsigned int tunnel_index,
408 					u16 ul_rif_index)
409 {
410 	char rtdp_pl[MLXSW_REG_RTDP_LEN];
411 
412 	mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index);
413 	mlxsw_reg_rtdp_egress_router_interface_set(rtdp_pl, ul_rif_index);
414 
415 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
416 }
417 
418 static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve,
419 				    const struct mlxsw_sp_nve_config *config)
420 {
421 	struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
422 	int err;
423 
424 	err = __mlxsw_sp_nve_inc_parsing_depth_get(mlxsw_sp, config->udp_dport);
425 	if (err)
426 		return err;
427 
428 	err = mlxsw_sp2_nve_vxlan_config_set(mlxsw_sp, config);
429 	if (err)
430 		goto err_config_set;
431 
432 	err = mlxsw_sp2_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index,
433 					   nve->ul_rif_index);
434 	if (err)
435 		goto err_rtdp_set;
436 
437 	err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id,
438 						config->ul_proto,
439 						&config->ul_sip,
440 						nve->tunnel_index);
441 	if (err)
442 		goto err_promote_decap;
443 
444 	return 0;
445 
446 err_promote_decap:
447 err_rtdp_set:
448 	mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp);
449 err_config_set:
450 	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
451 	return err;
452 }
453 
454 static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
455 {
456 	struct mlxsw_sp_nve_config *config = &nve->config;
457 	struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
458 
459 	mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
460 					 config->ul_proto, &config->ul_sip);
461 	mlxsw_sp2_nve_vxlan_config_clear(mlxsw_sp);
462 	__mlxsw_sp_nve_inc_parsing_depth_put(mlxsw_sp, 0);
463 }
464 
/* VxLAN NVE operations for the mlxsw_sp2 flavour. init and fini are
 * sp2-specific; can_offload is the common check without the 802.1ad
 * restriction, and nve_config, fdb_replay and fdb_clear_offload are the
 * same functions used by mlxsw_sp1_nve_vxlan_ops.
 */
const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = {
	.type		= MLXSW_SP_NVE_TYPE_VXLAN,
	.can_offload	= mlxsw_sp_nve_vxlan_can_offload,
	.nve_config	= mlxsw_sp_nve_vxlan_config,
	.init		= mlxsw_sp2_nve_vxlan_init,
	.fini		= mlxsw_sp2_nve_vxlan_fini,
	.fdb_replay	= mlxsw_sp_nve_vxlan_fdb_replay,
	.fdb_clear_offload = mlxsw_sp_nve_vxlan_clear_offload,
};
474