xref: /linux/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c (revision 15a1fbdcfb519c2bd291ed01c6c94e0b89537a77)
1 /* Broadcom NetXtreme-C/E network driver.
2  *
3  * Copyright (c) 2017 Broadcom Limited
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/netdevice.h>
11 #include <linux/inetdevice.h>
12 #include <linux/if_vlan.h>
13 #include <net/flow_dissector.h>
14 #include <net/pkt_cls.h>
15 #include <net/tc_act/tc_gact.h>
16 #include <net/tc_act/tc_skbedit.h>
17 #include <net/tc_act/tc_mirred.h>
18 #include <net/tc_act/tc_vlan.h>
19 #include <net/tc_act/tc_pedit.h>
20 #include <net/tc_act/tc_tunnel_key.h>
21 #include <net/vxlan.h>
22 
23 #include "bnxt_hsi.h"
24 #include "bnxt.h"
25 #include "bnxt_sriov.h"
26 #include "bnxt_tc.h"
27 #include "bnxt_vfr.h"
28 
29 #define BNXT_FID_INVALID			0xffff
30 #define VLAN_TCI(vid, prio)	((vid) | ((prio) << VLAN_PRIO_SHIFT))
31 
32 #define is_vlan_pcp_wildcarded(vlan_tci_mask)	\
33 	((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == 0x0000)
34 #define is_vlan_pcp_exactmatch(vlan_tci_mask)	\
35 	((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == VLAN_PRIO_MASK)
36 #define is_vlan_pcp_zero(vlan_tci)	\
37 	((ntohs(vlan_tci) & VLAN_PRIO_MASK) == 0x0000)
38 #define is_vid_exactmatch(vlan_tci_mask)	\
39 	((ntohs(vlan_tci_mask) & VLAN_VID_MASK) == VLAN_VID_MASK)
40 
41 static bool is_wildcard(void *mask, int len);
42 static bool is_exactmatch(void *mask, int len);
43 /* Return the dst fid of the func for flow forwarding
44  * For PFs: src_fid is the fid of the PF
45  * For VF-reps: src_fid the fid of the VF
46  */
47 static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
48 {
49 	struct bnxt *bp;
50 
51 	/* check if dev belongs to the same switch */
52 	if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
53 		netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch",
54 			    dev->ifindex);
55 		return BNXT_FID_INVALID;
56 	}
57 
58 	/* Is dev a VF-rep? */
59 	if (bnxt_dev_is_vf_rep(dev))
60 		return bnxt_vf_rep_get_fid(dev);
61 
62 	bp = netdev_priv(dev);
63 	return bp->pf.fw_fid;
64 }
65 
66 static int bnxt_tc_parse_redir(struct bnxt *bp,
67 			       struct bnxt_tc_actions *actions,
68 			       const struct flow_action_entry *act)
69 {
70 	struct net_device *dev = act->dev;
71 
72 	if (!dev) {
73 		netdev_info(bp->dev, "no dev in mirred action");
74 		return -EINVAL;
75 	}
76 
77 	actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
78 	actions->dst_dev = dev;
79 	return 0;
80 }
81 
82 static int bnxt_tc_parse_vlan(struct bnxt *bp,
83 			      struct bnxt_tc_actions *actions,
84 			      const struct flow_action_entry *act)
85 {
86 	switch (act->id) {
87 	case FLOW_ACTION_VLAN_POP:
88 		actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
89 		break;
90 	case FLOW_ACTION_VLAN_PUSH:
91 		actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
92 		actions->push_vlan_tci = htons(act->vlan.vid);
93 		actions->push_vlan_tpid = act->vlan.proto;
94 		break;
95 	default:
96 		return -EOPNOTSUPP;
97 	}
98 	return 0;
99 }
100 
101 static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
102 				    struct bnxt_tc_actions *actions,
103 				    const struct flow_action_entry *act)
104 {
105 	const struct ip_tunnel_info *tun_info = act->tunnel;
106 	const struct ip_tunnel_key *tun_key = &tun_info->key;
107 
108 	if (ip_tunnel_info_af(tun_info) != AF_INET) {
109 		netdev_info(bp->dev, "only IPv4 tunnel-encap is supported");
110 		return -EOPNOTSUPP;
111 	}
112 
113 	actions->tun_encap_key = *tun_key;
114 	actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP;
115 	return 0;
116 }
117 
118 /* Key & Mask from the stack comes unaligned in multiple iterations of 4 bytes
119  * each(u32).
120  * This routine consolidates such multiple unaligned values into one
121  * field each for Key & Mask (for src and dst macs separately)
122  * For example,
123  *			Mask/Key	Offset	Iteration
124  *			==========	======	=========
125  *	dst mac		0xffffffff	0	1
126  *	dst mac		0x0000ffff	4	2
127  *
128  *	src mac		0xffff0000	4	1
129  *	src mac		0xffffffff	8	2
130  *
131  * The above combination coming from the stack will be consolidated as
132  *			Mask/Key
133  *			==============
134  *	src mac:	0xffffffffffff
135  *	dst mac:	0xffffffffffff
136  */
137 static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask,
138 				 u8 *actual_key, u8 *actual_mask)
139 {
140 	u32 key = get_unaligned((u32 *)actual_key);
141 	u32 mask = get_unaligned((u32 *)actual_mask);
142 
143 	part_key &= part_mask;
144 	part_key |= key & ~part_mask;
145 
146 	put_unaligned(mask | part_mask, (u32 *)actual_mask);
147 	put_unaligned(part_key, (u32 *)actual_key);
148 }
149 
150 static int
151 bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions,
152 			    u16 *eth_addr, u16 *eth_addr_mask)
153 {
154 	u16 *p;
155 	int j;
156 
157 	if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)))
158 		return -EINVAL;
159 
160 	if (!is_wildcard(&eth_addr_mask[0], ETH_ALEN)) {
161 		if (!is_exactmatch(&eth_addr_mask[0], ETH_ALEN))
162 			return -EINVAL;
163 		/* FW expects dmac to be in u16 array format */
164 		p = eth_addr;
165 		for (j = 0; j < 3; j++)
166 			actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j));
167 	}
168 
169 	if (!is_wildcard(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN)) {
170 		if (!is_exactmatch(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN))
171 			return -EINVAL;
172 		/* FW expects smac to be in u16 array format */
173 		p = &eth_addr[ETH_ALEN / 2];
174 		for (j = 0; j < 3; j++)
175 			actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j));
176 	}
177 
178 	return 0;
179 }
180 
181 static int
182 bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions,
183 		    struct flow_action_entry *act, int act_idx, u8 *eth_addr,
184 		    u8 *eth_addr_mask)
185 {
186 	size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr);
187 	size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr);
188 	u32 mask, val, offset, idx;
189 	u8 htype;
190 
191 	offset = act->mangle.offset;
192 	htype = act->mangle.htype;
193 	mask = ~act->mangle.mask;
194 	val = act->mangle.val;
195 
196 	switch (htype) {
197 	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
198 		if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) {
199 			netdev_err(bp->dev,
200 				   "%s: eth_hdr: Invalid pedit field\n",
201 				   __func__);
202 			return -EINVAL;
203 		}
204 		actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE;
205 
206 		bnxt_set_l2_key_mask(val, mask, &eth_addr[offset],
207 				     &eth_addr_mask[offset]);
208 		break;
209 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
210 		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
211 		actions->nat.l3_is_ipv4 = true;
212 		if (offset ==  offsetof(struct iphdr, saddr)) {
213 			actions->nat.src_xlate = true;
214 			actions->nat.l3.ipv4.saddr.s_addr = htonl(val);
215 		} else if (offset ==  offsetof(struct iphdr, daddr)) {
216 			actions->nat.src_xlate = false;
217 			actions->nat.l3.ipv4.daddr.s_addr = htonl(val);
218 		} else {
219 			netdev_err(bp->dev,
220 				   "%s: IPv4_hdr: Invalid pedit field\n",
221 				   __func__);
222 			return -EINVAL;
223 		}
224 
225 		netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n",
226 			   actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr,
227 			   &actions->nat.l3.ipv4.daddr);
228 		break;
229 
230 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
231 		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
232 		actions->nat.l3_is_ipv4 = false;
233 		if (offset >= offsetof(struct ipv6hdr, saddr) &&
234 		    offset < offset_of_ip6_daddr) {
235 			/* 16 byte IPv6 address comes in 4 iterations of
236 			 * 4byte chunks each
237 			 */
238 			actions->nat.src_xlate = true;
239 			idx = (offset - offset_of_ip6_saddr) / 4;
240 			/* First 4bytes will be copied to idx 0 and so on */
241 			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
242 		} else if (offset >= offset_of_ip6_daddr &&
243 			   offset < offset_of_ip6_daddr + 16) {
244 			actions->nat.src_xlate = false;
245 			idx = (offset - offset_of_ip6_daddr) / 4;
246 			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
247 		} else {
248 			netdev_err(bp->dev,
249 				   "%s: IPv6_hdr: Invalid pedit field\n",
250 				   __func__);
251 			return -EINVAL;
252 		}
253 		break;
254 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
255 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
256 		/* HW does not support L4 rewrite alone without L3
257 		 * rewrite
258 		 */
259 		if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) {
260 			netdev_err(bp->dev,
261 				   "Need to specify L3 rewrite as well\n");
262 			return -EINVAL;
263 		}
264 		if (actions->nat.src_xlate)
265 			actions->nat.l4.ports.sport = htons(val);
266 		else
267 			actions->nat.l4.ports.dport = htons(val);
268 		netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n",
269 			   actions->nat.l4.ports.sport,
270 			   actions->nat.l4.ports.dport);
271 		break;
272 	default:
273 		netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n",
274 			   __func__);
275 		return -EINVAL;
276 	}
277 	return 0;
278 }
279 
280 static int bnxt_tc_parse_actions(struct bnxt *bp,
281 				 struct bnxt_tc_actions *actions,
282 				 struct flow_action *flow_action)
283 {
284 	/* Used to store the L2 rewrite mask for dmac (6 bytes) followed by
285 	 * smac (6 bytes) if rewrite of both is specified, otherwise either
286 	 * dmac or smac
287 	 */
288 	u16 eth_addr_mask[ETH_ALEN] = { 0 };
289 	/* Used to store the L2 rewrite key for dmac (6 bytes) followed by
290 	 * smac (6 bytes) if rewrite of both is specified, otherwise either
291 	 * dmac or smac
292 	 */
293 	u16 eth_addr[ETH_ALEN] = { 0 };
294 	struct flow_action_entry *act;
295 	int i, rc;
296 
297 	if (!flow_action_has_entries(flow_action)) {
298 		netdev_info(bp->dev, "no actions");
299 		return -EINVAL;
300 	}
301 
302 	flow_action_for_each(i, act, flow_action) {
303 		switch (act->id) {
304 		case FLOW_ACTION_DROP:
305 			actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
306 			return 0; /* don't bother with other actions */
307 		case FLOW_ACTION_REDIRECT:
308 			rc = bnxt_tc_parse_redir(bp, actions, act);
309 			if (rc)
310 				return rc;
311 			break;
312 		case FLOW_ACTION_VLAN_POP:
313 		case FLOW_ACTION_VLAN_PUSH:
314 		case FLOW_ACTION_VLAN_MANGLE:
315 			rc = bnxt_tc_parse_vlan(bp, actions, act);
316 			if (rc)
317 				return rc;
318 			break;
319 		case FLOW_ACTION_TUNNEL_ENCAP:
320 			rc = bnxt_tc_parse_tunnel_set(bp, actions, act);
321 			if (rc)
322 				return rc;
323 			break;
324 		case FLOW_ACTION_TUNNEL_DECAP:
325 			actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
326 			break;
327 		/* Packet edit: L2 rewrite, NAT, NAPT */
328 		case FLOW_ACTION_MANGLE:
329 			rc = bnxt_tc_parse_pedit(bp, actions, act, i,
330 						 (u8 *)eth_addr,
331 						 (u8 *)eth_addr_mask);
332 			if (rc)
333 				return rc;
334 			break;
335 		default:
336 			break;
337 		}
338 	}
339 
340 	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
341 		rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr,
342 						 eth_addr_mask);
343 		if (rc)
344 			return rc;
345 	}
346 
347 	if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
348 		if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
349 			/* dst_fid is PF's fid */
350 			actions->dst_fid = bp->pf.fw_fid;
351 		} else {
352 			/* find the FID from dst_dev */
353 			actions->dst_fid =
354 				bnxt_flow_get_dst_fid(bp, actions->dst_dev);
355 			if (actions->dst_fid == BNXT_FID_INVALID)
356 				return -EINVAL;
357 		}
358 	}
359 
360 	return 0;
361 }
362 
363 static int bnxt_tc_parse_flow(struct bnxt *bp,
364 			      struct flow_cls_offload *tc_flow_cmd,
365 			      struct bnxt_tc_flow *flow)
366 {
367 	struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd);
368 	struct flow_dissector *dissector = rule->match.dissector;
369 
370 	/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
371 	if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
372 	    (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
373 		netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x",
374 			    dissector->used_keys);
375 		return -EOPNOTSUPP;
376 	}
377 
378 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
379 		struct flow_match_basic match;
380 
381 		flow_rule_match_basic(rule, &match);
382 		flow->l2_key.ether_type = match.key->n_proto;
383 		flow->l2_mask.ether_type = match.mask->n_proto;
384 
385 		if (match.key->n_proto == htons(ETH_P_IP) ||
386 		    match.key->n_proto == htons(ETH_P_IPV6)) {
387 			flow->l4_key.ip_proto = match.key->ip_proto;
388 			flow->l4_mask.ip_proto = match.mask->ip_proto;
389 		}
390 	}
391 
392 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
393 		struct flow_match_eth_addrs match;
394 
395 		flow_rule_match_eth_addrs(rule, &match);
396 		flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
397 		ether_addr_copy(flow->l2_key.dmac, match.key->dst);
398 		ether_addr_copy(flow->l2_mask.dmac, match.mask->dst);
399 		ether_addr_copy(flow->l2_key.smac, match.key->src);
400 		ether_addr_copy(flow->l2_mask.smac, match.mask->src);
401 	}
402 
403 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
404 		struct flow_match_vlan match;
405 
406 		flow_rule_match_vlan(rule, &match);
407 		flow->l2_key.inner_vlan_tci =
408 			cpu_to_be16(VLAN_TCI(match.key->vlan_id,
409 					     match.key->vlan_priority));
410 		flow->l2_mask.inner_vlan_tci =
411 			cpu_to_be16((VLAN_TCI(match.mask->vlan_id,
412 					      match.mask->vlan_priority)));
413 		flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
414 		flow->l2_mask.inner_vlan_tpid = htons(0xffff);
415 		flow->l2_key.num_vlans = 1;
416 	}
417 
418 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
419 		struct flow_match_ipv4_addrs match;
420 
421 		flow_rule_match_ipv4_addrs(rule, &match);
422 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
423 		flow->l3_key.ipv4.daddr.s_addr = match.key->dst;
424 		flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst;
425 		flow->l3_key.ipv4.saddr.s_addr = match.key->src;
426 		flow->l3_mask.ipv4.saddr.s_addr = match.mask->src;
427 	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
428 		struct flow_match_ipv6_addrs match;
429 
430 		flow_rule_match_ipv6_addrs(rule, &match);
431 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
432 		flow->l3_key.ipv6.daddr = match.key->dst;
433 		flow->l3_mask.ipv6.daddr = match.mask->dst;
434 		flow->l3_key.ipv6.saddr = match.key->src;
435 		flow->l3_mask.ipv6.saddr = match.mask->src;
436 	}
437 
438 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
439 		struct flow_match_ports match;
440 
441 		flow_rule_match_ports(rule, &match);
442 		flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
443 		flow->l4_key.ports.dport = match.key->dst;
444 		flow->l4_mask.ports.dport = match.mask->dst;
445 		flow->l4_key.ports.sport = match.key->src;
446 		flow->l4_mask.ports.sport = match.mask->src;
447 	}
448 
449 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
450 		struct flow_match_icmp match;
451 
452 		flow_rule_match_icmp(rule, &match);
453 		flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
454 		flow->l4_key.icmp.type = match.key->type;
455 		flow->l4_key.icmp.code = match.key->code;
456 		flow->l4_mask.icmp.type = match.mask->type;
457 		flow->l4_mask.icmp.code = match.mask->code;
458 	}
459 
460 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
461 		struct flow_match_ipv4_addrs match;
462 
463 		flow_rule_match_enc_ipv4_addrs(rule, &match);
464 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
465 		flow->tun_key.u.ipv4.dst = match.key->dst;
466 		flow->tun_mask.u.ipv4.dst = match.mask->dst;
467 		flow->tun_key.u.ipv4.src = match.key->src;
468 		flow->tun_mask.u.ipv4.src = match.mask->src;
469 	} else if (flow_rule_match_key(rule,
470 				      FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
471 		return -EOPNOTSUPP;
472 	}
473 
474 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
475 		struct flow_match_enc_keyid match;
476 
477 		flow_rule_match_enc_keyid(rule, &match);
478 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
479 		flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid);
480 		flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid);
481 	}
482 
483 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
484 		struct flow_match_ports match;
485 
486 		flow_rule_match_enc_ports(rule, &match);
487 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
488 		flow->tun_key.tp_dst = match.key->dst;
489 		flow->tun_mask.tp_dst = match.mask->dst;
490 		flow->tun_key.tp_src = match.key->src;
491 		flow->tun_mask.tp_src = match.mask->src;
492 	}
493 
494 	return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action);
495 }
496 
497 static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
498 				   struct bnxt_tc_flow_node *flow_node)
499 {
500 	struct hwrm_cfa_flow_free_input req = { 0 };
501 	int rc;
502 
503 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
504 	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
505 		req.ext_flow_handle = flow_node->ext_flow_handle;
506 	else
507 		req.flow_handle = flow_node->flow_handle;
508 
509 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
510 	if (rc)
511 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
512 
513 	return rc;
514 }
515 
516 static int ipv6_mask_len(struct in6_addr *mask)
517 {
518 	int mask_len = 0, i;
519 
520 	for (i = 0; i < 4; i++)
521 		mask_len += inet_mask_len(mask->s6_addr32[i]);
522 
523 	return mask_len;
524 }
525 
/* Return true when all @len bytes at @mask are zero (also when @len <= 0) */
static bool is_wildcard(void *mask, int len)
{
	const unsigned char *byte = mask;
	const unsigned char *end = byte + (len > 0 ? len : 0);

	while (byte < end) {
		if (*byte++)
			return false;
	}

	return true;
}
537 
/* Return true when all @len bytes at @mask are 0xff (also when @len <= 0) */
static bool is_exactmatch(void *mask, int len)
{
	const unsigned char *byte = mask;
	int remaining = len;

	while (remaining-- > 0) {
		if (*byte++ != 0xff)
			return false;
	}

	return true;
}
549 
550 static bool is_vlan_tci_allowed(__be16  vlan_tci_mask,
551 				__be16  vlan_tci)
552 {
553 	/* VLAN priority must be either exactly zero or fully wildcarded and
554 	 * VLAN id must be exact match.
555 	 */
556 	if (is_vid_exactmatch(vlan_tci_mask) &&
557 	    ((is_vlan_pcp_exactmatch(vlan_tci_mask) &&
558 	      is_vlan_pcp_zero(vlan_tci)) ||
559 	     is_vlan_pcp_wildcarded(vlan_tci_mask)))
560 		return true;
561 
562 	return false;
563 }
564 
/* Return true when at least one of the @len bytes at @key is non-zero */
static bool bits_set(void *key, int len)
{
	const unsigned char *byte = key;
	int idx = 0;

	while (idx < len) {
		if (byte[idx])
			return true;
		idx++;
	}

	return false;
}
576 
577 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
578 				    __le16 ref_flow_handle,
579 				    __le32 tunnel_handle,
580 				    struct bnxt_tc_flow_node *flow_node)
581 {
582 	struct bnxt_tc_actions *actions = &flow->actions;
583 	struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
584 	struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
585 	struct hwrm_cfa_flow_alloc_input req = { 0 };
586 	struct hwrm_cfa_flow_alloc_output *resp;
587 	u16 flow_flags = 0, action_flags = 0;
588 	int rc;
589 
590 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_ALLOC, -1, -1);
591 
592 	req.src_fid = cpu_to_le16(flow->src_fid);
593 	req.ref_flow_handle = ref_flow_handle;
594 
595 	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
596 		memcpy(req.l2_rewrite_dmac, actions->l2_rewrite_dmac,
597 		       ETH_ALEN);
598 		memcpy(req.l2_rewrite_smac, actions->l2_rewrite_smac,
599 		       ETH_ALEN);
600 		action_flags |=
601 			CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
602 	}
603 
604 	if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) {
605 		if (actions->nat.l3_is_ipv4) {
606 			action_flags |=
607 				CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS;
608 
609 			if (actions->nat.src_xlate) {
610 				action_flags |=
611 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
612 				/* L3 source rewrite */
613 				req.nat_ip_address[0] =
614 					actions->nat.l3.ipv4.saddr.s_addr;
615 				/* L4 source port */
616 				if (actions->nat.l4.ports.sport)
617 					req.nat_port =
618 						actions->nat.l4.ports.sport;
619 			} else {
620 				action_flags |=
621 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
622 				/* L3 destination rewrite */
623 				req.nat_ip_address[0] =
624 					actions->nat.l3.ipv4.daddr.s_addr;
625 				/* L4 destination port */
626 				if (actions->nat.l4.ports.dport)
627 					req.nat_port =
628 						actions->nat.l4.ports.dport;
629 			}
630 			netdev_dbg(bp->dev,
631 				   "req.nat_ip_address: %pI4 src_xlate: %d req.nat_port: %x\n",
632 				   req.nat_ip_address, actions->nat.src_xlate,
633 				   req.nat_port);
634 		} else {
635 			if (actions->nat.src_xlate) {
636 				action_flags |=
637 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
638 				/* L3 source rewrite */
639 				memcpy(req.nat_ip_address,
640 				       actions->nat.l3.ipv6.saddr.s6_addr32,
641 				       sizeof(req.nat_ip_address));
642 				/* L4 source port */
643 				if (actions->nat.l4.ports.sport)
644 					req.nat_port =
645 						actions->nat.l4.ports.sport;
646 			} else {
647 				action_flags |=
648 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
649 				/* L3 destination rewrite */
650 				memcpy(req.nat_ip_address,
651 				       actions->nat.l3.ipv6.daddr.s6_addr32,
652 				       sizeof(req.nat_ip_address));
653 				/* L4 destination port */
654 				if (actions->nat.l4.ports.dport)
655 					req.nat_port =
656 						actions->nat.l4.ports.dport;
657 			}
658 			netdev_dbg(bp->dev,
659 				   "req.nat_ip_address: %pI6 src_xlate: %d req.nat_port: %x\n",
660 				   req.nat_ip_address, actions->nat.src_xlate,
661 				   req.nat_port);
662 		}
663 	}
664 
665 	if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
666 	    actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
667 		req.tunnel_handle = tunnel_handle;
668 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
669 		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
670 	}
671 
672 	req.ethertype = flow->l2_key.ether_type;
673 	req.ip_proto = flow->l4_key.ip_proto;
674 
675 	if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
676 		memcpy(req.dmac, flow->l2_key.dmac, ETH_ALEN);
677 		memcpy(req.smac, flow->l2_key.smac, ETH_ALEN);
678 	}
679 
680 	if (flow->l2_key.num_vlans > 0) {
681 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE;
682 		/* FW expects the inner_vlan_tci value to be set
683 		 * in outer_vlan_tci when num_vlans is 1 (which is
684 		 * always the case in TC.)
685 		 */
686 		req.outer_vlan_tci = flow->l2_key.inner_vlan_tci;
687 	}
688 
689 	/* If all IP and L4 fields are wildcarded then this is an L2 flow */
690 	if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
691 	    is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
692 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
693 	} else {
694 		flow_flags |= flow->l2_key.ether_type == htons(ETH_P_IP) ?
695 				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 :
696 				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
697 
698 		if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
699 			req.ip_dst[0] = l3_key->ipv4.daddr.s_addr;
700 			req.ip_dst_mask_len =
701 				inet_mask_len(l3_mask->ipv4.daddr.s_addr);
702 			req.ip_src[0] = l3_key->ipv4.saddr.s_addr;
703 			req.ip_src_mask_len =
704 				inet_mask_len(l3_mask->ipv4.saddr.s_addr);
705 		} else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
706 			memcpy(req.ip_dst, l3_key->ipv6.daddr.s6_addr32,
707 			       sizeof(req.ip_dst));
708 			req.ip_dst_mask_len =
709 					ipv6_mask_len(&l3_mask->ipv6.daddr);
710 			memcpy(req.ip_src, l3_key->ipv6.saddr.s6_addr32,
711 			       sizeof(req.ip_src));
712 			req.ip_src_mask_len =
713 					ipv6_mask_len(&l3_mask->ipv6.saddr);
714 		}
715 	}
716 
717 	if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
718 		req.l4_src_port = flow->l4_key.ports.sport;
719 		req.l4_src_port_mask = flow->l4_mask.ports.sport;
720 		req.l4_dst_port = flow->l4_key.ports.dport;
721 		req.l4_dst_port_mask = flow->l4_mask.ports.dport;
722 	} else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
723 		/* l4 ports serve as type/code when ip_proto is ICMP */
724 		req.l4_src_port = htons(flow->l4_key.icmp.type);
725 		req.l4_src_port_mask = htons(flow->l4_mask.icmp.type);
726 		req.l4_dst_port = htons(flow->l4_key.icmp.code);
727 		req.l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
728 	}
729 	req.flags = cpu_to_le16(flow_flags);
730 
731 	if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
732 		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
733 	} else {
734 		if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
735 			action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
736 			req.dst_fid = cpu_to_le16(actions->dst_fid);
737 		}
738 		if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
739 			action_flags |=
740 			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
741 			req.l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
742 			req.l2_rewrite_vlan_tci = actions->push_vlan_tci;
743 			memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
744 			memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
745 		}
746 		if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
747 			action_flags |=
748 			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
749 			/* Rewrite config with tpid = 0 implies vlan pop */
750 			req.l2_rewrite_vlan_tpid = 0;
751 			memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
752 			memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
753 		}
754 	}
755 	req.action_flags = cpu_to_le16(action_flags);
756 
757 	mutex_lock(&bp->hwrm_cmd_lock);
758 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
759 	if (!rc) {
760 		resp = bnxt_get_hwrm_resp_addr(bp, &req);
761 		/* CFA_FLOW_ALLOC response interpretation:
762 		 *		    fw with	     fw with
763 		 *		    16-bit	     64-bit
764 		 *		    flow handle      flow handle
765 		 *		    ===========	     ===========
766 		 * flow_handle      flow handle      flow context id
767 		 * ext_flow_handle  INVALID	     flow handle
768 		 * flow_id	    INVALID	     flow counter id
769 		 */
770 		flow_node->flow_handle = resp->flow_handle;
771 		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
772 			flow_node->ext_flow_handle = resp->ext_flow_handle;
773 			flow_node->flow_id = resp->flow_id;
774 		}
775 	}
776 	mutex_unlock(&bp->hwrm_cmd_lock);
777 	return rc;
778 }
779 
780 static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
781 				       struct bnxt_tc_flow *flow,
782 				       struct bnxt_tc_l2_key *l2_info,
783 				       __le32 ref_decap_handle,
784 				       __le32 *decap_filter_handle)
785 {
786 	struct hwrm_cfa_decap_filter_alloc_input req = { 0 };
787 	struct hwrm_cfa_decap_filter_alloc_output *resp;
788 	struct ip_tunnel_key *tun_key = &flow->tun_key;
789 	u32 enables = 0;
790 	int rc;
791 
792 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_ALLOC, -1, -1);
793 
794 	req.flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
795 	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
796 		   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
797 	req.tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
798 	req.ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
799 
800 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
801 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
802 		/* tunnel_id is wrongly defined in hsi defn. as __le32 */
803 		req.tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
804 	}
805 
806 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
807 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
808 		ether_addr_copy(req.dst_macaddr, l2_info->dmac);
809 	}
810 	if (l2_info->num_vlans) {
811 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
812 		req.t_ivlan_vid = l2_info->inner_vlan_tci;
813 	}
814 
815 	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
816 	req.ethertype = htons(ETH_P_IP);
817 
818 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
819 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
820 			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
821 			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
822 		req.ip_addr_type = CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
823 		req.dst_ipaddr[0] = tun_key->u.ipv4.dst;
824 		req.src_ipaddr[0] = tun_key->u.ipv4.src;
825 	}
826 
827 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
828 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
829 		req.dst_port = tun_key->tp_dst;
830 	}
831 
832 	/* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc
833 	 * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
834 	 */
835 	req.l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
836 	req.enables = cpu_to_le32(enables);
837 
838 	mutex_lock(&bp->hwrm_cmd_lock);
839 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
840 	if (!rc) {
841 		resp = bnxt_get_hwrm_resp_addr(bp, &req);
842 		*decap_filter_handle = resp->decap_filter_id;
843 	} else {
844 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
845 	}
846 	mutex_unlock(&bp->hwrm_cmd_lock);
847 
848 	return rc;
849 }
850 
851 static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
852 				      __le32 decap_filter_handle)
853 {
854 	struct hwrm_cfa_decap_filter_free_input req = { 0 };
855 	int rc;
856 
857 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_FREE, -1, -1);
858 	req.decap_filter_id = decap_filter_handle;
859 
860 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
861 	if (rc)
862 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
863 
864 	return rc;
865 }
866 
867 static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
868 				       struct ip_tunnel_key *encap_key,
869 				       struct bnxt_tc_l2_key *l2_info,
870 				       __le32 *encap_record_handle)
871 {
872 	struct hwrm_cfa_encap_record_alloc_input req = { 0 };
873 	struct hwrm_cfa_encap_record_alloc_output *resp;
874 	struct hwrm_cfa_encap_data_vxlan *encap =
875 			(struct hwrm_cfa_encap_data_vxlan *)&req.encap_data;
876 	struct hwrm_vxlan_ipv4_hdr *encap_ipv4 =
877 				(struct hwrm_vxlan_ipv4_hdr *)encap->l3;
878 	int rc;
879 
880 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_ALLOC, -1, -1);
881 
882 	req.encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
883 
884 	ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
885 	ether_addr_copy(encap->src_mac_addr, l2_info->smac);
886 	if (l2_info->num_vlans) {
887 		encap->num_vlan_tags = l2_info->num_vlans;
888 		encap->ovlan_tci = l2_info->inner_vlan_tci;
889 		encap->ovlan_tpid = l2_info->inner_vlan_tpid;
890 	}
891 
892 	encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
893 	encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
894 	encap_ipv4->ttl = encap_key->ttl;
895 
896 	encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst;
897 	encap_ipv4->src_ip_addr = encap_key->u.ipv4.src;
898 	encap_ipv4->protocol = IPPROTO_UDP;
899 
900 	encap->dst_port = encap_key->tp_dst;
901 	encap->vni = tunnel_id_to_key32(encap_key->tun_id);
902 
903 	mutex_lock(&bp->hwrm_cmd_lock);
904 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
905 	if (!rc) {
906 		resp = bnxt_get_hwrm_resp_addr(bp, &req);
907 		*encap_record_handle = resp->encap_record_id;
908 	} else {
909 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
910 	}
911 	mutex_unlock(&bp->hwrm_cmd_lock);
912 
913 	return rc;
914 }
915 
916 static int hwrm_cfa_encap_record_free(struct bnxt *bp,
917 				      __le32 encap_record_handle)
918 {
919 	struct hwrm_cfa_encap_record_free_input req = { 0 };
920 	int rc;
921 
922 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_FREE, -1, -1);
923 	req.encap_record_id = encap_record_handle;
924 
925 	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
926 	if (rc)
927 		netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc);
928 
929 	return rc;
930 }
931 
932 static int bnxt_tc_put_l2_node(struct bnxt *bp,
933 			       struct bnxt_tc_flow_node *flow_node)
934 {
935 	struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
936 	struct bnxt_tc_info *tc_info = bp->tc_info;
937 	int rc;
938 
939 	/* remove flow_node from the L2 shared flow list */
940 	list_del(&flow_node->l2_list_node);
941 	if (--l2_node->refcount == 0) {
942 		rc =  rhashtable_remove_fast(&tc_info->l2_table, &l2_node->node,
943 					     tc_info->l2_ht_params);
944 		if (rc)
945 			netdev_err(bp->dev,
946 				   "Error: %s: rhashtable_remove_fast: %d",
947 				   __func__, rc);
948 		kfree_rcu(l2_node, rcu);
949 	}
950 	return 0;
951 }
952 
953 static struct bnxt_tc_l2_node *
954 bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
955 		    struct rhashtable_params ht_params,
956 		    struct bnxt_tc_l2_key *l2_key)
957 {
958 	struct bnxt_tc_l2_node *l2_node;
959 	int rc;
960 
961 	l2_node = rhashtable_lookup_fast(l2_table, l2_key, ht_params);
962 	if (!l2_node) {
963 		l2_node = kzalloc(sizeof(*l2_node), GFP_KERNEL);
964 		if (!l2_node) {
965 			rc = -ENOMEM;
966 			return NULL;
967 		}
968 
969 		l2_node->key = *l2_key;
970 		rc = rhashtable_insert_fast(l2_table, &l2_node->node,
971 					    ht_params);
972 		if (rc) {
973 			kfree_rcu(l2_node, rcu);
974 			netdev_err(bp->dev,
975 				   "Error: %s: rhashtable_insert_fast: %d",
976 				   __func__, rc);
977 			return NULL;
978 		}
979 		INIT_LIST_HEAD(&l2_node->common_l2_flows);
980 	}
981 	return l2_node;
982 }
983 
984 /* Get the ref_flow_handle for a flow by checking if there are any other
985  * flows that share the same L2 key as this flow.
986  */
987 static int
988 bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
989 			    struct bnxt_tc_flow_node *flow_node,
990 			    __le16 *ref_flow_handle)
991 {
992 	struct bnxt_tc_info *tc_info = bp->tc_info;
993 	struct bnxt_tc_flow_node *ref_flow_node;
994 	struct bnxt_tc_l2_node *l2_node;
995 
996 	l2_node = bnxt_tc_get_l2_node(bp, &tc_info->l2_table,
997 				      tc_info->l2_ht_params,
998 				      &flow->l2_key);
999 	if (!l2_node)
1000 		return -1;
1001 
1002 	/* If any other flow is using this l2_node, use it's flow_handle
1003 	 * as the ref_flow_handle
1004 	 */
1005 	if (l2_node->refcount > 0) {
1006 		ref_flow_node = list_first_entry(&l2_node->common_l2_flows,
1007 						 struct bnxt_tc_flow_node,
1008 						 l2_list_node);
1009 		*ref_flow_handle = ref_flow_node->flow_handle;
1010 	} else {
1011 		*ref_flow_handle = cpu_to_le16(0xffff);
1012 	}
1013 
1014 	/* Insert the l2_node into the flow_node so that subsequent flows
1015 	 * with a matching l2 key can use the flow_handle of this flow
1016 	 * as their ref_flow_handle
1017 	 */
1018 	flow_node->l2_node = l2_node;
1019 	list_add(&flow_node->l2_list_node, &l2_node->common_l2_flows);
1020 	l2_node->refcount++;
1021 	return 0;
1022 }
1023 
1024 /* After the flow parsing is done, this routine is used for checking
1025  * if there are any aspects of the flow that prevent it from being
1026  * offloaded.
1027  */
1028 static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
1029 {
1030 	/* If L4 ports are specified then ip_proto must be TCP or UDP */
1031 	if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
1032 	    (flow->l4_key.ip_proto != IPPROTO_TCP &&
1033 	     flow->l4_key.ip_proto != IPPROTO_UDP)) {
1034 		netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports",
1035 			    flow->l4_key.ip_proto);
1036 		return false;
1037 	}
1038 
1039 	/* Currently source/dest MAC cannot be partial wildcard  */
1040 	if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) &&
1041 	    !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) {
1042 		netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n");
1043 		return false;
1044 	}
1045 	if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) &&
1046 	    !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) {
1047 		netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n");
1048 		return false;
1049 	}
1050 
1051 	/* Currently VLAN fields cannot be partial wildcard */
1052 	if (bits_set(&flow->l2_key.inner_vlan_tci,
1053 		     sizeof(flow->l2_key.inner_vlan_tci)) &&
1054 	    !is_vlan_tci_allowed(flow->l2_mask.inner_vlan_tci,
1055 				 flow->l2_key.inner_vlan_tci)) {
1056 		netdev_info(bp->dev, "Unsupported VLAN TCI\n");
1057 		return false;
1058 	}
1059 	if (bits_set(&flow->l2_key.inner_vlan_tpid,
1060 		     sizeof(flow->l2_key.inner_vlan_tpid)) &&
1061 	    !is_exactmatch(&flow->l2_mask.inner_vlan_tpid,
1062 			   sizeof(flow->l2_mask.inner_vlan_tpid))) {
1063 		netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n");
1064 		return false;
1065 	}
1066 
1067 	/* Currently Ethertype must be set */
1068 	if (!is_exactmatch(&flow->l2_mask.ether_type,
1069 			   sizeof(flow->l2_mask.ether_type))) {
1070 		netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n");
1071 		return false;
1072 	}
1073 
1074 	return true;
1075 }
1076 
1077 /* Returns the final refcount of the node on success
1078  * or a -ve error code on failure
1079  */
1080 static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
1081 				   struct rhashtable *tunnel_table,
1082 				   struct rhashtable_params *ht_params,
1083 				   struct bnxt_tc_tunnel_node *tunnel_node)
1084 {
1085 	int rc;
1086 
1087 	if (--tunnel_node->refcount == 0) {
1088 		rc =  rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
1089 					     *ht_params);
1090 		if (rc) {
1091 			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc);
1092 			rc = -1;
1093 		}
1094 		kfree_rcu(tunnel_node, rcu);
1095 		return rc;
1096 	} else {
1097 		return tunnel_node->refcount;
1098 	}
1099 }
1100 
1101 /* Get (or add) either encap or decap tunnel node from/to the supplied
1102  * hash table.
1103  */
1104 static struct bnxt_tc_tunnel_node *
1105 bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
1106 			struct rhashtable_params *ht_params,
1107 			struct ip_tunnel_key *tun_key)
1108 {
1109 	struct bnxt_tc_tunnel_node *tunnel_node;
1110 	int rc;
1111 
1112 	tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params);
1113 	if (!tunnel_node) {
1114 		tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL);
1115 		if (!tunnel_node) {
1116 			rc = -ENOMEM;
1117 			goto err;
1118 		}
1119 
1120 		tunnel_node->key = *tun_key;
1121 		tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE;
1122 		rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node,
1123 					    *ht_params);
1124 		if (rc) {
1125 			kfree_rcu(tunnel_node, rcu);
1126 			goto err;
1127 		}
1128 	}
1129 	tunnel_node->refcount++;
1130 	return tunnel_node;
1131 err:
1132 	netdev_info(bp->dev, "error rc=%d", rc);
1133 	return NULL;
1134 }
1135 
1136 static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp,
1137 					struct bnxt_tc_flow *flow,
1138 					struct bnxt_tc_l2_key *l2_key,
1139 					struct bnxt_tc_flow_node *flow_node,
1140 					__le32 *ref_decap_handle)
1141 {
1142 	struct bnxt_tc_info *tc_info = bp->tc_info;
1143 	struct bnxt_tc_flow_node *ref_flow_node;
1144 	struct bnxt_tc_l2_node *decap_l2_node;
1145 
1146 	decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table,
1147 					    tc_info->decap_l2_ht_params,
1148 					    l2_key);
1149 	if (!decap_l2_node)
1150 		return -1;
1151 
1152 	/* If any other flow is using this decap_l2_node, use it's decap_handle
1153 	 * as the ref_decap_handle
1154 	 */
1155 	if (decap_l2_node->refcount > 0) {
1156 		ref_flow_node =
1157 			list_first_entry(&decap_l2_node->common_l2_flows,
1158 					 struct bnxt_tc_flow_node,
1159 					 decap_l2_list_node);
1160 		*ref_decap_handle = ref_flow_node->decap_node->tunnel_handle;
1161 	} else {
1162 		*ref_decap_handle = INVALID_TUNNEL_HANDLE;
1163 	}
1164 
1165 	/* Insert the l2_node into the flow_node so that subsequent flows
1166 	 * with a matching decap l2 key can use the decap_filter_handle of
1167 	 * this flow as their ref_decap_handle
1168 	 */
1169 	flow_node->decap_l2_node = decap_l2_node;
1170 	list_add(&flow_node->decap_l2_list_node,
1171 		 &decap_l2_node->common_l2_flows);
1172 	decap_l2_node->refcount++;
1173 	return 0;
1174 }
1175 
1176 static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
1177 				      struct bnxt_tc_flow_node *flow_node)
1178 {
1179 	struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node;
1180 	struct bnxt_tc_info *tc_info = bp->tc_info;
1181 	int rc;
1182 
1183 	/* remove flow_node from the decap L2 sharing flow list */
1184 	list_del(&flow_node->decap_l2_list_node);
1185 	if (--decap_l2_node->refcount == 0) {
1186 		rc =  rhashtable_remove_fast(&tc_info->decap_l2_table,
1187 					     &decap_l2_node->node,
1188 					     tc_info->decap_l2_ht_params);
1189 		if (rc)
1190 			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc);
1191 		kfree_rcu(decap_l2_node, rcu);
1192 	}
1193 }
1194 
1195 static void bnxt_tc_put_decap_handle(struct bnxt *bp,
1196 				     struct bnxt_tc_flow_node *flow_node)
1197 {
1198 	__le32 decap_handle = flow_node->decap_node->tunnel_handle;
1199 	struct bnxt_tc_info *tc_info = bp->tc_info;
1200 	int rc;
1201 
1202 	if (flow_node->decap_l2_node)
1203 		bnxt_tc_put_decap_l2_node(bp, flow_node);
1204 
1205 	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1206 				     &tc_info->decap_ht_params,
1207 				     flow_node->decap_node);
1208 	if (!rc && decap_handle != INVALID_TUNNEL_HANDLE)
1209 		hwrm_cfa_decap_filter_free(bp, decap_handle);
1210 }
1211 
1212 static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
1213 				       struct ip_tunnel_key *tun_key,
1214 				       struct bnxt_tc_l2_key *l2_info)
1215 {
1216 #ifdef CONFIG_INET
1217 	struct net_device *real_dst_dev = bp->dev;
1218 	struct flowi4 flow = { {0} };
1219 	struct net_device *dst_dev;
1220 	struct neighbour *nbr;
1221 	struct rtable *rt;
1222 	int rc;
1223 
1224 	flow.flowi4_proto = IPPROTO_UDP;
1225 	flow.fl4_dport = tun_key->tp_dst;
1226 	flow.daddr = tun_key->u.ipv4.dst;
1227 
1228 	rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
1229 	if (IS_ERR(rt)) {
1230 		netdev_info(bp->dev, "no route to %pI4b", &flow.daddr);
1231 		return -EOPNOTSUPP;
1232 	}
1233 
1234 	/* The route must either point to the real_dst_dev or a dst_dev that
1235 	 * uses the real_dst_dev.
1236 	 */
1237 	dst_dev = rt->dst.dev;
1238 	if (is_vlan_dev(dst_dev)) {
1239 #if IS_ENABLED(CONFIG_VLAN_8021Q)
1240 		struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev);
1241 
1242 		if (vlan->real_dev != real_dst_dev) {
1243 			netdev_info(bp->dev,
1244 				    "dst_dev(%s) doesn't use PF-if(%s)",
1245 				    netdev_name(dst_dev),
1246 				    netdev_name(real_dst_dev));
1247 			rc = -EOPNOTSUPP;
1248 			goto put_rt;
1249 		}
1250 		l2_info->inner_vlan_tci = htons(vlan->vlan_id);
1251 		l2_info->inner_vlan_tpid = vlan->vlan_proto;
1252 		l2_info->num_vlans = 1;
1253 #endif
1254 	} else if (dst_dev != real_dst_dev) {
1255 		netdev_info(bp->dev,
1256 			    "dst_dev(%s) for %pI4b is not PF-if(%s)",
1257 			    netdev_name(dst_dev), &flow.daddr,
1258 			    netdev_name(real_dst_dev));
1259 		rc = -EOPNOTSUPP;
1260 		goto put_rt;
1261 	}
1262 
1263 	nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
1264 	if (!nbr) {
1265 		netdev_info(bp->dev, "can't lookup neighbor for %pI4b",
1266 			    &flow.daddr);
1267 		rc = -EOPNOTSUPP;
1268 		goto put_rt;
1269 	}
1270 
1271 	tun_key->u.ipv4.src = flow.saddr;
1272 	tun_key->ttl = ip4_dst_hoplimit(&rt->dst);
1273 	neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev);
1274 	ether_addr_copy(l2_info->smac, dst_dev->dev_addr);
1275 	neigh_release(nbr);
1276 	ip_rt_put(rt);
1277 
1278 	return 0;
1279 put_rt:
1280 	ip_rt_put(rt);
1281 	return rc;
1282 #else
1283 	return -EOPNOTSUPP;
1284 #endif
1285 }
1286 
1287 static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1288 				    struct bnxt_tc_flow_node *flow_node,
1289 				    __le32 *decap_filter_handle)
1290 {
1291 	struct ip_tunnel_key *decap_key = &flow->tun_key;
1292 	struct bnxt_tc_info *tc_info = bp->tc_info;
1293 	struct bnxt_tc_l2_key l2_info = { {0} };
1294 	struct bnxt_tc_tunnel_node *decap_node;
1295 	struct ip_tunnel_key tun_key = { 0 };
1296 	struct bnxt_tc_l2_key *decap_l2_info;
1297 	__le32 ref_decap_handle;
1298 	int rc;
1299 
1300 	/* Check if there's another flow using the same tunnel decap.
1301 	 * If not, add this tunnel to the table and resolve the other
1302 	 * tunnel header fileds. Ignore src_port in the tunnel_key,
1303 	 * since it is not required for decap filters.
1304 	 */
1305 	decap_key->tp_src = 0;
1306 	decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
1307 					     &tc_info->decap_ht_params,
1308 					     decap_key);
1309 	if (!decap_node)
1310 		return -ENOMEM;
1311 
1312 	flow_node->decap_node = decap_node;
1313 
1314 	if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1315 		goto done;
1316 
1317 	/* Resolve the L2 fields for tunnel decap
1318 	 * Resolve the route for remote vtep (saddr) of the decap key
1319 	 * Find it's next-hop mac addrs
1320 	 */
1321 	tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
1322 	tun_key.tp_dst = flow->tun_key.tp_dst;
1323 	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info);
1324 	if (rc)
1325 		goto put_decap;
1326 
1327 	decap_l2_info = &decap_node->l2_info;
1328 	/* decap smac is wildcarded */
1329 	ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
1330 	if (l2_info.num_vlans) {
1331 		decap_l2_info->num_vlans = l2_info.num_vlans;
1332 		decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
1333 		decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci;
1334 	}
1335 	flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS;
1336 
1337 	/* For getting a decap_filter_handle we first need to check if
1338 	 * there are any other decap flows that share the same tunnel L2
1339 	 * key and if so, pass that flow's decap_filter_handle as the
1340 	 * ref_decap_handle for this flow.
1341 	 */
1342 	rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node,
1343 					  &ref_decap_handle);
1344 	if (rc)
1345 		goto put_decap;
1346 
1347 	/* Issue the hwrm cmd to allocate a decap filter handle */
1348 	rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info,
1349 					 ref_decap_handle,
1350 					 &decap_node->tunnel_handle);
1351 	if (rc)
1352 		goto put_decap_l2;
1353 
1354 done:
1355 	*decap_filter_handle = decap_node->tunnel_handle;
1356 	return 0;
1357 
1358 put_decap_l2:
1359 	bnxt_tc_put_decap_l2_node(bp, flow_node);
1360 put_decap:
1361 	bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1362 				&tc_info->decap_ht_params,
1363 				flow_node->decap_node);
1364 	return rc;
1365 }
1366 
1367 static void bnxt_tc_put_encap_handle(struct bnxt *bp,
1368 				     struct bnxt_tc_tunnel_node *encap_node)
1369 {
1370 	__le32 encap_handle = encap_node->tunnel_handle;
1371 	struct bnxt_tc_info *tc_info = bp->tc_info;
1372 	int rc;
1373 
1374 	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1375 				     &tc_info->encap_ht_params, encap_node);
1376 	if (!rc && encap_handle != INVALID_TUNNEL_HANDLE)
1377 		hwrm_cfa_encap_record_free(bp, encap_handle);
1378 }
1379 
1380 /* Lookup the tunnel encap table and check if there's an encap_handle
1381  * alloc'd already.
1382  * If not, query L2 info via a route lookup and issue an encap_record_alloc
1383  * cmd to FW.
1384  */
1385 static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1386 				    struct bnxt_tc_flow_node *flow_node,
1387 				    __le32 *encap_handle)
1388 {
1389 	struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key;
1390 	struct bnxt_tc_info *tc_info = bp->tc_info;
1391 	struct bnxt_tc_tunnel_node *encap_node;
1392 	int rc;
1393 
1394 	/* Check if there's another flow using the same tunnel encap.
1395 	 * If not, add this tunnel to the table and resolve the other
1396 	 * tunnel header fileds
1397 	 */
1398 	encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table,
1399 					     &tc_info->encap_ht_params,
1400 					     encap_key);
1401 	if (!encap_node)
1402 		return -ENOMEM;
1403 
1404 	flow_node->encap_node = encap_node;
1405 
1406 	if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1407 		goto done;
1408 
1409 	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info);
1410 	if (rc)
1411 		goto put_encap;
1412 
1413 	/* Allocate a new tunnel encap record */
1414 	rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info,
1415 					 &encap_node->tunnel_handle);
1416 	if (rc)
1417 		goto put_encap;
1418 
1419 done:
1420 	*encap_handle = encap_node->tunnel_handle;
1421 	return 0;
1422 
1423 put_encap:
1424 	bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1425 				&tc_info->encap_ht_params, encap_node);
1426 	return rc;
1427 }
1428 
1429 static void bnxt_tc_put_tunnel_handle(struct bnxt *bp,
1430 				      struct bnxt_tc_flow *flow,
1431 				      struct bnxt_tc_flow_node *flow_node)
1432 {
1433 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1434 		bnxt_tc_put_decap_handle(bp, flow_node);
1435 	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1436 		bnxt_tc_put_encap_handle(bp, flow_node->encap_node);
1437 }
1438 
1439 static int bnxt_tc_get_tunnel_handle(struct bnxt *bp,
1440 				     struct bnxt_tc_flow *flow,
1441 				     struct bnxt_tc_flow_node *flow_node,
1442 				     __le32 *tunnel_handle)
1443 {
1444 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1445 		return bnxt_tc_get_decap_handle(bp, flow, flow_node,
1446 						tunnel_handle);
1447 	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1448 		return bnxt_tc_get_encap_handle(bp, flow, flow_node,
1449 						tunnel_handle);
1450 	else
1451 		return 0;
1452 }
1453 static int __bnxt_tc_del_flow(struct bnxt *bp,
1454 			      struct bnxt_tc_flow_node *flow_node)
1455 {
1456 	struct bnxt_tc_info *tc_info = bp->tc_info;
1457 	int rc;
1458 
1459 	/* send HWRM cmd to free the flow-id */
1460 	bnxt_hwrm_cfa_flow_free(bp, flow_node);
1461 
1462 	mutex_lock(&tc_info->lock);
1463 
1464 	/* release references to any tunnel encap/decap nodes */
1465 	bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node);
1466 
1467 	/* release reference to l2 node */
1468 	bnxt_tc_put_l2_node(bp, flow_node);
1469 
1470 	mutex_unlock(&tc_info->lock);
1471 
1472 	rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
1473 				    tc_info->flow_ht_params);
1474 	if (rc)
1475 		netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d",
1476 			   __func__, rc);
1477 
1478 	kfree_rcu(flow_node, rcu);
1479 	return 0;
1480 }
1481 
1482 static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow,
1483 				 u16 src_fid)
1484 {
1485 	flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX;
1486 }
1487 
1488 static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
1489 				u16 src_fid)
1490 {
1491 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1492 		flow->src_fid = bp->pf.fw_fid;
1493 	else
1494 		flow->src_fid = src_fid;
1495 }
1496 
1497 /* Add a new flow or replace an existing flow.
1498  * Notes on locking:
1499  * There are essentially two critical sections here.
1500  * 1. while adding a new flow
1501  *    a) lookup l2-key
1502  *    b) issue HWRM cmd and get flow_handle
1503  *    c) link l2-key with flow
1504  * 2. while deleting a flow
1505  *    a) unlinking l2-key from flow
1506  * A lock is needed to protect these two critical sections.
1507  *
1508  * The hash-tables are already protected by the rhashtable API.
1509  */
1510 static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
1511 			    struct flow_cls_offload *tc_flow_cmd)
1512 {
1513 	struct bnxt_tc_flow_node *new_node, *old_node;
1514 	struct bnxt_tc_info *tc_info = bp->tc_info;
1515 	struct bnxt_tc_flow *flow;
1516 	__le32 tunnel_handle = 0;
1517 	__le16 ref_flow_handle;
1518 	int rc;
1519 
1520 	/* allocate memory for the new flow and it's node */
1521 	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
1522 	if (!new_node) {
1523 		rc = -ENOMEM;
1524 		goto done;
1525 	}
1526 	new_node->cookie = tc_flow_cmd->cookie;
1527 	flow = &new_node->flow;
1528 
1529 	rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
1530 	if (rc)
1531 		goto free_node;
1532 
1533 	bnxt_tc_set_src_fid(bp, flow, src_fid);
1534 	bnxt_tc_set_flow_dir(bp, flow, flow->src_fid);
1535 
1536 	if (!bnxt_tc_can_offload(bp, flow)) {
1537 		rc = -EOPNOTSUPP;
1538 		kfree_rcu(new_node, rcu);
1539 		return rc;
1540 	}
1541 
1542 	/* If a flow exists with the same cookie, delete it */
1543 	old_node = rhashtable_lookup_fast(&tc_info->flow_table,
1544 					  &tc_flow_cmd->cookie,
1545 					  tc_info->flow_ht_params);
1546 	if (old_node)
1547 		__bnxt_tc_del_flow(bp, old_node);
1548 
1549 	/* Check if the L2 part of the flow has been offloaded already.
1550 	 * If so, bump up it's refcnt and get it's reference handle.
1551 	 */
1552 	mutex_lock(&tc_info->lock);
1553 	rc = bnxt_tc_get_ref_flow_handle(bp, flow, new_node, &ref_flow_handle);
1554 	if (rc)
1555 		goto unlock;
1556 
1557 	/* If the flow involves tunnel encap/decap, get tunnel_handle */
1558 	rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle);
1559 	if (rc)
1560 		goto put_l2;
1561 
1562 	/* send HWRM cmd to alloc the flow */
1563 	rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
1564 				      tunnel_handle, new_node);
1565 	if (rc)
1566 		goto put_tunnel;
1567 
1568 	flow->lastused = jiffies;
1569 	spin_lock_init(&flow->stats_lock);
1570 	/* add new flow to flow-table */
1571 	rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
1572 				    tc_info->flow_ht_params);
1573 	if (rc)
1574 		goto hwrm_flow_free;
1575 
1576 	mutex_unlock(&tc_info->lock);
1577 	return 0;
1578 
1579 hwrm_flow_free:
1580 	bnxt_hwrm_cfa_flow_free(bp, new_node);
1581 put_tunnel:
1582 	bnxt_tc_put_tunnel_handle(bp, flow, new_node);
1583 put_l2:
1584 	bnxt_tc_put_l2_node(bp, new_node);
1585 unlock:
1586 	mutex_unlock(&tc_info->lock);
1587 free_node:
1588 	kfree_rcu(new_node, rcu);
1589 done:
1590 	netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d",
1591 		   __func__, tc_flow_cmd->cookie, rc);
1592 	return rc;
1593 }
1594 
1595 static int bnxt_tc_del_flow(struct bnxt *bp,
1596 			    struct flow_cls_offload *tc_flow_cmd)
1597 {
1598 	struct bnxt_tc_info *tc_info = bp->tc_info;
1599 	struct bnxt_tc_flow_node *flow_node;
1600 
1601 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1602 					   &tc_flow_cmd->cookie,
1603 					   tc_info->flow_ht_params);
1604 	if (!flow_node)
1605 		return -EINVAL;
1606 
1607 	return __bnxt_tc_del_flow(bp, flow_node);
1608 }
1609 
1610 static int bnxt_tc_get_flow_stats(struct bnxt *bp,
1611 				  struct flow_cls_offload *tc_flow_cmd)
1612 {
1613 	struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats;
1614 	struct bnxt_tc_info *tc_info = bp->tc_info;
1615 	struct bnxt_tc_flow_node *flow_node;
1616 	struct bnxt_tc_flow *flow;
1617 	unsigned long lastused;
1618 
1619 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1620 					   &tc_flow_cmd->cookie,
1621 					   tc_info->flow_ht_params);
1622 	if (!flow_node)
1623 		return -1;
1624 
1625 	flow = &flow_node->flow;
1626 	curr_stats = &flow->stats;
1627 	prev_stats = &flow->prev_stats;
1628 
1629 	spin_lock(&flow->stats_lock);
1630 	stats.packets = curr_stats->packets - prev_stats->packets;
1631 	stats.bytes = curr_stats->bytes - prev_stats->bytes;
1632 	*prev_stats = *curr_stats;
1633 	lastused = flow->lastused;
1634 	spin_unlock(&flow->stats_lock);
1635 
1636 	flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets,
1637 			  lastused);
1638 	return 0;
1639 }
1640 
1641 static void bnxt_fill_cfa_stats_req(struct bnxt *bp,
1642 				    struct bnxt_tc_flow_node *flow_node,
1643 				    __le16 *flow_handle, __le32 *flow_id)
1644 {
1645 	u16 handle;
1646 
1647 	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
1648 		*flow_id = flow_node->flow_id;
1649 
1650 		/* If flow_id is used to fetch flow stats then:
1651 		 * 1. lower 12 bits of flow_handle must be set to all 1s.
1652 		 * 2. 15th bit of flow_handle must specify the flow
1653 		 *    direction (TX/RX).
1654 		 */
1655 		if (flow_node->flow.l2_key.dir == BNXT_DIR_RX)
1656 			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX |
1657 				 CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1658 		else
1659 			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1660 
1661 		*flow_handle = cpu_to_le16(handle);
1662 	} else {
1663 		*flow_handle = flow_node->flow_handle;
1664 	}
1665 }
1666 
1667 static int
1668 bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
1669 			     struct bnxt_tc_stats_batch stats_batch[])
1670 {
1671 	struct hwrm_cfa_flow_stats_input req = { 0 };
1672 	struct hwrm_cfa_flow_stats_output *resp;
1673 	__le16 *req_flow_handles = &req.flow_handle_0;
1674 	__le32 *req_flow_ids = &req.flow_id_0;
1675 	int rc, i;
1676 
1677 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
1678 	req.num_flows = cpu_to_le16(num_flows);
1679 	for (i = 0; i < num_flows; i++) {
1680 		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1681 
1682 		bnxt_fill_cfa_stats_req(bp, flow_node,
1683 					&req_flow_handles[i], &req_flow_ids[i]);
1684 	}
1685 
1686 	mutex_lock(&bp->hwrm_cmd_lock);
1687 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
1688 	if (!rc) {
1689 		__le64 *resp_packets;
1690 		__le64 *resp_bytes;
1691 
1692 		resp = bnxt_get_hwrm_resp_addr(bp, &req);
1693 		resp_packets = &resp->packet_0;
1694 		resp_bytes = &resp->byte_0;
1695 
1696 		for (i = 0; i < num_flows; i++) {
1697 			stats_batch[i].hw_stats.packets =
1698 						le64_to_cpu(resp_packets[i]);
1699 			stats_batch[i].hw_stats.bytes =
1700 						le64_to_cpu(resp_bytes[i]);
1701 		}
1702 	} else {
1703 		netdev_info(bp->dev, "error rc=%d", rc);
1704 	}
1705 	mutex_unlock(&bp->hwrm_cmd_lock);
1706 
1707 	return rc;
1708 }
1709 
/* Fold a narrow hardware counter reading into a 64-bit accumulator.
 * @accum: running total; its bits under @mask mirror the last reading
 * @val: new reading, assumed by the code to fit within @mask
 * @mask: bit mask describing the counter's real width
 *
 * If @val is smaller than the low bits already stored in @accum the
 * counter is taken to have wrapped once, so one full counter period
 * (mask + 1) is added on top.
 */
static void accumulate_val(u64 *accum, u64 val, u64 mask)
{
#define low_bits(x, mask)		((x) & (mask))
#define high_bits(x, mask)		((x) & ~(mask))
	u64 prev = *accum;
	u64 total = high_bits(prev, mask) + val;

	if (val < low_bits(prev, mask))
		total += (mask + 1);

	*accum = total;
}
1724 
1725 /* The HW counters' width is much less than 64bits.
1726  * Handle possible wrap-around while updating the stat counters
1727  */
1728 static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info,
1729 				  struct bnxt_tc_flow_stats *acc_stats,
1730 				  struct bnxt_tc_flow_stats *hw_stats)
1731 {
1732 	accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
1733 	accumulate_val(&acc_stats->packets, hw_stats->packets,
1734 		       tc_info->packets_mask);
1735 }
1736 
1737 static int
1738 bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows,
1739 				struct bnxt_tc_stats_batch stats_batch[])
1740 {
1741 	struct bnxt_tc_info *tc_info = bp->tc_info;
1742 	int rc, i;
1743 
1744 	rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch);
1745 	if (rc)
1746 		return rc;
1747 
1748 	for (i = 0; i < num_flows; i++) {
1749 		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1750 		struct bnxt_tc_flow *flow = &flow_node->flow;
1751 
1752 		spin_lock(&flow->stats_lock);
1753 		bnxt_flow_stats_accum(tc_info, &flow->stats,
1754 				      &stats_batch[i].hw_stats);
1755 		if (flow->stats.packets != flow->prev_stats.packets)
1756 			flow->lastused = jiffies;
1757 		spin_unlock(&flow->stats_lock);
1758 	}
1759 
1760 	return 0;
1761 }
1762 
1763 static int
1764 bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
1765 			      struct bnxt_tc_stats_batch stats_batch[],
1766 			      int *num_flows)
1767 {
1768 	struct bnxt_tc_info *tc_info = bp->tc_info;
1769 	struct rhashtable_iter *iter = &tc_info->iter;
1770 	void *flow_node;
1771 	int rc, i;
1772 
1773 	rhashtable_walk_start(iter);
1774 
1775 	rc = 0;
1776 	for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
1777 		flow_node = rhashtable_walk_next(iter);
1778 		if (IS_ERR(flow_node)) {
1779 			i = 0;
1780 			if (PTR_ERR(flow_node) == -EAGAIN) {
1781 				continue;
1782 			} else {
1783 				rc = PTR_ERR(flow_node);
1784 				goto done;
1785 			}
1786 		}
1787 
1788 		/* No more flows */
1789 		if (!flow_node)
1790 			goto done;
1791 
1792 		stats_batch[i].flow_node = flow_node;
1793 	}
1794 done:
1795 	rhashtable_walk_stop(iter);
1796 	*num_flows = i;
1797 	return rc;
1798 }
1799 
1800 void bnxt_tc_flow_stats_work(struct bnxt *bp)
1801 {
1802 	struct bnxt_tc_info *tc_info = bp->tc_info;
1803 	int num_flows, rc;
1804 
1805 	num_flows = atomic_read(&tc_info->flow_table.nelems);
1806 	if (!num_flows)
1807 		return;
1808 
1809 	rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter);
1810 
1811 	for (;;) {
1812 		rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch,
1813 						   &num_flows);
1814 		if (rc) {
1815 			if (rc == -EAGAIN)
1816 				continue;
1817 			break;
1818 		}
1819 
1820 		if (!num_flows)
1821 			break;
1822 
1823 		bnxt_tc_flow_stats_batch_update(bp, num_flows,
1824 						tc_info->stats_batch);
1825 	}
1826 
1827 	rhashtable_walk_exit(&tc_info->iter);
1828 }
1829 
1830 int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
1831 			 struct flow_cls_offload *cls_flower)
1832 {
1833 	switch (cls_flower->command) {
1834 	case FLOW_CLS_REPLACE:
1835 		return bnxt_tc_add_flow(bp, src_fid, cls_flower);
1836 	case FLOW_CLS_DESTROY:
1837 		return bnxt_tc_del_flow(bp, cls_flower);
1838 	case FLOW_CLS_STATS:
1839 		return bnxt_tc_get_flow_stats(bp, cls_flower);
1840 	default:
1841 		return -EOPNOTSUPP;
1842 	}
1843 }
1844 
1845 static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
1846 				       void *type_data, void *cb_priv)
1847 {
1848 	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1849 	struct flow_cls_offload *flower = type_data;
1850 	struct bnxt *bp = priv->bp;
1851 
1852 	if (flower->common.chain_index)
1853 		return -EOPNOTSUPP;
1854 
1855 	switch (type) {
1856 	case TC_SETUP_CLSFLOWER:
1857 		return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower);
1858 	default:
1859 		return -EOPNOTSUPP;
1860 	}
1861 }
1862 
1863 static struct bnxt_flower_indr_block_cb_priv *
1864 bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev)
1865 {
1866 	struct bnxt_flower_indr_block_cb_priv *cb_priv;
1867 
1868 	/* All callback list access should be protected by RTNL. */
1869 	ASSERT_RTNL();
1870 
1871 	list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list)
1872 		if (cb_priv->tunnel_netdev == netdev)
1873 			return cb_priv;
1874 
1875 	return NULL;
1876 }
1877 
1878 static void bnxt_tc_setup_indr_rel(void *cb_priv)
1879 {
1880 	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1881 
1882 	list_del(&priv->list);
1883 	kfree(priv);
1884 }
1885 
1886 static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp,
1887 				    struct flow_block_offload *f)
1888 {
1889 	struct bnxt_flower_indr_block_cb_priv *cb_priv;
1890 	struct flow_block_cb *block_cb;
1891 
1892 	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1893 		return -EOPNOTSUPP;
1894 
1895 	switch (f->command) {
1896 	case FLOW_BLOCK_BIND:
1897 		cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
1898 		if (!cb_priv)
1899 			return -ENOMEM;
1900 
1901 		cb_priv->tunnel_netdev = netdev;
1902 		cb_priv->bp = bp;
1903 		list_add(&cb_priv->list, &bp->tc_indr_block_list);
1904 
1905 		block_cb = flow_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
1906 					       cb_priv, cb_priv,
1907 					       bnxt_tc_setup_indr_rel);
1908 		if (IS_ERR(block_cb)) {
1909 			list_del(&cb_priv->list);
1910 			kfree(cb_priv);
1911 			return PTR_ERR(block_cb);
1912 		}
1913 
1914 		flow_block_cb_add(block_cb, f);
1915 		list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list);
1916 		break;
1917 	case FLOW_BLOCK_UNBIND:
1918 		cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev);
1919 		if (!cb_priv)
1920 			return -ENOENT;
1921 
1922 		block_cb = flow_block_cb_lookup(f->block,
1923 						bnxt_tc_setup_indr_block_cb,
1924 						cb_priv);
1925 		if (!block_cb)
1926 			return -ENOENT;
1927 
1928 		flow_block_cb_remove(block_cb, f);
1929 		list_del(&block_cb->driver_list);
1930 		break;
1931 	default:
1932 		return -EOPNOTSUPP;
1933 	}
1934 	return 0;
1935 }
1936 
1937 static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv,
1938 				 enum tc_setup_type type, void *type_data)
1939 {
1940 	switch (type) {
1941 	case TC_SETUP_BLOCK:
1942 		return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data);
1943 	default:
1944 		return -EOPNOTSUPP;
1945 	}
1946 }
1947 
1948 static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
1949 {
1950 	return netif_is_vxlan(netdev);
1951 }
1952 
1953 static int bnxt_tc_indr_block_event(struct notifier_block *nb,
1954 				    unsigned long event, void *ptr)
1955 {
1956 	struct net_device *netdev;
1957 	struct bnxt *bp;
1958 	int rc;
1959 
1960 	netdev = netdev_notifier_info_to_dev(ptr);
1961 	if (!bnxt_is_netdev_indr_offload(netdev))
1962 		return NOTIFY_OK;
1963 
1964 	bp = container_of(nb, struct bnxt, tc_netdev_nb);
1965 
1966 	switch (event) {
1967 	case NETDEV_REGISTER:
1968 		rc = __flow_indr_block_cb_register(netdev, bp,
1969 						   bnxt_tc_setup_indr_cb,
1970 						   bp);
1971 		if (rc)
1972 			netdev_info(bp->dev,
1973 				    "Failed to register indirect blk: dev: %s",
1974 				    netdev->name);
1975 		break;
1976 	case NETDEV_UNREGISTER:
1977 		__flow_indr_block_cb_unregister(netdev,
1978 						bnxt_tc_setup_indr_cb,
1979 						bp);
1980 		break;
1981 	}
1982 
1983 	return NOTIFY_DONE;
1984 }
1985 
1986 static const struct rhashtable_params bnxt_tc_flow_ht_params = {
1987 	.head_offset = offsetof(struct bnxt_tc_flow_node, node),
1988 	.key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
1989 	.key_len = sizeof(((struct bnxt_tc_flow_node *)0)->cookie),
1990 	.automatic_shrinking = true
1991 };
1992 
1993 static const struct rhashtable_params bnxt_tc_l2_ht_params = {
1994 	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
1995 	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
1996 	.key_len = BNXT_TC_L2_KEY_LEN,
1997 	.automatic_shrinking = true
1998 };
1999 
2000 static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = {
2001 	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
2002 	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
2003 	.key_len = BNXT_TC_L2_KEY_LEN,
2004 	.automatic_shrinking = true
2005 };
2006 
2007 static const struct rhashtable_params bnxt_tc_tunnel_ht_params = {
2008 	.head_offset = offsetof(struct bnxt_tc_tunnel_node, node),
2009 	.key_offset = offsetof(struct bnxt_tc_tunnel_node, key),
2010 	.key_len = sizeof(struct ip_tunnel_key),
2011 	.automatic_shrinking = true
2012 };
2013 
/* Build a u64 mask with the low @width bits set, for a counter that is
 * @width bits wide. @width must be in the range 1..64: a width of 0 would
 * shift by 64 bits, which is undefined for a 64-bit type.
 */
#define mask(width)		(~(u64)0 >> (64 - (width)))
2016 
2017 int bnxt_init_tc(struct bnxt *bp)
2018 {
2019 	struct bnxt_tc_info *tc_info;
2020 	int rc;
2021 
2022 	if (bp->hwrm_spec_code < 0x10803) {
2023 		netdev_warn(bp->dev,
2024 			    "Firmware does not support TC flower offload.\n");
2025 		return -ENOTSUPP;
2026 	}
2027 
2028 	tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL);
2029 	if (!tc_info)
2030 		return -ENOMEM;
2031 	mutex_init(&tc_info->lock);
2032 
2033 	/* Counter widths are programmed by FW */
2034 	tc_info->bytes_mask = mask(36);
2035 	tc_info->packets_mask = mask(28);
2036 
2037 	tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
2038 	rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
2039 	if (rc)
2040 		goto free_tc_info;
2041 
2042 	tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
2043 	rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
2044 	if (rc)
2045 		goto destroy_flow_table;
2046 
2047 	tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params;
2048 	rc = rhashtable_init(&tc_info->decap_l2_table,
2049 			     &tc_info->decap_l2_ht_params);
2050 	if (rc)
2051 		goto destroy_l2_table;
2052 
2053 	tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params;
2054 	rc = rhashtable_init(&tc_info->decap_table,
2055 			     &tc_info->decap_ht_params);
2056 	if (rc)
2057 		goto destroy_decap_l2_table;
2058 
2059 	tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params;
2060 	rc = rhashtable_init(&tc_info->encap_table,
2061 			     &tc_info->encap_ht_params);
2062 	if (rc)
2063 		goto destroy_decap_table;
2064 
2065 	tc_info->enabled = true;
2066 	bp->dev->hw_features |= NETIF_F_HW_TC;
2067 	bp->dev->features |= NETIF_F_HW_TC;
2068 	bp->tc_info = tc_info;
2069 
2070 	/* init indirect block notifications */
2071 	INIT_LIST_HEAD(&bp->tc_indr_block_list);
2072 	bp->tc_netdev_nb.notifier_call = bnxt_tc_indr_block_event;
2073 	rc = register_netdevice_notifier(&bp->tc_netdev_nb);
2074 	if (!rc)
2075 		return 0;
2076 
2077 	rhashtable_destroy(&tc_info->encap_table);
2078 
2079 destroy_decap_table:
2080 	rhashtable_destroy(&tc_info->decap_table);
2081 destroy_decap_l2_table:
2082 	rhashtable_destroy(&tc_info->decap_l2_table);
2083 destroy_l2_table:
2084 	rhashtable_destroy(&tc_info->l2_table);
2085 destroy_flow_table:
2086 	rhashtable_destroy(&tc_info->flow_table);
2087 free_tc_info:
2088 	kfree(tc_info);
2089 	return rc;
2090 }
2091 
2092 void bnxt_shutdown_tc(struct bnxt *bp)
2093 {
2094 	struct bnxt_tc_info *tc_info = bp->tc_info;
2095 
2096 	if (!bnxt_tc_flower_enabled(bp))
2097 		return;
2098 
2099 	unregister_netdevice_notifier(&bp->tc_netdev_nb);
2100 	rhashtable_destroy(&tc_info->flow_table);
2101 	rhashtable_destroy(&tc_info->l2_table);
2102 	rhashtable_destroy(&tc_info->decap_l2_table);
2103 	rhashtable_destroy(&tc_info->decap_table);
2104 	rhashtable_destroy(&tc_info->encap_table);
2105 	kfree(tc_info);
2106 	bp->tc_info = NULL;
2107 }
2108