xref: /linux/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c (revision e65e175b07bef5974045cc42238de99057669ca7)
1 /* Broadcom NetXtreme-C/E network driver.
2  *
3  * Copyright (c) 2017 Broadcom Limited
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/netdevice.h>
11 #include <linux/inetdevice.h>
12 #include <linux/if_vlan.h>
13 #include <net/flow_dissector.h>
14 #include <net/pkt_cls.h>
15 #include <net/tc_act/tc_gact.h>
16 #include <net/tc_act/tc_skbedit.h>
17 #include <net/tc_act/tc_mirred.h>
18 #include <net/tc_act/tc_vlan.h>
19 #include <net/tc_act/tc_pedit.h>
20 #include <net/tc_act/tc_tunnel_key.h>
21 #include <net/vxlan.h>
22 
23 #include "bnxt_hsi.h"
24 #include "bnxt.h"
25 #include "bnxt_hwrm.h"
26 #include "bnxt_sriov.h"
27 #include "bnxt_tc.h"
28 #include "bnxt_vfr.h"
29 
30 #define BNXT_FID_INVALID			0xffff
31 #define VLAN_TCI(vid, prio)	((vid) | ((prio) << VLAN_PRIO_SHIFT))
32 
33 #define is_vlan_pcp_wildcarded(vlan_tci_mask)	\
34 	((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == 0x0000)
35 #define is_vlan_pcp_exactmatch(vlan_tci_mask)	\
36 	((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == VLAN_PRIO_MASK)
37 #define is_vlan_pcp_zero(vlan_tci)	\
38 	((ntohs(vlan_tci) & VLAN_PRIO_MASK) == 0x0000)
39 #define is_vid_exactmatch(vlan_tci_mask)	\
40 	((ntohs(vlan_tci_mask) & VLAN_VID_MASK) == VLAN_VID_MASK)
41 
42 static bool is_wildcard(void *mask, int len);
43 static bool is_exactmatch(void *mask, int len);
44 /* Return the dst fid of the func for flow forwarding
45  * For PFs: src_fid is the fid of the PF
46  * For VF-reps: src_fid the fid of the VF
47  */
48 static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
49 {
50 	struct bnxt *bp;
51 
52 	/* check if dev belongs to the same switch */
53 	if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
54 		netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch\n",
55 			    dev->ifindex);
56 		return BNXT_FID_INVALID;
57 	}
58 
59 	/* Is dev a VF-rep? */
60 	if (bnxt_dev_is_vf_rep(dev))
61 		return bnxt_vf_rep_get_fid(dev);
62 
63 	bp = netdev_priv(dev);
64 	return bp->pf.fw_fid;
65 }
66 
67 static int bnxt_tc_parse_redir(struct bnxt *bp,
68 			       struct bnxt_tc_actions *actions,
69 			       const struct flow_action_entry *act)
70 {
71 	struct net_device *dev = act->dev;
72 
73 	if (!dev) {
74 		netdev_info(bp->dev, "no dev in mirred action\n");
75 		return -EINVAL;
76 	}
77 
78 	actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
79 	actions->dst_dev = dev;
80 	return 0;
81 }
82 
83 static int bnxt_tc_parse_vlan(struct bnxt *bp,
84 			      struct bnxt_tc_actions *actions,
85 			      const struct flow_action_entry *act)
86 {
87 	switch (act->id) {
88 	case FLOW_ACTION_VLAN_POP:
89 		actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
90 		break;
91 	case FLOW_ACTION_VLAN_PUSH:
92 		actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
93 		actions->push_vlan_tci = htons(act->vlan.vid);
94 		actions->push_vlan_tpid = act->vlan.proto;
95 		break;
96 	default:
97 		return -EOPNOTSUPP;
98 	}
99 	return 0;
100 }
101 
102 static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
103 				    struct bnxt_tc_actions *actions,
104 				    const struct flow_action_entry *act)
105 {
106 	const struct ip_tunnel_info *tun_info = act->tunnel;
107 	const struct ip_tunnel_key *tun_key = &tun_info->key;
108 
109 	if (ip_tunnel_info_af(tun_info) != AF_INET) {
110 		netdev_info(bp->dev, "only IPv4 tunnel-encap is supported\n");
111 		return -EOPNOTSUPP;
112 	}
113 
114 	actions->tun_encap_key = *tun_key;
115 	actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP;
116 	return 0;
117 }
118 
119 /* Key & Mask from the stack comes unaligned in multiple iterations of 4 bytes
120  * each(u32).
121  * This routine consolidates such multiple unaligned values into one
122  * field each for Key & Mask (for src and dst macs separately)
123  * For example,
124  *			Mask/Key	Offset	Iteration
125  *			==========	======	=========
126  *	dst mac		0xffffffff	0	1
127  *	dst mac		0x0000ffff	4	2
128  *
129  *	src mac		0xffff0000	4	1
130  *	src mac		0xffffffff	8	2
131  *
132  * The above combination coming from the stack will be consolidated as
133  *			Mask/Key
134  *			==============
135  *	src mac:	0xffffffffffff
136  *	dst mac:	0xffffffffffff
137  */
138 static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask,
139 				 u8 *actual_key, u8 *actual_mask)
140 {
141 	u32 key = get_unaligned((u32 *)actual_key);
142 	u32 mask = get_unaligned((u32 *)actual_mask);
143 
144 	part_key &= part_mask;
145 	part_key |= key & ~part_mask;
146 
147 	put_unaligned(mask | part_mask, (u32 *)actual_mask);
148 	put_unaligned(part_key, (u32 *)actual_key);
149 }
150 
151 static int
152 bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions,
153 			    u16 *eth_addr, u16 *eth_addr_mask)
154 {
155 	u16 *p;
156 	int j;
157 
158 	if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)))
159 		return -EINVAL;
160 
161 	if (!is_wildcard(&eth_addr_mask[0], ETH_ALEN)) {
162 		if (!is_exactmatch(&eth_addr_mask[0], ETH_ALEN))
163 			return -EINVAL;
164 		/* FW expects dmac to be in u16 array format */
165 		p = eth_addr;
166 		for (j = 0; j < 3; j++)
167 			actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j));
168 	}
169 
170 	if (!is_wildcard(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN)) {
171 		if (!is_exactmatch(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN))
172 			return -EINVAL;
173 		/* FW expects smac to be in u16 array format */
174 		p = &eth_addr[ETH_ALEN / 2];
175 		for (j = 0; j < 3; j++)
176 			actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j));
177 	}
178 
179 	return 0;
180 }
181 
182 static int
183 bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions,
184 		    struct flow_action_entry *act, int act_idx, u8 *eth_addr,
185 		    u8 *eth_addr_mask)
186 {
187 	size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr);
188 	size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr);
189 	u32 mask, val, offset, idx;
190 	u8 htype;
191 
192 	offset = act->mangle.offset;
193 	htype = act->mangle.htype;
194 	mask = ~act->mangle.mask;
195 	val = act->mangle.val;
196 
197 	switch (htype) {
198 	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
199 		if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) {
200 			netdev_err(bp->dev,
201 				   "%s: eth_hdr: Invalid pedit field\n",
202 				   __func__);
203 			return -EINVAL;
204 		}
205 		actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE;
206 
207 		bnxt_set_l2_key_mask(val, mask, &eth_addr[offset],
208 				     &eth_addr_mask[offset]);
209 		break;
210 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
211 		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
212 		actions->nat.l3_is_ipv4 = true;
213 		if (offset ==  offsetof(struct iphdr, saddr)) {
214 			actions->nat.src_xlate = true;
215 			actions->nat.l3.ipv4.saddr.s_addr = htonl(val);
216 		} else if (offset ==  offsetof(struct iphdr, daddr)) {
217 			actions->nat.src_xlate = false;
218 			actions->nat.l3.ipv4.daddr.s_addr = htonl(val);
219 		} else {
220 			netdev_err(bp->dev,
221 				   "%s: IPv4_hdr: Invalid pedit field\n",
222 				   __func__);
223 			return -EINVAL;
224 		}
225 
226 		netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n",
227 			   actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr,
228 			   &actions->nat.l3.ipv4.daddr);
229 		break;
230 
231 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
232 		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
233 		actions->nat.l3_is_ipv4 = false;
234 		if (offset >= offsetof(struct ipv6hdr, saddr) &&
235 		    offset < offset_of_ip6_daddr) {
236 			/* 16 byte IPv6 address comes in 4 iterations of
237 			 * 4byte chunks each
238 			 */
239 			actions->nat.src_xlate = true;
240 			idx = (offset - offset_of_ip6_saddr) / 4;
241 			/* First 4bytes will be copied to idx 0 and so on */
242 			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
243 		} else if (offset >= offset_of_ip6_daddr &&
244 			   offset < offset_of_ip6_daddr + 16) {
245 			actions->nat.src_xlate = false;
246 			idx = (offset - offset_of_ip6_daddr) / 4;
247 			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
248 		} else {
249 			netdev_err(bp->dev,
250 				   "%s: IPv6_hdr: Invalid pedit field\n",
251 				   __func__);
252 			return -EINVAL;
253 		}
254 		break;
255 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
256 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
257 		/* HW does not support L4 rewrite alone without L3
258 		 * rewrite
259 		 */
260 		if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) {
261 			netdev_err(bp->dev,
262 				   "Need to specify L3 rewrite as well\n");
263 			return -EINVAL;
264 		}
265 		if (actions->nat.src_xlate)
266 			actions->nat.l4.ports.sport = htons(val);
267 		else
268 			actions->nat.l4.ports.dport = htons(val);
269 		netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n",
270 			   actions->nat.l4.ports.sport,
271 			   actions->nat.l4.ports.dport);
272 		break;
273 	default:
274 		netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n",
275 			   __func__);
276 		return -EINVAL;
277 	}
278 	return 0;
279 }
280 
281 static int bnxt_tc_parse_actions(struct bnxt *bp,
282 				 struct bnxt_tc_actions *actions,
283 				 struct flow_action *flow_action,
284 				 struct netlink_ext_ack *extack)
285 {
286 	/* Used to store the L2 rewrite mask for dmac (6 bytes) followed by
287 	 * smac (6 bytes) if rewrite of both is specified, otherwise either
288 	 * dmac or smac
289 	 */
290 	u16 eth_addr_mask[ETH_ALEN] = { 0 };
291 	/* Used to store the L2 rewrite key for dmac (6 bytes) followed by
292 	 * smac (6 bytes) if rewrite of both is specified, otherwise either
293 	 * dmac or smac
294 	 */
295 	u16 eth_addr[ETH_ALEN] = { 0 };
296 	struct flow_action_entry *act;
297 	int i, rc;
298 
299 	if (!flow_action_has_entries(flow_action)) {
300 		netdev_info(bp->dev, "no actions\n");
301 		return -EINVAL;
302 	}
303 
304 	if (!flow_action_basic_hw_stats_check(flow_action, extack))
305 		return -EOPNOTSUPP;
306 
307 	flow_action_for_each(i, act, flow_action) {
308 		switch (act->id) {
309 		case FLOW_ACTION_DROP:
310 			actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
311 			return 0; /* don't bother with other actions */
312 		case FLOW_ACTION_REDIRECT:
313 			rc = bnxt_tc_parse_redir(bp, actions, act);
314 			if (rc)
315 				return rc;
316 			break;
317 		case FLOW_ACTION_VLAN_POP:
318 		case FLOW_ACTION_VLAN_PUSH:
319 		case FLOW_ACTION_VLAN_MANGLE:
320 			rc = bnxt_tc_parse_vlan(bp, actions, act);
321 			if (rc)
322 				return rc;
323 			break;
324 		case FLOW_ACTION_TUNNEL_ENCAP:
325 			rc = bnxt_tc_parse_tunnel_set(bp, actions, act);
326 			if (rc)
327 				return rc;
328 			break;
329 		case FLOW_ACTION_TUNNEL_DECAP:
330 			actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
331 			break;
332 		/* Packet edit: L2 rewrite, NAT, NAPT */
333 		case FLOW_ACTION_MANGLE:
334 			rc = bnxt_tc_parse_pedit(bp, actions, act, i,
335 						 (u8 *)eth_addr,
336 						 (u8 *)eth_addr_mask);
337 			if (rc)
338 				return rc;
339 			break;
340 		default:
341 			break;
342 		}
343 	}
344 
345 	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
346 		rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr,
347 						 eth_addr_mask);
348 		if (rc)
349 			return rc;
350 	}
351 
352 	if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
353 		if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
354 			/* dst_fid is PF's fid */
355 			actions->dst_fid = bp->pf.fw_fid;
356 		} else {
357 			/* find the FID from dst_dev */
358 			actions->dst_fid =
359 				bnxt_flow_get_dst_fid(bp, actions->dst_dev);
360 			if (actions->dst_fid == BNXT_FID_INVALID)
361 				return -EINVAL;
362 		}
363 	}
364 
365 	return 0;
366 }
367 
368 static int bnxt_tc_parse_flow(struct bnxt *bp,
369 			      struct flow_cls_offload *tc_flow_cmd,
370 			      struct bnxt_tc_flow *flow)
371 {
372 	struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd);
373 	struct flow_dissector *dissector = rule->match.dissector;
374 
375 	/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
376 	if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
377 	    (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
378 		netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x\n",
379 			    dissector->used_keys);
380 		return -EOPNOTSUPP;
381 	}
382 
383 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
384 		struct flow_match_basic match;
385 
386 		flow_rule_match_basic(rule, &match);
387 		flow->l2_key.ether_type = match.key->n_proto;
388 		flow->l2_mask.ether_type = match.mask->n_proto;
389 
390 		if (match.key->n_proto == htons(ETH_P_IP) ||
391 		    match.key->n_proto == htons(ETH_P_IPV6)) {
392 			flow->l4_key.ip_proto = match.key->ip_proto;
393 			flow->l4_mask.ip_proto = match.mask->ip_proto;
394 		}
395 	}
396 
397 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
398 		struct flow_match_eth_addrs match;
399 
400 		flow_rule_match_eth_addrs(rule, &match);
401 		flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
402 		ether_addr_copy(flow->l2_key.dmac, match.key->dst);
403 		ether_addr_copy(flow->l2_mask.dmac, match.mask->dst);
404 		ether_addr_copy(flow->l2_key.smac, match.key->src);
405 		ether_addr_copy(flow->l2_mask.smac, match.mask->src);
406 	}
407 
408 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
409 		struct flow_match_vlan match;
410 
411 		flow_rule_match_vlan(rule, &match);
412 		flow->l2_key.inner_vlan_tci =
413 			cpu_to_be16(VLAN_TCI(match.key->vlan_id,
414 					     match.key->vlan_priority));
415 		flow->l2_mask.inner_vlan_tci =
416 			cpu_to_be16((VLAN_TCI(match.mask->vlan_id,
417 					      match.mask->vlan_priority)));
418 		flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
419 		flow->l2_mask.inner_vlan_tpid = htons(0xffff);
420 		flow->l2_key.num_vlans = 1;
421 	}
422 
423 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
424 		struct flow_match_ipv4_addrs match;
425 
426 		flow_rule_match_ipv4_addrs(rule, &match);
427 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
428 		flow->l3_key.ipv4.daddr.s_addr = match.key->dst;
429 		flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst;
430 		flow->l3_key.ipv4.saddr.s_addr = match.key->src;
431 		flow->l3_mask.ipv4.saddr.s_addr = match.mask->src;
432 	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
433 		struct flow_match_ipv6_addrs match;
434 
435 		flow_rule_match_ipv6_addrs(rule, &match);
436 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
437 		flow->l3_key.ipv6.daddr = match.key->dst;
438 		flow->l3_mask.ipv6.daddr = match.mask->dst;
439 		flow->l3_key.ipv6.saddr = match.key->src;
440 		flow->l3_mask.ipv6.saddr = match.mask->src;
441 	}
442 
443 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
444 		struct flow_match_ports match;
445 
446 		flow_rule_match_ports(rule, &match);
447 		flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
448 		flow->l4_key.ports.dport = match.key->dst;
449 		flow->l4_mask.ports.dport = match.mask->dst;
450 		flow->l4_key.ports.sport = match.key->src;
451 		flow->l4_mask.ports.sport = match.mask->src;
452 	}
453 
454 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
455 		struct flow_match_icmp match;
456 
457 		flow_rule_match_icmp(rule, &match);
458 		flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
459 		flow->l4_key.icmp.type = match.key->type;
460 		flow->l4_key.icmp.code = match.key->code;
461 		flow->l4_mask.icmp.type = match.mask->type;
462 		flow->l4_mask.icmp.code = match.mask->code;
463 	}
464 
465 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
466 		struct flow_match_ipv4_addrs match;
467 
468 		flow_rule_match_enc_ipv4_addrs(rule, &match);
469 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
470 		flow->tun_key.u.ipv4.dst = match.key->dst;
471 		flow->tun_mask.u.ipv4.dst = match.mask->dst;
472 		flow->tun_key.u.ipv4.src = match.key->src;
473 		flow->tun_mask.u.ipv4.src = match.mask->src;
474 	} else if (flow_rule_match_key(rule,
475 				      FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
476 		return -EOPNOTSUPP;
477 	}
478 
479 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
480 		struct flow_match_enc_keyid match;
481 
482 		flow_rule_match_enc_keyid(rule, &match);
483 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
484 		flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid);
485 		flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid);
486 	}
487 
488 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
489 		struct flow_match_ports match;
490 
491 		flow_rule_match_enc_ports(rule, &match);
492 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
493 		flow->tun_key.tp_dst = match.key->dst;
494 		flow->tun_mask.tp_dst = match.mask->dst;
495 		flow->tun_key.tp_src = match.key->src;
496 		flow->tun_mask.tp_src = match.mask->src;
497 	}
498 
499 	return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action,
500 				     tc_flow_cmd->common.extack);
501 }
502 
503 static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
504 				   struct bnxt_tc_flow_node *flow_node)
505 {
506 	struct hwrm_cfa_flow_free_input *req;
507 	int rc;
508 
509 	rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_FREE);
510 	if (!rc) {
511 		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
512 			req->ext_flow_handle = flow_node->ext_flow_handle;
513 		else
514 			req->flow_handle = flow_node->flow_handle;
515 
516 		rc = hwrm_req_send(bp, req);
517 	}
518 	if (rc)
519 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
520 
521 	return rc;
522 }
523 
524 static int ipv6_mask_len(struct in6_addr *mask)
525 {
526 	int mask_len = 0, i;
527 
528 	for (i = 0; i < 4; i++)
529 		mask_len += inet_mask_len(mask->s6_addr32[i]);
530 
531 	return mask_len;
532 }
533 
/* Return true when all @len bytes starting at @mask are zero (this is
 * also the result for len <= 0).
 */
static bool is_wildcard(void *mask, int len)
{
	const unsigned char *byte = mask;
	const unsigned char *end = byte + (len > 0 ? len : 0);

	while (byte < end) {
		if (*byte++)
			return false;
	}

	return true;
}
545 
/* Return true when all @len bytes starting at @mask are 0xff (this is
 * also the result for len <= 0).
 */
static bool is_exactmatch(void *mask, int len)
{
	const unsigned char *byte = mask;
	const unsigned char *end = byte + (len > 0 ? len : 0);

	while (byte < end) {
		if (*byte++ != 0xff)
			return false;
	}

	return true;
}
557 
558 static bool is_vlan_tci_allowed(__be16  vlan_tci_mask,
559 				__be16  vlan_tci)
560 {
561 	/* VLAN priority must be either exactly zero or fully wildcarded and
562 	 * VLAN id must be exact match.
563 	 */
564 	if (is_vid_exactmatch(vlan_tci_mask) &&
565 	    ((is_vlan_pcp_exactmatch(vlan_tci_mask) &&
566 	      is_vlan_pcp_zero(vlan_tci)) ||
567 	     is_vlan_pcp_wildcarded(vlan_tci_mask)))
568 		return true;
569 
570 	return false;
571 }
572 
/* Return true when at least one of the @len bytes at @key is non-zero */
static bool bits_set(void *key, int len)
{
	const unsigned char *byte = key;
	const unsigned char *end = byte + (len > 0 ? len : 0);

	while (byte < end) {
		if (*byte++)
			return true;
	}

	return false;
}
584 
585 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
586 				    __le16 ref_flow_handle,
587 				    __le32 tunnel_handle,
588 				    struct bnxt_tc_flow_node *flow_node)
589 {
590 	struct bnxt_tc_actions *actions = &flow->actions;
591 	struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
592 	struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
593 	struct hwrm_cfa_flow_alloc_output *resp;
594 	struct hwrm_cfa_flow_alloc_input *req;
595 	u16 flow_flags = 0, action_flags = 0;
596 	int rc;
597 
598 	rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_ALLOC);
599 	if (rc)
600 		return rc;
601 
602 	req->src_fid = cpu_to_le16(flow->src_fid);
603 	req->ref_flow_handle = ref_flow_handle;
604 
605 	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
606 		memcpy(req->l2_rewrite_dmac, actions->l2_rewrite_dmac,
607 		       ETH_ALEN);
608 		memcpy(req->l2_rewrite_smac, actions->l2_rewrite_smac,
609 		       ETH_ALEN);
610 		action_flags |=
611 			CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
612 	}
613 
614 	if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) {
615 		if (actions->nat.l3_is_ipv4) {
616 			action_flags |=
617 				CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS;
618 
619 			if (actions->nat.src_xlate) {
620 				action_flags |=
621 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
622 				/* L3 source rewrite */
623 				req->nat_ip_address[0] =
624 					actions->nat.l3.ipv4.saddr.s_addr;
625 				/* L4 source port */
626 				if (actions->nat.l4.ports.sport)
627 					req->nat_port =
628 						actions->nat.l4.ports.sport;
629 			} else {
630 				action_flags |=
631 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
632 				/* L3 destination rewrite */
633 				req->nat_ip_address[0] =
634 					actions->nat.l3.ipv4.daddr.s_addr;
635 				/* L4 destination port */
636 				if (actions->nat.l4.ports.dport)
637 					req->nat_port =
638 						actions->nat.l4.ports.dport;
639 			}
640 			netdev_dbg(bp->dev,
641 				   "req->nat_ip_address: %pI4 src_xlate: %d req->nat_port: %x\n",
642 				   req->nat_ip_address, actions->nat.src_xlate,
643 				   req->nat_port);
644 		} else {
645 			if (actions->nat.src_xlate) {
646 				action_flags |=
647 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
648 				/* L3 source rewrite */
649 				memcpy(req->nat_ip_address,
650 				       actions->nat.l3.ipv6.saddr.s6_addr32,
651 				       sizeof(req->nat_ip_address));
652 				/* L4 source port */
653 				if (actions->nat.l4.ports.sport)
654 					req->nat_port =
655 						actions->nat.l4.ports.sport;
656 			} else {
657 				action_flags |=
658 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
659 				/* L3 destination rewrite */
660 				memcpy(req->nat_ip_address,
661 				       actions->nat.l3.ipv6.daddr.s6_addr32,
662 				       sizeof(req->nat_ip_address));
663 				/* L4 destination port */
664 				if (actions->nat.l4.ports.dport)
665 					req->nat_port =
666 						actions->nat.l4.ports.dport;
667 			}
668 			netdev_dbg(bp->dev,
669 				   "req->nat_ip_address: %pI6 src_xlate: %d req->nat_port: %x\n",
670 				   req->nat_ip_address, actions->nat.src_xlate,
671 				   req->nat_port);
672 		}
673 	}
674 
675 	if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
676 	    actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
677 		req->tunnel_handle = tunnel_handle;
678 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
679 		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
680 	}
681 
682 	req->ethertype = flow->l2_key.ether_type;
683 	req->ip_proto = flow->l4_key.ip_proto;
684 
685 	if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
686 		memcpy(req->dmac, flow->l2_key.dmac, ETH_ALEN);
687 		memcpy(req->smac, flow->l2_key.smac, ETH_ALEN);
688 	}
689 
690 	if (flow->l2_key.num_vlans > 0) {
691 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE;
692 		/* FW expects the inner_vlan_tci value to be set
693 		 * in outer_vlan_tci when num_vlans is 1 (which is
694 		 * always the case in TC.)
695 		 */
696 		req->outer_vlan_tci = flow->l2_key.inner_vlan_tci;
697 	}
698 
699 	/* If all IP and L4 fields are wildcarded then this is an L2 flow */
700 	if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
701 	    is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
702 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
703 	} else {
704 		flow_flags |= flow->l2_key.ether_type == htons(ETH_P_IP) ?
705 				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 :
706 				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
707 
708 		if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
709 			req->ip_dst[0] = l3_key->ipv4.daddr.s_addr;
710 			req->ip_dst_mask_len =
711 				inet_mask_len(l3_mask->ipv4.daddr.s_addr);
712 			req->ip_src[0] = l3_key->ipv4.saddr.s_addr;
713 			req->ip_src_mask_len =
714 				inet_mask_len(l3_mask->ipv4.saddr.s_addr);
715 		} else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
716 			memcpy(req->ip_dst, l3_key->ipv6.daddr.s6_addr32,
717 			       sizeof(req->ip_dst));
718 			req->ip_dst_mask_len =
719 					ipv6_mask_len(&l3_mask->ipv6.daddr);
720 			memcpy(req->ip_src, l3_key->ipv6.saddr.s6_addr32,
721 			       sizeof(req->ip_src));
722 			req->ip_src_mask_len =
723 					ipv6_mask_len(&l3_mask->ipv6.saddr);
724 		}
725 	}
726 
727 	if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
728 		req->l4_src_port = flow->l4_key.ports.sport;
729 		req->l4_src_port_mask = flow->l4_mask.ports.sport;
730 		req->l4_dst_port = flow->l4_key.ports.dport;
731 		req->l4_dst_port_mask = flow->l4_mask.ports.dport;
732 	} else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
733 		/* l4 ports serve as type/code when ip_proto is ICMP */
734 		req->l4_src_port = htons(flow->l4_key.icmp.type);
735 		req->l4_src_port_mask = htons(flow->l4_mask.icmp.type);
736 		req->l4_dst_port = htons(flow->l4_key.icmp.code);
737 		req->l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
738 	}
739 	req->flags = cpu_to_le16(flow_flags);
740 
741 	if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
742 		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
743 	} else {
744 		if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
745 			action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
746 			req->dst_fid = cpu_to_le16(actions->dst_fid);
747 		}
748 		if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
749 			action_flags |=
750 			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
751 			req->l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
752 			req->l2_rewrite_vlan_tci = actions->push_vlan_tci;
753 			memcpy(&req->l2_rewrite_dmac, &req->dmac, ETH_ALEN);
754 			memcpy(&req->l2_rewrite_smac, &req->smac, ETH_ALEN);
755 		}
756 		if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
757 			action_flags |=
758 			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
759 			/* Rewrite config with tpid = 0 implies vlan pop */
760 			req->l2_rewrite_vlan_tpid = 0;
761 			memcpy(&req->l2_rewrite_dmac, &req->dmac, ETH_ALEN);
762 			memcpy(&req->l2_rewrite_smac, &req->smac, ETH_ALEN);
763 		}
764 	}
765 	req->action_flags = cpu_to_le16(action_flags);
766 
767 	resp = hwrm_req_hold(bp, req);
768 	rc = hwrm_req_send_silent(bp, req);
769 	if (!rc) {
770 		/* CFA_FLOW_ALLOC response interpretation:
771 		 *		    fw with	     fw with
772 		 *		    16-bit	     64-bit
773 		 *		    flow handle      flow handle
774 		 *		    ===========	     ===========
775 		 * flow_handle      flow handle      flow context id
776 		 * ext_flow_handle  INVALID	     flow handle
777 		 * flow_id	    INVALID	     flow counter id
778 		 */
779 		flow_node->flow_handle = resp->flow_handle;
780 		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
781 			flow_node->ext_flow_handle = resp->ext_flow_handle;
782 			flow_node->flow_id = resp->flow_id;
783 		}
784 	}
785 	hwrm_req_drop(bp, req);
786 	return rc;
787 }
788 
789 static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
790 				       struct bnxt_tc_flow *flow,
791 				       struct bnxt_tc_l2_key *l2_info,
792 				       __le32 ref_decap_handle,
793 				       __le32 *decap_filter_handle)
794 {
795 	struct hwrm_cfa_decap_filter_alloc_output *resp;
796 	struct ip_tunnel_key *tun_key = &flow->tun_key;
797 	struct hwrm_cfa_decap_filter_alloc_input *req;
798 	u32 enables = 0;
799 	int rc;
800 
801 	rc = hwrm_req_init(bp, req, HWRM_CFA_DECAP_FILTER_ALLOC);
802 	if (rc)
803 		goto exit;
804 
805 	req->flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
806 	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
807 		   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
808 	req->tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
809 	req->ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
810 
811 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
812 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
813 		/* tunnel_id is wrongly defined in hsi defn. as __le32 */
814 		req->tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
815 	}
816 
817 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
818 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
819 		ether_addr_copy(req->dst_macaddr, l2_info->dmac);
820 	}
821 	if (l2_info->num_vlans) {
822 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
823 		req->t_ivlan_vid = l2_info->inner_vlan_tci;
824 	}
825 
826 	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
827 	req->ethertype = htons(ETH_P_IP);
828 
829 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
830 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
831 			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
832 			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
833 		req->ip_addr_type =
834 			CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
835 		req->dst_ipaddr[0] = tun_key->u.ipv4.dst;
836 		req->src_ipaddr[0] = tun_key->u.ipv4.src;
837 	}
838 
839 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
840 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
841 		req->dst_port = tun_key->tp_dst;
842 	}
843 
844 	/* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc
845 	 * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
846 	 */
847 	req->l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
848 	req->enables = cpu_to_le32(enables);
849 
850 	resp = hwrm_req_hold(bp, req);
851 	rc = hwrm_req_send_silent(bp, req);
852 	if (!rc)
853 		*decap_filter_handle = resp->decap_filter_id;
854 	hwrm_req_drop(bp, req);
855 exit:
856 	if (rc)
857 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
858 
859 	return rc;
860 }
861 
862 static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
863 				      __le32 decap_filter_handle)
864 {
865 	struct hwrm_cfa_decap_filter_free_input *req;
866 	int rc;
867 
868 	rc = hwrm_req_init(bp, req, HWRM_CFA_DECAP_FILTER_FREE);
869 	if (!rc) {
870 		req->decap_filter_id = decap_filter_handle;
871 		rc = hwrm_req_send(bp, req);
872 	}
873 	if (rc)
874 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
875 
876 	return rc;
877 }
878 
879 static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
880 				       struct ip_tunnel_key *encap_key,
881 				       struct bnxt_tc_l2_key *l2_info,
882 				       __le32 *encap_record_handle)
883 {
884 	struct hwrm_cfa_encap_record_alloc_output *resp;
885 	struct hwrm_cfa_encap_record_alloc_input *req;
886 	struct hwrm_cfa_encap_data_vxlan *encap;
887 	struct hwrm_vxlan_ipv4_hdr *encap_ipv4;
888 	int rc;
889 
890 	rc = hwrm_req_init(bp, req, HWRM_CFA_ENCAP_RECORD_ALLOC);
891 	if (rc)
892 		goto exit;
893 
894 	encap = (struct hwrm_cfa_encap_data_vxlan *)&req->encap_data;
895 	req->encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
896 	ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
897 	ether_addr_copy(encap->src_mac_addr, l2_info->smac);
898 	if (l2_info->num_vlans) {
899 		encap->num_vlan_tags = l2_info->num_vlans;
900 		encap->ovlan_tci = l2_info->inner_vlan_tci;
901 		encap->ovlan_tpid = l2_info->inner_vlan_tpid;
902 	}
903 
904 	encap_ipv4 = (struct hwrm_vxlan_ipv4_hdr *)encap->l3;
905 	encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
906 	encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
907 	encap_ipv4->ttl = encap_key->ttl;
908 
909 	encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst;
910 	encap_ipv4->src_ip_addr = encap_key->u.ipv4.src;
911 	encap_ipv4->protocol = IPPROTO_UDP;
912 
913 	encap->dst_port = encap_key->tp_dst;
914 	encap->vni = tunnel_id_to_key32(encap_key->tun_id);
915 
916 	resp = hwrm_req_hold(bp, req);
917 	rc = hwrm_req_send_silent(bp, req);
918 	if (!rc)
919 		*encap_record_handle = resp->encap_record_id;
920 	hwrm_req_drop(bp, req);
921 exit:
922 	if (rc)
923 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
924 
925 	return rc;
926 }
927 
928 static int hwrm_cfa_encap_record_free(struct bnxt *bp,
929 				      __le32 encap_record_handle)
930 {
931 	struct hwrm_cfa_encap_record_free_input *req;
932 	int rc;
933 
934 	rc = hwrm_req_init(bp, req, HWRM_CFA_ENCAP_RECORD_FREE);
935 	if (!rc) {
936 		req->encap_record_id = encap_record_handle;
937 		rc = hwrm_req_send(bp, req);
938 	}
939 	if (rc)
940 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
941 
942 	return rc;
943 }
944 
945 static int bnxt_tc_put_l2_node(struct bnxt *bp,
946 			       struct bnxt_tc_flow_node *flow_node)
947 {
948 	struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
949 	struct bnxt_tc_info *tc_info = bp->tc_info;
950 	int rc;
951 
952 	/* remove flow_node from the L2 shared flow list */
953 	list_del(&flow_node->l2_list_node);
954 	if (--l2_node->refcount == 0) {
955 		rc =  rhashtable_remove_fast(&tc_info->l2_table, &l2_node->node,
956 					     tc_info->l2_ht_params);
957 		if (rc)
958 			netdev_err(bp->dev,
959 				   "Error: %s: rhashtable_remove_fast: %d\n",
960 				   __func__, rc);
961 		kfree_rcu(l2_node, rcu);
962 	}
963 	return 0;
964 }
965 
966 static struct bnxt_tc_l2_node *
967 bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
968 		    struct rhashtable_params ht_params,
969 		    struct bnxt_tc_l2_key *l2_key)
970 {
971 	struct bnxt_tc_l2_node *l2_node;
972 	int rc;
973 
974 	l2_node = rhashtable_lookup_fast(l2_table, l2_key, ht_params);
975 	if (!l2_node) {
976 		l2_node = kzalloc(sizeof(*l2_node), GFP_KERNEL);
977 		if (!l2_node) {
978 			rc = -ENOMEM;
979 			return NULL;
980 		}
981 
982 		l2_node->key = *l2_key;
983 		rc = rhashtable_insert_fast(l2_table, &l2_node->node,
984 					    ht_params);
985 		if (rc) {
986 			kfree_rcu(l2_node, rcu);
987 			netdev_err(bp->dev,
988 				   "Error: %s: rhashtable_insert_fast: %d\n",
989 				   __func__, rc);
990 			return NULL;
991 		}
992 		INIT_LIST_HEAD(&l2_node->common_l2_flows);
993 	}
994 	return l2_node;
995 }
996 
997 /* Get the ref_flow_handle for a flow by checking if there are any other
998  * flows that share the same L2 key as this flow.
999  */
1000 static int
1001 bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1002 			    struct bnxt_tc_flow_node *flow_node,
1003 			    __le16 *ref_flow_handle)
1004 {
1005 	struct bnxt_tc_info *tc_info = bp->tc_info;
1006 	struct bnxt_tc_flow_node *ref_flow_node;
1007 	struct bnxt_tc_l2_node *l2_node;
1008 
1009 	l2_node = bnxt_tc_get_l2_node(bp, &tc_info->l2_table,
1010 				      tc_info->l2_ht_params,
1011 				      &flow->l2_key);
1012 	if (!l2_node)
1013 		return -1;
1014 
1015 	/* If any other flow is using this l2_node, use it's flow_handle
1016 	 * as the ref_flow_handle
1017 	 */
1018 	if (l2_node->refcount > 0) {
1019 		ref_flow_node = list_first_entry(&l2_node->common_l2_flows,
1020 						 struct bnxt_tc_flow_node,
1021 						 l2_list_node);
1022 		*ref_flow_handle = ref_flow_node->flow_handle;
1023 	} else {
1024 		*ref_flow_handle = cpu_to_le16(0xffff);
1025 	}
1026 
1027 	/* Insert the l2_node into the flow_node so that subsequent flows
1028 	 * with a matching l2 key can use the flow_handle of this flow
1029 	 * as their ref_flow_handle
1030 	 */
1031 	flow_node->l2_node = l2_node;
1032 	list_add(&flow_node->l2_list_node, &l2_node->common_l2_flows);
1033 	l2_node->refcount++;
1034 	return 0;
1035 }
1036 
1037 /* After the flow parsing is done, this routine is used for checking
1038  * if there are any aspects of the flow that prevent it from being
1039  * offloaded.
1040  */
1041 static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
1042 {
1043 	/* If L4 ports are specified then ip_proto must be TCP or UDP */
1044 	if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
1045 	    (flow->l4_key.ip_proto != IPPROTO_TCP &&
1046 	     flow->l4_key.ip_proto != IPPROTO_UDP)) {
1047 		netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports\n",
1048 			    flow->l4_key.ip_proto);
1049 		return false;
1050 	}
1051 
1052 	/* Currently source/dest MAC cannot be partial wildcard  */
1053 	if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) &&
1054 	    !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) {
1055 		netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n");
1056 		return false;
1057 	}
1058 	if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) &&
1059 	    !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) {
1060 		netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n");
1061 		return false;
1062 	}
1063 
1064 	/* Currently VLAN fields cannot be partial wildcard */
1065 	if (bits_set(&flow->l2_key.inner_vlan_tci,
1066 		     sizeof(flow->l2_key.inner_vlan_tci)) &&
1067 	    !is_vlan_tci_allowed(flow->l2_mask.inner_vlan_tci,
1068 				 flow->l2_key.inner_vlan_tci)) {
1069 		netdev_info(bp->dev, "Unsupported VLAN TCI\n");
1070 		return false;
1071 	}
1072 	if (bits_set(&flow->l2_key.inner_vlan_tpid,
1073 		     sizeof(flow->l2_key.inner_vlan_tpid)) &&
1074 	    !is_exactmatch(&flow->l2_mask.inner_vlan_tpid,
1075 			   sizeof(flow->l2_mask.inner_vlan_tpid))) {
1076 		netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n");
1077 		return false;
1078 	}
1079 
1080 	/* Currently Ethertype must be set */
1081 	if (!is_exactmatch(&flow->l2_mask.ether_type,
1082 			   sizeof(flow->l2_mask.ether_type))) {
1083 		netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n");
1084 		return false;
1085 	}
1086 
1087 	return true;
1088 }
1089 
1090 /* Returns the final refcount of the node on success
1091  * or a -ve error code on failure
1092  */
1093 static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
1094 				   struct rhashtable *tunnel_table,
1095 				   struct rhashtable_params *ht_params,
1096 				   struct bnxt_tc_tunnel_node *tunnel_node)
1097 {
1098 	int rc;
1099 
1100 	if (--tunnel_node->refcount == 0) {
1101 		rc =  rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
1102 					     *ht_params);
1103 		if (rc) {
1104 			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
1105 			rc = -1;
1106 		}
1107 		kfree_rcu(tunnel_node, rcu);
1108 		return rc;
1109 	} else {
1110 		return tunnel_node->refcount;
1111 	}
1112 }
1113 
1114 /* Get (or add) either encap or decap tunnel node from/to the supplied
1115  * hash table.
1116  */
1117 static struct bnxt_tc_tunnel_node *
1118 bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
1119 			struct rhashtable_params *ht_params,
1120 			struct ip_tunnel_key *tun_key)
1121 {
1122 	struct bnxt_tc_tunnel_node *tunnel_node;
1123 	int rc;
1124 
1125 	tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params);
1126 	if (!tunnel_node) {
1127 		tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL);
1128 		if (!tunnel_node) {
1129 			rc = -ENOMEM;
1130 			goto err;
1131 		}
1132 
1133 		tunnel_node->key = *tun_key;
1134 		tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE;
1135 		rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node,
1136 					    *ht_params);
1137 		if (rc) {
1138 			kfree_rcu(tunnel_node, rcu);
1139 			goto err;
1140 		}
1141 	}
1142 	tunnel_node->refcount++;
1143 	return tunnel_node;
1144 err:
1145 	netdev_info(bp->dev, "error rc=%d\n", rc);
1146 	return NULL;
1147 }
1148 
1149 static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp,
1150 					struct bnxt_tc_flow *flow,
1151 					struct bnxt_tc_l2_key *l2_key,
1152 					struct bnxt_tc_flow_node *flow_node,
1153 					__le32 *ref_decap_handle)
1154 {
1155 	struct bnxt_tc_info *tc_info = bp->tc_info;
1156 	struct bnxt_tc_flow_node *ref_flow_node;
1157 	struct bnxt_tc_l2_node *decap_l2_node;
1158 
1159 	decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table,
1160 					    tc_info->decap_l2_ht_params,
1161 					    l2_key);
1162 	if (!decap_l2_node)
1163 		return -1;
1164 
1165 	/* If any other flow is using this decap_l2_node, use it's decap_handle
1166 	 * as the ref_decap_handle
1167 	 */
1168 	if (decap_l2_node->refcount > 0) {
1169 		ref_flow_node =
1170 			list_first_entry(&decap_l2_node->common_l2_flows,
1171 					 struct bnxt_tc_flow_node,
1172 					 decap_l2_list_node);
1173 		*ref_decap_handle = ref_flow_node->decap_node->tunnel_handle;
1174 	} else {
1175 		*ref_decap_handle = INVALID_TUNNEL_HANDLE;
1176 	}
1177 
1178 	/* Insert the l2_node into the flow_node so that subsequent flows
1179 	 * with a matching decap l2 key can use the decap_filter_handle of
1180 	 * this flow as their ref_decap_handle
1181 	 */
1182 	flow_node->decap_l2_node = decap_l2_node;
1183 	list_add(&flow_node->decap_l2_list_node,
1184 		 &decap_l2_node->common_l2_flows);
1185 	decap_l2_node->refcount++;
1186 	return 0;
1187 }
1188 
1189 static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
1190 				      struct bnxt_tc_flow_node *flow_node)
1191 {
1192 	struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node;
1193 	struct bnxt_tc_info *tc_info = bp->tc_info;
1194 	int rc;
1195 
1196 	/* remove flow_node from the decap L2 sharing flow list */
1197 	list_del(&flow_node->decap_l2_list_node);
1198 	if (--decap_l2_node->refcount == 0) {
1199 		rc =  rhashtable_remove_fast(&tc_info->decap_l2_table,
1200 					     &decap_l2_node->node,
1201 					     tc_info->decap_l2_ht_params);
1202 		if (rc)
1203 			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
1204 		kfree_rcu(decap_l2_node, rcu);
1205 	}
1206 }
1207 
1208 static void bnxt_tc_put_decap_handle(struct bnxt *bp,
1209 				     struct bnxt_tc_flow_node *flow_node)
1210 {
1211 	__le32 decap_handle = flow_node->decap_node->tunnel_handle;
1212 	struct bnxt_tc_info *tc_info = bp->tc_info;
1213 	int rc;
1214 
1215 	if (flow_node->decap_l2_node)
1216 		bnxt_tc_put_decap_l2_node(bp, flow_node);
1217 
1218 	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1219 				     &tc_info->decap_ht_params,
1220 				     flow_node->decap_node);
1221 	if (!rc && decap_handle != INVALID_TUNNEL_HANDLE)
1222 		hwrm_cfa_decap_filter_free(bp, decap_handle);
1223 }
1224 
1225 static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
1226 				       struct ip_tunnel_key *tun_key,
1227 				       struct bnxt_tc_l2_key *l2_info)
1228 {
1229 #ifdef CONFIG_INET
1230 	struct net_device *real_dst_dev = bp->dev;
1231 	struct flowi4 flow = { {0} };
1232 	struct net_device *dst_dev;
1233 	struct neighbour *nbr;
1234 	struct rtable *rt;
1235 	int rc;
1236 
1237 	flow.flowi4_proto = IPPROTO_UDP;
1238 	flow.fl4_dport = tun_key->tp_dst;
1239 	flow.daddr = tun_key->u.ipv4.dst;
1240 
1241 	rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
1242 	if (IS_ERR(rt)) {
1243 		netdev_info(bp->dev, "no route to %pI4b\n", &flow.daddr);
1244 		return -EOPNOTSUPP;
1245 	}
1246 
1247 	/* The route must either point to the real_dst_dev or a dst_dev that
1248 	 * uses the real_dst_dev.
1249 	 */
1250 	dst_dev = rt->dst.dev;
1251 	if (is_vlan_dev(dst_dev)) {
1252 #if IS_ENABLED(CONFIG_VLAN_8021Q)
1253 		struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev);
1254 
1255 		if (vlan->real_dev != real_dst_dev) {
1256 			netdev_info(bp->dev,
1257 				    "dst_dev(%s) doesn't use PF-if(%s)\n",
1258 				    netdev_name(dst_dev),
1259 				    netdev_name(real_dst_dev));
1260 			rc = -EOPNOTSUPP;
1261 			goto put_rt;
1262 		}
1263 		l2_info->inner_vlan_tci = htons(vlan->vlan_id);
1264 		l2_info->inner_vlan_tpid = vlan->vlan_proto;
1265 		l2_info->num_vlans = 1;
1266 #endif
1267 	} else if (dst_dev != real_dst_dev) {
1268 		netdev_info(bp->dev,
1269 			    "dst_dev(%s) for %pI4b is not PF-if(%s)\n",
1270 			    netdev_name(dst_dev), &flow.daddr,
1271 			    netdev_name(real_dst_dev));
1272 		rc = -EOPNOTSUPP;
1273 		goto put_rt;
1274 	}
1275 
1276 	nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
1277 	if (!nbr) {
1278 		netdev_info(bp->dev, "can't lookup neighbor for %pI4b\n",
1279 			    &flow.daddr);
1280 		rc = -EOPNOTSUPP;
1281 		goto put_rt;
1282 	}
1283 
1284 	tun_key->u.ipv4.src = flow.saddr;
1285 	tun_key->ttl = ip4_dst_hoplimit(&rt->dst);
1286 	neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev);
1287 	ether_addr_copy(l2_info->smac, dst_dev->dev_addr);
1288 	neigh_release(nbr);
1289 	ip_rt_put(rt);
1290 
1291 	return 0;
1292 put_rt:
1293 	ip_rt_put(rt);
1294 	return rc;
1295 #else
1296 	return -EOPNOTSUPP;
1297 #endif
1298 }
1299 
1300 static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1301 				    struct bnxt_tc_flow_node *flow_node,
1302 				    __le32 *decap_filter_handle)
1303 {
1304 	struct ip_tunnel_key *decap_key = &flow->tun_key;
1305 	struct bnxt_tc_info *tc_info = bp->tc_info;
1306 	struct bnxt_tc_l2_key l2_info = { {0} };
1307 	struct bnxt_tc_tunnel_node *decap_node;
1308 	struct ip_tunnel_key tun_key = { 0 };
1309 	struct bnxt_tc_l2_key *decap_l2_info;
1310 	__le32 ref_decap_handle;
1311 	int rc;
1312 
1313 	/* Check if there's another flow using the same tunnel decap.
1314 	 * If not, add this tunnel to the table and resolve the other
1315 	 * tunnel header fileds. Ignore src_port in the tunnel_key,
1316 	 * since it is not required for decap filters.
1317 	 */
1318 	decap_key->tp_src = 0;
1319 	decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
1320 					     &tc_info->decap_ht_params,
1321 					     decap_key);
1322 	if (!decap_node)
1323 		return -ENOMEM;
1324 
1325 	flow_node->decap_node = decap_node;
1326 
1327 	if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1328 		goto done;
1329 
1330 	/* Resolve the L2 fields for tunnel decap
1331 	 * Resolve the route for remote vtep (saddr) of the decap key
1332 	 * Find it's next-hop mac addrs
1333 	 */
1334 	tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
1335 	tun_key.tp_dst = flow->tun_key.tp_dst;
1336 	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info);
1337 	if (rc)
1338 		goto put_decap;
1339 
1340 	decap_l2_info = &decap_node->l2_info;
1341 	/* decap smac is wildcarded */
1342 	ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
1343 	if (l2_info.num_vlans) {
1344 		decap_l2_info->num_vlans = l2_info.num_vlans;
1345 		decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
1346 		decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci;
1347 	}
1348 	flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS;
1349 
1350 	/* For getting a decap_filter_handle we first need to check if
1351 	 * there are any other decap flows that share the same tunnel L2
1352 	 * key and if so, pass that flow's decap_filter_handle as the
1353 	 * ref_decap_handle for this flow.
1354 	 */
1355 	rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node,
1356 					  &ref_decap_handle);
1357 	if (rc)
1358 		goto put_decap;
1359 
1360 	/* Issue the hwrm cmd to allocate a decap filter handle */
1361 	rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info,
1362 					 ref_decap_handle,
1363 					 &decap_node->tunnel_handle);
1364 	if (rc)
1365 		goto put_decap_l2;
1366 
1367 done:
1368 	*decap_filter_handle = decap_node->tunnel_handle;
1369 	return 0;
1370 
1371 put_decap_l2:
1372 	bnxt_tc_put_decap_l2_node(bp, flow_node);
1373 put_decap:
1374 	bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1375 				&tc_info->decap_ht_params,
1376 				flow_node->decap_node);
1377 	return rc;
1378 }
1379 
1380 static void bnxt_tc_put_encap_handle(struct bnxt *bp,
1381 				     struct bnxt_tc_tunnel_node *encap_node)
1382 {
1383 	__le32 encap_handle = encap_node->tunnel_handle;
1384 	struct bnxt_tc_info *tc_info = bp->tc_info;
1385 	int rc;
1386 
1387 	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1388 				     &tc_info->encap_ht_params, encap_node);
1389 	if (!rc && encap_handle != INVALID_TUNNEL_HANDLE)
1390 		hwrm_cfa_encap_record_free(bp, encap_handle);
1391 }
1392 
1393 /* Lookup the tunnel encap table and check if there's an encap_handle
1394  * alloc'd already.
1395  * If not, query L2 info via a route lookup and issue an encap_record_alloc
1396  * cmd to FW.
1397  */
1398 static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1399 				    struct bnxt_tc_flow_node *flow_node,
1400 				    __le32 *encap_handle)
1401 {
1402 	struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key;
1403 	struct bnxt_tc_info *tc_info = bp->tc_info;
1404 	struct bnxt_tc_tunnel_node *encap_node;
1405 	int rc;
1406 
1407 	/* Check if there's another flow using the same tunnel encap.
1408 	 * If not, add this tunnel to the table and resolve the other
1409 	 * tunnel header fileds
1410 	 */
1411 	encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table,
1412 					     &tc_info->encap_ht_params,
1413 					     encap_key);
1414 	if (!encap_node)
1415 		return -ENOMEM;
1416 
1417 	flow_node->encap_node = encap_node;
1418 
1419 	if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1420 		goto done;
1421 
1422 	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info);
1423 	if (rc)
1424 		goto put_encap;
1425 
1426 	/* Allocate a new tunnel encap record */
1427 	rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info,
1428 					 &encap_node->tunnel_handle);
1429 	if (rc)
1430 		goto put_encap;
1431 
1432 done:
1433 	*encap_handle = encap_node->tunnel_handle;
1434 	return 0;
1435 
1436 put_encap:
1437 	bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1438 				&tc_info->encap_ht_params, encap_node);
1439 	return rc;
1440 }
1441 
1442 static void bnxt_tc_put_tunnel_handle(struct bnxt *bp,
1443 				      struct bnxt_tc_flow *flow,
1444 				      struct bnxt_tc_flow_node *flow_node)
1445 {
1446 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1447 		bnxt_tc_put_decap_handle(bp, flow_node);
1448 	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1449 		bnxt_tc_put_encap_handle(bp, flow_node->encap_node);
1450 }
1451 
1452 static int bnxt_tc_get_tunnel_handle(struct bnxt *bp,
1453 				     struct bnxt_tc_flow *flow,
1454 				     struct bnxt_tc_flow_node *flow_node,
1455 				     __le32 *tunnel_handle)
1456 {
1457 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1458 		return bnxt_tc_get_decap_handle(bp, flow, flow_node,
1459 						tunnel_handle);
1460 	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1461 		return bnxt_tc_get_encap_handle(bp, flow, flow_node,
1462 						tunnel_handle);
1463 	else
1464 		return 0;
1465 }
1466 static int __bnxt_tc_del_flow(struct bnxt *bp,
1467 			      struct bnxt_tc_flow_node *flow_node)
1468 {
1469 	struct bnxt_tc_info *tc_info = bp->tc_info;
1470 	int rc;
1471 
1472 	/* send HWRM cmd to free the flow-id */
1473 	bnxt_hwrm_cfa_flow_free(bp, flow_node);
1474 
1475 	mutex_lock(&tc_info->lock);
1476 
1477 	/* release references to any tunnel encap/decap nodes */
1478 	bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node);
1479 
1480 	/* release reference to l2 node */
1481 	bnxt_tc_put_l2_node(bp, flow_node);
1482 
1483 	mutex_unlock(&tc_info->lock);
1484 
1485 	rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
1486 				    tc_info->flow_ht_params);
1487 	if (rc)
1488 		netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d\n",
1489 			   __func__, rc);
1490 
1491 	kfree_rcu(flow_node, rcu);
1492 	return 0;
1493 }
1494 
1495 static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow,
1496 				 u16 src_fid)
1497 {
1498 	flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX;
1499 }
1500 
1501 static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
1502 				u16 src_fid)
1503 {
1504 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1505 		flow->src_fid = bp->pf.fw_fid;
1506 	else
1507 		flow->src_fid = src_fid;
1508 }
1509 
1510 /* Add a new flow or replace an existing flow.
1511  * Notes on locking:
1512  * There are essentially two critical sections here.
1513  * 1. while adding a new flow
1514  *    a) lookup l2-key
1515  *    b) issue HWRM cmd and get flow_handle
1516  *    c) link l2-key with flow
1517  * 2. while deleting a flow
1518  *    a) unlinking l2-key from flow
1519  * A lock is needed to protect these two critical sections.
1520  *
1521  * The hash-tables are already protected by the rhashtable API.
1522  */
1523 static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
1524 			    struct flow_cls_offload *tc_flow_cmd)
1525 {
1526 	struct bnxt_tc_flow_node *new_node, *old_node;
1527 	struct bnxt_tc_info *tc_info = bp->tc_info;
1528 	struct bnxt_tc_flow *flow;
1529 	__le32 tunnel_handle = 0;
1530 	__le16 ref_flow_handle;
1531 	int rc;
1532 
1533 	/* allocate memory for the new flow and it's node */
1534 	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
1535 	if (!new_node) {
1536 		rc = -ENOMEM;
1537 		goto done;
1538 	}
1539 	new_node->cookie = tc_flow_cmd->cookie;
1540 	flow = &new_node->flow;
1541 
1542 	rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
1543 	if (rc)
1544 		goto free_node;
1545 
1546 	bnxt_tc_set_src_fid(bp, flow, src_fid);
1547 	bnxt_tc_set_flow_dir(bp, flow, flow->src_fid);
1548 
1549 	if (!bnxt_tc_can_offload(bp, flow)) {
1550 		rc = -EOPNOTSUPP;
1551 		kfree_rcu(new_node, rcu);
1552 		return rc;
1553 	}
1554 
1555 	/* If a flow exists with the same cookie, delete it */
1556 	old_node = rhashtable_lookup_fast(&tc_info->flow_table,
1557 					  &tc_flow_cmd->cookie,
1558 					  tc_info->flow_ht_params);
1559 	if (old_node)
1560 		__bnxt_tc_del_flow(bp, old_node);
1561 
1562 	/* Check if the L2 part of the flow has been offloaded already.
1563 	 * If so, bump up it's refcnt and get it's reference handle.
1564 	 */
1565 	mutex_lock(&tc_info->lock);
1566 	rc = bnxt_tc_get_ref_flow_handle(bp, flow, new_node, &ref_flow_handle);
1567 	if (rc)
1568 		goto unlock;
1569 
1570 	/* If the flow involves tunnel encap/decap, get tunnel_handle */
1571 	rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle);
1572 	if (rc)
1573 		goto put_l2;
1574 
1575 	/* send HWRM cmd to alloc the flow */
1576 	rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
1577 				      tunnel_handle, new_node);
1578 	if (rc)
1579 		goto put_tunnel;
1580 
1581 	flow->lastused = jiffies;
1582 	spin_lock_init(&flow->stats_lock);
1583 	/* add new flow to flow-table */
1584 	rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
1585 				    tc_info->flow_ht_params);
1586 	if (rc)
1587 		goto hwrm_flow_free;
1588 
1589 	mutex_unlock(&tc_info->lock);
1590 	return 0;
1591 
1592 hwrm_flow_free:
1593 	bnxt_hwrm_cfa_flow_free(bp, new_node);
1594 put_tunnel:
1595 	bnxt_tc_put_tunnel_handle(bp, flow, new_node);
1596 put_l2:
1597 	bnxt_tc_put_l2_node(bp, new_node);
1598 unlock:
1599 	mutex_unlock(&tc_info->lock);
1600 free_node:
1601 	kfree_rcu(new_node, rcu);
1602 done:
1603 	netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d\n",
1604 		   __func__, tc_flow_cmd->cookie, rc);
1605 	return rc;
1606 }
1607 
1608 static int bnxt_tc_del_flow(struct bnxt *bp,
1609 			    struct flow_cls_offload *tc_flow_cmd)
1610 {
1611 	struct bnxt_tc_info *tc_info = bp->tc_info;
1612 	struct bnxt_tc_flow_node *flow_node;
1613 
1614 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1615 					   &tc_flow_cmd->cookie,
1616 					   tc_info->flow_ht_params);
1617 	if (!flow_node)
1618 		return -EINVAL;
1619 
1620 	return __bnxt_tc_del_flow(bp, flow_node);
1621 }
1622 
1623 static int bnxt_tc_get_flow_stats(struct bnxt *bp,
1624 				  struct flow_cls_offload *tc_flow_cmd)
1625 {
1626 	struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats;
1627 	struct bnxt_tc_info *tc_info = bp->tc_info;
1628 	struct bnxt_tc_flow_node *flow_node;
1629 	struct bnxt_tc_flow *flow;
1630 	unsigned long lastused;
1631 
1632 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1633 					   &tc_flow_cmd->cookie,
1634 					   tc_info->flow_ht_params);
1635 	if (!flow_node)
1636 		return -1;
1637 
1638 	flow = &flow_node->flow;
1639 	curr_stats = &flow->stats;
1640 	prev_stats = &flow->prev_stats;
1641 
1642 	spin_lock(&flow->stats_lock);
1643 	stats.packets = curr_stats->packets - prev_stats->packets;
1644 	stats.bytes = curr_stats->bytes - prev_stats->bytes;
1645 	*prev_stats = *curr_stats;
1646 	lastused = flow->lastused;
1647 	spin_unlock(&flow->stats_lock);
1648 
1649 	flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets, 0,
1650 			  lastused, FLOW_ACTION_HW_STATS_DELAYED);
1651 	return 0;
1652 }
1653 
1654 static void bnxt_fill_cfa_stats_req(struct bnxt *bp,
1655 				    struct bnxt_tc_flow_node *flow_node,
1656 				    __le16 *flow_handle, __le32 *flow_id)
1657 {
1658 	u16 handle;
1659 
1660 	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
1661 		*flow_id = flow_node->flow_id;
1662 
1663 		/* If flow_id is used to fetch flow stats then:
1664 		 * 1. lower 12 bits of flow_handle must be set to all 1s.
1665 		 * 2. 15th bit of flow_handle must specify the flow
1666 		 *    direction (TX/RX).
1667 		 */
1668 		if (flow_node->flow.l2_key.dir == BNXT_DIR_RX)
1669 			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX |
1670 				 CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1671 		else
1672 			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1673 
1674 		*flow_handle = cpu_to_le16(handle);
1675 	} else {
1676 		*flow_handle = flow_node->flow_handle;
1677 	}
1678 }
1679 
1680 static int
1681 bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
1682 			     struct bnxt_tc_stats_batch stats_batch[])
1683 {
1684 	struct hwrm_cfa_flow_stats_output *resp;
1685 	struct hwrm_cfa_flow_stats_input *req;
1686 	__le16 *req_flow_handles;
1687 	__le32 *req_flow_ids;
1688 	int rc, i;
1689 
1690 	rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_STATS);
1691 	if (rc)
1692 		goto exit;
1693 
1694 	req_flow_handles = &req->flow_handle_0;
1695 	req_flow_ids = &req->flow_id_0;
1696 
1697 	req->num_flows = cpu_to_le16(num_flows);
1698 	for (i = 0; i < num_flows; i++) {
1699 		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1700 
1701 		bnxt_fill_cfa_stats_req(bp, flow_node,
1702 					&req_flow_handles[i], &req_flow_ids[i]);
1703 	}
1704 
1705 	resp = hwrm_req_hold(bp, req);
1706 	rc = hwrm_req_send(bp, req);
1707 	if (!rc) {
1708 		__le64 *resp_packets;
1709 		__le64 *resp_bytes;
1710 
1711 		resp_packets = &resp->packet_0;
1712 		resp_bytes = &resp->byte_0;
1713 
1714 		for (i = 0; i < num_flows; i++) {
1715 			stats_batch[i].hw_stats.packets =
1716 						le64_to_cpu(resp_packets[i]);
1717 			stats_batch[i].hw_stats.bytes =
1718 						le64_to_cpu(resp_bytes[i]);
1719 		}
1720 	}
1721 	hwrm_req_drop(bp, req);
1722 exit:
1723 	if (rc)
1724 		netdev_info(bp->dev, "error rc=%d\n", rc);
1725 
1726 	return rc;
1727 }
1728 
/* Fold a new counter reading into a 64-bit accumulator.
 *
 * Even though @val is a u64, its real width is given by @mask and it
 * wraps around beyond that width. The bits of *accum under @mask are
 * replaced by @val; if @val is smaller than the bits it replaces, the
 * counter is taken to have wrapped once and (mask + 1) is added as carry.
 */
static void accumulate_val(u64 *accum, u64 val, u64 mask)
{
#define low_bits(x, mask)		((x) & (mask))
#define high_bits(x, mask)		((x) & ~(mask))
	u64 carry = 0;

	if (val < low_bits(*accum, mask))
		carry = mask + 1;

	*accum = high_bits(*accum, mask) + val + carry;
}
1743 
1744 /* The HW counters' width is much less than 64bits.
1745  * Handle possible wrap-around while updating the stat counters
1746  */
1747 static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info,
1748 				  struct bnxt_tc_flow_stats *acc_stats,
1749 				  struct bnxt_tc_flow_stats *hw_stats)
1750 {
1751 	accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
1752 	accumulate_val(&acc_stats->packets, hw_stats->packets,
1753 		       tc_info->packets_mask);
1754 }
1755 
1756 static int
1757 bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows,
1758 				struct bnxt_tc_stats_batch stats_batch[])
1759 {
1760 	struct bnxt_tc_info *tc_info = bp->tc_info;
1761 	int rc, i;
1762 
1763 	rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch);
1764 	if (rc)
1765 		return rc;
1766 
1767 	for (i = 0; i < num_flows; i++) {
1768 		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1769 		struct bnxt_tc_flow *flow = &flow_node->flow;
1770 
1771 		spin_lock(&flow->stats_lock);
1772 		bnxt_flow_stats_accum(tc_info, &flow->stats,
1773 				      &stats_batch[i].hw_stats);
1774 		if (flow->stats.packets != flow->prev_stats.packets)
1775 			flow->lastused = jiffies;
1776 		spin_unlock(&flow->stats_lock);
1777 	}
1778 
1779 	return 0;
1780 }
1781 
1782 static int
1783 bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
1784 			      struct bnxt_tc_stats_batch stats_batch[],
1785 			      int *num_flows)
1786 {
1787 	struct bnxt_tc_info *tc_info = bp->tc_info;
1788 	struct rhashtable_iter *iter = &tc_info->iter;
1789 	void *flow_node;
1790 	int rc, i;
1791 
1792 	rhashtable_walk_start(iter);
1793 
1794 	rc = 0;
1795 	for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
1796 		flow_node = rhashtable_walk_next(iter);
1797 		if (IS_ERR(flow_node)) {
1798 			i = 0;
1799 			if (PTR_ERR(flow_node) == -EAGAIN) {
1800 				continue;
1801 			} else {
1802 				rc = PTR_ERR(flow_node);
1803 				goto done;
1804 			}
1805 		}
1806 
1807 		/* No more flows */
1808 		if (!flow_node)
1809 			goto done;
1810 
1811 		stats_batch[i].flow_node = flow_node;
1812 	}
1813 done:
1814 	rhashtable_walk_stop(iter);
1815 	*num_flows = i;
1816 	return rc;
1817 }
1818 
1819 void bnxt_tc_flow_stats_work(struct bnxt *bp)
1820 {
1821 	struct bnxt_tc_info *tc_info = bp->tc_info;
1822 	int num_flows, rc;
1823 
1824 	num_flows = atomic_read(&tc_info->flow_table.nelems);
1825 	if (!num_flows)
1826 		return;
1827 
1828 	rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter);
1829 
1830 	for (;;) {
1831 		rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch,
1832 						   &num_flows);
1833 		if (rc) {
1834 			if (rc == -EAGAIN)
1835 				continue;
1836 			break;
1837 		}
1838 
1839 		if (!num_flows)
1840 			break;
1841 
1842 		bnxt_tc_flow_stats_batch_update(bp, num_flows,
1843 						tc_info->stats_batch);
1844 	}
1845 
1846 	rhashtable_walk_exit(&tc_info->iter);
1847 }
1848 
1849 int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
1850 			 struct flow_cls_offload *cls_flower)
1851 {
1852 	switch (cls_flower->command) {
1853 	case FLOW_CLS_REPLACE:
1854 		return bnxt_tc_add_flow(bp, src_fid, cls_flower);
1855 	case FLOW_CLS_DESTROY:
1856 		return bnxt_tc_del_flow(bp, cls_flower);
1857 	case FLOW_CLS_STATS:
1858 		return bnxt_tc_get_flow_stats(bp, cls_flower);
1859 	default:
1860 		return -EOPNOTSUPP;
1861 	}
1862 }
1863 
1864 static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
1865 				       void *type_data, void *cb_priv)
1866 {
1867 	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1868 	struct flow_cls_offload *flower = type_data;
1869 	struct bnxt *bp = priv->bp;
1870 
1871 	if (!tc_cls_can_offload_and_chain0(bp->dev, type_data))
1872 		return -EOPNOTSUPP;
1873 
1874 	switch (type) {
1875 	case TC_SETUP_CLSFLOWER:
1876 		return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower);
1877 	default:
1878 		return -EOPNOTSUPP;
1879 	}
1880 }
1881 
1882 static struct bnxt_flower_indr_block_cb_priv *
1883 bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev)
1884 {
1885 	struct bnxt_flower_indr_block_cb_priv *cb_priv;
1886 
1887 	list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list)
1888 		if (cb_priv->tunnel_netdev == netdev)
1889 			return cb_priv;
1890 
1891 	return NULL;
1892 }
1893 
1894 static void bnxt_tc_setup_indr_rel(void *cb_priv)
1895 {
1896 	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1897 
1898 	list_del(&priv->list);
1899 	kfree(priv);
1900 }
1901 
1902 static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct Qdisc *sch, struct bnxt *bp,
1903 				    struct flow_block_offload *f, void *data,
1904 				    void (*cleanup)(struct flow_block_cb *block_cb))
1905 {
1906 	struct bnxt_flower_indr_block_cb_priv *cb_priv;
1907 	struct flow_block_cb *block_cb;
1908 
1909 	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1910 		return -EOPNOTSUPP;
1911 
1912 	switch (f->command) {
1913 	case FLOW_BLOCK_BIND:
1914 		cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
1915 		if (!cb_priv)
1916 			return -ENOMEM;
1917 
1918 		cb_priv->tunnel_netdev = netdev;
1919 		cb_priv->bp = bp;
1920 		list_add(&cb_priv->list, &bp->tc_indr_block_list);
1921 
1922 		block_cb = flow_indr_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
1923 						    cb_priv, cb_priv,
1924 						    bnxt_tc_setup_indr_rel, f,
1925 						    netdev, sch, data, bp, cleanup);
1926 		if (IS_ERR(block_cb)) {
1927 			list_del(&cb_priv->list);
1928 			kfree(cb_priv);
1929 			return PTR_ERR(block_cb);
1930 		}
1931 
1932 		flow_block_cb_add(block_cb, f);
1933 		list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list);
1934 		break;
1935 	case FLOW_BLOCK_UNBIND:
1936 		cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev);
1937 		if (!cb_priv)
1938 			return -ENOENT;
1939 
1940 		block_cb = flow_block_cb_lookup(f->block,
1941 						bnxt_tc_setup_indr_block_cb,
1942 						cb_priv);
1943 		if (!block_cb)
1944 			return -ENOENT;
1945 
1946 		flow_indr_block_cb_remove(block_cb, f);
1947 		list_del(&block_cb->driver_list);
1948 		break;
1949 	default:
1950 		return -EOPNOTSUPP;
1951 	}
1952 	return 0;
1953 }
1954 
1955 static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
1956 {
1957 	return netif_is_vxlan(netdev);
1958 }
1959 
1960 static int bnxt_tc_setup_indr_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
1961 				 enum tc_setup_type type, void *type_data,
1962 				 void *data,
1963 				 void (*cleanup)(struct flow_block_cb *block_cb))
1964 {
1965 	if (!netdev || !bnxt_is_netdev_indr_offload(netdev))
1966 		return -EOPNOTSUPP;
1967 
1968 	switch (type) {
1969 	case TC_SETUP_BLOCK:
1970 		return bnxt_tc_setup_indr_block(netdev, sch, cb_priv, type_data, data, cleanup);
1971 	default:
1972 		break;
1973 	}
1974 
1975 	return -EOPNOTSUPP;
1976 }
1977 
1978 static const struct rhashtable_params bnxt_tc_flow_ht_params = {
1979 	.head_offset = offsetof(struct bnxt_tc_flow_node, node),
1980 	.key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
1981 	.key_len = sizeof(((struct bnxt_tc_flow_node *)0)->cookie),
1982 	.automatic_shrinking = true
1983 };
1984 
1985 static const struct rhashtable_params bnxt_tc_l2_ht_params = {
1986 	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
1987 	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
1988 	.key_len = BNXT_TC_L2_KEY_LEN,
1989 	.automatic_shrinking = true
1990 };
1991 
1992 static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = {
1993 	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
1994 	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
1995 	.key_len = BNXT_TC_L2_KEY_LEN,
1996 	.automatic_shrinking = true
1997 };
1998 
1999 static const struct rhashtable_params bnxt_tc_tunnel_ht_params = {
2000 	.head_offset = offsetof(struct bnxt_tc_tunnel_node, node),
2001 	.key_offset = offsetof(struct bnxt_tc_tunnel_node, key),
2002 	.key_len = sizeof(struct ip_tunnel_key),
2003 	.automatic_shrinking = true
2004 };
2005 
/* Build a u64 with the low @width bits set, i.e. the mask for a counter
 * that is @width bits wide.  @width must be in the range 1..64; a width
 * of 0 would shift by 64 bits, which is undefined in C.
 */
#define mask(width)		(~(u64)0 >> (64 - (width)))
2008 
2009 int bnxt_init_tc(struct bnxt *bp)
2010 {
2011 	struct bnxt_tc_info *tc_info;
2012 	int rc;
2013 
2014 	if (bp->hwrm_spec_code < 0x10803)
2015 		return 0;
2016 
2017 	tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL);
2018 	if (!tc_info)
2019 		return -ENOMEM;
2020 	mutex_init(&tc_info->lock);
2021 
2022 	/* Counter widths are programmed by FW */
2023 	tc_info->bytes_mask = mask(36);
2024 	tc_info->packets_mask = mask(28);
2025 
2026 	tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
2027 	rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
2028 	if (rc)
2029 		goto free_tc_info;
2030 
2031 	tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
2032 	rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
2033 	if (rc)
2034 		goto destroy_flow_table;
2035 
2036 	tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params;
2037 	rc = rhashtable_init(&tc_info->decap_l2_table,
2038 			     &tc_info->decap_l2_ht_params);
2039 	if (rc)
2040 		goto destroy_l2_table;
2041 
2042 	tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params;
2043 	rc = rhashtable_init(&tc_info->decap_table,
2044 			     &tc_info->decap_ht_params);
2045 	if (rc)
2046 		goto destroy_decap_l2_table;
2047 
2048 	tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params;
2049 	rc = rhashtable_init(&tc_info->encap_table,
2050 			     &tc_info->encap_ht_params);
2051 	if (rc)
2052 		goto destroy_decap_table;
2053 
2054 	tc_info->enabled = true;
2055 	bp->dev->hw_features |= NETIF_F_HW_TC;
2056 	bp->dev->features |= NETIF_F_HW_TC;
2057 	bp->tc_info = tc_info;
2058 
2059 	/* init indirect block notifications */
2060 	INIT_LIST_HEAD(&bp->tc_indr_block_list);
2061 
2062 	rc = flow_indr_dev_register(bnxt_tc_setup_indr_cb, bp);
2063 	if (!rc)
2064 		return 0;
2065 
2066 	rhashtable_destroy(&tc_info->encap_table);
2067 
2068 destroy_decap_table:
2069 	rhashtable_destroy(&tc_info->decap_table);
2070 destroy_decap_l2_table:
2071 	rhashtable_destroy(&tc_info->decap_l2_table);
2072 destroy_l2_table:
2073 	rhashtable_destroy(&tc_info->l2_table);
2074 destroy_flow_table:
2075 	rhashtable_destroy(&tc_info->flow_table);
2076 free_tc_info:
2077 	kfree(tc_info);
2078 	return rc;
2079 }
2080 
2081 void bnxt_shutdown_tc(struct bnxt *bp)
2082 {
2083 	struct bnxt_tc_info *tc_info = bp->tc_info;
2084 
2085 	if (!bnxt_tc_flower_enabled(bp))
2086 		return;
2087 
2088 	flow_indr_dev_unregister(bnxt_tc_setup_indr_cb, bp,
2089 				 bnxt_tc_setup_indr_rel);
2090 	rhashtable_destroy(&tc_info->flow_table);
2091 	rhashtable_destroy(&tc_info->l2_table);
2092 	rhashtable_destroy(&tc_info->decap_l2_table);
2093 	rhashtable_destroy(&tc_info->decap_table);
2094 	rhashtable_destroy(&tc_info->encap_table);
2095 	kfree(tc_info);
2096 	bp->tc_info = NULL;
2097 }
2098