xref: /linux/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c (revision 3ff78451b8e446e9a548b98a0d4dd8d24dc5780b)
1 /* Broadcom NetXtreme-C/E network driver.
2  *
3  * Copyright (c) 2017 Broadcom Limited
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/netdevice.h>
11 #include <linux/inetdevice.h>
12 #include <linux/if_vlan.h>
13 #include <net/flow_dissector.h>
14 #include <net/pkt_cls.h>
15 #include <net/tc_act/tc_gact.h>
16 #include <net/tc_act/tc_skbedit.h>
17 #include <net/tc_act/tc_mirred.h>
18 #include <net/tc_act/tc_vlan.h>
19 #include <net/tc_act/tc_pedit.h>
20 #include <net/tc_act/tc_tunnel_key.h>
21 #include <net/vxlan.h>
22 
23 #include "bnxt_hsi.h"
24 #include "bnxt.h"
25 #include "bnxt_hwrm.h"
26 #include "bnxt_sriov.h"
27 #include "bnxt_tc.h"
28 #include "bnxt_vfr.h"
29 
#define BNXT_FID_INVALID			0xffff
/* Combine a VLAN id and a priority value into one TCI value */
#define VLAN_TCI(vid, prio)	(((prio) << VLAN_PRIO_SHIFT) | (vid))

/* The predicates below run their argument through ntohs() before testing
 * it against the VLAN_PRIO_MASK / VLAN_VID_MASK bit fields.
 */
#define is_vlan_pcp_wildcarded(vlan_tci_mask)	\
	(!(ntohs(vlan_tci_mask) & VLAN_PRIO_MASK))
#define is_vlan_pcp_exactmatch(vlan_tci_mask)	\
	(!(~ntohs(vlan_tci_mask) & VLAN_PRIO_MASK))
#define is_vlan_pcp_zero(vlan_tci)	\
	(!(ntohs(vlan_tci) & VLAN_PRIO_MASK))
#define is_vid_exactmatch(vlan_tci_mask)	\
	(!(~ntohs(vlan_tci_mask) & VLAN_VID_MASK))
41 
42 static bool is_wildcard(void *mask, int len);
43 static bool is_exactmatch(void *mask, int len);
44 /* Return the dst fid of the func for flow forwarding
45  * For PFs: src_fid is the fid of the PF
46  * For VF-reps: src_fid the fid of the VF
47  */
48 static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
49 {
50 	struct bnxt *bp;
51 
52 	/* check if dev belongs to the same switch */
53 	if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
54 		netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch\n",
55 			    dev->ifindex);
56 		return BNXT_FID_INVALID;
57 	}
58 
59 	/* Is dev a VF-rep? */
60 	if (bnxt_dev_is_vf_rep(dev))
61 		return bnxt_vf_rep_get_fid(dev);
62 
63 	bp = netdev_priv(dev);
64 	return bp->pf.fw_fid;
65 }
66 
67 static int bnxt_tc_parse_redir(struct bnxt *bp,
68 			       struct bnxt_tc_actions *actions,
69 			       const struct flow_action_entry *act)
70 {
71 	struct net_device *dev = act->dev;
72 
73 	if (!dev) {
74 		netdev_info(bp->dev, "no dev in mirred action\n");
75 		return -EINVAL;
76 	}
77 
78 	actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
79 	actions->dst_dev = dev;
80 	return 0;
81 }
82 
83 static int bnxt_tc_parse_vlan(struct bnxt *bp,
84 			      struct bnxt_tc_actions *actions,
85 			      const struct flow_action_entry *act)
86 {
87 	switch (act->id) {
88 	case FLOW_ACTION_VLAN_POP:
89 		actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
90 		break;
91 	case FLOW_ACTION_VLAN_PUSH:
92 		actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
93 		actions->push_vlan_tci = htons(act->vlan.vid);
94 		actions->push_vlan_tpid = act->vlan.proto;
95 		break;
96 	default:
97 		return -EOPNOTSUPP;
98 	}
99 	return 0;
100 }
101 
102 static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
103 				    struct bnxt_tc_actions *actions,
104 				    const struct flow_action_entry *act)
105 {
106 	const struct ip_tunnel_info *tun_info = act->tunnel;
107 	const struct ip_tunnel_key *tun_key = &tun_info->key;
108 
109 	if (ip_tunnel_info_af(tun_info) != AF_INET) {
110 		netdev_info(bp->dev, "only IPv4 tunnel-encap is supported\n");
111 		return -EOPNOTSUPP;
112 	}
113 
114 	actions->tun_encap_key = *tun_key;
115 	actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP;
116 	return 0;
117 }
118 
119 /* Key & Mask from the stack comes unaligned in multiple iterations of 4 bytes
120  * each(u32).
121  * This routine consolidates such multiple unaligned values into one
122  * field each for Key & Mask (for src and dst macs separately)
123  * For example,
124  *			Mask/Key	Offset	Iteration
125  *			==========	======	=========
126  *	dst mac		0xffffffff	0	1
127  *	dst mac		0x0000ffff	4	2
128  *
129  *	src mac		0xffff0000	4	1
130  *	src mac		0xffffffff	8	2
131  *
132  * The above combination coming from the stack will be consolidated as
133  *			Mask/Key
134  *			==============
135  *	src mac:	0xffffffffffff
136  *	dst mac:	0xffffffffffff
137  */
138 static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask,
139 				 u8 *actual_key, u8 *actual_mask)
140 {
141 	u32 key = get_unaligned((u32 *)actual_key);
142 	u32 mask = get_unaligned((u32 *)actual_mask);
143 
144 	part_key &= part_mask;
145 	part_key |= key & ~part_mask;
146 
147 	put_unaligned(mask | part_mask, (u32 *)actual_mask);
148 	put_unaligned(part_key, (u32 *)actual_key);
149 }
150 
151 static int
152 bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions,
153 			    u16 *eth_addr, u16 *eth_addr_mask)
154 {
155 	u16 *p;
156 	int j;
157 
158 	if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)))
159 		return -EINVAL;
160 
161 	if (!is_wildcard(&eth_addr_mask[0], ETH_ALEN)) {
162 		if (!is_exactmatch(&eth_addr_mask[0], ETH_ALEN))
163 			return -EINVAL;
164 		/* FW expects dmac to be in u16 array format */
165 		p = eth_addr;
166 		for (j = 0; j < 3; j++)
167 			actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j));
168 	}
169 
170 	if (!is_wildcard(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN)) {
171 		if (!is_exactmatch(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN))
172 			return -EINVAL;
173 		/* FW expects smac to be in u16 array format */
174 		p = &eth_addr[ETH_ALEN / 2];
175 		for (j = 0; j < 3; j++)
176 			actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j));
177 	}
178 
179 	return 0;
180 }
181 
182 static int
183 bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions,
184 		    struct flow_action_entry *act, int act_idx, u8 *eth_addr,
185 		    u8 *eth_addr_mask)
186 {
187 	size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr);
188 	size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr);
189 	u32 mask, val, offset, idx;
190 	u8 htype;
191 
192 	offset = act->mangle.offset;
193 	htype = act->mangle.htype;
194 	mask = ~act->mangle.mask;
195 	val = act->mangle.val;
196 
197 	switch (htype) {
198 	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
199 		if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) {
200 			netdev_err(bp->dev,
201 				   "%s: eth_hdr: Invalid pedit field\n",
202 				   __func__);
203 			return -EINVAL;
204 		}
205 		actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE;
206 
207 		bnxt_set_l2_key_mask(val, mask, &eth_addr[offset],
208 				     &eth_addr_mask[offset]);
209 		break;
210 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
211 		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
212 		actions->nat.l3_is_ipv4 = true;
213 		if (offset ==  offsetof(struct iphdr, saddr)) {
214 			actions->nat.src_xlate = true;
215 			actions->nat.l3.ipv4.saddr.s_addr = htonl(val);
216 		} else if (offset ==  offsetof(struct iphdr, daddr)) {
217 			actions->nat.src_xlate = false;
218 			actions->nat.l3.ipv4.daddr.s_addr = htonl(val);
219 		} else {
220 			netdev_err(bp->dev,
221 				   "%s: IPv4_hdr: Invalid pedit field\n",
222 				   __func__);
223 			return -EINVAL;
224 		}
225 
226 		netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n",
227 			   actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr,
228 			   &actions->nat.l3.ipv4.daddr);
229 		break;
230 
231 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
232 		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
233 		actions->nat.l3_is_ipv4 = false;
234 		if (offset >= offsetof(struct ipv6hdr, saddr) &&
235 		    offset < offset_of_ip6_daddr) {
236 			/* 16 byte IPv6 address comes in 4 iterations of
237 			 * 4byte chunks each
238 			 */
239 			actions->nat.src_xlate = true;
240 			idx = (offset - offset_of_ip6_saddr) / 4;
241 			/* First 4bytes will be copied to idx 0 and so on */
242 			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
243 		} else if (offset >= offset_of_ip6_daddr &&
244 			   offset < offset_of_ip6_daddr + 16) {
245 			actions->nat.src_xlate = false;
246 			idx = (offset - offset_of_ip6_daddr) / 4;
247 			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
248 		} else {
249 			netdev_err(bp->dev,
250 				   "%s: IPv6_hdr: Invalid pedit field\n",
251 				   __func__);
252 			return -EINVAL;
253 		}
254 		break;
255 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
256 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
257 		/* HW does not support L4 rewrite alone without L3
258 		 * rewrite
259 		 */
260 		if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) {
261 			netdev_err(bp->dev,
262 				   "Need to specify L3 rewrite as well\n");
263 			return -EINVAL;
264 		}
265 		if (actions->nat.src_xlate)
266 			actions->nat.l4.ports.sport = htons(val);
267 		else
268 			actions->nat.l4.ports.dport = htons(val);
269 		netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n",
270 			   actions->nat.l4.ports.sport,
271 			   actions->nat.l4.ports.dport);
272 		break;
273 	default:
274 		netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n",
275 			   __func__);
276 		return -EINVAL;
277 	}
278 	return 0;
279 }
280 
281 static int bnxt_tc_parse_actions(struct bnxt *bp,
282 				 struct bnxt_tc_actions *actions,
283 				 struct flow_action *flow_action,
284 				 struct netlink_ext_ack *extack)
285 {
286 	/* Used to store the L2 rewrite mask for dmac (6 bytes) followed by
287 	 * smac (6 bytes) if rewrite of both is specified, otherwise either
288 	 * dmac or smac
289 	 */
290 	u16 eth_addr_mask[ETH_ALEN] = { 0 };
291 	/* Used to store the L2 rewrite key for dmac (6 bytes) followed by
292 	 * smac (6 bytes) if rewrite of both is specified, otherwise either
293 	 * dmac or smac
294 	 */
295 	u16 eth_addr[ETH_ALEN] = { 0 };
296 	struct flow_action_entry *act;
297 	int i, rc;
298 
299 	if (!flow_action_has_entries(flow_action)) {
300 		netdev_info(bp->dev, "no actions\n");
301 		return -EINVAL;
302 	}
303 
304 	if (!flow_action_basic_hw_stats_check(flow_action, extack))
305 		return -EOPNOTSUPP;
306 
307 	flow_action_for_each(i, act, flow_action) {
308 		switch (act->id) {
309 		case FLOW_ACTION_DROP:
310 			actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
311 			return 0; /* don't bother with other actions */
312 		case FLOW_ACTION_REDIRECT:
313 			rc = bnxt_tc_parse_redir(bp, actions, act);
314 			if (rc)
315 				return rc;
316 			break;
317 		case FLOW_ACTION_VLAN_POP:
318 		case FLOW_ACTION_VLAN_PUSH:
319 		case FLOW_ACTION_VLAN_MANGLE:
320 			rc = bnxt_tc_parse_vlan(bp, actions, act);
321 			if (rc)
322 				return rc;
323 			break;
324 		case FLOW_ACTION_TUNNEL_ENCAP:
325 			rc = bnxt_tc_parse_tunnel_set(bp, actions, act);
326 			if (rc)
327 				return rc;
328 			break;
329 		case FLOW_ACTION_TUNNEL_DECAP:
330 			actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
331 			break;
332 		/* Packet edit: L2 rewrite, NAT, NAPT */
333 		case FLOW_ACTION_MANGLE:
334 			rc = bnxt_tc_parse_pedit(bp, actions, act, i,
335 						 (u8 *)eth_addr,
336 						 (u8 *)eth_addr_mask);
337 			if (rc)
338 				return rc;
339 			break;
340 		default:
341 			break;
342 		}
343 	}
344 
345 	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
346 		rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr,
347 						 eth_addr_mask);
348 		if (rc)
349 			return rc;
350 	}
351 
352 	if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
353 		if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
354 			/* dst_fid is PF's fid */
355 			actions->dst_fid = bp->pf.fw_fid;
356 		} else {
357 			/* find the FID from dst_dev */
358 			actions->dst_fid =
359 				bnxt_flow_get_dst_fid(bp, actions->dst_dev);
360 			if (actions->dst_fid == BNXT_FID_INVALID)
361 				return -EINVAL;
362 		}
363 	}
364 
365 	return 0;
366 }
367 
368 static int bnxt_tc_parse_flow(struct bnxt *bp,
369 			      struct flow_cls_offload *tc_flow_cmd,
370 			      struct bnxt_tc_flow *flow)
371 {
372 	struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd);
373 	struct netlink_ext_ack *extack = tc_flow_cmd->common.extack;
374 	struct flow_dissector *dissector = rule->match.dissector;
375 
376 	/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
377 	if ((dissector->used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
378 	    (dissector->used_keys & BIT_ULL(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
379 		netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%llx\n",
380 			    dissector->used_keys);
381 		return -EOPNOTSUPP;
382 	}
383 
384 	if (flow_rule_match_has_control_flags(rule, extack))
385 		return -EOPNOTSUPP;
386 
387 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
388 		struct flow_match_basic match;
389 
390 		flow_rule_match_basic(rule, &match);
391 		flow->l2_key.ether_type = match.key->n_proto;
392 		flow->l2_mask.ether_type = match.mask->n_proto;
393 
394 		if (match.key->n_proto == htons(ETH_P_IP) ||
395 		    match.key->n_proto == htons(ETH_P_IPV6)) {
396 			flow->l4_key.ip_proto = match.key->ip_proto;
397 			flow->l4_mask.ip_proto = match.mask->ip_proto;
398 		}
399 	}
400 
401 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
402 		struct flow_match_eth_addrs match;
403 
404 		flow_rule_match_eth_addrs(rule, &match);
405 		flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
406 		ether_addr_copy(flow->l2_key.dmac, match.key->dst);
407 		ether_addr_copy(flow->l2_mask.dmac, match.mask->dst);
408 		ether_addr_copy(flow->l2_key.smac, match.key->src);
409 		ether_addr_copy(flow->l2_mask.smac, match.mask->src);
410 	}
411 
412 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
413 		struct flow_match_vlan match;
414 
415 		flow_rule_match_vlan(rule, &match);
416 		flow->l2_key.inner_vlan_tci =
417 			cpu_to_be16(VLAN_TCI(match.key->vlan_id,
418 					     match.key->vlan_priority));
419 		flow->l2_mask.inner_vlan_tci =
420 			cpu_to_be16((VLAN_TCI(match.mask->vlan_id,
421 					      match.mask->vlan_priority)));
422 		flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
423 		flow->l2_mask.inner_vlan_tpid = htons(0xffff);
424 		flow->l2_key.num_vlans = 1;
425 	}
426 
427 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
428 		struct flow_match_ipv4_addrs match;
429 
430 		flow_rule_match_ipv4_addrs(rule, &match);
431 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
432 		flow->l3_key.ipv4.daddr.s_addr = match.key->dst;
433 		flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst;
434 		flow->l3_key.ipv4.saddr.s_addr = match.key->src;
435 		flow->l3_mask.ipv4.saddr.s_addr = match.mask->src;
436 	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
437 		struct flow_match_ipv6_addrs match;
438 
439 		flow_rule_match_ipv6_addrs(rule, &match);
440 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
441 		flow->l3_key.ipv6.daddr = match.key->dst;
442 		flow->l3_mask.ipv6.daddr = match.mask->dst;
443 		flow->l3_key.ipv6.saddr = match.key->src;
444 		flow->l3_mask.ipv6.saddr = match.mask->src;
445 	}
446 
447 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
448 		struct flow_match_ports match;
449 
450 		flow_rule_match_ports(rule, &match);
451 		flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
452 		flow->l4_key.ports.dport = match.key->dst;
453 		flow->l4_mask.ports.dport = match.mask->dst;
454 		flow->l4_key.ports.sport = match.key->src;
455 		flow->l4_mask.ports.sport = match.mask->src;
456 	}
457 
458 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
459 		struct flow_match_icmp match;
460 
461 		flow_rule_match_icmp(rule, &match);
462 		flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
463 		flow->l4_key.icmp.type = match.key->type;
464 		flow->l4_key.icmp.code = match.key->code;
465 		flow->l4_mask.icmp.type = match.mask->type;
466 		flow->l4_mask.icmp.code = match.mask->code;
467 	}
468 
469 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
470 		struct flow_match_ipv4_addrs match;
471 
472 		flow_rule_match_enc_ipv4_addrs(rule, &match);
473 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
474 		flow->tun_key.u.ipv4.dst = match.key->dst;
475 		flow->tun_mask.u.ipv4.dst = match.mask->dst;
476 		flow->tun_key.u.ipv4.src = match.key->src;
477 		flow->tun_mask.u.ipv4.src = match.mask->src;
478 	} else if (flow_rule_match_key(rule,
479 				      FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
480 		return -EOPNOTSUPP;
481 	}
482 
483 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
484 		struct flow_match_enc_keyid match;
485 
486 		flow_rule_match_enc_keyid(rule, &match);
487 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
488 		flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid);
489 		flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid);
490 	}
491 
492 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
493 		struct flow_match_ports match;
494 
495 		flow_rule_match_enc_ports(rule, &match);
496 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
497 		flow->tun_key.tp_dst = match.key->dst;
498 		flow->tun_mask.tp_dst = match.mask->dst;
499 		flow->tun_key.tp_src = match.key->src;
500 		flow->tun_mask.tp_src = match.mask->src;
501 	}
502 
503 	return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action,
504 				     tc_flow_cmd->common.extack);
505 }
506 
507 static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
508 				   struct bnxt_tc_flow_node *flow_node)
509 {
510 	struct hwrm_cfa_flow_free_input *req;
511 	int rc;
512 
513 	rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_FREE);
514 	if (!rc) {
515 		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
516 			req->ext_flow_handle = flow_node->ext_flow_handle;
517 		else
518 			req->flow_handle = flow_node->flow_handle;
519 
520 		rc = hwrm_req_send(bp, req);
521 	}
522 	if (rc)
523 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
524 
525 	return rc;
526 }
527 
528 static int ipv6_mask_len(struct in6_addr *mask)
529 {
530 	int mask_len = 0, i;
531 
532 	for (i = 0; i < 4; i++)
533 		mask_len += inet_mask_len(mask->s6_addr32[i]);
534 
535 	return mask_len;
536 }
537 
/* True when all @len bytes of @mask are zero (also true for len <= 0) */
static bool is_wildcard(void *mask, int len)
{
	const unsigned char *byte = mask;

	while (len-- > 0) {
		if (*byte++)
			return false;
	}

	return true;
}
549 
/* True when all @len bytes of @mask are 0xff (also true for len <= 0) */
static bool is_exactmatch(void *mask, int len)
{
	const unsigned char *byte = mask;

	while (len-- > 0) {
		if (*byte++ != 0xff)
			return false;
	}

	return true;
}
561 
562 static bool is_vlan_tci_allowed(__be16  vlan_tci_mask,
563 				__be16  vlan_tci)
564 {
565 	/* VLAN priority must be either exactly zero or fully wildcarded and
566 	 * VLAN id must be exact match.
567 	 */
568 	if (is_vid_exactmatch(vlan_tci_mask) &&
569 	    ((is_vlan_pcp_exactmatch(vlan_tci_mask) &&
570 	      is_vlan_pcp_zero(vlan_tci)) ||
571 	     is_vlan_pcp_wildcarded(vlan_tci_mask)))
572 		return true;
573 
574 	return false;
575 }
576 
/* True when at least one of the @len bytes of @key is non-zero */
static bool bits_set(void *key, int len)
{
	const unsigned char *byte = key;

	while (len-- > 0) {
		if (*byte++)
			return true;
	}

	return false;
}
588 
589 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
590 				    __le16 ref_flow_handle,
591 				    __le32 tunnel_handle,
592 				    struct bnxt_tc_flow_node *flow_node)
593 {
594 	struct bnxt_tc_actions *actions = &flow->actions;
595 	struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
596 	struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
597 	struct hwrm_cfa_flow_alloc_output *resp;
598 	struct hwrm_cfa_flow_alloc_input *req;
599 	u16 flow_flags = 0, action_flags = 0;
600 	int rc;
601 
602 	rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_ALLOC);
603 	if (rc)
604 		return rc;
605 
606 	req->src_fid = cpu_to_le16(flow->src_fid);
607 	req->ref_flow_handle = ref_flow_handle;
608 
609 	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
610 		memcpy(req->l2_rewrite_dmac, actions->l2_rewrite_dmac,
611 		       ETH_ALEN);
612 		memcpy(req->l2_rewrite_smac, actions->l2_rewrite_smac,
613 		       ETH_ALEN);
614 		action_flags |=
615 			CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
616 	}
617 
618 	if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) {
619 		if (actions->nat.l3_is_ipv4) {
620 			action_flags |=
621 				CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS;
622 
623 			if (actions->nat.src_xlate) {
624 				action_flags |=
625 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
626 				/* L3 source rewrite */
627 				req->nat_ip_address[0] =
628 					actions->nat.l3.ipv4.saddr.s_addr;
629 				/* L4 source port */
630 				if (actions->nat.l4.ports.sport)
631 					req->nat_port =
632 						actions->nat.l4.ports.sport;
633 			} else {
634 				action_flags |=
635 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
636 				/* L3 destination rewrite */
637 				req->nat_ip_address[0] =
638 					actions->nat.l3.ipv4.daddr.s_addr;
639 				/* L4 destination port */
640 				if (actions->nat.l4.ports.dport)
641 					req->nat_port =
642 						actions->nat.l4.ports.dport;
643 			}
644 			netdev_dbg(bp->dev,
645 				   "req->nat_ip_address: %pI4 src_xlate: %d req->nat_port: %x\n",
646 				   req->nat_ip_address, actions->nat.src_xlate,
647 				   req->nat_port);
648 		} else {
649 			if (actions->nat.src_xlate) {
650 				action_flags |=
651 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
652 				/* L3 source rewrite */
653 				memcpy(req->nat_ip_address,
654 				       actions->nat.l3.ipv6.saddr.s6_addr32,
655 				       sizeof(req->nat_ip_address));
656 				/* L4 source port */
657 				if (actions->nat.l4.ports.sport)
658 					req->nat_port =
659 						actions->nat.l4.ports.sport;
660 			} else {
661 				action_flags |=
662 					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
663 				/* L3 destination rewrite */
664 				memcpy(req->nat_ip_address,
665 				       actions->nat.l3.ipv6.daddr.s6_addr32,
666 				       sizeof(req->nat_ip_address));
667 				/* L4 destination port */
668 				if (actions->nat.l4.ports.dport)
669 					req->nat_port =
670 						actions->nat.l4.ports.dport;
671 			}
672 			netdev_dbg(bp->dev,
673 				   "req->nat_ip_address: %pI6 src_xlate: %d req->nat_port: %x\n",
674 				   req->nat_ip_address, actions->nat.src_xlate,
675 				   req->nat_port);
676 		}
677 	}
678 
679 	if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
680 	    actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
681 		req->tunnel_handle = tunnel_handle;
682 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
683 		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
684 	}
685 
686 	req->ethertype = flow->l2_key.ether_type;
687 	req->ip_proto = flow->l4_key.ip_proto;
688 
689 	if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
690 		memcpy(req->dmac, flow->l2_key.dmac, ETH_ALEN);
691 		memcpy(req->smac, flow->l2_key.smac, ETH_ALEN);
692 	}
693 
694 	if (flow->l2_key.num_vlans > 0) {
695 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE;
696 		/* FW expects the inner_vlan_tci value to be set
697 		 * in outer_vlan_tci when num_vlans is 1 (which is
698 		 * always the case in TC.)
699 		 */
700 		req->outer_vlan_tci = flow->l2_key.inner_vlan_tci;
701 	}
702 
703 	/* If all IP and L4 fields are wildcarded then this is an L2 flow */
704 	if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
705 	    is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
706 		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
707 	} else {
708 		flow_flags |= flow->l2_key.ether_type == htons(ETH_P_IP) ?
709 				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 :
710 				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
711 
712 		if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
713 			req->ip_dst[0] = l3_key->ipv4.daddr.s_addr;
714 			req->ip_dst_mask_len =
715 				inet_mask_len(l3_mask->ipv4.daddr.s_addr);
716 			req->ip_src[0] = l3_key->ipv4.saddr.s_addr;
717 			req->ip_src_mask_len =
718 				inet_mask_len(l3_mask->ipv4.saddr.s_addr);
719 		} else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
720 			memcpy(req->ip_dst, l3_key->ipv6.daddr.s6_addr32,
721 			       sizeof(req->ip_dst));
722 			req->ip_dst_mask_len =
723 					ipv6_mask_len(&l3_mask->ipv6.daddr);
724 			memcpy(req->ip_src, l3_key->ipv6.saddr.s6_addr32,
725 			       sizeof(req->ip_src));
726 			req->ip_src_mask_len =
727 					ipv6_mask_len(&l3_mask->ipv6.saddr);
728 		}
729 	}
730 
731 	if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
732 		req->l4_src_port = flow->l4_key.ports.sport;
733 		req->l4_src_port_mask = flow->l4_mask.ports.sport;
734 		req->l4_dst_port = flow->l4_key.ports.dport;
735 		req->l4_dst_port_mask = flow->l4_mask.ports.dport;
736 	} else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
737 		/* l4 ports serve as type/code when ip_proto is ICMP */
738 		req->l4_src_port = htons(flow->l4_key.icmp.type);
739 		req->l4_src_port_mask = htons(flow->l4_mask.icmp.type);
740 		req->l4_dst_port = htons(flow->l4_key.icmp.code);
741 		req->l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
742 	}
743 	req->flags = cpu_to_le16(flow_flags);
744 
745 	if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
746 		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
747 	} else {
748 		if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
749 			action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
750 			req->dst_fid = cpu_to_le16(actions->dst_fid);
751 		}
752 		if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
753 			action_flags |=
754 			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
755 			req->l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
756 			req->l2_rewrite_vlan_tci = actions->push_vlan_tci;
757 			memcpy(&req->l2_rewrite_dmac, &req->dmac, ETH_ALEN);
758 			memcpy(&req->l2_rewrite_smac, &req->smac, ETH_ALEN);
759 		}
760 		if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
761 			action_flags |=
762 			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
763 			/* Rewrite config with tpid = 0 implies vlan pop */
764 			req->l2_rewrite_vlan_tpid = 0;
765 			memcpy(&req->l2_rewrite_dmac, &req->dmac, ETH_ALEN);
766 			memcpy(&req->l2_rewrite_smac, &req->smac, ETH_ALEN);
767 		}
768 	}
769 	req->action_flags = cpu_to_le16(action_flags);
770 
771 	resp = hwrm_req_hold(bp, req);
772 	rc = hwrm_req_send_silent(bp, req);
773 	if (!rc) {
774 		/* CFA_FLOW_ALLOC response interpretation:
775 		 *		    fw with	     fw with
776 		 *		    16-bit	     64-bit
777 		 *		    flow handle      flow handle
778 		 *		    ===========	     ===========
779 		 * flow_handle      flow handle      flow context id
780 		 * ext_flow_handle  INVALID	     flow handle
781 		 * flow_id	    INVALID	     flow counter id
782 		 */
783 		flow_node->flow_handle = resp->flow_handle;
784 		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
785 			flow_node->ext_flow_handle = resp->ext_flow_handle;
786 			flow_node->flow_id = resp->flow_id;
787 		}
788 	}
789 	hwrm_req_drop(bp, req);
790 	return rc;
791 }
792 
793 static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
794 				       struct bnxt_tc_flow *flow,
795 				       struct bnxt_tc_l2_key *l2_info,
796 				       __le32 ref_decap_handle,
797 				       __le32 *decap_filter_handle)
798 {
799 	struct hwrm_cfa_decap_filter_alloc_output *resp;
800 	struct ip_tunnel_key *tun_key = &flow->tun_key;
801 	struct hwrm_cfa_decap_filter_alloc_input *req;
802 	u32 enables = 0;
803 	int rc;
804 
805 	rc = hwrm_req_init(bp, req, HWRM_CFA_DECAP_FILTER_ALLOC);
806 	if (rc)
807 		goto exit;
808 
809 	req->flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
810 	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
811 		   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
812 	req->tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
813 	req->ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
814 
815 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
816 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
817 		/* tunnel_id is wrongly defined in hsi defn. as __le32 */
818 		req->tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
819 	}
820 
821 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
822 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
823 		ether_addr_copy(req->dst_macaddr, l2_info->dmac);
824 	}
825 	if (l2_info->num_vlans) {
826 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
827 		req->t_ivlan_vid = l2_info->inner_vlan_tci;
828 	}
829 
830 	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
831 	req->ethertype = htons(ETH_P_IP);
832 
833 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
834 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
835 			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
836 			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
837 		req->ip_addr_type =
838 			CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
839 		req->dst_ipaddr[0] = tun_key->u.ipv4.dst;
840 		req->src_ipaddr[0] = tun_key->u.ipv4.src;
841 	}
842 
843 	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
844 		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
845 		req->dst_port = tun_key->tp_dst;
846 	}
847 
848 	/* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc
849 	 * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
850 	 */
851 	req->l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
852 	req->enables = cpu_to_le32(enables);
853 
854 	resp = hwrm_req_hold(bp, req);
855 	rc = hwrm_req_send_silent(bp, req);
856 	if (!rc)
857 		*decap_filter_handle = resp->decap_filter_id;
858 	hwrm_req_drop(bp, req);
859 exit:
860 	if (rc)
861 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
862 
863 	return rc;
864 }
865 
866 static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
867 				      __le32 decap_filter_handle)
868 {
869 	struct hwrm_cfa_decap_filter_free_input *req;
870 	int rc;
871 
872 	rc = hwrm_req_init(bp, req, HWRM_CFA_DECAP_FILTER_FREE);
873 	if (!rc) {
874 		req->decap_filter_id = decap_filter_handle;
875 		rc = hwrm_req_send(bp, req);
876 	}
877 	if (rc)
878 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
879 
880 	return rc;
881 }
882 
883 static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
884 				       struct ip_tunnel_key *encap_key,
885 				       struct bnxt_tc_l2_key *l2_info,
886 				       __le32 *encap_record_handle)
887 {
888 	struct hwrm_cfa_encap_record_alloc_output *resp;
889 	struct hwrm_cfa_encap_record_alloc_input *req;
890 	struct hwrm_cfa_encap_data_vxlan *encap;
891 	struct hwrm_vxlan_ipv4_hdr *encap_ipv4;
892 	int rc;
893 
894 	rc = hwrm_req_init(bp, req, HWRM_CFA_ENCAP_RECORD_ALLOC);
895 	if (rc)
896 		goto exit;
897 
898 	encap = (struct hwrm_cfa_encap_data_vxlan *)&req->encap_data;
899 	req->encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
900 	ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
901 	ether_addr_copy(encap->src_mac_addr, l2_info->smac);
902 	if (l2_info->num_vlans) {
903 		encap->num_vlan_tags = l2_info->num_vlans;
904 		encap->ovlan_tci = l2_info->inner_vlan_tci;
905 		encap->ovlan_tpid = l2_info->inner_vlan_tpid;
906 	}
907 
908 	encap_ipv4 = (struct hwrm_vxlan_ipv4_hdr *)encap->l3;
909 	encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
910 	encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
911 	encap_ipv4->ttl = encap_key->ttl;
912 
913 	encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst;
914 	encap_ipv4->src_ip_addr = encap_key->u.ipv4.src;
915 	encap_ipv4->protocol = IPPROTO_UDP;
916 
917 	encap->dst_port = encap_key->tp_dst;
918 	encap->vni = tunnel_id_to_key32(encap_key->tun_id);
919 
920 	resp = hwrm_req_hold(bp, req);
921 	rc = hwrm_req_send_silent(bp, req);
922 	if (!rc)
923 		*encap_record_handle = resp->encap_record_id;
924 	hwrm_req_drop(bp, req);
925 exit:
926 	if (rc)
927 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
928 
929 	return rc;
930 }
931 
932 static int hwrm_cfa_encap_record_free(struct bnxt *bp,
933 				      __le32 encap_record_handle)
934 {
935 	struct hwrm_cfa_encap_record_free_input *req;
936 	int rc;
937 
938 	rc = hwrm_req_init(bp, req, HWRM_CFA_ENCAP_RECORD_FREE);
939 	if (!rc) {
940 		req->encap_record_id = encap_record_handle;
941 		rc = hwrm_req_send(bp, req);
942 	}
943 	if (rc)
944 		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
945 
946 	return rc;
947 }
948 
949 static int bnxt_tc_put_l2_node(struct bnxt *bp,
950 			       struct bnxt_tc_flow_node *flow_node)
951 {
952 	struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
953 	struct bnxt_tc_info *tc_info = bp->tc_info;
954 	int rc;
955 
956 	/* remove flow_node from the L2 shared flow list */
957 	list_del(&flow_node->l2_list_node);
958 	if (--l2_node->refcount == 0) {
959 		rc =  rhashtable_remove_fast(&tc_info->l2_table, &l2_node->node,
960 					     tc_info->l2_ht_params);
961 		if (rc)
962 			netdev_err(bp->dev,
963 				   "Error: %s: rhashtable_remove_fast: %d\n",
964 				   __func__, rc);
965 		kfree_rcu(l2_node, rcu);
966 	}
967 	return 0;
968 }
969 
970 static struct bnxt_tc_l2_node *
971 bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
972 		    struct rhashtable_params ht_params,
973 		    struct bnxt_tc_l2_key *l2_key)
974 {
975 	struct bnxt_tc_l2_node *l2_node;
976 	int rc;
977 
978 	l2_node = rhashtable_lookup_fast(l2_table, l2_key, ht_params);
979 	if (!l2_node) {
980 		l2_node = kzalloc(sizeof(*l2_node), GFP_KERNEL);
981 		if (!l2_node) {
982 			rc = -ENOMEM;
983 			return NULL;
984 		}
985 
986 		l2_node->key = *l2_key;
987 		rc = rhashtable_insert_fast(l2_table, &l2_node->node,
988 					    ht_params);
989 		if (rc) {
990 			kfree_rcu(l2_node, rcu);
991 			netdev_err(bp->dev,
992 				   "Error: %s: rhashtable_insert_fast: %d\n",
993 				   __func__, rc);
994 			return NULL;
995 		}
996 		INIT_LIST_HEAD(&l2_node->common_l2_flows);
997 	}
998 	return l2_node;
999 }
1000 
1001 /* Get the ref_flow_handle for a flow by checking if there are any other
1002  * flows that share the same L2 key as this flow.
1003  */
1004 static int
1005 bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1006 			    struct bnxt_tc_flow_node *flow_node,
1007 			    __le16 *ref_flow_handle)
1008 {
1009 	struct bnxt_tc_info *tc_info = bp->tc_info;
1010 	struct bnxt_tc_flow_node *ref_flow_node;
1011 	struct bnxt_tc_l2_node *l2_node;
1012 
1013 	l2_node = bnxt_tc_get_l2_node(bp, &tc_info->l2_table,
1014 				      tc_info->l2_ht_params,
1015 				      &flow->l2_key);
1016 	if (!l2_node)
1017 		return -1;
1018 
1019 	/* If any other flow is using this l2_node, use it's flow_handle
1020 	 * as the ref_flow_handle
1021 	 */
1022 	if (l2_node->refcount > 0) {
1023 		ref_flow_node = list_first_entry(&l2_node->common_l2_flows,
1024 						 struct bnxt_tc_flow_node,
1025 						 l2_list_node);
1026 		*ref_flow_handle = ref_flow_node->flow_handle;
1027 	} else {
1028 		*ref_flow_handle = cpu_to_le16(0xffff);
1029 	}
1030 
1031 	/* Insert the l2_node into the flow_node so that subsequent flows
1032 	 * with a matching l2 key can use the flow_handle of this flow
1033 	 * as their ref_flow_handle
1034 	 */
1035 	flow_node->l2_node = l2_node;
1036 	list_add(&flow_node->l2_list_node, &l2_node->common_l2_flows);
1037 	l2_node->refcount++;
1038 	return 0;
1039 }
1040 
1041 /* After the flow parsing is done, this routine is used for checking
1042  * if there are any aspects of the flow that prevent it from being
1043  * offloaded.
1044  */
1045 static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
1046 {
1047 	/* If L4 ports are specified then ip_proto must be TCP or UDP */
1048 	if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
1049 	    (flow->l4_key.ip_proto != IPPROTO_TCP &&
1050 	     flow->l4_key.ip_proto != IPPROTO_UDP)) {
1051 		netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports\n",
1052 			    flow->l4_key.ip_proto);
1053 		return false;
1054 	}
1055 
1056 	/* Currently source/dest MAC cannot be partial wildcard  */
1057 	if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) &&
1058 	    !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) {
1059 		netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n");
1060 		return false;
1061 	}
1062 	if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) &&
1063 	    !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) {
1064 		netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n");
1065 		return false;
1066 	}
1067 
1068 	/* Currently VLAN fields cannot be partial wildcard */
1069 	if (bits_set(&flow->l2_key.inner_vlan_tci,
1070 		     sizeof(flow->l2_key.inner_vlan_tci)) &&
1071 	    !is_vlan_tci_allowed(flow->l2_mask.inner_vlan_tci,
1072 				 flow->l2_key.inner_vlan_tci)) {
1073 		netdev_info(bp->dev, "Unsupported VLAN TCI\n");
1074 		return false;
1075 	}
1076 	if (bits_set(&flow->l2_key.inner_vlan_tpid,
1077 		     sizeof(flow->l2_key.inner_vlan_tpid)) &&
1078 	    !is_exactmatch(&flow->l2_mask.inner_vlan_tpid,
1079 			   sizeof(flow->l2_mask.inner_vlan_tpid))) {
1080 		netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n");
1081 		return false;
1082 	}
1083 
1084 	/* Currently Ethertype must be set */
1085 	if (!is_exactmatch(&flow->l2_mask.ether_type,
1086 			   sizeof(flow->l2_mask.ether_type))) {
1087 		netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n");
1088 		return false;
1089 	}
1090 
1091 	return true;
1092 }
1093 
1094 /* Returns the final refcount of the node on success
1095  * or a -ve error code on failure
1096  */
1097 static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
1098 				   struct rhashtable *tunnel_table,
1099 				   struct rhashtable_params *ht_params,
1100 				   struct bnxt_tc_tunnel_node *tunnel_node)
1101 {
1102 	int rc;
1103 
1104 	if (--tunnel_node->refcount == 0) {
1105 		rc =  rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
1106 					     *ht_params);
1107 		if (rc) {
1108 			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
1109 			rc = -1;
1110 		}
1111 		kfree_rcu(tunnel_node, rcu);
1112 		return rc;
1113 	} else {
1114 		return tunnel_node->refcount;
1115 	}
1116 }
1117 
1118 /* Get (or add) either encap or decap tunnel node from/to the supplied
1119  * hash table.
1120  */
1121 static struct bnxt_tc_tunnel_node *
1122 bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
1123 			struct rhashtable_params *ht_params,
1124 			struct ip_tunnel_key *tun_key)
1125 {
1126 	struct bnxt_tc_tunnel_node *tunnel_node;
1127 	int rc;
1128 
1129 	tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params);
1130 	if (!tunnel_node) {
1131 		tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL);
1132 		if (!tunnel_node) {
1133 			rc = -ENOMEM;
1134 			goto err;
1135 		}
1136 
1137 		tunnel_node->key = *tun_key;
1138 		tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE;
1139 		rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node,
1140 					    *ht_params);
1141 		if (rc) {
1142 			kfree_rcu(tunnel_node, rcu);
1143 			goto err;
1144 		}
1145 	}
1146 	tunnel_node->refcount++;
1147 	return tunnel_node;
1148 err:
1149 	netdev_info(bp->dev, "error rc=%d\n", rc);
1150 	return NULL;
1151 }
1152 
1153 static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp,
1154 					struct bnxt_tc_flow *flow,
1155 					struct bnxt_tc_l2_key *l2_key,
1156 					struct bnxt_tc_flow_node *flow_node,
1157 					__le32 *ref_decap_handle)
1158 {
1159 	struct bnxt_tc_info *tc_info = bp->tc_info;
1160 	struct bnxt_tc_flow_node *ref_flow_node;
1161 	struct bnxt_tc_l2_node *decap_l2_node;
1162 
1163 	decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table,
1164 					    tc_info->decap_l2_ht_params,
1165 					    l2_key);
1166 	if (!decap_l2_node)
1167 		return -1;
1168 
1169 	/* If any other flow is using this decap_l2_node, use it's decap_handle
1170 	 * as the ref_decap_handle
1171 	 */
1172 	if (decap_l2_node->refcount > 0) {
1173 		ref_flow_node =
1174 			list_first_entry(&decap_l2_node->common_l2_flows,
1175 					 struct bnxt_tc_flow_node,
1176 					 decap_l2_list_node);
1177 		*ref_decap_handle = ref_flow_node->decap_node->tunnel_handle;
1178 	} else {
1179 		*ref_decap_handle = INVALID_TUNNEL_HANDLE;
1180 	}
1181 
1182 	/* Insert the l2_node into the flow_node so that subsequent flows
1183 	 * with a matching decap l2 key can use the decap_filter_handle of
1184 	 * this flow as their ref_decap_handle
1185 	 */
1186 	flow_node->decap_l2_node = decap_l2_node;
1187 	list_add(&flow_node->decap_l2_list_node,
1188 		 &decap_l2_node->common_l2_flows);
1189 	decap_l2_node->refcount++;
1190 	return 0;
1191 }
1192 
1193 static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
1194 				      struct bnxt_tc_flow_node *flow_node)
1195 {
1196 	struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node;
1197 	struct bnxt_tc_info *tc_info = bp->tc_info;
1198 	int rc;
1199 
1200 	/* remove flow_node from the decap L2 sharing flow list */
1201 	list_del(&flow_node->decap_l2_list_node);
1202 	if (--decap_l2_node->refcount == 0) {
1203 		rc =  rhashtable_remove_fast(&tc_info->decap_l2_table,
1204 					     &decap_l2_node->node,
1205 					     tc_info->decap_l2_ht_params);
1206 		if (rc)
1207 			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
1208 		kfree_rcu(decap_l2_node, rcu);
1209 	}
1210 }
1211 
1212 static void bnxt_tc_put_decap_handle(struct bnxt *bp,
1213 				     struct bnxt_tc_flow_node *flow_node)
1214 {
1215 	__le32 decap_handle = flow_node->decap_node->tunnel_handle;
1216 	struct bnxt_tc_info *tc_info = bp->tc_info;
1217 	int rc;
1218 
1219 	if (flow_node->decap_l2_node)
1220 		bnxt_tc_put_decap_l2_node(bp, flow_node);
1221 
1222 	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1223 				     &tc_info->decap_ht_params,
1224 				     flow_node->decap_node);
1225 	if (!rc && decap_handle != INVALID_TUNNEL_HANDLE)
1226 		hwrm_cfa_decap_filter_free(bp, decap_handle);
1227 }
1228 
1229 static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
1230 				       struct ip_tunnel_key *tun_key,
1231 				       struct bnxt_tc_l2_key *l2_info)
1232 {
1233 #ifdef CONFIG_INET
1234 	struct net_device *real_dst_dev = bp->dev;
1235 	struct flowi4 flow = { {0} };
1236 	struct net_device *dst_dev;
1237 	struct neighbour *nbr;
1238 	struct rtable *rt;
1239 	int rc;
1240 
1241 	flow.flowi4_proto = IPPROTO_UDP;
1242 	flow.fl4_dport = tun_key->tp_dst;
1243 	flow.daddr = tun_key->u.ipv4.dst;
1244 
1245 	rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
1246 	if (IS_ERR(rt)) {
1247 		netdev_info(bp->dev, "no route to %pI4b\n", &flow.daddr);
1248 		return -EOPNOTSUPP;
1249 	}
1250 
1251 	/* The route must either point to the real_dst_dev or a dst_dev that
1252 	 * uses the real_dst_dev.
1253 	 */
1254 	dst_dev = rt->dst.dev;
1255 	if (is_vlan_dev(dst_dev)) {
1256 #if IS_ENABLED(CONFIG_VLAN_8021Q)
1257 		struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev);
1258 
1259 		if (vlan->real_dev != real_dst_dev) {
1260 			netdev_info(bp->dev,
1261 				    "dst_dev(%s) doesn't use PF-if(%s)\n",
1262 				    netdev_name(dst_dev),
1263 				    netdev_name(real_dst_dev));
1264 			rc = -EOPNOTSUPP;
1265 			goto put_rt;
1266 		}
1267 		l2_info->inner_vlan_tci = htons(vlan->vlan_id);
1268 		l2_info->inner_vlan_tpid = vlan->vlan_proto;
1269 		l2_info->num_vlans = 1;
1270 #endif
1271 	} else if (dst_dev != real_dst_dev) {
1272 		netdev_info(bp->dev,
1273 			    "dst_dev(%s) for %pI4b is not PF-if(%s)\n",
1274 			    netdev_name(dst_dev), &flow.daddr,
1275 			    netdev_name(real_dst_dev));
1276 		rc = -EOPNOTSUPP;
1277 		goto put_rt;
1278 	}
1279 
1280 	nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
1281 	if (!nbr) {
1282 		netdev_info(bp->dev, "can't lookup neighbor for %pI4b\n",
1283 			    &flow.daddr);
1284 		rc = -EOPNOTSUPP;
1285 		goto put_rt;
1286 	}
1287 
1288 	tun_key->u.ipv4.src = flow.saddr;
1289 	tun_key->ttl = ip4_dst_hoplimit(&rt->dst);
1290 	neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev);
1291 	ether_addr_copy(l2_info->smac, dst_dev->dev_addr);
1292 	neigh_release(nbr);
1293 	ip_rt_put(rt);
1294 
1295 	return 0;
1296 put_rt:
1297 	ip_rt_put(rt);
1298 	return rc;
1299 #else
1300 	return -EOPNOTSUPP;
1301 #endif
1302 }
1303 
1304 static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1305 				    struct bnxt_tc_flow_node *flow_node,
1306 				    __le32 *decap_filter_handle)
1307 {
1308 	struct ip_tunnel_key *decap_key = &flow->tun_key;
1309 	struct bnxt_tc_info *tc_info = bp->tc_info;
1310 	struct bnxt_tc_l2_key l2_info = { {0} };
1311 	struct bnxt_tc_tunnel_node *decap_node;
1312 	struct ip_tunnel_key tun_key = { 0 };
1313 	struct bnxt_tc_l2_key *decap_l2_info;
1314 	__le32 ref_decap_handle;
1315 	int rc;
1316 
1317 	/* Check if there's another flow using the same tunnel decap.
1318 	 * If not, add this tunnel to the table and resolve the other
1319 	 * tunnel header fileds. Ignore src_port in the tunnel_key,
1320 	 * since it is not required for decap filters.
1321 	 */
1322 	decap_key->tp_src = 0;
1323 	decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
1324 					     &tc_info->decap_ht_params,
1325 					     decap_key);
1326 	if (!decap_node)
1327 		return -ENOMEM;
1328 
1329 	flow_node->decap_node = decap_node;
1330 
1331 	if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1332 		goto done;
1333 
1334 	/* Resolve the L2 fields for tunnel decap
1335 	 * Resolve the route for remote vtep (saddr) of the decap key
1336 	 * Find it's next-hop mac addrs
1337 	 */
1338 	tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
1339 	tun_key.tp_dst = flow->tun_key.tp_dst;
1340 	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info);
1341 	if (rc)
1342 		goto put_decap;
1343 
1344 	decap_l2_info = &decap_node->l2_info;
1345 	/* decap smac is wildcarded */
1346 	ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
1347 	if (l2_info.num_vlans) {
1348 		decap_l2_info->num_vlans = l2_info.num_vlans;
1349 		decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
1350 		decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci;
1351 	}
1352 	flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS;
1353 
1354 	/* For getting a decap_filter_handle we first need to check if
1355 	 * there are any other decap flows that share the same tunnel L2
1356 	 * key and if so, pass that flow's decap_filter_handle as the
1357 	 * ref_decap_handle for this flow.
1358 	 */
1359 	rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node,
1360 					  &ref_decap_handle);
1361 	if (rc)
1362 		goto put_decap;
1363 
1364 	/* Issue the hwrm cmd to allocate a decap filter handle */
1365 	rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info,
1366 					 ref_decap_handle,
1367 					 &decap_node->tunnel_handle);
1368 	if (rc)
1369 		goto put_decap_l2;
1370 
1371 done:
1372 	*decap_filter_handle = decap_node->tunnel_handle;
1373 	return 0;
1374 
1375 put_decap_l2:
1376 	bnxt_tc_put_decap_l2_node(bp, flow_node);
1377 put_decap:
1378 	bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1379 				&tc_info->decap_ht_params,
1380 				flow_node->decap_node);
1381 	return rc;
1382 }
1383 
1384 static void bnxt_tc_put_encap_handle(struct bnxt *bp,
1385 				     struct bnxt_tc_tunnel_node *encap_node)
1386 {
1387 	__le32 encap_handle = encap_node->tunnel_handle;
1388 	struct bnxt_tc_info *tc_info = bp->tc_info;
1389 	int rc;
1390 
1391 	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1392 				     &tc_info->encap_ht_params, encap_node);
1393 	if (!rc && encap_handle != INVALID_TUNNEL_HANDLE)
1394 		hwrm_cfa_encap_record_free(bp, encap_handle);
1395 }
1396 
1397 /* Lookup the tunnel encap table and check if there's an encap_handle
1398  * alloc'd already.
1399  * If not, query L2 info via a route lookup and issue an encap_record_alloc
1400  * cmd to FW.
1401  */
1402 static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1403 				    struct bnxt_tc_flow_node *flow_node,
1404 				    __le32 *encap_handle)
1405 {
1406 	struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key;
1407 	struct bnxt_tc_info *tc_info = bp->tc_info;
1408 	struct bnxt_tc_tunnel_node *encap_node;
1409 	int rc;
1410 
1411 	/* Check if there's another flow using the same tunnel encap.
1412 	 * If not, add this tunnel to the table and resolve the other
1413 	 * tunnel header fileds
1414 	 */
1415 	encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table,
1416 					     &tc_info->encap_ht_params,
1417 					     encap_key);
1418 	if (!encap_node)
1419 		return -ENOMEM;
1420 
1421 	flow_node->encap_node = encap_node;
1422 
1423 	if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1424 		goto done;
1425 
1426 	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info);
1427 	if (rc)
1428 		goto put_encap;
1429 
1430 	/* Allocate a new tunnel encap record */
1431 	rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info,
1432 					 &encap_node->tunnel_handle);
1433 	if (rc)
1434 		goto put_encap;
1435 
1436 done:
1437 	*encap_handle = encap_node->tunnel_handle;
1438 	return 0;
1439 
1440 put_encap:
1441 	bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1442 				&tc_info->encap_ht_params, encap_node);
1443 	return rc;
1444 }
1445 
1446 static void bnxt_tc_put_tunnel_handle(struct bnxt *bp,
1447 				      struct bnxt_tc_flow *flow,
1448 				      struct bnxt_tc_flow_node *flow_node)
1449 {
1450 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1451 		bnxt_tc_put_decap_handle(bp, flow_node);
1452 	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1453 		bnxt_tc_put_encap_handle(bp, flow_node->encap_node);
1454 }
1455 
1456 static int bnxt_tc_get_tunnel_handle(struct bnxt *bp,
1457 				     struct bnxt_tc_flow *flow,
1458 				     struct bnxt_tc_flow_node *flow_node,
1459 				     __le32 *tunnel_handle)
1460 {
1461 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1462 		return bnxt_tc_get_decap_handle(bp, flow, flow_node,
1463 						tunnel_handle);
1464 	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1465 		return bnxt_tc_get_encap_handle(bp, flow, flow_node,
1466 						tunnel_handle);
1467 	else
1468 		return 0;
1469 }
1470 static int __bnxt_tc_del_flow(struct bnxt *bp,
1471 			      struct bnxt_tc_flow_node *flow_node)
1472 {
1473 	struct bnxt_tc_info *tc_info = bp->tc_info;
1474 	int rc;
1475 
1476 	/* send HWRM cmd to free the flow-id */
1477 	bnxt_hwrm_cfa_flow_free(bp, flow_node);
1478 
1479 	mutex_lock(&tc_info->lock);
1480 
1481 	/* release references to any tunnel encap/decap nodes */
1482 	bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node);
1483 
1484 	/* release reference to l2 node */
1485 	bnxt_tc_put_l2_node(bp, flow_node);
1486 
1487 	mutex_unlock(&tc_info->lock);
1488 
1489 	rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
1490 				    tc_info->flow_ht_params);
1491 	if (rc)
1492 		netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d\n",
1493 			   __func__, rc);
1494 
1495 	kfree_rcu(flow_node, rcu);
1496 	return 0;
1497 }
1498 
1499 static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow,
1500 				 u16 src_fid)
1501 {
1502 	flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX;
1503 }
1504 
1505 static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
1506 				u16 src_fid)
1507 {
1508 	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1509 		flow->src_fid = bp->pf.fw_fid;
1510 	else
1511 		flow->src_fid = src_fid;
1512 }
1513 
1514 /* Add a new flow or replace an existing flow.
1515  * Notes on locking:
1516  * There are essentially two critical sections here.
1517  * 1. while adding a new flow
1518  *    a) lookup l2-key
1519  *    b) issue HWRM cmd and get flow_handle
1520  *    c) link l2-key with flow
1521  * 2. while deleting a flow
1522  *    a) unlinking l2-key from flow
1523  * A lock is needed to protect these two critical sections.
1524  *
1525  * The hash-tables are already protected by the rhashtable API.
1526  */
1527 static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
1528 			    struct flow_cls_offload *tc_flow_cmd)
1529 {
1530 	struct bnxt_tc_flow_node *new_node, *old_node;
1531 	struct bnxt_tc_info *tc_info = bp->tc_info;
1532 	struct bnxt_tc_flow *flow;
1533 	__le32 tunnel_handle = 0;
1534 	__le16 ref_flow_handle;
1535 	int rc;
1536 
1537 	/* allocate memory for the new flow and it's node */
1538 	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
1539 	if (!new_node) {
1540 		rc = -ENOMEM;
1541 		goto done;
1542 	}
1543 	new_node->cookie = tc_flow_cmd->cookie;
1544 	flow = &new_node->flow;
1545 
1546 	rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
1547 	if (rc)
1548 		goto free_node;
1549 
1550 	bnxt_tc_set_src_fid(bp, flow, src_fid);
1551 	bnxt_tc_set_flow_dir(bp, flow, flow->src_fid);
1552 
1553 	if (!bnxt_tc_can_offload(bp, flow)) {
1554 		rc = -EOPNOTSUPP;
1555 		kfree_rcu(new_node, rcu);
1556 		return rc;
1557 	}
1558 
1559 	/* If a flow exists with the same cookie, delete it */
1560 	old_node = rhashtable_lookup_fast(&tc_info->flow_table,
1561 					  &tc_flow_cmd->cookie,
1562 					  tc_info->flow_ht_params);
1563 	if (old_node)
1564 		__bnxt_tc_del_flow(bp, old_node);
1565 
1566 	/* Check if the L2 part of the flow has been offloaded already.
1567 	 * If so, bump up it's refcnt and get it's reference handle.
1568 	 */
1569 	mutex_lock(&tc_info->lock);
1570 	rc = bnxt_tc_get_ref_flow_handle(bp, flow, new_node, &ref_flow_handle);
1571 	if (rc)
1572 		goto unlock;
1573 
1574 	/* If the flow involves tunnel encap/decap, get tunnel_handle */
1575 	rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle);
1576 	if (rc)
1577 		goto put_l2;
1578 
1579 	/* send HWRM cmd to alloc the flow */
1580 	rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
1581 				      tunnel_handle, new_node);
1582 	if (rc)
1583 		goto put_tunnel;
1584 
1585 	flow->lastused = jiffies;
1586 	spin_lock_init(&flow->stats_lock);
1587 	/* add new flow to flow-table */
1588 	rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
1589 				    tc_info->flow_ht_params);
1590 	if (rc)
1591 		goto hwrm_flow_free;
1592 
1593 	mutex_unlock(&tc_info->lock);
1594 	return 0;
1595 
1596 hwrm_flow_free:
1597 	bnxt_hwrm_cfa_flow_free(bp, new_node);
1598 put_tunnel:
1599 	bnxt_tc_put_tunnel_handle(bp, flow, new_node);
1600 put_l2:
1601 	bnxt_tc_put_l2_node(bp, new_node);
1602 unlock:
1603 	mutex_unlock(&tc_info->lock);
1604 free_node:
1605 	kfree_rcu(new_node, rcu);
1606 done:
1607 	netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d\n",
1608 		   __func__, tc_flow_cmd->cookie, rc);
1609 	return rc;
1610 }
1611 
1612 static int bnxt_tc_del_flow(struct bnxt *bp,
1613 			    struct flow_cls_offload *tc_flow_cmd)
1614 {
1615 	struct bnxt_tc_info *tc_info = bp->tc_info;
1616 	struct bnxt_tc_flow_node *flow_node;
1617 
1618 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1619 					   &tc_flow_cmd->cookie,
1620 					   tc_info->flow_ht_params);
1621 	if (!flow_node)
1622 		return -EINVAL;
1623 
1624 	return __bnxt_tc_del_flow(bp, flow_node);
1625 }
1626 
1627 static int bnxt_tc_get_flow_stats(struct bnxt *bp,
1628 				  struct flow_cls_offload *tc_flow_cmd)
1629 {
1630 	struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats;
1631 	struct bnxt_tc_info *tc_info = bp->tc_info;
1632 	struct bnxt_tc_flow_node *flow_node;
1633 	struct bnxt_tc_flow *flow;
1634 	unsigned long lastused;
1635 
1636 	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1637 					   &tc_flow_cmd->cookie,
1638 					   tc_info->flow_ht_params);
1639 	if (!flow_node)
1640 		return -1;
1641 
1642 	flow = &flow_node->flow;
1643 	curr_stats = &flow->stats;
1644 	prev_stats = &flow->prev_stats;
1645 
1646 	spin_lock(&flow->stats_lock);
1647 	stats.packets = curr_stats->packets - prev_stats->packets;
1648 	stats.bytes = curr_stats->bytes - prev_stats->bytes;
1649 	*prev_stats = *curr_stats;
1650 	lastused = flow->lastused;
1651 	spin_unlock(&flow->stats_lock);
1652 
1653 	flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets, 0,
1654 			  lastused, FLOW_ACTION_HW_STATS_DELAYED);
1655 	return 0;
1656 }
1657 
1658 static void bnxt_fill_cfa_stats_req(struct bnxt *bp,
1659 				    struct bnxt_tc_flow_node *flow_node,
1660 				    __le16 *flow_handle, __le32 *flow_id)
1661 {
1662 	u16 handle;
1663 
1664 	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
1665 		*flow_id = flow_node->flow_id;
1666 
1667 		/* If flow_id is used to fetch flow stats then:
1668 		 * 1. lower 12 bits of flow_handle must be set to all 1s.
1669 		 * 2. 15th bit of flow_handle must specify the flow
1670 		 *    direction (TX/RX).
1671 		 */
1672 		if (flow_node->flow.l2_key.dir == BNXT_DIR_RX)
1673 			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX |
1674 				 CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1675 		else
1676 			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1677 
1678 		*flow_handle = cpu_to_le16(handle);
1679 	} else {
1680 		*flow_handle = flow_node->flow_handle;
1681 	}
1682 }
1683 
1684 static int
1685 bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
1686 			     struct bnxt_tc_stats_batch stats_batch[])
1687 {
1688 	struct hwrm_cfa_flow_stats_output *resp;
1689 	struct hwrm_cfa_flow_stats_input *req;
1690 	__le16 *req_flow_handles;
1691 	__le32 *req_flow_ids;
1692 	int rc, i;
1693 
1694 	rc = hwrm_req_init(bp, req, HWRM_CFA_FLOW_STATS);
1695 	if (rc)
1696 		goto exit;
1697 
1698 	req_flow_handles = &req->flow_handle_0;
1699 	req_flow_ids = &req->flow_id_0;
1700 
1701 	req->num_flows = cpu_to_le16(num_flows);
1702 	for (i = 0; i < num_flows; i++) {
1703 		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1704 
1705 		bnxt_fill_cfa_stats_req(bp, flow_node,
1706 					&req_flow_handles[i], &req_flow_ids[i]);
1707 	}
1708 
1709 	resp = hwrm_req_hold(bp, req);
1710 	rc = hwrm_req_send(bp, req);
1711 	if (!rc) {
1712 		__le64 *resp_packets;
1713 		__le64 *resp_bytes;
1714 
1715 		resp_packets = &resp->packet_0;
1716 		resp_bytes = &resp->byte_0;
1717 
1718 		for (i = 0; i < num_flows; i++) {
1719 			stats_batch[i].hw_stats.packets =
1720 						le64_to_cpu(resp_packets[i]);
1721 			stats_batch[i].hw_stats.bytes =
1722 						le64_to_cpu(resp_bytes[i]);
1723 		}
1724 	}
1725 	hwrm_req_drop(bp, req);
1726 exit:
1727 	if (rc)
1728 		netdev_info(bp->dev, "error rc=%d\n", rc);
1729 
1730 	return rc;
1731 }
1732 
1733 /* Add val to accum while handling a possible wraparound
1734  * of val. Eventhough val is of type u64, its actual width
1735  * is denoted by mask and will wrap-around beyond that width.
1736  */
1737 static void accumulate_val(u64 *accum, u64 val, u64 mask)
1738 {
1739 #define low_bits(x, mask)		((x) & (mask))
1740 #define high_bits(x, mask)		((x) & ~(mask))
1741 	bool wrapped = val < low_bits(*accum, mask);
1742 
1743 	*accum = high_bits(*accum, mask) + val;
1744 	if (wrapped)
1745 		*accum += (mask + 1);
1746 }
1747 
1748 /* The HW counters' width is much less than 64bits.
1749  * Handle possible wrap-around while updating the stat counters
1750  */
1751 static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info,
1752 				  struct bnxt_tc_flow_stats *acc_stats,
1753 				  struct bnxt_tc_flow_stats *hw_stats)
1754 {
1755 	accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
1756 	accumulate_val(&acc_stats->packets, hw_stats->packets,
1757 		       tc_info->packets_mask);
1758 }
1759 
1760 static int
1761 bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows,
1762 				struct bnxt_tc_stats_batch stats_batch[])
1763 {
1764 	struct bnxt_tc_info *tc_info = bp->tc_info;
1765 	int rc, i;
1766 
1767 	rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch);
1768 	if (rc)
1769 		return rc;
1770 
1771 	for (i = 0; i < num_flows; i++) {
1772 		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1773 		struct bnxt_tc_flow *flow = &flow_node->flow;
1774 
1775 		spin_lock(&flow->stats_lock);
1776 		bnxt_flow_stats_accum(tc_info, &flow->stats,
1777 				      &stats_batch[i].hw_stats);
1778 		if (flow->stats.packets != flow->prev_stats.packets)
1779 			flow->lastused = jiffies;
1780 		spin_unlock(&flow->stats_lock);
1781 	}
1782 
1783 	return 0;
1784 }
1785 
1786 static int
1787 bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
1788 			      struct bnxt_tc_stats_batch stats_batch[],
1789 			      int *num_flows)
1790 {
1791 	struct bnxt_tc_info *tc_info = bp->tc_info;
1792 	struct rhashtable_iter *iter = &tc_info->iter;
1793 	void *flow_node;
1794 	int rc, i;
1795 
1796 	rhashtable_walk_start(iter);
1797 
1798 	rc = 0;
1799 	for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
1800 		flow_node = rhashtable_walk_next(iter);
1801 		if (IS_ERR(flow_node)) {
1802 			i = 0;
1803 			if (PTR_ERR(flow_node) == -EAGAIN) {
1804 				continue;
1805 			} else {
1806 				rc = PTR_ERR(flow_node);
1807 				goto done;
1808 			}
1809 		}
1810 
1811 		/* No more flows */
1812 		if (!flow_node)
1813 			goto done;
1814 
1815 		stats_batch[i].flow_node = flow_node;
1816 	}
1817 done:
1818 	rhashtable_walk_stop(iter);
1819 	*num_flows = i;
1820 	return rc;
1821 }
1822 
1823 void bnxt_tc_flow_stats_work(struct bnxt *bp)
1824 {
1825 	struct bnxt_tc_info *tc_info = bp->tc_info;
1826 	int num_flows, rc;
1827 
1828 	num_flows = atomic_read(&tc_info->flow_table.nelems);
1829 	if (!num_flows)
1830 		return;
1831 
1832 	rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter);
1833 
1834 	for (;;) {
1835 		rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch,
1836 						   &num_flows);
1837 		if (rc) {
1838 			if (rc == -EAGAIN)
1839 				continue;
1840 			break;
1841 		}
1842 
1843 		if (!num_flows)
1844 			break;
1845 
1846 		bnxt_tc_flow_stats_batch_update(bp, num_flows,
1847 						tc_info->stats_batch);
1848 	}
1849 
1850 	rhashtable_walk_exit(&tc_info->iter);
1851 }
1852 
1853 int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
1854 			 struct flow_cls_offload *cls_flower)
1855 {
1856 	switch (cls_flower->command) {
1857 	case FLOW_CLS_REPLACE:
1858 		return bnxt_tc_add_flow(bp, src_fid, cls_flower);
1859 	case FLOW_CLS_DESTROY:
1860 		return bnxt_tc_del_flow(bp, cls_flower);
1861 	case FLOW_CLS_STATS:
1862 		return bnxt_tc_get_flow_stats(bp, cls_flower);
1863 	default:
1864 		return -EOPNOTSUPP;
1865 	}
1866 }
1867 
1868 static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
1869 				       void *type_data, void *cb_priv)
1870 {
1871 	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1872 	struct flow_cls_offload *flower = type_data;
1873 	struct bnxt *bp = priv->bp;
1874 
1875 	if (!tc_cls_can_offload_and_chain0(bp->dev, type_data))
1876 		return -EOPNOTSUPP;
1877 
1878 	switch (type) {
1879 	case TC_SETUP_CLSFLOWER:
1880 		return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower);
1881 	default:
1882 		return -EOPNOTSUPP;
1883 	}
1884 }
1885 
1886 static struct bnxt_flower_indr_block_cb_priv *
1887 bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev)
1888 {
1889 	struct bnxt_flower_indr_block_cb_priv *cb_priv;
1890 
1891 	list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list)
1892 		if (cb_priv->tunnel_netdev == netdev)
1893 			return cb_priv;
1894 
1895 	return NULL;
1896 }
1897 
1898 static void bnxt_tc_setup_indr_rel(void *cb_priv)
1899 {
1900 	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1901 
1902 	list_del(&priv->list);
1903 	kfree(priv);
1904 }
1905 
1906 static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct Qdisc *sch, struct bnxt *bp,
1907 				    struct flow_block_offload *f, void *data,
1908 				    void (*cleanup)(struct flow_block_cb *block_cb))
1909 {
1910 	struct bnxt_flower_indr_block_cb_priv *cb_priv;
1911 	struct flow_block_cb *block_cb;
1912 
1913 	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1914 		return -EOPNOTSUPP;
1915 
1916 	switch (f->command) {
1917 	case FLOW_BLOCK_BIND:
1918 		cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
1919 		if (!cb_priv)
1920 			return -ENOMEM;
1921 
1922 		cb_priv->tunnel_netdev = netdev;
1923 		cb_priv->bp = bp;
1924 		list_add(&cb_priv->list, &bp->tc_indr_block_list);
1925 
1926 		block_cb = flow_indr_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
1927 						    cb_priv, cb_priv,
1928 						    bnxt_tc_setup_indr_rel, f,
1929 						    netdev, sch, data, bp, cleanup);
1930 		if (IS_ERR(block_cb)) {
1931 			list_del(&cb_priv->list);
1932 			kfree(cb_priv);
1933 			return PTR_ERR(block_cb);
1934 		}
1935 
1936 		flow_block_cb_add(block_cb, f);
1937 		list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list);
1938 		break;
1939 	case FLOW_BLOCK_UNBIND:
1940 		cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev);
1941 		if (!cb_priv)
1942 			return -ENOENT;
1943 
1944 		block_cb = flow_block_cb_lookup(f->block,
1945 						bnxt_tc_setup_indr_block_cb,
1946 						cb_priv);
1947 		if (!block_cb)
1948 			return -ENOENT;
1949 
1950 		flow_indr_block_cb_remove(block_cb, f);
1951 		list_del(&block_cb->driver_list);
1952 		break;
1953 	default:
1954 		return -EOPNOTSUPP;
1955 	}
1956 	return 0;
1957 }
1958 
1959 static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
1960 {
1961 	return netif_is_vxlan(netdev);
1962 }
1963 
1964 static int bnxt_tc_setup_indr_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
1965 				 enum tc_setup_type type, void *type_data,
1966 				 void *data,
1967 				 void (*cleanup)(struct flow_block_cb *block_cb))
1968 {
1969 	if (!netdev || !bnxt_is_netdev_indr_offload(netdev))
1970 		return -EOPNOTSUPP;
1971 
1972 	switch (type) {
1973 	case TC_SETUP_BLOCK:
1974 		return bnxt_tc_setup_indr_block(netdev, sch, cb_priv, type_data, data, cleanup);
1975 	default:
1976 		break;
1977 	}
1978 
1979 	return -EOPNOTSUPP;
1980 }
1981 
1982 static const struct rhashtable_params bnxt_tc_flow_ht_params = {
1983 	.head_offset = offsetof(struct bnxt_tc_flow_node, node),
1984 	.key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
1985 	.key_len = sizeof(((struct bnxt_tc_flow_node *)0)->cookie),
1986 	.automatic_shrinking = true
1987 };
1988 
1989 static const struct rhashtable_params bnxt_tc_l2_ht_params = {
1990 	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
1991 	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
1992 	.key_len = BNXT_TC_L2_KEY_LEN,
1993 	.automatic_shrinking = true
1994 };
1995 
1996 static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = {
1997 	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
1998 	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
1999 	.key_len = BNXT_TC_L2_KEY_LEN,
2000 	.automatic_shrinking = true
2001 };
2002 
2003 static const struct rhashtable_params bnxt_tc_tunnel_ht_params = {
2004 	.head_offset = offsetof(struct bnxt_tc_tunnel_node, node),
2005 	.key_offset = offsetof(struct bnxt_tc_tunnel_node, key),
2006 	.key_len = sizeof(struct ip_tunnel_key),
2007 	.automatic_shrinking = true
2008 };
2009 
/* Convert a counter width in bits to a mask with the low @width bits set.
 * @width must be within 1..64 so that the shift count stays below 64.
 */
#define mask(width)		(~(u64)0 >> (64 - (width)))
2012 
2013 int bnxt_init_tc(struct bnxt *bp)
2014 {
2015 	struct bnxt_tc_info *tc_info;
2016 	int rc;
2017 
2018 	if (bp->hwrm_spec_code < 0x10803)
2019 		return 0;
2020 
2021 	tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL);
2022 	if (!tc_info)
2023 		return -ENOMEM;
2024 	mutex_init(&tc_info->lock);
2025 
2026 	/* Counter widths are programmed by FW */
2027 	tc_info->bytes_mask = mask(36);
2028 	tc_info->packets_mask = mask(28);
2029 
2030 	tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
2031 	rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
2032 	if (rc)
2033 		goto free_tc_info;
2034 
2035 	tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
2036 	rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
2037 	if (rc)
2038 		goto destroy_flow_table;
2039 
2040 	tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params;
2041 	rc = rhashtable_init(&tc_info->decap_l2_table,
2042 			     &tc_info->decap_l2_ht_params);
2043 	if (rc)
2044 		goto destroy_l2_table;
2045 
2046 	tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params;
2047 	rc = rhashtable_init(&tc_info->decap_table,
2048 			     &tc_info->decap_ht_params);
2049 	if (rc)
2050 		goto destroy_decap_l2_table;
2051 
2052 	tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params;
2053 	rc = rhashtable_init(&tc_info->encap_table,
2054 			     &tc_info->encap_ht_params);
2055 	if (rc)
2056 		goto destroy_decap_table;
2057 
2058 	tc_info->enabled = true;
2059 	bp->dev->hw_features |= NETIF_F_HW_TC;
2060 	bp->dev->features |= NETIF_F_HW_TC;
2061 	bp->tc_info = tc_info;
2062 
2063 	/* init indirect block notifications */
2064 	INIT_LIST_HEAD(&bp->tc_indr_block_list);
2065 
2066 	rc = flow_indr_dev_register(bnxt_tc_setup_indr_cb, bp);
2067 	if (!rc)
2068 		return 0;
2069 
2070 	rhashtable_destroy(&tc_info->encap_table);
2071 
2072 destroy_decap_table:
2073 	rhashtable_destroy(&tc_info->decap_table);
2074 destroy_decap_l2_table:
2075 	rhashtable_destroy(&tc_info->decap_l2_table);
2076 destroy_l2_table:
2077 	rhashtable_destroy(&tc_info->l2_table);
2078 destroy_flow_table:
2079 	rhashtable_destroy(&tc_info->flow_table);
2080 free_tc_info:
2081 	kfree(tc_info);
2082 	bp->tc_info = NULL;
2083 	return rc;
2084 }
2085 
2086 void bnxt_shutdown_tc(struct bnxt *bp)
2087 {
2088 	struct bnxt_tc_info *tc_info = bp->tc_info;
2089 
2090 	if (!bnxt_tc_flower_enabled(bp))
2091 		return;
2092 
2093 	flow_indr_dev_unregister(bnxt_tc_setup_indr_cb, bp,
2094 				 bnxt_tc_setup_indr_rel);
2095 	rhashtable_destroy(&tc_info->flow_table);
2096 	rhashtable_destroy(&tc_info->l2_table);
2097 	rhashtable_destroy(&tc_info->decap_l2_table);
2098 	rhashtable_destroy(&tc_info->decap_table);
2099 	rhashtable_destroy(&tc_info->encap_table);
2100 	kfree(tc_info);
2101 	bp->tc_info = NULL;
2102 }
2103