xref: /linux/drivers/net/ethernet/intel/ice/ice_sched.c (revision a44e4f3ab16bc808590763a543a93b6fbf3abcc4)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018, Intel Corporation. */
3 
4 #include "ice_sched.h"
5 
6 /**
7  * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
8  * @pi: port information structure
9  * @info: Scheduler element information from firmware
10  *
11  * This function inserts the root node of the scheduling tree topology
12  * to the SW DB.
13  */
14 static enum ice_status
15 ice_sched_add_root_node(struct ice_port_info *pi,
16 			struct ice_aqc_txsched_elem_data *info)
17 {
18 	struct ice_sched_node *root;
19 	struct ice_hw *hw;
20 
21 	if (!pi)
22 		return ICE_ERR_PARAM;
23 
24 	hw = pi->hw;
25 
26 	root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
27 	if (!root)
28 		return ICE_ERR_NO_MEMORY;
29 
30 	/* coverity[suspicious_sizeof] */
31 	root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0],
32 				      sizeof(*root), GFP_KERNEL);
33 	if (!root->children) {
34 		devm_kfree(ice_hw_to_dev(hw), root);
35 		return ICE_ERR_NO_MEMORY;
36 	}
37 
38 	memcpy(&root->info, info, sizeof(*info));
39 	pi->root = root;
40 	return 0;
41 }
42 
43 /**
44  * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
45  * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
46  * @teid: node TEID to search
47  *
48  * This function searches for a node matching the TEID in the scheduling tree
49  * from the SW DB. The search is recursive and is restricted by the number of
50  * layers it has searched through; stopping at the max supported layer.
51  *
52  * This function needs to be called when holding the port_info->sched_lock
53  */
54 struct ice_sched_node *
55 ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
56 {
57 	u16 i;
58 
59 	/* The TEID is same as that of the start_node */
60 	if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
61 		return start_node;
62 
63 	/* The node has no children or is at the max layer */
64 	if (!start_node->num_children ||
65 	    start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
66 	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
67 		return NULL;
68 
69 	/* Check if TEID matches to any of the children nodes */
70 	for (i = 0; i < start_node->num_children; i++)
71 		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
72 			return start_node->children[i];
73 
74 	/* Search within each child's sub-tree */
75 	for (i = 0; i < start_node->num_children; i++) {
76 		struct ice_sched_node *tmp;
77 
78 		tmp = ice_sched_find_node_by_teid(start_node->children[i],
79 						  teid);
80 		if (tmp)
81 			return tmp;
82 	}
83 
84 	return NULL;
85 }
86 
87 /**
88  * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
89  * @hw: pointer to the HW struct
90  * @cmd_opc: cmd opcode
91  * @elems_req: number of elements to request
92  * @buf: pointer to buffer
93  * @buf_size: buffer size in bytes
94  * @elems_resp: returns total number of elements response
95  * @cd: pointer to command details structure or NULL
96  *
97  * This function sends a scheduling elements cmd (cmd_opc)
98  */
99 static enum ice_status
100 ice_aqc_send_sched_elem_cmd(struct ice_hw *hw, enum ice_adminq_opc cmd_opc,
101 			    u16 elems_req, void *buf, u16 buf_size,
102 			    u16 *elems_resp, struct ice_sq_cd *cd)
103 {
104 	struct ice_aqc_sched_elem_cmd *cmd;
105 	struct ice_aq_desc desc;
106 	enum ice_status status;
107 
108 	cmd = &desc.params.sched_elem_cmd;
109 	ice_fill_dflt_direct_cmd_desc(&desc, cmd_opc);
110 	cmd->num_elem_req = cpu_to_le16(elems_req);
111 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
112 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
113 	if (!status && elems_resp)
114 		*elems_resp = le16_to_cpu(cmd->num_elem_resp);
115 
116 	return status;
117 }
118 
119 /**
120  * ice_aq_query_sched_elems - query scheduler elements
121  * @hw: pointer to the HW struct
122  * @elems_req: number of elements to query
123  * @buf: pointer to buffer
124  * @buf_size: buffer size in bytes
125  * @elems_ret: returns total number of elements returned
126  * @cd: pointer to command details structure or NULL
127  *
128  * Query scheduling elements (0x0404)
129  */
130 enum ice_status
131 ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
132 			 struct ice_aqc_get_elem *buf, u16 buf_size,
133 			 u16 *elems_ret, struct ice_sq_cd *cd)
134 {
135 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_get_sched_elems,
136 					   elems_req, (void *)buf, buf_size,
137 					   elems_ret, cd);
138 }
139 
140 /**
141  * ice_sched_add_node - Insert the Tx scheduler node in SW DB
142  * @pi: port information structure
143  * @layer: Scheduler layer of the node
144  * @info: Scheduler element information from firmware
145  *
146  * This function inserts a scheduler node to the SW DB.
147  */
148 enum ice_status
149 ice_sched_add_node(struct ice_port_info *pi, u8 layer,
150 		   struct ice_aqc_txsched_elem_data *info)
151 {
152 	struct ice_sched_node *parent;
153 	struct ice_aqc_get_elem elem;
154 	struct ice_sched_node *node;
155 	enum ice_status status;
156 	struct ice_hw *hw;
157 
158 	if (!pi)
159 		return ICE_ERR_PARAM;
160 
161 	hw = pi->hw;
162 
163 	/* A valid parent node should be there */
164 	parent = ice_sched_find_node_by_teid(pi->root,
165 					     le32_to_cpu(info->parent_teid));
166 	if (!parent) {
167 		ice_debug(hw, ICE_DBG_SCHED,
168 			  "Parent Node not found for parent_teid=0x%x\n",
169 			  le32_to_cpu(info->parent_teid));
170 		return ICE_ERR_PARAM;
171 	}
172 
173 	/* query the current node information from FW  before additing it
174 	 * to the SW DB
175 	 */
176 	status = ice_sched_query_elem(hw, le32_to_cpu(info->node_teid), &elem);
177 	if (status)
178 		return status;
179 
180 	node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
181 	if (!node)
182 		return ICE_ERR_NO_MEMORY;
183 	if (hw->max_children[layer]) {
184 		/* coverity[suspicious_sizeof] */
185 		node->children = devm_kcalloc(ice_hw_to_dev(hw),
186 					      hw->max_children[layer],
187 					      sizeof(*node), GFP_KERNEL);
188 		if (!node->children) {
189 			devm_kfree(ice_hw_to_dev(hw), node);
190 			return ICE_ERR_NO_MEMORY;
191 		}
192 	}
193 
194 	node->in_use = true;
195 	node->parent = parent;
196 	node->tx_sched_layer = layer;
197 	parent->children[parent->num_children++] = node;
198 	memcpy(&node->info, &elem.generic[0], sizeof(node->info));
199 	return 0;
200 }
201 
202 /**
203  * ice_aq_delete_sched_elems - delete scheduler elements
204  * @hw: pointer to the HW struct
205  * @grps_req: number of groups to delete
206  * @buf: pointer to buffer
207  * @buf_size: buffer size in bytes
208  * @grps_del: returns total number of elements deleted
209  * @cd: pointer to command details structure or NULL
210  *
211  * Delete scheduling elements (0x040F)
212  */
213 static enum ice_status
214 ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
215 			  struct ice_aqc_delete_elem *buf, u16 buf_size,
216 			  u16 *grps_del, struct ice_sq_cd *cd)
217 {
218 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_delete_sched_elems,
219 					   grps_req, (void *)buf, buf_size,
220 					   grps_del, cd);
221 }
222 
223 /**
224  * ice_sched_remove_elems - remove nodes from HW
225  * @hw: pointer to the HW struct
226  * @parent: pointer to the parent node
227  * @num_nodes: number of nodes
228  * @node_teids: array of node teids to be deleted
229  *
230  * This function remove nodes from HW
231  */
232 static enum ice_status
233 ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
234 		       u16 num_nodes, u32 *node_teids)
235 {
236 	struct ice_aqc_delete_elem *buf;
237 	u16 i, num_groups_removed = 0;
238 	enum ice_status status;
239 	u16 buf_size;
240 
241 	buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1);
242 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
243 	if (!buf)
244 		return ICE_ERR_NO_MEMORY;
245 
246 	buf->hdr.parent_teid = parent->info.node_teid;
247 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
248 	for (i = 0; i < num_nodes; i++)
249 		buf->teid[i] = cpu_to_le32(node_teids[i]);
250 
251 	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
252 					   &num_groups_removed, NULL);
253 	if (status || num_groups_removed != 1)
254 		ice_debug(hw, ICE_DBG_SCHED, "remove node failed FW error %d\n",
255 			  hw->adminq.sq_last_status);
256 
257 	devm_kfree(ice_hw_to_dev(hw), buf);
258 	return status;
259 }
260 
261 /**
262  * ice_sched_get_first_node - get the first node of the given layer
263  * @pi: port information structure
264  * @parent: pointer the base node of the subtree
265  * @layer: layer number
266  *
267  * This function retrieves the first node of the given layer from the subtree
268  */
269 static struct ice_sched_node *
270 ice_sched_get_first_node(struct ice_port_info *pi,
271 			 struct ice_sched_node *parent, u8 layer)
272 {
273 	return pi->sib_head[parent->tc_num][layer];
274 }
275 
276 /**
277  * ice_sched_get_tc_node - get pointer to TC node
278  * @pi: port information structure
279  * @tc: TC number
280  *
281  * This function returns the TC node pointer
282  */
283 struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
284 {
285 	u8 i;
286 
287 	if (!pi || !pi->root)
288 		return NULL;
289 	for (i = 0; i < pi->root->num_children; i++)
290 		if (pi->root->children[i]->tc_num == tc)
291 			return pi->root->children[i];
292 	return NULL;
293 }
294 
295 /**
296  * ice_free_sched_node - Free a Tx scheduler node from SW DB
297  * @pi: port information structure
298  * @node: pointer to the ice_sched_node struct
299  *
300  * This function frees up a node from SW DB as well as from HW
301  *
302  * This function needs to be called with the port_info->sched_lock held
303  */
304 void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
305 {
306 	struct ice_sched_node *parent;
307 	struct ice_hw *hw = pi->hw;
308 	u8 i, j;
309 
310 	/* Free the children before freeing up the parent node
311 	 * The parent array is updated below and that shifts the nodes
312 	 * in the array. So always pick the first child if num children > 0
313 	 */
314 	while (node->num_children)
315 		ice_free_sched_node(pi, node->children[0]);
316 
317 	/* Leaf, TC and root nodes can't be deleted by SW */
318 	if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
319 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
320 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
321 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
322 		u32 teid = le32_to_cpu(node->info.node_teid);
323 
324 		ice_sched_remove_elems(hw, node->parent, 1, &teid);
325 	}
326 	parent = node->parent;
327 	/* root has no parent */
328 	if (parent) {
329 		struct ice_sched_node *p;
330 
331 		/* update the parent */
332 		for (i = 0; i < parent->num_children; i++)
333 			if (parent->children[i] == node) {
334 				for (j = i + 1; j < parent->num_children; j++)
335 					parent->children[j - 1] =
336 						parent->children[j];
337 				parent->num_children--;
338 				break;
339 			}
340 
341 		p = ice_sched_get_first_node(pi, node, node->tx_sched_layer);
342 		while (p) {
343 			if (p->sibling == node) {
344 				p->sibling = node->sibling;
345 				break;
346 			}
347 			p = p->sibling;
348 		}
349 
350 		/* update the sibling head if head is getting removed */
351 		if (pi->sib_head[node->tc_num][node->tx_sched_layer] == node)
352 			pi->sib_head[node->tc_num][node->tx_sched_layer] =
353 				node->sibling;
354 	}
355 
356 	/* leaf nodes have no children */
357 	if (node->children)
358 		devm_kfree(ice_hw_to_dev(hw), node->children);
359 	devm_kfree(ice_hw_to_dev(hw), node);
360 }
361 
362 /**
363  * ice_aq_get_dflt_topo - gets default scheduler topology
364  * @hw: pointer to the HW struct
365  * @lport: logical port number
366  * @buf: pointer to buffer
367  * @buf_size: buffer size in bytes
368  * @num_branches: returns total number of queue to port branches
369  * @cd: pointer to command details structure or NULL
370  *
371  * Get default scheduler topology (0x400)
372  */
373 static enum ice_status
374 ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
375 		     struct ice_aqc_get_topo_elem *buf, u16 buf_size,
376 		     u8 *num_branches, struct ice_sq_cd *cd)
377 {
378 	struct ice_aqc_get_topo *cmd;
379 	struct ice_aq_desc desc;
380 	enum ice_status status;
381 
382 	cmd = &desc.params.get_topo;
383 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
384 	cmd->port_num = lport;
385 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
386 	if (!status && num_branches)
387 		*num_branches = cmd->num_branches;
388 
389 	return status;
390 }
391 
392 /**
393  * ice_aq_add_sched_elems - adds scheduling element
394  * @hw: pointer to the HW struct
395  * @grps_req: the number of groups that are requested to be added
396  * @buf: pointer to buffer
397  * @buf_size: buffer size in bytes
398  * @grps_added: returns total number of groups added
399  * @cd: pointer to command details structure or NULL
400  *
401  * Add scheduling elements (0x0401)
402  */
403 static enum ice_status
404 ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
405 		       struct ice_aqc_add_elem *buf, u16 buf_size,
406 		       u16 *grps_added, struct ice_sq_cd *cd)
407 {
408 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_add_sched_elems,
409 					   grps_req, (void *)buf, buf_size,
410 					   grps_added, cd);
411 }
412 
413 /**
414  * ice_aq_suspend_sched_elems - suspend scheduler elements
415  * @hw: pointer to the HW struct
416  * @elems_req: number of elements to suspend
417  * @buf: pointer to buffer
418  * @buf_size: buffer size in bytes
419  * @elems_ret: returns total number of elements suspended
420  * @cd: pointer to command details structure or NULL
421  *
422  * Suspend scheduling elements (0x0409)
423  */
424 static enum ice_status
425 ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req,
426 			   struct ice_aqc_suspend_resume_elem *buf,
427 			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
428 {
429 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_suspend_sched_elems,
430 					   elems_req, (void *)buf, buf_size,
431 					   elems_ret, cd);
432 }
433 
434 /**
435  * ice_aq_resume_sched_elems - resume scheduler elements
436  * @hw: pointer to the HW struct
437  * @elems_req: number of elements to resume
438  * @buf: pointer to buffer
439  * @buf_size: buffer size in bytes
440  * @elems_ret: returns total number of elements resumed
441  * @cd: pointer to command details structure or NULL
442  *
443  * resume scheduling elements (0x040A)
444  */
445 static enum ice_status
446 ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req,
447 			  struct ice_aqc_suspend_resume_elem *buf,
448 			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
449 {
450 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_resume_sched_elems,
451 					   elems_req, (void *)buf, buf_size,
452 					   elems_ret, cd);
453 }
454 
455 /**
456  * ice_aq_query_sched_res - query scheduler resource
457  * @hw: pointer to the HW struct
458  * @buf_size: buffer size in bytes
459  * @buf: pointer to buffer
460  * @cd: pointer to command details structure or NULL
461  *
462  * Query scheduler resource allocation (0x0412)
463  */
464 static enum ice_status
465 ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
466 		       struct ice_aqc_query_txsched_res_resp *buf,
467 		       struct ice_sq_cd *cd)
468 {
469 	struct ice_aq_desc desc;
470 
471 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
472 	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
473 }
474 
475 /**
476  * ice_sched_suspend_resume_elems - suspend or resume HW nodes
477  * @hw: pointer to the HW struct
478  * @num_nodes: number of nodes
479  * @node_teids: array of node teids to be suspended or resumed
480  * @suspend: true means suspend / false means resume
481  *
482  * This function suspends or resumes HW nodes
483  */
484 static enum ice_status
485 ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
486 			       bool suspend)
487 {
488 	struct ice_aqc_suspend_resume_elem *buf;
489 	u16 i, buf_size, num_elem_ret = 0;
490 	enum ice_status status;
491 
492 	buf_size = sizeof(*buf) * num_nodes;
493 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
494 	if (!buf)
495 		return ICE_ERR_NO_MEMORY;
496 
497 	for (i = 0; i < num_nodes; i++)
498 		buf->teid[i] = cpu_to_le32(node_teids[i]);
499 
500 	if (suspend)
501 		status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
502 						    buf_size, &num_elem_ret,
503 						    NULL);
504 	else
505 		status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
506 						   buf_size, &num_elem_ret,
507 						   NULL);
508 	if (status || num_elem_ret != num_nodes)
509 		ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
510 
511 	devm_kfree(ice_hw_to_dev(hw), buf);
512 	return status;
513 }
514 
515 /**
516  * ice_alloc_lan_q_ctx - allocate LAN queue contexts for the given VSI and TC
517  * @hw: pointer to the HW struct
518  * @vsi_handle: VSI handle
519  * @tc: TC number
520  * @new_numqs: number of queues
521  */
522 static enum ice_status
523 ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
524 {
525 	struct ice_vsi_ctx *vsi_ctx;
526 	struct ice_q_ctx *q_ctx;
527 
528 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
529 	if (!vsi_ctx)
530 		return ICE_ERR_PARAM;
531 	/* allocate LAN queue contexts */
532 	if (!vsi_ctx->lan_q_ctx[tc]) {
533 		vsi_ctx->lan_q_ctx[tc] = devm_kcalloc(ice_hw_to_dev(hw),
534 						      new_numqs,
535 						      sizeof(*q_ctx),
536 						      GFP_KERNEL);
537 		if (!vsi_ctx->lan_q_ctx[tc])
538 			return ICE_ERR_NO_MEMORY;
539 		vsi_ctx->num_lan_q_entries[tc] = new_numqs;
540 		return 0;
541 	}
542 	/* num queues are increased, update the queue contexts */
543 	if (new_numqs > vsi_ctx->num_lan_q_entries[tc]) {
544 		u16 prev_num = vsi_ctx->num_lan_q_entries[tc];
545 
546 		q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs,
547 				     sizeof(*q_ctx), GFP_KERNEL);
548 		if (!q_ctx)
549 			return ICE_ERR_NO_MEMORY;
550 		memcpy(q_ctx, vsi_ctx->lan_q_ctx[tc],
551 		       prev_num * sizeof(*q_ctx));
552 		devm_kfree(ice_hw_to_dev(hw), vsi_ctx->lan_q_ctx[tc]);
553 		vsi_ctx->lan_q_ctx[tc] = q_ctx;
554 		vsi_ctx->num_lan_q_entries[tc] = new_numqs;
555 	}
556 	return 0;
557 }
558 
559 /**
560  * ice_sched_clear_agg - clears the aggregator related information
561  * @hw: pointer to the hardware structure
562  *
563  * This function removes aggregator list and free up aggregator related memory
564  * previously allocated.
565  */
566 void ice_sched_clear_agg(struct ice_hw *hw)
567 {
568 	struct ice_sched_agg_info *agg_info;
569 	struct ice_sched_agg_info *atmp;
570 
571 	list_for_each_entry_safe(agg_info, atmp, &hw->agg_list, list_entry) {
572 		struct ice_sched_agg_vsi_info *agg_vsi_info;
573 		struct ice_sched_agg_vsi_info *vtmp;
574 
575 		list_for_each_entry_safe(agg_vsi_info, vtmp,
576 					 &agg_info->agg_vsi_list, list_entry) {
577 			list_del(&agg_vsi_info->list_entry);
578 			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
579 		}
580 		list_del(&agg_info->list_entry);
581 		devm_kfree(ice_hw_to_dev(hw), agg_info);
582 	}
583 }
584 
585 /**
586  * ice_sched_clear_tx_topo - clears the scheduler tree nodes
587  * @pi: port information structure
588  *
589  * This function removes all the nodes from HW as well as from SW DB.
590  */
591 static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
592 {
593 	if (!pi)
594 		return;
595 	if (pi->root) {
596 		ice_free_sched_node(pi, pi->root);
597 		pi->root = NULL;
598 	}
599 }
600 
601 /**
602  * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
603  * @pi: port information structure
604  *
605  * Cleanup scheduling elements from SW DB
606  */
607 void ice_sched_clear_port(struct ice_port_info *pi)
608 {
609 	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
610 		return;
611 
612 	pi->port_state = ICE_SCHED_PORT_STATE_INIT;
613 	mutex_lock(&pi->sched_lock);
614 	ice_sched_clear_tx_topo(pi);
615 	mutex_unlock(&pi->sched_lock);
616 	mutex_destroy(&pi->sched_lock);
617 }
618 
619 /**
620  * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
621  * @hw: pointer to the HW struct
622  *
623  * Cleanup scheduling elements from SW DB for all the ports
624  */
625 void ice_sched_cleanup_all(struct ice_hw *hw)
626 {
627 	if (!hw)
628 		return;
629 
630 	if (hw->layer_info) {
631 		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
632 		hw->layer_info = NULL;
633 	}
634 
635 	if (hw->port_info)
636 		ice_sched_clear_port(hw->port_info);
637 
638 	hw->num_tx_sched_layers = 0;
639 	hw->num_tx_sched_phys_layers = 0;
640 	hw->flattened_layers = 0;
641 	hw->max_cgds = 0;
642 }
643 
644 /**
645  * ice_sched_add_elems - add nodes to HW and SW DB
646  * @pi: port information structure
647  * @tc_node: pointer to the branch node
648  * @parent: pointer to the parent node
649  * @layer: layer number to add nodes
650  * @num_nodes: number of nodes
651  * @num_nodes_added: pointer to num nodes added
652  * @first_node_teid: if new nodes are added then return the TEID of first node
653  *
654  * This function add nodes to HW as well as to SW DB for a given layer
655  */
656 static enum ice_status
657 ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
658 		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
659 		    u16 *num_nodes_added, u32 *first_node_teid)
660 {
661 	struct ice_sched_node *prev, *new_node;
662 	struct ice_aqc_add_elem *buf;
663 	u16 i, num_groups_added = 0;
664 	enum ice_status status = 0;
665 	struct ice_hw *hw = pi->hw;
666 	size_t buf_size;
667 	u32 teid;
668 
669 	buf_size = struct_size(buf, generic, num_nodes - 1);
670 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
671 	if (!buf)
672 		return ICE_ERR_NO_MEMORY;
673 
674 	buf->hdr.parent_teid = parent->info.node_teid;
675 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
676 	for (i = 0; i < num_nodes; i++) {
677 		buf->generic[i].parent_teid = parent->info.node_teid;
678 		buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
679 		buf->generic[i].data.valid_sections =
680 			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
681 			ICE_AQC_ELEM_VALID_EIR;
682 		buf->generic[i].data.generic = 0;
683 		buf->generic[i].data.cir_bw.bw_profile_idx =
684 			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
685 		buf->generic[i].data.cir_bw.bw_alloc =
686 			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
687 		buf->generic[i].data.eir_bw.bw_profile_idx =
688 			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
689 		buf->generic[i].data.eir_bw.bw_alloc =
690 			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
691 	}
692 
693 	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
694 					&num_groups_added, NULL);
695 	if (status || num_groups_added != 1) {
696 		ice_debug(hw, ICE_DBG_SCHED, "add node failed FW Error %d\n",
697 			  hw->adminq.sq_last_status);
698 		devm_kfree(ice_hw_to_dev(hw), buf);
699 		return ICE_ERR_CFG;
700 	}
701 
702 	*num_nodes_added = num_nodes;
703 	/* add nodes to the SW DB */
704 	for (i = 0; i < num_nodes; i++) {
705 		status = ice_sched_add_node(pi, layer, &buf->generic[i]);
706 		if (status) {
707 			ice_debug(hw, ICE_DBG_SCHED,
708 				  "add nodes in SW DB failed status =%d\n",
709 				  status);
710 			break;
711 		}
712 
713 		teid = le32_to_cpu(buf->generic[i].node_teid);
714 		new_node = ice_sched_find_node_by_teid(parent, teid);
715 		if (!new_node) {
716 			ice_debug(hw, ICE_DBG_SCHED,
717 				  "Node is missing for teid =%d\n", teid);
718 			break;
719 		}
720 
721 		new_node->sibling = NULL;
722 		new_node->tc_num = tc_node->tc_num;
723 
724 		/* add it to previous node sibling pointer */
725 		/* Note: siblings are not linked across branches */
726 		prev = ice_sched_get_first_node(pi, tc_node, layer);
727 		if (prev && prev != new_node) {
728 			while (prev->sibling)
729 				prev = prev->sibling;
730 			prev->sibling = new_node;
731 		}
732 
733 		/* initialize the sibling head */
734 		if (!pi->sib_head[tc_node->tc_num][layer])
735 			pi->sib_head[tc_node->tc_num][layer] = new_node;
736 
737 		if (i == 0)
738 			*first_node_teid = teid;
739 	}
740 
741 	devm_kfree(ice_hw_to_dev(hw), buf);
742 	return status;
743 }
744 
745 /**
746  * ice_sched_add_nodes_to_layer - Add nodes to a given layer
747  * @pi: port information structure
748  * @tc_node: pointer to TC node
749  * @parent: pointer to parent node
750  * @layer: layer number to add nodes
751  * @num_nodes: number of nodes to be added
752  * @first_node_teid: pointer to the first node TEID
753  * @num_nodes_added: pointer to number of nodes added
754  *
755  * This function add nodes to a given layer.
756  */
757 static enum ice_status
758 ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
759 			     struct ice_sched_node *tc_node,
760 			     struct ice_sched_node *parent, u8 layer,
761 			     u16 num_nodes, u32 *first_node_teid,
762 			     u16 *num_nodes_added)
763 {
764 	u32 *first_teid_ptr = first_node_teid;
765 	u16 new_num_nodes, max_child_nodes;
766 	enum ice_status status = 0;
767 	struct ice_hw *hw = pi->hw;
768 	u16 num_added = 0;
769 	u32 temp;
770 
771 	*num_nodes_added = 0;
772 
773 	if (!num_nodes)
774 		return status;
775 
776 	if (!parent || layer < hw->sw_entry_point_layer)
777 		return ICE_ERR_PARAM;
778 
779 	/* max children per node per layer */
780 	max_child_nodes = hw->max_children[parent->tx_sched_layer];
781 
782 	/* current number of children + required nodes exceed max children ? */
783 	if ((parent->num_children + num_nodes) > max_child_nodes) {
784 		/* Fail if the parent is a TC node */
785 		if (parent == tc_node)
786 			return ICE_ERR_CFG;
787 
788 		/* utilize all the spaces if the parent is not full */
789 		if (parent->num_children < max_child_nodes) {
790 			new_num_nodes = max_child_nodes - parent->num_children;
791 			/* this recursion is intentional, and wouldn't
792 			 * go more than 2 calls
793 			 */
794 			status = ice_sched_add_nodes_to_layer(pi, tc_node,
795 							      parent, layer,
796 							      new_num_nodes,
797 							      first_node_teid,
798 							      &num_added);
799 			if (status)
800 				return status;
801 
802 			*num_nodes_added += num_added;
803 		}
804 		/* Don't modify the first node TEID memory if the first node was
805 		 * added already in the above call. Instead send some temp
806 		 * memory for all other recursive calls.
807 		 */
808 		if (num_added)
809 			first_teid_ptr = &temp;
810 
811 		new_num_nodes = num_nodes - num_added;
812 
813 		/* This parent is full, try the next sibling */
814 		parent = parent->sibling;
815 
816 		/* this recursion is intentional, for 1024 queues
817 		 * per VSI, it goes max of 16 iterations.
818 		 * 1024 / 8 = 128 layer 8 nodes
819 		 * 128 /8 = 16 (add 8 nodes per iteration)
820 		 */
821 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
822 						      layer, new_num_nodes,
823 						      first_teid_ptr,
824 						      &num_added);
825 		*num_nodes_added += num_added;
826 		return status;
827 	}
828 
829 	status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
830 				     num_nodes_added, first_node_teid);
831 	return status;
832 }
833 
834 /**
835  * ice_sched_get_qgrp_layer - get the current queue group layer number
836  * @hw: pointer to the HW struct
837  *
838  * This function returns the current queue group layer number
839  */
840 static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
841 {
842 	/* It's always total layers - 1, the array is 0 relative so -2 */
843 	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
844 }
845 
846 /**
847  * ice_sched_get_vsi_layer - get the current VSI layer number
848  * @hw: pointer to the HW struct
849  *
850  * This function returns the current VSI layer number
851  */
852 static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
853 {
854 	/* Num Layers       VSI layer
855 	 *     9               6
856 	 *     7               4
857 	 *     5 or less       sw_entry_point_layer
858 	 */
859 	/* calculate the VSI layer based on number of layers. */
860 	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
861 		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
862 
863 		if (layer > hw->sw_entry_point_layer)
864 			return layer;
865 	}
866 	return hw->sw_entry_point_layer;
867 }
868 
869 /**
870  * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
871  * @pi: port information structure
872  *
873  * This function removes the leaf node that was created by the FW
874  * during initialization
875  */
876 static void ice_rm_dflt_leaf_node(struct ice_port_info *pi)
877 {
878 	struct ice_sched_node *node;
879 
880 	node = pi->root;
881 	while (node) {
882 		if (!node->num_children)
883 			break;
884 		node = node->children[0];
885 	}
886 	if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
887 		u32 teid = le32_to_cpu(node->info.node_teid);
888 		enum ice_status status;
889 
890 		/* remove the default leaf node */
891 		status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
892 		if (!status)
893 			ice_free_sched_node(pi, node);
894 	}
895 }
896 
897 /**
898  * ice_sched_rm_dflt_nodes - free the default nodes in the tree
899  * @pi: port information structure
900  *
901  * This function frees all the nodes except root and TC that were created by
902  * the FW during initialization
903  */
904 static void ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
905 {
906 	struct ice_sched_node *node;
907 
908 	ice_rm_dflt_leaf_node(pi);
909 
910 	/* remove the default nodes except TC and root nodes */
911 	node = pi->root;
912 	while (node) {
913 		if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
914 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
915 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
916 			ice_free_sched_node(pi, node);
917 			break;
918 		}
919 
920 		if (!node->num_children)
921 			break;
922 		node = node->children[0];
923 	}
924 }
925 
926 /**
927  * ice_sched_init_port - Initialize scheduler by querying information from FW
928  * @pi: port info structure for the tree to cleanup
929  *
930  * This function is the initial call to find the total number of Tx scheduler
931  * resources, default topology created by firmware and storing the information
932  * in SW DB.
933  */
934 enum ice_status ice_sched_init_port(struct ice_port_info *pi)
935 {
936 	struct ice_aqc_get_topo_elem *buf;
937 	enum ice_status status;
938 	struct ice_hw *hw;
939 	u8 num_branches;
940 	u16 num_elems;
941 	u8 i, j;
942 
943 	if (!pi)
944 		return ICE_ERR_PARAM;
945 	hw = pi->hw;
946 
947 	/* Query the Default Topology from FW */
948 	buf = devm_kzalloc(ice_hw_to_dev(hw), ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
949 	if (!buf)
950 		return ICE_ERR_NO_MEMORY;
951 
952 	/* Query default scheduling tree topology */
953 	status = ice_aq_get_dflt_topo(hw, pi->lport, buf, ICE_AQ_MAX_BUF_LEN,
954 				      &num_branches, NULL);
955 	if (status)
956 		goto err_init_port;
957 
958 	/* num_branches should be between 1-8 */
959 	if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
960 		ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
961 			  num_branches);
962 		status = ICE_ERR_PARAM;
963 		goto err_init_port;
964 	}
965 
966 	/* get the number of elements on the default/first branch */
967 	num_elems = le16_to_cpu(buf[0].hdr.num_elems);
968 
969 	/* num_elems should always be between 1-9 */
970 	if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
971 		ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
972 			  num_elems);
973 		status = ICE_ERR_PARAM;
974 		goto err_init_port;
975 	}
976 
977 	/* If the last node is a leaf node then the index of the queue group
978 	 * layer is two less than the number of elements.
979 	 */
980 	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
981 	    ICE_AQC_ELEM_TYPE_LEAF)
982 		pi->last_node_teid =
983 			le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
984 	else
985 		pi->last_node_teid =
986 			le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
987 
988 	/* Insert the Tx Sched root node */
989 	status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
990 	if (status)
991 		goto err_init_port;
992 
993 	/* Parse the default tree and cache the information */
994 	for (i = 0; i < num_branches; i++) {
995 		num_elems = le16_to_cpu(buf[i].hdr.num_elems);
996 
997 		/* Skip root element as already inserted */
998 		for (j = 1; j < num_elems; j++) {
999 			/* update the sw entry point */
1000 			if (buf[0].generic[j].data.elem_type ==
1001 			    ICE_AQC_ELEM_TYPE_ENTRY_POINT)
1002 				hw->sw_entry_point_layer = j;
1003 
1004 			status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
1005 			if (status)
1006 				goto err_init_port;
1007 		}
1008 	}
1009 
1010 	/* Remove the default nodes. */
1011 	if (pi->root)
1012 		ice_sched_rm_dflt_nodes(pi);
1013 
1014 	/* initialize the port for handling the scheduler tree */
1015 	pi->port_state = ICE_SCHED_PORT_STATE_READY;
1016 	mutex_init(&pi->sched_lock);
1017 
1018 err_init_port:
1019 	if (status && pi->root) {
1020 		ice_free_sched_node(pi, pi->root);
1021 		pi->root = NULL;
1022 	}
1023 
1024 	devm_kfree(ice_hw_to_dev(hw), buf);
1025 	return status;
1026 }
1027 
1028 /**
1029  * ice_sched_query_res_alloc - query the FW for num of logical sched layers
1030  * @hw: pointer to the HW struct
1031  *
1032  * query FW for allocated scheduler resources and store in HW struct
1033  */
1034 enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
1035 {
1036 	struct ice_aqc_query_txsched_res_resp *buf;
1037 	enum ice_status status = 0;
1038 	__le16 max_sibl;
1039 	u16 i;
1040 
1041 	if (hw->layer_info)
1042 		return status;
1043 
1044 	buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
1045 	if (!buf)
1046 		return ICE_ERR_NO_MEMORY;
1047 
1048 	status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
1049 	if (status)
1050 		goto sched_query_out;
1051 
1052 	hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
1053 	hw->num_tx_sched_phys_layers =
1054 		le16_to_cpu(buf->sched_props.phys_levels);
1055 	hw->flattened_layers = buf->sched_props.flattening_bitmap;
1056 	hw->max_cgds = buf->sched_props.max_pf_cgds;
1057 
1058 	/* max sibling group size of current layer refers to the max children
1059 	 * of the below layer node.
1060 	 * layer 1 node max children will be layer 2 max sibling group size
1061 	 * layer 2 node max children will be layer 3 max sibling group size
1062 	 * and so on. This array will be populated from root (index 0) to
1063 	 * qgroup layer 7. Leaf node has no children.
1064 	 */
1065 	for (i = 0; i < hw->num_tx_sched_layers; i++) {
1066 		max_sibl = buf->layer_props[i].max_sibl_grp_sz;
1067 		hw->max_children[i] = le16_to_cpu(max_sibl);
1068 	}
1069 
1070 	hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
1071 				      (hw->num_tx_sched_layers *
1072 				       sizeof(*hw->layer_info)),
1073 				      GFP_KERNEL);
1074 	if (!hw->layer_info) {
1075 		status = ICE_ERR_NO_MEMORY;
1076 		goto sched_query_out;
1077 	}
1078 
1079 sched_query_out:
1080 	devm_kfree(ice_hw_to_dev(hw), buf);
1081 	return status;
1082 }
1083 
1084 /**
1085  * ice_sched_find_node_in_subtree - Find node in part of base node subtree
1086  * @hw: pointer to the HW struct
1087  * @base: pointer to the base node
1088  * @node: pointer to the node to search
1089  *
1090  * This function checks whether a given node is part of the base node
1091  * subtree or not
1092  */
1093 static bool
1094 ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
1095 			       struct ice_sched_node *node)
1096 {
1097 	u8 i;
1098 
1099 	for (i = 0; i < base->num_children; i++) {
1100 		struct ice_sched_node *child = base->children[i];
1101 
1102 		if (node == child)
1103 			return true;
1104 
1105 		if (child->tx_sched_layer > node->tx_sched_layer)
1106 			return false;
1107 
1108 		/* this recursion is intentional, and wouldn't
1109 		 * go more than 8 calls
1110 		 */
1111 		if (ice_sched_find_node_in_subtree(hw, child, node))
1112 			return true;
1113 	}
1114 	return false;
1115 }
1116 
1117 /**
1118  * ice_sched_get_free_qparent - Get a free LAN or RDMA queue group node
1119  * @pi: port information structure
1120  * @vsi_handle: software VSI handle
1121  * @tc: branch number
1122  * @owner: LAN or RDMA
1123  *
1124  * This function retrieves a free LAN or RDMA queue group node
1125  */
1126 struct ice_sched_node *
1127 ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
1128 			   u8 owner)
1129 {
1130 	struct ice_sched_node *vsi_node, *qgrp_node = NULL;
1131 	struct ice_vsi_ctx *vsi_ctx;
1132 	u16 max_children;
1133 	u8 qgrp_layer;
1134 
1135 	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
1136 	max_children = pi->hw->max_children[qgrp_layer];
1137 
1138 	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
1139 	if (!vsi_ctx)
1140 		return NULL;
1141 	vsi_node = vsi_ctx->sched.vsi_node[tc];
1142 	/* validate invalid VSI ID */
1143 	if (!vsi_node)
1144 		goto lan_q_exit;
1145 
1146 	/* get the first queue group node from VSI sub-tree */
1147 	qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
1148 	while (qgrp_node) {
1149 		/* make sure the qgroup node is part of the VSI subtree */
1150 		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
1151 			if (qgrp_node->num_children < max_children &&
1152 			    qgrp_node->owner == owner)
1153 				break;
1154 		qgrp_node = qgrp_node->sibling;
1155 	}
1156 
1157 lan_q_exit:
1158 	return qgrp_node;
1159 }
1160 
1161 /**
1162  * ice_sched_get_vsi_node - Get a VSI node based on VSI ID
1163  * @hw: pointer to the HW struct
1164  * @tc_node: pointer to the TC node
1165  * @vsi_handle: software VSI handle
1166  *
1167  * This function retrieves a VSI node for a given VSI ID from a given
1168  * TC branch
1169  */
1170 static struct ice_sched_node *
1171 ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
1172 		       u16 vsi_handle)
1173 {
1174 	struct ice_sched_node *node;
1175 	u8 vsi_layer;
1176 
1177 	vsi_layer = ice_sched_get_vsi_layer(hw);
1178 	node = ice_sched_get_first_node(hw->port_info, tc_node, vsi_layer);
1179 
1180 	/* Check whether it already exists */
1181 	while (node) {
1182 		if (node->vsi_handle == vsi_handle)
1183 			return node;
1184 		node = node->sibling;
1185 	}
1186 
1187 	return node;
1188 }
1189 
1190 /**
1191  * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
1192  * @hw: pointer to the HW struct
1193  * @num_qs: number of queues
1194  * @num_nodes: num nodes array
1195  *
1196  * This function calculates the number of VSI child nodes based on the
1197  * number of queues.
1198  */
1199 static void
1200 ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
1201 {
1202 	u16 num = num_qs;
1203 	u8 i, qgl, vsil;
1204 
1205 	qgl = ice_sched_get_qgrp_layer(hw);
1206 	vsil = ice_sched_get_vsi_layer(hw);
1207 
1208 	/* calculate num nodes from queue group to VSI layer */
1209 	for (i = qgl; i > vsil; i--) {
1210 		/* round to the next integer if there is a remainder */
1211 		num = DIV_ROUND_UP(num, hw->max_children[i]);
1212 
1213 		/* need at least one node */
1214 		num_nodes[i] = num ? num : 1;
1215 	}
1216 }
1217 
1218 /**
1219  * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
1220  * @pi: port information structure
1221  * @vsi_handle: software VSI handle
1222  * @tc_node: pointer to the TC node
1223  * @num_nodes: pointer to the num nodes that needs to be added per layer
1224  * @owner: node owner (LAN or RDMA)
1225  *
1226  * This function adds the VSI child nodes to tree. It gets called for
1227  * LAN and RDMA separately.
1228  */
1229 static enum ice_status
1230 ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
1231 			      struct ice_sched_node *tc_node, u16 *num_nodes,
1232 			      u8 owner)
1233 {
1234 	struct ice_sched_node *parent, *node;
1235 	struct ice_hw *hw = pi->hw;
1236 	enum ice_status status;
1237 	u32 first_node_teid;
1238 	u16 num_added = 0;
1239 	u8 i, qgl, vsil;
1240 
1241 	qgl = ice_sched_get_qgrp_layer(hw);
1242 	vsil = ice_sched_get_vsi_layer(hw);
1243 	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1244 	for (i = vsil + 1; i <= qgl; i++) {
1245 		if (!parent)
1246 			return ICE_ERR_CFG;
1247 
1248 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
1249 						      num_nodes[i],
1250 						      &first_node_teid,
1251 						      &num_added);
1252 		if (status || num_nodes[i] != num_added)
1253 			return ICE_ERR_CFG;
1254 
1255 		/* The newly added node can be a new parent for the next
1256 		 * layer nodes
1257 		 */
1258 		if (num_added) {
1259 			parent = ice_sched_find_node_by_teid(tc_node,
1260 							     first_node_teid);
1261 			node = parent;
1262 			while (node) {
1263 				node->owner = owner;
1264 				node = node->sibling;
1265 			}
1266 		} else {
1267 			parent = parent->children[0];
1268 		}
1269 	}
1270 
1271 	return 0;
1272 }
1273 
1274 /**
1275  * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
1276  * @hw: pointer to the HW struct
1277  * @tc_node: pointer to TC node
1278  * @num_nodes: pointer to num nodes array
1279  *
1280  * This function calculates the number of supported nodes needed to add this
1281  * VSI into Tx tree including the VSI, parent and intermediate nodes in below
1282  * layers
1283  */
1284 static void
1285 ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
1286 				 struct ice_sched_node *tc_node, u16 *num_nodes)
1287 {
1288 	struct ice_sched_node *node;
1289 	u8 vsil;
1290 	int i;
1291 
1292 	vsil = ice_sched_get_vsi_layer(hw);
1293 	for (i = vsil; i >= hw->sw_entry_point_layer; i--)
1294 		/* Add intermediate nodes if TC has no children and
1295 		 * need at least one node for VSI
1296 		 */
1297 		if (!tc_node->num_children || i == vsil) {
1298 			num_nodes[i]++;
1299 		} else {
1300 			/* If intermediate nodes are reached max children
1301 			 * then add a new one.
1302 			 */
1303 			node = ice_sched_get_first_node(hw->port_info, tc_node,
1304 							(u8)i);
1305 			/* scan all the siblings */
1306 			while (node) {
1307 				if (node->num_children < hw->max_children[i])
1308 					break;
1309 				node = node->sibling;
1310 			}
1311 
1312 			/* tree has one intermediate node to add this new VSI.
1313 			 * So no need to calculate supported nodes for below
1314 			 * layers.
1315 			 */
1316 			if (node)
1317 				break;
1318 			/* all the nodes are full, allocate a new one */
1319 			num_nodes[i]++;
1320 		}
1321 }
1322 
1323 /**
1324  * ice_sched_add_vsi_support_nodes - add VSI supported nodes into Tx tree
1325  * @pi: port information structure
1326  * @vsi_handle: software VSI handle
1327  * @tc_node: pointer to TC node
1328  * @num_nodes: pointer to num nodes array
1329  *
1330  * This function adds the VSI supported nodes into Tx tree including the
1331  * VSI, its parent and intermediate nodes in below layers
1332  */
1333 static enum ice_status
1334 ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
1335 				struct ice_sched_node *tc_node, u16 *num_nodes)
1336 {
1337 	struct ice_sched_node *parent = tc_node;
1338 	enum ice_status status;
1339 	u32 first_node_teid;
1340 	u16 num_added = 0;
1341 	u8 i, vsil;
1342 
1343 	if (!pi)
1344 		return ICE_ERR_PARAM;
1345 
1346 	vsil = ice_sched_get_vsi_layer(pi->hw);
1347 	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
1348 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
1349 						      i, num_nodes[i],
1350 						      &first_node_teid,
1351 						      &num_added);
1352 		if (status || num_nodes[i] != num_added)
1353 			return ICE_ERR_CFG;
1354 
1355 		/* The newly added node can be a new parent for the next
1356 		 * layer nodes
1357 		 */
1358 		if (num_added)
1359 			parent = ice_sched_find_node_by_teid(tc_node,
1360 							     first_node_teid);
1361 		else
1362 			parent = parent->children[0];
1363 
1364 		if (!parent)
1365 			return ICE_ERR_CFG;
1366 
1367 		if (i == vsil)
1368 			parent->vsi_handle = vsi_handle;
1369 	}
1370 
1371 	return 0;
1372 }
1373 
1374 /**
1375  * ice_sched_add_vsi_to_topo - add a new VSI into tree
1376  * @pi: port information structure
1377  * @vsi_handle: software VSI handle
1378  * @tc: TC number
1379  *
1380  * This function adds a new VSI into scheduler tree
1381  */
1382 static enum ice_status
1383 ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
1384 {
1385 	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1386 	struct ice_sched_node *tc_node;
1387 	struct ice_hw *hw = pi->hw;
1388 
1389 	tc_node = ice_sched_get_tc_node(pi, tc);
1390 	if (!tc_node)
1391 		return ICE_ERR_PARAM;
1392 
1393 	/* calculate number of supported nodes needed for this VSI */
1394 	ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
1395 
1396 	/* add VSI supported nodes to TC subtree */
1397 	return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
1398 					       num_nodes);
1399 }
1400 
1401 /**
1402  * ice_sched_update_vsi_child_nodes - update VSI child nodes
1403  * @pi: port information structure
1404  * @vsi_handle: software VSI handle
1405  * @tc: TC number
1406  * @new_numqs: new number of max queues
1407  * @owner: owner of this subtree
1408  *
1409  * This function updates the VSI child nodes based on the number of queues
1410  */
1411 static enum ice_status
1412 ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
1413 				 u8 tc, u16 new_numqs, u8 owner)
1414 {
1415 	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1416 	struct ice_sched_node *vsi_node;
1417 	struct ice_sched_node *tc_node;
1418 	struct ice_vsi_ctx *vsi_ctx;
1419 	enum ice_status status = 0;
1420 	struct ice_hw *hw = pi->hw;
1421 	u16 prev_numqs;
1422 
1423 	tc_node = ice_sched_get_tc_node(pi, tc);
1424 	if (!tc_node)
1425 		return ICE_ERR_CFG;
1426 
1427 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1428 	if (!vsi_node)
1429 		return ICE_ERR_CFG;
1430 
1431 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
1432 	if (!vsi_ctx)
1433 		return ICE_ERR_PARAM;
1434 
1435 	prev_numqs = vsi_ctx->sched.max_lanq[tc];
1436 	/* num queues are not changed or less than the previous number */
1437 	if (new_numqs <= prev_numqs)
1438 		return status;
1439 	status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
1440 	if (status)
1441 		return status;
1442 
1443 	if (new_numqs)
1444 		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
1445 	/* Keep the max number of queue configuration all the time. Update the
1446 	 * tree only if number of queues > previous number of queues. This may
1447 	 * leave some extra nodes in the tree if number of queues < previous
1448 	 * number but that wouldn't harm anything. Removing those extra nodes
1449 	 * may complicate the code if those nodes are part of SRL or
1450 	 * individually rate limited.
1451 	 */
1452 	status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
1453 					       new_num_nodes, owner);
1454 	if (status)
1455 		return status;
1456 	vsi_ctx->sched.max_lanq[tc] = new_numqs;
1457 
1458 	return 0;
1459 }
1460 
1461 /**
1462  * ice_sched_cfg_vsi - configure the new/existing VSI
1463  * @pi: port information structure
1464  * @vsi_handle: software VSI handle
1465  * @tc: TC number
1466  * @maxqs: max number of queues
1467  * @owner: LAN or RDMA
1468  * @enable: TC enabled or disabled
1469  *
1470  * This function adds/updates VSI nodes based on the number of queues. If TC is
1471  * enabled and VSI is in suspended state then resume the VSI back. If TC is
1472  * disabled then suspend the VSI if it is not already.
1473  */
1474 enum ice_status
1475 ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
1476 		  u8 owner, bool enable)
1477 {
1478 	struct ice_sched_node *vsi_node, *tc_node;
1479 	struct ice_vsi_ctx *vsi_ctx;
1480 	enum ice_status status = 0;
1481 	struct ice_hw *hw = pi->hw;
1482 
1483 	ice_debug(pi->hw, ICE_DBG_SCHED, "add/config VSI %d\n", vsi_handle);
1484 	tc_node = ice_sched_get_tc_node(pi, tc);
1485 	if (!tc_node)
1486 		return ICE_ERR_PARAM;
1487 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
1488 	if (!vsi_ctx)
1489 		return ICE_ERR_PARAM;
1490 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1491 
1492 	/* suspend the VSI if TC is not enabled */
1493 	if (!enable) {
1494 		if (vsi_node && vsi_node->in_use) {
1495 			u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1496 
1497 			status = ice_sched_suspend_resume_elems(hw, 1, &teid,
1498 								true);
1499 			if (!status)
1500 				vsi_node->in_use = false;
1501 		}
1502 		return status;
1503 	}
1504 
1505 	/* TC is enabled, if it is a new VSI then add it to the tree */
1506 	if (!vsi_node) {
1507 		status = ice_sched_add_vsi_to_topo(pi, vsi_handle, tc);
1508 		if (status)
1509 			return status;
1510 
1511 		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1512 		if (!vsi_node)
1513 			return ICE_ERR_CFG;
1514 
1515 		vsi_ctx->sched.vsi_node[tc] = vsi_node;
1516 		vsi_node->in_use = true;
1517 		/* invalidate the max queues whenever VSI gets added first time
1518 		 * into the scheduler tree (boot or after reset). We need to
1519 		 * recreate the child nodes all the time in these cases.
1520 		 */
1521 		vsi_ctx->sched.max_lanq[tc] = 0;
1522 	}
1523 
1524 	/* update the VSI child nodes */
1525 	status = ice_sched_update_vsi_child_nodes(pi, vsi_handle, tc, maxqs,
1526 						  owner);
1527 	if (status)
1528 		return status;
1529 
1530 	/* TC is enabled, resume the VSI if it is in the suspend state */
1531 	if (!vsi_node->in_use) {
1532 		u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1533 
1534 		status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
1535 		if (!status)
1536 			vsi_node->in_use = true;
1537 	}
1538 
1539 	return status;
1540 }
1541 
1542 /**
1543  * ice_sched_rm_agg_vsi_entry - remove aggregator related VSI info entry
1544  * @pi: port information structure
1545  * @vsi_handle: software VSI handle
1546  *
1547  * This function removes single aggregator VSI info entry from
1548  * aggregator list.
1549  */
1550 static void
1551 ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
1552 {
1553 	struct ice_sched_agg_info *agg_info;
1554 	struct ice_sched_agg_info *atmp;
1555 
1556 	list_for_each_entry_safe(agg_info, atmp, &pi->hw->agg_list,
1557 				 list_entry) {
1558 		struct ice_sched_agg_vsi_info *agg_vsi_info;
1559 		struct ice_sched_agg_vsi_info *vtmp;
1560 
1561 		list_for_each_entry_safe(agg_vsi_info, vtmp,
1562 					 &agg_info->agg_vsi_list, list_entry)
1563 			if (agg_vsi_info->vsi_handle == vsi_handle) {
1564 				list_del(&agg_vsi_info->list_entry);
1565 				devm_kfree(ice_hw_to_dev(pi->hw),
1566 					   agg_vsi_info);
1567 				return;
1568 			}
1569 	}
1570 }
1571 
1572 /**
1573  * ice_sched_is_leaf_node_present - check for a leaf node in the sub-tree
1574  * @node: pointer to the sub-tree node
1575  *
1576  * This function checks for a leaf node presence in a given sub-tree node.
1577  */
1578 static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
1579 {
1580 	u8 i;
1581 
1582 	for (i = 0; i < node->num_children; i++)
1583 		if (ice_sched_is_leaf_node_present(node->children[i]))
1584 			return true;
1585 	/* check for a leaf node */
1586 	return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF);
1587 }
1588 
1589 /**
1590  * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
1591  * @pi: port information structure
1592  * @vsi_handle: software VSI handle
1593  * @owner: LAN or RDMA
1594  *
1595  * This function removes the VSI and its LAN or RDMA children nodes from the
1596  * scheduler tree.
1597  */
1598 static enum ice_status
1599 ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
1600 {
1601 	enum ice_status status = ICE_ERR_PARAM;
1602 	struct ice_vsi_ctx *vsi_ctx;
1603 	u8 i;
1604 
1605 	ice_debug(pi->hw, ICE_DBG_SCHED, "removing VSI %d\n", vsi_handle);
1606 	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
1607 		return status;
1608 	mutex_lock(&pi->sched_lock);
1609 	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
1610 	if (!vsi_ctx)
1611 		goto exit_sched_rm_vsi_cfg;
1612 
1613 	ice_for_each_traffic_class(i) {
1614 		struct ice_sched_node *vsi_node, *tc_node;
1615 		u8 j = 0;
1616 
1617 		tc_node = ice_sched_get_tc_node(pi, i);
1618 		if (!tc_node)
1619 			continue;
1620 
1621 		vsi_node = ice_sched_get_vsi_node(pi->hw, tc_node, vsi_handle);
1622 		if (!vsi_node)
1623 			continue;
1624 
1625 		if (ice_sched_is_leaf_node_present(vsi_node)) {
1626 			ice_debug(pi->hw, ICE_DBG_SCHED,
1627 				  "VSI has leaf nodes in TC %d\n", i);
1628 			status = ICE_ERR_IN_USE;
1629 			goto exit_sched_rm_vsi_cfg;
1630 		}
1631 		while (j < vsi_node->num_children) {
1632 			if (vsi_node->children[j]->owner == owner) {
1633 				ice_free_sched_node(pi, vsi_node->children[j]);
1634 
1635 				/* reset the counter again since the num
1636 				 * children will be updated after node removal
1637 				 */
1638 				j = 0;
1639 			} else {
1640 				j++;
1641 			}
1642 		}
1643 		/* remove the VSI if it has no children */
1644 		if (!vsi_node->num_children) {
1645 			ice_free_sched_node(pi, vsi_node);
1646 			vsi_ctx->sched.vsi_node[i] = NULL;
1647 
1648 			/* clean up aggregator related VSI info if any */
1649 			ice_sched_rm_agg_vsi_info(pi, vsi_handle);
1650 		}
1651 		if (owner == ICE_SCHED_NODE_OWNER_LAN)
1652 			vsi_ctx->sched.max_lanq[i] = 0;
1653 	}
1654 	status = 0;
1655 
1656 exit_sched_rm_vsi_cfg:
1657 	mutex_unlock(&pi->sched_lock);
1658 	return status;
1659 }
1660 
1661 /**
1662  * ice_rm_vsi_lan_cfg - remove VSI and its LAN children nodes
1663  * @pi: port information structure
1664  * @vsi_handle: software VSI handle
1665  *
1666  * This function clears the VSI and its LAN children nodes from scheduler tree
1667  * for all TCs.
1668  */
1669 enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
1670 {
1671 	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
1672 }
1673