xref: /linux/drivers/net/ethernet/intel/ice/ice_sched.c (revision dd5b2498d845f925904cb2afabb6ba11bfc317c5)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018, Intel Corporation. */
3 
4 #include "ice_sched.h"
5 
6 /**
7  * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
8  * @pi: port information structure
9  * @info: Scheduler element information from firmware
10  *
11  * This function inserts the root node of the scheduling tree topology
12  * to the SW DB.
13  */
14 static enum ice_status
15 ice_sched_add_root_node(struct ice_port_info *pi,
16 			struct ice_aqc_txsched_elem_data *info)
17 {
18 	struct ice_sched_node *root;
19 	struct ice_hw *hw;
20 
21 	if (!pi)
22 		return ICE_ERR_PARAM;
23 
24 	hw = pi->hw;
25 
26 	root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
27 	if (!root)
28 		return ICE_ERR_NO_MEMORY;
29 
30 	/* coverity[suspicious_sizeof] */
31 	root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0],
32 				      sizeof(*root), GFP_KERNEL);
33 	if (!root->children) {
34 		devm_kfree(ice_hw_to_dev(hw), root);
35 		return ICE_ERR_NO_MEMORY;
36 	}
37 
38 	memcpy(&root->info, info, sizeof(*info));
39 	pi->root = root;
40 	return 0;
41 }
42 
43 /**
44  * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
45  * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
46  * @teid: node teid to search
47  *
48  * This function searches for a node matching the teid in the scheduling tree
49  * from the SW DB. The search is recursive and is restricted by the number of
50  * layers it has searched through; stopping at the max supported layer.
51  *
52  * This function needs to be called when holding the port_info->sched_lock
53  */
54 struct ice_sched_node *
55 ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
56 {
57 	u16 i;
58 
59 	/* The TEID is same as that of the start_node */
60 	if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
61 		return start_node;
62 
63 	/* The node has no children or is at the max layer */
64 	if (!start_node->num_children ||
65 	    start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
66 	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
67 		return NULL;
68 
69 	/* Check if teid matches to any of the children nodes */
70 	for (i = 0; i < start_node->num_children; i++)
71 		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
72 			return start_node->children[i];
73 
74 	/* Search within each child's sub-tree */
75 	for (i = 0; i < start_node->num_children; i++) {
76 		struct ice_sched_node *tmp;
77 
78 		tmp = ice_sched_find_node_by_teid(start_node->children[i],
79 						  teid);
80 		if (tmp)
81 			return tmp;
82 	}
83 
84 	return NULL;
85 }
86 
87 /**
88  * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
89  * @hw: pointer to the hw struct
90  * @cmd_opc: cmd opcode
91  * @elems_req: number of elements to request
92  * @buf: pointer to buffer
93  * @buf_size: buffer size in bytes
94  * @elems_resp: returns total number of elements response
95  * @cd: pointer to command details structure or NULL
96  *
97  * This function sends a scheduling elements cmd (cmd_opc)
98  */
99 static enum ice_status
100 ice_aqc_send_sched_elem_cmd(struct ice_hw *hw, enum ice_adminq_opc cmd_opc,
101 			    u16 elems_req, void *buf, u16 buf_size,
102 			    u16 *elems_resp, struct ice_sq_cd *cd)
103 {
104 	struct ice_aqc_sched_elem_cmd *cmd;
105 	struct ice_aq_desc desc;
106 	enum ice_status status;
107 
108 	cmd = &desc.params.sched_elem_cmd;
109 	ice_fill_dflt_direct_cmd_desc(&desc, cmd_opc);
110 	cmd->num_elem_req = cpu_to_le16(elems_req);
111 	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
112 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
113 	if (!status && elems_resp)
114 		*elems_resp = le16_to_cpu(cmd->num_elem_resp);
115 
116 	return status;
117 }
118 
119 /**
120  * ice_aq_query_sched_elems - query scheduler elements
121  * @hw: pointer to the hw struct
122  * @elems_req: number of elements to query
123  * @buf: pointer to buffer
124  * @buf_size: buffer size in bytes
125  * @elems_ret: returns total number of elements returned
126  * @cd: pointer to command details structure or NULL
127  *
128  * Query scheduling elements (0x0404)
129  */
130 static enum ice_status
131 ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
132 			 struct ice_aqc_get_elem *buf, u16 buf_size,
133 			 u16 *elems_ret, struct ice_sq_cd *cd)
134 {
135 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_get_sched_elems,
136 					   elems_req, (void *)buf, buf_size,
137 					   elems_ret, cd);
138 }
139 
140 /**
141  * ice_sched_query_elem - query element information from hw
142  * @hw: pointer to the hw struct
143  * @node_teid: node teid to be queried
144  * @buf: buffer to element information
145  *
146  * This function queries HW element information
147  */
148 static enum ice_status
149 ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
150 		     struct ice_aqc_get_elem *buf)
151 {
152 	u16 buf_size, num_elem_ret = 0;
153 	enum ice_status status;
154 
155 	buf_size = sizeof(*buf);
156 	memset(buf, 0, buf_size);
157 	buf->generic[0].node_teid = cpu_to_le32(node_teid);
158 	status = ice_aq_query_sched_elems(hw, 1, buf, buf_size, &num_elem_ret,
159 					  NULL);
160 	if (status || num_elem_ret != 1)
161 		ice_debug(hw, ICE_DBG_SCHED, "query element failed\n");
162 	return status;
163 }
164 
165 /**
166  * ice_sched_add_node - Insert the Tx scheduler node in SW DB
167  * @pi: port information structure
168  * @layer: Scheduler layer of the node
169  * @info: Scheduler element information from firmware
170  *
171  * This function inserts a scheduler node to the SW DB.
172  */
173 enum ice_status
174 ice_sched_add_node(struct ice_port_info *pi, u8 layer,
175 		   struct ice_aqc_txsched_elem_data *info)
176 {
177 	struct ice_sched_node *parent;
178 	struct ice_aqc_get_elem elem;
179 	struct ice_sched_node *node;
180 	enum ice_status status;
181 	struct ice_hw *hw;
182 
183 	if (!pi)
184 		return ICE_ERR_PARAM;
185 
186 	hw = pi->hw;
187 
188 	/* A valid parent node should be there */
189 	parent = ice_sched_find_node_by_teid(pi->root,
190 					     le32_to_cpu(info->parent_teid));
191 	if (!parent) {
192 		ice_debug(hw, ICE_DBG_SCHED,
193 			  "Parent Node not found for parent_teid=0x%x\n",
194 			  le32_to_cpu(info->parent_teid));
195 		return ICE_ERR_PARAM;
196 	}
197 
198 	/* query the current node information from FW  before additing it
199 	 * to the SW DB
200 	 */
201 	status = ice_sched_query_elem(hw, le32_to_cpu(info->node_teid), &elem);
202 	if (status)
203 		return status;
204 
205 	node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
206 	if (!node)
207 		return ICE_ERR_NO_MEMORY;
208 	if (hw->max_children[layer]) {
209 		/* coverity[suspicious_sizeof] */
210 		node->children = devm_kcalloc(ice_hw_to_dev(hw),
211 					      hw->max_children[layer],
212 					      sizeof(*node), GFP_KERNEL);
213 		if (!node->children) {
214 			devm_kfree(ice_hw_to_dev(hw), node);
215 			return ICE_ERR_NO_MEMORY;
216 		}
217 	}
218 
219 	node->in_use = true;
220 	node->parent = parent;
221 	node->tx_sched_layer = layer;
222 	parent->children[parent->num_children++] = node;
223 	memcpy(&node->info, &elem.generic[0], sizeof(node->info));
224 	return 0;
225 }
226 
227 /**
228  * ice_aq_delete_sched_elems - delete scheduler elements
229  * @hw: pointer to the hw struct
230  * @grps_req: number of groups to delete
231  * @buf: pointer to buffer
232  * @buf_size: buffer size in bytes
233  * @grps_del: returns total number of elements deleted
234  * @cd: pointer to command details structure or NULL
235  *
236  * Delete scheduling elements (0x040F)
237  */
238 static enum ice_status
239 ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
240 			  struct ice_aqc_delete_elem *buf, u16 buf_size,
241 			  u16 *grps_del, struct ice_sq_cd *cd)
242 {
243 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_delete_sched_elems,
244 					   grps_req, (void *)buf, buf_size,
245 					   grps_del, cd);
246 }
247 
248 /**
249  * ice_sched_remove_elems - remove nodes from hw
250  * @hw: pointer to the hw struct
251  * @parent: pointer to the parent node
252  * @num_nodes: number of nodes
253  * @node_teids: array of node teids to be deleted
254  *
255  * This function remove nodes from hw
256  */
257 static enum ice_status
258 ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
259 		       u16 num_nodes, u32 *node_teids)
260 {
261 	struct ice_aqc_delete_elem *buf;
262 	u16 i, num_groups_removed = 0;
263 	enum ice_status status;
264 	u16 buf_size;
265 
266 	buf_size = sizeof(*buf) + sizeof(u32) * (num_nodes - 1);
267 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
268 	if (!buf)
269 		return ICE_ERR_NO_MEMORY;
270 
271 	buf->hdr.parent_teid = parent->info.node_teid;
272 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
273 	for (i = 0; i < num_nodes; i++)
274 		buf->teid[i] = cpu_to_le32(node_teids[i]);
275 
276 	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
277 					   &num_groups_removed, NULL);
278 	if (status || num_groups_removed != 1)
279 		ice_debug(hw, ICE_DBG_SCHED, "remove node failed FW error %d\n",
280 			  hw->adminq.sq_last_status);
281 
282 	devm_kfree(ice_hw_to_dev(hw), buf);
283 	return status;
284 }
285 
286 /**
287  * ice_sched_get_first_node - get the first node of the given layer
288  * @hw: pointer to the hw struct
289  * @parent: pointer the base node of the subtree
290  * @layer: layer number
291  *
292  * This function retrieves the first node of the given layer from the subtree
293  */
294 static struct ice_sched_node *
295 ice_sched_get_first_node(struct ice_hw *hw, struct ice_sched_node *parent,
296 			 u8 layer)
297 {
298 	u8 i;
299 
300 	if (layer < hw->sw_entry_point_layer)
301 		return NULL;
302 	for (i = 0; i < parent->num_children; i++) {
303 		struct ice_sched_node *node = parent->children[i];
304 
305 		if (node) {
306 			if (node->tx_sched_layer == layer)
307 				return node;
308 			/* this recursion is intentional, and wouldn't
309 			 * go more than 9 calls
310 			 */
311 			return ice_sched_get_first_node(hw, node, layer);
312 		}
313 	}
314 	return NULL;
315 }
316 
317 /**
318  * ice_sched_get_tc_node - get pointer to TC node
319  * @pi: port information structure
320  * @tc: TC number
321  *
322  * This function returns the TC node pointer
323  */
324 struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
325 {
326 	u8 i;
327 
328 	if (!pi)
329 		return NULL;
330 	for (i = 0; i < pi->root->num_children; i++)
331 		if (pi->root->children[i]->tc_num == tc)
332 			return pi->root->children[i];
333 	return NULL;
334 }
335 
336 /**
337  * ice_free_sched_node - Free a Tx scheduler node from SW DB
338  * @pi: port information structure
339  * @node: pointer to the ice_sched_node struct
340  *
341  * This function frees up a node from SW DB as well as from HW
342  *
343  * This function needs to be called with the port_info->sched_lock held
344  */
345 void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
346 {
347 	struct ice_sched_node *parent;
348 	struct ice_hw *hw = pi->hw;
349 	u8 i, j;
350 
351 	/* Free the children before freeing up the parent node
352 	 * The parent array is updated below and that shifts the nodes
353 	 * in the array. So always pick the first child if num children > 0
354 	 */
355 	while (node->num_children)
356 		ice_free_sched_node(pi, node->children[0]);
357 
358 	/* Leaf, TC and root nodes can't be deleted by SW */
359 	if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
360 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
361 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
362 	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
363 		u32 teid = le32_to_cpu(node->info.node_teid);
364 
365 		ice_sched_remove_elems(hw, node->parent, 1, &teid);
366 	}
367 	parent = node->parent;
368 	/* root has no parent */
369 	if (parent) {
370 		struct ice_sched_node *p, *tc_node;
371 
372 		/* update the parent */
373 		for (i = 0; i < parent->num_children; i++)
374 			if (parent->children[i] == node) {
375 				for (j = i + 1; j < parent->num_children; j++)
376 					parent->children[j - 1] =
377 						parent->children[j];
378 				parent->num_children--;
379 				break;
380 			}
381 
382 		/* search for previous sibling that points to this node and
383 		 * remove the reference
384 		 */
385 		tc_node = ice_sched_get_tc_node(pi, node->tc_num);
386 		if (!tc_node) {
387 			ice_debug(hw, ICE_DBG_SCHED,
388 				  "Invalid TC number %d\n", node->tc_num);
389 			goto err_exit;
390 		}
391 		p = ice_sched_get_first_node(hw, tc_node, node->tx_sched_layer);
392 		while (p) {
393 			if (p->sibling == node) {
394 				p->sibling = node->sibling;
395 				break;
396 			}
397 			p = p->sibling;
398 		}
399 	}
400 err_exit:
401 	/* leaf nodes have no children */
402 	if (node->children)
403 		devm_kfree(ice_hw_to_dev(hw), node->children);
404 	devm_kfree(ice_hw_to_dev(hw), node);
405 }
406 
407 /**
408  * ice_aq_get_dflt_topo - gets default scheduler topology
409  * @hw: pointer to the hw struct
410  * @lport: logical port number
411  * @buf: pointer to buffer
412  * @buf_size: buffer size in bytes
413  * @num_branches: returns total number of queue to port branches
414  * @cd: pointer to command details structure or NULL
415  *
416  * Get default scheduler topology (0x400)
417  */
418 static enum ice_status
419 ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
420 		     struct ice_aqc_get_topo_elem *buf, u16 buf_size,
421 		     u8 *num_branches, struct ice_sq_cd *cd)
422 {
423 	struct ice_aqc_get_topo *cmd;
424 	struct ice_aq_desc desc;
425 	enum ice_status status;
426 
427 	cmd = &desc.params.get_topo;
428 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
429 	cmd->port_num = lport;
430 	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
431 	if (!status && num_branches)
432 		*num_branches = cmd->num_branches;
433 
434 	return status;
435 }
436 
437 /**
438  * ice_aq_add_sched_elems - adds scheduling element
439  * @hw: pointer to the hw struct
440  * @grps_req: the number of groups that are requested to be added
441  * @buf: pointer to buffer
442  * @buf_size: buffer size in bytes
443  * @grps_added: returns total number of groups added
444  * @cd: pointer to command details structure or NULL
445  *
446  * Add scheduling elements (0x0401)
447  */
448 static enum ice_status
449 ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
450 		       struct ice_aqc_add_elem *buf, u16 buf_size,
451 		       u16 *grps_added, struct ice_sq_cd *cd)
452 {
453 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_add_sched_elems,
454 					   grps_req, (void *)buf, buf_size,
455 					   grps_added, cd);
456 }
457 
458 /**
459  * ice_aq_suspend_sched_elems - suspend scheduler elements
460  * @hw: pointer to the hw struct
461  * @elems_req: number of elements to suspend
462  * @buf: pointer to buffer
463  * @buf_size: buffer size in bytes
464  * @elems_ret: returns total number of elements suspended
465  * @cd: pointer to command details structure or NULL
466  *
467  * Suspend scheduling elements (0x0409)
468  */
469 static enum ice_status
470 ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req,
471 			   struct ice_aqc_suspend_resume_elem *buf,
472 			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
473 {
474 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_suspend_sched_elems,
475 					   elems_req, (void *)buf, buf_size,
476 					   elems_ret, cd);
477 }
478 
479 /**
480  * ice_aq_resume_sched_elems - resume scheduler elements
481  * @hw: pointer to the hw struct
482  * @elems_req: number of elements to resume
483  * @buf: pointer to buffer
484  * @buf_size: buffer size in bytes
485  * @elems_ret: returns total number of elements resumed
486  * @cd: pointer to command details structure or NULL
487  *
488  * resume scheduling elements (0x040A)
489  */
490 static enum ice_status
491 ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req,
492 			  struct ice_aqc_suspend_resume_elem *buf,
493 			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
494 {
495 	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_resume_sched_elems,
496 					   elems_req, (void *)buf, buf_size,
497 					   elems_ret, cd);
498 }
499 
500 /**
501  * ice_aq_query_sched_res - query scheduler resource
502  * @hw: pointer to the hw struct
503  * @buf_size: buffer size in bytes
504  * @buf: pointer to buffer
505  * @cd: pointer to command details structure or NULL
506  *
507  * Query scheduler resource allocation (0x0412)
508  */
509 static enum ice_status
510 ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
511 		       struct ice_aqc_query_txsched_res_resp *buf,
512 		       struct ice_sq_cd *cd)
513 {
514 	struct ice_aq_desc desc;
515 
516 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
517 	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
518 }
519 
520 /**
521  * ice_sched_suspend_resume_elems - suspend or resume hw nodes
522  * @hw: pointer to the hw struct
523  * @num_nodes: number of nodes
524  * @node_teids: array of node teids to be suspended or resumed
525  * @suspend: true means suspend / false means resume
526  *
527  * This function suspends or resumes hw nodes
528  */
529 static enum ice_status
530 ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
531 			       bool suspend)
532 {
533 	struct ice_aqc_suspend_resume_elem *buf;
534 	u16 i, buf_size, num_elem_ret = 0;
535 	enum ice_status status;
536 
537 	buf_size = sizeof(*buf) * num_nodes;
538 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
539 	if (!buf)
540 		return ICE_ERR_NO_MEMORY;
541 
542 	for (i = 0; i < num_nodes; i++)
543 		buf->teid[i] = cpu_to_le32(node_teids[i]);
544 
545 	if (suspend)
546 		status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
547 						    buf_size, &num_elem_ret,
548 						    NULL);
549 	else
550 		status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
551 						   buf_size, &num_elem_ret,
552 						   NULL);
553 	if (status || num_elem_ret != num_nodes)
554 		ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
555 
556 	devm_kfree(ice_hw_to_dev(hw), buf);
557 	return status;
558 }
559 
560 /**
561  * ice_sched_clear_agg - clears the agg related information
562  * @hw: pointer to the hardware structure
563  *
564  * This function removes agg list and free up agg related memory
565  * previously allocated.
566  */
567 void ice_sched_clear_agg(struct ice_hw *hw)
568 {
569 	struct ice_sched_agg_info *agg_info;
570 	struct ice_sched_agg_info *atmp;
571 
572 	list_for_each_entry_safe(agg_info, atmp, &hw->agg_list, list_entry) {
573 		struct ice_sched_agg_vsi_info *agg_vsi_info;
574 		struct ice_sched_agg_vsi_info *vtmp;
575 
576 		list_for_each_entry_safe(agg_vsi_info, vtmp,
577 					 &agg_info->agg_vsi_list, list_entry) {
578 			list_del(&agg_vsi_info->list_entry);
579 			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
580 		}
581 		list_del(&agg_info->list_entry);
582 		devm_kfree(ice_hw_to_dev(hw), agg_info);
583 	}
584 }
585 
586 /**
587  * ice_sched_clear_tx_topo - clears the scheduler tree nodes
588  * @pi: port information structure
589  *
590  * This function removes all the nodes from HW as well as from SW DB.
591  */
592 static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
593 {
594 	if (!pi)
595 		return;
596 	if (pi->root) {
597 		ice_free_sched_node(pi, pi->root);
598 		pi->root = NULL;
599 	}
600 }
601 
602 /**
603  * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
604  * @pi: port information structure
605  *
606  * Cleanup scheduling elements from SW DB
607  */
608 void ice_sched_clear_port(struct ice_port_info *pi)
609 {
610 	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
611 		return;
612 
613 	pi->port_state = ICE_SCHED_PORT_STATE_INIT;
614 	mutex_lock(&pi->sched_lock);
615 	ice_sched_clear_tx_topo(pi);
616 	mutex_unlock(&pi->sched_lock);
617 	mutex_destroy(&pi->sched_lock);
618 }
619 
620 /**
621  * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
622  * @hw: pointer to the hw struct
623  *
624  * Cleanup scheduling elements from SW DB for all the ports
625  */
626 void ice_sched_cleanup_all(struct ice_hw *hw)
627 {
628 	if (!hw)
629 		return;
630 
631 	if (hw->layer_info) {
632 		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
633 		hw->layer_info = NULL;
634 	}
635 
636 	if (hw->port_info)
637 		ice_sched_clear_port(hw->port_info);
638 
639 	hw->num_tx_sched_layers = 0;
640 	hw->num_tx_sched_phys_layers = 0;
641 	hw->flattened_layers = 0;
642 	hw->max_cgds = 0;
643 }
644 
645 /**
646  * ice_sched_add_elems - add nodes to hw and SW DB
647  * @pi: port information structure
648  * @tc_node: pointer to the branch node
649  * @parent: pointer to the parent node
650  * @layer: layer number to add nodes
651  * @num_nodes: number of nodes
652  * @num_nodes_added: pointer to num nodes added
653  * @first_node_teid: if new nodes are added then return the teid of first node
654  *
655  * This function add nodes to hw as well as to SW DB for a given layer
656  */
657 static enum ice_status
658 ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
659 		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
660 		    u16 *num_nodes_added, u32 *first_node_teid)
661 {
662 	struct ice_sched_node *prev, *new_node;
663 	struct ice_aqc_add_elem *buf;
664 	u16 i, num_groups_added = 0;
665 	enum ice_status status = 0;
666 	struct ice_hw *hw = pi->hw;
667 	u16 buf_size;
668 	u32 teid;
669 
670 	buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1);
671 	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
672 	if (!buf)
673 		return ICE_ERR_NO_MEMORY;
674 
675 	buf->hdr.parent_teid = parent->info.node_teid;
676 	buf->hdr.num_elems = cpu_to_le16(num_nodes);
677 	for (i = 0; i < num_nodes; i++) {
678 		buf->generic[i].parent_teid = parent->info.node_teid;
679 		buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
680 		buf->generic[i].data.valid_sections =
681 			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
682 			ICE_AQC_ELEM_VALID_EIR;
683 		buf->generic[i].data.generic = 0;
684 		buf->generic[i].data.cir_bw.bw_profile_idx =
685 			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
686 		buf->generic[i].data.cir_bw.bw_alloc =
687 			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
688 		buf->generic[i].data.eir_bw.bw_profile_idx =
689 			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
690 		buf->generic[i].data.eir_bw.bw_alloc =
691 			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
692 	}
693 
694 	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
695 					&num_groups_added, NULL);
696 	if (status || num_groups_added != 1) {
697 		ice_debug(hw, ICE_DBG_SCHED, "add node failed FW Error %d\n",
698 			  hw->adminq.sq_last_status);
699 		devm_kfree(ice_hw_to_dev(hw), buf);
700 		return ICE_ERR_CFG;
701 	}
702 
703 	*num_nodes_added = num_nodes;
704 	/* add nodes to the SW DB */
705 	for (i = 0; i < num_nodes; i++) {
706 		status = ice_sched_add_node(pi, layer, &buf->generic[i]);
707 		if (status) {
708 			ice_debug(hw, ICE_DBG_SCHED,
709 				  "add nodes in SW DB failed status =%d\n",
710 				  status);
711 			break;
712 		}
713 
714 		teid = le32_to_cpu(buf->generic[i].node_teid);
715 		new_node = ice_sched_find_node_by_teid(parent, teid);
716 		if (!new_node) {
717 			ice_debug(hw, ICE_DBG_SCHED,
718 				  "Node is missing for teid =%d\n", teid);
719 			break;
720 		}
721 
722 		new_node->sibling = NULL;
723 		new_node->tc_num = tc_node->tc_num;
724 
725 		/* add it to previous node sibling pointer */
726 		/* Note: siblings are not linked across branches */
727 		prev = ice_sched_get_first_node(hw, tc_node, layer);
728 		if (prev && prev != new_node) {
729 			while (prev->sibling)
730 				prev = prev->sibling;
731 			prev->sibling = new_node;
732 		}
733 
734 		if (i == 0)
735 			*first_node_teid = teid;
736 	}
737 
738 	devm_kfree(ice_hw_to_dev(hw), buf);
739 	return status;
740 }
741 
742 /**
743  * ice_sched_add_nodes_to_layer - Add nodes to a given layer
744  * @pi: port information structure
745  * @tc_node: pointer to TC node
746  * @parent: pointer to parent node
747  * @layer: layer number to add nodes
748  * @num_nodes: number of nodes to be added
749  * @first_node_teid: pointer to the first node teid
750  * @num_nodes_added: pointer to number of nodes added
751  *
752  * This function add nodes to a given layer.
753  */
754 static enum ice_status
755 ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
756 			     struct ice_sched_node *tc_node,
757 			     struct ice_sched_node *parent, u8 layer,
758 			     u16 num_nodes, u32 *first_node_teid,
759 			     u16 *num_nodes_added)
760 {
761 	u32 *first_teid_ptr = first_node_teid;
762 	u16 new_num_nodes, max_child_nodes;
763 	enum ice_status status = 0;
764 	struct ice_hw *hw = pi->hw;
765 	u16 num_added = 0;
766 	u32 temp;
767 
768 	*num_nodes_added = 0;
769 
770 	if (!num_nodes)
771 		return status;
772 
773 	if (!parent || layer < hw->sw_entry_point_layer)
774 		return ICE_ERR_PARAM;
775 
776 	/* max children per node per layer */
777 	max_child_nodes = hw->max_children[parent->tx_sched_layer];
778 
779 	/* current number of children + required nodes exceed max children ? */
780 	if ((parent->num_children + num_nodes) > max_child_nodes) {
781 		/* Fail if the parent is a TC node */
782 		if (parent == tc_node)
783 			return ICE_ERR_CFG;
784 
785 		/* utilize all the spaces if the parent is not full */
786 		if (parent->num_children < max_child_nodes) {
787 			new_num_nodes = max_child_nodes - parent->num_children;
788 			/* this recursion is intentional, and wouldn't
789 			 * go more than 2 calls
790 			 */
791 			status = ice_sched_add_nodes_to_layer(pi, tc_node,
792 							      parent, layer,
793 							      new_num_nodes,
794 							      first_node_teid,
795 							      &num_added);
796 			if (status)
797 				return status;
798 
799 			*num_nodes_added += num_added;
800 		}
801 		/* Don't modify the first node teid memory if the first node was
802 		 * added already in the above call. Instead send some temp
803 		 * memory for all other recursive calls.
804 		 */
805 		if (num_added)
806 			first_teid_ptr = &temp;
807 
808 		new_num_nodes = num_nodes - num_added;
809 
810 		/* This parent is full, try the next sibling */
811 		parent = parent->sibling;
812 
813 		/* this recursion is intentional, for 1024 queues
814 		 * per VSI, it goes max of 16 iterations.
815 		 * 1024 / 8 = 128 layer 8 nodes
816 		 * 128 /8 = 16 (add 8 nodes per iteration)
817 		 */
818 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
819 						      layer, new_num_nodes,
820 						      first_teid_ptr,
821 						      &num_added);
822 		*num_nodes_added += num_added;
823 		return status;
824 	}
825 
826 	status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
827 				     num_nodes_added, first_node_teid);
828 	return status;
829 }
830 
831 /**
832  * ice_sched_get_qgrp_layer - get the current queue group layer number
833  * @hw: pointer to the hw struct
834  *
835  * This function returns the current queue group layer number
836  */
837 static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
838 {
839 	/* It's always total layers - 1, the array is 0 relative so -2 */
840 	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
841 }
842 
843 /**
844  * ice_sched_get_vsi_layer - get the current VSI layer number
845  * @hw: pointer to the hw struct
846  *
847  * This function returns the current VSI layer number
848  */
849 static u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
850 {
851 	/* Num Layers       VSI layer
852 	 *     9               6
853 	 *     7               4
854 	 *     5 or less       sw_entry_point_layer
855 	 */
856 	/* calculate the vsi layer based on number of layers. */
857 	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
858 		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
859 
860 		if (layer > hw->sw_entry_point_layer)
861 			return layer;
862 	}
863 	return hw->sw_entry_point_layer;
864 }
865 
866 /**
867  * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
868  * @pi: port information structure
869  *
870  * This function removes the leaf node that was created by the FW
871  * during initialization
872  */
873 static void ice_rm_dflt_leaf_node(struct ice_port_info *pi)
874 {
875 	struct ice_sched_node *node;
876 
877 	node = pi->root;
878 	while (node) {
879 		if (!node->num_children)
880 			break;
881 		node = node->children[0];
882 	}
883 	if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
884 		u32 teid = le32_to_cpu(node->info.node_teid);
885 		enum ice_status status;
886 
887 		/* remove the default leaf node */
888 		status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
889 		if (!status)
890 			ice_free_sched_node(pi, node);
891 	}
892 }
893 
894 /**
895  * ice_sched_rm_dflt_nodes - free the default nodes in the tree
896  * @pi: port information structure
897  *
898  * This function frees all the nodes except root and TC that were created by
899  * the FW during initialization
900  */
901 static void ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
902 {
903 	struct ice_sched_node *node;
904 
905 	ice_rm_dflt_leaf_node(pi);
906 
907 	/* remove the default nodes except TC and root nodes */
908 	node = pi->root;
909 	while (node) {
910 		if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
911 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
912 		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
913 			ice_free_sched_node(pi, node);
914 			break;
915 		}
916 
917 		if (!node->num_children)
918 			break;
919 		node = node->children[0];
920 	}
921 }
922 
923 /**
924  * ice_sched_init_port - Initialize scheduler by querying information from FW
925  * @pi: port info structure for the tree to cleanup
926  *
927  * This function is the initial call to find the total number of Tx scheduler
928  * resources, default topology created by firmware and storing the information
929  * in SW DB.
930  */
931 enum ice_status ice_sched_init_port(struct ice_port_info *pi)
932 {
933 	struct ice_aqc_get_topo_elem *buf;
934 	enum ice_status status;
935 	struct ice_hw *hw;
936 	u8 num_branches;
937 	u16 num_elems;
938 	u8 i, j;
939 
940 	if (!pi)
941 		return ICE_ERR_PARAM;
942 	hw = pi->hw;
943 
944 	/* Query the Default Topology from FW */
945 	buf = devm_kzalloc(ice_hw_to_dev(hw), ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
946 	if (!buf)
947 		return ICE_ERR_NO_MEMORY;
948 
949 	/* Query default scheduling tree topology */
950 	status = ice_aq_get_dflt_topo(hw, pi->lport, buf, ICE_AQ_MAX_BUF_LEN,
951 				      &num_branches, NULL);
952 	if (status)
953 		goto err_init_port;
954 
955 	/* num_branches should be between 1-8 */
956 	if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
957 		ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
958 			  num_branches);
959 		status = ICE_ERR_PARAM;
960 		goto err_init_port;
961 	}
962 
963 	/* get the number of elements on the default/first branch */
964 	num_elems = le16_to_cpu(buf[0].hdr.num_elems);
965 
966 	/* num_elems should always be between 1-9 */
967 	if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
968 		ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
969 			  num_elems);
970 		status = ICE_ERR_PARAM;
971 		goto err_init_port;
972 	}
973 
974 	/* If the last node is a leaf node then the index of the Q group
975 	 * layer is two less than the number of elements.
976 	 */
977 	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
978 	    ICE_AQC_ELEM_TYPE_LEAF)
979 		pi->last_node_teid =
980 			le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
981 	else
982 		pi->last_node_teid =
983 			le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
984 
985 	/* Insert the Tx Sched root node */
986 	status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
987 	if (status)
988 		goto err_init_port;
989 
990 	/* Parse the default tree and cache the information */
991 	for (i = 0; i < num_branches; i++) {
992 		num_elems = le16_to_cpu(buf[i].hdr.num_elems);
993 
994 		/* Skip root element as already inserted */
995 		for (j = 1; j < num_elems; j++) {
996 			/* update the sw entry point */
997 			if (buf[0].generic[j].data.elem_type ==
998 			    ICE_AQC_ELEM_TYPE_ENTRY_POINT)
999 				hw->sw_entry_point_layer = j;
1000 
1001 			status = ice_sched_add_node(pi, j, &buf[i].generic[j]);
1002 			if (status)
1003 				goto err_init_port;
1004 		}
1005 	}
1006 
1007 	/* Remove the default nodes. */
1008 	if (pi->root)
1009 		ice_sched_rm_dflt_nodes(pi);
1010 
1011 	/* initialize the port for handling the scheduler tree */
1012 	pi->port_state = ICE_SCHED_PORT_STATE_READY;
1013 	mutex_init(&pi->sched_lock);
1014 
1015 err_init_port:
1016 	if (status && pi->root) {
1017 		ice_free_sched_node(pi, pi->root);
1018 		pi->root = NULL;
1019 	}
1020 
1021 	devm_kfree(ice_hw_to_dev(hw), buf);
1022 	return status;
1023 }
1024 
1025 /**
1026  * ice_sched_query_res_alloc - query the FW for num of logical sched layers
1027  * @hw: pointer to the HW struct
1028  *
1029  * query FW for allocated scheduler resources and store in HW struct
1030  */
1031 enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
1032 {
1033 	struct ice_aqc_query_txsched_res_resp *buf;
1034 	enum ice_status status = 0;
1035 	__le16 max_sibl;
1036 	u8 i;
1037 
1038 	if (hw->layer_info)
1039 		return status;
1040 
1041 	buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
1042 	if (!buf)
1043 		return ICE_ERR_NO_MEMORY;
1044 
1045 	status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
1046 	if (status)
1047 		goto sched_query_out;
1048 
1049 	hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
1050 	hw->num_tx_sched_phys_layers =
1051 		le16_to_cpu(buf->sched_props.phys_levels);
1052 	hw->flattened_layers = buf->sched_props.flattening_bitmap;
1053 	hw->max_cgds = buf->sched_props.max_pf_cgds;
1054 
1055 	/* max sibling group size of current layer refers to the max children
1056 	 * of the below layer node.
1057 	 * layer 1 node max children will be layer 2 max sibling group size
1058 	 * layer 2 node max children will be layer 3 max sibling group size
1059 	 * and so on. This array will be populated from root (index 0) to
1060 	 * qgroup layer 7. Leaf node has no children.
1061 	 */
1062 	for (i = 0; i < hw->num_tx_sched_layers; i++) {
1063 		max_sibl = buf->layer_props[i].max_sibl_grp_sz;
1064 		hw->max_children[i] = le16_to_cpu(max_sibl);
1065 	}
1066 
1067 	hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
1068 				      (hw->num_tx_sched_layers *
1069 				       sizeof(*hw->layer_info)),
1070 				      GFP_KERNEL);
1071 	if (!hw->layer_info) {
1072 		status = ICE_ERR_NO_MEMORY;
1073 		goto sched_query_out;
1074 	}
1075 
1076 sched_query_out:
1077 	devm_kfree(ice_hw_to_dev(hw), buf);
1078 	return status;
1079 }
1080 
1081 /**
1082  * ice_sched_find_node_in_subtree - Find node in part of base node subtree
1083  * @hw: pointer to the hw struct
1084  * @base: pointer to the base node
1085  * @node: pointer to the node to search
1086  *
1087  * This function checks whether a given node is part of the base node
1088  * subtree or not
1089  */
1090 static bool
1091 ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
1092 			       struct ice_sched_node *node)
1093 {
1094 	u8 i;
1095 
1096 	for (i = 0; i < base->num_children; i++) {
1097 		struct ice_sched_node *child = base->children[i];
1098 
1099 		if (node == child)
1100 			return true;
1101 
1102 		if (child->tx_sched_layer > node->tx_sched_layer)
1103 			return false;
1104 
1105 		/* this recursion is intentional, and wouldn't
1106 		 * go more than 8 calls
1107 		 */
1108 		if (ice_sched_find_node_in_subtree(hw, child, node))
1109 			return true;
1110 	}
1111 	return false;
1112 }
1113 
1114 /**
1115  * ice_sched_get_free_qparent - Get a free lan or rdma q group node
1116  * @pi: port information structure
1117  * @vsi_handle: software VSI handle
1118  * @tc: branch number
1119  * @owner: lan or rdma
1120  *
1121  * This function retrieves a free lan or rdma q group node
1122  */
1123 struct ice_sched_node *
1124 ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
1125 			   u8 owner)
1126 {
1127 	struct ice_sched_node *vsi_node, *qgrp_node = NULL;
1128 	struct ice_vsi_ctx *vsi_ctx;
1129 	u16 max_children;
1130 	u8 qgrp_layer;
1131 
1132 	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
1133 	max_children = pi->hw->max_children[qgrp_layer];
1134 
1135 	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
1136 	if (!vsi_ctx)
1137 		return NULL;
1138 	vsi_node = vsi_ctx->sched.vsi_node[tc];
1139 	/* validate invalid VSI id */
1140 	if (!vsi_node)
1141 		goto lan_q_exit;
1142 
1143 	/* get the first q group node from VSI sub-tree */
1144 	qgrp_node = ice_sched_get_first_node(pi->hw, vsi_node, qgrp_layer);
1145 	while (qgrp_node) {
1146 		/* make sure the qgroup node is part of the VSI subtree */
1147 		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
1148 			if (qgrp_node->num_children < max_children &&
1149 			    qgrp_node->owner == owner)
1150 				break;
1151 		qgrp_node = qgrp_node->sibling;
1152 	}
1153 
1154 lan_q_exit:
1155 	return qgrp_node;
1156 }
1157 
1158 /**
1159  * ice_sched_get_vsi_node - Get a VSI node based on VSI id
1160  * @hw: pointer to the hw struct
1161  * @tc_node: pointer to the TC node
1162  * @vsi_handle: software VSI handle
1163  *
1164  * This function retrieves a VSI node for a given VSI id from a given
1165  * TC branch
1166  */
1167 static struct ice_sched_node *
1168 ice_sched_get_vsi_node(struct ice_hw *hw, struct ice_sched_node *tc_node,
1169 		       u16 vsi_handle)
1170 {
1171 	struct ice_sched_node *node;
1172 	u8 vsi_layer;
1173 
1174 	vsi_layer = ice_sched_get_vsi_layer(hw);
1175 	node = ice_sched_get_first_node(hw, tc_node, vsi_layer);
1176 
1177 	/* Check whether it already exists */
1178 	while (node) {
1179 		if (node->vsi_handle == vsi_handle)
1180 			return node;
1181 		node = node->sibling;
1182 	}
1183 
1184 	return node;
1185 }
1186 
1187 /**
1188  * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
1189  * @hw: pointer to the hw struct
1190  * @num_qs: number of queues
1191  * @num_nodes: num nodes array
1192  *
1193  * This function calculates the number of VSI child nodes based on the
1194  * number of queues.
1195  */
1196 static void
1197 ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
1198 {
1199 	u16 num = num_qs;
1200 	u8 i, qgl, vsil;
1201 
1202 	qgl = ice_sched_get_qgrp_layer(hw);
1203 	vsil = ice_sched_get_vsi_layer(hw);
1204 
1205 	/* calculate num nodes from q group to VSI layer */
1206 	for (i = qgl; i > vsil; i--) {
1207 		/* round to the next integer if there is a remainder */
1208 		num = DIV_ROUND_UP(num, hw->max_children[i]);
1209 
1210 		/* need at least one node */
1211 		num_nodes[i] = num ? num : 1;
1212 	}
1213 }
1214 
1215 /**
1216  * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
1217  * @pi: port information structure
1218  * @vsi_handle: software VSI handle
1219  * @tc_node: pointer to the TC node
1220  * @num_nodes: pointer to the num nodes that needs to be added per layer
1221  * @owner: node owner (lan or rdma)
1222  *
1223  * This function adds the VSI child nodes to tree. It gets called for
1224  * lan and rdma separately.
1225  */
1226 static enum ice_status
1227 ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
1228 			      struct ice_sched_node *tc_node, u16 *num_nodes,
1229 			      u8 owner)
1230 {
1231 	struct ice_sched_node *parent, *node;
1232 	struct ice_hw *hw = pi->hw;
1233 	enum ice_status status;
1234 	u32 first_node_teid;
1235 	u16 num_added = 0;
1236 	u8 i, qgl, vsil;
1237 
1238 	qgl = ice_sched_get_qgrp_layer(hw);
1239 	vsil = ice_sched_get_vsi_layer(hw);
1240 	parent = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1241 	for (i = vsil + 1; i <= qgl; i++) {
1242 		if (!parent)
1243 			return ICE_ERR_CFG;
1244 
1245 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
1246 						      num_nodes[i],
1247 						      &first_node_teid,
1248 						      &num_added);
1249 		if (status || num_nodes[i] != num_added)
1250 			return ICE_ERR_CFG;
1251 
1252 		/* The newly added node can be a new parent for the next
1253 		 * layer nodes
1254 		 */
1255 		if (num_added) {
1256 			parent = ice_sched_find_node_by_teid(tc_node,
1257 							     first_node_teid);
1258 			node = parent;
1259 			while (node) {
1260 				node->owner = owner;
1261 				node = node->sibling;
1262 			}
1263 		} else {
1264 			parent = parent->children[0];
1265 		}
1266 	}
1267 
1268 	return 0;
1269 }
1270 
1271 /**
1272  * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
1273  * @hw: pointer to the hw struct
1274  * @tc_node: pointer to TC node
1275  * @num_nodes: pointer to num nodes array
1276  *
1277  * This function calculates the number of supported nodes needed to add this
1278  * VSI into Tx tree including the VSI, parent and intermediate nodes in below
1279  * layers
1280  */
1281 static void
1282 ice_sched_calc_vsi_support_nodes(struct ice_hw *hw,
1283 				 struct ice_sched_node *tc_node, u16 *num_nodes)
1284 {
1285 	struct ice_sched_node *node;
1286 	u8 vsil;
1287 	int i;
1288 
1289 	vsil = ice_sched_get_vsi_layer(hw);
1290 	for (i = vsil; i >= hw->sw_entry_point_layer; i--)
1291 		/* Add intermediate nodes if TC has no children and
1292 		 * need at least one node for VSI
1293 		 */
1294 		if (!tc_node->num_children || i == vsil) {
1295 			num_nodes[i]++;
1296 		} else {
1297 			/* If intermediate nodes are reached max children
1298 			 * then add a new one.
1299 			 */
1300 			node = ice_sched_get_first_node(hw, tc_node, (u8)i);
1301 			/* scan all the siblings */
1302 			while (node) {
1303 				if (node->num_children < hw->max_children[i])
1304 					break;
1305 				node = node->sibling;
1306 			}
1307 
1308 			/* tree has one intermediate node to add this new VSI.
1309 			 * So no need to calculate supported nodes for below
1310 			 * layers.
1311 			 */
1312 			if (node)
1313 				break;
1314 			/* all the nodes are full, allocate a new one */
1315 			num_nodes[i]++;
1316 		}
1317 }
1318 
1319 /**
1320  * ice_sched_add_vsi_support_nodes - add VSI supported nodes into Tx tree
1321  * @pi: port information structure
1322  * @vsi_handle: software VSI handle
1323  * @tc_node: pointer to TC node
1324  * @num_nodes: pointer to num nodes array
1325  *
1326  * This function adds the VSI supported nodes into Tx tree including the
1327  * VSI, its parent and intermediate nodes in below layers
1328  */
1329 static enum ice_status
1330 ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
1331 				struct ice_sched_node *tc_node, u16 *num_nodes)
1332 {
1333 	struct ice_sched_node *parent = tc_node;
1334 	enum ice_status status;
1335 	u32 first_node_teid;
1336 	u16 num_added = 0;
1337 	u8 i, vsil;
1338 
1339 	if (!pi)
1340 		return ICE_ERR_PARAM;
1341 
1342 	vsil = ice_sched_get_vsi_layer(pi->hw);
1343 	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
1344 		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
1345 						      i, num_nodes[i],
1346 						      &first_node_teid,
1347 						      &num_added);
1348 		if (status || num_nodes[i] != num_added)
1349 			return ICE_ERR_CFG;
1350 
1351 		/* The newly added node can be a new parent for the next
1352 		 * layer nodes
1353 		 */
1354 		if (num_added)
1355 			parent = ice_sched_find_node_by_teid(tc_node,
1356 							     first_node_teid);
1357 		else
1358 			parent = parent->children[0];
1359 
1360 		if (!parent)
1361 			return ICE_ERR_CFG;
1362 
1363 		if (i == vsil)
1364 			parent->vsi_handle = vsi_handle;
1365 	}
1366 
1367 	return 0;
1368 }
1369 
1370 /**
1371  * ice_sched_add_vsi_to_topo - add a new VSI into tree
1372  * @pi: port information structure
1373  * @vsi_handle: software VSI handle
1374  * @tc: TC number
1375  *
1376  * This function adds a new VSI into scheduler tree
1377  */
1378 static enum ice_status
1379 ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
1380 {
1381 	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1382 	struct ice_sched_node *tc_node;
1383 	struct ice_hw *hw = pi->hw;
1384 
1385 	tc_node = ice_sched_get_tc_node(pi, tc);
1386 	if (!tc_node)
1387 		return ICE_ERR_PARAM;
1388 
1389 	/* calculate number of supported nodes needed for this VSI */
1390 	ice_sched_calc_vsi_support_nodes(hw, tc_node, num_nodes);
1391 
1392 	/* add vsi supported nodes to tc subtree */
1393 	return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
1394 					       num_nodes);
1395 }
1396 
1397 /**
1398  * ice_sched_update_vsi_child_nodes - update VSI child nodes
1399  * @pi: port information structure
1400  * @vsi_handle: software VSI handle
1401  * @tc: TC number
1402  * @new_numqs: new number of max queues
1403  * @owner: owner of this subtree
1404  *
1405  * This function updates the VSI child nodes based on the number of queues
1406  */
1407 static enum ice_status
1408 ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
1409 				 u8 tc, u16 new_numqs, u8 owner)
1410 {
1411 	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
1412 	struct ice_sched_node *vsi_node;
1413 	struct ice_sched_node *tc_node;
1414 	struct ice_vsi_ctx *vsi_ctx;
1415 	enum ice_status status = 0;
1416 	struct ice_hw *hw = pi->hw;
1417 	u16 prev_numqs;
1418 
1419 	tc_node = ice_sched_get_tc_node(pi, tc);
1420 	if (!tc_node)
1421 		return ICE_ERR_CFG;
1422 
1423 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1424 	if (!vsi_node)
1425 		return ICE_ERR_CFG;
1426 
1427 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
1428 	if (!vsi_ctx)
1429 		return ICE_ERR_PARAM;
1430 
1431 	if (owner == ICE_SCHED_NODE_OWNER_LAN)
1432 		prev_numqs = vsi_ctx->sched.max_lanq[tc];
1433 	else
1434 		return ICE_ERR_PARAM;
1435 
1436 	/* num queues are not changed or less than the previous number */
1437 	if (new_numqs <= prev_numqs)
1438 		return status;
1439 	if (new_numqs)
1440 		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
1441 	/* Keep the max number of queue configuration all the time. Update the
1442 	 * tree only if number of queues > previous number of queues. This may
1443 	 * leave some extra nodes in the tree if number of queues < previous
1444 	 * number but that wouldn't harm anything. Removing those extra nodes
1445 	 * may complicate the code if those nodes are part of SRL or
1446 	 * individually rate limited.
1447 	 */
1448 	status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
1449 					       new_num_nodes, owner);
1450 	if (status)
1451 		return status;
1452 	vsi_ctx->sched.max_lanq[tc] = new_numqs;
1453 
1454 	return 0;
1455 }
1456 
1457 /**
1458  * ice_sched_cfg_vsi - configure the new/existing VSI
1459  * @pi: port information structure
1460  * @vsi_handle: software VSI handle
1461  * @tc: TC number
1462  * @maxqs: max number of queues
1463  * @owner: lan or rdma
1464  * @enable: TC enabled or disabled
1465  *
1466  * This function adds/updates VSI nodes based on the number of queues. If TC is
1467  * enabled and VSI is in suspended state then resume the VSI back. If TC is
1468  * disabled then suspend the VSI if it is not already.
1469  */
1470 enum ice_status
1471 ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
1472 		  u8 owner, bool enable)
1473 {
1474 	struct ice_sched_node *vsi_node, *tc_node;
1475 	struct ice_vsi_ctx *vsi_ctx;
1476 	enum ice_status status = 0;
1477 	struct ice_hw *hw = pi->hw;
1478 
1479 	ice_debug(pi->hw, ICE_DBG_SCHED, "add/config VSI %d\n", vsi_handle);
1480 	tc_node = ice_sched_get_tc_node(pi, tc);
1481 	if (!tc_node)
1482 		return ICE_ERR_PARAM;
1483 	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
1484 	if (!vsi_ctx)
1485 		return ICE_ERR_PARAM;
1486 	vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1487 
1488 	/* suspend the VSI if tc is not enabled */
1489 	if (!enable) {
1490 		if (vsi_node && vsi_node->in_use) {
1491 			u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1492 
1493 			status = ice_sched_suspend_resume_elems(hw, 1, &teid,
1494 								true);
1495 			if (!status)
1496 				vsi_node->in_use = false;
1497 		}
1498 		return status;
1499 	}
1500 
1501 	/* TC is enabled, if it is a new VSI then add it to the tree */
1502 	if (!vsi_node) {
1503 		status = ice_sched_add_vsi_to_topo(pi, vsi_handle, tc);
1504 		if (status)
1505 			return status;
1506 
1507 		vsi_node = ice_sched_get_vsi_node(hw, tc_node, vsi_handle);
1508 		if (!vsi_node)
1509 			return ICE_ERR_CFG;
1510 
1511 		vsi_ctx->sched.vsi_node[tc] = vsi_node;
1512 		vsi_node->in_use = true;
1513 		/* invalidate the max queues whenever VSI gets added first time
1514 		 * into the scheduler tree (boot or after reset). We need to
1515 		 * recreate the child nodes all the time in these cases.
1516 		 */
1517 		vsi_ctx->sched.max_lanq[tc] = 0;
1518 	}
1519 
1520 	/* update the VSI child nodes */
1521 	status = ice_sched_update_vsi_child_nodes(pi, vsi_handle, tc, maxqs,
1522 						  owner);
1523 	if (status)
1524 		return status;
1525 
1526 	/* TC is enabled, resume the VSI if it is in the suspend state */
1527 	if (!vsi_node->in_use) {
1528 		u32 teid = le32_to_cpu(vsi_node->info.node_teid);
1529 
1530 		status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
1531 		if (!status)
1532 			vsi_node->in_use = true;
1533 	}
1534 
1535 	return status;
1536 }
1537 
1538 /**
1539  * ice_sched_rm_agg_vsi_entry - remove agg related VSI info entry
1540  * @pi: port information structure
1541  * @vsi_handle: software VSI handle
1542  *
1543  * This function removes single aggregator VSI info entry from
1544  * aggregator list.
1545  */
1546 static void
1547 ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
1548 {
1549 	struct ice_sched_agg_info *agg_info;
1550 	struct ice_sched_agg_info *atmp;
1551 
1552 	list_for_each_entry_safe(agg_info, atmp, &pi->hw->agg_list,
1553 				 list_entry) {
1554 		struct ice_sched_agg_vsi_info *agg_vsi_info;
1555 		struct ice_sched_agg_vsi_info *vtmp;
1556 
1557 		list_for_each_entry_safe(agg_vsi_info, vtmp,
1558 					 &agg_info->agg_vsi_list, list_entry)
1559 			if (agg_vsi_info->vsi_handle == vsi_handle) {
1560 				list_del(&agg_vsi_info->list_entry);
1561 				devm_kfree(ice_hw_to_dev(pi->hw),
1562 					   agg_vsi_info);
1563 				return;
1564 			}
1565 	}
1566 }
1567 
1568 /**
1569  * ice_sched_is_leaf_node_present - check for a leaf node in the sub-tree
1570  * @node: pointer to the sub-tree node
1571  *
1572  * This function checks for a leaf node presence in a given sub-tree node.
1573  */
1574 static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
1575 {
1576 	u8 i;
1577 
1578 	for (i = 0; i < node->num_children; i++)
1579 		if (ice_sched_is_leaf_node_present(node->children[i]))
1580 			return true;
1581 	/* check for a leaf node */
1582 	return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF);
1583 }
1584 
1585 /**
1586  * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
1587  * @pi: port information structure
1588  * @vsi_handle: software VSI handle
1589  * @owner: LAN or RDMA
1590  *
1591  * This function removes the VSI and its LAN or RDMA children nodes from the
1592  * scheduler tree.
1593  */
1594 static enum ice_status
1595 ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
1596 {
1597 	enum ice_status status = ICE_ERR_PARAM;
1598 	struct ice_vsi_ctx *vsi_ctx;
1599 	u8 i;
1600 
1601 	ice_debug(pi->hw, ICE_DBG_SCHED, "removing VSI %d\n", vsi_handle);
1602 	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
1603 		return status;
1604 	mutex_lock(&pi->sched_lock);
1605 	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
1606 	if (!vsi_ctx)
1607 		goto exit_sched_rm_vsi_cfg;
1608 
1609 	ice_for_each_traffic_class(i) {
1610 		struct ice_sched_node *vsi_node, *tc_node;
1611 		u8 j = 0;
1612 
1613 		tc_node = ice_sched_get_tc_node(pi, i);
1614 		if (!tc_node)
1615 			continue;
1616 
1617 		vsi_node = ice_sched_get_vsi_node(pi->hw, tc_node, vsi_handle);
1618 		if (!vsi_node)
1619 			continue;
1620 
1621 		if (ice_sched_is_leaf_node_present(vsi_node)) {
1622 			ice_debug(pi->hw, ICE_DBG_SCHED,
1623 				  "VSI has leaf nodes in TC %d\n", i);
1624 			status = ICE_ERR_IN_USE;
1625 			goto exit_sched_rm_vsi_cfg;
1626 		}
1627 		while (j < vsi_node->num_children) {
1628 			if (vsi_node->children[j]->owner == owner) {
1629 				ice_free_sched_node(pi, vsi_node->children[j]);
1630 
1631 				/* reset the counter again since the num
1632 				 * children will be updated after node removal
1633 				 */
1634 				j = 0;
1635 			} else {
1636 				j++;
1637 			}
1638 		}
1639 		/* remove the VSI if it has no children */
1640 		if (!vsi_node->num_children) {
1641 			ice_free_sched_node(pi, vsi_node);
1642 			vsi_ctx->sched.vsi_node[i] = NULL;
1643 
1644 			/* clean up agg related vsi info if any */
1645 			ice_sched_rm_agg_vsi_info(pi, vsi_handle);
1646 		}
1647 		if (owner == ICE_SCHED_NODE_OWNER_LAN)
1648 			vsi_ctx->sched.max_lanq[i] = 0;
1649 	}
1650 	status = 0;
1651 
1652 exit_sched_rm_vsi_cfg:
1653 	mutex_unlock(&pi->sched_lock);
1654 	return status;
1655 }
1656 
1657 /**
1658  * ice_rm_vsi_lan_cfg - remove VSI and its LAN children nodes
1659  * @pi: port information structure
1660  * @vsi_handle: software VSI handle
1661  *
1662  * This function clears the VSI and its LAN children nodes from scheduler tree
1663  * for all TCs.
1664  */
1665 enum ice_status ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
1666 {
1667 	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
1668 }
1669