xref: /linux/drivers/accel/amdxdna/aie2_solver.c (revision f96a974170b749e3a56844e25b31d46a7233b6f6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
4  */
5 
6 #include <drm/drm_device.h>
7 #include <drm/drm_managed.h>
8 #include <drm/drm_print.h>
9 #include <linux/bitops.h>
10 #include <linux/bitmap.h>
11 #include <linux/slab.h>
12 
13 #include "aie2_solver.h"
14 
/*
 * struct partition_node - A contiguous range of AIE array columns that has
 * been carved out for one or more requests. Partitions may be shared by
 * several solver nodes unless marked exclusive.
 */
struct partition_node {
	struct list_head	list;		/* linked on solver_rgroup.pt_node_list */
	u32			nshared;	/* # shared requests */
	u32			start_col;	/* start column */
	u32			ncols;		/* # columns */
	bool			exclusive;	/* can not be shared if set */
};
22 
/*
 * struct solver_node - Per-request bookkeeping for one resource allocation.
 * Created by create_solver_node() and freed by remove_solver_node().
 */
struct solver_node {
	struct list_head	list;		/* linked on solver_rgroup.node_list */
	u64			rid;		/* Request ID from consumer */

	struct partition_node	*pt_node;	/* partition assigned to this request */
	void			*cb_arg;	/* consumer context passed to load/unload */
	u32			dpm_level;	/* DPM level this request needs */
	u32			cols_len;	/* # entries in start_cols[] */
	u32			start_cols[] __counted_by(cols_len);	/* candidate start columns */
};
33 
/*
 * struct solver_rgroup - A resource group: all live solver nodes, their
 * partitions, and a bitmap tracking which columns are occupied.
 */
struct solver_rgroup {
	u32				rgid;		/* resource group id */
	u32				nnode;		/* # solver nodes on node_list */
	u32				npartition_node; /* # partitions on pt_node_list */

	DECLARE_BITMAP(resbit, XRS_MAX_COL);		/* set bit = column in use */
	struct list_head		node_list;	/* struct solver_node.list */
	struct list_head		pt_node_list;	/* struct partition_node.list */
};
43 
/*
 * struct solver_state - Top-level solver handle returned by xrsm_init().
 */
struct solver_state {
	struct solver_rgroup		rgp;	/* the single resource group */
	struct init_config		cfg;	/* copy of the consumer's config */
	/* NOTE(review): nothing in this file uses @actions; callbacks go
	 * through cfg.actions instead — confirm whether this field is dead. */
	struct xrs_action_ops		*actions;
};
49 
50 static u32 calculate_gops(struct aie_qos *rqos)
51 {
52 	u32 service_rate = 0;
53 
54 	if (rqos->latency)
55 		service_rate = (1000 / rqos->latency);
56 
57 	if (rqos->fps > service_rate)
58 		return rqos->fps * rqos->gops;
59 
60 	return service_rate * rqos->gops;
61 }
62 
63 /*
64  * qos_meet() - Check the QOS request can be met.
65  */
66 static int qos_meet(struct solver_state *xrs, struct aie_qos *rqos, u32 cgops)
67 {
68 	u32 request_gops = calculate_gops(rqos) * xrs->cfg.sys_eff_factor;
69 
70 	if (request_gops <= cgops)
71 		return 0;
72 
73 	return -EINVAL;
74 }
75 
76 /*
77  * sanity_check() - Do a basic sanity check on allocation request.
78  */
79 static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
80 {
81 	struct cdo_parts *cdop = &req->cdo;
82 	struct aie_qos *rqos = &req->rqos;
83 	u32 cu_clk_freq;
84 
85 	if (cdop->ncols > xrs->cfg.total_col)
86 		return -EINVAL;
87 
88 	/*
89 	 * We can find at least one CDOs groups that meet the
90 	 * GOPs requirement.
91 	 */
92 	cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[xrs->cfg.clk_list.num_levels - 1];
93 
94 	if (qos_meet(xrs, rqos, cdop->qos_cap.opc * cu_clk_freq / 1000))
95 		return -EINVAL;
96 
97 	return 0;
98 }
99 
100 static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
101 {
102 	/*
103 	 * gops is retrieved from the xmodel, so it's always set
104 	 * fps and latency are the configurable params from the application
105 	 */
106 	if (rqos->gops > 0 && (rqos->fps > 0 ||  rqos->latency > 0))
107 		return true;
108 
109 	return false;
110 }
111 
112 static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level)
113 {
114 	struct solver_rgroup *rgp = &xrs->rgp;
115 	struct cdo_parts *cdop = &req->cdo;
116 	struct aie_qos *rqos = &req->rqos;
117 	u32 freq, max_dpm_level, level;
118 	struct solver_node *node;
119 
120 	max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
121 	/* If no QoS parameters are passed, set it to the max DPM level */
122 	if (!is_valid_qos_dpm_params(rqos)) {
123 		level = max_dpm_level;
124 		goto set_dpm;
125 	}
126 
127 	/* Find one CDO group that meet the GOPs requirement. */
128 	for (level = 0; level < max_dpm_level; level++) {
129 		freq = xrs->cfg.clk_list.cu_clk_list[level];
130 		if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
131 			break;
132 	}
133 
134 	/* set the dpm level which fits all the sessions */
135 	list_for_each_entry(node, &rgp->node_list, list) {
136 		if (node->dpm_level > level)
137 			level = node->dpm_level;
138 	}
139 
140 set_dpm:
141 	*dpm_level = level;
142 	return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
143 }
144 
145 static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
146 {
147 	struct solver_node *node;
148 
149 	list_for_each_entry(node, &rgp->node_list, list) {
150 		if (node->rid == rid)
151 			return node;
152 	}
153 
154 	return NULL;
155 }
156 
157 static void remove_partition_node(struct solver_rgroup *rgp,
158 				  struct partition_node *pt_node)
159 {
160 	pt_node->nshared--;
161 	if (pt_node->nshared > 0)
162 		return;
163 
164 	list_del(&pt_node->list);
165 	rgp->npartition_node--;
166 
167 	bitmap_clear(rgp->resbit, pt_node->start_col, pt_node->ncols);
168 	kfree(pt_node);
169 }
170 
171 static void remove_solver_node(struct solver_rgroup *rgp,
172 			       struct solver_node *node)
173 {
174 	list_del(&node->list);
175 	rgp->nnode--;
176 
177 	if (node->pt_node)
178 		remove_partition_node(rgp, node->pt_node);
179 
180 	kfree(node);
181 }
182 
183 static int get_free_partition(struct solver_state *xrs,
184 			      struct solver_node *snode,
185 			      struct alloc_requests *req)
186 {
187 	struct partition_node *pt_node;
188 	u32 ncols = req->cdo.ncols;
189 	u32 col, i;
190 
191 	for (i = 0; i < snode->cols_len; i++) {
192 		col = snode->start_cols[i];
193 		if (find_next_bit(xrs->rgp.resbit, XRS_MAX_COL, col) >= col + ncols)
194 			break;
195 	}
196 
197 	if (i == snode->cols_len)
198 		return -ENODEV;
199 
200 	pt_node = kzalloc(sizeof(*pt_node), GFP_KERNEL);
201 	if (!pt_node)
202 		return -ENOMEM;
203 
204 	pt_node->nshared = 1;
205 	pt_node->start_col = col;
206 	pt_node->ncols = ncols;
207 
208 	/*
209 	 * Always set exclusive to false for now.
210 	 */
211 	pt_node->exclusive = false;
212 
213 	list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
214 	xrs->rgp.npartition_node++;
215 	bitmap_set(xrs->rgp.resbit, pt_node->start_col, pt_node->ncols);
216 
217 	snode->pt_node = pt_node;
218 
219 	return 0;
220 }
221 
222 static int allocate_partition(struct solver_state *xrs,
223 			      struct solver_node *snode,
224 			      struct alloc_requests *req)
225 {
226 	struct partition_node *pt_node, *rpt_node = NULL;
227 	int idx, ret;
228 
229 	ret = get_free_partition(xrs, snode, req);
230 	if (!ret)
231 		return ret;
232 
233 	/* try to get a share-able partition */
234 	list_for_each_entry(pt_node, &xrs->rgp.pt_node_list, list) {
235 		if (pt_node->exclusive)
236 			continue;
237 
238 		if (rpt_node && pt_node->nshared >= rpt_node->nshared)
239 			continue;
240 
241 		for (idx = 0; idx < snode->cols_len; idx++) {
242 			if (snode->start_cols[idx] != pt_node->start_col)
243 				continue;
244 
245 			if (req->cdo.ncols != pt_node->ncols)
246 				continue;
247 
248 			rpt_node = pt_node;
249 			break;
250 		}
251 	}
252 
253 	if (!rpt_node)
254 		return -ENODEV;
255 
256 	rpt_node->nshared++;
257 	snode->pt_node = rpt_node;
258 
259 	return 0;
260 }
261 
262 static struct solver_node *create_solver_node(struct solver_state *xrs,
263 					      struct alloc_requests *req)
264 {
265 	struct cdo_parts *cdop = &req->cdo;
266 	struct solver_node *node;
267 	int ret;
268 
269 	node = kzalloc(struct_size(node, start_cols, cdop->cols_len), GFP_KERNEL);
270 	if (!node)
271 		return ERR_PTR(-ENOMEM);
272 
273 	node->rid = req->rid;
274 	node->cols_len = cdop->cols_len;
275 	memcpy(node->start_cols, cdop->start_cols, cdop->cols_len * sizeof(u32));
276 
277 	ret = allocate_partition(xrs, node, req);
278 	if (ret)
279 		goto free_node;
280 
281 	list_add_tail(&node->list, &xrs->rgp.node_list);
282 	xrs->rgp.nnode++;
283 	return node;
284 
285 free_node:
286 	kfree(node);
287 	return ERR_PTR(ret);
288 }
289 
290 static void fill_load_action(struct solver_state *xrs,
291 			     struct solver_node *snode,
292 			     struct xrs_action_load *action)
293 {
294 	action->rid = snode->rid;
295 	action->part.start_col = snode->pt_node->start_col;
296 	action->part.ncols = snode->pt_node->ncols;
297 }
298 
/*
 * xrs_allocate_resource() - Allocate AIE columns for request @req.
 *
 * Validates the request, creates a solver node with a (possibly shared)
 * partition, notifies the consumer through the load() callback, and sets
 * a DPM level that satisfies all admitted sessions.
 *
 * Return: 0 on success; -EEXIST when @req->rid already holds an
 * allocation; otherwise a negative error from validation, allocation,
 * or the consumer callbacks.
 */
int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
{
	struct xrs_action_load load_act;
	struct solver_node *snode;
	struct solver_state *xrs;
	u32 dpm_level;
	int ret;

	xrs = (struct solver_state *)hdl;

	/* Reject requests that can never fit or meet their QoS */
	ret = sanity_check(xrs, req);
	if (ret) {
		drm_err(xrs->cfg.ddev, "invalid request");
		return ret;
	}

	/* One allocation per request id */
	if (rg_search_node(&xrs->rgp, req->rid)) {
		drm_err(xrs->cfg.ddev, "rid %lld is in-use", req->rid);
		return -EEXIST;
	}

	snode = create_solver_node(xrs, req);
	if (IS_ERR(snode))
		return PTR_ERR(snode);

	/* Hand the chosen column range to the consumer */
	fill_load_action(xrs, snode, &load_act);
	ret = xrs->cfg.actions->load(cb_arg, &load_act);
	if (ret)
		goto free_node;

	/*
	 * NOTE(review): if set_dpm_level() fails here, the node is freed
	 * without an unload() after a successful load() — confirm the
	 * consumer tolerates that.
	 */
	ret = set_dpm_level(xrs, req, &dpm_level);
	if (ret)
		goto free_node;

	snode->dpm_level = dpm_level;
	snode->cb_arg = cb_arg;

	drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
		snode->pt_node->start_col, snode->pt_node->ncols);

	return 0;

free_node:
	remove_solver_node(&xrs->rgp, snode);

	return ret;
}
346 
347 int xrs_release_resource(void *hdl, u64 rid)
348 {
349 	struct solver_state *xrs = hdl;
350 	struct solver_node *node;
351 
352 	node = rg_search_node(&xrs->rgp, rid);
353 	if (!node) {
354 		drm_err(xrs->cfg.ddev, "node not exist");
355 		return -ENODEV;
356 	}
357 
358 	xrs->cfg.actions->unload(node->cb_arg);
359 	remove_solver_node(&xrs->rgp, node);
360 
361 	return 0;
362 }
363 
364 void *xrsm_init(struct init_config *cfg)
365 {
366 	struct solver_rgroup *rgp;
367 	struct solver_state *xrs;
368 
369 	xrs = drmm_kzalloc(cfg->ddev, sizeof(*xrs), GFP_KERNEL);
370 	if (!xrs)
371 		return NULL;
372 
373 	memcpy(&xrs->cfg, cfg, sizeof(*cfg));
374 
375 	rgp = &xrs->rgp;
376 	INIT_LIST_HEAD(&rgp->node_list);
377 	INIT_LIST_HEAD(&rgp->pt_node_list);
378 
379 	return xrs;
380 }
381