xref: /linux/include/uapi/drm/ivpu_accel.h (revision e45f72b6782f88ed50932033ad206df5dd3d7103)
1 /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
2 /*
3  * Copyright (C) 2020-2025 Intel Corporation
4  */
5 
6 #ifndef __UAPI_IVPU_DRM_H__
7 #define __UAPI_IVPU_DRM_H__
8 
9 #include "drm.h"
10 
11 #if defined(__cplusplus)
12 extern "C" {
13 #endif
14 
/* Ioctl command numbers, relative to DRM_COMMAND_BASE */
#define DRM_IVPU_GET_PARAM		  0x00
#define DRM_IVPU_SET_PARAM		  0x01
#define DRM_IVPU_BO_CREATE		  0x02
#define DRM_IVPU_BO_INFO		  0x03
/* 0x04 is unused; NOTE(review): presumably a removed ioctl - do not reuse */
#define DRM_IVPU_SUBMIT			  0x05
#define DRM_IVPU_BO_WAIT		  0x06
#define DRM_IVPU_METRIC_STREAMER_START	  0x07
#define DRM_IVPU_METRIC_STREAMER_STOP	  0x08
#define DRM_IVPU_METRIC_STREAMER_GET_DATA 0x09
#define DRM_IVPU_METRIC_STREAMER_GET_INFO 0x0a
#define DRM_IVPU_CMDQ_CREATE              0x0b
#define DRM_IVPU_CMDQ_DESTROY             0x0c
#define DRM_IVPU_CMDQ_SUBMIT              0x0d
28 
/*
 * Full ioctl request codes. DRM_IOW: user space only passes the argument
 * struct in; DRM_IOWR: the kernel also writes results back into it.
 */
#define DRM_IOCTL_IVPU_GET_PARAM                                               \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param)

#define DRM_IOCTL_IVPU_SET_PARAM                                               \
	DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SET_PARAM, struct drm_ivpu_param)

#define DRM_IOCTL_IVPU_BO_CREATE                                               \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_CREATE, struct drm_ivpu_bo_create)

#define DRM_IOCTL_IVPU_BO_INFO                                                 \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_INFO, struct drm_ivpu_bo_info)

#define DRM_IOCTL_IVPU_SUBMIT                                                  \
	DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SUBMIT, struct drm_ivpu_submit)

#define DRM_IOCTL_IVPU_BO_WAIT                                                 \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait)

#define DRM_IOCTL_IVPU_METRIC_STREAMER_START                                   \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_START,            \
		 struct drm_ivpu_metric_streamer_start)

#define DRM_IOCTL_IVPU_METRIC_STREAMER_STOP                                    \
	DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_STOP,              \
		struct drm_ivpu_metric_streamer_stop)

#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA                                \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_DATA,         \
		 struct drm_ivpu_metric_streamer_get_data)

/*
 * NOTE(review): GET_INFO shares the argument struct with GET_DATA -
 * presumably intentional; confirm against the driver implementation.
 */
#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_INFO                                \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_INFO,         \
		 struct drm_ivpu_metric_streamer_get_data)

#define DRM_IOCTL_IVPU_CMDQ_CREATE                                             \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_CREATE, struct drm_ivpu_cmdq_create)

#define DRM_IOCTL_IVPU_CMDQ_DESTROY                                            \
	DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_DESTROY, struct drm_ivpu_cmdq_destroy)

#define DRM_IOCTL_IVPU_CMDQ_SUBMIT                                             \
	DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_SUBMIT, struct drm_ivpu_cmdq_submit)
71 
72 /**
73  * DOC: contexts
74  *
75  * VPU contexts have private virtual address space, job queues and priority.
 * Each context is identified by a unique ID. Context is created on open().
77  */
78 
/* Parameter IDs for struct drm_ivpu_param @param; see that struct for semantics */
#define DRM_IVPU_PARAM_DEVICE_ID	    0
#define DRM_IVPU_PARAM_DEVICE_REVISION	    1
#define DRM_IVPU_PARAM_PLATFORM_TYPE	    2
#define DRM_IVPU_PARAM_CORE_CLOCK_RATE	    3
#define DRM_IVPU_PARAM_NUM_CONTEXTS	    4
#define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5
#define DRM_IVPU_PARAM_CONTEXT_PRIORITY	    6 /* Deprecated */
#define DRM_IVPU_PARAM_CONTEXT_ID	    7
#define DRM_IVPU_PARAM_FW_API_VERSION	    8
#define DRM_IVPU_PARAM_ENGINE_HEARTBEAT	    9
#define DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID  10
#define DRM_IVPU_PARAM_TILE_CONFIG	    11
#define DRM_IVPU_PARAM_SKU		    12
#define DRM_IVPU_PARAM_CAPABILITIES	    13
#define DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE  14

/* Value reported by DRM_IVPU_PARAM_PLATFORM_TYPE on real hardware */
#define DRM_IVPU_PLATFORM_TYPE_SILICON	    0

/* Deprecated, use DRM_IVPU_JOB_PRIORITY */
#define DRM_IVPU_CONTEXT_PRIORITY_IDLE	    0
#define DRM_IVPU_CONTEXT_PRIORITY_NORMAL    1
#define DRM_IVPU_CONTEXT_PRIORITY_FOCUS	    2
#define DRM_IVPU_CONTEXT_PRIORITY_REALTIME  3

/*
 * Job priorities accepted in the @priority field of struct drm_ivpu_submit
 * and struct drm_ivpu_cmdq_create.
 */
#define DRM_IVPU_JOB_PRIORITY_DEFAULT  0
#define DRM_IVPU_JOB_PRIORITY_IDLE     1
#define DRM_IVPU_JOB_PRIORITY_NORMAL   2
#define DRM_IVPU_JOB_PRIORITY_FOCUS    3
#define DRM_IVPU_JOB_PRIORITY_REALTIME 4
108 
/* Capability IDs, queried through DRM_IVPU_PARAM_CAPABILITIES */

/**
 * DRM_IVPU_CAP_METRIC_STREAMER
 *
 * Metric streamer support. Provides sampling of various hardware performance
 * metrics like DMA bandwidth and cache miss/hits. Can be used for profiling.
 */
#define DRM_IVPU_CAP_METRIC_STREAMER	1
/**
 * DRM_IVPU_CAP_DMA_MEMORY_RANGE
 *
 * Driver has capability to allocate separate memory range
 * accessible by hardware DMA.
 */
#define DRM_IVPU_CAP_DMA_MEMORY_RANGE	2
/**
 * DRM_IVPU_CAP_MANAGE_CMDQ
 *
 * Driver supports explicit command queue operations like command queue create,
 * command queue destroy and submit job on specific command queue.
 */
#define DRM_IVPU_CAP_MANAGE_CMDQ       3
130 
/**
 * struct drm_ivpu_param - Get/Set VPU parameters
 *
 * Argument of the DRM_IOCTL_IVPU_GET_PARAM and DRM_IOCTL_IVPU_SET_PARAM ioctls.
 */
struct drm_ivpu_param {
	/**
	 * @param:
	 *
	 * Supported params:
	 *
	 * %DRM_IVPU_PARAM_DEVICE_ID:
	 * PCI Device ID of the VPU device (read-only)
	 *
	 * %DRM_IVPU_PARAM_DEVICE_REVISION:
	 * VPU device revision (read-only)
	 *
	 * %DRM_IVPU_PARAM_PLATFORM_TYPE:
	 * Returns %DRM_IVPU_PLATFORM_TYPE_SILICON on real hardware or device specific
	 * platform type when executing on a simulator or emulator (read-only)
	 *
	 * %DRM_IVPU_PARAM_CORE_CLOCK_RATE:
	 * Maximum frequency of the NPU data processing unit clock (read-only)
	 *
	 * %DRM_IVPU_PARAM_NUM_CONTEXTS:
	 * Maximum number of simultaneously existing contexts (read-only)
	 *
	 * %DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS:
	 * Lowest VPU virtual address available in the current context (read-only)
	 *
	 * %DRM_IVPU_PARAM_CONTEXT_ID:
	 * Current context ID, always greater than 0 (read-only)
	 *
	 * %DRM_IVPU_PARAM_FW_API_VERSION:
	 * Firmware API version array (read-only)
	 *
	 * %DRM_IVPU_PARAM_ENGINE_HEARTBEAT:
	 * Heartbeat value from an engine (read-only).
	 * Engine ID (i.e. DRM_IVPU_ENGINE_COMPUTE) is given via index.
	 *
	 * %DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID:
	 * Device-unique inference ID (read-only)
	 *
	 * %DRM_IVPU_PARAM_TILE_CONFIG:
	 * VPU tile configuration (read-only)
	 *
	 * %DRM_IVPU_PARAM_SKU:
	 * VPU SKU ID (read-only)
	 *
	 * %DRM_IVPU_PARAM_CAPABILITIES:
	 * Supported capabilities (read-only)
	 *
	 * %DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE:
	 * Size of the preemption buffer (read-only)
	 */
	__u32 param;

	/** @index: Index for params that have multiple instances */
	__u32 index;

	/** @value: Param value */
	__u64 value;
};
192 
/* Buffer object placement flags (low 16 bits of @flags) */
#define DRM_IVPU_BO_SHAVE_MEM  0x00000001
#define DRM_IVPU_BO_HIGH_MEM   DRM_IVPU_BO_SHAVE_MEM
#define DRM_IVPU_BO_MAPPABLE   0x00000002
#define DRM_IVPU_BO_DMA_MEM    0x00000004

/* Buffer object caching modes; CACHED (0) is the default, see drm_ivpu_bo_create */
#define DRM_IVPU_BO_CACHED     0x00000000
#define DRM_IVPU_BO_UNCACHED   0x00010000
#define DRM_IVPU_BO_WC	       0x00020000
#define DRM_IVPU_BO_CACHE_MASK 0x00030000

/* Mask of all flags accepted by DRM_IOCTL_IVPU_BO_CREATE */
#define DRM_IVPU_BO_FLAGS \
	(DRM_IVPU_BO_HIGH_MEM | \
	 DRM_IVPU_BO_MAPPABLE | \
	 DRM_IVPU_BO_DMA_MEM | \
	 DRM_IVPU_BO_CACHE_MASK)
208 
/**
 * struct drm_ivpu_bo_create - Create BO backed by SHMEM
 *
 * Create GEM buffer object allocated in SHMEM memory.
 * Argument of the DRM_IOCTL_IVPU_BO_CREATE ioctl.
 */
struct drm_ivpu_bo_create {
	/** @size: The size in bytes of the allocated memory */
	__u64 size;

	/**
	 * @flags:
	 *
	 * Supported flags:
	 *
	 * %DRM_IVPU_BO_HIGH_MEM:
	 *
	 * Allocate VPU address from >4GB range.
	 * Buffer object with vpu address >4GB can always be accessed by the
	 * VPU DMA engine, but some HW generation may not be able to access
	 * this memory from the firmware running on the VPU management processor.
	 * Suitable for input, output and some scratch buffers.
	 *
	 * %DRM_IVPU_BO_MAPPABLE:
	 *
	 * Buffer object can be mapped using mmap().
	 *
	 * %DRM_IVPU_BO_CACHED:
	 *
	 * Allocated BO will be cached on host side (WB) and snooped on the VPU side.
	 * This is the default caching mode.
	 *
	 * %DRM_IVPU_BO_UNCACHED:
	 *
	 * Not supported. Use DRM_IVPU_BO_WC instead.
	 *
	 * %DRM_IVPU_BO_WC:
	 *
	 * Allocated BO will use write combining buffer for writes but reads will be
	 * uncached.
	 */
	__u32 flags;

	/** @handle: Returned GEM object handle */
	__u32 handle;

	/** @vpu_addr: Returned VPU virtual address */
	__u64 vpu_addr;
};
257 
/**
 * struct drm_ivpu_bo_info - Query buffer object info
 *
 * Argument of the DRM_IOCTL_IVPU_BO_INFO ioctl.
 */
struct drm_ivpu_bo_info {
	/** @handle: Handle of the queried BO */
	__u32 handle;

	/** @flags: Returned flags used to create the BO */
	__u32 flags;

	/** @vpu_addr: Returned VPU virtual address */
	__u64 vpu_addr;

	/**
	 * @mmap_offset:
	 *
	 * Returned offset to be used in mmap(). 0 in case the BO is not mappable.
	 */
	__u64 mmap_offset;

	/** @size: Returned GEM object size, aligned to PAGE_SIZE */
	__u64 size;
};
281 
/* drm_ivpu_submit engines, used in the @engine field */
#define DRM_IVPU_ENGINE_COMPUTE 0
#define DRM_IVPU_ENGINE_COPY    1 /* Deprecated */
285 
/**
 * struct drm_ivpu_submit - Submit commands to the VPU
 *
 * Argument of the DRM_IOCTL_IVPU_SUBMIT ioctl.
 *
 * Execute a single command buffer on a given VPU engine.
 * Handles to all referenced buffer objects have to be provided in @buffers_ptr.
 *
 * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl.
 */
struct drm_ivpu_submit {
	/**
	 * @buffers_ptr:
	 *
	 * A pointer to an u32 array of GEM handles of the BOs required for this job.
	 * The number of elements in the array must be equal to the value given by @buffer_count.
	 *
	 * The first BO is the command buffer. The rest of array has to contain all
	 * BOs referenced from the command buffer.
	 */
	__u64 buffers_ptr;

	/** @buffer_count: Number of elements in the @buffers_ptr */
	__u32 buffer_count;

	/**
	 * @engine: Select the engine this job should be executed on
	 *
	 * %DRM_IVPU_ENGINE_COMPUTE:
	 *
	 * Performs Deep Learning Neural Compute Inference Operations
	 */
	__u32 engine;

	/** @flags: Reserved for future use - must be zero */
	__u32 flags;

	/**
	 * @commands_offset:
	 *
	 * Offset inside the first buffer in @buffers_ptr containing commands
	 * to be executed. The offset has to be 8-byte aligned.
	 */
	__u32 commands_offset;

	/**
	 * @priority:
	 *
	 * Priority to be set for related job command queue, can be one of the following:
	 * %DRM_IVPU_JOB_PRIORITY_DEFAULT
	 * %DRM_IVPU_JOB_PRIORITY_IDLE
	 * %DRM_IVPU_JOB_PRIORITY_NORMAL
	 * %DRM_IVPU_JOB_PRIORITY_FOCUS
	 * %DRM_IVPU_JOB_PRIORITY_REALTIME
	 */
	__u32 priority;
};
341 
/**
 * struct drm_ivpu_cmdq_submit - Submit commands to the VPU using explicit command queue
 *
 * Argument of the DRM_IOCTL_IVPU_CMDQ_SUBMIT ioctl; see %DRM_IVPU_CAP_MANAGE_CMDQ.
 *
 * Execute a single command buffer on a given command queue.
 * Handles to all referenced buffer objects have to be provided in @buffers_ptr.
 *
 * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl.
 */
struct drm_ivpu_cmdq_submit {
	/**
	 * @buffers_ptr:
	 *
	 * A pointer to an u32 array of GEM handles of the BOs required for this job.
	 * The number of elements in the array must be equal to the value given by @buffer_count.
	 *
	 * The first BO is the command buffer. The rest of array has to contain all
	 * BOs referenced from the command buffer.
	 */
	__u64 buffers_ptr;

	/** @buffer_count: Number of elements in the @buffers_ptr */
	__u32 buffer_count;

	/** @cmdq_id: ID for the command queue where job will be submitted */
	__u32 cmdq_id;

	/** @flags: Reserved for future use - must be zero */
	__u32 flags;

	/**
	 * @commands_offset:
	 *
	 * Offset inside the first buffer in @buffers_ptr containing commands
	 * to be executed. The offset has to be 8-byte aligned.
	 */
	__u32 commands_offset;
	/**
	 * @preempt_buffer_index:
	 *
	 * Index of the preemption buffer in the buffers_ptr array.
	 */
	__u32 preempt_buffer_index;
	/**
	 * @reserved:
	 *
	 * Reserved for future use. NOTE(review): presumably must be zero,
	 * matching the other reserved fields in this file - confirm against
	 * the driver.
	 */
	__u32 reserved;
};
386 
/* drm_ivpu_bo_wait job status codes */
#define DRM_IVPU_JOB_STATUS_SUCCESS 0
#define DRM_IVPU_JOB_STATUS_ABORTED 256

/**
 * struct drm_ivpu_bo_wait - Wait for BO to become inactive
 *
 * Argument of the DRM_IOCTL_IVPU_BO_WAIT ioctl.
 *
 * Blocks until a given buffer object becomes inactive.
 * With @timeout_ns set to 0 returns immediately.
 */
struct drm_ivpu_bo_wait {
	/** @handle: Handle to the buffer object to be waited on */
	__u32 handle;

	/** @flags: Reserved for future use - must be zero */
	__u32 flags;

	/** @timeout_ns: Absolute timeout in nanoseconds (may be zero) */
	__s64 timeout_ns;

	/**
	 * @job_status:
	 *
	 * Job status code which is updated after the job is completed.
	 * %DRM_IVPU_JOB_STATUS_SUCCESS or device specific error otherwise.
	 * Valid only if @handle points to a command buffer.
	 */
	__u32 job_status;

	/** @pad: Padding - must be zero */
	__u32 pad;
};
419 
/**
 * struct drm_ivpu_metric_streamer_start - Start collecting metric data
 *
 * Argument of the DRM_IOCTL_IVPU_METRIC_STREAMER_START ioctl.
 */
struct drm_ivpu_metric_streamer_start {
	/** @metric_group_mask: Indicates metric streamer instance */
	__u64 metric_group_mask;
	/** @sampling_period_ns: Sampling period in nanoseconds */
	__u64 sampling_period_ns;
	/**
	 * @read_period_samples:
	 *
	 * Number of samples after which user space will try to read the data.
	 * Reading the data after significantly longer period may cause data loss.
	 */
	__u32 read_period_samples;
	/** @sample_size: Returned size of a single sample in bytes */
	__u32 sample_size;
	/** @max_data_size: Returned max @data_size from %DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA */
	__u32 max_data_size;
};
440 
/**
 * struct drm_ivpu_metric_streamer_get_data - Copy collected metric data
 *
 * Argument of both the DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA and
 * DRM_IOCTL_IVPU_METRIC_STREAMER_GET_INFO ioctls.
 */
struct drm_ivpu_metric_streamer_get_data {
	/** @metric_group_mask: Indicates metric streamer instance */
	__u64 metric_group_mask;
	/** @buffer_ptr: A pointer to a destination for the copied data */
	__u64 buffer_ptr;
	/** @buffer_size: Size of the destination buffer */
	__u64 buffer_size;
	/**
	 * @data_size: Returned size of copied metric data
	 *
	 * If the @buffer_size is zero, returns the amount of data ready to be copied.
	 */
	__u64 data_size;
};
458 
/* Command queue flags */
#define DRM_IVPU_CMDQ_FLAG_TURBO 0x00000001

/**
 * struct drm_ivpu_cmdq_create - Create command queue for job submission
 *
 * Argument of the DRM_IOCTL_IVPU_CMDQ_CREATE ioctl. The returned @cmdq_id
 * is used with DRM_IOCTL_IVPU_CMDQ_SUBMIT and DRM_IOCTL_IVPU_CMDQ_DESTROY.
 */
struct drm_ivpu_cmdq_create {
	/** @cmdq_id: Returned ID of created command queue */
	__u32 cmdq_id;
	/**
	 * @priority:
	 *
	 * Priority to be set for related job command queue, can be one of the following:
	 * %DRM_IVPU_JOB_PRIORITY_DEFAULT
	 * %DRM_IVPU_JOB_PRIORITY_IDLE
	 * %DRM_IVPU_JOB_PRIORITY_NORMAL
	 * %DRM_IVPU_JOB_PRIORITY_FOCUS
	 * %DRM_IVPU_JOB_PRIORITY_REALTIME
	 */
	__u32 priority;
	/**
	 * @flags:
	 *
	 * Supported flags:
	 *
	 * %DRM_IVPU_CMDQ_FLAG_TURBO
	 *
	 * Enable low-latency mode for the command queue. The NPU will maximize performance
	 * when executing jobs from such queue at the cost of increased power usage.
	 */
	__u32 flags;
};
491 
/**
 * struct drm_ivpu_cmdq_destroy - Destroy a command queue
 *
 * Argument of the DRM_IOCTL_IVPU_CMDQ_DESTROY ioctl.
 */
struct drm_ivpu_cmdq_destroy {
	/** @cmdq_id: ID of command queue to destroy, as returned by DRM_IOCTL_IVPU_CMDQ_CREATE */
	__u32 cmdq_id;
};
499 
/**
 * struct drm_ivpu_metric_streamer_stop - Stop collecting metric data
 *
 * Argument of the DRM_IOCTL_IVPU_METRIC_STREAMER_STOP ioctl.
 */
struct drm_ivpu_metric_streamer_stop {
	/** @metric_group_mask: Indicates metric streamer instance */
	__u64 metric_group_mask;
};
507 
508 #if defined(__cplusplus)
509 }
510 #endif
511 
512 #endif /* __UAPI_IVPU_DRM_H__ */
513