// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/types.h>

#include <drm/drm_drv.h>
#include <generated/xe_wa_oob.h>
#include <uapi/drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_eu_stall.h"
#include "xe_force_wake.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_macros.h"
#include "xe_observation.h"
#include "xe_pm.h"
#include "xe_trace.h"
#include "xe_wa.h"

#include "regs/xe_eu_stall_regs.h"
#include "regs/xe_gt_regs.h"

#define POLL_PERIOD_MS	5

static size_t per_xecore_buf_size = SZ_512K;

struct per_xecore_buf {
	/* Buffer vaddr */
	u8 *vaddr;
	/* Write pointer */
	u32 write;
	/* Read pointer */
	u32 read;
};

struct xe_eu_stall_data_stream {
	bool pollin;
	bool enabled;
	int wait_num_reports;
	int sampling_rate_mult;
	wait_queue_head_t poll_wq;
	size_t data_record_size;
	size_t per_xecore_buf_size;
	unsigned int fw_ref;

	struct xe_gt *gt;
	struct xe_bo *bo;
	/* Lock to protect data buffer pointers */
	struct mutex xecore_buf_lock;
	struct per_xecore_buf *xecore_buf;
	struct {
		bool reported_to_user;
		xe_dss_mask_t mask;
	} data_drop;
	struct delayed_work buf_poll_work;
};

struct xe_eu_stall_gt {
	/* Lock to protect stream */
	struct mutex stream_lock;
	/* EU stall data stream */
	struct xe_eu_stall_data_stream *stream;
	/* Workqueue to schedule buffer pointers polling work */
	struct workqueue_struct *buf_ptr_poll_wq;
};

/**
 * struct eu_stall_open_properties - EU stall sampling properties received
 *				     from user space at open.
 * @sampling_rate_mult: EU stall sampling rate multiplier.
 *			HW will sample every (sampling_rate_mult x 251) cycles.
 * @wait_num_reports: Minimum number of EU stall data reports to unblock poll().
 * @gt: GT on which EU stall data will be captured.
 */
struct eu_stall_open_properties {
	int sampling_rate_mult;
	int wait_num_reports;
	struct xe_gt *gt;
};

/*
 * EU stall data format for PVC
 */
struct xe_eu_stall_data_pvc {
	__u64 ip_addr:29;	  /* Bits 0  to 28  */
	__u64 active_count:8;	  /* Bits 29 to 36  */
	__u64 other_count:8;	  /* Bits 37 to 44  */
	__u64 control_count:8;	  /* Bits 45 to 52  */
	__u64 pipestall_count:8;  /* Bits 53 to 60  */
	__u64 send_count:8;	  /* Bits 61 to 68  */
	__u64 dist_acc_count:8;	  /* Bits 69 to 76  */
	__u64 sbid_count:8;	  /* Bits 77 to 84  */
	__u64 sync_count:8;	  /* Bits 85 to 92  */
	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
	__u64 unused_bits:27;
	__u64 unused[6];
} __packed;

/*
 * EU stall data format for Xe2 arch GPUs (LNL, BMG).
 */
struct xe_eu_stall_data_xe2 {
	__u64 ip_addr:29;	  /* Bits 0  to 28  */
	__u64 tdr_count:8;	  /* Bits 29 to 36  */
	__u64 other_count:8;	  /* Bits 37 to 44  */
	__u64 control_count:8;	  /* Bits 45 to 52  */
	__u64 pipestall_count:8;  /* Bits 53 to 60  */
	__u64 send_count:8;	  /* Bits 61 to 68  */
	__u64 dist_acc_count:8;	  /* Bits 69 to 76  */
	__u64 sbid_count:8;	  /* Bits 77 to 84  */
	__u64 sync_count:8;	  /* Bits 85 to 92  */
	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
	__u64 active_count:8;	  /* Bits 101 to 108 */
	__u64 ex_id:3;		  /* Bits 109 to 111 */
	__u64 end_flag:1;	  /* Bit 112 */
	__u64 unused_bits:15;
	__u64 unused[6];
} __packed;

/*
 * EU stall data format for Xe3p arch GPUs.
 */
struct xe_eu_stall_data_xe3p {
	__u64 ip_addr:61;	  /* Bits 0  to 60  */
	__u64 tdr_count:8;	  /* Bits 61 to 68  */
	__u64 other_count:8;	  /* Bits 69 to 76  */
	__u64 control_count:8;	  /* Bits 77 to 84  */
	__u64 pipestall_count:8;  /* Bits 85 to 92  */
	__u64 send_count:8;	  /* Bits 93 to 100 */
	__u64 dist_acc_count:8;	  /* Bits 101 to 108 */
	__u64 sbid_count:8;	  /* Bits 109 to 116 */
	__u64 sync_count:8;	  /* Bits 117 to 124 */
	__u64 inst_fetch_count:8; /* Bits 125 to 132 */
	__u64 active_count:8;	  /* Bits 133 to 140 */
	__u64 ex_id:3;		  /* Bits 141 to 143 */
	__u64 end_flag:1;	  /* Bit 144 */
	__u64 unused_bits:47;
	__u64 unused[5];
} __packed;
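
/*
 * Note: with the trailing unused bits and unused[] padding included, each of
 * the three record layouts above works out to 64 bytes, which matches the
 * 64B granularity assumed by num_data_rows() and by the hardware read/write
 * pointers (reported and programmed in 64B units, hence the << 6 / >> 6
 * conversions below).
 */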

const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7};

/**
 * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
 *
 * @num_rates: Pointer to a u32 to return the number of sampling rates.
 * @rates: Double u64 pointer to return the array of sampling rates.
 *
 * Stores the number of sampling rates and a pointer to the array of
 * sampling rates in the input pointers.
 *
 * Returns: Size of the EU stall sampling rates array in bytes.
 */
size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates)
{
	*num_rates = ARRAY_SIZE(eu_stall_sampling_rates);
	*rates = eu_stall_sampling_rates;

	return sizeof(eu_stall_sampling_rates);
}

/**
 * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
 *
 * Returns: The per XeCore buffer size used to allocate the per GT
 * EU stall data buffer.
 */
size_t xe_eu_stall_get_per_xecore_buf_size(void)
{
	return per_xecore_buf_size;
}

/**
 * xe_eu_stall_data_record_size - get EU stall data record size.
 *
 * @xe: Pointer to a Xe device.
 *
 * Returns: EU stall data record size.
 */
size_t xe_eu_stall_data_record_size(struct xe_device *xe)
{
	size_t record_size = 0;

	if (GRAPHICS_VER(xe) >= 35)
		record_size = sizeof(struct xe_eu_stall_data_xe3p);
	else if (GRAPHICS_VER(xe) >= 20)
		record_size = sizeof(struct xe_eu_stall_data_xe2);
	else if (xe->info.platform == XE_PVC)
		record_size = sizeof(struct xe_eu_stall_data_pvc);

	xe_assert(xe, is_power_of_2(record_size));

	return record_size;
}

/**
 * num_data_rows - Return the number of EU stall data rows of 64B each
 *		   for a given data size.
 *
 * @data_size: EU stall data size
 */
static u32 num_data_rows(u32 data_size)
{
	return data_size >> 6;
}

static void xe_eu_stall_fini(void *arg)
{
	struct xe_gt *gt = arg;

	destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
	mutex_destroy(&gt->eu_stall->stream_lock);
	kfree(gt->eu_stall);
}

/**
 * xe_eu_stall_init() - Allocate and initialize GT level EU stall data
 *			structure xe_eu_stall_gt within struct xe_gt.
 *
 * @gt: GT being initialized.
 *
 * Returns: zero on success or a negative error code.
 */
int xe_eu_stall_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	int ret;

	if (!xe_eu_stall_supported_on_platform(xe))
		return 0;

	gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL);
	if (!gt->eu_stall) {
		ret = -ENOMEM;
		goto exit;
	}

	mutex_init(&gt->eu_stall->stream_lock);

	gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
	if (!gt->eu_stall->buf_ptr_poll_wq) {
		ret = -ENOMEM;
		goto exit_free;
	}

	return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
exit_free:
	mutex_destroy(&gt->eu_stall->stream_lock);
	kfree(gt->eu_stall);
exit:
	return ret;
}

static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value,
					   struct eu_stall_open_properties *props)
{
	value = div_u64(value, 251);
	if (value == 0 || value > 7) {
		drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value);
		return -EINVAL;
	}
	props->sampling_rate_mult = value;
	return 0;
}
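
/*
 * Example: userspace passes the sampling rate in cycles, e.g.
 * DRM_XE_EU_STALL_PROP_SAMPLE_RATE = 1004; div_u64(1004, 251) gives a
 * multiplier of 4, i.e. one sample every 4 * 251 = 1004 cycles. Rates that
 * are not an exact multiple of 251 are rounded down, while values below 251
 * or at/above 8 * 251 (2008) map to a multiplier outside 1..7 and are
 * rejected with -EINVAL.
 */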

static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value,
					      struct eu_stall_open_properties *props)
{
	props->wait_num_reports = value;

	return 0;
}

static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value,
				   struct eu_stall_open_properties *props)
{
	struct xe_gt *gt = xe_device_get_gt(xe, value);

	if (!gt) {
		drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value);
		return -EINVAL;
	}
	props->gt = gt;
	return 0;
}

typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value,
					struct eu_stall_open_properties *props);

static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = {
	[DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate,
	[DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports,
	[DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id,
};

static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension,
					     struct eu_stall_open_properties *props)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) ||
	    XE_IOCTL_DBG(xe, !ext.property) || XE_IOCTL_DBG(xe, ext.pad))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs));
	return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props);
}

typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension,
					     struct eu_stall_open_properties *props);
static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = {
	[DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS	5
static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension,
				       int ext_number, struct eu_stall_open_properties *props)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs));
	err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props);

	return 0;
}
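
/*
 * Userspace chains these extensions through drm_xe_user_extension.next_extension
 * when opening a stream: typically one DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY
 * node per property (GT ID, sampling rate, wait_num_reports). At most
 * MAX_USER_EXTENSIONS nodes are walked; longer chains fail with -E2BIG.
 */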

/**
 * buf_data_size - Calculate the number of bytes in a circular buffer
 *		   given the read and write pointers and the size of
 *		   the buffer.
 *
 * @buf_size: Size of the circular buffer
 * @read_ptr: Read pointer with an additional overflow bit
 * @write_ptr: Write pointer with an additional overflow bit
 *
 * Since the read and write pointers have an additional overflow bit,
 * this function calculates the offsets from the pointers and uses the
 * offsets to calculate the data size in the buffer.
 *
 * Returns: number of bytes of data in the buffer
 */
static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
{
	u32 read_offset, write_offset, size = 0;

	if (read_ptr == write_ptr)
		goto exit;

	read_offset = read_ptr & (buf_size - 1);
	write_offset = write_ptr & (buf_size - 1);

	if (write_offset > read_offset)
		size = write_offset - read_offset;
	else
		size = buf_size - read_offset + write_offset;
exit:
	return size;
}
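
/*
 * Worked example with the default 512K per-XeCore buffer: for
 * buf_size = 0x80000, read_ptr = 0x7ffc0 and write_ptr = 0x80040 (overflow
 * bit set), read_offset = 0x7ffc0 and write_offset = 0x40. The write offset
 * has wrapped below the read offset, so size = 0x80000 - 0x7ffc0 + 0x40 =
 * 0x80 bytes, i.e. two 64B records.
 */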

/**
 * eu_stall_data_buf_poll - Poll for EU stall data in the buffer.
 *
 * @stream: xe EU stall data stream instance
 *
 * Returns: true if the EU stall buffer contains minimum stall data as
 * specified by the event report count, else false.
 */
static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
{
	u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0;
	u32 buf_size = stream->per_xecore_buf_size;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	bool min_data_present = false;
	u16 group, instance;
	unsigned int xecore;

	mutex_lock(&stream->xecore_buf_lock);
	for_each_dss_steering(xecore, gt, group, instance) {
		xecore_buf = &stream->xecore_buf[xecore];
		read_ptr = xecore_buf->read;
		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT,
						       group, instance);
		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
		write_ptr <<= 6;
		write_ptr &= ((buf_size << 1) - 1);
		if (!min_data_present) {
			total_data += buf_data_size(buf_size, read_ptr, write_ptr);
			if (num_data_rows(total_data) >= stream->wait_num_reports)
				min_data_present = true;
		}
		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
			set_bit(xecore, stream->data_drop.mask);
		xecore_buf->write = write_ptr;
	}
	mutex_unlock(&stream->xecore_buf_lock);

	return min_data_present;
}

static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 write_ptr_reg;

	/* On PVC, the overflow bit has to be cleared by writing 1 to it.
	 * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it.
	 */
	if (GRAPHICS_VER(xe) >= 20)
		write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
	else
		write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);

	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance);
}

static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream,
				     char __user *buf, size_t count,
				     size_t *total_data_size, struct xe_gt *gt,
				     u16 group, u16 instance, unsigned int xecore)
{
	size_t read_data_size, copy_size, buf_size;
	u32 read_ptr_reg, read_ptr, write_ptr;
	u8 *xecore_start_vaddr, *read_vaddr;
	struct per_xecore_buf *xecore_buf;
	u32 read_offset, write_offset;

	/* Hardware increments the read and write pointers such that they can
	 * overflow into one additional bit. For example, a 256KB size buffer
	 * offset pointer needs 18 bits. But HW uses 19 bits for the read and
	 * write pointers. This technique avoids wasting a slot in the buffer.
	 * Read and write offsets are calculated from the pointers in order to
	 * check if the write pointer has wrapped around the array.
	 */
	xecore_buf = &stream->xecore_buf[xecore];
	xecore_start_vaddr = xecore_buf->vaddr;
	read_ptr = xecore_buf->read;
	write_ptr = xecore_buf->write;
	buf_size = stream->per_xecore_buf_size;

	read_data_size = buf_data_size(buf_size, read_ptr, write_ptr);
	/* Read only the data that the user space buffer can accommodate */
	read_data_size = min_t(size_t, count - *total_data_size, read_data_size);
	if (read_data_size == 0)
		goto exit_drop;

	read_offset = read_ptr & (buf_size - 1);
	write_offset = write_ptr & (buf_size - 1);
	read_vaddr = xecore_start_vaddr + read_offset;

	if (write_offset > read_offset) {
		if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size))
			return -EFAULT;
	} else {
		if (read_data_size >= buf_size - read_offset)
			copy_size = buf_size - read_offset;
		else
			copy_size = read_data_size;
		if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size))
			return -EFAULT;
		if (copy_to_user(buf + *total_data_size + copy_size,
				 xecore_start_vaddr, read_data_size - copy_size))
			return -EFAULT;
	}

	*total_data_size += read_data_size;
	read_ptr += read_data_size;

	/* Read pointer can overflow into one additional bit */
	read_ptr &= (buf_size << 1) - 1;
	read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
	read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
	xecore_buf->read = read_ptr;
	trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
				    read_data_size, *total_data_size);
exit_drop:
	/* Clear drop bit (if set) after any data was read or if the buffer was empty.
	 * Drop bit can be set even if the buffer is empty as the buffer may have been
	 * emptied in the previous read() and the data drop bit was set during the
	 * previous read().
	 */
	if (test_bit(xecore, stream->data_drop.mask)) {
		clear_dropped_eviction_line_bit(gt, group, instance);
		clear_bit(xecore, stream->data_drop.mask);
	}
	return 0;
}

/**
 * xe_eu_stall_stream_read_locked - copy EU stall counters data from the
 *				    per xecore buffers to the userspace buffer
 * @stream: A stream opened for EU stall count metrics
 * @file: An xe EU stall data stream file
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 *
 * Returns: Number of bytes copied or a negative error code
 * If we've successfully copied any data then reporting that takes
 * precedence over any internal error status, so the data isn't lost.
 */
static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream,
					      struct file *file, char __user *buf,
					      size_t count)
{
	struct xe_gt *gt = stream->gt;
	size_t total_size = 0;
	u16 group, instance;
	unsigned int xecore;
	int ret = 0;

	mutex_lock(&stream->xecore_buf_lock);
	if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) {
		if (!stream->data_drop.reported_to_user) {
			stream->data_drop.reported_to_user = true;
			xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n",
				  XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask);
			mutex_unlock(&stream->xecore_buf_lock);
			return -EIO;
		}
		stream->data_drop.reported_to_user = false;
	}

	for_each_dss_steering(xecore, gt, group, instance) {
		ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
						gt, group, instance, xecore);
		if (ret || count == total_size)
			break;
	}
	mutex_unlock(&stream->xecore_buf_lock);
	return total_size ?: (ret ?: -EAGAIN);
}

/*
 * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
 * before calling read().
 *
 * Returns: The number of bytes copied or a negative error code on failure.
 *	    -EIO if HW drops any EU stall data when the buffer is full.
 */
static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
				       size_t count, loff_t *ppos)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	ssize_t ret, aligned_count;

	aligned_count = ALIGN_DOWN(count, stream->data_record_size);
	if (aligned_count == 0)
		return -EINVAL;

	if (!stream->enabled) {
		xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
		return -EINVAL;
	}

	if (!(file->f_flags & O_NONBLOCK)) {
		do {
			ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
			if (ret)
				return -EINTR;

			mutex_lock(&gt->eu_stall->stream_lock);
			ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
			mutex_unlock(&gt->eu_stall->stream_lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&gt->eu_stall->stream_lock);
		ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
		mutex_unlock(&gt->eu_stall->stream_lock);
	}

	/*
	 * This may not work correctly if the user buffer is very small.
	 * We don't want to block the next read() when there is data in the buffer
	 * now, but couldn't be accommodated in the small user buffer.
	 */
	stream->pollin = false;

	return ret;
}

static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;

	mutex_destroy(&stream->xecore_buf_lock);
	gt->eu_stall->stream = NULL;
	kfree(stream);
}

static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream)
{
	xe_bo_unpin_map_no_vm(stream->bo);
	kfree(stream->xecore_buf);
}

static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
				      u16 last_xecore)
{
	struct xe_tile *tile = stream->gt->tile;
	struct xe_bo *bo;
	u32 size;

	stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
	if (!stream->xecore_buf)
		return -ENOMEM;

	size = stream->per_xecore_buf_size * last_xecore;

	bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel,
					  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false);
	if (IS_ERR(bo)) {
		kfree(stream->xecore_buf);
		return PTR_ERR(bo);
	}

	XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
	stream->bo = bo;

	return 0;
}

static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
{
	u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	u16 group, instance;
	int xecore;

	/* Take runtime pm ref and forcewake to disable RC6 */
	xe_pm_runtime_get(gt_to_xe(gt));
	stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
	if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FW_RENDER)) {
		xe_gt_err(gt, "Failed to get RENDER forcewake\n");
		xe_pm_runtime_put(gt_to_xe(gt));
		return -ETIMEDOUT;
	}

	if (XE_GT_WA(gt, 22016596838))
		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
					  _MASKED_BIT_ENABLE(DISABLE_DOP_GATING));

	for_each_dss_steering(xecore, gt, group, instance) {
		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance);
		/* Clear any drop bits set and not cleared in the previous session. */
		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
			clear_dropped_eviction_line_bit(gt, group, instance);
		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
		read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
		read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
		/* Initialize the read pointer to the write pointer */
		xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
		write_ptr <<= 6;
		write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
		xecore_buf = &stream->xecore_buf[xecore];
		xecore_buf->write = write_ptr;
		xecore_buf->read = write_ptr;
	}
	stream->data_drop.reported_to_user = false;
	bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS);

	reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE,
				  REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) |
				  REG_FIELD_PREP(EUSTALL_SAMPLE_RATE,
						 stream->sampling_rate_mult));
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value);
	/* GGTT addresses can never be > 32 bits */
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0);
	reg_value = xe_bo_ggtt_addr(stream->bo);
	reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ,
				    stream->per_xecore_buf_size / SZ_256K);
	reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING;
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value);

	return 0;
}
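
/*
 * A note on the register programming above: XEHPC_EUSTALL_BASE packs the GGTT
 * address of the buffer (64B aligned, see the XE_WARN_ON() at allocation time)
 * together with the per-XeCore buffer size in 256K units and the sampling
 * enable bit; XEHPC_EUSTALL_BASE_UPPER is written as 0 since GGTT offsets fit
 * in 32 bits.
 */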

static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
{
	struct xe_eu_stall_data_stream *stream =
		container_of(work, typeof(*stream), buf_poll_work.work);
	struct xe_gt *gt = stream->gt;

	if (eu_stall_data_buf_poll(stream)) {
		stream->pollin = true;
		wake_up(&stream->poll_wq);
	}
	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
			   &stream->buf_poll_work,
			   msecs_to_jiffies(POLL_PERIOD_MS));
}

static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
				   struct eu_stall_open_properties *props)
{
	unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	xe_dss_mask_t all_xecores;
	u16 group, instance;
	u32 vaddr_offset;
	int ret;

	bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
		  XE_MAX_DSS_FUSE_BITS);
	num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS);
	last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1;

	max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores);
	if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) {
		xe_gt_dbg(gt, "Invalid EU stall event report count %u\n",
			  props->wait_num_reports);
		xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n",
			  max_wait_num_reports);
		return -EINVAL;
	}

	init_waitqueue_head(&stream->poll_wq);
	mutex_init(&stream->xecore_buf_lock);
	INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
	stream->per_xecore_buf_size = per_xecore_buf_size;
	stream->sampling_rate_mult = props->sampling_rate_mult;
	stream->wait_num_reports = props->wait_num_reports;
	stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));

	ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
	if (ret)
		return ret;

	for_each_dss_steering(xecore, gt, group, instance) {
		xecore_buf = &stream->xecore_buf[xecore];
		vaddr_offset = xecore * stream->per_xecore_buf_size;
		xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset;
	}
	return 0;
}

static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream,
					       struct file *file, poll_table *wait)
{
	__poll_t events = 0;

	poll_wait(file, &stream->poll_wq, wait);

	if (stream->pollin)
		events |= EPOLLIN;

	return events;
}

static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	__poll_t ret;

	mutex_lock(&gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_poll_locked(stream, file, wait);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return ret;
}

static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;
	int ret = 0;

	if (stream->enabled)
		return ret;

	stream->enabled = true;

	ret = xe_eu_stall_stream_enable(stream);

	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
			   &stream->buf_poll_work,
			   msecs_to_jiffies(POLL_PERIOD_MS));
	return ret;
}

static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;

	if (!stream->enabled)
		return 0;

	stream->enabled = false;

	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);

	cancel_delayed_work_sync(&stream->buf_poll_work);

	if (XE_GT_WA(gt, 22016596838))
		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
					  _MASKED_BIT_DISABLE(DISABLE_DOP_GATING));

	xe_force_wake_put(gt_to_fw(gt), stream->fw_ref);
	xe_pm_runtime_put(gt_to_xe(gt));

	return 0;
}

static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream,
					    unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case DRM_XE_OBSERVATION_IOCTL_ENABLE:
		return xe_eu_stall_enable_locked(stream);
	case DRM_XE_OBSERVATION_IOCTL_DISABLE:
		return xe_eu_stall_disable_locked(stream);
	}

	return -EINVAL;
}

static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	long ret;

	mutex_lock(&gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return ret;
}

static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;

	drm_dev_put(&gt->tile->xe->drm);

	mutex_lock(&gt->eu_stall->stream_lock);
	xe_eu_stall_disable_locked(stream);
	xe_eu_stall_data_buf_destroy(stream);
	xe_eu_stall_stream_free(stream);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return 0;
}

static const struct file_operations fops_eu_stall = {
	.owner		= THIS_MODULE,
	.llseek		= noop_llseek,
	.release	= xe_eu_stall_stream_close,
	.poll		= xe_eu_stall_stream_poll,
	.read		= xe_eu_stall_stream_read,
	.unlocked_ioctl	= xe_eu_stall_stream_ioctl,
	.compat_ioctl	= xe_eu_stall_stream_ioctl,
};

static int xe_eu_stall_stream_open_locked(struct drm_device *dev,
					  struct eu_stall_open_properties *props,
					  struct drm_file *file)
{
	struct xe_eu_stall_data_stream *stream;
	struct xe_gt *gt = props->gt;
	unsigned long f_flags = 0;
	int ret, stream_fd;

	/* Only one session can be active at any time */
	if (gt->eu_stall->stream) {
		xe_gt_dbg(gt, "EU stall sampling session already active\n");
		return -EBUSY;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream)
		return -ENOMEM;

	gt->eu_stall->stream = stream;
	stream->gt = gt;

	ret = xe_eu_stall_stream_init(stream, props);
	if (ret) {
		xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret);
		goto err_free;
	}

	stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags);
	if (stream_fd < 0) {
		ret = stream_fd;
		xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret);
		goto err_destroy;
	}

	/* Take a reference on the driver that will be kept with stream_fd
	 * until its release.
	 */
	drm_dev_get(&gt->tile->xe->drm);

	return stream_fd;

err_destroy:
	xe_eu_stall_data_buf_destroy(stream);
err_free:
	xe_eu_stall_stream_free(stream);
	return ret;
}

/**
 * xe_eu_stall_stream_open - Open a xe EU stall data stream fd
 *
 * @dev: DRM device pointer
 * @data: pointer to first struct @drm_xe_ext_set_property in
 *	  the chain of input properties from the user space.
 * @file: DRM file pointer
 *
 * This function opens an EU stall data stream with input properties from
 * the user space.
 *
 * Returns: EU stall data stream fd on success or a negative error code.
 */
int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct eu_stall_open_properties props = {};
	int ret;

	if (!xe_eu_stall_supported_on_platform(xe)) {
		drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
		return -ENODEV;
	}

	if (xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n");
		return -EACCES;
	}

	/* Initialize and set default values */
	props.wait_num_reports = 1;
	props.sampling_rate_mult = 4;

	ret = xe_eu_stall_user_extensions(xe, data, 0, &props);
	if (ret)
		return ret;

	if (!props.gt) {
		drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n");
		return -EINVAL;
	}

	mutex_lock(&props.gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_open_locked(dev, &props, file);
	mutex_unlock(&props.gt->eu_stall->stream_lock);

	return ret;
}