// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/types.h>

#include <drm/drm_drv.h>
#include <generated/xe_wa_oob.h>
#include <uapi/drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_eu_stall.h"
#include "xe_force_wake.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_macros.h"
#include "xe_observation.h"
#include "xe_pm.h"
#include "xe_trace.h"
#include "xe_wa.h"

#include "regs/xe_eu_stall_regs.h"
#include "regs/xe_gt_regs.h"

#define POLL_PERIOD_MS 5
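/* Interval at which the per-XeCore buffer write pointers are polled for new data */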

static size_t per_xecore_buf_size = SZ_512K;

struct per_xecore_buf {
	/* Buffer vaddr */
	u8 *vaddr;
	/* Write pointer */
	u32 write;
	/* Read pointer */
	u32 read;
};

struct xe_eu_stall_data_stream {
	bool pollin;
	bool enabled;
	int wait_num_reports;
	int sampling_rate_mult;
	wait_queue_head_t poll_wq;
	size_t data_record_size;
	size_t per_xecore_buf_size;

	struct xe_gt *gt;
	struct xe_bo *bo;
	/* Lock to protect data buffer pointers */
	struct mutex xecore_buf_lock;
	struct per_xecore_buf *xecore_buf;
	struct {
		bool reported_to_user;
		xe_dss_mask_t mask;
	} data_drop;
	struct delayed_work buf_poll_work;
};

struct xe_eu_stall_gt {
	/* Lock to protect stream */
	struct mutex stream_lock;
	/* EU stall data stream */
	struct xe_eu_stall_data_stream *stream;
	/* Workqueue to schedule buffer pointers polling work */
	struct workqueue_struct *buf_ptr_poll_wq;
};

/**
 * struct eu_stall_open_properties - EU stall sampling properties received
 *				     from user space at open.
 * @sampling_rate_mult: EU stall sampling rate multiplier.
 *			HW will sample every (sampling_rate_mult x 251) cycles.
 * @wait_num_reports: Minimum number of EU stall data reports to unblock poll().
 * @gt: GT on which EU stall data will be captured.
 */
struct eu_stall_open_properties {
	int sampling_rate_mult;
	int wait_num_reports;
	struct xe_gt *gt;
};

/*
 * EU stall data format for PVC
 */
struct xe_eu_stall_data_pvc {
	__u64 ip_addr:29;	  /* Bits 0 to 28 */
	__u64 active_count:8;	  /* Bits 29 to 36 */
	__u64 other_count:8;	  /* Bits 37 to 44 */
	__u64 control_count:8;	  /* Bits 45 to 52 */
	__u64 pipestall_count:8;  /* Bits 53 to 60 */
	__u64 send_count:8;	  /* Bits 61 to 68 */
	__u64 dist_acc_count:8;	  /* Bits 69 to 76 */
	__u64 sbid_count:8;	  /* Bits 77 to 84 */
	__u64 sync_count:8;	  /* Bits 85 to 92 */
	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
	__u64 unused_bits:27;
	__u64 unused[6];
} __packed;

/*
 * EU stall data format for Xe2 arch GPUs (LNL, BMG).
 */
struct xe_eu_stall_data_xe2 {
	__u64 ip_addr:29;	  /* Bits 0 to 28 */
	__u64 tdr_count:8;	  /* Bits 29 to 36 */
	__u64 other_count:8;	  /* Bits 37 to 44 */
	__u64 control_count:8;	  /* Bits 45 to 52 */
	__u64 pipestall_count:8;  /* Bits 53 to 60 */
	__u64 send_count:8;	  /* Bits 61 to 68 */
	__u64 dist_acc_count:8;	  /* Bits 69 to 76 */
	__u64 sbid_count:8;	  /* Bits 77 to 84 */
	__u64 sync_count:8;	  /* Bits 85 to 92 */
	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
	__u64 active_count:8;	  /* Bits 101 to 108 */
	__u64 ex_id:3;		  /* Bits 109 to 111 */
	__u64 end_flag:1;	  /* Bit 112 */
	__u64 unused_bits:15;
	__u64 unused[6];
} __packed;
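/*
 * Both record layouts above pack the counters into 16 bytes and pad the
 * record out to 64 bytes, i.e. one 64B cache line per EU stall data record.
 */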

const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7};

/**
 * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
 *
 * @num_rates: Pointer to a u32 to return the number of sampling rates.
 * @rates: Double pointer set to the array of sampling rates (in GPU clock cycles).
 *
 * Stores the number of sampling rates and a pointer to the array of
 * sampling rates in the input pointers.
 *
 * Returns: Size of the EU stall sampling rates array in bytes.
 */
size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates)
{
	*num_rates = ARRAY_SIZE(eu_stall_sampling_rates);
	*rates = eu_stall_sampling_rates;

	return sizeof(eu_stall_sampling_rates);
}

/**
 * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
 *
 * Returns: The per XeCore buffer size used to allocate the per GT
 *	    EU stall data buffer.
 */
size_t xe_eu_stall_get_per_xecore_buf_size(void)
{
	return per_xecore_buf_size;
}

/**
 * xe_eu_stall_data_record_size - get EU stall data record size.
 *
 * @xe: Pointer to a Xe device.
 *
 * Returns: EU stall data record size.
 */
size_t xe_eu_stall_data_record_size(struct xe_device *xe)
{
	size_t record_size = 0;

	if (xe->info.platform == XE_PVC)
		record_size = sizeof(struct xe_eu_stall_data_pvc);
	else if (GRAPHICS_VER(xe) >= 20)
		record_size = sizeof(struct xe_eu_stall_data_xe2);

	xe_assert(xe, is_power_of_2(record_size));

	return record_size;
}

/**
 * num_data_rows - Return the number of EU stall data rows of 64B each
 *		   for a given data size.
 *
 * @data_size: EU stall data size
 */
static u32 num_data_rows(u32 data_size)
{
	return data_size >> 6;
}

static void xe_eu_stall_fini(void *arg)
{
	struct xe_gt *gt = arg;

	destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
	mutex_destroy(&gt->eu_stall->stream_lock);
	kfree(gt->eu_stall);
}

/**
 * xe_eu_stall_init() - Allocate and initialize GT level EU stall data
 *			structure xe_eu_stall_gt within struct xe_gt.
 *
 * @gt: GT being initialized.
 *
 * Returns: zero on success or a negative error code.
 */
int xe_eu_stall_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	int ret;

	if (!xe_eu_stall_supported_on_platform(xe))
		return 0;

	gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL);
	if (!gt->eu_stall) {
		ret = -ENOMEM;
		goto exit;
	}

	mutex_init(&gt->eu_stall->stream_lock);

	gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
	if (!gt->eu_stall->buf_ptr_poll_wq) {
		ret = -ENOMEM;
		goto exit_free;
	}

	return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
exit_free:
	mutex_destroy(&gt->eu_stall->stream_lock);
	kfree(gt->eu_stall);
exit:
	return ret;
}

static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value,
					   struct eu_stall_open_properties *props)
{
	value = div_u64(value, 251);
	if (value == 0 || value > 7) {
		drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value);
		return -EINVAL;
	}
	props->sampling_rate_mult = value;
	return 0;
}

static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value,
					      struct eu_stall_open_properties *props)
{
	props->wait_num_reports = value;

	return 0;
}

static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value,
				   struct eu_stall_open_properties *props)
{
	if (value >= xe->info.gt_count) {
		drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value);
		return -EINVAL;
	}
	props->gt = xe_device_get_gt(xe, value);
	return 0;
}

typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value,
					struct eu_stall_open_properties *props);

static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = {
	[DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate,
	[DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports,
	[DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id,
};

static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension,
					     struct eu_stall_open_properties *props)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs));
	return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props);
}

typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension,
					     struct eu_stall_open_properties *props);
static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = {
	[DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS 5
static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension,
				       int ext_number, struct eu_stall_open_properties *props)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs));
	err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props);

	return 0;
}

/**
 * buf_data_size - Calculate the number of bytes in a circular buffer
 *		   given the read and write pointers and the size of
 *		   the buffer.
 *
 * @buf_size: Size of the circular buffer
 * @read_ptr: Read pointer with an additional overflow bit
 * @write_ptr: Write pointer with an additional overflow bit
 *
 * Since the read and write pointers have an additional overflow bit,
 * this function calculates the offsets from the pointers and uses the
 * offsets to calculate the data size in the buffer.
 *
 * Returns: number of bytes of data in the buffer
 */
static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
{
	u32 read_offset, write_offset, size = 0;

	if (read_ptr == write_ptr)
		goto exit;

	read_offset = read_ptr & (buf_size - 1);
	write_offset = write_ptr & (buf_size - 1);
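	/*
	 * With the overflow bit, equal pointers mean an empty buffer while a
	 * wrapped write pointer stays distinguishable. For example, with
	 * buf_size = 0x80000, read_ptr = 0x7ff00 and write_ptr = 0x80100
	 * (overflow bit set): read_offset = 0x7ff00, write_offset = 0x100,
	 * so size = 0x80000 - 0x7ff00 + 0x100 = 0x200 bytes.
	 */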

	if (write_offset > read_offset)
		size = write_offset - read_offset;
	else
		size = buf_size - read_offset + write_offset;
exit:
	return size;
}

/**
 * eu_stall_data_buf_poll - Poll for EU stall data in the buffer.
 *
 * @stream: xe EU stall data stream instance
 *
 * Returns: true if the EU stall buffer contains minimum stall data as
 *	    specified by the event report count, else false.
 */
static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
{
	u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0;
	u32 buf_size = stream->per_xecore_buf_size;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	bool min_data_present = false;
	u16 group, instance;
	unsigned int xecore;

	mutex_lock(&stream->xecore_buf_lock);
	for_each_dss_steering(xecore, gt, group, instance) {
		xecore_buf = &stream->xecore_buf[xecore];
		read_ptr = xecore_buf->read;
		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT,
						       group, instance);
		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
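		/*
		 * The register reports the write pointer in 64B cache line
		 * units; convert it to a byte offset and keep the extra
		 * overflow/wrap bit.
		 */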
		write_ptr <<= 6;
		write_ptr &= ((buf_size << 1) - 1);
		if (!min_data_present) {
			total_data += buf_data_size(buf_size, read_ptr, write_ptr);
			if (num_data_rows(total_data) >= stream->wait_num_reports)
				min_data_present = true;
		}
		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
			set_bit(xecore, stream->data_drop.mask);
		xecore_buf->write = write_ptr;
	}
	mutex_unlock(&stream->xecore_buf_lock);

	return min_data_present;
}

static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 write_ptr_reg;

	/* On PVC, the overflow bit has to be cleared by writing 1 to it.
	 * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it.
	 */
	if (GRAPHICS_VER(xe) >= 20)
		write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
	else
		write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);

	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance);
}

static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream,
				     char __user *buf, size_t count,
				     size_t *total_data_size, struct xe_gt *gt,
				     u16 group, u16 instance, unsigned int xecore)
{
	size_t read_data_size, copy_size, buf_size;
	u32 read_ptr_reg, read_ptr, write_ptr;
	u8 *xecore_start_vaddr, *read_vaddr;
	struct per_xecore_buf *xecore_buf;
	u32 read_offset, write_offset;

	/* Hardware increments the read and write pointers such that they can
	 * overflow into one additional bit. For example, a 256KB size buffer
	 * offset pointer needs 18 bits. But HW uses 19 bits for the read and
	 * write pointers. This technique avoids wasting a slot in the buffer.
	 * Read and write offsets are calculated from the pointers in order to
	 * check if the write pointer has wrapped around the array.
	 */
	xecore_buf = &stream->xecore_buf[xecore];
	xecore_start_vaddr = xecore_buf->vaddr;
	read_ptr = xecore_buf->read;
	write_ptr = xecore_buf->write;
	buf_size = stream->per_xecore_buf_size;

	read_data_size = buf_data_size(buf_size, read_ptr, write_ptr);
	/* Read only the data that the user space buffer can accommodate */
	read_data_size = min_t(size_t, count - *total_data_size, read_data_size);
	if (read_data_size == 0)
		goto exit_drop;

	read_offset = read_ptr & (buf_size - 1);
	write_offset = write_ptr & (buf_size - 1);
	read_vaddr = xecore_start_vaddr + read_offset;

	if (write_offset > read_offset) {
		if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size))
			return -EFAULT;
	} else {
		if (read_data_size >= buf_size - read_offset)
			copy_size = buf_size - read_offset;
		else
			copy_size = read_data_size;
		if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size))
			return -EFAULT;
		if (copy_to_user(buf + *total_data_size + copy_size,
				 xecore_start_vaddr, read_data_size - copy_size))
			return -EFAULT;
	}

	*total_data_size += read_data_size;
	read_ptr += read_data_size;

	/* Read pointer can overflow into one additional bit */
	read_ptr &= (buf_size << 1) - 1;
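	/* The REPORT1 register takes the read pointer in 64B cache line units */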
	read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
	read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
	xecore_buf->read = read_ptr;
	trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
				    read_data_size, *total_data_size);
exit_drop:
	/* Clear drop bit (if set) after any data was read or if the buffer was empty.
	 * Drop bit can be set even if the buffer is empty as the buffer may have been emptied
	 * in the previous read() and the data drop bit was set during the previous read().
	 */
	if (test_bit(xecore, stream->data_drop.mask)) {
		clear_dropped_eviction_line_bit(gt, group, instance);
		clear_bit(xecore, stream->data_drop.mask);
	}
	return 0;
}

/**
 * xe_eu_stall_stream_read_locked - copy EU stall counters data from the
 *				    per xecore buffers to the userspace buffer
 * @stream: A stream opened for EU stall count metrics
 * @file: An xe EU stall data stream file
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 *
 * Returns: Number of bytes copied or a negative error code
 * If we've successfully copied any data then reporting that takes
 * precedence over any internal error status, so the data isn't lost.
 */
static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream,
					      struct file *file, char __user *buf,
					      size_t count)
{
	struct xe_gt *gt = stream->gt;
	size_t total_size = 0;
	u16 group, instance;
	unsigned int xecore;
	int ret = 0;

	mutex_lock(&stream->xecore_buf_lock);
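	/*
	 * Dropped data (buffer overflow) is reported to user space once as
	 * -EIO; the following read() proceeds and returns the data that is
	 * still in the buffers.
	 */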
	if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) {
		if (!stream->data_drop.reported_to_user) {
			stream->data_drop.reported_to_user = true;
			xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n",
				  XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask);
			mutex_unlock(&stream->xecore_buf_lock);
			return -EIO;
		}
		stream->data_drop.reported_to_user = false;
	}

	for_each_dss_steering(xecore, gt, group, instance) {
		ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
						gt, group, instance, xecore);
		if (ret || count == total_size)
			break;
	}
	mutex_unlock(&stream->xecore_buf_lock);
	return total_size ?: (ret ?: -EAGAIN);
}

/*
 * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
 * before calling read().
 *
 * Returns: The number of bytes copied or a negative error code on failure.
 *	    -EIO if HW drops any EU stall data when the buffer is full.
 */
static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
				       size_t count, loff_t *ppos)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	ssize_t ret, aligned_count;

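	/*
	 * Only whole data records are copied out; round the request down
	 * to a multiple of the record size.
	 */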
	aligned_count = ALIGN_DOWN(count, stream->data_record_size);
	if (aligned_count == 0)
		return -EINVAL;

	if (!stream->enabled) {
		xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
		return -EINVAL;
	}

	if (!(file->f_flags & O_NONBLOCK)) {
		do {
			ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
			if (ret)
				return -EINTR;

			mutex_lock(&gt->eu_stall->stream_lock);
			ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
			mutex_unlock(&gt->eu_stall->stream_lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&gt->eu_stall->stream_lock);
		ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
		mutex_unlock(&gt->eu_stall->stream_lock);
	}

	/*
	 * This may not work correctly if the user buffer is very small.
	 * We don't want to block the next read() when there is data in the
	 * buffer now, but it couldn't be accommodated in the small user buffer.
	 */
	stream->pollin = false;

	return ret;
}

static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;

	mutex_destroy(&stream->xecore_buf_lock);
	gt->eu_stall->stream = NULL;
	kfree(stream);
}

static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream)
{
	xe_bo_unpin_map_no_vm(stream->bo);
	kfree(stream->xecore_buf);
}

static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
				      u16 last_xecore)
{
	struct xe_tile *tile = stream->gt->tile;
	struct xe_bo *bo;
	u32 size;

	stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
	if (!stream->xecore_buf)
		return -ENOMEM;

	size = stream->per_xecore_buf_size * last_xecore;

	bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
					     size, ~0ull, ttm_bo_type_kernel,
					     XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
	if (IS_ERR(bo)) {
		kfree(stream->xecore_buf);
		return PTR_ERR(bo);
	}

	XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
	stream->bo = bo;

	return 0;
}

static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
{
	u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	u16 group, instance;
	unsigned int fw_ref;
	int xecore;

	/* Take runtime pm ref and forcewake to disable RC6 */
	xe_pm_runtime_get(gt_to_xe(gt));
	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
		xe_gt_err(gt, "Failed to get RENDER forcewake\n");
		xe_pm_runtime_put(gt_to_xe(gt));
		return -ETIMEDOUT;
	}

	if (XE_WA(gt, 22016596838))
		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
					  _MASKED_BIT_ENABLE(DISABLE_DOP_GATING));

	for_each_dss_steering(xecore, gt, group, instance) {
		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance);
		/* Clear any drop bits set and not cleared in the previous session. */
		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
			clear_dropped_eviction_line_bit(gt, group, instance);
		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
		read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
		read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
		/* Initialize the read pointer to the write pointer */
		xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
		write_ptr <<= 6;
		write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
		xecore_buf = &stream->xecore_buf[xecore];
		xecore_buf->write = write_ptr;
		xecore_buf->read = write_ptr;
	}
	stream->data_drop.reported_to_user = false;
	bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS);

	reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE,
				  REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) |
				  REG_FIELD_PREP(EUSTALL_SAMPLE_RATE,
						 stream->sampling_rate_mult));
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value);
	/* GGTT addresses can never be > 32 bits */
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0);
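	/*
	 * XEHPC_EUSTALL_BASE packs the GGTT address of the buffer, the
	 * per-XeCore buffer size in 256K units and the sampling enable bit.
	 */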
	reg_value = xe_bo_ggtt_addr(stream->bo);
	reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ,
				    stream->per_xecore_buf_size / SZ_256K);
	reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING;
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value);

	return 0;
}

static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
{
	struct xe_eu_stall_data_stream *stream =
		container_of(work, typeof(*stream), buf_poll_work.work);
	struct xe_gt *gt = stream->gt;

	if (eu_stall_data_buf_poll(stream)) {
		stream->pollin = true;
		wake_up(&stream->poll_wq);
	}
	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
			   &stream->buf_poll_work,
			   msecs_to_jiffies(POLL_PERIOD_MS));
}

static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
				   struct eu_stall_open_properties *props)
{
	unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	xe_dss_mask_t all_xecores;
	u16 group, instance;
	u32 vaddr_offset;
	int ret;

	bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
		  XE_MAX_DSS_FUSE_BITS);
	num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS);
	last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1;
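	/*
	 * last_xecore is the highest enabled XeCore index + 1; the buffer is
	 * sized for all slots up to it so that each XeCore's data can be
	 * addressed directly by its DSS steering index, even when lower
	 * indices are fused off.
	 */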

	max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores);
	if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) {
		xe_gt_dbg(gt, "Invalid EU stall event report count %u\n",
			  props->wait_num_reports);
		xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n",
			  max_wait_num_reports);
		return -EINVAL;
	}

	init_waitqueue_head(&stream->poll_wq);
	mutex_init(&stream->xecore_buf_lock);
	INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
	stream->per_xecore_buf_size = per_xecore_buf_size;
	stream->sampling_rate_mult = props->sampling_rate_mult;
	stream->wait_num_reports = props->wait_num_reports;
	stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));

	ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
	if (ret)
		return ret;

	for_each_dss_steering(xecore, gt, group, instance) {
		xecore_buf = &stream->xecore_buf[xecore];
		vaddr_offset = xecore * stream->per_xecore_buf_size;
		xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset;
	}
	return 0;
}

static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream,
					       struct file *file, poll_table *wait)
{
	__poll_t events = 0;

	poll_wait(file, &stream->poll_wq, wait);

	if (stream->pollin)
		events |= EPOLLIN;

	return events;
}

static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	__poll_t ret;

	mutex_lock(&gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_poll_locked(stream, file, wait);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return ret;
}

static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;
	int ret = 0;

	if (stream->enabled)
		return ret;

	stream->enabled = true;

	ret = xe_eu_stall_stream_enable(stream);

	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
			   &stream->buf_poll_work,
			   msecs_to_jiffies(POLL_PERIOD_MS));
	return ret;
}

static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;

	if (!stream->enabled)
		return 0;

	stream->enabled = false;

	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);

	cancel_delayed_work_sync(&stream->buf_poll_work);

	if (XE_WA(gt, 22016596838))
		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
					  _MASKED_BIT_DISABLE(DISABLE_DOP_GATING));

	xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
	xe_pm_runtime_put(gt_to_xe(gt));

	return 0;
}

static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream,
					    unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case DRM_XE_OBSERVATION_IOCTL_ENABLE:
		return xe_eu_stall_enable_locked(stream);
	case DRM_XE_OBSERVATION_IOCTL_DISABLE:
		return xe_eu_stall_disable_locked(stream);
	}

	return -EINVAL;
}

static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	long ret;

	mutex_lock(&gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return ret;
}

static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;

	drm_dev_put(&gt->tile->xe->drm);

	mutex_lock(&gt->eu_stall->stream_lock);
	xe_eu_stall_disable_locked(stream);
	xe_eu_stall_data_buf_destroy(stream);
	xe_eu_stall_stream_free(stream);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return 0;
}

static const struct file_operations fops_eu_stall = {
	.owner = THIS_MODULE,
	.llseek = noop_llseek,
	.release = xe_eu_stall_stream_close,
	.poll = xe_eu_stall_stream_poll,
	.read = xe_eu_stall_stream_read,
	.unlocked_ioctl = xe_eu_stall_stream_ioctl,
	.compat_ioctl = xe_eu_stall_stream_ioctl,
};
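/*
 * A rough sketch of the user space flow, assuming the property chain follows
 * the drm_xe_user_extension/drm_xe_ext_set_property layout from xe_drm.h (the
 * ioctl wrapper that routes to xe_eu_stall_stream_open() is elided here):
 *
 *	struct drm_xe_ext_set_property gt_prop = {
 *		.base.name = DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_EU_STALL_PROP_GT_ID,
 *		.value = 0,
 *	};
 *	struct drm_xe_ext_set_property rate_prop = {
 *		.base.name = DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY,
 *		.base.next_extension = (uintptr_t)&gt_prop,
 *		.property = DRM_XE_EU_STALL_PROP_SAMPLE_RATE,
 *		.value = 251 * 4,	(sampling_rate_mult = 4)
 *	};
 *
 * The open path returns stream_fd; user space then issues
 * DRM_XE_OBSERVATION_IOCTL_ENABLE on it, poll()s and read()s 64B records,
 * and finally issues DRM_XE_OBSERVATION_IOCTL_DISABLE and close()s the fd.
 */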

static int xe_eu_stall_stream_open_locked(struct drm_device *dev,
					  struct eu_stall_open_properties *props,
					  struct drm_file *file)
{
	struct xe_eu_stall_data_stream *stream;
	struct xe_gt *gt = props->gt;
	unsigned long f_flags = 0;
	int ret, stream_fd;

	/* Only one session can be active at any time */
	if (gt->eu_stall->stream) {
		xe_gt_dbg(gt, "EU stall sampling session already active\n");
		return -EBUSY;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream)
		return -ENOMEM;

	gt->eu_stall->stream = stream;
	stream->gt = gt;

	ret = xe_eu_stall_stream_init(stream, props);
	if (ret) {
		xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret);
		goto err_free;
	}

	stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags);
	if (stream_fd < 0) {
		ret = stream_fd;
		xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret);
		goto err_destroy;
	}

	/* Take a reference on the driver that will be kept with stream_fd
	 * until its release.
	 */
	drm_dev_get(&gt->tile->xe->drm);

	return stream_fd;

err_destroy:
	xe_eu_stall_data_buf_destroy(stream);
err_free:
	xe_eu_stall_stream_free(stream);
	return ret;
}

/**
 * xe_eu_stall_stream_open - Open a xe EU stall data stream fd
 *
 * @dev: DRM device pointer
 * @data: pointer to first struct @drm_xe_ext_set_property in
 *	  the chain of input properties from the user space.
 * @file: DRM file pointer
 *
 * This function opens an EU stall data stream with input properties from
 * the user space.
 *
 * Returns: EU stall data stream fd on success or a negative error code.
 */
int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct eu_stall_open_properties props = {};
	int ret;

	if (!xe_eu_stall_supported_on_platform(xe)) {
		drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
		return -ENODEV;
	}

	if (xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n");
		return -EACCES;
	}

	/* Initialize and set default values */
	props.wait_num_reports = 1;
	props.sampling_rate_mult = 4;

	ret = xe_eu_stall_user_extensions(xe, data, 0, &props);
	if (ret)
		return ret;

	if (!props.gt) {
		drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n");
		return -EINVAL;
	}

	mutex_lock(&props.gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_open_locked(dev, &props, file);
	mutex_unlock(&props.gt->eu_stall->stream_lock);

	return ret;
}