xref: /linux/drivers/gpu/drm/xe/xe_eu_stall.c (revision 71477b7e702c88f214866a3dc400812a5da59905)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2025 Intel Corporation
4  */
5 
6 #include <linux/anon_inodes.h>
7 #include <linux/fs.h>
8 #include <linux/poll.h>
9 #include <linux/types.h>
10 
11 #include <drm/drm_drv.h>
12 #include <generated/xe_wa_oob.h>
13 #include <uapi/drm/xe_drm.h>
14 
15 #include "xe_bo.h"
16 #include "xe_device.h"
17 #include "xe_eu_stall.h"
18 #include "xe_force_wake.h"
19 #include "xe_gt_mcr.h"
20 #include "xe_gt_printk.h"
21 #include "xe_gt_topology.h"
22 #include "xe_macros.h"
23 #include "xe_observation.h"
24 #include "xe_pm.h"
25 #include "xe_trace.h"
26 #include "xe_wa.h"
27 
28 #include "regs/xe_eu_stall_regs.h"
29 #include "regs/xe_gt_regs.h"
30 
31 #define POLL_PERIOD_MS	5
32 
33 static size_t per_xecore_buf_size = SZ_512K;
34 
35 struct per_xecore_buf {
36 	/* Buffer vaddr */
37 	u8 *vaddr;
38 	/* Write pointer */
39 	u32 write;
40 	/* Read pointer */
41 	u32 read;
42 };
43 
44 struct xe_eu_stall_data_stream {
45 	bool pollin;
46 	bool enabled;
47 	bool reset_detected;
48 	int wait_num_reports;
49 	int sampling_rate_mult;
50 	wait_queue_head_t poll_wq;
51 	size_t data_record_size;
52 	size_t per_xecore_buf_size;
53 	unsigned int fw_ref;
54 
55 	struct xe_gt *gt;
56 	struct xe_bo *bo;
57 	/* Lock to protect data buffer pointers */
58 	struct mutex xecore_buf_lock;
59 	struct per_xecore_buf *xecore_buf;
60 	struct {
61 		bool reported_to_user;
62 		xe_dss_mask_t mask;
63 	} data_drop;
64 	struct delayed_work buf_poll_work;
65 };
66 
67 struct xe_eu_stall_gt {
68 	/* Lock to protect stream */
69 	struct mutex stream_lock;
70 	/* EU stall data stream */
71 	struct xe_eu_stall_data_stream *stream;
72 	/* Workqueue to schedule buffer pointers polling work */
73 	struct workqueue_struct *buf_ptr_poll_wq;
74 };
75 
/**
 * struct eu_stall_open_properties - Properties collected from the user
 *				     extension chain when a stream is opened.
 * @sampling_rate_mult: Multiplier applied to the base period of 251 cycles;
 *			the HW samples every (sampling_rate_mult x 251) cycles.
 * @wait_num_reports: Number of EU stall data reports that must be available
 *		      before poll() is unblocked.
 * @gt: The GT to capture EU stall data on.
 */
struct eu_stall_open_properties {
	int sampling_rate_mult;
	int wait_num_reports;
	struct xe_gt *gt;
};
89 
90 /*
91  * EU stall data format for PVC
92  */
93 struct xe_eu_stall_data_pvc {
94 	__u64 ip_addr:29;	  /* Bits 0  to 28  */
95 	__u64 active_count:8;	  /* Bits 29 to 36  */
96 	__u64 other_count:8;	  /* Bits 37 to 44  */
97 	__u64 control_count:8;	  /* Bits 45 to 52  */
98 	__u64 pipestall_count:8;  /* Bits 53 to 60  */
99 	__u64 send_count:8;	  /* Bits 61 to 68  */
100 	__u64 dist_acc_count:8;	  /* Bits 69 to 76  */
101 	__u64 sbid_count:8;	  /* Bits 77 to 84  */
102 	__u64 sync_count:8;	  /* Bits 85 to 92  */
103 	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
104 	__u64 unused_bits:27;
105 	__u64 unused[6];
106 } __packed;
107 
108 /*
109  * EU stall data format for Xe2 arch GPUs (LNL, BMG).
110  */
111 struct xe_eu_stall_data_xe2 {
112 	__u64 ip_addr:29;	  /* Bits 0  to 28  */
113 	__u64 tdr_count:8;	  /* Bits 29 to 36  */
114 	__u64 other_count:8;	  /* Bits 37 to 44  */
115 	__u64 control_count:8;	  /* Bits 45 to 52  */
116 	__u64 pipestall_count:8;  /* Bits 53 to 60  */
117 	__u64 send_count:8;	  /* Bits 61 to 68  */
118 	__u64 dist_acc_count:8;   /* Bits 69 to 76  */
119 	__u64 sbid_count:8;	  /* Bits 77 to 84  */
120 	__u64 sync_count:8;	  /* Bits 85 to 92  */
121 	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
122 	__u64 active_count:8;	  /* Bits 101 to 108 */
123 	__u64 ex_id:3;		  /* Bits 109 to 111 */
124 	__u64 end_flag:1;	  /* Bit  112 */
125 	__u64 unused_bits:15;
126 	__u64 unused[6];
127 } __packed;
128 
129 /*
130  * EU stall data format for Xe3p arch GPUs.
131  */
132 struct xe_eu_stall_data_xe3p {
133 	__u64 ip_addr:61;	  /* Bits 0  to 60  */
134 	__u64 tdr_count:8;	  /* Bits 61 to 68  */
135 	__u64 other_count:8;	  /* Bits 69 to 76  */
136 	__u64 control_count:8;	  /* Bits 77 to 84  */
137 	__u64 pipestall_count:8;  /* Bits 85 to 92  */
138 	__u64 send_count:8;	  /* Bits 93 to 100 */
139 	__u64 dist_acc_count:8;   /* Bits 101 to 108 */
140 	__u64 sbid_count:8;	  /* Bits 109 to 116 */
141 	__u64 sync_count:8;	  /* Bits 117 to 124 */
142 	__u64 inst_fetch_count:8; /* Bits 125 to 132 */
143 	__u64 active_count:8;	  /* Bits 133 to 140 */
144 	__u64 ex_id:3;		  /* Bits 141 to 143 */
145 	__u64 end_flag:1;	  /* Bit  144 */
146 	__u64 unused_bits:47;
147 	__u64 unused[5];
148 } __packed;
149 
150 const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7};
151 
152 /**
153  * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
154  *
155  * @num_rates: Pointer to a u32 to return the number of sampling rates.
156  * @rates: double u64 pointer to point to an array of sampling rates.
157  *
158  * Stores the number of sampling rates and pointer to the array of
159  * sampling rates in the input pointers.
160  *
161  * Returns: Size of the EU stall sampling rates array.
162  */
163 size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates)
164 {
165 	*num_rates = ARRAY_SIZE(eu_stall_sampling_rates);
166 	*rates = eu_stall_sampling_rates;
167 
168 	return sizeof(eu_stall_sampling_rates);
169 }
170 
171 /**
172  * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
173  *
174  * Returns: The per XeCore buffer size used to allocate the per GT
175  *	    EU stall data buffer.
176  */
177 size_t xe_eu_stall_get_per_xecore_buf_size(void)
178 {
179 	return per_xecore_buf_size;
180 }
181 
182 /**
183  * xe_eu_stall_data_record_size - get EU stall data record size.
184  *
185  * @xe: Pointer to a Xe device.
186  *
187  * Returns: EU stall data record size.
188  */
189 size_t xe_eu_stall_data_record_size(struct xe_device *xe)
190 {
191 	size_t record_size = 0;
192 
193 	if (GRAPHICS_VER(xe) >= 35)
194 		record_size = sizeof(struct xe_eu_stall_data_xe3p);
195 	else if (GRAPHICS_VER(xe) >= 20)
196 		record_size = sizeof(struct xe_eu_stall_data_xe2);
197 	else if (xe->info.platform == XE_PVC)
198 		record_size = sizeof(struct xe_eu_stall_data_pvc);
199 
200 
201 	xe_assert(xe, is_power_of_2(record_size));
202 
203 	return record_size;
204 }
205 
/**
 * num_data_rows - Convert a data size in bytes into a count of 64B rows.
 *
 * @data_size: EU stall data size in bytes
 *
 * Returns: number of complete 64B rows contained in @data_size.
 */
static u32 num_data_rows(u32 data_size)
{
	return data_size / 64;
}
216 
217 static void xe_eu_stall_fini(void *arg)
218 {
219 	struct xe_gt *gt = arg;
220 
221 	destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
222 	mutex_destroy(&gt->eu_stall->stream_lock);
223 	kfree(gt->eu_stall);
224 }
225 
226 /**
227  * xe_eu_stall_init() - Allocate and initialize GT level EU stall data
228  *			structure xe_eu_stall_gt within struct xe_gt.
229  *
230  * @gt: GT being initialized.
231  *
232  * Returns: zero on success or a negative error code.
233  */
234 int xe_eu_stall_init(struct xe_gt *gt)
235 {
236 	struct xe_device *xe = gt_to_xe(gt);
237 	int ret;
238 
239 	if (!xe_eu_stall_supported_on_platform(xe))
240 		return 0;
241 
242 	gt->eu_stall = kzalloc_obj(*gt->eu_stall);
243 	if (!gt->eu_stall) {
244 		ret = -ENOMEM;
245 		goto exit;
246 	}
247 
248 	mutex_init(&gt->eu_stall->stream_lock);
249 
250 	gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
251 	if (!gt->eu_stall->buf_ptr_poll_wq) {
252 		ret = -ENOMEM;
253 		goto exit_free;
254 	}
255 
256 	return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
257 exit_free:
258 	mutex_destroy(&gt->eu_stall->stream_lock);
259 	kfree(gt->eu_stall);
260 exit:
261 	return ret;
262 }
263 
264 static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value,
265 					   struct eu_stall_open_properties *props)
266 {
267 	value = div_u64(value, 251);
268 	if (value == 0 || value > 7) {
269 		drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value);
270 		return -EINVAL;
271 	}
272 	props->sampling_rate_mult = value;
273 	return 0;
274 }
275 
276 static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value,
277 					      struct eu_stall_open_properties *props)
278 {
279 	props->wait_num_reports = value;
280 
281 	return 0;
282 }
283 
284 static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value,
285 				   struct eu_stall_open_properties *props)
286 {
287 	struct xe_gt *gt = xe_device_get_gt(xe, value);
288 
289 	if (!gt) {
290 		drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value);
291 		return -EINVAL;
292 	}
293 	props->gt = gt;
294 	return 0;
295 }
296 
297 typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value,
298 					struct eu_stall_open_properties *props);
299 
300 static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = {
301 	[DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate,
302 	[DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports,
303 	[DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id,
304 };
305 
306 static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension,
307 					     struct eu_stall_open_properties *props)
308 {
309 	u64 __user *address = u64_to_user_ptr(extension);
310 	struct drm_xe_ext_set_property ext;
311 	int err;
312 	u32 idx;
313 
314 	err = copy_from_user(&ext, address, sizeof(ext));
315 	if (XE_IOCTL_DBG(xe, err))
316 		return -EFAULT;
317 
318 	if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) ||
319 	    XE_IOCTL_DBG(xe, !ext.property) || XE_IOCTL_DBG(xe, ext.pad))
320 		return -EINVAL;
321 
322 	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs));
323 	return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props);
324 }
325 
326 typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension,
327 					     struct eu_stall_open_properties *props);
328 static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = {
329 	[DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property,
330 };
331 
332 #define MAX_USER_EXTENSIONS	5
333 static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension,
334 				       int ext_number, struct eu_stall_open_properties *props)
335 {
336 	u64 __user *address = u64_to_user_ptr(extension);
337 	struct drm_xe_user_extension ext;
338 	int err;
339 	u32 idx;
340 
341 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
342 		return -E2BIG;
343 
344 	err = copy_from_user(&ext, address, sizeof(ext));
345 	if (XE_IOCTL_DBG(xe, err))
346 		return -EFAULT;
347 
348 	if (XE_IOCTL_DBG(xe, ext.pad) ||
349 	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs)))
350 		return -EINVAL;
351 
352 	idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs));
353 	err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props);
354 	if (XE_IOCTL_DBG(xe, err))
355 		return err;
356 
357 	if (ext.next_extension)
358 		return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props);
359 
360 	return 0;
361 }
362 
/**
 * buf_data_size - Number of bytes currently held in a circular buffer.
 *
 * @buf_size: Size of the circular buffer
 * @read_ptr: Read pointer with an additional overflow bit
 * @write_ptr: Write pointer with an additional overflow bit
 *
 * The pointers carry one bit more than is needed to address the buffer.
 * Equal pointers mean the buffer is empty. Otherwise the pointers are
 * reduced to offsets within the buffer; offsets that are equal at this
 * point mean the buffer is completely full.
 *
 * Returns: number of bytes of data in the buffer
 */
static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
{
	u32 rd_off, wr_off;

	if (read_ptr == write_ptr)
		return 0;

	rd_off = read_ptr & (buf_size - 1);
	wr_off = write_ptr & (buf_size - 1);

	if (wr_off > rd_off)
		return wr_off - rd_off;

	/* Data wraps around the end of the buffer (or fills it entirely). */
	return buf_size - rd_off + wr_off;
}
395 
396 /**
397  * eu_stall_data_buf_poll - Poll for EU stall data in the buffer.
398  *
399  * @stream: xe EU stall data stream instance
400  *
401  * Returns: true if the EU stall buffer contains minimum stall data as
402  *	    specified by the event report count, else false.
403  */
404 static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
405 {
406 	u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0;
407 	u32 buf_size = stream->per_xecore_buf_size;
408 	struct per_xecore_buf *xecore_buf;
409 	struct xe_gt *gt = stream->gt;
410 	bool min_data_present = false;
411 	u16 group, instance;
412 	unsigned int xecore;
413 
414 	mutex_lock(&stream->xecore_buf_lock);
415 	for_each_dss_steering(xecore, gt, group, instance) {
416 		xecore_buf = &stream->xecore_buf[xecore];
417 		read_ptr = xecore_buf->read;
418 		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT,
419 						       group, instance);
420 		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
421 		write_ptr <<= 6;
422 		write_ptr &= ((buf_size << 1) - 1);
423 		if (!min_data_present) {
424 			total_data += buf_data_size(buf_size, read_ptr, write_ptr);
425 			if (num_data_rows(total_data) >= stream->wait_num_reports)
426 				min_data_present = true;
427 		}
428 		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
429 			set_bit(xecore, stream->data_drop.mask);
430 		xecore_buf->write = write_ptr;
431 	}
432 	/* If a GT or engine reset happens during EU stall sampling,
433 	 * all EU stall registers get reset to 0 and the cached values of
434 	 * the EU stall data buffers' read pointers are out of sync with
435 	 * the register values. This causes invalid data to be returned
436 	 * from read(). To prevent this, check the value of a EU stall base
437 	 * register. If it is zero, there has been a reset.
438 	 */
439 	if (unlikely(!xe_gt_mcr_unicast_read_any(gt, XEHPC_EUSTALL_BASE)))
440 		stream->reset_detected = true;
441 
442 	stream->pollin = min_data_present || stream->reset_detected;
443 	mutex_unlock(&stream->xecore_buf_lock);
444 
445 	return stream->pollin;
446 }
447 
448 static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
449 {
450 	struct xe_device *xe = gt_to_xe(gt);
451 	u32 write_ptr_reg;
452 
453 	/* On PVC, the overflow bit has to be cleared by writing 1 to it.
454 	 * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it.
455 	 */
456 	if (GRAPHICS_VER(xe) >= 20)
457 		write_ptr_reg = REG_MASKED_FIELD_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
458 	else
459 		write_ptr_reg = REG_MASKED_FIELD_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
460 
461 	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance);
462 }
463 
464 static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream,
465 				     char __user *buf, size_t count,
466 				     size_t *total_data_size, struct xe_gt *gt,
467 				     u16 group, u16 instance, unsigned int xecore)
468 {
469 	size_t read_data_size, copy_size, buf_size;
470 	u32 read_ptr_reg, read_ptr, write_ptr;
471 	u8 *xecore_start_vaddr, *read_vaddr;
472 	struct per_xecore_buf *xecore_buf;
473 	u32 read_offset, write_offset;
474 
475 	/* Hardware increments the read and write pointers such that they can
476 	 * overflow into one additional bit. For example, a 256KB size buffer
477 	 * offset pointer needs 18 bits. But HW uses 19 bits for the read and
478 	 * write pointers. This technique avoids wasting a slot in the buffer.
479 	 * Read and write offsets are calculated from the pointers in order to
480 	 * check if the write pointer has wrapped around the array.
481 	 */
482 	xecore_buf = &stream->xecore_buf[xecore];
483 	xecore_start_vaddr = xecore_buf->vaddr;
484 	read_ptr = xecore_buf->read;
485 	write_ptr = xecore_buf->write;
486 	buf_size = stream->per_xecore_buf_size;
487 
488 	read_data_size = buf_data_size(buf_size, read_ptr, write_ptr);
489 	/* Read only the data that the user space buffer can accommodate */
490 	read_data_size = min_t(size_t, count - *total_data_size, read_data_size);
491 	if (read_data_size == 0)
492 		goto exit_drop;
493 
494 	read_offset = read_ptr & (buf_size - 1);
495 	write_offset = write_ptr & (buf_size - 1);
496 	read_vaddr = xecore_start_vaddr + read_offset;
497 
498 	if (write_offset > read_offset) {
499 		if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size))
500 			return -EFAULT;
501 	} else {
502 		if (read_data_size >= buf_size - read_offset)
503 			copy_size = buf_size - read_offset;
504 		else
505 			copy_size = read_data_size;
506 		if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size))
507 			return -EFAULT;
508 		if (copy_to_user(buf + *total_data_size + copy_size,
509 				 xecore_start_vaddr, read_data_size - copy_size))
510 			return -EFAULT;
511 	}
512 
513 	*total_data_size += read_data_size;
514 	read_ptr += read_data_size;
515 
516 	/* Read pointer can overflow into one additional bit */
517 	read_ptr &= (buf_size << 1) - 1;
518 	read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
519 	read_ptr_reg = REG_MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
520 	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
521 	xecore_buf->read = read_ptr;
522 	trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
523 				    read_data_size, *total_data_size);
524 exit_drop:
525 	/* Clear drop bit (if set) after any data was read or if the buffer was empty.
526 	 * Drop bit can be set even if the buffer is empty as the buffer may have been emptied
527 	 * in the previous read() and the data drop bit was set during the previous read().
528 	 */
529 	if (test_bit(xecore, stream->data_drop.mask)) {
530 		clear_dropped_eviction_line_bit(gt, group, instance);
531 		clear_bit(xecore, stream->data_drop.mask);
532 	}
533 	return 0;
534 }
535 
536 /**
537  * xe_eu_stall_stream_read_locked - copy EU stall counters data from the
538  *				    per xecore buffers to the userspace buffer
539  * @stream: A stream opened for EU stall count metrics
540  * @file: An xe EU stall data stream file
541  * @buf: destination buffer given by userspace
542  * @count: the number of bytes userspace wants to read
543  *
544  * Returns: Number of bytes copied or a negative error code
545  * If we've successfully copied any data then reporting that takes
546  * precedence over any internal error status, so the data isn't lost.
547  */
548 static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream,
549 					      struct file *file, char __user *buf,
550 					      size_t count)
551 {
552 	struct xe_gt *gt = stream->gt;
553 	size_t total_size = 0;
554 	u16 group, instance;
555 	unsigned int xecore;
556 	int ret = 0;
557 
558 	mutex_lock(&stream->xecore_buf_lock);
559 	/* If EU stall registers got reset due to a GT/engine reset,
560 	 * continuing with the read() will return invalid data to
561 	 * the user space. Just return -ENODEV instead.
562 	 */
563 	if (unlikely(stream->reset_detected)) {
564 		xe_gt_dbg(gt, "EU stall base register has been reset\n");
565 		mutex_unlock(&stream->xecore_buf_lock);
566 		return -ENODEV;
567 	}
568 	if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) {
569 		if (!stream->data_drop.reported_to_user) {
570 			stream->data_drop.reported_to_user = true;
571 			xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n",
572 				  XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask);
573 			mutex_unlock(&stream->xecore_buf_lock);
574 			return -EIO;
575 		}
576 		stream->data_drop.reported_to_user = false;
577 	}
578 	for_each_dss_steering(xecore, gt, group, instance) {
579 		ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
580 						gt, group, instance, xecore);
581 		if (ret || count == total_size)
582 			break;
583 	}
584 	mutex_unlock(&stream->xecore_buf_lock);
585 	return total_size ?: (ret ?: -EAGAIN);
586 }
587 
588 /*
589  * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
590  * before calling read().
591  *
592  * Returns: The number of bytes copied or a negative error code on failure.
593  *	    -EIO if HW drops any EU stall data when the buffer is full.
594  */
595 static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
596 				       size_t count, loff_t *ppos)
597 {
598 	struct xe_eu_stall_data_stream *stream = file->private_data;
599 	struct xe_gt *gt = stream->gt;
600 	ssize_t ret, aligned_count;
601 
602 	aligned_count = ALIGN_DOWN(count, stream->data_record_size);
603 	if (aligned_count == 0)
604 		return -EINVAL;
605 
606 	if (!stream->enabled) {
607 		xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
608 		return -EINVAL;
609 	}
610 
611 	if (!(file->f_flags & O_NONBLOCK)) {
612 		do {
613 			ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
614 			if (ret)
615 				return -EINTR;
616 
617 			mutex_lock(&gt->eu_stall->stream_lock);
618 			ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
619 			mutex_unlock(&gt->eu_stall->stream_lock);
620 		} while (ret == -EAGAIN);
621 	} else {
622 		mutex_lock(&gt->eu_stall->stream_lock);
623 		ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
624 		mutex_unlock(&gt->eu_stall->stream_lock);
625 	}
626 
627 	/*
628 	 * This may not work correctly if the user buffer is very small.
629 	 * We don't want to block the next read() when there is data in the buffer
630 	 * now, but couldn't be accommodated in the small user buffer.
631 	 */
632 	if (!stream->reset_detected)
633 		stream->pollin = false;
634 
635 	return ret;
636 }
637 
638 static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
639 {
640 	struct xe_gt *gt = stream->gt;
641 
642 	mutex_destroy(&stream->xecore_buf_lock);
643 	gt->eu_stall->stream = NULL;
644 	kfree(stream);
645 }
646 
647 static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream)
648 {
649 	xe_bo_unpin_map_no_vm(stream->bo);
650 	kfree(stream->xecore_buf);
651 }
652 
653 static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
654 				      u16 last_xecore)
655 {
656 	struct xe_tile *tile = stream->gt->tile;
657 	struct xe_bo *bo;
658 	u32 size;
659 
660 	stream->xecore_buf = kzalloc_objs(*stream->xecore_buf, last_xecore);
661 	if (!stream->xecore_buf)
662 		return -ENOMEM;
663 
664 	size = stream->per_xecore_buf_size * last_xecore;
665 
666 	bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel,
667 					  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false);
668 	if (IS_ERR(bo)) {
669 		kfree(stream->xecore_buf);
670 		return PTR_ERR(bo);
671 	}
672 
673 	XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
674 	stream->bo = bo;
675 
676 	return 0;
677 }
678 
679 static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
680 {
681 	u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value;
682 	struct per_xecore_buf *xecore_buf;
683 	struct xe_gt *gt = stream->gt;
684 	u16 group, instance;
685 	int xecore;
686 
687 	/* Take runtime pm ref and forcewake to disable RC6 */
688 	xe_pm_runtime_get(gt_to_xe(gt));
689 	stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
690 	if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FW_RENDER)) {
691 		xe_gt_err(gt, "Failed to get RENDER forcewake\n");
692 		xe_pm_runtime_put(gt_to_xe(gt));
693 		return -ETIMEDOUT;
694 	}
695 
696 	if (XE_GT_WA(gt, 22016596838))
697 		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
698 					  REG_MASKED_FIELD_ENABLE(DISABLE_DOP_GATING));
699 
700 	for_each_dss_steering(xecore, gt, group, instance) {
701 		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance);
702 		/* Clear any drop bits set and not cleared in the previous session. */
703 		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
704 			clear_dropped_eviction_line_bit(gt, group, instance);
705 		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
706 		read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
707 		read_ptr_reg = REG_MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
708 		/* Initialize the read pointer to the write pointer */
709 		xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
710 		write_ptr <<= 6;
711 		write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
712 		xecore_buf = &stream->xecore_buf[xecore];
713 		xecore_buf->write = write_ptr;
714 		xecore_buf->read = write_ptr;
715 	}
716 	stream->reset_detected = false;
717 	stream->data_drop.reported_to_user = false;
718 	bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS);
719 
720 	reg_value = REG_MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE,
721 				     REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) |
722 				     REG_FIELD_PREP(EUSTALL_SAMPLE_RATE,
723 						    stream->sampling_rate_mult));
724 	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value);
725 	/* GGTT addresses can never be > 32 bits */
726 	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0);
727 	reg_value = xe_bo_ggtt_addr(stream->bo);
728 	reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ,
729 				    stream->per_xecore_buf_size / SZ_256K);
730 	reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING;
731 	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value);
732 
733 	return 0;
734 }
735 
736 static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
737 {
738 	struct xe_eu_stall_data_stream *stream =
739 		container_of(work, typeof(*stream), buf_poll_work.work);
740 	struct xe_gt *gt = stream->gt;
741 
742 	if (eu_stall_data_buf_poll(stream))
743 		wake_up(&stream->poll_wq);
744 
745 	if (!stream->reset_detected)
746 		queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
747 				   &stream->buf_poll_work,
748 				   msecs_to_jiffies(POLL_PERIOD_MS));
749 }
750 
751 static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
752 				   struct eu_stall_open_properties *props)
753 {
754 	unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores;
755 	struct per_xecore_buf *xecore_buf;
756 	struct xe_gt *gt = stream->gt;
757 	xe_dss_mask_t all_xecores;
758 	u16 group, instance;
759 	u32 vaddr_offset;
760 	int ret;
761 
762 	bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
763 		  XE_MAX_DSS_FUSE_BITS);
764 	num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS);
765 	last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1;
766 
767 	max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores);
768 	if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) {
769 		xe_gt_dbg(gt, "Invalid EU stall event report count %u\n",
770 			  props->wait_num_reports);
771 		xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n",
772 			  max_wait_num_reports);
773 		return -EINVAL;
774 	}
775 
776 	init_waitqueue_head(&stream->poll_wq);
777 	mutex_init(&stream->xecore_buf_lock);
778 	INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
779 	stream->per_xecore_buf_size = per_xecore_buf_size;
780 	stream->sampling_rate_mult = props->sampling_rate_mult;
781 	stream->wait_num_reports = props->wait_num_reports;
782 	stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));
783 
784 	ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
785 	if (ret)
786 		return ret;
787 
788 	for_each_dss_steering(xecore, gt, group, instance) {
789 		xecore_buf = &stream->xecore_buf[xecore];
790 		vaddr_offset = xecore * stream->per_xecore_buf_size;
791 		xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset;
792 	}
793 	return 0;
794 }
795 
796 static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream,
797 					       struct file *file, poll_table *wait)
798 {
799 	__poll_t events = 0;
800 
801 	poll_wait(file, &stream->poll_wq, wait);
802 
803 	if (stream->pollin)
804 		events |= EPOLLIN;
805 
806 	return events;
807 }
808 
809 static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
810 {
811 	struct xe_eu_stall_data_stream *stream = file->private_data;
812 	struct xe_gt *gt = stream->gt;
813 	__poll_t ret;
814 
815 	mutex_lock(&gt->eu_stall->stream_lock);
816 	ret = xe_eu_stall_stream_poll_locked(stream, file, wait);
817 	mutex_unlock(&gt->eu_stall->stream_lock);
818 
819 	return ret;
820 }
821 
822 static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
823 {
824 	struct xe_gt *gt = stream->gt;
825 	int ret = 0;
826 
827 	if (stream->enabled)
828 		return ret;
829 
830 	stream->enabled = true;
831 
832 	ret = xe_eu_stall_stream_enable(stream);
833 
834 	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
835 			   &stream->buf_poll_work,
836 			   msecs_to_jiffies(POLL_PERIOD_MS));
837 	return ret;
838 }
839 
840 static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
841 {
842 	struct xe_gt *gt = stream->gt;
843 
844 	if (!stream->enabled)
845 		return 0;
846 
847 	stream->enabled = false;
848 
849 	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);
850 
851 	cancel_delayed_work_sync(&stream->buf_poll_work);
852 
853 	if (XE_GT_WA(gt, 22016596838))
854 		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
855 					  REG_MASKED_FIELD_DISABLE(DISABLE_DOP_GATING));
856 
857 	xe_force_wake_put(gt_to_fw(gt), stream->fw_ref);
858 	xe_pm_runtime_put(gt_to_xe(gt));
859 
860 	return 0;
861 }
862 
863 static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream,
864 					    unsigned int cmd, unsigned long arg)
865 {
866 	switch (cmd) {
867 	case DRM_XE_OBSERVATION_IOCTL_ENABLE:
868 		return xe_eu_stall_enable_locked(stream);
869 	case DRM_XE_OBSERVATION_IOCTL_DISABLE:
870 		return xe_eu_stall_disable_locked(stream);
871 	}
872 
873 	return -EINVAL;
874 }
875 
876 static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
877 {
878 	struct xe_eu_stall_data_stream *stream = file->private_data;
879 	struct xe_gt *gt = stream->gt;
880 	long ret;
881 
882 	mutex_lock(&gt->eu_stall->stream_lock);
883 	ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg);
884 	mutex_unlock(&gt->eu_stall->stream_lock);
885 
886 	return ret;
887 }
888 
889 static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
890 {
891 	struct xe_eu_stall_data_stream *stream = file->private_data;
892 	struct xe_gt *gt = stream->gt;
893 
894 	mutex_lock(&gt->eu_stall->stream_lock);
895 	xe_eu_stall_disable_locked(stream);
896 	xe_eu_stall_data_buf_destroy(stream);
897 	xe_eu_stall_stream_free(stream);
898 	mutex_unlock(&gt->eu_stall->stream_lock);
899 
900 	drm_dev_put(&gt->tile->xe->drm);
901 
902 	return 0;
903 }
904 
905 static const struct file_operations fops_eu_stall = {
906 	.owner		= THIS_MODULE,
907 	.llseek		= noop_llseek,
908 	.release	= xe_eu_stall_stream_close,
909 	.poll		= xe_eu_stall_stream_poll,
910 	.read		= xe_eu_stall_stream_read,
911 	.unlocked_ioctl = xe_eu_stall_stream_ioctl,
912 	.compat_ioctl   = xe_eu_stall_stream_ioctl,
913 };
914 
915 static int xe_eu_stall_stream_open_locked(struct drm_device *dev,
916 					  struct eu_stall_open_properties *props,
917 					  struct drm_file *file)
918 {
919 	struct xe_eu_stall_data_stream *stream;
920 	struct xe_gt *gt = props->gt;
921 	unsigned long f_flags = 0;
922 	int ret, stream_fd;
923 
924 	/* Only one session can be active at any time */
925 	if (gt->eu_stall->stream) {
926 		xe_gt_dbg(gt, "EU stall sampling session already active\n");
927 		return -EBUSY;
928 	}
929 
930 	stream = kzalloc_obj(*stream);
931 	if (!stream)
932 		return -ENOMEM;
933 
934 	gt->eu_stall->stream = stream;
935 	stream->gt = gt;
936 
937 	ret = xe_eu_stall_stream_init(stream, props);
938 	if (ret) {
939 		xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret);
940 		goto err_free;
941 	}
942 
943 	stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags);
944 	if (stream_fd < 0) {
945 		ret = stream_fd;
946 		xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret);
947 		goto err_destroy;
948 	}
949 
950 	/* Take a reference on the driver that will be kept with stream_fd
951 	 * until its release.
952 	 */
953 	drm_dev_get(&gt->tile->xe->drm);
954 
955 	return stream_fd;
956 
957 err_destroy:
958 	xe_eu_stall_data_buf_destroy(stream);
959 err_free:
960 	xe_eu_stall_stream_free(stream);
961 	return ret;
962 }
963 
964 /**
965  * xe_eu_stall_stream_open - Open a xe EU stall data stream fd
966  *
967  * @dev: DRM device pointer
968  * @data: pointer to first struct @drm_xe_ext_set_property in
969  *	  the chain of input properties from the user space.
970  * @file: DRM file pointer
971  *
972  * This function opens a EU stall data stream with input properties from
973  * the user space.
974  *
975  * Returns: EU stall data stream fd on success or a negative error code.
976  */
977 int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file)
978 {
979 	struct xe_device *xe = to_xe_device(dev);
980 	struct eu_stall_open_properties props = {};
981 	int ret;
982 
983 	if (!xe_eu_stall_supported_on_platform(xe)) {
984 		drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
985 		return -ENODEV;
986 	}
987 
988 	if (xe_observation_paranoid && !perfmon_capable()) {
989 		drm_dbg(&xe->drm,  "Insufficient privileges for EU stall monitoring\n");
990 		return -EACCES;
991 	}
992 
993 	/* Initialize and set default values */
994 	props.wait_num_reports = 1;
995 	props.sampling_rate_mult = 4;
996 
997 	ret = xe_eu_stall_user_extensions(xe, data, 0, &props);
998 	if (ret)
999 		return ret;
1000 
1001 	if (!props.gt) {
1002 		drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n");
1003 		return -EINVAL;
1004 	}
1005 
1006 	mutex_lock(&props.gt->eu_stall->stream_lock);
1007 	ret = xe_eu_stall_stream_open_locked(dev, &props, file);
1008 	mutex_unlock(&props.gt->eu_stall->stream_lock);
1009 
1010 	return ret;
1011 }
1012