xref: /linux/drivers/gpu/drm/xe/xe_eu_stall.c (revision face6a3615a649456eb4549f6d474221d877d604)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2025 Intel Corporation
4  */
5 
6 #include <linux/anon_inodes.h>
7 #include <linux/fs.h>
8 #include <linux/poll.h>
9 #include <linux/types.h>
10 
11 #include <drm/drm_drv.h>
12 #include <generated/xe_wa_oob.h>
13 #include <uapi/drm/xe_drm.h>
14 
15 #include "xe_bo.h"
16 #include "xe_device.h"
17 #include "xe_eu_stall.h"
18 #include "xe_force_wake.h"
19 #include "xe_gt_mcr.h"
20 #include "xe_gt_printk.h"
21 #include "xe_gt_topology.h"
22 #include "xe_macros.h"
23 #include "xe_observation.h"
24 #include "xe_pm.h"
25 #include "xe_trace.h"
26 #include "xe_wa.h"
27 
28 #include "regs/xe_eu_stall_regs.h"
29 #include "regs/xe_gt_regs.h"
30 
31 #define POLL_PERIOD_MS	5
32 
33 static size_t per_xecore_buf_size = SZ_512K;
34 
35 struct per_xecore_buf {
36 	/* Buffer vaddr */
37 	u8 *vaddr;
38 	/* Write pointer */
39 	u32 write;
40 	/* Read pointer */
41 	u32 read;
42 };
43 
44 struct xe_eu_stall_data_stream {
45 	bool pollin;
46 	bool enabled;
47 	int wait_num_reports;
48 	int sampling_rate_mult;
49 	wait_queue_head_t poll_wq;
50 	size_t data_record_size;
51 	size_t per_xecore_buf_size;
52 
53 	struct xe_gt *gt;
54 	struct xe_bo *bo;
55 	/* Lock to protect data buffer pointers */
56 	struct mutex xecore_buf_lock;
57 	struct per_xecore_buf *xecore_buf;
58 	struct {
59 		bool reported_to_user;
60 		xe_dss_mask_t mask;
61 	} data_drop;
62 	struct delayed_work buf_poll_work;
63 };
64 
65 struct xe_eu_stall_gt {
66 	/* Lock to protect stream */
67 	struct mutex stream_lock;
68 	/* EU stall data stream */
69 	struct xe_eu_stall_data_stream *stream;
70 	/* Workqueue to schedule buffer pointers polling work */
71 	struct workqueue_struct *buf_ptr_poll_wq;
72 };
73 
74 /**
75  * struct eu_stall_open_properties - EU stall sampling properties received
76  *				     from user space at open.
77  * @sampling_rate_mult: EU stall sampling rate multiplier.
78  *			HW will sample every (sampling_rate_mult x 251) cycles.
79  * @wait_num_reports: Minimum number of EU stall data reports to unblock poll().
80  * @gt: GT on which EU stall data will be captured.
81  */
82 struct eu_stall_open_properties {
83 	int sampling_rate_mult;
84 	int wait_num_reports;
85 	struct xe_gt *gt;
86 };
87 
88 /*
89  * EU stall data format for PVC
90  */
91 struct xe_eu_stall_data_pvc {
92 	__u64 ip_addr:29;	  /* Bits 0  to 28  */
93 	__u64 active_count:8;	  /* Bits 29 to 36  */
94 	__u64 other_count:8;	  /* Bits 37 to 44  */
95 	__u64 control_count:8;	  /* Bits 45 to 52  */
96 	__u64 pipestall_count:8;  /* Bits 53 to 60  */
97 	__u64 send_count:8;	  /* Bits 61 to 68  */
98 	__u64 dist_acc_count:8;	  /* Bits 69 to 76  */
99 	__u64 sbid_count:8;	  /* Bits 77 to 84  */
100 	__u64 sync_count:8;	  /* Bits 85 to 92  */
101 	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
102 	__u64 unused_bits:27;
103 	__u64 unused[6];
104 } __packed;
105 
106 /*
107  * EU stall data format for Xe2 arch GPUs (LNL, BMG).
108  */
109 struct xe_eu_stall_data_xe2 {
110 	__u64 ip_addr:29;	  /* Bits 0  to 28  */
111 	__u64 tdr_count:8;	  /* Bits 29 to 36  */
112 	__u64 other_count:8;	  /* Bits 37 to 44  */
113 	__u64 control_count:8;	  /* Bits 45 to 52  */
114 	__u64 pipestall_count:8;  /* Bits 53 to 60  */
115 	__u64 send_count:8;	  /* Bits 61 to 68  */
116 	__u64 dist_acc_count:8;   /* Bits 69 to 76  */
117 	__u64 sbid_count:8;	  /* Bits 77 to 84  */
118 	__u64 sync_count:8;	  /* Bits 85 to 92  */
119 	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
120 	__u64 active_count:8;	  /* Bits 101 to 108 */
121 	__u64 ex_id:3;		  /* Bits 109 to 111 */
122 	__u64 end_flag:1;	  /* Bit  112 */
123 	__u64 unused_bits:15;
124 	__u64 unused[6];
125 } __packed;
126 
127 /*
128  * EU stall data format for Xe3p arch GPUs.
129  */
130 struct xe_eu_stall_data_xe3p {
131 	__u64 ip_addr:61;	  /* Bits 0  to 60  */
132 	__u64 tdr_count:8;	  /* Bits 61 to 68  */
133 	__u64 other_count:8;	  /* Bits 69 to 76  */
134 	__u64 control_count:8;	  /* Bits 77 to 84  */
135 	__u64 pipestall_count:8;  /* Bits 85 to 92  */
136 	__u64 send_count:8;	  /* Bits 93 to 100 */
137 	__u64 dist_acc_count:8;   /* Bits 101 to 108 */
138 	__u64 sbid_count:8;	  /* Bits 109 to 116 */
139 	__u64 sync_count:8;	  /* Bits 117 to 124 */
140 	__u64 inst_fetch_count:8; /* Bits 125 to 132 */
141 	__u64 active_count:8;	  /* Bits 133 to 140 */
142 	__u64 ex_id:3;		  /* Bits 141 to 143 */
143 	__u64 end_flag:1;	  /* Bit  144 */
144 	__u64 unused_bits:47;
145 	__u64 unused[5];
146 } __packed;
147 
148 const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7};
149 
150 /**
151  * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
152  *
153  * @num_rates: Pointer to a u32 to return the number of sampling rates.
154  * @rates: double u64 pointer to point to an array of sampling rates.
155  *
156  * Stores the number of sampling rates and pointer to the array of
157  * sampling rates in the input pointers.
158  *
159  * Returns: Size of the EU stall sampling rates array.
160  */
161 size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates)
162 {
163 	*num_rates = ARRAY_SIZE(eu_stall_sampling_rates);
164 	*rates = eu_stall_sampling_rates;
165 
166 	return sizeof(eu_stall_sampling_rates);
167 }
168 
169 /**
170  * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
171  *
172  * Returns: The per XeCore buffer size used to allocate the per GT
173  *	    EU stall data buffer.
174  */
175 size_t xe_eu_stall_get_per_xecore_buf_size(void)
176 {
177 	return per_xecore_buf_size;
178 }
179 
180 /**
181  * xe_eu_stall_data_record_size - get EU stall data record size.
182  *
183  * @xe: Pointer to a Xe device.
184  *
185  * Returns: EU stall data record size.
186  */
187 size_t xe_eu_stall_data_record_size(struct xe_device *xe)
188 {
189 	size_t record_size = 0;
190 
191 	if (GRAPHICS_VER(xe) >= 35)
192 		record_size = sizeof(struct xe_eu_stall_data_xe3p);
193 	else if (GRAPHICS_VER(xe) >= 20)
194 		record_size = sizeof(struct xe_eu_stall_data_xe2);
195 	else if (xe->info.platform == XE_PVC)
196 		record_size = sizeof(struct xe_eu_stall_data_pvc);
197 
198 
199 	xe_assert(xe, is_power_of_2(record_size));
200 
201 	return record_size;
202 }
203 
204 /**
205  * num_data_rows - Return the number of EU stall data rows of 64B each
206  *		   for a given data size.
207  *
208  * @data_size: EU stall data size
209  */
210 static u32 num_data_rows(u32 data_size)
211 {
212 	return data_size >> 6;
213 }
214 
215 static void xe_eu_stall_fini(void *arg)
216 {
217 	struct xe_gt *gt = arg;
218 
219 	destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
220 	mutex_destroy(&gt->eu_stall->stream_lock);
221 	kfree(gt->eu_stall);
222 }
223 
224 /**
225  * xe_eu_stall_init() - Allocate and initialize GT level EU stall data
226  *			structure xe_eu_stall_gt within struct xe_gt.
227  *
228  * @gt: GT being initialized.
229  *
230  * Returns: zero on success or a negative error code.
231  */
232 int xe_eu_stall_init(struct xe_gt *gt)
233 {
234 	struct xe_device *xe = gt_to_xe(gt);
235 	int ret;
236 
237 	if (!xe_eu_stall_supported_on_platform(xe))
238 		return 0;
239 
240 	gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL);
241 	if (!gt->eu_stall) {
242 		ret = -ENOMEM;
243 		goto exit;
244 	}
245 
246 	mutex_init(&gt->eu_stall->stream_lock);
247 
248 	gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
249 	if (!gt->eu_stall->buf_ptr_poll_wq) {
250 		ret = -ENOMEM;
251 		goto exit_free;
252 	}
253 
254 	return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
255 exit_free:
256 	mutex_destroy(&gt->eu_stall->stream_lock);
257 	kfree(gt->eu_stall);
258 exit:
259 	return ret;
260 }
261 
262 static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value,
263 					   struct eu_stall_open_properties *props)
264 {
265 	value = div_u64(value, 251);
266 	if (value == 0 || value > 7) {
267 		drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value);
268 		return -EINVAL;
269 	}
270 	props->sampling_rate_mult = value;
271 	return 0;
272 }
273 
274 static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value,
275 					      struct eu_stall_open_properties *props)
276 {
277 	props->wait_num_reports = value;
278 
279 	return 0;
280 }
281 
282 static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value,
283 				   struct eu_stall_open_properties *props)
284 {
285 	struct xe_gt *gt = xe_device_get_gt(xe, value);
286 
287 	if (!gt) {
288 		drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value);
289 		return -EINVAL;
290 	}
291 	props->gt = gt;
292 	return 0;
293 }
294 
295 typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value,
296 					struct eu_stall_open_properties *props);
297 
298 static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = {
299 	[DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate,
300 	[DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports,
301 	[DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id,
302 };
303 
304 static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension,
305 					     struct eu_stall_open_properties *props)
306 {
307 	u64 __user *address = u64_to_user_ptr(extension);
308 	struct drm_xe_ext_set_property ext;
309 	int err;
310 	u32 idx;
311 
312 	err = copy_from_user(&ext, address, sizeof(ext));
313 	if (XE_IOCTL_DBG(xe, err))
314 		return -EFAULT;
315 
316 	if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) ||
317 	    XE_IOCTL_DBG(xe, ext.pad))
318 		return -EINVAL;
319 
320 	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs));
321 	return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props);
322 }
323 
324 typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension,
325 					     struct eu_stall_open_properties *props);
326 static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = {
327 	[DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property,
328 };
329 
330 #define MAX_USER_EXTENSIONS	5
331 static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension,
332 				       int ext_number, struct eu_stall_open_properties *props)
333 {
334 	u64 __user *address = u64_to_user_ptr(extension);
335 	struct drm_xe_user_extension ext;
336 	int err;
337 	u32 idx;
338 
339 	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
340 		return -E2BIG;
341 
342 	err = copy_from_user(&ext, address, sizeof(ext));
343 	if (XE_IOCTL_DBG(xe, err))
344 		return -EFAULT;
345 
346 	if (XE_IOCTL_DBG(xe, ext.pad) ||
347 	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs)))
348 		return -EINVAL;
349 
350 	idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs));
351 	err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props);
352 	if (XE_IOCTL_DBG(xe, err))
353 		return err;
354 
355 	if (ext.next_extension)
356 		return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props);
357 
358 	return 0;
359 }
360 
361 /**
362  * buf_data_size - Calculate the number of bytes in a circular buffer
363  *		   given the read and write pointers and the size of
364  *		   the buffer.
365  *
366  * @buf_size: Size of the circular buffer
367  * @read_ptr: Read pointer with an additional overflow bit
368  * @write_ptr: Write pointer with an additional overflow bit
369  *
370  * Since the read and write pointers have an additional overflow bit,
371  * this function calculates the offsets from the pointers and use the
372  * offsets to calculate the data size in the buffer.
373  *
374  * Returns: number of bytes of data in the buffer
375  */
376 static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
377 {
378 	u32 read_offset, write_offset, size = 0;
379 
380 	if (read_ptr == write_ptr)
381 		goto exit;
382 
383 	read_offset = read_ptr & (buf_size - 1);
384 	write_offset = write_ptr & (buf_size - 1);
385 
386 	if (write_offset > read_offset)
387 		size = write_offset - read_offset;
388 	else
389 		size = buf_size - read_offset + write_offset;
390 exit:
391 	return size;
392 }
393 
394 /**
395  * eu_stall_data_buf_poll - Poll for EU stall data in the buffer.
396  *
397  * @stream: xe EU stall data stream instance
398  *
399  * Returns: true if the EU stall buffer contains minimum stall data as
400  *	    specified by the event report count, else false.
401  */
402 static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
403 {
404 	u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0;
405 	u32 buf_size = stream->per_xecore_buf_size;
406 	struct per_xecore_buf *xecore_buf;
407 	struct xe_gt *gt = stream->gt;
408 	bool min_data_present = false;
409 	u16 group, instance;
410 	unsigned int xecore;
411 
412 	mutex_lock(&stream->xecore_buf_lock);
413 	for_each_dss_steering(xecore, gt, group, instance) {
414 		xecore_buf = &stream->xecore_buf[xecore];
415 		read_ptr = xecore_buf->read;
416 		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT,
417 						       group, instance);
418 		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
419 		write_ptr <<= 6;
420 		write_ptr &= ((buf_size << 1) - 1);
421 		if (!min_data_present) {
422 			total_data += buf_data_size(buf_size, read_ptr, write_ptr);
423 			if (num_data_rows(total_data) >= stream->wait_num_reports)
424 				min_data_present = true;
425 		}
426 		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
427 			set_bit(xecore, stream->data_drop.mask);
428 		xecore_buf->write = write_ptr;
429 	}
430 	mutex_unlock(&stream->xecore_buf_lock);
431 
432 	return min_data_present;
433 }
434 
435 static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
436 {
437 	struct xe_device *xe = gt_to_xe(gt);
438 	u32 write_ptr_reg;
439 
440 	/* On PVC, the overflow bit has to be cleared by writing 1 to it.
441 	 * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it.
442 	 */
443 	if (GRAPHICS_VER(xe) >= 20)
444 		write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
445 	else
446 		write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
447 
448 	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance);
449 }
450 
451 static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream,
452 				     char __user *buf, size_t count,
453 				     size_t *total_data_size, struct xe_gt *gt,
454 				     u16 group, u16 instance, unsigned int xecore)
455 {
456 	size_t read_data_size, copy_size, buf_size;
457 	u32 read_ptr_reg, read_ptr, write_ptr;
458 	u8 *xecore_start_vaddr, *read_vaddr;
459 	struct per_xecore_buf *xecore_buf;
460 	u32 read_offset, write_offset;
461 
462 	/* Hardware increments the read and write pointers such that they can
463 	 * overflow into one additional bit. For example, a 256KB size buffer
464 	 * offset pointer needs 18 bits. But HW uses 19 bits for the read and
465 	 * write pointers. This technique avoids wasting a slot in the buffer.
466 	 * Read and write offsets are calculated from the pointers in order to
467 	 * check if the write pointer has wrapped around the array.
468 	 */
469 	xecore_buf = &stream->xecore_buf[xecore];
470 	xecore_start_vaddr = xecore_buf->vaddr;
471 	read_ptr = xecore_buf->read;
472 	write_ptr = xecore_buf->write;
473 	buf_size = stream->per_xecore_buf_size;
474 
475 	read_data_size = buf_data_size(buf_size, read_ptr, write_ptr);
476 	/* Read only the data that the user space buffer can accommodate */
477 	read_data_size = min_t(size_t, count - *total_data_size, read_data_size);
478 	if (read_data_size == 0)
479 		goto exit_drop;
480 
481 	read_offset = read_ptr & (buf_size - 1);
482 	write_offset = write_ptr & (buf_size - 1);
483 	read_vaddr = xecore_start_vaddr + read_offset;
484 
485 	if (write_offset > read_offset) {
486 		if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size))
487 			return -EFAULT;
488 	} else {
489 		if (read_data_size >= buf_size - read_offset)
490 			copy_size = buf_size - read_offset;
491 		else
492 			copy_size = read_data_size;
493 		if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size))
494 			return -EFAULT;
495 		if (copy_to_user(buf + *total_data_size + copy_size,
496 				 xecore_start_vaddr, read_data_size - copy_size))
497 			return -EFAULT;
498 	}
499 
500 	*total_data_size += read_data_size;
501 	read_ptr += read_data_size;
502 
503 	/* Read pointer can overflow into one additional bit */
504 	read_ptr &= (buf_size << 1) - 1;
505 	read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
506 	read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
507 	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
508 	xecore_buf->read = read_ptr;
509 	trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
510 				    read_data_size, *total_data_size);
511 exit_drop:
512 	/* Clear drop bit (if set) after any data was read or if the buffer was empty.
513 	 * Drop bit can be set even if the buffer is empty as the buffer may have been emptied
514 	 * in the previous read() and the data drop bit was set during the previous read().
515 	 */
516 	if (test_bit(xecore, stream->data_drop.mask)) {
517 		clear_dropped_eviction_line_bit(gt, group, instance);
518 		clear_bit(xecore, stream->data_drop.mask);
519 	}
520 	return 0;
521 }
522 
523 /**
524  * xe_eu_stall_stream_read_locked - copy EU stall counters data from the
525  *				    per xecore buffers to the userspace buffer
526  * @stream: A stream opened for EU stall count metrics
527  * @file: An xe EU stall data stream file
528  * @buf: destination buffer given by userspace
529  * @count: the number of bytes userspace wants to read
530  *
531  * Returns: Number of bytes copied or a negative error code
532  * If we've successfully copied any data then reporting that takes
533  * precedence over any internal error status, so the data isn't lost.
534  */
535 static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream,
536 					      struct file *file, char __user *buf,
537 					      size_t count)
538 {
539 	struct xe_gt *gt = stream->gt;
540 	size_t total_size = 0;
541 	u16 group, instance;
542 	unsigned int xecore;
543 	int ret = 0;
544 
545 	mutex_lock(&stream->xecore_buf_lock);
546 	if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) {
547 		if (!stream->data_drop.reported_to_user) {
548 			stream->data_drop.reported_to_user = true;
549 			xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n",
550 				  XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask);
551 			mutex_unlock(&stream->xecore_buf_lock);
552 			return -EIO;
553 		}
554 		stream->data_drop.reported_to_user = false;
555 	}
556 
557 	for_each_dss_steering(xecore, gt, group, instance) {
558 		ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
559 						gt, group, instance, xecore);
560 		if (ret || count == total_size)
561 			break;
562 	}
563 	mutex_unlock(&stream->xecore_buf_lock);
564 	return total_size ?: (ret ?: -EAGAIN);
565 }
566 
567 /*
568  * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
569  * before calling read().
570  *
571  * Returns: The number of bytes copied or a negative error code on failure.
572  *	    -EIO if HW drops any EU stall data when the buffer is full.
573  */
574 static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
575 				       size_t count, loff_t *ppos)
576 {
577 	struct xe_eu_stall_data_stream *stream = file->private_data;
578 	struct xe_gt *gt = stream->gt;
579 	ssize_t ret, aligned_count;
580 
581 	aligned_count = ALIGN_DOWN(count, stream->data_record_size);
582 	if (aligned_count == 0)
583 		return -EINVAL;
584 
585 	if (!stream->enabled) {
586 		xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
587 		return -EINVAL;
588 	}
589 
590 	if (!(file->f_flags & O_NONBLOCK)) {
591 		do {
592 			ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
593 			if (ret)
594 				return -EINTR;
595 
596 			mutex_lock(&gt->eu_stall->stream_lock);
597 			ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
598 			mutex_unlock(&gt->eu_stall->stream_lock);
599 		} while (ret == -EAGAIN);
600 	} else {
601 		mutex_lock(&gt->eu_stall->stream_lock);
602 		ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
603 		mutex_unlock(&gt->eu_stall->stream_lock);
604 	}
605 
606 	/*
607 	 * This may not work correctly if the user buffer is very small.
608 	 * We don't want to block the next read() when there is data in the buffer
609 	 * now, but couldn't be accommodated in the small user buffer.
610 	 */
611 	stream->pollin = false;
612 
613 	return ret;
614 }
615 
616 static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
617 {
618 	struct xe_gt *gt = stream->gt;
619 
620 	mutex_destroy(&stream->xecore_buf_lock);
621 	gt->eu_stall->stream = NULL;
622 	kfree(stream);
623 }
624 
625 static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream)
626 {
627 	xe_bo_unpin_map_no_vm(stream->bo);
628 	kfree(stream->xecore_buf);
629 }
630 
631 static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
632 				      u16 last_xecore)
633 {
634 	struct xe_tile *tile = stream->gt->tile;
635 	struct xe_bo *bo;
636 	u32 size;
637 
638 	stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
639 	if (!stream->xecore_buf)
640 		return -ENOMEM;
641 
642 	size = stream->per_xecore_buf_size * last_xecore;
643 
644 	bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel,
645 					  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false);
646 	if (IS_ERR(bo)) {
647 		kfree(stream->xecore_buf);
648 		return PTR_ERR(bo);
649 	}
650 
651 	XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
652 	stream->bo = bo;
653 
654 	return 0;
655 }
656 
657 static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
658 {
659 	u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value;
660 	struct per_xecore_buf *xecore_buf;
661 	struct xe_gt *gt = stream->gt;
662 	u16 group, instance;
663 	unsigned int fw_ref;
664 	int xecore;
665 
666 	/* Take runtime pm ref and forcewake to disable RC6 */
667 	xe_pm_runtime_get(gt_to_xe(gt));
668 	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
669 	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
670 		xe_gt_err(gt, "Failed to get RENDER forcewake\n");
671 		xe_pm_runtime_put(gt_to_xe(gt));
672 		return -ETIMEDOUT;
673 	}
674 
675 	if (XE_GT_WA(gt, 22016596838))
676 		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
677 					  _MASKED_BIT_ENABLE(DISABLE_DOP_GATING));
678 
679 	for_each_dss_steering(xecore, gt, group, instance) {
680 		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance);
681 		/* Clear any drop bits set and not cleared in the previous session. */
682 		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
683 			clear_dropped_eviction_line_bit(gt, group, instance);
684 		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
685 		read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
686 		read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
687 		/* Initialize the read pointer to the write pointer */
688 		xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
689 		write_ptr <<= 6;
690 		write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
691 		xecore_buf = &stream->xecore_buf[xecore];
692 		xecore_buf->write = write_ptr;
693 		xecore_buf->read = write_ptr;
694 	}
695 	stream->data_drop.reported_to_user = false;
696 	bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS);
697 
698 	reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE,
699 				  REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) |
700 				  REG_FIELD_PREP(EUSTALL_SAMPLE_RATE,
701 						 stream->sampling_rate_mult));
702 	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value);
703 	/* GGTT addresses can never be > 32 bits */
704 	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0);
705 	reg_value = xe_bo_ggtt_addr(stream->bo);
706 	reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ,
707 				    stream->per_xecore_buf_size / SZ_256K);
708 	reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING;
709 	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value);
710 
711 	return 0;
712 }
713 
714 static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
715 {
716 	struct xe_eu_stall_data_stream *stream =
717 		container_of(work, typeof(*stream), buf_poll_work.work);
718 	struct xe_gt *gt = stream->gt;
719 
720 	if (eu_stall_data_buf_poll(stream)) {
721 		stream->pollin = true;
722 		wake_up(&stream->poll_wq);
723 	}
724 	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
725 			   &stream->buf_poll_work,
726 			   msecs_to_jiffies(POLL_PERIOD_MS));
727 }
728 
729 static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
730 				   struct eu_stall_open_properties *props)
731 {
732 	unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores;
733 	struct per_xecore_buf *xecore_buf;
734 	struct xe_gt *gt = stream->gt;
735 	xe_dss_mask_t all_xecores;
736 	u16 group, instance;
737 	u32 vaddr_offset;
738 	int ret;
739 
740 	bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
741 		  XE_MAX_DSS_FUSE_BITS);
742 	num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS);
743 	last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1;
744 
745 	max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores);
746 	if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) {
747 		xe_gt_dbg(gt, "Invalid EU stall event report count %u\n",
748 			  props->wait_num_reports);
749 		xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n",
750 			  max_wait_num_reports);
751 		return -EINVAL;
752 	}
753 
754 	init_waitqueue_head(&stream->poll_wq);
755 	mutex_init(&stream->xecore_buf_lock);
756 	INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
757 	stream->per_xecore_buf_size = per_xecore_buf_size;
758 	stream->sampling_rate_mult = props->sampling_rate_mult;
759 	stream->wait_num_reports = props->wait_num_reports;
760 	stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));
761 
762 	ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
763 	if (ret)
764 		return ret;
765 
766 	for_each_dss_steering(xecore, gt, group, instance) {
767 		xecore_buf = &stream->xecore_buf[xecore];
768 		vaddr_offset = xecore * stream->per_xecore_buf_size;
769 		xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset;
770 	}
771 	return 0;
772 }
773 
774 static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream,
775 					       struct file *file, poll_table *wait)
776 {
777 	__poll_t events = 0;
778 
779 	poll_wait(file, &stream->poll_wq, wait);
780 
781 	if (stream->pollin)
782 		events |= EPOLLIN;
783 
784 	return events;
785 }
786 
787 static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
788 {
789 	struct xe_eu_stall_data_stream *stream = file->private_data;
790 	struct xe_gt *gt = stream->gt;
791 	__poll_t ret;
792 
793 	mutex_lock(&gt->eu_stall->stream_lock);
794 	ret = xe_eu_stall_stream_poll_locked(stream, file, wait);
795 	mutex_unlock(&gt->eu_stall->stream_lock);
796 
797 	return ret;
798 }
799 
800 static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
801 {
802 	struct xe_gt *gt = stream->gt;
803 	int ret = 0;
804 
805 	if (stream->enabled)
806 		return ret;
807 
808 	stream->enabled = true;
809 
810 	ret = xe_eu_stall_stream_enable(stream);
811 
812 	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
813 			   &stream->buf_poll_work,
814 			   msecs_to_jiffies(POLL_PERIOD_MS));
815 	return ret;
816 }
817 
818 static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
819 {
820 	struct xe_gt *gt = stream->gt;
821 
822 	if (!stream->enabled)
823 		return 0;
824 
825 	stream->enabled = false;
826 
827 	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);
828 
829 	cancel_delayed_work_sync(&stream->buf_poll_work);
830 
831 	if (XE_GT_WA(gt, 22016596838))
832 		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
833 					  _MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
834 
835 	xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
836 	xe_pm_runtime_put(gt_to_xe(gt));
837 
838 	return 0;
839 }
840 
841 static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream,
842 					    unsigned int cmd, unsigned long arg)
843 {
844 	switch (cmd) {
845 	case DRM_XE_OBSERVATION_IOCTL_ENABLE:
846 		return xe_eu_stall_enable_locked(stream);
847 	case DRM_XE_OBSERVATION_IOCTL_DISABLE:
848 		return xe_eu_stall_disable_locked(stream);
849 	}
850 
851 	return -EINVAL;
852 }
853 
854 static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
855 {
856 	struct xe_eu_stall_data_stream *stream = file->private_data;
857 	struct xe_gt *gt = stream->gt;
858 	long ret;
859 
860 	mutex_lock(&gt->eu_stall->stream_lock);
861 	ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg);
862 	mutex_unlock(&gt->eu_stall->stream_lock);
863 
864 	return ret;
865 }
866 
867 static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
868 {
869 	struct xe_eu_stall_data_stream *stream = file->private_data;
870 	struct xe_gt *gt = stream->gt;
871 
872 	drm_dev_put(&gt->tile->xe->drm);
873 
874 	mutex_lock(&gt->eu_stall->stream_lock);
875 	xe_eu_stall_disable_locked(stream);
876 	xe_eu_stall_data_buf_destroy(stream);
877 	xe_eu_stall_stream_free(stream);
878 	mutex_unlock(&gt->eu_stall->stream_lock);
879 
880 	return 0;
881 }
882 
883 static const struct file_operations fops_eu_stall = {
884 	.owner		= THIS_MODULE,
885 	.llseek		= noop_llseek,
886 	.release	= xe_eu_stall_stream_close,
887 	.poll		= xe_eu_stall_stream_poll,
888 	.read		= xe_eu_stall_stream_read,
889 	.unlocked_ioctl = xe_eu_stall_stream_ioctl,
890 	.compat_ioctl   = xe_eu_stall_stream_ioctl,
891 };
892 
893 static int xe_eu_stall_stream_open_locked(struct drm_device *dev,
894 					  struct eu_stall_open_properties *props,
895 					  struct drm_file *file)
896 {
897 	struct xe_eu_stall_data_stream *stream;
898 	struct xe_gt *gt = props->gt;
899 	unsigned long f_flags = 0;
900 	int ret, stream_fd;
901 
902 	/* Only one session can be active at any time */
903 	if (gt->eu_stall->stream) {
904 		xe_gt_dbg(gt, "EU stall sampling session already active\n");
905 		return -EBUSY;
906 	}
907 
908 	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
909 	if (!stream)
910 		return -ENOMEM;
911 
912 	gt->eu_stall->stream = stream;
913 	stream->gt = gt;
914 
915 	ret = xe_eu_stall_stream_init(stream, props);
916 	if (ret) {
917 		xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret);
918 		goto err_free;
919 	}
920 
921 	stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags);
922 	if (stream_fd < 0) {
923 		ret = stream_fd;
924 		xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret);
925 		goto err_destroy;
926 	}
927 
928 	/* Take a reference on the driver that will be kept with stream_fd
929 	 * until its release.
930 	 */
931 	drm_dev_get(&gt->tile->xe->drm);
932 
933 	return stream_fd;
934 
935 err_destroy:
936 	xe_eu_stall_data_buf_destroy(stream);
937 err_free:
938 	xe_eu_stall_stream_free(stream);
939 	return ret;
940 }
941 
942 /**
943  * xe_eu_stall_stream_open - Open a xe EU stall data stream fd
944  *
945  * @dev: DRM device pointer
946  * @data: pointer to first struct @drm_xe_ext_set_property in
947  *	  the chain of input properties from the user space.
948  * @file: DRM file pointer
949  *
950  * This function opens a EU stall data stream with input properties from
951  * the user space.
952  *
953  * Returns: EU stall data stream fd on success or a negative error code.
954  */
955 int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file)
956 {
957 	struct xe_device *xe = to_xe_device(dev);
958 	struct eu_stall_open_properties props = {};
959 	int ret;
960 
961 	if (!xe_eu_stall_supported_on_platform(xe)) {
962 		drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
963 		return -ENODEV;
964 	}
965 
966 	if (xe_observation_paranoid && !perfmon_capable()) {
967 		drm_dbg(&xe->drm,  "Insufficient privileges for EU stall monitoring\n");
968 		return -EACCES;
969 	}
970 
971 	/* Initialize and set default values */
972 	props.wait_num_reports = 1;
973 	props.sampling_rate_mult = 4;
974 
975 	ret = xe_eu_stall_user_extensions(xe, data, 0, &props);
976 	if (ret)
977 		return ret;
978 
979 	if (!props.gt) {
980 		drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n");
981 		return -EINVAL;
982 	}
983 
984 	mutex_lock(&props.gt->eu_stall->stream_lock);
985 	ret = xe_eu_stall_stream_open_locked(dev, &props, file);
986 	mutex_unlock(&props.gt->eu_stall->stream_lock);
987 
988 	return ret;
989 }
990