xref: /linux/drivers/gpu/drm/xe/xe_eu_stall.c (revision 220994d61cebfc04f071d69049127657c7e8191b)
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/types.h>

#include <drm/drm_drv.h>
#include <generated/xe_wa_oob.h>
#include <uapi/drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_eu_stall.h"
#include "xe_force_wake.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_macros.h"
#include "xe_observation.h"
#include "xe_pm.h"
#include "xe_trace.h"
#include "xe_wa.h"

#include "regs/xe_eu_stall_regs.h"
#include "regs/xe_gt_regs.h"

#define POLL_PERIOD_MS	5

static size_t per_xecore_buf_size = SZ_512K;

struct per_xecore_buf {
	/* Buffer vaddr */
	u8 *vaddr;
	/* Write pointer */
	u32 write;
	/* Read pointer */
	u32 read;
};

struct xe_eu_stall_data_stream {
	bool pollin;
	bool enabled;
	int wait_num_reports;
	int sampling_rate_mult;
	wait_queue_head_t poll_wq;
	size_t data_record_size;
	size_t per_xecore_buf_size;

	struct xe_gt *gt;
	struct xe_bo *bo;
	/* Lock to protect data buffer pointers */
	struct mutex xecore_buf_lock;
	struct per_xecore_buf *xecore_buf;
	struct {
		bool reported_to_user;
		xe_dss_mask_t mask;
	} data_drop;
	struct delayed_work buf_poll_work;
};

struct xe_eu_stall_gt {
	/* Lock to protect stream */
	struct mutex stream_lock;
	/* EU stall data stream */
	struct xe_eu_stall_data_stream *stream;
	/* Workqueue to schedule buffer pointers polling work */
	struct workqueue_struct *buf_ptr_poll_wq;
};

/**
 * struct eu_stall_open_properties - EU stall sampling properties received
 *				     from user space at open.
 * @sampling_rate_mult: EU stall sampling rate multiplier.
 *			HW will sample every (sampling_rate_mult x 251) cycles.
 * @wait_num_reports: Minimum number of EU stall data reports to unblock poll().
 * @gt: GT on which EU stall data will be captured.
 */
struct eu_stall_open_properties {
	int sampling_rate_mult;
	int wait_num_reports;
	struct xe_gt *gt;
};

/*
 * EU stall data format for PVC
 */
struct xe_eu_stall_data_pvc {
	__u64 ip_addr:29;	  /* Bits 0  to 28  */
	__u64 active_count:8;	  /* Bits 29 to 36  */
	__u64 other_count:8;	  /* Bits 37 to 44  */
	__u64 control_count:8;	  /* Bits 45 to 52  */
	__u64 pipestall_count:8;  /* Bits 53 to 60  */
	__u64 send_count:8;	  /* Bits 61 to 68  */
	__u64 dist_acc_count:8;	  /* Bits 69 to 76  */
	__u64 sbid_count:8;	  /* Bits 77 to 84  */
	__u64 sync_count:8;	  /* Bits 85 to 92  */
	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
	__u64 unused_bits:27;
	__u64 unused[6];
} __packed;

/*
 * EU stall data format for Xe2 arch GPUs (LNL, BMG).
 */
struct xe_eu_stall_data_xe2 {
	__u64 ip_addr:29;	  /* Bits 0  to 28  */
	__u64 tdr_count:8;	  /* Bits 29 to 36  */
	__u64 other_count:8;	  /* Bits 37 to 44  */
	__u64 control_count:8;	  /* Bits 45 to 52  */
	__u64 pipestall_count:8;  /* Bits 53 to 60  */
	__u64 send_count:8;	  /* Bits 61 to 68  */
	__u64 dist_acc_count:8;   /* Bits 69 to 76  */
	__u64 sbid_count:8;	  /* Bits 77 to 84  */
	__u64 sync_count:8;	  /* Bits 85 to 92  */
	__u64 inst_fetch_count:8; /* Bits 93 to 100 */
	__u64 active_count:8;	  /* Bits 101 to 108 */
	__u64 ex_id:3;		  /* Bits 109 to 111 */
	__u64 end_flag:1;	  /* Bit  112 */
	__u64 unused_bits:15;
	__u64 unused[6];
} __packed;

const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7};

/**
 * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
 *
 * @num_rates: Pointer to a u32 in which the number of sampling rates is returned.
 * @rates: Double pointer that is set to point to the array of sampling rates.
 *
 * Stores the number of sampling rates and a pointer to the array of
 * sampling rates in the caller provided pointers.
 *
 * Returns: Size in bytes of the EU stall sampling rates array.
 */
size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates)
{
	*num_rates = ARRAY_SIZE(eu_stall_sampling_rates);
	*rates = eu_stall_sampling_rates;

	return sizeof(eu_stall_sampling_rates);
}

/**
 * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
 *
 * Returns: The per XeCore buffer size used to allocate the per GT
 *	    EU stall data buffer.
 */
size_t xe_eu_stall_get_per_xecore_buf_size(void)
{
	return per_xecore_buf_size;
}

/**
 * xe_eu_stall_data_record_size - get EU stall data record size.
 *
 * @xe: Pointer to a Xe device.
 *
 * Returns: EU stall data record size.
 */
size_t xe_eu_stall_data_record_size(struct xe_device *xe)
{
	size_t record_size = 0;

	if (xe->info.platform == XE_PVC)
		record_size = sizeof(struct xe_eu_stall_data_pvc);
	else if (GRAPHICS_VER(xe) >= 20)
		record_size = sizeof(struct xe_eu_stall_data_xe2);

	xe_assert(xe, is_power_of_2(record_size));

	return record_size;
}

/**
 * num_data_rows - Return the number of EU stall data rows of 64B each
 *		   for a given data size.
 *
 * @data_size: EU stall data size in bytes
 */
static u32 num_data_rows(u32 data_size)
{
	return data_size >> 6;
}

static void xe_eu_stall_fini(void *arg)
{
	struct xe_gt *gt = arg;

	destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
	mutex_destroy(&gt->eu_stall->stream_lock);
	kfree(gt->eu_stall);
}

/**
 * xe_eu_stall_init() - Allocate and initialize GT level EU stall data
 *			structure xe_eu_stall_gt within struct xe_gt.
 *
 * @gt: GT being initialized.
 *
 * Returns: zero on success or a negative error code.
 */
int xe_eu_stall_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	int ret;

	if (!xe_eu_stall_supported_on_platform(xe))
		return 0;

	gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL);
	if (!gt->eu_stall) {
		ret = -ENOMEM;
		goto exit;
	}

	mutex_init(&gt->eu_stall->stream_lock);

	gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
	if (!gt->eu_stall->buf_ptr_poll_wq) {
		ret = -ENOMEM;
		goto exit_free;
	}

	return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
exit_free:
	mutex_destroy(&gt->eu_stall->stream_lock);
	kfree(gt->eu_stall);
exit:
	return ret;
}

static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value,
					   struct eu_stall_open_properties *props)
{
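	/* User space passes the sampling rate in GPU cycles; convert it to a
	 * multiplier of the 251-cycle base rate, which must be between 1 and 7.
	 */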
	value = div_u64(value, 251);
	if (value == 0 || value > 7) {
		drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value);
		return -EINVAL;
	}
	props->sampling_rate_mult = value;
	return 0;
}

static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value,
					      struct eu_stall_open_properties *props)
{
	props->wait_num_reports = value;

	return 0;
}

static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value,
				   struct eu_stall_open_properties *props)
{
	struct xe_gt *gt = xe_device_get_gt(xe, value);

	if (!gt) {
		drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value);
		return -EINVAL;
	}
	props->gt = gt;
	return 0;
}

typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value,
					struct eu_stall_open_properties *props);

static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = {
	[DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate,
	[DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports,
	[DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id,
};

static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension,
					     struct eu_stall_open_properties *props)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs));
	return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props);
}

typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension,
					     struct eu_stall_open_properties *props);
static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = {
	[DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS	5
static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension,
				       int ext_number, struct eu_stall_open_properties *props)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs));
	err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props);

	return 0;
}

/**
 * buf_data_size - Calculate the number of bytes in a circular buffer
 *		   given the read and write pointers and the size of
 *		   the buffer.
 *
 * @buf_size: Size of the circular buffer
 * @read_ptr: Read pointer with an additional overflow bit
 * @write_ptr: Write pointer with an additional overflow bit
 *
 * Since the read and write pointers have an additional overflow bit,
 * this function calculates the offsets from the pointers and uses the
 * offsets to calculate the data size in the buffer.
 *
 * Returns: number of bytes of data in the buffer
 */
static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
{
	u32 read_offset, write_offset, size = 0;

	if (read_ptr == write_ptr)
		goto exit;

	read_offset = read_ptr & (buf_size - 1);
	write_offset = write_ptr & (buf_size - 1);

	if (write_offset > read_offset)
		size = write_offset - read_offset;
	else
		size = buf_size - read_offset + write_offset;
exit:
	return size;
}

/**
 * eu_stall_data_buf_poll - Poll for EU stall data in the buffer.
 *
 * @stream: xe EU stall data stream instance
 *
 * Returns: true if the EU stall buffers contain at least the minimum amount of
 *	    stall data specified by stream->wait_num_reports, else false.
 */
static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
{
	u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0;
	u32 buf_size = stream->per_xecore_buf_size;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	bool min_data_present = false;
	u16 group, instance;
	unsigned int xecore;

	mutex_lock(&stream->xecore_buf_lock);
	for_each_dss_steering(xecore, gt, group, instance) {
		xecore_buf = &stream->xecore_buf[xecore];
		read_ptr = xecore_buf->read;
		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT,
						       group, instance);
		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
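		/* The register reports the write pointer in 64B cache line units;
		 * convert it to a byte offset, keeping the extra overflow bit.
		 */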
		write_ptr <<= 6;
		write_ptr &= ((buf_size << 1) - 1);
		if (!min_data_present) {
			total_data += buf_data_size(buf_size, read_ptr, write_ptr);
			if (num_data_rows(total_data) >= stream->wait_num_reports)
				min_data_present = true;
		}
		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
			set_bit(xecore, stream->data_drop.mask);
		xecore_buf->write = write_ptr;
	}
	mutex_unlock(&stream->xecore_buf_lock);

	return min_data_present;
}

static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 write_ptr_reg;

	/* On PVC, the overflow bit has to be cleared by writing 1 to it.
	 * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it.
	 */
	if (GRAPHICS_VER(xe) >= 20)
		write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
	else
		write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);

	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance);
}

static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream,
				     char __user *buf, size_t count,
				     size_t *total_data_size, struct xe_gt *gt,
				     u16 group, u16 instance, unsigned int xecore)
{
	size_t read_data_size, copy_size, buf_size;
	u32 read_ptr_reg, read_ptr, write_ptr;
	u8 *xecore_start_vaddr, *read_vaddr;
	struct per_xecore_buf *xecore_buf;
	u32 read_offset, write_offset;

	/* Hardware increments the read and write pointers such that they can
	 * overflow into one additional bit. For example, a 256KB size buffer
	 * offset pointer needs 18 bits. But HW uses 19 bits for the read and
	 * write pointers. This technique avoids wasting a slot in the buffer.
	 * Read and write offsets are calculated from the pointers in order to
	 * check if the write pointer has wrapped around the array.
	 */
	xecore_buf = &stream->xecore_buf[xecore];
	xecore_start_vaddr = xecore_buf->vaddr;
	read_ptr = xecore_buf->read;
	write_ptr = xecore_buf->write;
	buf_size = stream->per_xecore_buf_size;

	read_data_size = buf_data_size(buf_size, read_ptr, write_ptr);
	/* Read only the data that the user space buffer can accommodate */
	read_data_size = min_t(size_t, count - *total_data_size, read_data_size);
	if (read_data_size == 0)
		goto exit_drop;

	read_offset = read_ptr & (buf_size - 1);
	write_offset = write_ptr & (buf_size - 1);
	read_vaddr = xecore_start_vaddr + read_offset;

	if (write_offset > read_offset) {
		if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size))
			return -EFAULT;
	} else {
		if (read_data_size >= buf_size - read_offset)
			copy_size = buf_size - read_offset;
		else
			copy_size = read_data_size;
		if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size))
			return -EFAULT;
		if (copy_to_user(buf + *total_data_size + copy_size,
				 xecore_start_vaddr, read_data_size - copy_size))
			return -EFAULT;
	}

	*total_data_size += read_data_size;
	read_ptr += read_data_size;

	/* Read pointer can overflow into one additional bit */
	read_ptr &= (buf_size << 1) - 1;
	read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
	read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
	xecore_buf->read = read_ptr;
	trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
				    read_data_size, *total_data_size);
exit_drop:
	/* Clear the drop bit (if set) after any data was read or if the buffer was empty.
	 * The drop bit can be set even when the buffer is empty, since the buffer may have
	 * been emptied in the previous read() while the drop bit was set during that read().
	 */
	if (test_bit(xecore, stream->data_drop.mask)) {
		clear_dropped_eviction_line_bit(gt, group, instance);
		clear_bit(xecore, stream->data_drop.mask);
	}
	return 0;
}

/**
 * xe_eu_stall_stream_read_locked - copy EU stall counters data from the
 *				    per xecore buffers to the userspace buffer
 * @stream: A stream opened for EU stall count metrics
 * @file: An xe EU stall data stream file
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 *
 * Returns: Number of bytes copied or a negative error code
 * If we've successfully copied any data then reporting that takes
 * precedence over any internal error status, so the data isn't lost.
 */
static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream,
					      struct file *file, char __user *buf,
					      size_t count)
{
	struct xe_gt *gt = stream->gt;
	size_t total_size = 0;
	u16 group, instance;
	unsigned int xecore;
	int ret = 0;

	mutex_lock(&stream->xecore_buf_lock);
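	/* A data drop is reported to user space once, as an -EIO error, before
	 * returning any further data; the flag is then rearmed for the next drop.
	 */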
	if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) {
		if (!stream->data_drop.reported_to_user) {
			stream->data_drop.reported_to_user = true;
			xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n",
				  XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask);
			mutex_unlock(&stream->xecore_buf_lock);
			return -EIO;
		}
		stream->data_drop.reported_to_user = false;
	}

	for_each_dss_steering(xecore, gt, group, instance) {
		ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
						gt, group, instance, xecore);
		if (ret || count == total_size)
			break;
	}
	mutex_unlock(&stream->xecore_buf_lock);
	return total_size ?: (ret ?: -EAGAIN);
}

/*
 * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
 * before calling read().
 *
 * Returns: The number of bytes copied or a negative error code on failure.
 *	    -EIO if HW drops any EU stall data when the buffer is full.
 */
static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
				       size_t count, loff_t *ppos)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	ssize_t ret, aligned_count;
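
	/* Only whole data records are copied to user space; round the requested
	 * byte count down to a multiple of the data record size.
	 */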
	aligned_count = ALIGN_DOWN(count, stream->data_record_size);
	if (aligned_count == 0)
		return -EINVAL;

	if (!stream->enabled) {
		xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
		return -EINVAL;
	}

	if (!(file->f_flags & O_NONBLOCK)) {
		do {
			ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
			if (ret)
				return -EINTR;

			mutex_lock(&gt->eu_stall->stream_lock);
			ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
			mutex_unlock(&gt->eu_stall->stream_lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&gt->eu_stall->stream_lock);
		ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
		mutex_unlock(&gt->eu_stall->stream_lock);
	}

	/*
	 * This may not work correctly if the user buffer is very small.
	 * We don't want to block the next read() when there is data in the buffer
	 * now that couldn't be accommodated in the small user buffer.
	 */
	stream->pollin = false;

	return ret;
}

static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;

	mutex_destroy(&stream->xecore_buf_lock);
	gt->eu_stall->stream = NULL;
	kfree(stream);
}

static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream)
{
	xe_bo_unpin_map_no_vm(stream->bo);
	kfree(stream->xecore_buf);
}

static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
				      u16 last_xecore)
{
	struct xe_tile *tile = stream->gt->tile;
	struct xe_bo *bo;
	u32 size;

	stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
	if (!stream->xecore_buf)
		return -ENOMEM;

	size = stream->per_xecore_buf_size * last_xecore;

	bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
					     size, ~0ull, ttm_bo_type_kernel,
					     XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
	if (IS_ERR(bo)) {
		kfree(stream->xecore_buf);
		return PTR_ERR(bo);
	}

	XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
	stream->bo = bo;

	return 0;
}

static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
{
	u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	u16 group, instance;
	unsigned int fw_ref;
	int xecore;

	/* Take runtime pm ref and forcewake to disable RC6 */
	xe_pm_runtime_get(gt_to_xe(gt));
	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
		xe_gt_err(gt, "Failed to get RENDER forcewake\n");
		xe_pm_runtime_put(gt_to_xe(gt));
		return -ETIMEDOUT;
	}

	if (XE_WA(gt, 22016596838))
		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
					  _MASKED_BIT_ENABLE(DISABLE_DOP_GATING));

	for_each_dss_steering(xecore, gt, group, instance) {
		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance);
		/* Clear any drop bits set and not cleared in the previous session. */
		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
			clear_dropped_eviction_line_bit(gt, group, instance);
		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
		read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
		read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
		/* Initialize the read pointer to the write pointer */
		xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
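		/* Seed the software read/write copies with the current write pointer,
		 * converted from 64B units to a byte offset with the overflow bit kept.
		 */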
		write_ptr <<= 6;
		write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
		xecore_buf = &stream->xecore_buf[xecore];
		xecore_buf->write = write_ptr;
		xecore_buf->read = write_ptr;
	}
	stream->data_drop.reported_to_user = false;
	bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS);

	reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE,
				  REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) |
				  REG_FIELD_PREP(EUSTALL_SAMPLE_RATE,
						 stream->sampling_rate_mult));
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value);
	/* GGTT addresses can never be > 32 bits */
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0);
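	/* Program the buffer base GGTT address together with the per XeCore
	 * buffer size (encoded in 256K units) and the sampling enable bit.
	 */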
	reg_value = xe_bo_ggtt_addr(stream->bo);
	reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ,
				    stream->per_xecore_buf_size / SZ_256K);
	reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING;
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value);

	return 0;
}

static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
{
	struct xe_eu_stall_data_stream *stream =
		container_of(work, typeof(*stream), buf_poll_work.work);
	struct xe_gt *gt = stream->gt;

	if (eu_stall_data_buf_poll(stream)) {
		stream->pollin = true;
		wake_up(&stream->poll_wq);
	}
	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
			   &stream->buf_poll_work,
			   msecs_to_jiffies(POLL_PERIOD_MS));
}

static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
				   struct eu_stall_open_properties *props)
{
	unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	xe_dss_mask_t all_xecores;
	u16 group, instance;
	u32 vaddr_offset;
	int ret;

	bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
		  XE_MAX_DSS_FUSE_BITS);
	num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS);
	last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1;

	max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores);
	if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) {
		xe_gt_dbg(gt, "Invalid EU stall event report count %u\n",
			  props->wait_num_reports);
		xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n",
			  max_wait_num_reports);
		return -EINVAL;
	}

	init_waitqueue_head(&stream->poll_wq);
	mutex_init(&stream->xecore_buf_lock);
	INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
	stream->per_xecore_buf_size = per_xecore_buf_size;
	stream->sampling_rate_mult = props->sampling_rate_mult;
	stream->wait_num_reports = props->wait_num_reports;
	stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));

	ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
	if (ret)
		return ret;

	for_each_dss_steering(xecore, gt, group, instance) {
		xecore_buf = &stream->xecore_buf[xecore];
		vaddr_offset = xecore * stream->per_xecore_buf_size;
		xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset;
	}
	return 0;
}

static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream,
					       struct file *file, poll_table *wait)
{
	__poll_t events = 0;

	poll_wait(file, &stream->poll_wq, wait);

	if (stream->pollin)
		events |= EPOLLIN;

	return events;
}

static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	__poll_t ret;

	mutex_lock(&gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_poll_locked(stream, file, wait);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return ret;
}

static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;
	int ret = 0;

	if (stream->enabled)
		return ret;

	stream->enabled = true;

	ret = xe_eu_stall_stream_enable(stream);

	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
			   &stream->buf_poll_work,
			   msecs_to_jiffies(POLL_PERIOD_MS));
	return ret;
}

static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;

	if (!stream->enabled)
		return 0;

	stream->enabled = false;

	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);

	cancel_delayed_work_sync(&stream->buf_poll_work);

	if (XE_WA(gt, 22016596838))
		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
					  _MASKED_BIT_DISABLE(DISABLE_DOP_GATING));

	xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
	xe_pm_runtime_put(gt_to_xe(gt));

	return 0;
}

static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream,
					    unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case DRM_XE_OBSERVATION_IOCTL_ENABLE:
		return xe_eu_stall_enable_locked(stream);
	case DRM_XE_OBSERVATION_IOCTL_DISABLE:
		return xe_eu_stall_disable_locked(stream);
	}

	return -EINVAL;
}

static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	long ret;

	mutex_lock(&gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return ret;
}

static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;

	drm_dev_put(&gt->tile->xe->drm);

	mutex_lock(&gt->eu_stall->stream_lock);
	xe_eu_stall_disable_locked(stream);
	xe_eu_stall_data_buf_destroy(stream);
	xe_eu_stall_stream_free(stream);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return 0;
}

static const struct file_operations fops_eu_stall = {
	.owner		= THIS_MODULE,
	.llseek		= noop_llseek,
	.release	= xe_eu_stall_stream_close,
	.poll		= xe_eu_stall_stream_poll,
	.read		= xe_eu_stall_stream_read,
	.unlocked_ioctl = xe_eu_stall_stream_ioctl,
	.compat_ioctl   = xe_eu_stall_stream_ioctl,
};

static int xe_eu_stall_stream_open_locked(struct drm_device *dev,
					  struct eu_stall_open_properties *props,
					  struct drm_file *file)
{
	struct xe_eu_stall_data_stream *stream;
	struct xe_gt *gt = props->gt;
	unsigned long f_flags = 0;
	int ret, stream_fd;

	/* Only one session can be active at any time */
	if (gt->eu_stall->stream) {
		xe_gt_dbg(gt, "EU stall sampling session already active\n");
		return -EBUSY;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream)
		return -ENOMEM;

	gt->eu_stall->stream = stream;
	stream->gt = gt;

	ret = xe_eu_stall_stream_init(stream, props);
	if (ret) {
		xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret);
		goto err_free;
	}

	stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags);
	if (stream_fd < 0) {
		ret = stream_fd;
		xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret);
		goto err_destroy;
	}

	/* Take a reference on the driver that will be kept with stream_fd
	 * until its release.
	 */
	drm_dev_get(&gt->tile->xe->drm);

	return stream_fd;

err_destroy:
	xe_eu_stall_data_buf_destroy(stream);
err_free:
	xe_eu_stall_stream_free(stream);
	return ret;
}

/**
 * xe_eu_stall_stream_open - Open a xe EU stall data stream fd
 *
 * @dev: DRM device pointer
 * @data: pointer to first struct @drm_xe_ext_set_property in
 *	  the chain of input properties from the user space.
 * @file: DRM file pointer
 *
 * This function opens an EU stall data stream with input properties from
 * the user space.
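 *
 * As a rough sketch (not the authoritative uapi documentation), user space
 * chains the properties through struct drm_xe_ext_set_property extensions,
 * for example:
 *
 *	struct drm_xe_ext_set_property gt_prop = {
 *		.base.name = DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY,
 *		.property = DRM_XE_EU_STALL_PROP_GT_ID,
 *		.value = 0,
 *	};
 *	struct drm_xe_ext_set_property rate_prop = {
 *		.base.name = DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY,
 *		.base.next_extension = (uintptr_t)&gt_prop,
 *		.property = DRM_XE_EU_STALL_PROP_SAMPLE_RATE,
 *		.value = 251 * 4,
 *	};
 *
 * with @data pointing to the head of the chain (&rate_prop above). The
 * returned stream fd is then enabled and read through the xe observation
 * interface.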
 *
 * Returns: EU stall data stream fd on success or a negative error code.
 */
int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct eu_stall_open_properties props = {};
	int ret;

	if (!xe_eu_stall_supported_on_platform(xe)) {
		drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
		return -ENODEV;
	}

	if (xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&xe->drm,  "Insufficient privileges for EU stall monitoring\n");
		return -EACCES;
	}

	/* Initialize and set default values */
	props.wait_num_reports = 1;
	props.sampling_rate_mult = 4;

	ret = xe_eu_stall_user_extensions(xe, data, 0, &props);
	if (ret)
		return ret;

	if (!props.gt) {
		drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n");
		return -EINVAL;
	}

	mutex_lock(&props.gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_open_locked(dev, &props, file);
	mutex_unlock(&props.gt->eu_stall->stream_lock);

	return ret;
}