1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2025 Intel Corporation 4 */ 5 6 #include <linux/anon_inodes.h> 7 #include <linux/fs.h> 8 #include <linux/poll.h> 9 #include <linux/types.h> 10 11 #include <drm/drm_drv.h> 12 #include <generated/xe_wa_oob.h> 13 #include <uapi/drm/xe_drm.h> 14 15 #include "xe_bo.h" 16 #include "xe_device.h" 17 #include "xe_eu_stall.h" 18 #include "xe_force_wake.h" 19 #include "xe_gt_mcr.h" 20 #include "xe_gt_printk.h" 21 #include "xe_gt_topology.h" 22 #include "xe_macros.h" 23 #include "xe_observation.h" 24 #include "xe_pm.h" 25 #include "xe_trace.h" 26 #include "xe_wa.h" 27 28 #include "regs/xe_eu_stall_regs.h" 29 #include "regs/xe_gt_regs.h" 30 31 #define POLL_PERIOD_MS 5 32 33 static size_t per_xecore_buf_size = SZ_512K; 34 35 struct per_xecore_buf { 36 /* Buffer vaddr */ 37 u8 *vaddr; 38 /* Write pointer */ 39 u32 write; 40 /* Read pointer */ 41 u32 read; 42 }; 43 44 struct xe_eu_stall_data_stream { 45 bool pollin; 46 bool enabled; 47 int wait_num_reports; 48 int sampling_rate_mult; 49 wait_queue_head_t poll_wq; 50 size_t data_record_size; 51 size_t per_xecore_buf_size; 52 unsigned int fw_ref; 53 54 struct xe_gt *gt; 55 struct xe_bo *bo; 56 /* Lock to protect data buffer pointers */ 57 struct mutex xecore_buf_lock; 58 struct per_xecore_buf *xecore_buf; 59 struct { 60 bool reported_to_user; 61 xe_dss_mask_t mask; 62 } data_drop; 63 struct delayed_work buf_poll_work; 64 }; 65 66 struct xe_eu_stall_gt { 67 /* Lock to protect stream */ 68 struct mutex stream_lock; 69 /* EU stall data stream */ 70 struct xe_eu_stall_data_stream *stream; 71 /* Workqueue to schedule buffer pointers polling work */ 72 struct workqueue_struct *buf_ptr_poll_wq; 73 }; 74 75 /** 76 * struct eu_stall_open_properties - EU stall sampling properties received 77 * from user space at open. 78 * @sampling_rate_mult: EU stall sampling rate multiplier. 79 * HW will sample every (sampling_rate_mult x 251) cycles. 80 * @wait_num_reports: Minimum number of EU stall data reports to unblock poll(). 81 * @gt: GT on which EU stall data will be captured. 82 */ 83 struct eu_stall_open_properties { 84 int sampling_rate_mult; 85 int wait_num_reports; 86 struct xe_gt *gt; 87 }; 88 89 /* 90 * EU stall data format for PVC 91 */ 92 struct xe_eu_stall_data_pvc { 93 __u64 ip_addr:29; /* Bits 0 to 28 */ 94 __u64 active_count:8; /* Bits 29 to 36 */ 95 __u64 other_count:8; /* Bits 37 to 44 */ 96 __u64 control_count:8; /* Bits 45 to 52 */ 97 __u64 pipestall_count:8; /* Bits 53 to 60 */ 98 __u64 send_count:8; /* Bits 61 to 68 */ 99 __u64 dist_acc_count:8; /* Bits 69 to 76 */ 100 __u64 sbid_count:8; /* Bits 77 to 84 */ 101 __u64 sync_count:8; /* Bits 85 to 92 */ 102 __u64 inst_fetch_count:8; /* Bits 93 to 100 */ 103 __u64 unused_bits:27; 104 __u64 unused[6]; 105 } __packed; 106 107 /* 108 * EU stall data format for Xe2 arch GPUs (LNL, BMG). 109 */ 110 struct xe_eu_stall_data_xe2 { 111 __u64 ip_addr:29; /* Bits 0 to 28 */ 112 __u64 tdr_count:8; /* Bits 29 to 36 */ 113 __u64 other_count:8; /* Bits 37 to 44 */ 114 __u64 control_count:8; /* Bits 45 to 52 */ 115 __u64 pipestall_count:8; /* Bits 53 to 60 */ 116 __u64 send_count:8; /* Bits 61 to 68 */ 117 __u64 dist_acc_count:8; /* Bits 69 to 76 */ 118 __u64 sbid_count:8; /* Bits 77 to 84 */ 119 __u64 sync_count:8; /* Bits 85 to 92 */ 120 __u64 inst_fetch_count:8; /* Bits 93 to 100 */ 121 __u64 active_count:8; /* Bits 101 to 108 */ 122 __u64 ex_id:3; /* Bits 109 to 111 */ 123 __u64 end_flag:1; /* Bit 112 */ 124 __u64 unused_bits:15; 125 __u64 unused[6]; 126 } __packed; 127 128 /* 129 * EU stall data format for Xe3p arch GPUs. 130 */ 131 struct xe_eu_stall_data_xe3p { 132 __u64 ip_addr:61; /* Bits 0 to 60 */ 133 __u64 tdr_count:8; /* Bits 61 to 68 */ 134 __u64 other_count:8; /* Bits 69 to 76 */ 135 __u64 control_count:8; /* Bits 77 to 84 */ 136 __u64 pipestall_count:8; /* Bits 85 to 92 */ 137 __u64 send_count:8; /* Bits 93 to 100 */ 138 __u64 dist_acc_count:8; /* Bits 101 to 108 */ 139 __u64 sbid_count:8; /* Bits 109 to 116 */ 140 __u64 sync_count:8; /* Bits 117 to 124 */ 141 __u64 inst_fetch_count:8; /* Bits 125 to 132 */ 142 __u64 active_count:8; /* Bits 133 to 140 */ 143 __u64 ex_id:3; /* Bits 141 to 143 */ 144 __u64 end_flag:1; /* Bit 144 */ 145 __u64 unused_bits:47; 146 __u64 unused[5]; 147 } __packed; 148 149 const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7}; 150 151 /** 152 * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information. 153 * 154 * @num_rates: Pointer to a u32 to return the number of sampling rates. 155 * @rates: double u64 pointer to point to an array of sampling rates. 156 * 157 * Stores the number of sampling rates and pointer to the array of 158 * sampling rates in the input pointers. 159 * 160 * Returns: Size of the EU stall sampling rates array. 161 */ 162 size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates) 163 { 164 *num_rates = ARRAY_SIZE(eu_stall_sampling_rates); 165 *rates = eu_stall_sampling_rates; 166 167 return sizeof(eu_stall_sampling_rates); 168 } 169 170 /** 171 * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size. 172 * 173 * Returns: The per XeCore buffer size used to allocate the per GT 174 * EU stall data buffer. 175 */ 176 size_t xe_eu_stall_get_per_xecore_buf_size(void) 177 { 178 return per_xecore_buf_size; 179 } 180 181 /** 182 * xe_eu_stall_data_record_size - get EU stall data record size. 183 * 184 * @xe: Pointer to a Xe device. 185 * 186 * Returns: EU stall data record size. 187 */ 188 size_t xe_eu_stall_data_record_size(struct xe_device *xe) 189 { 190 size_t record_size = 0; 191 192 if (GRAPHICS_VER(xe) >= 35) 193 record_size = sizeof(struct xe_eu_stall_data_xe3p); 194 else if (GRAPHICS_VER(xe) >= 20) 195 record_size = sizeof(struct xe_eu_stall_data_xe2); 196 else if (xe->info.platform == XE_PVC) 197 record_size = sizeof(struct xe_eu_stall_data_pvc); 198 199 200 xe_assert(xe, is_power_of_2(record_size)); 201 202 return record_size; 203 } 204 205 /** 206 * num_data_rows - Return the number of EU stall data rows of 64B each 207 * for a given data size. 208 * 209 * @data_size: EU stall data size 210 */ 211 static u32 num_data_rows(u32 data_size) 212 { 213 return data_size >> 6; 214 } 215 216 static void xe_eu_stall_fini(void *arg) 217 { 218 struct xe_gt *gt = arg; 219 220 destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq); 221 mutex_destroy(>->eu_stall->stream_lock); 222 kfree(gt->eu_stall); 223 } 224 225 /** 226 * xe_eu_stall_init() - Allocate and initialize GT level EU stall data 227 * structure xe_eu_stall_gt within struct xe_gt. 228 * 229 * @gt: GT being initialized. 230 * 231 * Returns: zero on success or a negative error code. 232 */ 233 int xe_eu_stall_init(struct xe_gt *gt) 234 { 235 struct xe_device *xe = gt_to_xe(gt); 236 int ret; 237 238 if (!xe_eu_stall_supported_on_platform(xe)) 239 return 0; 240 241 gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); 242 if (!gt->eu_stall) { 243 ret = -ENOMEM; 244 goto exit; 245 } 246 247 mutex_init(>->eu_stall->stream_lock); 248 249 gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0); 250 if (!gt->eu_stall->buf_ptr_poll_wq) { 251 ret = -ENOMEM; 252 goto exit_free; 253 } 254 255 return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt); 256 exit_free: 257 mutex_destroy(>->eu_stall->stream_lock); 258 kfree(gt->eu_stall); 259 exit: 260 return ret; 261 } 262 263 static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value, 264 struct eu_stall_open_properties *props) 265 { 266 value = div_u64(value, 251); 267 if (value == 0 || value > 7) { 268 drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value); 269 return -EINVAL; 270 } 271 props->sampling_rate_mult = value; 272 return 0; 273 } 274 275 static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value, 276 struct eu_stall_open_properties *props) 277 { 278 props->wait_num_reports = value; 279 280 return 0; 281 } 282 283 static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, 284 struct eu_stall_open_properties *props) 285 { 286 struct xe_gt *gt = xe_device_get_gt(xe, value); 287 288 if (!gt) { 289 drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value); 290 return -EINVAL; 291 } 292 props->gt = gt; 293 return 0; 294 } 295 296 typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value, 297 struct eu_stall_open_properties *props); 298 299 static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = { 300 [DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate, 301 [DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports, 302 [DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id, 303 }; 304 305 static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension, 306 struct eu_stall_open_properties *props) 307 { 308 u64 __user *address = u64_to_user_ptr(extension); 309 struct drm_xe_ext_set_property ext; 310 int err; 311 u32 idx; 312 313 err = copy_from_user(&ext, address, sizeof(ext)); 314 if (XE_IOCTL_DBG(xe, err)) 315 return -EFAULT; 316 317 if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) || 318 XE_IOCTL_DBG(xe, !ext.property) || XE_IOCTL_DBG(xe, ext.pad)) 319 return -EINVAL; 320 321 idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs)); 322 return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props); 323 } 324 325 typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension, 326 struct eu_stall_open_properties *props); 327 static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = { 328 [DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property, 329 }; 330 331 #define MAX_USER_EXTENSIONS 5 332 static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension, 333 int ext_number, struct eu_stall_open_properties *props) 334 { 335 u64 __user *address = u64_to_user_ptr(extension); 336 struct drm_xe_user_extension ext; 337 int err; 338 u32 idx; 339 340 if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) 341 return -E2BIG; 342 343 err = copy_from_user(&ext, address, sizeof(ext)); 344 if (XE_IOCTL_DBG(xe, err)) 345 return -EFAULT; 346 347 if (XE_IOCTL_DBG(xe, ext.pad) || 348 XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs))) 349 return -EINVAL; 350 351 idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs)); 352 err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props); 353 if (XE_IOCTL_DBG(xe, err)) 354 return err; 355 356 if (ext.next_extension) 357 return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props); 358 359 return 0; 360 } 361 362 /** 363 * buf_data_size - Calculate the number of bytes in a circular buffer 364 * given the read and write pointers and the size of 365 * the buffer. 366 * 367 * @buf_size: Size of the circular buffer 368 * @read_ptr: Read pointer with an additional overflow bit 369 * @write_ptr: Write pointer with an additional overflow bit 370 * 371 * Since the read and write pointers have an additional overflow bit, 372 * this function calculates the offsets from the pointers and use the 373 * offsets to calculate the data size in the buffer. 374 * 375 * Returns: number of bytes of data in the buffer 376 */ 377 static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr) 378 { 379 u32 read_offset, write_offset, size = 0; 380 381 if (read_ptr == write_ptr) 382 goto exit; 383 384 read_offset = read_ptr & (buf_size - 1); 385 write_offset = write_ptr & (buf_size - 1); 386 387 if (write_offset > read_offset) 388 size = write_offset - read_offset; 389 else 390 size = buf_size - read_offset + write_offset; 391 exit: 392 return size; 393 } 394 395 /** 396 * eu_stall_data_buf_poll - Poll for EU stall data in the buffer. 397 * 398 * @stream: xe EU stall data stream instance 399 * 400 * Returns: true if the EU stall buffer contains minimum stall data as 401 * specified by the event report count, else false. 402 */ 403 static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) 404 { 405 u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0; 406 u32 buf_size = stream->per_xecore_buf_size; 407 struct per_xecore_buf *xecore_buf; 408 struct xe_gt *gt = stream->gt; 409 bool min_data_present = false; 410 u16 group, instance; 411 unsigned int xecore; 412 413 mutex_lock(&stream->xecore_buf_lock); 414 for_each_dss_steering(xecore, gt, group, instance) { 415 xecore_buf = &stream->xecore_buf[xecore]; 416 read_ptr = xecore_buf->read; 417 write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, 418 group, instance); 419 write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg); 420 write_ptr <<= 6; 421 write_ptr &= ((buf_size << 1) - 1); 422 if (!min_data_present) { 423 total_data += buf_data_size(buf_size, read_ptr, write_ptr); 424 if (num_data_rows(total_data) >= stream->wait_num_reports) 425 min_data_present = true; 426 } 427 if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP) 428 set_bit(xecore, stream->data_drop.mask); 429 xecore_buf->write = write_ptr; 430 } 431 mutex_unlock(&stream->xecore_buf_lock); 432 433 return min_data_present; 434 } 435 436 static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance) 437 { 438 struct xe_device *xe = gt_to_xe(gt); 439 u32 write_ptr_reg; 440 441 /* On PVC, the overflow bit has to be cleared by writing 1 to it. 442 * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it. 443 */ 444 if (GRAPHICS_VER(xe) >= 20) 445 write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP); 446 else 447 write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP); 448 449 xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance); 450 } 451 452 static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream, 453 char __user *buf, size_t count, 454 size_t *total_data_size, struct xe_gt *gt, 455 u16 group, u16 instance, unsigned int xecore) 456 { 457 size_t read_data_size, copy_size, buf_size; 458 u32 read_ptr_reg, read_ptr, write_ptr; 459 u8 *xecore_start_vaddr, *read_vaddr; 460 struct per_xecore_buf *xecore_buf; 461 u32 read_offset, write_offset; 462 463 /* Hardware increments the read and write pointers such that they can 464 * overflow into one additional bit. For example, a 256KB size buffer 465 * offset pointer needs 18 bits. But HW uses 19 bits for the read and 466 * write pointers. This technique avoids wasting a slot in the buffer. 467 * Read and write offsets are calculated from the pointers in order to 468 * check if the write pointer has wrapped around the array. 469 */ 470 xecore_buf = &stream->xecore_buf[xecore]; 471 xecore_start_vaddr = xecore_buf->vaddr; 472 read_ptr = xecore_buf->read; 473 write_ptr = xecore_buf->write; 474 buf_size = stream->per_xecore_buf_size; 475 476 read_data_size = buf_data_size(buf_size, read_ptr, write_ptr); 477 /* Read only the data that the user space buffer can accommodate */ 478 read_data_size = min_t(size_t, count - *total_data_size, read_data_size); 479 if (read_data_size == 0) 480 goto exit_drop; 481 482 read_offset = read_ptr & (buf_size - 1); 483 write_offset = write_ptr & (buf_size - 1); 484 read_vaddr = xecore_start_vaddr + read_offset; 485 486 if (write_offset > read_offset) { 487 if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size)) 488 return -EFAULT; 489 } else { 490 if (read_data_size >= buf_size - read_offset) 491 copy_size = buf_size - read_offset; 492 else 493 copy_size = read_data_size; 494 if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size)) 495 return -EFAULT; 496 if (copy_to_user(buf + *total_data_size + copy_size, 497 xecore_start_vaddr, read_data_size - copy_size)) 498 return -EFAULT; 499 } 500 501 *total_data_size += read_data_size; 502 read_ptr += read_data_size; 503 504 /* Read pointer can overflow into one additional bit */ 505 read_ptr &= (buf_size << 1) - 1; 506 read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6)); 507 read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg); 508 xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance); 509 xecore_buf->read = read_ptr; 510 trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr, 511 read_data_size, *total_data_size); 512 exit_drop: 513 /* Clear drop bit (if set) after any data was read or if the buffer was empty. 514 * Drop bit can be set even if the buffer is empty as the buffer may have been emptied 515 * in the previous read() and the data drop bit was set during the previous read(). 516 */ 517 if (test_bit(xecore, stream->data_drop.mask)) { 518 clear_dropped_eviction_line_bit(gt, group, instance); 519 clear_bit(xecore, stream->data_drop.mask); 520 } 521 return 0; 522 } 523 524 /** 525 * xe_eu_stall_stream_read_locked - copy EU stall counters data from the 526 * per xecore buffers to the userspace buffer 527 * @stream: A stream opened for EU stall count metrics 528 * @file: An xe EU stall data stream file 529 * @buf: destination buffer given by userspace 530 * @count: the number of bytes userspace wants to read 531 * 532 * Returns: Number of bytes copied or a negative error code 533 * If we've successfully copied any data then reporting that takes 534 * precedence over any internal error status, so the data isn't lost. 535 */ 536 static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream, 537 struct file *file, char __user *buf, 538 size_t count) 539 { 540 struct xe_gt *gt = stream->gt; 541 size_t total_size = 0; 542 u16 group, instance; 543 unsigned int xecore; 544 int ret = 0; 545 546 mutex_lock(&stream->xecore_buf_lock); 547 if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { 548 if (!stream->data_drop.reported_to_user) { 549 stream->data_drop.reported_to_user = true; 550 xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", 551 XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); 552 mutex_unlock(&stream->xecore_buf_lock); 553 return -EIO; 554 } 555 stream->data_drop.reported_to_user = false; 556 } 557 558 for_each_dss_steering(xecore, gt, group, instance) { 559 ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size, 560 gt, group, instance, xecore); 561 if (ret || count == total_size) 562 break; 563 } 564 mutex_unlock(&stream->xecore_buf_lock); 565 return total_size ?: (ret ?: -EAGAIN); 566 } 567 568 /* 569 * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE 570 * before calling read(). 571 * 572 * Returns: The number of bytes copied or a negative error code on failure. 573 * -EIO if HW drops any EU stall data when the buffer is full. 574 */ 575 static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf, 576 size_t count, loff_t *ppos) 577 { 578 struct xe_eu_stall_data_stream *stream = file->private_data; 579 struct xe_gt *gt = stream->gt; 580 ssize_t ret, aligned_count; 581 582 aligned_count = ALIGN_DOWN(count, stream->data_record_size); 583 if (aligned_count == 0) 584 return -EINVAL; 585 586 if (!stream->enabled) { 587 xe_gt_dbg(gt, "EU stall data stream not enabled to read\n"); 588 return -EINVAL; 589 } 590 591 if (!(file->f_flags & O_NONBLOCK)) { 592 do { 593 ret = wait_event_interruptible(stream->poll_wq, stream->pollin); 594 if (ret) 595 return -EINTR; 596 597 mutex_lock(>->eu_stall->stream_lock); 598 ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count); 599 mutex_unlock(>->eu_stall->stream_lock); 600 } while (ret == -EAGAIN); 601 } else { 602 mutex_lock(>->eu_stall->stream_lock); 603 ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count); 604 mutex_unlock(>->eu_stall->stream_lock); 605 } 606 607 /* 608 * This may not work correctly if the user buffer is very small. 609 * We don't want to block the next read() when there is data in the buffer 610 * now, but couldn't be accommodated in the small user buffer. 611 */ 612 stream->pollin = false; 613 614 return ret; 615 } 616 617 static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) 618 { 619 struct xe_gt *gt = stream->gt; 620 621 mutex_destroy(&stream->xecore_buf_lock); 622 gt->eu_stall->stream = NULL; 623 kfree(stream); 624 } 625 626 static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream) 627 { 628 xe_bo_unpin_map_no_vm(stream->bo); 629 kfree(stream->xecore_buf); 630 } 631 632 static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream, 633 u16 last_xecore) 634 { 635 struct xe_tile *tile = stream->gt->tile; 636 struct xe_bo *bo; 637 u32 size; 638 639 stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL); 640 if (!stream->xecore_buf) 641 return -ENOMEM; 642 643 size = stream->per_xecore_buf_size * last_xecore; 644 645 bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel, 646 XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false); 647 if (IS_ERR(bo)) { 648 kfree(stream->xecore_buf); 649 return PTR_ERR(bo); 650 } 651 652 XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64)); 653 stream->bo = bo; 654 655 return 0; 656 } 657 658 static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream) 659 { 660 u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value; 661 struct per_xecore_buf *xecore_buf; 662 struct xe_gt *gt = stream->gt; 663 u16 group, instance; 664 int xecore; 665 666 /* Take runtime pm ref and forcewake to disable RC6 */ 667 xe_pm_runtime_get(gt_to_xe(gt)); 668 stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); 669 if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FW_RENDER)) { 670 xe_gt_err(gt, "Failed to get RENDER forcewake\n"); 671 xe_pm_runtime_put(gt_to_xe(gt)); 672 return -ETIMEDOUT; 673 } 674 675 if (XE_GT_WA(gt, 22016596838)) 676 xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, 677 _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); 678 679 for_each_dss_steering(xecore, gt, group, instance) { 680 write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance); 681 /* Clear any drop bits set and not cleared in the previous session. */ 682 if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP) 683 clear_dropped_eviction_line_bit(gt, group, instance); 684 write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg); 685 read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr); 686 read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg); 687 /* Initialize the read pointer to the write pointer */ 688 xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance); 689 write_ptr <<= 6; 690 write_ptr &= (stream->per_xecore_buf_size << 1) - 1; 691 xecore_buf = &stream->xecore_buf[xecore]; 692 xecore_buf->write = write_ptr; 693 xecore_buf->read = write_ptr; 694 } 695 stream->data_drop.reported_to_user = false; 696 bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS); 697 698 reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE, 699 REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) | 700 REG_FIELD_PREP(EUSTALL_SAMPLE_RATE, 701 stream->sampling_rate_mult)); 702 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value); 703 /* GGTT addresses can never be > 32 bits */ 704 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0); 705 reg_value = xe_bo_ggtt_addr(stream->bo); 706 reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ, 707 stream->per_xecore_buf_size / SZ_256K); 708 reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING; 709 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value); 710 711 return 0; 712 } 713 714 static void eu_stall_data_buf_poll_work_fn(struct work_struct *work) 715 { 716 struct xe_eu_stall_data_stream *stream = 717 container_of(work, typeof(*stream), buf_poll_work.work); 718 struct xe_gt *gt = stream->gt; 719 720 if (eu_stall_data_buf_poll(stream)) { 721 stream->pollin = true; 722 wake_up(&stream->poll_wq); 723 } 724 queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq, 725 &stream->buf_poll_work, 726 msecs_to_jiffies(POLL_PERIOD_MS)); 727 } 728 729 static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, 730 struct eu_stall_open_properties *props) 731 { 732 unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores; 733 struct per_xecore_buf *xecore_buf; 734 struct xe_gt *gt = stream->gt; 735 xe_dss_mask_t all_xecores; 736 u16 group, instance; 737 u32 vaddr_offset; 738 int ret; 739 740 bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, 741 XE_MAX_DSS_FUSE_BITS); 742 num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS); 743 last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1; 744 745 max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores); 746 if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) { 747 xe_gt_dbg(gt, "Invalid EU stall event report count %u\n", 748 props->wait_num_reports); 749 xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n", 750 max_wait_num_reports); 751 return -EINVAL; 752 } 753 754 init_waitqueue_head(&stream->poll_wq); 755 mutex_init(&stream->xecore_buf_lock); 756 INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); 757 stream->per_xecore_buf_size = per_xecore_buf_size; 758 stream->sampling_rate_mult = props->sampling_rate_mult; 759 stream->wait_num_reports = props->wait_num_reports; 760 stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt)); 761 762 ret = xe_eu_stall_data_buf_alloc(stream, last_xecore); 763 if (ret) 764 return ret; 765 766 for_each_dss_steering(xecore, gt, group, instance) { 767 xecore_buf = &stream->xecore_buf[xecore]; 768 vaddr_offset = xecore * stream->per_xecore_buf_size; 769 xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset; 770 } 771 return 0; 772 } 773 774 static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream, 775 struct file *file, poll_table *wait) 776 { 777 __poll_t events = 0; 778 779 poll_wait(file, &stream->poll_wq, wait); 780 781 if (stream->pollin) 782 events |= EPOLLIN; 783 784 return events; 785 } 786 787 static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait) 788 { 789 struct xe_eu_stall_data_stream *stream = file->private_data; 790 struct xe_gt *gt = stream->gt; 791 __poll_t ret; 792 793 mutex_lock(>->eu_stall->stream_lock); 794 ret = xe_eu_stall_stream_poll_locked(stream, file, wait); 795 mutex_unlock(>->eu_stall->stream_lock); 796 797 return ret; 798 } 799 800 static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream) 801 { 802 struct xe_gt *gt = stream->gt; 803 int ret = 0; 804 805 if (stream->enabled) 806 return ret; 807 808 stream->enabled = true; 809 810 ret = xe_eu_stall_stream_enable(stream); 811 812 queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq, 813 &stream->buf_poll_work, 814 msecs_to_jiffies(POLL_PERIOD_MS)); 815 return ret; 816 } 817 818 static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream) 819 { 820 struct xe_gt *gt = stream->gt; 821 822 if (!stream->enabled) 823 return 0; 824 825 stream->enabled = false; 826 827 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0); 828 829 cancel_delayed_work_sync(&stream->buf_poll_work); 830 831 if (XE_GT_WA(gt, 22016596838)) 832 xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, 833 _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); 834 835 xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); 836 xe_pm_runtime_put(gt_to_xe(gt)); 837 838 return 0; 839 } 840 841 static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream, 842 unsigned int cmd, unsigned long arg) 843 { 844 switch (cmd) { 845 case DRM_XE_OBSERVATION_IOCTL_ENABLE: 846 return xe_eu_stall_enable_locked(stream); 847 case DRM_XE_OBSERVATION_IOCTL_DISABLE: 848 return xe_eu_stall_disable_locked(stream); 849 } 850 851 return -EINVAL; 852 } 853 854 static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 855 { 856 struct xe_eu_stall_data_stream *stream = file->private_data; 857 struct xe_gt *gt = stream->gt; 858 long ret; 859 860 mutex_lock(>->eu_stall->stream_lock); 861 ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg); 862 mutex_unlock(>->eu_stall->stream_lock); 863 864 return ret; 865 } 866 867 static int xe_eu_stall_stream_close(struct inode *inode, struct file *file) 868 { 869 struct xe_eu_stall_data_stream *stream = file->private_data; 870 struct xe_gt *gt = stream->gt; 871 872 drm_dev_put(>->tile->xe->drm); 873 874 mutex_lock(>->eu_stall->stream_lock); 875 xe_eu_stall_disable_locked(stream); 876 xe_eu_stall_data_buf_destroy(stream); 877 xe_eu_stall_stream_free(stream); 878 mutex_unlock(>->eu_stall->stream_lock); 879 880 return 0; 881 } 882 883 static const struct file_operations fops_eu_stall = { 884 .owner = THIS_MODULE, 885 .llseek = noop_llseek, 886 .release = xe_eu_stall_stream_close, 887 .poll = xe_eu_stall_stream_poll, 888 .read = xe_eu_stall_stream_read, 889 .unlocked_ioctl = xe_eu_stall_stream_ioctl, 890 .compat_ioctl = xe_eu_stall_stream_ioctl, 891 }; 892 893 static int xe_eu_stall_stream_open_locked(struct drm_device *dev, 894 struct eu_stall_open_properties *props, 895 struct drm_file *file) 896 { 897 struct xe_eu_stall_data_stream *stream; 898 struct xe_gt *gt = props->gt; 899 unsigned long f_flags = 0; 900 int ret, stream_fd; 901 902 /* Only one session can be active at any time */ 903 if (gt->eu_stall->stream) { 904 xe_gt_dbg(gt, "EU stall sampling session already active\n"); 905 return -EBUSY; 906 } 907 908 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 909 if (!stream) 910 return -ENOMEM; 911 912 gt->eu_stall->stream = stream; 913 stream->gt = gt; 914 915 ret = xe_eu_stall_stream_init(stream, props); 916 if (ret) { 917 xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret); 918 goto err_free; 919 } 920 921 stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags); 922 if (stream_fd < 0) { 923 ret = stream_fd; 924 xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret); 925 goto err_destroy; 926 } 927 928 /* Take a reference on the driver that will be kept with stream_fd 929 * until its release. 930 */ 931 drm_dev_get(>->tile->xe->drm); 932 933 return stream_fd; 934 935 err_destroy: 936 xe_eu_stall_data_buf_destroy(stream); 937 err_free: 938 xe_eu_stall_stream_free(stream); 939 return ret; 940 } 941 942 /** 943 * xe_eu_stall_stream_open - Open a xe EU stall data stream fd 944 * 945 * @dev: DRM device pointer 946 * @data: pointer to first struct @drm_xe_ext_set_property in 947 * the chain of input properties from the user space. 948 * @file: DRM file pointer 949 * 950 * This function opens a EU stall data stream with input properties from 951 * the user space. 952 * 953 * Returns: EU stall data stream fd on success or a negative error code. 954 */ 955 int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file) 956 { 957 struct xe_device *xe = to_xe_device(dev); 958 struct eu_stall_open_properties props = {}; 959 int ret; 960 961 if (!xe_eu_stall_supported_on_platform(xe)) { 962 drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n"); 963 return -ENODEV; 964 } 965 966 if (xe_observation_paranoid && !perfmon_capable()) { 967 drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n"); 968 return -EACCES; 969 } 970 971 /* Initialize and set default values */ 972 props.wait_num_reports = 1; 973 props.sampling_rate_mult = 4; 974 975 ret = xe_eu_stall_user_extensions(xe, data, 0, &props); 976 if (ret) 977 return ret; 978 979 if (!props.gt) { 980 drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n"); 981 return -EINVAL; 982 } 983 984 mutex_lock(&props.gt->eu_stall->stream_lock); 985 ret = xe_eu_stall_stream_open_locked(dev, &props, file); 986 mutex_unlock(&props.gt->eu_stall->stream_lock); 987 988 return ret; 989 } 990