1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2025 Intel Corporation 4 */ 5 6 #include <linux/anon_inodes.h> 7 #include <linux/fs.h> 8 #include <linux/poll.h> 9 #include <linux/types.h> 10 11 #include <drm/drm_drv.h> 12 #include <generated/xe_wa_oob.h> 13 #include <uapi/drm/xe_drm.h> 14 15 #include "xe_bo.h" 16 #include "xe_device.h" 17 #include "xe_eu_stall.h" 18 #include "xe_force_wake.h" 19 #include "xe_gt_mcr.h" 20 #include "xe_gt_printk.h" 21 #include "xe_gt_topology.h" 22 #include "xe_macros.h" 23 #include "xe_observation.h" 24 #include "xe_pm.h" 25 #include "xe_trace.h" 26 #include "xe_wa.h" 27 28 #include "regs/xe_eu_stall_regs.h" 29 #include "regs/xe_gt_regs.h" 30 31 #define POLL_PERIOD_MS 5 32 33 static size_t per_xecore_buf_size = SZ_512K; 34 35 struct per_xecore_buf { 36 /* Buffer vaddr */ 37 u8 *vaddr; 38 /* Write pointer */ 39 u32 write; 40 /* Read pointer */ 41 u32 read; 42 }; 43 44 struct xe_eu_stall_data_stream { 45 bool pollin; 46 bool enabled; 47 int wait_num_reports; 48 int sampling_rate_mult; 49 wait_queue_head_t poll_wq; 50 size_t data_record_size; 51 size_t per_xecore_buf_size; 52 53 struct xe_gt *gt; 54 struct xe_bo *bo; 55 /* Lock to protect data buffer pointers */ 56 struct mutex xecore_buf_lock; 57 struct per_xecore_buf *xecore_buf; 58 struct { 59 bool reported_to_user; 60 xe_dss_mask_t mask; 61 } data_drop; 62 struct delayed_work buf_poll_work; 63 }; 64 65 struct xe_eu_stall_gt { 66 /* Lock to protect stream */ 67 struct mutex stream_lock; 68 /* EU stall data stream */ 69 struct xe_eu_stall_data_stream *stream; 70 /* Workqueue to schedule buffer pointers polling work */ 71 struct workqueue_struct *buf_ptr_poll_wq; 72 }; 73 74 /** 75 * struct eu_stall_open_properties - EU stall sampling properties received 76 * from user space at open. 77 * @sampling_rate_mult: EU stall sampling rate multiplier. 78 * HW will sample every (sampling_rate_mult x 251) cycles. 79 * @wait_num_reports: Minimum number of EU stall data reports to unblock poll(). 80 * @gt: GT on which EU stall data will be captured. 81 */ 82 struct eu_stall_open_properties { 83 int sampling_rate_mult; 84 int wait_num_reports; 85 struct xe_gt *gt; 86 }; 87 88 /* 89 * EU stall data format for PVC 90 */ 91 struct xe_eu_stall_data_pvc { 92 __u64 ip_addr:29; /* Bits 0 to 28 */ 93 __u64 active_count:8; /* Bits 29 to 36 */ 94 __u64 other_count:8; /* Bits 37 to 44 */ 95 __u64 control_count:8; /* Bits 45 to 52 */ 96 __u64 pipestall_count:8; /* Bits 53 to 60 */ 97 __u64 send_count:8; /* Bits 61 to 68 */ 98 __u64 dist_acc_count:8; /* Bits 69 to 76 */ 99 __u64 sbid_count:8; /* Bits 77 to 84 */ 100 __u64 sync_count:8; /* Bits 85 to 92 */ 101 __u64 inst_fetch_count:8; /* Bits 93 to 100 */ 102 __u64 unused_bits:27; 103 __u64 unused[6]; 104 } __packed; 105 106 /* 107 * EU stall data format for Xe2 arch GPUs (LNL, BMG). 108 */ 109 struct xe_eu_stall_data_xe2 { 110 __u64 ip_addr:29; /* Bits 0 to 28 */ 111 __u64 tdr_count:8; /* Bits 29 to 36 */ 112 __u64 other_count:8; /* Bits 37 to 44 */ 113 __u64 control_count:8; /* Bits 45 to 52 */ 114 __u64 pipestall_count:8; /* Bits 53 to 60 */ 115 __u64 send_count:8; /* Bits 61 to 68 */ 116 __u64 dist_acc_count:8; /* Bits 69 to 76 */ 117 __u64 sbid_count:8; /* Bits 77 to 84 */ 118 __u64 sync_count:8; /* Bits 85 to 92 */ 119 __u64 inst_fetch_count:8; /* Bits 93 to 100 */ 120 __u64 active_count:8; /* Bits 101 to 108 */ 121 __u64 ex_id:3; /* Bits 109 to 111 */ 122 __u64 end_flag:1; /* Bit 112 */ 123 __u64 unused_bits:15; 124 __u64 unused[6]; 125 } __packed; 126 127 const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7}; 128 129 /** 130 * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information. 131 * 132 * @num_rates: Pointer to a u32 to return the number of sampling rates. 133 * @rates: double u64 pointer to point to an array of sampling rates. 134 * 135 * Stores the number of sampling rates and pointer to the array of 136 * sampling rates in the input pointers. 137 * 138 * Returns: Size of the EU stall sampling rates array. 139 */ 140 size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates) 141 { 142 *num_rates = ARRAY_SIZE(eu_stall_sampling_rates); 143 *rates = eu_stall_sampling_rates; 144 145 return sizeof(eu_stall_sampling_rates); 146 } 147 148 /** 149 * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size. 150 * 151 * Returns: The per XeCore buffer size used to allocate the per GT 152 * EU stall data buffer. 153 */ 154 size_t xe_eu_stall_get_per_xecore_buf_size(void) 155 { 156 return per_xecore_buf_size; 157 } 158 159 /** 160 * xe_eu_stall_data_record_size - get EU stall data record size. 161 * 162 * @xe: Pointer to a Xe device. 163 * 164 * Returns: EU stall data record size. 165 */ 166 size_t xe_eu_stall_data_record_size(struct xe_device *xe) 167 { 168 size_t record_size = 0; 169 170 if (xe->info.platform == XE_PVC) 171 record_size = sizeof(struct xe_eu_stall_data_pvc); 172 else if (GRAPHICS_VER(xe) >= 20) 173 record_size = sizeof(struct xe_eu_stall_data_xe2); 174 175 xe_assert(xe, is_power_of_2(record_size)); 176 177 return record_size; 178 } 179 180 /** 181 * num_data_rows - Return the number of EU stall data rows of 64B each 182 * for a given data size. 183 * 184 * @data_size: EU stall data size 185 */ 186 static u32 num_data_rows(u32 data_size) 187 { 188 return data_size >> 6; 189 } 190 191 static void xe_eu_stall_fini(void *arg) 192 { 193 struct xe_gt *gt = arg; 194 195 destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq); 196 mutex_destroy(>->eu_stall->stream_lock); 197 kfree(gt->eu_stall); 198 } 199 200 /** 201 * xe_eu_stall_init() - Allocate and initialize GT level EU stall data 202 * structure xe_eu_stall_gt within struct xe_gt. 203 * 204 * @gt: GT being initialized. 205 * 206 * Returns: zero on success or a negative error code. 207 */ 208 int xe_eu_stall_init(struct xe_gt *gt) 209 { 210 struct xe_device *xe = gt_to_xe(gt); 211 int ret; 212 213 if (!xe_eu_stall_supported_on_platform(xe)) 214 return 0; 215 216 gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); 217 if (!gt->eu_stall) { 218 ret = -ENOMEM; 219 goto exit; 220 } 221 222 mutex_init(>->eu_stall->stream_lock); 223 224 gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0); 225 if (!gt->eu_stall->buf_ptr_poll_wq) { 226 ret = -ENOMEM; 227 goto exit_free; 228 } 229 230 return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt); 231 exit_free: 232 mutex_destroy(>->eu_stall->stream_lock); 233 kfree(gt->eu_stall); 234 exit: 235 return ret; 236 } 237 238 static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value, 239 struct eu_stall_open_properties *props) 240 { 241 value = div_u64(value, 251); 242 if (value == 0 || value > 7) { 243 drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value); 244 return -EINVAL; 245 } 246 props->sampling_rate_mult = value; 247 return 0; 248 } 249 250 static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value, 251 struct eu_stall_open_properties *props) 252 { 253 props->wait_num_reports = value; 254 255 return 0; 256 } 257 258 static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, 259 struct eu_stall_open_properties *props) 260 { 261 struct xe_gt *gt = xe_device_get_gt(xe, value); 262 263 if (!gt) { 264 drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value); 265 return -EINVAL; 266 } 267 props->gt = gt; 268 return 0; 269 } 270 271 typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value, 272 struct eu_stall_open_properties *props); 273 274 static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = { 275 [DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate, 276 [DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports, 277 [DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id, 278 }; 279 280 static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension, 281 struct eu_stall_open_properties *props) 282 { 283 u64 __user *address = u64_to_user_ptr(extension); 284 struct drm_xe_ext_set_property ext; 285 int err; 286 u32 idx; 287 288 err = copy_from_user(&ext, address, sizeof(ext)); 289 if (XE_IOCTL_DBG(xe, err)) 290 return -EFAULT; 291 292 if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) || 293 XE_IOCTL_DBG(xe, ext.pad)) 294 return -EINVAL; 295 296 idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs)); 297 return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props); 298 } 299 300 typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension, 301 struct eu_stall_open_properties *props); 302 static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = { 303 [DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property, 304 }; 305 306 #define MAX_USER_EXTENSIONS 5 307 static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension, 308 int ext_number, struct eu_stall_open_properties *props) 309 { 310 u64 __user *address = u64_to_user_ptr(extension); 311 struct drm_xe_user_extension ext; 312 int err; 313 u32 idx; 314 315 if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) 316 return -E2BIG; 317 318 err = copy_from_user(&ext, address, sizeof(ext)); 319 if (XE_IOCTL_DBG(xe, err)) 320 return -EFAULT; 321 322 if (XE_IOCTL_DBG(xe, ext.pad) || 323 XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs))) 324 return -EINVAL; 325 326 idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs)); 327 err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props); 328 if (XE_IOCTL_DBG(xe, err)) 329 return err; 330 331 if (ext.next_extension) 332 return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props); 333 334 return 0; 335 } 336 337 /** 338 * buf_data_size - Calculate the number of bytes in a circular buffer 339 * given the read and write pointers and the size of 340 * the buffer. 341 * 342 * @buf_size: Size of the circular buffer 343 * @read_ptr: Read pointer with an additional overflow bit 344 * @write_ptr: Write pointer with an additional overflow bit 345 * 346 * Since the read and write pointers have an additional overflow bit, 347 * this function calculates the offsets from the pointers and use the 348 * offsets to calculate the data size in the buffer. 349 * 350 * Returns: number of bytes of data in the buffer 351 */ 352 static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr) 353 { 354 u32 read_offset, write_offset, size = 0; 355 356 if (read_ptr == write_ptr) 357 goto exit; 358 359 read_offset = read_ptr & (buf_size - 1); 360 write_offset = write_ptr & (buf_size - 1); 361 362 if (write_offset > read_offset) 363 size = write_offset - read_offset; 364 else 365 size = buf_size - read_offset + write_offset; 366 exit: 367 return size; 368 } 369 370 /** 371 * eu_stall_data_buf_poll - Poll for EU stall data in the buffer. 372 * 373 * @stream: xe EU stall data stream instance 374 * 375 * Returns: true if the EU stall buffer contains minimum stall data as 376 * specified by the event report count, else false. 377 */ 378 static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) 379 { 380 u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0; 381 u32 buf_size = stream->per_xecore_buf_size; 382 struct per_xecore_buf *xecore_buf; 383 struct xe_gt *gt = stream->gt; 384 bool min_data_present = false; 385 u16 group, instance; 386 unsigned int xecore; 387 388 mutex_lock(&stream->xecore_buf_lock); 389 for_each_dss_steering(xecore, gt, group, instance) { 390 xecore_buf = &stream->xecore_buf[xecore]; 391 read_ptr = xecore_buf->read; 392 write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, 393 group, instance); 394 write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg); 395 write_ptr <<= 6; 396 write_ptr &= ((buf_size << 1) - 1); 397 if (!min_data_present) { 398 total_data += buf_data_size(buf_size, read_ptr, write_ptr); 399 if (num_data_rows(total_data) >= stream->wait_num_reports) 400 min_data_present = true; 401 } 402 if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP) 403 set_bit(xecore, stream->data_drop.mask); 404 xecore_buf->write = write_ptr; 405 } 406 mutex_unlock(&stream->xecore_buf_lock); 407 408 return min_data_present; 409 } 410 411 static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance) 412 { 413 struct xe_device *xe = gt_to_xe(gt); 414 u32 write_ptr_reg; 415 416 /* On PVC, the overflow bit has to be cleared by writing 1 to it. 417 * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it. 418 */ 419 if (GRAPHICS_VER(xe) >= 20) 420 write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP); 421 else 422 write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP); 423 424 xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance); 425 } 426 427 static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream, 428 char __user *buf, size_t count, 429 size_t *total_data_size, struct xe_gt *gt, 430 u16 group, u16 instance, unsigned int xecore) 431 { 432 size_t read_data_size, copy_size, buf_size; 433 u32 read_ptr_reg, read_ptr, write_ptr; 434 u8 *xecore_start_vaddr, *read_vaddr; 435 struct per_xecore_buf *xecore_buf; 436 u32 read_offset, write_offset; 437 438 /* Hardware increments the read and write pointers such that they can 439 * overflow into one additional bit. For example, a 256KB size buffer 440 * offset pointer needs 18 bits. But HW uses 19 bits for the read and 441 * write pointers. This technique avoids wasting a slot in the buffer. 442 * Read and write offsets are calculated from the pointers in order to 443 * check if the write pointer has wrapped around the array. 444 */ 445 xecore_buf = &stream->xecore_buf[xecore]; 446 xecore_start_vaddr = xecore_buf->vaddr; 447 read_ptr = xecore_buf->read; 448 write_ptr = xecore_buf->write; 449 buf_size = stream->per_xecore_buf_size; 450 451 read_data_size = buf_data_size(buf_size, read_ptr, write_ptr); 452 /* Read only the data that the user space buffer can accommodate */ 453 read_data_size = min_t(size_t, count - *total_data_size, read_data_size); 454 if (read_data_size == 0) 455 goto exit_drop; 456 457 read_offset = read_ptr & (buf_size - 1); 458 write_offset = write_ptr & (buf_size - 1); 459 read_vaddr = xecore_start_vaddr + read_offset; 460 461 if (write_offset > read_offset) { 462 if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size)) 463 return -EFAULT; 464 } else { 465 if (read_data_size >= buf_size - read_offset) 466 copy_size = buf_size - read_offset; 467 else 468 copy_size = read_data_size; 469 if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size)) 470 return -EFAULT; 471 if (copy_to_user(buf + *total_data_size + copy_size, 472 xecore_start_vaddr, read_data_size - copy_size)) 473 return -EFAULT; 474 } 475 476 *total_data_size += read_data_size; 477 read_ptr += read_data_size; 478 479 /* Read pointer can overflow into one additional bit */ 480 read_ptr &= (buf_size << 1) - 1; 481 read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6)); 482 read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg); 483 xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance); 484 xecore_buf->read = read_ptr; 485 trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr, 486 read_data_size, *total_data_size); 487 exit_drop: 488 /* Clear drop bit (if set) after any data was read or if the buffer was empty. 489 * Drop bit can be set even if the buffer is empty as the buffer may have been emptied 490 * in the previous read() and the data drop bit was set during the previous read(). 491 */ 492 if (test_bit(xecore, stream->data_drop.mask)) { 493 clear_dropped_eviction_line_bit(gt, group, instance); 494 clear_bit(xecore, stream->data_drop.mask); 495 } 496 return 0; 497 } 498 499 /** 500 * xe_eu_stall_stream_read_locked - copy EU stall counters data from the 501 * per xecore buffers to the userspace buffer 502 * @stream: A stream opened for EU stall count metrics 503 * @file: An xe EU stall data stream file 504 * @buf: destination buffer given by userspace 505 * @count: the number of bytes userspace wants to read 506 * 507 * Returns: Number of bytes copied or a negative error code 508 * If we've successfully copied any data then reporting that takes 509 * precedence over any internal error status, so the data isn't lost. 510 */ 511 static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream, 512 struct file *file, char __user *buf, 513 size_t count) 514 { 515 struct xe_gt *gt = stream->gt; 516 size_t total_size = 0; 517 u16 group, instance; 518 unsigned int xecore; 519 int ret = 0; 520 521 mutex_lock(&stream->xecore_buf_lock); 522 if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { 523 if (!stream->data_drop.reported_to_user) { 524 stream->data_drop.reported_to_user = true; 525 xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", 526 XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); 527 mutex_unlock(&stream->xecore_buf_lock); 528 return -EIO; 529 } 530 stream->data_drop.reported_to_user = false; 531 } 532 533 for_each_dss_steering(xecore, gt, group, instance) { 534 ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size, 535 gt, group, instance, xecore); 536 if (ret || count == total_size) 537 break; 538 } 539 mutex_unlock(&stream->xecore_buf_lock); 540 return total_size ?: (ret ?: -EAGAIN); 541 } 542 543 /* 544 * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE 545 * before calling read(). 546 * 547 * Returns: The number of bytes copied or a negative error code on failure. 548 * -EIO if HW drops any EU stall data when the buffer is full. 549 */ 550 static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf, 551 size_t count, loff_t *ppos) 552 { 553 struct xe_eu_stall_data_stream *stream = file->private_data; 554 struct xe_gt *gt = stream->gt; 555 ssize_t ret, aligned_count; 556 557 aligned_count = ALIGN_DOWN(count, stream->data_record_size); 558 if (aligned_count == 0) 559 return -EINVAL; 560 561 if (!stream->enabled) { 562 xe_gt_dbg(gt, "EU stall data stream not enabled to read\n"); 563 return -EINVAL; 564 } 565 566 if (!(file->f_flags & O_NONBLOCK)) { 567 do { 568 ret = wait_event_interruptible(stream->poll_wq, stream->pollin); 569 if (ret) 570 return -EINTR; 571 572 mutex_lock(>->eu_stall->stream_lock); 573 ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count); 574 mutex_unlock(>->eu_stall->stream_lock); 575 } while (ret == -EAGAIN); 576 } else { 577 mutex_lock(>->eu_stall->stream_lock); 578 ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count); 579 mutex_unlock(>->eu_stall->stream_lock); 580 } 581 582 /* 583 * This may not work correctly if the user buffer is very small. 584 * We don't want to block the next read() when there is data in the buffer 585 * now, but couldn't be accommodated in the small user buffer. 586 */ 587 stream->pollin = false; 588 589 return ret; 590 } 591 592 static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) 593 { 594 struct xe_gt *gt = stream->gt; 595 596 mutex_destroy(&stream->xecore_buf_lock); 597 gt->eu_stall->stream = NULL; 598 kfree(stream); 599 } 600 601 static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream) 602 { 603 xe_bo_unpin_map_no_vm(stream->bo); 604 kfree(stream->xecore_buf); 605 } 606 607 static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream, 608 u16 last_xecore) 609 { 610 struct xe_tile *tile = stream->gt->tile; 611 struct xe_bo *bo; 612 u32 size; 613 614 stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL); 615 if (!stream->xecore_buf) 616 return -ENOMEM; 617 618 size = stream->per_xecore_buf_size * last_xecore; 619 620 bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel, 621 XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false); 622 if (IS_ERR(bo)) { 623 kfree(stream->xecore_buf); 624 return PTR_ERR(bo); 625 } 626 627 XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64)); 628 stream->bo = bo; 629 630 return 0; 631 } 632 633 static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream) 634 { 635 u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value; 636 struct per_xecore_buf *xecore_buf; 637 struct xe_gt *gt = stream->gt; 638 u16 group, instance; 639 unsigned int fw_ref; 640 int xecore; 641 642 /* Take runtime pm ref and forcewake to disable RC6 */ 643 xe_pm_runtime_get(gt_to_xe(gt)); 644 fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); 645 if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) { 646 xe_gt_err(gt, "Failed to get RENDER forcewake\n"); 647 xe_pm_runtime_put(gt_to_xe(gt)); 648 return -ETIMEDOUT; 649 } 650 651 if (XE_GT_WA(gt, 22016596838)) 652 xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, 653 _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); 654 655 for_each_dss_steering(xecore, gt, group, instance) { 656 write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance); 657 /* Clear any drop bits set and not cleared in the previous session. */ 658 if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP) 659 clear_dropped_eviction_line_bit(gt, group, instance); 660 write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg); 661 read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr); 662 read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg); 663 /* Initialize the read pointer to the write pointer */ 664 xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance); 665 write_ptr <<= 6; 666 write_ptr &= (stream->per_xecore_buf_size << 1) - 1; 667 xecore_buf = &stream->xecore_buf[xecore]; 668 xecore_buf->write = write_ptr; 669 xecore_buf->read = write_ptr; 670 } 671 stream->data_drop.reported_to_user = false; 672 bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS); 673 674 reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE, 675 REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) | 676 REG_FIELD_PREP(EUSTALL_SAMPLE_RATE, 677 stream->sampling_rate_mult)); 678 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value); 679 /* GGTT addresses can never be > 32 bits */ 680 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0); 681 reg_value = xe_bo_ggtt_addr(stream->bo); 682 reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ, 683 stream->per_xecore_buf_size / SZ_256K); 684 reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING; 685 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value); 686 687 return 0; 688 } 689 690 static void eu_stall_data_buf_poll_work_fn(struct work_struct *work) 691 { 692 struct xe_eu_stall_data_stream *stream = 693 container_of(work, typeof(*stream), buf_poll_work.work); 694 struct xe_gt *gt = stream->gt; 695 696 if (eu_stall_data_buf_poll(stream)) { 697 stream->pollin = true; 698 wake_up(&stream->poll_wq); 699 } 700 queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq, 701 &stream->buf_poll_work, 702 msecs_to_jiffies(POLL_PERIOD_MS)); 703 } 704 705 static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, 706 struct eu_stall_open_properties *props) 707 { 708 unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores; 709 struct per_xecore_buf *xecore_buf; 710 struct xe_gt *gt = stream->gt; 711 xe_dss_mask_t all_xecores; 712 u16 group, instance; 713 u32 vaddr_offset; 714 int ret; 715 716 bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, 717 XE_MAX_DSS_FUSE_BITS); 718 num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS); 719 last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1; 720 721 max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores); 722 if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) { 723 xe_gt_dbg(gt, "Invalid EU stall event report count %u\n", 724 props->wait_num_reports); 725 xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n", 726 max_wait_num_reports); 727 return -EINVAL; 728 } 729 730 init_waitqueue_head(&stream->poll_wq); 731 mutex_init(&stream->xecore_buf_lock); 732 INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); 733 stream->per_xecore_buf_size = per_xecore_buf_size; 734 stream->sampling_rate_mult = props->sampling_rate_mult; 735 stream->wait_num_reports = props->wait_num_reports; 736 stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt)); 737 738 ret = xe_eu_stall_data_buf_alloc(stream, last_xecore); 739 if (ret) 740 return ret; 741 742 for_each_dss_steering(xecore, gt, group, instance) { 743 xecore_buf = &stream->xecore_buf[xecore]; 744 vaddr_offset = xecore * stream->per_xecore_buf_size; 745 xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset; 746 } 747 return 0; 748 } 749 750 static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream, 751 struct file *file, poll_table *wait) 752 { 753 __poll_t events = 0; 754 755 poll_wait(file, &stream->poll_wq, wait); 756 757 if (stream->pollin) 758 events |= EPOLLIN; 759 760 return events; 761 } 762 763 static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait) 764 { 765 struct xe_eu_stall_data_stream *stream = file->private_data; 766 struct xe_gt *gt = stream->gt; 767 __poll_t ret; 768 769 mutex_lock(>->eu_stall->stream_lock); 770 ret = xe_eu_stall_stream_poll_locked(stream, file, wait); 771 mutex_unlock(>->eu_stall->stream_lock); 772 773 return ret; 774 } 775 776 static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream) 777 { 778 struct xe_gt *gt = stream->gt; 779 int ret = 0; 780 781 if (stream->enabled) 782 return ret; 783 784 stream->enabled = true; 785 786 ret = xe_eu_stall_stream_enable(stream); 787 788 queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq, 789 &stream->buf_poll_work, 790 msecs_to_jiffies(POLL_PERIOD_MS)); 791 return ret; 792 } 793 794 static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream) 795 { 796 struct xe_gt *gt = stream->gt; 797 798 if (!stream->enabled) 799 return 0; 800 801 stream->enabled = false; 802 803 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0); 804 805 cancel_delayed_work_sync(&stream->buf_poll_work); 806 807 if (XE_GT_WA(gt, 22016596838)) 808 xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, 809 _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); 810 811 xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER); 812 xe_pm_runtime_put(gt_to_xe(gt)); 813 814 return 0; 815 } 816 817 static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream, 818 unsigned int cmd, unsigned long arg) 819 { 820 switch (cmd) { 821 case DRM_XE_OBSERVATION_IOCTL_ENABLE: 822 return xe_eu_stall_enable_locked(stream); 823 case DRM_XE_OBSERVATION_IOCTL_DISABLE: 824 return xe_eu_stall_disable_locked(stream); 825 } 826 827 return -EINVAL; 828 } 829 830 static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 831 { 832 struct xe_eu_stall_data_stream *stream = file->private_data; 833 struct xe_gt *gt = stream->gt; 834 long ret; 835 836 mutex_lock(>->eu_stall->stream_lock); 837 ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg); 838 mutex_unlock(>->eu_stall->stream_lock); 839 840 return ret; 841 } 842 843 static int xe_eu_stall_stream_close(struct inode *inode, struct file *file) 844 { 845 struct xe_eu_stall_data_stream *stream = file->private_data; 846 struct xe_gt *gt = stream->gt; 847 848 drm_dev_put(>->tile->xe->drm); 849 850 mutex_lock(>->eu_stall->stream_lock); 851 xe_eu_stall_disable_locked(stream); 852 xe_eu_stall_data_buf_destroy(stream); 853 xe_eu_stall_stream_free(stream); 854 mutex_unlock(>->eu_stall->stream_lock); 855 856 return 0; 857 } 858 859 static const struct file_operations fops_eu_stall = { 860 .owner = THIS_MODULE, 861 .llseek = noop_llseek, 862 .release = xe_eu_stall_stream_close, 863 .poll = xe_eu_stall_stream_poll, 864 .read = xe_eu_stall_stream_read, 865 .unlocked_ioctl = xe_eu_stall_stream_ioctl, 866 .compat_ioctl = xe_eu_stall_stream_ioctl, 867 }; 868 869 static int xe_eu_stall_stream_open_locked(struct drm_device *dev, 870 struct eu_stall_open_properties *props, 871 struct drm_file *file) 872 { 873 struct xe_eu_stall_data_stream *stream; 874 struct xe_gt *gt = props->gt; 875 unsigned long f_flags = 0; 876 int ret, stream_fd; 877 878 /* Only one session can be active at any time */ 879 if (gt->eu_stall->stream) { 880 xe_gt_dbg(gt, "EU stall sampling session already active\n"); 881 return -EBUSY; 882 } 883 884 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 885 if (!stream) 886 return -ENOMEM; 887 888 gt->eu_stall->stream = stream; 889 stream->gt = gt; 890 891 ret = xe_eu_stall_stream_init(stream, props); 892 if (ret) { 893 xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret); 894 goto err_free; 895 } 896 897 stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags); 898 if (stream_fd < 0) { 899 ret = stream_fd; 900 xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret); 901 goto err_destroy; 902 } 903 904 /* Take a reference on the driver that will be kept with stream_fd 905 * until its release. 906 */ 907 drm_dev_get(>->tile->xe->drm); 908 909 return stream_fd; 910 911 err_destroy: 912 xe_eu_stall_data_buf_destroy(stream); 913 err_free: 914 xe_eu_stall_stream_free(stream); 915 return ret; 916 } 917 918 /** 919 * xe_eu_stall_stream_open - Open a xe EU stall data stream fd 920 * 921 * @dev: DRM device pointer 922 * @data: pointer to first struct @drm_xe_ext_set_property in 923 * the chain of input properties from the user space. 924 * @file: DRM file pointer 925 * 926 * This function opens a EU stall data stream with input properties from 927 * the user space. 928 * 929 * Returns: EU stall data stream fd on success or a negative error code. 930 */ 931 int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file) 932 { 933 struct xe_device *xe = to_xe_device(dev); 934 struct eu_stall_open_properties props = {}; 935 int ret; 936 937 if (!xe_eu_stall_supported_on_platform(xe)) { 938 drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n"); 939 return -ENODEV; 940 } 941 942 if (xe_observation_paranoid && !perfmon_capable()) { 943 drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n"); 944 return -EACCES; 945 } 946 947 /* Initialize and set default values */ 948 props.wait_num_reports = 1; 949 props.sampling_rate_mult = 4; 950 951 ret = xe_eu_stall_user_extensions(xe, data, 0, &props); 952 if (ret) 953 return ret; 954 955 if (!props.gt) { 956 drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n"); 957 return -EINVAL; 958 } 959 960 mutex_lock(&props.gt->eu_stall->stream_lock); 961 ret = xe_eu_stall_stream_open_locked(dev, &props, file); 962 mutex_unlock(&props.gt->eu_stall->stream_lock); 963 964 return ret; 965 } 966