// SPDX-License-Identifier: MIT
/*
 * Copyright © 2025 Intel Corporation
 */

#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/types.h>

#include <drm/drm_drv.h>
#include <generated/xe_wa_oob.h>
#include <uapi/drm/xe_drm.h>

#include "xe_bo.h"
#include "xe_device.h"
#include "xe_eu_stall.h"
#include "xe_force_wake.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_gt_topology.h"
#include "xe_macros.h"
#include "xe_observation.h"
#include "xe_pm.h"
#include "xe_trace.h"
#include "xe_wa.h"

#include "regs/xe_eu_stall_regs.h"
#include "regs/xe_gt_regs.h"

#define POLL_PERIOD_MS	5

static size_t per_xecore_buf_size = SZ_512K;

struct per_xecore_buf {
	/* Buffer vaddr */
	u8 *vaddr;
	/* Write pointer */
	u32 write;
	/* Read pointer */
	u32 read;
};

struct xe_eu_stall_data_stream {
	bool pollin;
	bool enabled;
	int wait_num_reports;
	int sampling_rate_mult;
	wait_queue_head_t poll_wq;
	size_t data_record_size;
	size_t per_xecore_buf_size;

	struct xe_gt *gt;
	struct xe_bo *bo;
	struct per_xecore_buf *xecore_buf;
	struct {
		bool reported_to_user;
		xe_dss_mask_t mask;
	} data_drop;
	struct delayed_work buf_poll_work;
};

struct xe_eu_stall_gt {
	/* Lock to protect stream */
	struct mutex stream_lock;
	/* EU stall data stream */
	struct xe_eu_stall_data_stream *stream;
	/* Workqueue to schedule buffer pointers polling work */
	struct workqueue_struct *buf_ptr_poll_wq;
};

/**
 * struct eu_stall_open_properties - EU stall sampling properties received
 *				     from user space at open.
 * @sampling_rate_mult: EU stall sampling rate multiplier.
 *			HW will sample every (sampling_rate_mult x 251) cycles.
 * @wait_num_reports: Minimum number of EU stall data reports to unblock poll().
 * @gt: GT on which EU stall data will be captured.
 */
struct eu_stall_open_properties {
	int sampling_rate_mult;
	int wait_num_reports;
	struct xe_gt *gt;
};

/*
 * EU stall data format for PVC
 */
struct xe_eu_stall_data_pvc {
	__u64 ip_addr:29;		/* Bits 0 to 28 */
	__u64 active_count:8;		/* Bits 29 to 36 */
	__u64 other_count:8;		/* Bits 37 to 44 */
	__u64 control_count:8;		/* Bits 45 to 52 */
	__u64 pipestall_count:8;	/* Bits 53 to 60 */
	__u64 send_count:8;		/* Bits 61 to 68 */
	__u64 dist_acc_count:8;		/* Bits 69 to 76 */
	__u64 sbid_count:8;		/* Bits 77 to 84 */
	__u64 sync_count:8;		/* Bits 85 to 92 */
	__u64 inst_fetch_count:8;	/* Bits 93 to 100 */
	__u64 unused_bits:27;
	__u64 unused[6];
} __packed;
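
/*
 * Both the PVC layout above and the Xe2 layout below pack the counters into
 * the first 16 bytes and pad the record out to 64 bytes, so one EU stall
 * data record occupies exactly one 64B data row.
 */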

/*
 * EU stall data format for Xe2 arch GPUs (LNL, BMG).
 */
struct xe_eu_stall_data_xe2 {
	__u64 ip_addr:29;		/* Bits 0 to 28 */
	__u64 tdr_count:8;		/* Bits 29 to 36 */
	__u64 other_count:8;		/* Bits 37 to 44 */
	__u64 control_count:8;		/* Bits 45 to 52 */
	__u64 pipestall_count:8;	/* Bits 53 to 60 */
	__u64 send_count:8;		/* Bits 61 to 68 */
	__u64 dist_acc_count:8;		/* Bits 69 to 76 */
	__u64 sbid_count:8;		/* Bits 77 to 84 */
	__u64 sync_count:8;		/* Bits 85 to 92 */
	__u64 inst_fetch_count:8;	/* Bits 93 to 100 */
	__u64 active_count:8;		/* Bits 101 to 108 */
	__u64 ex_id:3;			/* Bits 109 to 111 */
	__u64 end_flag:1;		/* Bit 112 */
	__u64 unused_bits:15;
	__u64 unused[6];
} __packed;

const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7};

/**
 * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
 *
 * @num_rates: Pointer to a u32 to return the number of sampling rates.
 * @rates: double u64 pointer to point to an array of sampling rates.
 *
 * Stores the number of sampling rates and a pointer to the array of
 * sampling rates in the input pointers.
 *
 * Returns: Size of the EU stall sampling rates array in bytes.
 */
size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates)
{
	*num_rates = ARRAY_SIZE(eu_stall_sampling_rates);
	*rates = eu_stall_sampling_rates;

	return sizeof(eu_stall_sampling_rates);
}

/**
 * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
 *
 * Returns: The per XeCore buffer size used to allocate the per GT
 *	    EU stall data buffer.
 */
size_t xe_eu_stall_get_per_xecore_buf_size(void)
{
	return per_xecore_buf_size;
}

/**
 * xe_eu_stall_data_record_size - get EU stall data record size.
 *
 * @xe: Pointer to a Xe device.
 *
 * Returns: EU stall data record size.
 */
size_t xe_eu_stall_data_record_size(struct xe_device *xe)
{
	size_t record_size = 0;

	if (xe->info.platform == XE_PVC)
		record_size = sizeof(struct xe_eu_stall_data_pvc);
	else if (GRAPHICS_VER(xe) >= 20)
		record_size = sizeof(struct xe_eu_stall_data_xe2);

	xe_assert(xe, is_power_of_2(record_size));

	return record_size;
}

/**
 * num_data_rows - Return the number of EU stall data rows of 64B each
 *		   for a given data size.
 *
 * @data_size: EU stall data size
 */
static u32 num_data_rows(u32 data_size)
{
	return data_size >> 6;
}
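
/*
 * Example: with the default 512K per XeCore buffer, a full buffer holds
 * num_data_rows(SZ_512K) == SZ_512K / 64 == 8192 EU stall data records.
 */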

static void xe_eu_stall_fini(void *arg)
{
	struct xe_gt *gt = arg;

	destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
	mutex_destroy(&gt->eu_stall->stream_lock);
	kfree(gt->eu_stall);
}

/**
 * xe_eu_stall_init() - Allocate and initialize GT level EU stall data
 *			structure xe_eu_stall_gt within struct xe_gt.
 *
 * @gt: GT being initialized.
 *
 * Returns: zero on success or a negative error code.
 */
int xe_eu_stall_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	int ret;

	gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL);
	if (!gt->eu_stall) {
		ret = -ENOMEM;
		goto exit;
	}

	mutex_init(&gt->eu_stall->stream_lock);

	gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
	if (!gt->eu_stall->buf_ptr_poll_wq) {
		ret = -ENOMEM;
		goto exit_free;
	}

	ret = devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
	if (ret)
		goto exit_destroy;

	return 0;
exit_destroy:
	destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
exit_free:
	mutex_destroy(&gt->eu_stall->stream_lock);
	kfree(gt->eu_stall);
exit:
	return ret;
}

static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value,
					   struct eu_stall_open_properties *props)
{
	value = div_u64(value, 251);
	if (value == 0 || value > 7) {
		drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value);
		return -EINVAL;
	}
	props->sampling_rate_mult = value;
	return 0;
}

static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value,
					      struct eu_stall_open_properties *props)
{
	props->wait_num_reports = value;

	return 0;
}

static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value,
				   struct eu_stall_open_properties *props)
{
	if (value >= xe->info.gt_count) {
		drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value);
		return -EINVAL;
	}
	props->gt = xe_device_get_gt(xe, value);
	return 0;
}

typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value,
					struct eu_stall_open_properties *props);

static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = {
	[DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate,
	[DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports,
	[DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id,
};

static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension,
					     struct eu_stall_open_properties *props)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs));
	return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props);
}

typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension,
					     struct eu_stall_open_properties *props);
static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = {
	[DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property,
};
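
/*
 * Illustrative open-time property chain from user space: each extension is a
 * struct drm_xe_ext_set_property carrying one property/value pair, linked via
 * struct drm_xe_user_extension.next_extension (0 terminates the chain):
 *
 *   SAMPLE_RATE = 251 * 4  ->  WAIT_NUM_REPORTS = 1  ->  GT_ID = 0  ->  0
 */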

#define MAX_USER_EXTENSIONS	5
static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension,
				       int ext_number, struct eu_stall_open_properties *props)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs));
	err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props);

	return 0;
}

/**
 * buf_data_size - Calculate the number of bytes in a circular buffer
 *		   given the read and write pointers and the size of
 *		   the buffer.
 *
 * @buf_size: Size of the circular buffer
 * @read_ptr: Read pointer with an additional overflow bit
 * @write_ptr: Write pointer with an additional overflow bit
 *
 * Since the read and write pointers have an additional overflow bit,
 * this function calculates the offsets from the pointers and uses the
 * offsets to calculate the data size in the buffer.
 *
 * Returns: number of bytes of data in the buffer
 */
static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
{
	u32 read_offset, write_offset, size = 0;

	if (read_ptr == write_ptr)
		goto exit;

	read_offset = read_ptr & (buf_size - 1);
	write_offset = write_ptr & (buf_size - 1);

	if (write_offset > read_offset)
		size = write_offset - read_offset;
	else
		size = buf_size - read_offset + write_offset;
exit:
	return size;
}

/**
 * eu_stall_data_buf_poll - Poll for EU stall data in the buffer.
 *
 * @stream: xe EU stall data stream instance
 *
 * Returns: true if the EU stall buffer contains minimum stall data as
 *	    specified by the event report count, else false.
 */
static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
{
	u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0;
	u32 buf_size = stream->per_xecore_buf_size;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	bool min_data_present = false;
	u16 group, instance;
	unsigned int xecore;

	mutex_lock(&gt->eu_stall->stream_lock);
	for_each_dss_steering(xecore, gt, group, instance) {
		xecore_buf = &stream->xecore_buf[xecore];
		read_ptr = xecore_buf->read;
		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT,
						       group, instance);
		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
		write_ptr <<= 6;
		write_ptr &= ((buf_size << 1) - 1);
		if (!min_data_present) {
			total_data += buf_data_size(buf_size, read_ptr, write_ptr);
			if (num_data_rows(total_data) >= stream->wait_num_reports)
				min_data_present = true;
		}
		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
			set_bit(xecore, stream->data_drop.mask);
		xecore_buf->write = write_ptr;
	}
	mutex_unlock(&gt->eu_stall->stream_lock);

	return min_data_present;
}
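
/*
 * Worked example of the write pointer decode done in eu_stall_data_buf_poll()
 * and xe_eu_stall_data_buf_read(): with a 256K per XeCore buffer, a
 * XEHPC_EUSTALL_REPORT write pointer field of 0x1000 becomes a byte offset of
 * 0x1000 << 6 = 0x40000 (256K); the mask ((buf_size << 1) - 1) = 0x7ffff keeps
 * bit 18 as the extra overflow/wrap bit instead of folding it back to zero.
 */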

static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 write_ptr_reg;

	/* On PVC, the overflow bit has to be cleared by writing 1 to it.
	 * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it.
	 */
	if (GRAPHICS_VER(xe) >= 20)
		write_ptr_reg = _MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);
	else
		write_ptr_reg = _MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);

	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance);
}

static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream,
				     char __user *buf, size_t count,
				     size_t *total_data_size, struct xe_gt *gt,
				     u16 group, u16 instance, unsigned int xecore)
{
	size_t read_data_size, copy_size, buf_size;
	u32 read_ptr_reg, read_ptr, write_ptr;
	u8 *xecore_start_vaddr, *read_vaddr;
	struct per_xecore_buf *xecore_buf;
	u32 read_offset, write_offset;

	/* Hardware increments the read and write pointers such that they can
	 * overflow into one additional bit. For example, a 256KB size buffer
	 * offset pointer needs 18 bits. But HW uses 19 bits for the read and
	 * write pointers. This technique avoids wasting a slot in the buffer.
	 * Read and write offsets are calculated from the pointers in order to
	 * check if the write pointer has wrapped around the array.
	 */
	xecore_buf = &stream->xecore_buf[xecore];
	xecore_start_vaddr = xecore_buf->vaddr;
	read_ptr = xecore_buf->read;
	write_ptr = xecore_buf->write;
	buf_size = stream->per_xecore_buf_size;

	read_data_size = buf_data_size(buf_size, read_ptr, write_ptr);
	/* Read only the data that the user space buffer can accommodate */
	read_data_size = min_t(size_t, count - *total_data_size, read_data_size);
	if (read_data_size == 0)
		goto exit_drop;

	read_offset = read_ptr & (buf_size - 1);
	write_offset = write_ptr & (buf_size - 1);
	read_vaddr = xecore_start_vaddr + read_offset;

	if (write_offset > read_offset) {
		if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size))
			return -EFAULT;
	} else {
		if (read_data_size >= buf_size - read_offset)
			copy_size = buf_size - read_offset;
		else
			copy_size = read_data_size;
		if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size))
			return -EFAULT;
		if (copy_to_user(buf + *total_data_size + copy_size,
				 xecore_start_vaddr, read_data_size - copy_size))
			return -EFAULT;
	}

	*total_data_size += read_data_size;
	read_ptr += read_data_size;

	/* Read pointer can overflow into one additional bit */
	read_ptr &= (buf_size << 1) - 1;
	read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
	read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
	xecore_buf->read = read_ptr;
	trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
				    read_data_size, *total_data_size);
exit_drop:
	/* Clear drop bit (if set) after any data was read or if the buffer was empty.
	 * Drop bit can be set even if the buffer is empty as the buffer may have been emptied
	 * in the previous read() and the data drop bit was set during the previous read().
	 */
	if (test_bit(xecore, stream->data_drop.mask)) {
		clear_dropped_eviction_line_bit(gt, group, instance);
		clear_bit(xecore, stream->data_drop.mask);
	}
	return 0;
}

/**
 * xe_eu_stall_stream_read_locked - copy EU stall counters data from the
 *				    per xecore buffers to the userspace buffer
 * @stream: A stream opened for EU stall count metrics
 * @file: An xe EU stall data stream file
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 *
 * Returns: Number of bytes copied or a negative error code
 * If we've successfully copied any data then reporting that takes
 * precedence over any internal error status, so the data isn't lost.
 */
static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream,
					      struct file *file, char __user *buf,
					      size_t count)
{
	struct xe_gt *gt = stream->gt;
	size_t total_size = 0;
	u16 group, instance;
	unsigned int xecore;
	int ret = 0;

	if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) {
		if (!stream->data_drop.reported_to_user) {
			stream->data_drop.reported_to_user = true;
			xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n",
				  XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask);
			return -EIO;
		}
		stream->data_drop.reported_to_user = false;
	}

	for_each_dss_steering(xecore, gt, group, instance) {
		ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
						gt, group, instance, xecore);
		if (ret || count == total_size)
			break;
	}
	return total_size ?: (ret ?: -EAGAIN);
}

/*
 * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
 * before calling read().
 *
 * Returns: The number of bytes copied or a negative error code on failure.
 *	    -EIO if HW drops any EU stall data when the buffer is full.
 */
static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
				       size_t count, loff_t *ppos)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	ssize_t ret, aligned_count;

	aligned_count = ALIGN_DOWN(count, stream->data_record_size);
	if (aligned_count == 0)
		return -EINVAL;

	if (!stream->enabled) {
		xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
		return -EINVAL;
	}

	if (!(file->f_flags & O_NONBLOCK)) {
		do {
			ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
			if (ret)
				return -EINTR;

			mutex_lock(&gt->eu_stall->stream_lock);
			ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
			mutex_unlock(&gt->eu_stall->stream_lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&gt->eu_stall->stream_lock);
		ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
		mutex_unlock(&gt->eu_stall->stream_lock);
	}

	/*
	 * This may not work correctly if the user buffer is very small.
	 * We don't want to block the next read() when there is data in the
	 * buffer now that couldn't be accommodated in the small user buffer.
	 */
	stream->pollin = false;

	return ret;
}

static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;

	gt->eu_stall->stream = NULL;
	kfree(stream);
}

static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream)
{
	xe_bo_unpin_map_no_vm(stream->bo);
	kfree(stream->xecore_buf);
}

static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
				      u16 last_xecore)
{
	struct xe_tile *tile = stream->gt->tile;
	struct xe_bo *bo;
	u32 size;

	stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
	if (!stream->xecore_buf)
		return -ENOMEM;

	size = stream->per_xecore_buf_size * last_xecore;

	bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
					     size, ~0ull, ttm_bo_type_kernel,
					     XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
	if (IS_ERR(bo)) {
		kfree(stream->xecore_buf);
		return PTR_ERR(bo);
	}

	XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
	stream->bo = bo;

	return 0;
}
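
/*
 * Note: XEHPC_EUSTALL_BASE, programmed in xe_eu_stall_stream_enable() below,
 * encodes the per XeCore buffer size in 256K units; with the default 512K
 * per XeCore buffer the XEHPC_EUSTALL_BASE_XECORE_BUF_SZ field is written
 * with 512K / 256K = 2.
 */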

static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
{
	u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	u16 group, instance;
	unsigned int fw_ref;
	int xecore;

	/* Take runtime pm ref and forcewake to disable RC6 */
	xe_pm_runtime_get(gt_to_xe(gt));
	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
		xe_gt_err(gt, "Failed to get RENDER forcewake\n");
		xe_pm_runtime_put(gt_to_xe(gt));
		return -ETIMEDOUT;
	}

	if (XE_WA(gt, 22016596838))
		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
					  _MASKED_BIT_ENABLE(DISABLE_DOP_GATING));

	for_each_dss_steering(xecore, gt, group, instance) {
		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance);
		/* Clear any drop bits set and not cleared in the previous session. */
		if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
			clear_dropped_eviction_line_bit(gt, group, instance);
		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
		read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
		read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
		/* Initialize the read pointer to the write pointer */
		xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
		write_ptr <<= 6;
		write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
		xecore_buf = &stream->xecore_buf[xecore];
		xecore_buf->write = write_ptr;
		xecore_buf->read = write_ptr;
	}
	stream->data_drop.reported_to_user = false;
	bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS);

	reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE,
				  REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) |
				  REG_FIELD_PREP(EUSTALL_SAMPLE_RATE,
						 stream->sampling_rate_mult));
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value);
	/* GGTT addresses can never be > 32 bits */
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0);
	reg_value = xe_bo_ggtt_addr(stream->bo);
	reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ,
				    stream->per_xecore_buf_size / SZ_256K);
	reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING;
	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value);

	return 0;
}

static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
{
	struct xe_eu_stall_data_stream *stream =
		container_of(work, typeof(*stream), buf_poll_work.work);
	struct xe_gt *gt = stream->gt;

	if (eu_stall_data_buf_poll(stream)) {
		stream->pollin = true;
		wake_up(&stream->poll_wq);
	}
	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
			   &stream->buf_poll_work,
			   msecs_to_jiffies(POLL_PERIOD_MS));
}

static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
				   struct eu_stall_open_properties *props)
{
	unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores;
	struct per_xecore_buf *xecore_buf;
	struct xe_gt *gt = stream->gt;
	xe_dss_mask_t all_xecores;
	u16 group, instance;
	u32 vaddr_offset;
	int ret;

	bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
		  XE_MAX_DSS_FUSE_BITS);
	num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS);
	last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1;

	max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores);
	if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) {
		xe_gt_dbg(gt, "Invalid EU stall event report count %u\n",
			  props->wait_num_reports);
		xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n",
			  max_wait_num_reports);
		return -EINVAL;
	}

	init_waitqueue_head(&stream->poll_wq);
	INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
	stream->per_xecore_buf_size = per_xecore_buf_size;
	stream->sampling_rate_mult = props->sampling_rate_mult;
	stream->wait_num_reports = props->wait_num_reports;
	stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));

	ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
	if (ret)
		return ret;
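
	/*
	 * Carve the single BO allocated above into per XeCore slices:
	 * the slice for XeCore N starts at offset N * per_xecore_buf_size.
	 */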
	for_each_dss_steering(xecore, gt, group, instance) {
		xecore_buf = &stream->xecore_buf[xecore];
		vaddr_offset = xecore * stream->per_xecore_buf_size;
		xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset;
	}
	return 0;
}

static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream,
					       struct file *file, poll_table *wait)
{
	__poll_t events = 0;

	poll_wait(file, &stream->poll_wq, wait);

	if (stream->pollin)
		events |= EPOLLIN;

	return events;
}

static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	__poll_t ret;

	mutex_lock(&gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_poll_locked(stream, file, wait);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return ret;
}

static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;
	int ret = 0;

	if (stream->enabled)
		return ret;

	stream->enabled = true;

	ret = xe_eu_stall_stream_enable(stream);

	queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
			   &stream->buf_poll_work,
			   msecs_to_jiffies(POLL_PERIOD_MS));
	return ret;
}

static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
{
	struct xe_gt *gt = stream->gt;

	if (!stream->enabled)
		return 0;

	stream->enabled = false;

	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);

	cancel_delayed_work_sync(&stream->buf_poll_work);

	if (XE_WA(gt, 22016596838))
		xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
					  _MASKED_BIT_DISABLE(DISABLE_DOP_GATING));

	xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
	xe_pm_runtime_put(gt_to_xe(gt));

	return 0;
}

static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream,
					    unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case DRM_XE_OBSERVATION_IOCTL_ENABLE:
		return xe_eu_stall_enable_locked(stream);
	case DRM_XE_OBSERVATION_IOCTL_DISABLE:
		return xe_eu_stall_disable_locked(stream);
	}

	return -EINVAL;
}

static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;
	long ret;

	mutex_lock(&gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return ret;
}

static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;

	drm_dev_put(&gt->tile->xe->drm);

	mutex_lock(&gt->eu_stall->stream_lock);
	xe_eu_stall_disable_locked(stream);
	xe_eu_stall_data_buf_destroy(stream);
	xe_eu_stall_stream_free(stream);
	mutex_unlock(&gt->eu_stall->stream_lock);

	return 0;
}

static const struct file_operations fops_eu_stall = {
	.owner = THIS_MODULE,
	.llseek = noop_llseek,
	.release = xe_eu_stall_stream_close,
	.poll = xe_eu_stall_stream_poll,
	.read = xe_eu_stall_stream_read,
	.unlocked_ioctl = xe_eu_stall_stream_ioctl,
	.compat_ioctl = xe_eu_stall_stream_ioctl,
};
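
/*
 * Typical life cycle of the fd returned by xe_eu_stall_stream_open():
 * DRM_XE_OBSERVATION_IOCTL_ENABLE, then poll()/read() in a loop, then
 * DRM_XE_OBSERVATION_IOCTL_DISABLE and close(), which tears the stream down.
 */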

static int xe_eu_stall_stream_open_locked(struct drm_device *dev,
					  struct eu_stall_open_properties *props,
					  struct drm_file *file)
{
	struct xe_eu_stall_data_stream *stream;
	struct xe_gt *gt = props->gt;
	unsigned long f_flags = 0;
	int ret, stream_fd;

	/* Only one session can be active at any time */
	if (gt->eu_stall->stream) {
		xe_gt_dbg(gt, "EU stall sampling session already active\n");
		return -EBUSY;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream)
		return -ENOMEM;

	gt->eu_stall->stream = stream;
	stream->gt = gt;

	ret = xe_eu_stall_stream_init(stream, props);
	if (ret) {
		xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret);
		goto err_free;
	}

	stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags);
	if (stream_fd < 0) {
		ret = stream_fd;
		xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret);
		goto err_destroy;
	}

	/* Take a reference on the driver that will be kept with stream_fd
	 * until its release.
	 */
	drm_dev_get(&gt->tile->xe->drm);

	return stream_fd;

err_destroy:
	xe_eu_stall_data_buf_destroy(stream);
err_free:
	xe_eu_stall_stream_free(stream);
	return ret;
}

/**
 * xe_eu_stall_stream_open - Open a xe EU stall data stream fd
 *
 * @dev: DRM device pointer
 * @data: pointer to first struct @drm_xe_ext_set_property in
 *	  the chain of input properties from user space.
 * @file: DRM file pointer
 *
 * This function opens an EU stall data stream with input properties from
 * user space.
 *
 * Returns: EU stall data stream fd on success or a negative error code.
 */
int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct eu_stall_open_properties props = {};
	int ret;

	if (!xe_eu_stall_supported_on_platform(xe)) {
		drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
		return -ENODEV;
	}

	if (xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n");
		return -EACCES;
	}

	/* Initialize and set default values */
	props.wait_num_reports = 1;
	props.sampling_rate_mult = 4;

	ret = xe_eu_stall_user_extensions(xe, data, 0, &props);
	if (ret)
		return ret;

	if (!props.gt) {
		drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n");
		return -EINVAL;
	}

	mutex_lock(&props.gt->eu_stall->stream_lock);
	ret = xe_eu_stall_stream_open_locked(dev, &props, file);
	mutex_unlock(&props.gt->eu_stall->stream_lock);

	return ret;
}