1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2025 Intel Corporation 4 */ 5 6 #include <linux/anon_inodes.h> 7 #include <linux/fs.h> 8 #include <linux/poll.h> 9 #include <linux/types.h> 10 11 #include <drm/drm_drv.h> 12 #include <generated/xe_wa_oob.h> 13 #include <uapi/drm/xe_drm.h> 14 15 #include "xe_bo.h" 16 #include "xe_device.h" 17 #include "xe_eu_stall.h" 18 #include "xe_force_wake.h" 19 #include "xe_gt_mcr.h" 20 #include "xe_gt_printk.h" 21 #include "xe_gt_topology.h" 22 #include "xe_macros.h" 23 #include "xe_observation.h" 24 #include "xe_pm.h" 25 #include "xe_trace.h" 26 #include "xe_wa.h" 27 28 #include "regs/xe_eu_stall_regs.h" 29 #include "regs/xe_gt_regs.h" 30 31 #define POLL_PERIOD_MS 5 32 33 static size_t per_xecore_buf_size = SZ_512K; 34 35 struct per_xecore_buf { 36 /* Buffer vaddr */ 37 u8 *vaddr; 38 /* Write pointer */ 39 u32 write; 40 /* Read pointer */ 41 u32 read; 42 }; 43 44 struct xe_eu_stall_data_stream { 45 bool pollin; 46 bool enabled; 47 bool reset_detected; 48 int wait_num_reports; 49 int sampling_rate_mult; 50 wait_queue_head_t poll_wq; 51 size_t data_record_size; 52 size_t per_xecore_buf_size; 53 unsigned int fw_ref; 54 55 struct xe_gt *gt; 56 struct xe_bo *bo; 57 /* Lock to protect data buffer pointers */ 58 struct mutex xecore_buf_lock; 59 struct per_xecore_buf *xecore_buf; 60 struct { 61 bool reported_to_user; 62 xe_dss_mask_t mask; 63 } data_drop; 64 struct delayed_work buf_poll_work; 65 }; 66 67 struct xe_eu_stall_gt { 68 /* Lock to protect stream */ 69 struct mutex stream_lock; 70 /* EU stall data stream */ 71 struct xe_eu_stall_data_stream *stream; 72 /* Workqueue to schedule buffer pointers polling work */ 73 struct workqueue_struct *buf_ptr_poll_wq; 74 }; 75 76 /** 77 * struct eu_stall_open_properties - EU stall sampling properties received 78 * from user space at open. 79 * @sampling_rate_mult: EU stall sampling rate multiplier. 80 * HW will sample every (sampling_rate_mult x 251) cycles. 81 * @wait_num_reports: Minimum number of EU stall data reports to unblock poll(). 82 * @gt: GT on which EU stall data will be captured. 83 */ 84 struct eu_stall_open_properties { 85 int sampling_rate_mult; 86 int wait_num_reports; 87 struct xe_gt *gt; 88 }; 89 90 /* 91 * EU stall data format for PVC 92 */ 93 struct xe_eu_stall_data_pvc { 94 __u64 ip_addr:29; /* Bits 0 to 28 */ 95 __u64 active_count:8; /* Bits 29 to 36 */ 96 __u64 other_count:8; /* Bits 37 to 44 */ 97 __u64 control_count:8; /* Bits 45 to 52 */ 98 __u64 pipestall_count:8; /* Bits 53 to 60 */ 99 __u64 send_count:8; /* Bits 61 to 68 */ 100 __u64 dist_acc_count:8; /* Bits 69 to 76 */ 101 __u64 sbid_count:8; /* Bits 77 to 84 */ 102 __u64 sync_count:8; /* Bits 85 to 92 */ 103 __u64 inst_fetch_count:8; /* Bits 93 to 100 */ 104 __u64 unused_bits:27; 105 __u64 unused[6]; 106 } __packed; 107 108 /* 109 * EU stall data format for Xe2 arch GPUs (LNL, BMG). 110 */ 111 struct xe_eu_stall_data_xe2 { 112 __u64 ip_addr:29; /* Bits 0 to 28 */ 113 __u64 tdr_count:8; /* Bits 29 to 36 */ 114 __u64 other_count:8; /* Bits 37 to 44 */ 115 __u64 control_count:8; /* Bits 45 to 52 */ 116 __u64 pipestall_count:8; /* Bits 53 to 60 */ 117 __u64 send_count:8; /* Bits 61 to 68 */ 118 __u64 dist_acc_count:8; /* Bits 69 to 76 */ 119 __u64 sbid_count:8; /* Bits 77 to 84 */ 120 __u64 sync_count:8; /* Bits 85 to 92 */ 121 __u64 inst_fetch_count:8; /* Bits 93 to 100 */ 122 __u64 active_count:8; /* Bits 101 to 108 */ 123 __u64 ex_id:3; /* Bits 109 to 111 */ 124 __u64 end_flag:1; /* Bit 112 */ 125 __u64 unused_bits:15; 126 __u64 unused[6]; 127 } __packed; 128 129 /* 130 * EU stall data format for Xe3p arch GPUs. 131 */ 132 struct xe_eu_stall_data_xe3p { 133 __u64 ip_addr:61; /* Bits 0 to 60 */ 134 __u64 tdr_count:8; /* Bits 61 to 68 */ 135 __u64 other_count:8; /* Bits 69 to 76 */ 136 __u64 control_count:8; /* Bits 77 to 84 */ 137 __u64 pipestall_count:8; /* Bits 85 to 92 */ 138 __u64 send_count:8; /* Bits 93 to 100 */ 139 __u64 dist_acc_count:8; /* Bits 101 to 108 */ 140 __u64 sbid_count:8; /* Bits 109 to 116 */ 141 __u64 sync_count:8; /* Bits 117 to 124 */ 142 __u64 inst_fetch_count:8; /* Bits 125 to 132 */ 143 __u64 active_count:8; /* Bits 133 to 140 */ 144 __u64 ex_id:3; /* Bits 141 to 143 */ 145 __u64 end_flag:1; /* Bit 144 */ 146 __u64 unused_bits:47; 147 __u64 unused[5]; 148 } __packed; 149 150 const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7}; 151 152 /** 153 * xe_eu_stall_get_sampling_rates - get EU stall sampling rates information. 154 * 155 * @num_rates: Pointer to a u32 to return the number of sampling rates. 156 * @rates: double u64 pointer to point to an array of sampling rates. 157 * 158 * Stores the number of sampling rates and pointer to the array of 159 * sampling rates in the input pointers. 160 * 161 * Returns: Size of the EU stall sampling rates array. 162 */ 163 size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates) 164 { 165 *num_rates = ARRAY_SIZE(eu_stall_sampling_rates); 166 *rates = eu_stall_sampling_rates; 167 168 return sizeof(eu_stall_sampling_rates); 169 } 170 171 /** 172 * xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size. 173 * 174 * Returns: The per XeCore buffer size used to allocate the per GT 175 * EU stall data buffer. 176 */ 177 size_t xe_eu_stall_get_per_xecore_buf_size(void) 178 { 179 return per_xecore_buf_size; 180 } 181 182 /** 183 * xe_eu_stall_data_record_size - get EU stall data record size. 184 * 185 * @xe: Pointer to a Xe device. 186 * 187 * Returns: EU stall data record size. 188 */ 189 size_t xe_eu_stall_data_record_size(struct xe_device *xe) 190 { 191 size_t record_size = 0; 192 193 if (GRAPHICS_VER(xe) >= 35) 194 record_size = sizeof(struct xe_eu_stall_data_xe3p); 195 else if (GRAPHICS_VER(xe) >= 20) 196 record_size = sizeof(struct xe_eu_stall_data_xe2); 197 else if (xe->info.platform == XE_PVC) 198 record_size = sizeof(struct xe_eu_stall_data_pvc); 199 200 201 xe_assert(xe, is_power_of_2(record_size)); 202 203 return record_size; 204 } 205 206 /** 207 * num_data_rows - Return the number of EU stall data rows of 64B each 208 * for a given data size. 209 * 210 * @data_size: EU stall data size 211 */ 212 static u32 num_data_rows(u32 data_size) 213 { 214 return data_size >> 6; 215 } 216 217 static void xe_eu_stall_fini(void *arg) 218 { 219 struct xe_gt *gt = arg; 220 221 destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq); 222 mutex_destroy(>->eu_stall->stream_lock); 223 kfree(gt->eu_stall); 224 } 225 226 /** 227 * xe_eu_stall_init() - Allocate and initialize GT level EU stall data 228 * structure xe_eu_stall_gt within struct xe_gt. 229 * 230 * @gt: GT being initialized. 231 * 232 * Returns: zero on success or a negative error code. 233 */ 234 int xe_eu_stall_init(struct xe_gt *gt) 235 { 236 struct xe_device *xe = gt_to_xe(gt); 237 int ret; 238 239 if (!xe_eu_stall_supported_on_platform(xe)) 240 return 0; 241 242 gt->eu_stall = kzalloc_obj(*gt->eu_stall); 243 if (!gt->eu_stall) { 244 ret = -ENOMEM; 245 goto exit; 246 } 247 248 mutex_init(>->eu_stall->stream_lock); 249 250 gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0); 251 if (!gt->eu_stall->buf_ptr_poll_wq) { 252 ret = -ENOMEM; 253 goto exit_free; 254 } 255 256 return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt); 257 exit_free: 258 mutex_destroy(>->eu_stall->stream_lock); 259 kfree(gt->eu_stall); 260 exit: 261 return ret; 262 } 263 264 static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value, 265 struct eu_stall_open_properties *props) 266 { 267 value = div_u64(value, 251); 268 if (value == 0 || value > 7) { 269 drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", value); 270 return -EINVAL; 271 } 272 props->sampling_rate_mult = value; 273 return 0; 274 } 275 276 static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value, 277 struct eu_stall_open_properties *props) 278 { 279 props->wait_num_reports = value; 280 281 return 0; 282 } 283 284 static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, 285 struct eu_stall_open_properties *props) 286 { 287 struct xe_gt *gt = xe_device_get_gt(xe, value); 288 289 if (!gt) { 290 drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value); 291 return -EINVAL; 292 } 293 props->gt = gt; 294 return 0; 295 } 296 297 typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value, 298 struct eu_stall_open_properties *props); 299 300 static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = { 301 [DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate, 302 [DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports, 303 [DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id, 304 }; 305 306 static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension, 307 struct eu_stall_open_properties *props) 308 { 309 u64 __user *address = u64_to_user_ptr(extension); 310 struct drm_xe_ext_set_property ext; 311 int err; 312 u32 idx; 313 314 err = copy_from_user(&ext, address, sizeof(ext)); 315 if (XE_IOCTL_DBG(xe, err)) 316 return -EFAULT; 317 318 if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) || 319 XE_IOCTL_DBG(xe, !ext.property) || XE_IOCTL_DBG(xe, ext.pad)) 320 return -EINVAL; 321 322 idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs)); 323 return xe_set_eu_stall_property_funcs[idx](xe, ext.value, props); 324 } 325 326 typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension, 327 struct eu_stall_open_properties *props); 328 static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = { 329 [DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property, 330 }; 331 332 #define MAX_USER_EXTENSIONS 5 333 static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension, 334 int ext_number, struct eu_stall_open_properties *props) 335 { 336 u64 __user *address = u64_to_user_ptr(extension); 337 struct drm_xe_user_extension ext; 338 int err; 339 u32 idx; 340 341 if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) 342 return -E2BIG; 343 344 err = copy_from_user(&ext, address, sizeof(ext)); 345 if (XE_IOCTL_DBG(xe, err)) 346 return -EFAULT; 347 348 if (XE_IOCTL_DBG(xe, ext.pad) || 349 XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs))) 350 return -EINVAL; 351 352 idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs)); 353 err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props); 354 if (XE_IOCTL_DBG(xe, err)) 355 return err; 356 357 if (ext.next_extension) 358 return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props); 359 360 return 0; 361 } 362 363 /** 364 * buf_data_size - Calculate the number of bytes in a circular buffer 365 * given the read and write pointers and the size of 366 * the buffer. 367 * 368 * @buf_size: Size of the circular buffer 369 * @read_ptr: Read pointer with an additional overflow bit 370 * @write_ptr: Write pointer with an additional overflow bit 371 * 372 * Since the read and write pointers have an additional overflow bit, 373 * this function calculates the offsets from the pointers and use the 374 * offsets to calculate the data size in the buffer. 375 * 376 * Returns: number of bytes of data in the buffer 377 */ 378 static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr) 379 { 380 u32 read_offset, write_offset, size = 0; 381 382 if (read_ptr == write_ptr) 383 goto exit; 384 385 read_offset = read_ptr & (buf_size - 1); 386 write_offset = write_ptr & (buf_size - 1); 387 388 if (write_offset > read_offset) 389 size = write_offset - read_offset; 390 else 391 size = buf_size - read_offset + write_offset; 392 exit: 393 return size; 394 } 395 396 /** 397 * eu_stall_data_buf_poll - Poll for EU stall data in the buffer. 398 * 399 * @stream: xe EU stall data stream instance 400 * 401 * Returns: true if the EU stall buffer contains minimum stall data as 402 * specified by the event report count, else false. 403 */ 404 static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) 405 { 406 u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0; 407 u32 buf_size = stream->per_xecore_buf_size; 408 struct per_xecore_buf *xecore_buf; 409 struct xe_gt *gt = stream->gt; 410 bool min_data_present = false; 411 u16 group, instance; 412 unsigned int xecore; 413 414 mutex_lock(&stream->xecore_buf_lock); 415 for_each_dss_steering(xecore, gt, group, instance) { 416 xecore_buf = &stream->xecore_buf[xecore]; 417 read_ptr = xecore_buf->read; 418 write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, 419 group, instance); 420 write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg); 421 write_ptr <<= 6; 422 write_ptr &= ((buf_size << 1) - 1); 423 if (!min_data_present) { 424 total_data += buf_data_size(buf_size, read_ptr, write_ptr); 425 if (num_data_rows(total_data) >= stream->wait_num_reports) 426 min_data_present = true; 427 } 428 if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP) 429 set_bit(xecore, stream->data_drop.mask); 430 xecore_buf->write = write_ptr; 431 } 432 /* If a GT or engine reset happens during EU stall sampling, 433 * all EU stall registers get reset to 0 and the cached values of 434 * the EU stall data buffers' read pointers are out of sync with 435 * the register values. This causes invalid data to be returned 436 * from read(). To prevent this, check the value of a EU stall base 437 * register. If it is zero, there has been a reset. 438 */ 439 if (unlikely(!xe_gt_mcr_unicast_read_any(gt, XEHPC_EUSTALL_BASE))) 440 stream->reset_detected = true; 441 442 stream->pollin = min_data_present || stream->reset_detected; 443 mutex_unlock(&stream->xecore_buf_lock); 444 445 return stream->pollin; 446 } 447 448 static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance) 449 { 450 struct xe_device *xe = gt_to_xe(gt); 451 u32 write_ptr_reg; 452 453 /* On PVC, the overflow bit has to be cleared by writing 1 to it. 454 * On Xe2 and later GPUs, the bit has to be cleared by writing 0 to it. 455 */ 456 if (GRAPHICS_VER(xe) >= 20) 457 write_ptr_reg = REG_MASKED_FIELD_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP); 458 else 459 write_ptr_reg = REG_MASKED_FIELD_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP); 460 461 xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance); 462 } 463 464 static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream, 465 char __user *buf, size_t count, 466 size_t *total_data_size, struct xe_gt *gt, 467 u16 group, u16 instance, unsigned int xecore) 468 { 469 size_t read_data_size, copy_size, buf_size; 470 u32 read_ptr_reg, read_ptr, write_ptr; 471 u8 *xecore_start_vaddr, *read_vaddr; 472 struct per_xecore_buf *xecore_buf; 473 u32 read_offset, write_offset; 474 475 /* Hardware increments the read and write pointers such that they can 476 * overflow into one additional bit. For example, a 256KB size buffer 477 * offset pointer needs 18 bits. But HW uses 19 bits for the read and 478 * write pointers. This technique avoids wasting a slot in the buffer. 479 * Read and write offsets are calculated from the pointers in order to 480 * check if the write pointer has wrapped around the array. 481 */ 482 xecore_buf = &stream->xecore_buf[xecore]; 483 xecore_start_vaddr = xecore_buf->vaddr; 484 read_ptr = xecore_buf->read; 485 write_ptr = xecore_buf->write; 486 buf_size = stream->per_xecore_buf_size; 487 488 read_data_size = buf_data_size(buf_size, read_ptr, write_ptr); 489 /* Read only the data that the user space buffer can accommodate */ 490 read_data_size = min_t(size_t, count - *total_data_size, read_data_size); 491 if (read_data_size == 0) 492 goto exit_drop; 493 494 read_offset = read_ptr & (buf_size - 1); 495 write_offset = write_ptr & (buf_size - 1); 496 read_vaddr = xecore_start_vaddr + read_offset; 497 498 if (write_offset > read_offset) { 499 if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size)) 500 return -EFAULT; 501 } else { 502 if (read_data_size >= buf_size - read_offset) 503 copy_size = buf_size - read_offset; 504 else 505 copy_size = read_data_size; 506 if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size)) 507 return -EFAULT; 508 if (copy_to_user(buf + *total_data_size + copy_size, 509 xecore_start_vaddr, read_data_size - copy_size)) 510 return -EFAULT; 511 } 512 513 *total_data_size += read_data_size; 514 read_ptr += read_data_size; 515 516 /* Read pointer can overflow into one additional bit */ 517 read_ptr &= (buf_size << 1) - 1; 518 read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6)); 519 read_ptr_reg = REG_MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg); 520 xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance); 521 xecore_buf->read = read_ptr; 522 trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr, 523 read_data_size, *total_data_size); 524 exit_drop: 525 /* Clear drop bit (if set) after any data was read or if the buffer was empty. 526 * Drop bit can be set even if the buffer is empty as the buffer may have been emptied 527 * in the previous read() and the data drop bit was set during the previous read(). 528 */ 529 if (test_bit(xecore, stream->data_drop.mask)) { 530 clear_dropped_eviction_line_bit(gt, group, instance); 531 clear_bit(xecore, stream->data_drop.mask); 532 } 533 return 0; 534 } 535 536 /** 537 * xe_eu_stall_stream_read_locked - copy EU stall counters data from the 538 * per xecore buffers to the userspace buffer 539 * @stream: A stream opened for EU stall count metrics 540 * @file: An xe EU stall data stream file 541 * @buf: destination buffer given by userspace 542 * @count: the number of bytes userspace wants to read 543 * 544 * Returns: Number of bytes copied or a negative error code 545 * If we've successfully copied any data then reporting that takes 546 * precedence over any internal error status, so the data isn't lost. 547 */ 548 static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream, 549 struct file *file, char __user *buf, 550 size_t count) 551 { 552 struct xe_gt *gt = stream->gt; 553 size_t total_size = 0; 554 u16 group, instance; 555 unsigned int xecore; 556 int ret = 0; 557 558 mutex_lock(&stream->xecore_buf_lock); 559 /* If EU stall registers got reset due to a GT/engine reset, 560 * continuing with the read() will return invalid data to 561 * the user space. Just return -ENODEV instead. 562 */ 563 if (unlikely(stream->reset_detected)) { 564 xe_gt_dbg(gt, "EU stall base register has been reset\n"); 565 mutex_unlock(&stream->xecore_buf_lock); 566 return -ENODEV; 567 } 568 if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { 569 if (!stream->data_drop.reported_to_user) { 570 stream->data_drop.reported_to_user = true; 571 xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", 572 XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); 573 mutex_unlock(&stream->xecore_buf_lock); 574 return -EIO; 575 } 576 stream->data_drop.reported_to_user = false; 577 } 578 for_each_dss_steering(xecore, gt, group, instance) { 579 ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size, 580 gt, group, instance, xecore); 581 if (ret || count == total_size) 582 break; 583 } 584 mutex_unlock(&stream->xecore_buf_lock); 585 return total_size ?: (ret ?: -EAGAIN); 586 } 587 588 /* 589 * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE 590 * before calling read(). 591 * 592 * Returns: The number of bytes copied or a negative error code on failure. 593 * -EIO if HW drops any EU stall data when the buffer is full. 594 */ 595 static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf, 596 size_t count, loff_t *ppos) 597 { 598 struct xe_eu_stall_data_stream *stream = file->private_data; 599 struct xe_gt *gt = stream->gt; 600 ssize_t ret, aligned_count; 601 602 aligned_count = ALIGN_DOWN(count, stream->data_record_size); 603 if (aligned_count == 0) 604 return -EINVAL; 605 606 if (!stream->enabled) { 607 xe_gt_dbg(gt, "EU stall data stream not enabled to read\n"); 608 return -EINVAL; 609 } 610 611 if (!(file->f_flags & O_NONBLOCK)) { 612 do { 613 ret = wait_event_interruptible(stream->poll_wq, stream->pollin); 614 if (ret) 615 return -EINTR; 616 617 mutex_lock(>->eu_stall->stream_lock); 618 ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count); 619 mutex_unlock(>->eu_stall->stream_lock); 620 } while (ret == -EAGAIN); 621 } else { 622 mutex_lock(>->eu_stall->stream_lock); 623 ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count); 624 mutex_unlock(>->eu_stall->stream_lock); 625 } 626 627 /* 628 * This may not work correctly if the user buffer is very small. 629 * We don't want to block the next read() when there is data in the buffer 630 * now, but couldn't be accommodated in the small user buffer. 631 */ 632 if (!stream->reset_detected) 633 stream->pollin = false; 634 635 return ret; 636 } 637 638 static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) 639 { 640 struct xe_gt *gt = stream->gt; 641 642 mutex_destroy(&stream->xecore_buf_lock); 643 gt->eu_stall->stream = NULL; 644 kfree(stream); 645 } 646 647 static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream) 648 { 649 xe_bo_unpin_map_no_vm(stream->bo); 650 kfree(stream->xecore_buf); 651 } 652 653 static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream, 654 u16 last_xecore) 655 { 656 struct xe_tile *tile = stream->gt->tile; 657 struct xe_bo *bo; 658 u32 size; 659 660 stream->xecore_buf = kzalloc_objs(*stream->xecore_buf, last_xecore); 661 if (!stream->xecore_buf) 662 return -ENOMEM; 663 664 size = stream->per_xecore_buf_size * last_xecore; 665 666 bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel, 667 XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false); 668 if (IS_ERR(bo)) { 669 kfree(stream->xecore_buf); 670 return PTR_ERR(bo); 671 } 672 673 XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64)); 674 stream->bo = bo; 675 676 return 0; 677 } 678 679 static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream) 680 { 681 u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value; 682 struct per_xecore_buf *xecore_buf; 683 struct xe_gt *gt = stream->gt; 684 u16 group, instance; 685 int xecore; 686 687 /* Take runtime pm ref and forcewake to disable RC6 */ 688 xe_pm_runtime_get(gt_to_xe(gt)); 689 stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER); 690 if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FW_RENDER)) { 691 xe_gt_err(gt, "Failed to get RENDER forcewake\n"); 692 xe_pm_runtime_put(gt_to_xe(gt)); 693 return -ETIMEDOUT; 694 } 695 696 if (XE_GT_WA(gt, 22016596838)) 697 xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, 698 REG_MASKED_FIELD_ENABLE(DISABLE_DOP_GATING)); 699 700 for_each_dss_steering(xecore, gt, group, instance) { 701 write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance); 702 /* Clear any drop bits set and not cleared in the previous session. */ 703 if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP) 704 clear_dropped_eviction_line_bit(gt, group, instance); 705 write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg); 706 read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr); 707 read_ptr_reg = REG_MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg); 708 /* Initialize the read pointer to the write pointer */ 709 xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance); 710 write_ptr <<= 6; 711 write_ptr &= (stream->per_xecore_buf_size << 1) - 1; 712 xecore_buf = &stream->xecore_buf[xecore]; 713 xecore_buf->write = write_ptr; 714 xecore_buf->read = write_ptr; 715 } 716 stream->reset_detected = false; 717 stream->data_drop.reported_to_user = false; 718 bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS); 719 720 reg_value = REG_MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE, 721 REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) | 722 REG_FIELD_PREP(EUSTALL_SAMPLE_RATE, 723 stream->sampling_rate_mult)); 724 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value); 725 /* GGTT addresses can never be > 32 bits */ 726 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0); 727 reg_value = xe_bo_ggtt_addr(stream->bo); 728 reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ, 729 stream->per_xecore_buf_size / SZ_256K); 730 reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING; 731 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value); 732 733 return 0; 734 } 735 736 static void eu_stall_data_buf_poll_work_fn(struct work_struct *work) 737 { 738 struct xe_eu_stall_data_stream *stream = 739 container_of(work, typeof(*stream), buf_poll_work.work); 740 struct xe_gt *gt = stream->gt; 741 742 if (eu_stall_data_buf_poll(stream)) 743 wake_up(&stream->poll_wq); 744 745 if (!stream->reset_detected) 746 queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq, 747 &stream->buf_poll_work, 748 msecs_to_jiffies(POLL_PERIOD_MS)); 749 } 750 751 static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, 752 struct eu_stall_open_properties *props) 753 { 754 unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores; 755 struct per_xecore_buf *xecore_buf; 756 struct xe_gt *gt = stream->gt; 757 xe_dss_mask_t all_xecores; 758 u16 group, instance; 759 u32 vaddr_offset; 760 int ret; 761 762 bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, 763 XE_MAX_DSS_FUSE_BITS); 764 num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS); 765 last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1; 766 767 max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores); 768 if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) { 769 xe_gt_dbg(gt, "Invalid EU stall event report count %u\n", 770 props->wait_num_reports); 771 xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n", 772 max_wait_num_reports); 773 return -EINVAL; 774 } 775 776 init_waitqueue_head(&stream->poll_wq); 777 mutex_init(&stream->xecore_buf_lock); 778 INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); 779 stream->per_xecore_buf_size = per_xecore_buf_size; 780 stream->sampling_rate_mult = props->sampling_rate_mult; 781 stream->wait_num_reports = props->wait_num_reports; 782 stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt)); 783 784 ret = xe_eu_stall_data_buf_alloc(stream, last_xecore); 785 if (ret) 786 return ret; 787 788 for_each_dss_steering(xecore, gt, group, instance) { 789 xecore_buf = &stream->xecore_buf[xecore]; 790 vaddr_offset = xecore * stream->per_xecore_buf_size; 791 xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset; 792 } 793 return 0; 794 } 795 796 static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream, 797 struct file *file, poll_table *wait) 798 { 799 __poll_t events = 0; 800 801 poll_wait(file, &stream->poll_wq, wait); 802 803 if (stream->pollin) 804 events |= EPOLLIN; 805 806 return events; 807 } 808 809 static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait) 810 { 811 struct xe_eu_stall_data_stream *stream = file->private_data; 812 struct xe_gt *gt = stream->gt; 813 __poll_t ret; 814 815 mutex_lock(>->eu_stall->stream_lock); 816 ret = xe_eu_stall_stream_poll_locked(stream, file, wait); 817 mutex_unlock(>->eu_stall->stream_lock); 818 819 return ret; 820 } 821 822 static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream) 823 { 824 struct xe_gt *gt = stream->gt; 825 int ret = 0; 826 827 if (stream->enabled) 828 return ret; 829 830 stream->enabled = true; 831 832 ret = xe_eu_stall_stream_enable(stream); 833 834 queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq, 835 &stream->buf_poll_work, 836 msecs_to_jiffies(POLL_PERIOD_MS)); 837 return ret; 838 } 839 840 static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream) 841 { 842 struct xe_gt *gt = stream->gt; 843 844 if (!stream->enabled) 845 return 0; 846 847 stream->enabled = false; 848 849 xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0); 850 851 cancel_delayed_work_sync(&stream->buf_poll_work); 852 853 if (XE_GT_WA(gt, 22016596838)) 854 xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2, 855 REG_MASKED_FIELD_DISABLE(DISABLE_DOP_GATING)); 856 857 xe_force_wake_put(gt_to_fw(gt), stream->fw_ref); 858 xe_pm_runtime_put(gt_to_xe(gt)); 859 860 return 0; 861 } 862 863 static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream, 864 unsigned int cmd, unsigned long arg) 865 { 866 switch (cmd) { 867 case DRM_XE_OBSERVATION_IOCTL_ENABLE: 868 return xe_eu_stall_enable_locked(stream); 869 case DRM_XE_OBSERVATION_IOCTL_DISABLE: 870 return xe_eu_stall_disable_locked(stream); 871 } 872 873 return -EINVAL; 874 } 875 876 static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 877 { 878 struct xe_eu_stall_data_stream *stream = file->private_data; 879 struct xe_gt *gt = stream->gt; 880 long ret; 881 882 mutex_lock(>->eu_stall->stream_lock); 883 ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg); 884 mutex_unlock(>->eu_stall->stream_lock); 885 886 return ret; 887 } 888 889 static int xe_eu_stall_stream_close(struct inode *inode, struct file *file) 890 { 891 struct xe_eu_stall_data_stream *stream = file->private_data; 892 struct xe_gt *gt = stream->gt; 893 894 mutex_lock(>->eu_stall->stream_lock); 895 xe_eu_stall_disable_locked(stream); 896 xe_eu_stall_data_buf_destroy(stream); 897 xe_eu_stall_stream_free(stream); 898 mutex_unlock(>->eu_stall->stream_lock); 899 900 drm_dev_put(>->tile->xe->drm); 901 902 return 0; 903 } 904 905 static const struct file_operations fops_eu_stall = { 906 .owner = THIS_MODULE, 907 .llseek = noop_llseek, 908 .release = xe_eu_stall_stream_close, 909 .poll = xe_eu_stall_stream_poll, 910 .read = xe_eu_stall_stream_read, 911 .unlocked_ioctl = xe_eu_stall_stream_ioctl, 912 .compat_ioctl = xe_eu_stall_stream_ioctl, 913 }; 914 915 static int xe_eu_stall_stream_open_locked(struct drm_device *dev, 916 struct eu_stall_open_properties *props, 917 struct drm_file *file) 918 { 919 struct xe_eu_stall_data_stream *stream; 920 struct xe_gt *gt = props->gt; 921 unsigned long f_flags = 0; 922 int ret, stream_fd; 923 924 /* Only one session can be active at any time */ 925 if (gt->eu_stall->stream) { 926 xe_gt_dbg(gt, "EU stall sampling session already active\n"); 927 return -EBUSY; 928 } 929 930 stream = kzalloc_obj(*stream); 931 if (!stream) 932 return -ENOMEM; 933 934 gt->eu_stall->stream = stream; 935 stream->gt = gt; 936 937 ret = xe_eu_stall_stream_init(stream, props); 938 if (ret) { 939 xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret); 940 goto err_free; 941 } 942 943 stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags); 944 if (stream_fd < 0) { 945 ret = stream_fd; 946 xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret); 947 goto err_destroy; 948 } 949 950 /* Take a reference on the driver that will be kept with stream_fd 951 * until its release. 952 */ 953 drm_dev_get(>->tile->xe->drm); 954 955 return stream_fd; 956 957 err_destroy: 958 xe_eu_stall_data_buf_destroy(stream); 959 err_free: 960 xe_eu_stall_stream_free(stream); 961 return ret; 962 } 963 964 /** 965 * xe_eu_stall_stream_open - Open a xe EU stall data stream fd 966 * 967 * @dev: DRM device pointer 968 * @data: pointer to first struct @drm_xe_ext_set_property in 969 * the chain of input properties from the user space. 970 * @file: DRM file pointer 971 * 972 * This function opens a EU stall data stream with input properties from 973 * the user space. 974 * 975 * Returns: EU stall data stream fd on success or a negative error code. 976 */ 977 int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file) 978 { 979 struct xe_device *xe = to_xe_device(dev); 980 struct eu_stall_open_properties props = {}; 981 int ret; 982 983 if (!xe_eu_stall_supported_on_platform(xe)) { 984 drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n"); 985 return -ENODEV; 986 } 987 988 if (xe_observation_paranoid && !perfmon_capable()) { 989 drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n"); 990 return -EACCES; 991 } 992 993 /* Initialize and set default values */ 994 props.wait_num_reports = 1; 995 props.sampling_rate_mult = 4; 996 997 ret = xe_eu_stall_user_extensions(xe, data, 0, &props); 998 if (ret) 999 return ret; 1000 1001 if (!props.gt) { 1002 drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n"); 1003 return -EINVAL; 1004 } 1005 1006 mutex_lock(&props.gt->eu_stall->stream_lock); 1007 ret = xe_eu_stall_stream_open_locked(dev, &props, file); 1008 mutex_unlock(&props.gt->eu_stall->stream_lock); 1009 1010 return ret; 1011 } 1012