// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023-2024 Intel Corporation
 */

#include <linux/anon_inodes.h>
#include <linux/delay.h>
#include <linux/nospec.h>
#include <linux/poll.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <uapi/drm/xe_drm.h>

#include "abi/guc_actions_slpc_abi.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_oa_regs.h"
#include "xe_assert.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_guc_pc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_oa.h"
#include "xe_observation.h"
#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
#include "xe_sync.h"

#define DEFAULT_POLL_FREQUENCY_HZ 200
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
#define XE_OA_UNIT_INVALID U32_MAX

enum xe_oa_submit_deps {
	XE_OA_SUBMIT_NO_DEPS,
	XE_OA_SUBMIT_ADD_DEPS,
};

enum xe_oa_user_extn_from {
	XE_OA_USER_EXTN_FROM_OPEN,
	XE_OA_USER_EXTN_FROM_CONFIG,
};

struct xe_oa_reg {
	struct xe_reg addr;
	u32 value;
};

struct xe_oa_config {
	struct xe_oa *oa;

	char uuid[UUID_STRING_LEN + 1];
	int id;

	const struct xe_oa_reg *regs;
	u32 regs_len;

	struct attribute_group sysfs_metric;
	struct attribute *attrs[2];
	struct kobj_attribute sysfs_metric_id;

	struct kref ref;
	struct rcu_head rcu;
};

struct xe_oa_open_param {
	struct xe_file *xef;
	u32 oa_unit_id;
	bool sample;
	u32 metric_set;
	enum xe_oa_format_name oa_format;
	int period_exponent;
	bool disabled;
	int exec_queue_id;
	int engine_instance;
	struct xe_exec_queue *exec_q;
	struct xe_hw_engine *hwe;
	bool no_preempt;
	struct drm_xe_sync __user *syncs_user;
	int num_syncs;
	struct xe_sync_entry *syncs;
	size_t oa_buffer_size;
	int wait_num_reports;
};

struct xe_oa_config_bo {
	struct llist_node node;

	struct xe_oa_config *oa_config;
	struct xe_bb *bb;
};

struct xe_oa_fence {
	/* @base: dma fence base */
	struct dma_fence base;
	/* @lock: lock for the fence */
	spinlock_t lock;
	/* @work: work to signal @base */
	struct delayed_work work;
	/* @cb: callback to schedule @work */
	struct dma_fence_cb cb;
};

#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x

static const struct xe_oa_format oa_formats[] = {
	[XE_OA_FORMAT_C4_B8] = { 7, 64, DRM_FMT(OAG) },
	[XE_OA_FORMAT_A12] = { 0, 64, DRM_FMT(OAG) },
	[XE_OA_FORMAT_A12_B8_C8] = { 2, 128, DRM_FMT(OAG) },
	[XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAG) },
	[XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAR) },
	[XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256, DRM_FMT(OAG) },
	[XE_OAC_FORMAT_A24u64_B8_C8] = { 1, 320, DRM_FMT(OAC), HDR_64_BIT },
	[XE_OAC_FORMAT_A22u32_R2u32_B8_C8] = { 2, 192, DRM_FMT(OAC), HDR_64_BIT },
	[XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT },
	[XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, DRM_FMT(OAM_MPEC), HDR_64_BIT },
	[XE_OA_FORMAT_PEC64u64] = { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
	[XE_OA_FORMAT_PEC64u64_B8_C8] = { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 },
	[XE_OA_FORMAT_PEC64u32] = { 1, 320, DRM_FMT(PEC), HDR_64_BIT },
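
/*
 * Helpers for modular arithmetic on the circular OA buffer: distances and
 * increments wrap at oa_buffer.circ_size, which need not be a power of 2.
 */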
	[XE_OA_FORMAT_PEC32u64_G1] = { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
	[XE_OA_FORMAT_PEC32u32_G1] = { 5, 192, DRM_FMT(PEC), HDR_64_BIT },
	[XE_OA_FORMAT_PEC32u64_G2] = { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
	[XE_OA_FORMAT_PEC32u32_G2] = { 6, 192, DRM_FMT(PEC), HDR_64_BIT },
	[XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
	[XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 },
};

static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head)
{
	return tail >= head ? tail - head :
		tail + stream->oa_buffer.circ_size - head;
}

static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n)
{
	return ptr + n >= stream->oa_buffer.circ_size ?
		ptr + n - stream->oa_buffer.circ_size : ptr + n;
}

static void xe_oa_config_release(struct kref *ref)
{
	struct xe_oa_config *oa_config =
		container_of(ref, typeof(*oa_config), ref);

	kfree(oa_config->regs);

	kfree_rcu(oa_config, rcu);
}

static void xe_oa_config_put(struct xe_oa_config *oa_config)
{
	if (!oa_config)
		return;

	kref_put(&oa_config->ref, xe_oa_config_release);
}

static struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config)
{
	return kref_get_unless_zero(&oa_config->ref) ? oa_config : NULL;
}

static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set)
{
	struct xe_oa_config *oa_config;

	rcu_read_lock();
	oa_config = idr_find(&oa->metrics_idr, metrics_set);
	if (oa_config)
		oa_config = xe_oa_config_get(oa_config);
	rcu_read_unlock();

	return oa_config;
}

static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence)
{
	xe_oa_config_put(oa_bo->oa_config);
	xe_bb_free(oa_bo->bb, last_fence);
	kfree(oa_bo);
}

static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream)
{
	return &stream->hwe->oa_unit->regs;
}

static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
{
	return xe_mmio_read32(&stream->gt->mmio, __oa_regs(stream)->oa_tail_ptr) &
		OAG_OATAILPTR_MASK;
}

#define oa_report_header_64bit(__s) \
	((__s)->oa_buffer.format->header == HDR_64_BIT)

static u64 oa_report_id(struct xe_oa_stream *stream, void *report)
{
	return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
}

static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report)
{
	if (oa_report_header_64bit(stream))
		*(u64 *)report = 0;
	else
		*report = 0;
}

static u64 oa_timestamp(struct xe_oa_stream *stream, void *report)
{
	return oa_report_header_64bit(stream) ?
		*((u64 *)report + 1) :
		*((u32 *)report + 1);
}

static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report)
{
	if (oa_report_header_64bit(stream))
		*(u64 *)&report[2] = 0;
	else
		report[1] = 0;
}
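
/*
 * Check for new reports in the OA buffer: trim the hardware tail back past any
 * partially written report, walk back over reports whose id/timestamp have not
 * yet landed, and report whether at least wait_num_reports complete reports are
 * available between head and the new software tail.
 */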

static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream)
{
	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
	u32 tail, hw_tail, partial_report_size, available;
	int report_size = stream->oa_buffer.format->size;
	unsigned long flags;
	bool pollin;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	hw_tail = xe_oa_hw_tail_read(stream);
	hw_tail -= gtt_offset;

	/*
	 * The tail pointer increases in 64 byte (cacheline size), not in report_size
	 * increments. Also report size may not be a power of 2. Compute potential
	 * partially landed report in OA buffer.
	 */
	partial_report_size = xe_oa_circ_diff(stream, hw_tail, stream->oa_buffer.tail);
	partial_report_size %= report_size;

	/* Subtract partial amount off the tail */
	hw_tail = xe_oa_circ_diff(stream, hw_tail, partial_report_size);

	tail = hw_tail;

	/*
	 * Walk the stream backward until we find a report with report id and timestamp
	 * not 0. We can't tell whether a report has fully landed in memory before the
	 * report id and timestamp of the following report have landed.
	 *
	 * This is assuming that the writes of the OA unit land in memory in the order
	 * they were written. If not : (╯°□°)╯︵ ┻━┻
	 */
	while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
		void *report = stream->oa_buffer.vaddr + tail;

		if (oa_report_id(stream, report) || oa_timestamp(stream, report))
			break;

		tail = xe_oa_circ_diff(stream, tail, report_size);
	}

	if (xe_oa_circ_diff(stream, hw_tail, tail) > report_size)
		drm_dbg(&stream->oa->xe->drm,
			"unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n",
			stream->oa_buffer.head, tail, hw_tail);

	stream->oa_buffer.tail = tail;

	available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head);
	pollin = available >= stream->wait_num_reports * report_size;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	return pollin;
}

static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
	struct xe_oa_stream *stream =
		container_of(hrtimer, typeof(*stream), poll_check_timer);

	if (xe_oa_buffer_check_unlocked(stream)) {
		stream->pollin = true;
		wake_up(&stream->poll_wq);
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns));

	return HRTIMER_RESTART;
}

static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf,
			       size_t count, size_t *offset, const u8 *report)
{
	int report_size = stream->oa_buffer.format->size;
	int report_size_partial;
	u8 *oa_buf_end;

	if ((count - *offset) < report_size)
		return -ENOSPC;

	buf += *offset;

	oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
	report_size_partial = oa_buf_end - report;

	if (report_size_partial < report_size) {
		if (copy_to_user(buf, report, report_size_partial))
			return -EFAULT;
		buf += report_size_partial;

		if (copy_to_user(buf, stream->oa_buffer.vaddr,
				 report_size - report_size_partial))
			return -EFAULT;
	} else if (copy_to_user(buf, report, report_size)) {
		return -EFAULT;
	}

	*offset += report_size;

	return 0;
}
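
/*
 * Copy all complete reports between head and tail to userspace. Each copied
 * report is scrubbed afterwards: only the id/timestamp are cleared when
 * circ_size is a multiple of the report size, otherwise the whole (possibly
 * wrapping) report is zeroed, so stale data is never mistaken for a new report.
 */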

static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
				size_t count, size_t *offset)
{
	int report_size = stream->oa_buffer.format->size;
	u8 *oa_buf_base = stream->oa_buffer.vaddr;
	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
	size_t start_offset = *offset;
	unsigned long flags;
	u32 head, tail;
	int ret = 0;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
	head = stream->oa_buffer.head;
	tail = stream->oa_buffer.tail;
	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	xe_assert(stream->oa->xe,
		  head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);

	for (; xe_oa_circ_diff(stream, tail, head);
	     head = xe_oa_circ_incr(stream, head, report_size)) {
		u8 *report = oa_buf_base + head;

		ret = xe_oa_append_report(stream, buf, count, offset, report);
		if (ret)
			break;

		if (!(stream->oa_buffer.circ_size % report_size)) {
			/* Clear out report id and timestamp to detect unlanded reports */
			oa_report_id_clear(stream, (void *)report);
			oa_timestamp_clear(stream, (void *)report);
		} else {
			u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
			u32 part = oa_buf_end - report;

			/* Zero out the entire report */
			if (report_size <= part) {
				memset(report, 0, report_size);
			} else {
				memset(report, 0, part);
				memset(oa_buf_base, 0, report_size - part);
			}
		}
	}

	if (start_offset != *offset) {
		struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr;

		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
		xe_mmio_write32(&stream->gt->mmio, oaheadptr,
				(head + gtt_offset) & OAG_OAHEADPTR_MASK);
		stream->oa_buffer.head = head;
		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
	}

	return ret;
}
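
/*
 * Program the OA buffer registers: OABUFFER takes the GGTT offset and an
 * encoded size exponent (re-biased when the buffer is larger than 16MB and
 * scaled by 8 via the buffer size select debug bit), head/tail are reset and
 * the buffer is zeroed since report detection relies on zero id/timestamp
 * fields.
 */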

static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
{
	u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
	int size_exponent = __ffs(stream->oa_buffer.bo->size);
	u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT;
	struct xe_mmio *mmio = &stream->gt->mmio;
	unsigned long flags;

	/*
	 * If oa buffer size is more than 16MB (exponent greater than 24), the
	 * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set.
	 */
	oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK,
				 size_exponent > 24 ? size_exponent - 20 : size_exponent - 17);

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0);
	xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr,
			gtt_offset & OAG_OAHEADPTR_MASK);
	stream->oa_buffer.head = 0;
	/*
	 * PRM says: "This MMIO must be set before the OATAILPTR register and after the
	 * OAHEADPTR register. This is to enable proper functionality of the overflow bit".
	 */
	xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf);
	xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr,
			gtt_offset & OAG_OATAILPTR_MASK);

	/* Mark that we need updated tail pointer to read from */
	stream->oa_buffer.tail = 0;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
	memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size);
}

static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
{
	return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) |
		REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) |
		REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size);
}

static u32 __oa_ccs_select(struct xe_oa_stream *stream)
{
	u32 val;

	if (stream->hwe->class != XE_ENGINE_CLASS_COMPUTE)
		return 0;

	val = REG_FIELD_PREP(OAG_OACONTROL_OA_CCS_SELECT_MASK, stream->hwe->instance);
	xe_assert(stream->oa->xe,
		  REG_FIELD_GET(OAG_OACONTROL_OA_CCS_SELECT_MASK, val) == stream->hwe->instance);
	return val;
}

static void xe_oa_enable(struct xe_oa_stream *stream)
{
	const struct xe_oa_format *format = stream->oa_buffer.format;
	const struct xe_oa_regs *regs;
	u32 val;

	/*
	 * BSpec: 46822: Bit 0. Even if stream->sample is 0, for OAR to function, the OA
	 * buffer must be correctly initialized
	 */
	xe_oa_init_oa_buffer(stream);

	regs = __oa_regs(stream);
	val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) |
		__oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE;

	if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
	    stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
		val |= OAG_OACONTROL_OA_PES_DISAG_EN;

	xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val);
}

static void xe_oa_disable(struct xe_oa_stream *stream)
{
	struct xe_mmio *mmio = &stream->gt->mmio;

	xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0);
	if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl,
			   OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false))
		drm_err(&stream->oa->xe->drm,
			"wait for OA to be disabled timed out\n");

	if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) {
		/* <= XE_METEORLAKE except XE_PVC */
		xe_mmio_write32(mmio, OA_TLB_INV_CR, 1);
		if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
			drm_err(&stream->oa->xe->drm,
				"wait for OA tlb invalidate timed out\n");
	}
}

static int xe_oa_wait_unlocked(struct xe_oa_stream *stream)
{
	/* We might wait indefinitely if periodic sampling is not enabled */
	if (!stream->periodic)
		return -EINVAL;

	return wait_event_interruptible(stream->poll_wq,
					xe_oa_buffer_check_unlocked(stream));
}

#define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \
				OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST)

static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf,
			size_t count, size_t *offset)
{
	/* Only clear our bits to avoid side-effects */
	stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status,
					  OASTATUS_RELEVANT_BITS, 0);
	/*
	 * Signal to userspace that there is non-zero OA status to read via
	 * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl
	 */
	if (stream->oa_status & OASTATUS_RELEVANT_BITS)
		return -EIO;

	return xe_oa_append_reports(stream, buf, count, offset);
}
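
/*
 * read() on the stream fd: unless O_NONBLOCK is set, block until at least
 * wait_num_reports reports are available, then copy complete reports into the
 * user buffer. A non-zero OA status is reported to userspace as -EIO.
 */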

static ssize_t xe_oa_read(struct file *file, char __user *buf,
			  size_t count, loff_t *ppos)
{
	struct xe_oa_stream *stream = file->private_data;
	size_t offset = 0;
	int ret;

	/* Can't read from disabled streams */
	if (!stream->enabled || !stream->sample)
		return -EINVAL;

	if (!(file->f_flags & O_NONBLOCK)) {
		do {
			ret = xe_oa_wait_unlocked(stream);
			if (ret)
				return ret;

			mutex_lock(&stream->stream_lock);
			ret = __xe_oa_read(stream, buf, count, &offset);
			mutex_unlock(&stream->stream_lock);
		} while (!offset && !ret);
	} else {
		mutex_lock(&stream->stream_lock);
		ret = __xe_oa_read(stream, buf, count, &offset);
		mutex_unlock(&stream->stream_lock);
	}

	/*
	 * Typically we clear pollin here in order to wait for the new hrtimer callback
	 * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC,
	 * which means that more OA data is available than could fit in the user provided
	 * buffer. In this case we want the next poll() call to not block.
	 *
	 * Also in case of -EIO, we have already waited for data before returning
	 * -EIO, so there is no need to wait again
	 */
	if (ret != -ENOSPC && ret != -EIO)
		stream->pollin = false;

	/* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */
	return offset ?: (ret ?: -EAGAIN);
}

static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream,
				  struct file *file, poll_table *wait)
{
	__poll_t events = 0;

	poll_wait(file, &stream->poll_wq, wait);

	/*
	 * We don't explicitly check whether there's something to read here since this
	 * path may be hot depending on what else userspace is polling, or on the timeout
	 * in use. We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there
	 * are samples to read
	 */
	if (stream->pollin)
		events |= EPOLLIN;

	return events;
}

static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
{
	struct xe_oa_stream *stream = file->private_data;
	__poll_t ret;

	mutex_lock(&stream->stream_lock);
	ret = xe_oa_poll_locked(stream, file, wait);
	mutex_unlock(&stream->stream_lock);

	return ret;
}

static void xe_oa_lock_vma(struct xe_exec_queue *q)
{
	if (q->vm) {
		down_read(&q->vm->lock);
		xe_vm_lock(q->vm, false);
	}
}

static void xe_oa_unlock_vma(struct xe_exec_queue *q)
{
	if (q->vm) {
		xe_vm_unlock(q->vm);
		up_read(&q->vm->lock);
	}
}
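
/*
 * Create a job for @bb on the stream's exec queue (or its kernel queue),
 * optionally add the stream's sync objects as dependencies, and return the
 * job's finished fence.
 */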

static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps,
					  struct xe_bb *bb)
{
	struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q;
	struct xe_sched_job *job;
	struct dma_fence *fence;
	int err = 0;

	xe_oa_lock_vma(q);

	job = xe_bb_create_job(q, bb);
	if (IS_ERR(job)) {
		err = PTR_ERR(job);
		goto exit;
	}
	job->ggtt = true;

	if (deps == XE_OA_SUBMIT_ADD_DEPS) {
		for (int i = 0; i < stream->num_syncs && !err; i++)
			err = xe_sync_entry_add_deps(&stream->syncs[i], job);
		if (err) {
			drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err);
			goto err_put_job;
		}
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	xe_oa_unlock_vma(q);

	return fence;
err_put_job:
	xe_sched_job_put(job);
exit:
	xe_oa_unlock_vma(q);
	return ERR_PTR(err);
}

static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
{
	u32 i;

#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)

	for (i = 0; i < n_regs; i++) {
		if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
			u32 n_lri = min_t(u32, n_regs - i,
					  MI_LOAD_REGISTER_IMM_MAX_REGS);

			bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_lri);
		}
		bb->cs[bb->len++] = reg_data[i].addr.addr;
		bb->cs[bb->len++] = reg_data[i].value;
	}
}

static int num_lri_dwords(int num_regs)
{
	int count = 0;

	if (num_regs > 0) {
		count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
		count += num_regs * 2;
	}

	return count;
}

static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
{
	xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
}

static void xe_oa_free_configs(struct xe_oa_stream *stream)
{
	struct xe_oa_config_bo *oa_bo, *tmp;

	xe_oa_config_put(stream->oa_config);
	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
		free_oa_config_bo(oa_bo, stream->last_fence);
	dma_fence_put(stream->last_fence);
}

static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count)
{
	struct dma_fence *fence;
	struct xe_bb *bb;
	int err;

	bb = xe_bb_new(stream->gt, 2 * count + 1, false);
	if (IS_ERR(bb)) {
		err = PTR_ERR(bb);
		goto exit;
	}

	write_cs_mi_lri(bb, reg_lri, count);

	fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		goto free_bb;
	}
	xe_bb_free(bb, fence);
	dma_fence_put(fence);

	return 0;
free_bb:
	xe_bb_free(bb, NULL);
exit:
	return err;
}
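
/*
 * The OAR/OAC counter configuration is tied to the submitting context: it is
 * programmed with an MI_LOAD_REGISTER_IMM batch run on the stream's exec queue
 * rather than via plain MMIO writes.
 */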

static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable)
{
	const struct xe_oa_format *format = stream->oa_buffer.format;
	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);

	struct xe_oa_reg reg_lri[] = {
		{
			OACTXCONTROL(stream->hwe->mmio_base),
			enable ? OA_COUNTER_RESUME : 0,
		},
		{
			OAR_OACONTROL,
			oacontrol,
		},
		{
			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
			_MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
				      enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0)
		},
	};

	return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri));
}

static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
{
	const struct xe_oa_format *format = stream->oa_buffer.format;
	u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) |
		(enable ? OAR_OACONTROL_COUNTER_ENABLE : 0);
	struct xe_oa_reg reg_lri[] = {
		{
			OACTXCONTROL(stream->hwe->mmio_base),
			enable ? OA_COUNTER_RESUME : 0,
		},
		{
			OAC_OACONTROL,
			oacontrol
		},
		{
			RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
			_MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE,
				      enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) |
			_MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0),
		},
	};

	/* Set ccs select to enable programming of OAC_OACONTROL */
	xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl,
			__oa_ccs_select(stream));

	return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri));
}

static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable)
{
	switch (stream->hwe->class) {
	case XE_ENGINE_CLASS_RENDER:
		return xe_oa_configure_oar_context(stream, enable);
	case XE_ENGINE_CLASS_COMPUTE:
		return xe_oa_configure_oac_context(stream, enable);
	default:
		/* Video engines do not support MI_REPORT_PERF_COUNT */
		return 0;
	}
}

#define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255)

static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool enable)
{
	return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_MMIO_TRG,
			     enable && stream && stream->sample ?
			     0 : OAG_OA_DEBUG_DISABLE_MMIO_TRG);
}

static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
{
	struct xe_mmio *mmio = &stream->gt->mmio;
	u32 sqcnt1;

	/*
	 * Wa_1508761755:xehpsdv, dg2
	 * Enable thread stall DOP gating and EU DOP gating.
	 */
	if (stream->oa->xe->info.platform == XE_DG2) {
		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
					  _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE));
		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
					  _MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
	}

	xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
			oag_configure_mmio_trigger(stream, false));

	/* disable the context save/restore or OAR counters */
	if (stream->exec_q)
		xe_oa_configure_oa_context(stream, false);

	/* Make sure we disable noa to save power. */
	xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0);

	sqcnt1 = SQCNT1_PMON_ENABLE |
		 (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);

	/* Reset PMON Enable to save power. */
	xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0);
}

static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
{
	struct xe_oa_unit *u = stream->hwe->oa_unit;
	struct xe_gt *gt = stream->hwe->gt;

	if (WARN_ON(stream != u->exclusive_stream))
		return;

	WRITE_ONCE(u->exclusive_stream, NULL);

	mutex_destroy(&stream->stream_lock);

	xe_oa_disable_metric_set(stream);
	xe_exec_queue_put(stream->k_exec_q);

	xe_oa_free_oa_buffer(stream);

	xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	xe_pm_runtime_put(stream->oa->xe);

	/* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */
	if (stream->override_gucrc)
		xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));

	xe_oa_free_configs(stream);
	xe_file_put(stream->xef);
}

static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
{
	struct xe_bo *bo;

	bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
				  size, ttm_bo_type_kernel,
				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
	if (IS_ERR(bo))
		return PTR_ERR(bo);

	stream->oa_buffer.bo = bo;
	/* mmap implementation requires OA buffer to be in system memory */
	xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
	stream->oa_buffer.vaddr = bo->vmap.vaddr;
	return 0;
}

static struct xe_oa_config_bo *
__xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config)
{
	struct xe_oa_config_bo *oa_bo;
	size_t config_length;
	struct xe_bb *bb;

	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
	if (!oa_bo)
		return ERR_PTR(-ENOMEM);

	config_length = num_lri_dwords(oa_config->regs_len);
	config_length = ALIGN(sizeof(u32) * config_length, XE_PAGE_SIZE) / sizeof(u32);

	bb = xe_bb_new(stream->gt, config_length, false);
	if (IS_ERR(bb))
		goto err_free;

	write_cs_mi_lri(bb, oa_config->regs, oa_config->regs_len);

	oa_bo->bb = bb;
	oa_bo->oa_config = xe_oa_config_get(oa_config);
	llist_add(&oa_bo->node, &stream->oa_config_bos);

	return oa_bo;
err_free:
	kfree(oa_bo);
	return ERR_CAST(bb);
}

static struct xe_oa_config_bo *
xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config)
{
	struct xe_oa_config_bo *oa_bo;

	/* Look for the buffer in the already allocated BOs attached to the stream */
	llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
		if (oa_bo->oa_config == oa_config &&
		    memcmp(oa_bo->oa_config->uuid, oa_config->uuid,
			   sizeof(oa_config->uuid)) == 0)
			goto out;
	}

	oa_bo = __xe_oa_alloc_config_buffer(stream, oa_config);
out:
	return oa_bo;
}

static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence)
{
	dma_fence_put(stream->last_fence);
	stream->last_fence = dma_fence_get(fence);
}

static void xe_oa_fence_work_fn(struct work_struct *w)
{
	struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work);

	/* Signal fence to indicate new OA configuration is active */
	dma_fence_signal(&ofence->base);
	dma_fence_put(&ofence->base);
}

static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	/* Additional empirical delay needed for NOA programming after registers are written */
#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500

	struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb);

	INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn);
	queue_delayed_work(system_unbound_wq, &ofence->work,
			   usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US));
	dma_fence_put(fence);
}

static const char *xe_oa_get_driver_name(struct dma_fence *fence)
{
	return "xe_oa";
}

static const char *xe_oa_get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}

static const struct dma_fence_ops xe_oa_fence_ops = {
	.get_driver_name = xe_oa_get_driver_name,
	.get_timeline_name = xe_oa_get_timeline_name,
};
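
/*
 * Write a new OA configuration from a batch on the stream's queue, then signal
 * (or synchronously wait for) a fence that completes once the empirical NOA
 * programming delay has elapsed after the batch finishes.
 */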

static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config)
{
	struct xe_oa_config_bo *oa_bo;
	struct xe_oa_fence *ofence;
	int i, err, num_signal = 0;
	struct dma_fence *fence;

	ofence = kzalloc(sizeof(*ofence), GFP_KERNEL);
	if (!ofence) {
		err = -ENOMEM;
		goto exit;
	}

	oa_bo = xe_oa_alloc_config_buffer(stream, config);
	if (IS_ERR(oa_bo)) {
		err = PTR_ERR(oa_bo);
		goto exit;
	}

	/* Emit OA configuration batch */
	fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
	if (IS_ERR(fence)) {
		err = PTR_ERR(fence);
		goto exit;
	}

	/* Point of no return: initialize and set fence to signal */
	spin_lock_init(&ofence->lock);
	dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0);

	for (i = 0; i < stream->num_syncs; i++) {
		if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
			num_signal++;
		xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
	}

	/* Additional dma_fence_get in case we dma_fence_wait */
	if (!num_signal)
		dma_fence_get(&ofence->base);

	/* Update last fence too before adding callback */
	xe_oa_update_last_fence(stream, fence);

	/* Add job fence callback to schedule work to signal ofence->base */
	err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
	xe_gt_assert(stream->gt, !err || err == -ENOENT);
	if (err == -ENOENT)
		xe_oa_config_cb(fence, &ofence->cb);

	/* If nothing needs to be signaled we wait synchronously */
	if (!num_signal) {
		dma_fence_wait(&ofence->base, false);
		dma_fence_put(&ofence->base);
	}

	/* Done with syncs */
	for (i = 0; i < stream->num_syncs; i++)
		xe_sync_entry_cleanup(&stream->syncs[i]);
	kfree(stream->syncs);

	return 0;
exit:
	kfree(ofence);
	return err;
}

static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
{
	/* If user didn't require OA reports, ask HW not to emit ctx switch reports */
	return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS,
			     stream->sample ?
			     0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
}

static u32 oag_buf_size_select(const struct xe_oa_stream *stream)
{
	return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT,
			     stream->oa_buffer.bo->size > SZ_16M ?
			     OAG_OA_DEBUG_BUF_SIZE_SELECT : 0);
}

static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
{
	struct xe_mmio *mmio = &stream->gt->mmio;
	u32 oa_debug, sqcnt1;
	int ret;

	/*
	 * Wa_1508761755:xehpsdv, dg2
	 * EU NOA signals behave incorrectly if EU clock gating is enabled.
	 * Disable thread stall DOP gating and EU DOP gating.
	 */
	if (stream->oa->xe->info.platform == XE_DG2) {
		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
					  _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
					  _MASKED_BIT_ENABLE(DISABLE_DOP_GATING));
	}

	/* Disable clk ratio reports */
	oa_debug = OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
		   OAG_OA_DEBUG_INCLUDE_CLK_RATIO;

	if (GRAPHICS_VER(stream->oa->xe) >= 20)
		oa_debug |=
			/* The three bits below are needed to get PEC counters running */
			OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL |
			OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL |
			OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL;

	xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
			_MASKED_BIT_ENABLE(oa_debug) |
			oag_report_ctx_switches(stream) |
			oag_buf_size_select(stream) |
			oag_configure_mmio_trigger(stream, true));

	xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
			(OAG_OAGLBCTXCTRL_COUNTER_RESUME |
			 OAG_OAGLBCTXCTRL_TIMER_ENABLE |
			 REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK,
					stream->period_exponent)) : 0);

	/*
	 * Initialize Super Queue Internal Cnt Register
	 * Set PMON Enable in order to collect valid metrics
	 * Enable bytes per clock reporting
	 */
	sqcnt1 = SQCNT1_PMON_ENABLE |
		 (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);

	xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1);

	/* Configure OAR/OAC */
	if (stream->exec_q) {
		ret = xe_oa_configure_oa_context(stream, true);
		if (ret)
			return ret;
	}

	return xe_oa_emit_oa_config(stream, stream->oa_config);
}
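
/*
 * Map the user-supplied format descriptor (type, counter select, counter size
 * and bc_report packed into a u64) onto one of the formats supported by this
 * platform.
 */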

static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
{
	u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
	u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
	u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
	u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
	int idx;

	for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
		const struct xe_oa_format *f = &oa->oa_formats[idx];

		if (counter_size == f->counter_size && bc_report == f->bc_report &&
		    type == f->type && counter_sel == f->counter_select) {
			*name = idx;
			return 0;
		}
	}

	return -EINVAL;
}

static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
				     struct xe_oa_open_param *param)
{
	if (value >= oa->oa_unit_ids) {
		drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
		return -EINVAL;
	}
	param->oa_unit_id = value;
	return 0;
}

static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
				    struct xe_oa_open_param *param)
{
	param->sample = value;
	return 0;
}

static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
				     struct xe_oa_open_param *param)
{
	param->metric_set = value;
	return 0;
}

static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
				    struct xe_oa_open_param *param)
{
	int ret = decode_oa_format(oa, value, &param->oa_format);

	if (ret) {
		drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
		return ret;
	}
	return 0;
}

static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
				      struct xe_oa_open_param *param)
{
#define OA_EXPONENT_MAX 31

	if (value > OA_EXPONENT_MAX) {
		drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
		return -EINVAL;
	}
	param->period_exponent = value;
	return 0;
}

static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
				   struct xe_oa_open_param *param)
{
	param->disabled = value;
	return 0;
}

static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
					struct xe_oa_open_param *param)
{
	param->exec_queue_id = value;
	return 0;
}

static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
					  struct xe_oa_open_param *param)
{
	param->engine_instance = value;
	return 0;
}

static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
				struct xe_oa_open_param *param)
{
	param->no_preempt = value;
	return 0;
}

static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value,
				    struct xe_oa_open_param *param)
{
	param->num_syncs = value;
	return 0;
}

static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
				     struct xe_oa_open_param *param)
{
	param->syncs_user = u64_to_user_ptr(value);
	return 0;
}

static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value,
					 struct xe_oa_open_param *param)
{
	if (!is_power_of_2(value) || value < SZ_128K || value > SZ_128M) {
		drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value);
		return -EINVAL;
	}
	param->oa_buffer_size = value;
	return 0;
}

static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value,
					   struct xe_oa_open_param *param)
{
	if (!value) {
		drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value);
		return -EINVAL;
	}
	param->wait_num_reports = value;
	return 0;
}

static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
				    struct xe_oa_open_param *param)
{
	return -EINVAL;
}
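
/*
 * Property handler tables for the stream open and stream reconfigure paths;
 * properties that are not valid in a given path map to xe_oa_set_prop_ret_inval.
 */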

typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
				     struct xe_oa_open_param *param);
static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
	[DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
	[DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
	[DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
	[DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
	[DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
	[DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
	[DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
	[DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
	[DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
	[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
	[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
	[DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size,
	[DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports,
};

static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
	[DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
	[DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
	[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
	[DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval,
};

static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
				       u64 extension, struct xe_oa_open_param *param)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(oa->xe, err))
		return -EFAULT;

	BUILD_BUG_ON(ARRAY_SIZE(xe_oa_set_property_funcs_open) !=
		     ARRAY_SIZE(xe_oa_set_property_funcs_config));

	if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) ||
	    XE_IOCTL_DBG(oa->xe, ext.pad))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open));

	if (from == XE_OA_USER_EXTN_FROM_CONFIG)
		return xe_oa_set_property_funcs_config[idx](oa, ext.value, param);
	else
		return xe_oa_set_property_funcs_open[idx](oa, ext.value, param);
}

typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, enum xe_oa_user_extn_from from,
				       u64 extension, struct xe_oa_open_param *param);
static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
	[DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS 16
static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from from, u64 extension,
				 int ext_number, struct xe_oa_open_param *param)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(oa->xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
	    XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
	err = xe_oa_user_extension_funcs[idx](oa, from, extension, param);
	if (XE_IOCTL_DBG(oa->xe, err))
		return err;

	if (ext.next_extension)
		return xe_oa_user_extensions(oa, from, ext.next_extension, ++ext_number, param);

	return 0;
}
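
/* Copy and validate the user-supplied sync array; at most one user fence is allowed */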

static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
{
	int ret, num_syncs, num_ufence = 0;

	if (param->num_syncs && !param->syncs_user) {
		drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n");
		ret = -EINVAL;
		goto exit;
	}

	if (param->num_syncs) {
		param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL);
		if (!param->syncs) {
			ret = -ENOMEM;
			goto exit;
		}
	}

	for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) {
		ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs],
					  &param->syncs_user[num_syncs], 0);
		if (ret)
			goto err_syncs;

		if (xe_sync_is_ufence(&param->syncs[num_syncs]))
			num_ufence++;
	}

	if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) {
		ret = -EINVAL;
		goto err_syncs;
	}

	return 0;

err_syncs:
	while (num_syncs--)
		xe_sync_entry_cleanup(&param->syncs[num_syncs]);
	kfree(param->syncs);
exit:
	return ret;
}

static void xe_oa_stream_enable(struct xe_oa_stream *stream)
{
	stream->pollin = false;

	xe_oa_enable(stream);

	if (stream->sample)
		hrtimer_start(&stream->poll_check_timer,
			      ns_to_ktime(stream->poll_period_ns),
			      HRTIMER_MODE_REL_PINNED);
}

static void xe_oa_stream_disable(struct xe_oa_stream *stream)
{
	xe_oa_disable(stream);

	if (stream->sample)
		hrtimer_cancel(&stream->poll_check_timer);
}

static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream)
{
	struct xe_exec_queue *q = stream->exec_q;
	int ret1, ret2;

	/* Best effort recovery: try to revert both to original, irrespective of error */
	ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us);
	ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us);
	if (ret1 || ret2)
		goto err;
	return 0;
err:
	drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n", __func__, ret1, ret2);
	return ret1 ?: ret2;
}

static int xe_oa_disable_preempt_timeslice(struct xe_oa_stream *stream)
{
	struct xe_exec_queue *q = stream->exec_q;
	int ret;

	/* Setting values to 0 will disable timeslice and preempt_timeout */
	ret = q->ops->set_timeslice(q, 0);
	if (ret)
		goto err;

	ret = q->ops->set_preempt_timeout(q, 0);
	if (ret)
		goto err;

	return 0;
err:
	xe_oa_enable_preempt_timeslice(stream);
	drm_dbg(&stream->oa->xe->drm, "%s failed %d\n", __func__, ret);
	return ret;
}

static int xe_oa_enable_locked(struct xe_oa_stream *stream)
{
	if (stream->enabled)
		return 0;

	if (stream->no_preempt) {
		int ret = xe_oa_disable_preempt_timeslice(stream);

		if (ret)
			return ret;
	}

	xe_oa_stream_enable(stream);

	stream->enabled = true;
	return 0;
}

static int xe_oa_disable_locked(struct xe_oa_stream *stream)
{
	int ret = 0;

	if (!stream->enabled)
		return 0;

	xe_oa_stream_disable(stream);

	if (stream->no_preempt)
		ret = xe_oa_enable_preempt_timeslice(stream);

	stream->enabled = false;
	return ret;
}
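
/*
 * DRM_XE_OBSERVATION_IOCTL_CONFIG: switch the stream to a different metric set.
 * On success the id of the config that was active when the ioctl was issued is
 * returned, so userspace can restore it later.
 */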

static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
{
	struct xe_oa_open_param param = {};
	long ret = stream->oa_config->id;
	struct xe_oa_config *config;
	int err;

	err = xe_oa_user_extensions(stream->oa, XE_OA_USER_EXTN_FROM_CONFIG, arg, 0, &param);
	if (err)
		return err;

	config = xe_oa_get_oa_config(stream->oa, param.metric_set);
	if (!config)
		return -ENODEV;

	param.xef = stream->xef;
	err = xe_oa_parse_syncs(stream->oa, &param);
	if (err)
		goto err_config_put;

	stream->num_syncs = param.num_syncs;
	stream->syncs = param.syncs;

	err = xe_oa_emit_oa_config(stream, config);
	if (!err) {
		config = xchg(&stream->oa_config, config);
		drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n",
			stream->oa_config->uuid);
	}

err_config_put:
	xe_oa_config_put(config);

	return err ?: ret;
}

static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
{
	struct drm_xe_oa_stream_status status = {};
	void __user *uaddr = (void __user *)arg;

	/* Map from register to uapi bits */
	if (stream->oa_status & OASTATUS_REPORT_LOST)
		status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST;
	if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW)
		status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW;
	if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW)
		status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW;
	if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL)
		status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL;

	if (copy_to_user(uaddr, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}

static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg)
{
	struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, };
	void __user *uaddr = (void __user *)arg;

	if (copy_to_user(uaddr, &info, sizeof(info)))
		return -EFAULT;

	return 0;
}

static long xe_oa_ioctl_locked(struct xe_oa_stream *stream,
			       unsigned int cmd,
			       unsigned long arg)
{
	switch (cmd) {
	case DRM_XE_OBSERVATION_IOCTL_ENABLE:
		return xe_oa_enable_locked(stream);
	case DRM_XE_OBSERVATION_IOCTL_DISABLE:
		return xe_oa_disable_locked(stream);
	case DRM_XE_OBSERVATION_IOCTL_CONFIG:
		return xe_oa_config_locked(stream, arg);
	case DRM_XE_OBSERVATION_IOCTL_STATUS:
		return xe_oa_status_locked(stream, arg);
	case DRM_XE_OBSERVATION_IOCTL_INFO:
		return xe_oa_info_locked(stream, arg);
	}

	return -EINVAL;
}

static long xe_oa_ioctl(struct file *file,
			unsigned int cmd,
			unsigned long arg)
{
	struct xe_oa_stream *stream = file->private_data;
	long ret;

	mutex_lock(&stream->stream_lock);
	ret = xe_oa_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&stream->stream_lock);

	return ret;
}

static void xe_oa_destroy_locked(struct xe_oa_stream *stream)
{
	if (stream->enabled)
		xe_oa_disable_locked(stream);

	xe_oa_stream_destroy(stream);

	if (stream->exec_q)
		xe_exec_queue_put(stream->exec_q);

	kfree(stream);
}

static int xe_oa_release(struct inode *inode, struct file *file)
{
	struct xe_oa_stream *stream = file->private_data;
	struct xe_gt *gt = stream->gt;

	xe_pm_runtime_get(gt_to_xe(gt));
	mutex_lock(&gt->oa.gt_lock);
	xe_oa_destroy_locked(stream);
	mutex_unlock(&gt->oa.gt_lock);
	xe_pm_runtime_put(gt_to_xe(gt));

	/* Release the reference the OA stream kept on the driver */
	drm_dev_put(&gt_to_xe(gt)->drm);

	return 0;
}
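
/*
 * mmap() of the stream fd maps the system-memory OA buffer read-only into
 * userspace so that reports can be consumed without going through read().
 */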

static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct xe_oa_stream *stream = file->private_data;
	struct xe_bo *bo = stream->oa_buffer.bo;
	unsigned long start = vma->vm_start;
	int i, ret;

	if (xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n");
		return -EACCES;
	}

	/* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */
	if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) {
		drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n");
		return -EINVAL;
	}

	/*
	 * Only support VM_READ, enforce MAP_PRIVATE by checking for
	 * VM_MAYSHARE, don't copy the vma on fork
	 */
	if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_SHARED | VM_MAYSHARE)) {
		drm_dbg(&stream->oa->xe->drm, "mmap must be read only\n");
		return -EINVAL;
	}
	vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY,
		     VM_MAYWRITE | VM_MAYEXEC);

	xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma));
	for (i = 0; i < bo->ttm.ttm->num_pages; i++) {
		ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]),
				      PAGE_SIZE, vma->vm_page_prot);
		if (ret)
			break;

		start += PAGE_SIZE;
	}

	return ret;
}

static const struct file_operations xe_oa_fops = {
	.owner = THIS_MODULE,
	.release = xe_oa_release,
	.poll = xe_oa_poll,
	.read = xe_oa_read,
	.unlocked_ioctl = xe_oa_ioctl,
	.mmap = xe_oa_mmap,
};
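
/*
 * One-time stream setup: resolve the OA config, apply platform workarounds,
 * take runtime pm and forcewake references, allocate the OA buffer, create the
 * stream's kernel exec queue and enable the metric set.
 */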

static int xe_oa_stream_init(struct xe_oa_stream *stream,
			     struct xe_oa_open_param *param)
{
	struct xe_oa_unit *u = param->hwe->oa_unit;
	struct xe_gt *gt = param->hwe->gt;
	unsigned int fw_ref;
	int ret;

	stream->exec_q = param->exec_q;
	stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS;
	stream->hwe = param->hwe;
	stream->gt = stream->hwe->gt;
	stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format];

	stream->sample = param->sample;
	stream->periodic = param->period_exponent > 0;
	stream->period_exponent = param->period_exponent;
	stream->no_preempt = param->no_preempt;
	stream->wait_num_reports = param->wait_num_reports;

	stream->xef = xe_file_get(param->xef);
	stream->num_syncs = param->num_syncs;
	stream->syncs = param->syncs;

	/*
	 * For Xe2+, when overrun mode is enabled, there are no partial reports at the end
	 * of buffer, making the OA buffer effectively a non-power-of-2 size circular
	 * buffer whose size, circ_size, is a multiple of the report size
	 */
	if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
	    stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
		stream->oa_buffer.circ_size =
			param->oa_buffer_size -
			param->oa_buffer_size % stream->oa_buffer.format->size;
	else
		stream->oa_buffer.circ_size = param->oa_buffer_size;

	stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set);
	if (!stream->oa_config) {
		drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set);
		ret = -EINVAL;
		goto exit;
	}

	/*
	 * Wa_1509372804:pvc
	 *
	 * GuC reset of engines causes OA to lose configuration
	 * state. Prevent this by overriding GUCRC mode.
	 */
	if (stream->oa->xe->info.platform == XE_PVC) {
		ret = xe_guc_pc_override_gucrc_mode(&gt->uc.guc.pc,
						    SLPC_GUCRC_MODE_GUCRC_NO_RC6);
		if (ret)
			goto err_free_configs;

		stream->override_gucrc = true;
	}

	/* Take runtime pm ref and forcewake to disable RC6 */
	xe_pm_runtime_get(stream->oa->xe);
	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
		ret = -ETIMEDOUT;
		goto err_fw_put;
	}

	ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size);
	if (ret)
		goto err_fw_put;

	stream->k_exec_q = xe_exec_queue_create(stream->oa->xe, NULL,
						BIT(stream->hwe->logical_instance), 1,
						stream->hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
	if (IS_ERR(stream->k_exec_q)) {
		ret = PTR_ERR(stream->k_exec_q);
		drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_exec_queue_create failed=%d",
			stream->gt->info.id, stream->hwe->name, ret);
		goto err_free_oa_buf;
	}

	ret = xe_oa_enable_metric_set(stream);
	if (ret) {
		drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n");
		goto err_put_k_exec_q;
	}

	drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n",
		stream->oa_config->uuid);

	WRITE_ONCE(u->exclusive_stream, stream);

	hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	stream->poll_check_timer.function = xe_oa_poll_check_timer_cb;
	init_waitqueue_head(&stream->poll_wq);

	spin_lock_init(&stream->oa_buffer.ptr_lock);
	mutex_init(&stream->stream_lock);

	return 0;

err_put_k_exec_q:
	xe_oa_disable_metric_set(stream);
	xe_exec_queue_put(stream->k_exec_q);
err_free_oa_buf:
	xe_oa_free_oa_buffer(stream);
err_fw_put:
	xe_force_wake_put(gt_to_fw(gt), fw_ref);
	xe_pm_runtime_put(stream->oa->xe);
	if (stream->override_gucrc)
		xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
err_free_configs:
	xe_oa_free_configs(stream);
exit:
	xe_file_put(stream->xef);
	return ret;
}
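
/*
 * Open a stream with the OA unit lock held: enforce single-stream exclusivity
 * per OA unit and return an anon inode fd that holds a reference on the drm
 * device until it is released.
 */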

static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
					  struct xe_oa_open_param *param)
{
	struct xe_oa_stream *stream;
	int stream_fd;
	int ret;

	/* We currently only allow exclusive access */
	if (param->hwe->oa_unit->exclusive_stream) {
		drm_dbg(&oa->xe->drm, "OA unit already in use\n");
		ret = -EBUSY;
		goto exit;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		ret = -ENOMEM;
		goto exit;
	}

	stream->oa = oa;
	ret = xe_oa_stream_init(stream, param);
	if (ret)
		goto err_free;

	if (!param->disabled) {
		ret = xe_oa_enable_locked(stream);
		if (ret)
			goto err_destroy;
	}

	stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0);
	if (stream_fd < 0) {
		ret = stream_fd;
		goto err_disable;
	}

	/* Hold a reference on the drm device till stream_fd is released */
	drm_dev_get(&stream->oa->xe->drm);

	return stream_fd;
err_disable:
	if (!param->disabled)
		xe_oa_disable_locked(stream);
err_destroy:
	xe_oa_stream_destroy(stream);
err_free:
	kfree(stream);
exit:
	return ret;
}

/**
 * xe_oa_timestamp_frequency - Return OA timestamp frequency
 * @gt: @xe_gt
 *
 * OA timestamp frequency = CS timestamp frequency in most platforms. On some
 * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such
 * cases, return the adjusted CS timestamp frequency to the user.
 */
u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
{
	u32 reg, shift;

	/*
	 * Wa_18013179988:dg2
	 * Wa_14015568240:pvc
	 * Wa_14015846243:mtl
	 */
	switch (gt_to_xe(gt)->info.platform) {
	case XE_DG2:
	case XE_PVC:
	case XE_METEORLAKE:
		xe_pm_runtime_get(gt_to_xe(gt));
		reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
		xe_pm_runtime_put(gt_to_xe(gt));

		shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
		return gt->info.reference_clock << (3 - shift);

	default:
		return gt->info.reference_clock;
	}
}
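
/*
 * Sampling period in ns for an OA timer exponent: 2^(exponent + 1) ticks of
 * the OA timestamp clock, rounded up to whole nanoseconds.
 */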
static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type)
{
	switch (hwe->oa_unit->type) {
	case DRM_XE_OA_UNIT_TYPE_OAG:
		return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR ||
			type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC;
	case DRM_XE_OA_UNIT_TYPE_OAM:
		return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC;
	default:
		return false;
	}
}

/**
 * xe_oa_unit_id - Return OA unit ID for a hardware engine
 * @hwe: @xe_hw_engine
 *
 * Return OA unit ID for a hardware engine when available
 */
u16 xe_oa_unit_id(struct xe_hw_engine *hwe)
{
	return hwe->oa_unit && hwe->oa_unit->num_engines ?
		hwe->oa_unit->oa_unit_id : U16_MAX;
}

static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param)
{
	struct xe_gt *gt;
	int i, ret = 0;

	if (param->exec_q) {
		/* When we have an exec_q, get hwe from the exec_q */
		param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class,
					     param->engine_instance, true);
	} else {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;

		/* Else just get the first hwe attached to the oa unit */
		for_each_gt(gt, oa->xe, i) {
			for_each_hw_engine(hwe, gt, id) {
				if (xe_oa_unit_id(hwe) == param->oa_unit_id) {
					param->hwe = hwe;
					goto out;
				}
			}
		}
	}
out:
	if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) {
		drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n",
			param->exec_q ? param->exec_q->class : -1,
			param->engine_instance, param->oa_unit_id);
		ret = -EINVAL;
	}

	return ret;
}

/**
 * xe_oa_stream_open_ioctl - Opens an OA stream
 * @dev: @drm_device
 * @data: first extension in a chain of @drm_xe_ext_set_property OA stream properties
 * @file: @drm_file
 *
 * The function opens an OA stream. An OA stream, opened with specified
 * properties, enables OA counter samples to be collected, either
 * periodically (time based sampling), or on request (using OA queries)
 */
int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_oa *oa = &xe->oa;
	struct xe_file *xef = to_xe_file(file);
	struct xe_oa_open_param param = {};
	const struct xe_oa_format *f;
	bool privileged_op = true;
	int ret;

	if (!oa->xe) {
		drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
		return -ENODEV;
	}

	param.xef = xef;
	ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, &param);
	if (ret)
		return ret;

	if (param.exec_queue_id > 0) {
		param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id);
		if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
			return -ENOENT;

		if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1))
			return -EOPNOTSUPP;
	}

	/*
	 * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC,
	 * without global stream access, can be an unprivileged operation
	 */
	if (param.exec_q && !param.sample)
		privileged_op = false;

	if (param.no_preempt) {
		if (!param.exec_q) {
			drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n");
			ret = -EINVAL;
			goto err_exec_q;
		}
		privileged_op = true;
	}

	if (privileged_op && xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n");
		ret = -EACCES;
		goto err_exec_q;
	}

	if (!param.exec_q && !param.sample) {
		drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n");
		ret = -EINVAL;
		goto err_exec_q;
	}

	ret = xe_oa_assign_hwe(oa, &param);
	if (ret)
		goto err_exec_q;

	f = &oa->oa_formats[param.oa_format];
	if (!param.oa_format || !f->size ||
	    !engine_supports_oa_format(param.hwe, f->type)) {
		drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n",
			param.oa_format, f->type, f->size, param.hwe->class);
		ret = -EINVAL;
		goto err_exec_q;
	}

	if (param.period_exponent > 0) {
		u64 oa_period, oa_freq_hz;

		/* Requesting samples from OAG buffer is a privileged operation */
		if (!param.sample) {
			drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n");
			ret = -EINVAL;
			goto err_exec_q;
		}
		oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent);
		oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period);
		drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
	}

	if (!param.oa_buffer_size)
		param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE;

	if (!param.wait_num_reports)
		param.wait_num_reports = 1;
	if (param.wait_num_reports > param.oa_buffer_size / f->size) {
		drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports);
		ret = -EINVAL;
		goto err_exec_q;
	}

	ret = xe_oa_parse_syncs(oa, &param);
	if (ret)
		goto err_exec_q;

	mutex_lock(&param.hwe->gt->oa.gt_lock);
	ret = xe_oa_stream_open_ioctl_locked(oa, &param);
	mutex_unlock(&param.hwe->gt->oa.gt_lock);
	if (ret < 0)
		goto err_sync_cleanup;

	return ret;

err_sync_cleanup:
	while (param.num_syncs--)
		xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
	kfree(param.syncs);
err_exec_q:
	if (param.exec_q)
		xe_exec_queue_put(param.exec_q);
	return ret;
}

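/*
 * Illustrative userspace sketch (an assumption, not the authoritative recipe)
 * of opening a periodically sampled OA stream. Property, struct and ioctl
 * names are from uapi/drm/xe_drm.h as understood at the time of writing;
 * verify against the current header. config_id and fmt are placeholders. Each
 * property is passed as a chained drm_xe_ext_set_property extension, matching
 * what xe_oa_user_extensions() parses above.
 *
 *	struct drm_xe_ext_set_property props[] = {
 *		{ .property = DRM_XE_OA_PROPERTY_OA_UNIT_ID, .value = 0 },
 *		{ .property = DRM_XE_OA_PROPERTY_SAMPLE_OA, .value = 1 },
 *		{ .property = DRM_XE_OA_PROPERTY_OA_METRIC_SET, .value = config_id },
 *		{ .property = DRM_XE_OA_PROPERTY_OA_FORMAT, .value = fmt },
 *		{ .property = DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, .value = 5 },
 *	};
 *	size_t n = sizeof(props) / sizeof(props[0]);
 *	size_t i;
 *
 *	for (i = 0; i < n; i++) {
 *		props[i].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
 *		props[i].base.next_extension =
 *			i + 1 < n ? (uintptr_t)&props[i + 1] : 0;
 *	}
 *
 *	struct drm_xe_observation_param arg = {
 *		.observation_type = DRM_XE_OBSERVATION_TYPE_OA,
 *		.observation_op = DRM_XE_OBSERVATION_OP_STREAM_OPEN,
 *		.param = (uintptr_t)&props[0],
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &arg);
 *
 * On success the returned fd is the anon-inode stream fd created by
 * xe_oa_stream_open_ioctl_locked() above.
 */
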
static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr)
{
	static const struct xe_reg flex_eu_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
		if (flex_eu_regs[i].addr == addr)
			return true;
	}
	return false;
}

static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table)
{
	while (table->start && table->end) {
		if (addr >= table->start && addr <= table->end)
			return true;

		table++;
	}

	return false;
}

static const struct xe_mmio_range xehp_oa_b_counters[] = {
	{ .start = 0xdc48, .end = 0xdc48 },	/* OAA_ENABLE_REG */
	{ .start = 0xdd00, .end = 0xdd48 },	/* OAG_LCE0_0 - OAA_LENABLE_REG */
	{}
};

static const struct xe_mmio_range gen12_oa_b_counters[] = {
	{ .start = 0x2b2c, .end = 0x2b2c },	/* OAG_OA_PESS */
	{ .start = 0xd900, .end = 0xd91c },	/* OAG_OASTARTTRIG[1-8] */
	{ .start = 0xd920, .end = 0xd93c },	/* OAG_OAREPORTTRIG1[1-8] */
	{ .start = 0xd940, .end = 0xd97c },	/* OAG_CEC[0-7][0-1] */
	{ .start = 0xdc00, .end = 0xdc3c },	/* OAG_SCEC[0-7][0-1] */
	{ .start = 0xdc40, .end = 0xdc40 },	/* OAG_SPCTR_CNF */
	{ .start = 0xdc44, .end = 0xdc44 },	/* OAA_DBG_REG */
	{}
};

static const struct xe_mmio_range mtl_oam_b_counters[] = {
	{ .start = 0x393000, .end = 0x39301c },	/* OAM_STARTTRIG1[1-8] */
	{ .start = 0x393020, .end = 0x39303c },	/* OAM_REPORTTRIG1[1-8] */
	{ .start = 0x393040, .end = 0x39307c },	/* OAM_CEC[0-7][0-1] */
	{ .start = 0x393200, .end = 0x39323C },	/* MPES[0-7] */
	{}
};

static const struct xe_mmio_range xe2_oa_b_counters[] = {
	{ .start = 0x393200, .end = 0x39323C },	/* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */
	{ .start = 0x394200, .end = 0x39423C },	/* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */
	{ .start = 0x394A00, .end = 0x394A3C },	/* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */
	{},
};

static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr)
{
	return xe_oa_reg_in_range_table(addr, xehp_oa_b_counters) ||
		xe_oa_reg_in_range_table(addr, gen12_oa_b_counters) ||
		xe_oa_reg_in_range_table(addr, mtl_oam_b_counters) ||
		(GRAPHICS_VER(oa->xe) >= 20 &&
		 xe_oa_reg_in_range_table(addr, xe2_oa_b_counters));
}

static const struct xe_mmio_range mtl_oa_mux_regs[] = {
	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
	{ .start = 0x38d100, .end = 0x38d114 },	/* VISACTL */
	{}
};

static const struct xe_mmio_range gen12_oa_mux_regs[] = {
	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
	{ .start = 0x20cc, .end = 0x20cc },	/* WAIT_FOR_RC6_EXIT */
	{}
};

static const struct xe_mmio_range xe2_oa_mux_regs[] = {
	{ .start = 0x5194, .end = 0x5194 },	/* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */
	{ .start = 0x8704, .end = 0x8704 },	/* LMEM_LAT_MEASURE_MCFG_GRP */
	{ .start = 0xB1BC, .end = 0xB1BC },	/* L3_BANK_LAT_MEASURE_LBCF_GFX */
	{ .start = 0xD0E0, .end = 0xD0F4 },	/* VISACTL */
	{ .start = 0xE18C, .end = 0xE18C },	/* SAMPLER_MODE */
	{ .start = 0xE590, .end = 0xE590 },	/* TDL_LSC_LAT_MEASURE_TDL_GFX */
	{ .start = 0x13000, .end = 0x137FC },	/* PES_0_PESL0 - PES_63_UPPER_PESL3 */
	{},
};

static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr)
{
	if (GRAPHICS_VER(oa->xe) >= 20)
		return xe_oa_reg_in_range_table(addr, xe2_oa_mux_regs);
	else if (GRAPHICS_VERx100(oa->xe) >= 1270)
		return xe_oa_reg_in_range_table(addr, mtl_oa_mux_regs);
	else
		return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs);
}

static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr)
{
	return xe_oa_is_valid_flex_addr(oa, addr) ||
		xe_oa_is_valid_b_counter_addr(oa, addr) ||
		xe_oa_is_valid_mux_addr(oa, addr);
}

static struct xe_oa_reg *
xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr),
		 u32 __user *regs, u32 n_regs)
{
	struct xe_oa_reg *oa_regs;
	int err;
	u32 i;

	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
	if (!oa_regs)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < n_regs; i++) {
		u32 addr, value;

		err = get_user(addr, regs);
		if (err)
			goto addr_err;

		if (!is_valid(oa, addr)) {
			drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr);
			err = -EINVAL;
			goto addr_err;
		}

		err = get_user(value, regs + 1);
		if (err)
			goto addr_err;

		oa_regs[i].addr = XE_REG(addr);
		oa_regs[i].value = value;

		regs += 2;
	}

	return oa_regs;

addr_err:
	kfree(oa_regs);
	return ERR_PTR(err);
}

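/*
 * The regs pointer consumed by xe_oa_alloc_regs() above is a flat user array
 * of (address, value) u32 pairs. A minimal userspace sketch of building one
 * (addresses chosen to fall inside the gen12 allowlists above, values purely
 * illustrative; any address outside the allowlists makes the whole config
 * fail with -EINVAL):
 *
 *	u32 regs[] = {
 *		0x9888, 0x14150001,	// mux write, NOA_WRITE range
 *		0xd920, 0x00000000,	// b-counter write, OAG_OAREPORTTRIG range
 *	};
 *	u32 n_regs = (sizeof(regs) / sizeof(regs[0])) / 2;
 */
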
static ssize_t show_dynamic_id(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       char *buf)
{
	struct xe_oa_config *oa_config =
		container_of(attr, typeof(*oa_config), sysfs_metric_id);

	return sysfs_emit(buf, "%d\n", oa_config->id);
}

static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa,
					 struct xe_oa_config *oa_config)
{
	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
	oa_config->sysfs_metric_id.attr.name = "id";
	oa_config->sysfs_metric_id.attr.mode = 0444;
	oa_config->sysfs_metric_id.show = show_dynamic_id;
	oa_config->sysfs_metric_id.store = NULL;

	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
	oa_config->attrs[1] = NULL;

	oa_config->sysfs_metric.name = oa_config->uuid;
	oa_config->sysfs_metric.attrs = oa_config->attrs;

	return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric);
}

/**
 * xe_oa_add_config_ioctl - Adds one OA config
 * @dev: @drm_device
 * @data: pointer to struct @drm_xe_oa_config
 * @file: @drm_file
 *
 * The function adds an OA config to the set of OA configs maintained in
 * the kernel. The config determines which OA metrics are collected for an
 * OA stream.
 */
int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_oa *oa = &xe->oa;
	struct drm_xe_oa_config param;
	struct drm_xe_oa_config *arg = &param;
	struct xe_oa_config *oa_config, *tmp;
	struct xe_oa_reg *regs;
	int err, id;

	if (!oa->xe) {
		drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
		return -ENODEV;
	}

	if (xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n");
		return -EACCES;
	}

	err = __copy_from_user(&param, u64_to_user_ptr(data), sizeof(param));
	if (XE_IOCTL_DBG(oa->xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(oa->xe, arg->extensions) ||
	    XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) ||
	    XE_IOCTL_DBG(oa->xe, !arg->n_regs))
		return -EINVAL;

	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
	if (!oa_config)
		return -ENOMEM;

	oa_config->oa = oa;
	kref_init(&oa_config->ref);

	if (!uuid_is_valid(arg->uuid)) {
		drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n");
		err = -EINVAL;
		goto reg_err;
	}

	/* Last character in oa_config->uuid will be 0 because oa_config is kzalloc'd */
	memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid));

	oa_config->regs_len = arg->n_regs;
	regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr,
				u64_to_user_ptr(arg->regs_ptr),
				arg->n_regs);
	if (IS_ERR(regs)) {
		drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n");
		err = PTR_ERR(regs);
		goto reg_err;
	}
	oa_config->regs = regs;

	err = mutex_lock_interruptible(&oa->metrics_lock);
	if (err)
		goto reg_err;

	/* We shouldn't have too many configs, so this iteration shouldn't be too costly */
	idr_for_each_entry(&oa->metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, oa_config->uuid)) {
			drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n");
			err = -EADDRINUSE;
			goto sysfs_err;
		}
	}

	err = create_dynamic_oa_sysfs_entry(oa, oa_config);
	if (err) {
		drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n");
		goto sysfs_err;
	}

	oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL);
	if (oa_config->id < 0) {
		drm_dbg(&oa->xe->drm, "Failed to allocate id for OA config\n");
		err = oa_config->id;
		goto sysfs_err;
	}

	mutex_unlock(&oa->metrics_lock);

	drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id);

	return oa_config->id;

sysfs_err:
	mutex_unlock(&oa->metrics_lock);
reg_err:
	xe_oa_config_put(oa_config);
	drm_dbg(&oa->xe->drm, "Failed to add new OA config\n");
	return err;
}

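/*
 * Illustrative userspace sketch (an assumption, not authoritative) of adding
 * a config through the observation ioctl; struct and constant names are from
 * uapi/drm/xe_drm.h as understood at the time of writing, and regs/n_regs are
 * the flat (address, value) array described near xe_oa_alloc_regs() above:
 *
 *	struct drm_xe_oa_config cfg = {
 *		.uuid = "01234567-0123-0123-0123-0123456789ab",
 *		.n_regs = n_regs,
 *		.regs_ptr = (uintptr_t)regs,
 *	};
 *	struct drm_xe_observation_param arg = {
 *		.observation_type = DRM_XE_OBSERVATION_TYPE_OA,
 *		.observation_op = DRM_XE_OBSERVATION_OP_ADD_CONFIG,
 *		.param = (uintptr_t)&cfg,
 *	};
 *	int config_id = ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &arg);
 *
 * The returned id is what DRM_XE_OA_PROPERTY_OA_METRIC_SET expects when
 * opening a stream, and is also exposed by the "id" attribute of the sysfs
 * group created above (typically /sys/class/drm/card<n>/metrics/<uuid>/id).
 */
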
/**
 * xe_oa_remove_config_ioctl - Removes one OA config
 * @dev: @drm_device
 * @data: pointer to the u64 id of the OA config to remove
 * @file: @drm_file
 */
int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_oa *oa = &xe->oa;
	struct xe_oa_config *oa_config;
	u64 arg, *ptr = u64_to_user_ptr(data);
	int ret;

	if (!oa->xe) {
		drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
		return -ENODEV;
	}

	if (xe_observation_paranoid && !perfmon_capable()) {
		drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n");
		return -EACCES;
	}

	ret = get_user(arg, ptr);
	if (XE_IOCTL_DBG(oa->xe, ret))
		return ret;

	ret = mutex_lock_interruptible(&oa->metrics_lock);
	if (ret)
		return ret;

	oa_config = idr_find(&oa->metrics_idr, arg);
	if (!oa_config) {
		drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n");
		ret = -ENOENT;
		goto err_unlock;
	}

	WARN_ON(arg != oa_config->id);

	sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric);
	idr_remove(&oa->metrics_idr, arg);

	mutex_unlock(&oa->metrics_lock);

	drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id);

	xe_oa_config_put(oa_config);

	return 0;

err_unlock:
	mutex_unlock(&oa->metrics_lock);
	return ret;
}

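/*
 * Corresponding removal sketch (illustrative, same caveats as the add-config
 * example above): the ioctl payload is simply a user pointer to the u64
 * config id returned by the add-config call.
 *
 *	u64 id = config_id;
 *	struct drm_xe_observation_param arg = {
 *		.observation_type = DRM_XE_OBSERVATION_TYPE_OA,
 *		.observation_op = DRM_XE_OBSERVATION_OP_REMOVE_CONFIG,
 *		.param = (uintptr_t)&id,
 *	};
 *	ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &arg);
 */
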
/**
 * xe_oa_register - Xe OA registration
 * @xe: @xe_device
 *
 * Exposes the metrics sysfs directory upon completion of module initialization
 */
void xe_oa_register(struct xe_device *xe)
{
	struct xe_oa *oa = &xe->oa;

	if (!oa->xe)
		return;

	oa->metrics_kobj = kobject_create_and_add("metrics",
						  &xe->drm.primary->kdev->kobj);
}

/**
 * xe_oa_unregister - Xe OA de-registration
 * @xe: @xe_device
 */
void xe_oa_unregister(struct xe_device *xe)
{
	struct xe_oa *oa = &xe->oa;

	if (!oa->metrics_kobj)
		return;

	kobject_put(oa->metrics_kobj);
	oa->metrics_kobj = NULL;
}

static u32 num_oa_units_per_gt(struct xe_gt *gt)
{
	return 1;
}

static u32 __hwe_oam_unit(struct xe_hw_engine *hwe)
{
	if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) {
		/*
		 * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
		 * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA.
		 */
		xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA);

		return 0;
	}

	return XE_OA_UNIT_INVALID;
}

static u32 __hwe_oa_unit(struct xe_hw_engine *hwe)
{
	switch (hwe->class) {
	case XE_ENGINE_CLASS_RENDER:
	case XE_ENGINE_CLASS_COMPUTE:
		return 0;

	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		return __hwe_oam_unit(hwe);

	default:
		return XE_OA_UNIT_INVALID;
	}
}

static struct xe_oa_regs __oam_regs(u32 base)
{
	return (struct xe_oa_regs) {
		base,
		OAM_HEAD_POINTER(base),
		OAM_TAIL_POINTER(base),
		OAM_BUFFER(base),
		OAM_CONTEXT_CONTROL(base),
		OAM_CONTROL(base),
		OAM_DEBUG(base),
		OAM_STATUS(base),
		OAM_CONTROL_COUNTER_SEL_MASK,
	};
}

static struct xe_oa_regs __oag_regs(void)
{
	return (struct xe_oa_regs) {
		0,
		OAG_OAHEADPTR,
		OAG_OATAILPTR,
		OAG_OABUFFER,
		OAG_OAGLBCTXCTRL,
		OAG_OACONTROL,
		OAG_OA_DEBUG,
		OAG_OASTATUS,
		OAG_OACONTROL_OA_COUNTER_SEL_MASK,
	};
}

static void __xe_oa_init_oa_units(struct xe_gt *gt)
{
	const u32 mtl_oa_base[] = { 0x13000 };
	int i, num_units = gt->oa.num_oa_units;

	for (i = 0; i < num_units; i++) {
		struct xe_oa_unit *u = &gt->oa.oa_unit[i];

		if (gt->info.type != XE_GT_TYPE_MEDIA) {
			u->regs = __oag_regs();
			u->type = DRM_XE_OA_UNIT_TYPE_OAG;
		} else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
			u->regs = __oam_regs(mtl_oa_base[i]);
			u->type = DRM_XE_OA_UNIT_TYPE_OAM;
		}

		/* Ensure MMIO trigger remains disabled till there is a stream */
		xe_mmio_write32(&gt->mmio, u->regs.oa_debug,
				oag_configure_mmio_trigger(NULL, false));

		/* Set oa_unit_ids now to ensure ids remain contiguous */
		u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++;
	}
}

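/*
 * Worked example of the id assignment above, for a hypothetical device with
 * one primary GT and one media GT: the primary GT's single OAG unit gets
 * oa_unit_id 0 and the media GT's OAM unit gets oa_unit_id 1, since
 * oa_unit_ids is a device-global counter advanced in GT iteration order.
 * These are the ids userspace passes via DRM_XE_OA_PROPERTY_OA_UNIT_ID when
 * opening a stream.
 */
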
static int xe_oa_init_gt(struct xe_gt *gt)
{
	u32 num_oa_units = num_oa_units_per_gt(gt);
	struct xe_hw_engine *hwe;
	enum xe_hw_engine_id id;
	struct xe_oa_unit *u;

	u = drmm_kcalloc(&gt_to_xe(gt)->drm, num_oa_units, sizeof(*u), GFP_KERNEL);
	if (!u)
		return -ENOMEM;

	for_each_hw_engine(hwe, gt, id) {
		u32 index = __hwe_oa_unit(hwe);

		hwe->oa_unit = NULL;
		if (index < num_oa_units) {
			u[index].num_engines++;
			hwe->oa_unit = &u[index];
		}
	}

	/*
	 * Fused off engines can result in oa_unit's with num_engines == 0. These units
	 * will appear in OA unit query, but no OA streams can be opened on them.
	 */
	gt->oa.num_oa_units = num_oa_units;
	gt->oa.oa_unit = u;

	__xe_oa_init_oa_units(gt);

	drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->oa.gt_lock);

	return 0;
}

static int xe_oa_init_oa_units(struct xe_oa *oa)
{
	struct xe_gt *gt;
	int i, ret;

	for_each_gt(gt, oa->xe, i) {
		ret = xe_oa_init_gt(gt);
		if (ret)
			return ret;
	}

	return 0;
}

static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format)
{
	__set_bit(format, oa->format_mask);
}

static void xe_oa_init_supported_formats(struct xe_oa *oa)
{
	if (GRAPHICS_VER(oa->xe) >= 20) {
		/* Xe2+ */
		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8);
		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8);
		oa_format_add(oa, XE_OA_FORMAT_PEC64u64);
		oa_format_add(oa, XE_OA_FORMAT_PEC64u64_B8_C8);
		oa_format_add(oa, XE_OA_FORMAT_PEC64u32);
		oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G1);
		oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G1);
		oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G2);
		oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G2);
		oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_32_G2_4);
		oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_4_G2_32);
	} else if (GRAPHICS_VERx100(oa->xe) >= 1270) {
		/* XE_METEORLAKE */
		oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8);
		oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8);
		oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8);
		oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8);
		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8);
		oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8);
	} else if (GRAPHICS_VERx100(oa->xe) >= 1255) {
		/* XE_DG2, XE_PVC */
		oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8);
		oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8);
		oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8);
		oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8);
	} else {
		/* Gen12+ */
		xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12);
		oa_format_add(oa, XE_OA_FORMAT_A12);
		oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8);
		oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8);
		oa_format_add(oa, XE_OA_FORMAT_C4_B8);
	}
}

/**
 * xe_oa_init - OA initialization during device probe
 * @xe: @xe_device
 *
 * Return: 0 on success or a negative error code on failure
 */
int xe_oa_init(struct xe_device *xe)
{
	struct xe_oa *oa = &xe->oa;
	int ret;

	/* Support OA only with GuC submission and Gen12+ */
	if (!xe_device_uc_enabled(xe) || GRAPHICS_VER(xe) < 12)
		return 0;

	if (IS_SRIOV_VF(xe))
		return 0;

	oa->xe = xe;
	oa->oa_formats = oa_formats;

	drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock);
	idr_init_base(&oa->metrics_idr, 1);

	ret = xe_oa_init_oa_units(oa);
	if (ret) {
		drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret));
		goto exit;
	}

	xe_oa_init_supported_formats(oa);
	return 0;
exit:
	oa->xe = NULL;
	return ret;
}

static int destroy_config(int id, void *p, void *data)
{
	xe_oa_config_put(p);
	return 0;
}

/**
 * xe_oa_fini - OA de-initialization during device remove
 * @xe: @xe_device
 */
void xe_oa_fini(struct xe_device *xe)
{
	struct xe_oa *oa = &xe->oa;

	if (!oa->xe)
		return;

	idr_for_each(&oa->metrics_idr, destroy_config, oa);
	idr_destroy(&oa->metrics_idr);

	oa->xe = NULL;
}