Lines Matching +full:f +full:- +full:tile
1 // SPDX-License-Identifier: MIT
3 * Copyright © 2023-2024 Intel Corporation
149 return tail >= head ? tail - head :
150 tail + stream->oa_buffer.circ_size - head;
155 return ptr + n >= stream->oa_buffer.circ_size ?
156 ptr + n - stream->oa_buffer.circ_size : ptr + n;
164 kfree(oa_config->regs);
174 kref_put(&oa_config->ref, xe_oa_config_release);
179 return kref_get_unless_zero(&oa_config->ref) ? oa_config : NULL;
187 oa_config = idr_find(&oa->metrics_idr, metrics_set);
197 xe_oa_config_put(oa_bo->oa_config);
198 xe_bb_free(oa_bo->bb, last_fence);
204 return &stream->oa_unit->regs;
209 return xe_mmio_read32(&stream->gt->mmio, __oa_regs(stream)->oa_tail_ptr) &
214 ((__s)->oa_buffer.format->header == HDR_64_BIT)
246 u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
248 int report_size = stream->oa_buffer.format->size;
251 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
254 hw_tail -= gtt_offset;
261 partial_report_size = xe_oa_circ_diff(stream, hw_tail, stream->oa_buffer.tail);
277 while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) {
278 void *report = stream->oa_buffer.vaddr + tail;
287 drm_dbg(&stream->oa->xe->drm,
289 stream->oa_buffer.head, tail, hw_tail);
291 stream->oa_buffer.tail = tail;
293 available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head);
294 stream->pollin = available >= stream->wait_num_reports * report_size;
296 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
298 return stream->pollin;
307 wake_up(&stream->poll_wq);
309 hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns));
317 int report_size = stream->oa_buffer.format->size;
321 if ((count - *offset) < report_size)
322 return -ENOSPC;
326 oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
327 report_size_partial = oa_buf_end - report;
331 return -EFAULT;
334 if (copy_to_user(buf, stream->oa_buffer.vaddr,
335 report_size - report_size_partial))
336 return -EFAULT;
338 return -EFAULT;
349 int report_size = stream->oa_buffer.format->size;
350 u8 *oa_buf_base = stream->oa_buffer.vaddr;
351 u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
357 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
358 head = stream->oa_buffer.head;
359 tail = stream->oa_buffer.tail;
360 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
362 xe_assert(stream->oa->xe,
363 head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size);
373 if (!(stream->oa_buffer.circ_size % report_size)) {
378 u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size;
379 u32 part = oa_buf_end - report;
386 memset(oa_buf_base, 0, report_size - part);
392 struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr;
394 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
395 xe_mmio_write32(&stream->gt->mmio, oaheadptr,
397 stream->oa_buffer.head = head;
398 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
406 u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
407 int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo));
409 struct xe_mmio *mmio = &stream->gt->mmio;
417 size_exponent > 24 ? size_exponent - 20 : size_exponent - 17);
419 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
421 xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0);
422 xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr,
424 stream->oa_buffer.head = 0;
429 xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf);
430 xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr,
434 stream->oa_buffer.tail = 0;
436 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
439 memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
444 return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) |
445 REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) |
446 REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size);
453 if (stream->hwe->class != XE_ENGINE_CLASS_COMPUTE)
456 val = REG_FIELD_PREP(OAG_OACONTROL_OA_CCS_SELECT_MASK, stream->hwe->instance);
457 xe_assert(stream->oa->xe,
458 REG_FIELD_GET(OAG_OACONTROL_OA_CCS_SELECT_MASK, val) == stream->hwe->instance);
464 return stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ?
470 const struct xe_oa_format *format = stream->oa_buffer.format;
475 * BSpec: 46822: Bit 0. Even if stream->sample is 0, for OAR to function, the OA
481 val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) |
484 if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
485 stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
488 xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val);
493 struct xe_mmio *mmio = &stream->gt->mmio;
495 xe_mmio_rmw32(mmio, __oa_regs(stream)->oa_ctrl, __oactrl_used_bits(stream), 0);
496 if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl,
498 drm_err(&stream->oa->xe->drm,
501 if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) {
505 drm_err(&stream->oa->xe->drm,
513 if (!stream->periodic)
514 return -EINVAL;
516 return wait_event_interruptible(stream->poll_wq,
526 /* Only clear our bits to avoid side-effects */
527 stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status,
530 * Signal to userspace that there is non-zero OA status to read via
533 if (stream->oa_status & OASTATUS_RELEVANT_BITS)
534 return -EIO;
542 struct xe_oa_stream *stream = file->private_data;
547 if (!stream->enabled || !stream->sample)
548 return -EINVAL;
550 if (!(file->f_flags & O_NONBLOCK)) {
556 mutex_lock(&stream->stream_lock);
558 mutex_unlock(&stream->stream_lock);
562 mutex_lock(&stream->stream_lock);
564 mutex_unlock(&stream->stream_lock);
569 * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC,
573 * Also in case of -EIO, we have already waited for data before returning
574 * -EIO, so no need to wait again
576 if (ret != -ENOSPC && ret != -EIO)
577 stream->pollin = false;
579 /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */
580 return offset ?: (ret ?: -EAGAIN);
588 poll_wait(file, &stream->poll_wq, wait);
596 if (stream->pollin)
604 struct xe_oa_stream *stream = file->private_data;
607 mutex_lock(&stream->stream_lock);
609 mutex_unlock(&stream->stream_lock);
616 if (q->vm) {
617 down_read(&q->vm->lock);
618 xe_vm_lock(q->vm, false);
624 if (q->vm) {
625 xe_vm_unlock(q->vm);
626 up_read(&q->vm->lock);
633 struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q;
645 job->ggtt = true;
648 for (int i = 0; i < stream->num_syncs && !err; i++)
649 err = xe_sync_entry_add_deps(&stream->syncs[i], job);
651 drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err);
657 fence = dma_fence_get(&job->drm.s_fence->finished);
678 u32 n_lri = min_t(u32, n_regs - i,
681 bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_lri);
683 bb->cs[bb->len++] = reg_data[i].addr.addr;
684 bb->cs[bb->len++] = reg_data[i].value;
702 xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
709 xe_oa_config_put(stream->oa_config);
710 llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
711 free_oa_config_bo(oa_bo, stream->last_fence);
712 dma_fence_put(stream->last_fence);
721 bb = xe_bb_new(stream->gt, 2 * count + 1, false);
746 const struct xe_oa_format *format = stream->oa_buffer.format;
752 OACTXCONTROL(stream->hwe->mmio_base),
760 RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
771 const struct xe_oa_format *format = stream->oa_buffer.format;
776 OACTXCONTROL(stream->hwe->mmio_base),
784 RING_CONTEXT_CONTROL(stream->hwe->mmio_base),
792 xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl,
800 switch (stream->hwe->class) {
816 enable && stream && stream->sample ?
822 struct xe_mmio *mmio = &stream->gt->mmio;
826 if (XE_GT_WA(stream->gt, 1508761755)) {
827 xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
829 xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
833 xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
837 if (stream->exec_q)
841 if (GT_VER(stream->gt) < 35)
845 (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
850 if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM ||
851 stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) &&
852 GRAPHICS_VER(stream->oa->xe) >= 30)
858 struct xe_oa_unit *u = stream->oa_unit;
859 struct xe_gt *gt = stream->hwe->gt;
861 if (WARN_ON(stream != u->exclusive_stream))
864 WRITE_ONCE(u->exclusive_stream, NULL);
866 mutex_destroy(&stream->stream_lock);
869 xe_exec_queue_put(stream->k_exec_q);
873 xe_force_wake_put(gt_to_fw(gt), stream->fw_ref);
874 xe_pm_runtime_put(stream->oa->xe);
877 if (stream->override_gucrc)
878 xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
881 xe_file_put(stream->xef);
888 bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile,
894 stream->oa_buffer.bo = bo;
896 xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0);
897 stream->oa_buffer.vaddr = bo->vmap.vaddr;
910 return ERR_PTR(-ENOMEM);
912 config_length = num_lri_dwords(oa_config->regs_len);
915 bb = xe_bb_new(stream->gt, config_length, false);
919 write_cs_mi_lri(bb, oa_config->regs, oa_config->regs_len);
921 oa_bo->bb = bb;
922 oa_bo->oa_config = xe_oa_config_get(oa_config);
923 llist_add(&oa_bo->node, &stream->oa_config_bos);
937 llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
938 if (oa_bo->oa_config == oa_config &&
939 memcmp(oa_bo->oa_config->uuid, oa_config->uuid,
940 sizeof(oa_config->uuid)) == 0)
951 dma_fence_put(stream->last_fence);
952 stream->last_fence = dma_fence_get(fence);
960 dma_fence_signal(&ofence->base);
961 dma_fence_put(&ofence->base);
971 INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn);
972 queue_delayed_work(system_unbound_wq, &ofence->work,
1002 err = -ENOMEM;
1013 fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
1020 spin_lock_init(&ofence->lock);
1021 dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0);
1023 for (i = 0; i < stream->num_syncs; i++) {
1024 if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
1026 xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
1031 dma_fence_get(&ofence->base);
1036 /* Add job fence callback to schedule work to signal ofence->base */
1037 err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
1038 xe_gt_assert(stream->gt, !err || err == -ENOENT);
1039 if (err == -ENOENT)
1040 xe_oa_config_cb(fence, &ofence->cb);
1044 dma_fence_wait(&ofence->base, false);
1045 dma_fence_put(&ofence->base);
1049 for (i = 0; i < stream->num_syncs; i++)
1050 xe_sync_entry_cleanup(&stream->syncs[i]);
1051 kfree(stream->syncs);
1063 stream->sample ?
1070 xe_bo_size(stream->oa_buffer.bo) > SZ_16M ?
1076 struct xe_mmio *mmio = &stream->gt->mmio;
1084 if (XE_GT_WA(stream->gt, 1508761755)) {
1085 xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
1087 xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
1095 if (GRAPHICS_VER(stream->oa->xe) >= 20)
1102 xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
1108 xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
1112 stream->period_exponent)) : 0);
1120 (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
1123 if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM ||
1124 stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) &&
1125 GRAPHICS_VER(stream->oa->xe) >= 30)
1129 if (stream->exec_q) {
1135 return xe_oa_emit_oa_config(stream, stream->oa_config);
1146 for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
1147 const struct xe_oa_format *f = &oa->oa_formats[idx];
1149 if (counter_size == f->counter_size && bc_report == f->bc_report &&
1150 type == f->type && counter_sel == f->counter_select) {
1156 return -EINVAL;
1164 for_each_gt(gt, oa->xe, gt_id) {
1165 for (i = 0; i < gt->oa.num_oa_units; i++) {
1166 struct xe_oa_unit *u = &gt->oa.oa_unit[i];
1168 if (u->oa_unit_id == oa_unit_id)
1179 param->oa_unit = xe_oa_lookup_oa_unit(oa, value);
1180 if (!param->oa_unit) {
1181 drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
1182 return -EINVAL;
1190 param->sample = value;
1197 param->metric_set = value;
1204 int ret = decode_oa_format(oa, value, &param->oa_format);
1207 drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
1219 drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
1220 return -EINVAL;
1222 param->period_exponent = value;
1229 param->disabled = value;
1236 param->exec_queue_id = value;
1243 param->engine_instance = value;
1250 param->no_preempt = value;
1257 param->num_syncs = value;
1264 param->syncs_user = u64_to_user_ptr(value);
1272 drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value);
1273 return -EINVAL;
1275 param->oa_buffer_size = value;
1283 drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value);
1284 return -EINVAL;
1286 param->wait_num_reports = value;
1293 return -EINVAL;
1339 if (XE_IOCTL_DBG(oa->xe, err))
1340 return -EFAULT;
1345 if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) ||
1346 XE_IOCTL_DBG(oa->xe, ext.pad))
1347 return -EINVAL;
1372 if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
1373 return -E2BIG;
1376 if (XE_IOCTL_DBG(oa->xe, err))
1377 return -EFAULT;
1379 if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
1380 XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
1381 return -EINVAL;
1385 if (XE_IOCTL_DBG(oa->xe, err))
1400 if (param->num_syncs && !param->syncs_user) {
1401 drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n");
1402 ret = -EINVAL;
1406 if (param->num_syncs) {
1407 param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL);
1408 if (!param->syncs) {
1409 ret = -ENOMEM;
1414 for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) {
1415 ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs],
1416 &param->syncs_user[num_syncs],
1417 stream->ufence_syncobj,
1418 ++stream->ufence_timeline_value, 0);
1422 if (xe_sync_is_ufence(&param->syncs[num_syncs]))
1426 if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) {
1427 ret = -EINVAL;
1434 while (num_syncs--)
1435 xe_sync_entry_cleanup(&param->syncs[num_syncs]);
1436 kfree(param->syncs);
1443 stream->pollin = false;
1447 if (stream->sample)
1448 hrtimer_start(&stream->poll_check_timer,
1449 ns_to_ktime(stream->poll_period_ns),
1457 if (stream->sample)
1458 hrtimer_cancel(&stream->poll_check_timer);
1463 struct xe_exec_queue *q = stream->exec_q;
1467 ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us);
1468 ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us);
1473 drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n", __func__, ret1, ret2);
1479 struct xe_exec_queue *q = stream->exec_q;
1483 ret = q->ops->set_timeslice(q, 0);
1487 ret = q->ops->set_preempt_timeout(q, 0);
1494 drm_dbg(&stream->oa->xe->drm, "%s failed %d\n", __func__, ret);
1500 if (stream->enabled)
1503 if (stream->no_preempt) {
1512 stream->enabled = true;
1520 if (!stream->enabled)
1525 if (stream->no_preempt)
1528 stream->enabled = false;
1535 long ret = stream->oa_config->id;
1539 err = xe_oa_user_extensions(stream->oa, XE_OA_USER_EXTN_FROM_CONFIG, arg, 0, &param);
1543 config = xe_oa_get_oa_config(stream->oa, param.metric_set);
1545 return -ENODEV;
1547 param.xef = stream->xef;
1548 err = xe_oa_parse_syncs(stream->oa, stream, &param);
1552 stream->num_syncs = param.num_syncs;
1553 stream->syncs = param.syncs;
1557 config = xchg(&stream->oa_config, config);
1558 drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n",
1559 stream->oa_config->uuid);
1574 if (stream->oa_status & OASTATUS_REPORT_LOST)
1576 if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW)
1578 if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW)
1580 if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL)
1584 return -EFAULT;
1591 struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(stream->oa_buffer.bo), };
1595 return -EFAULT;
1617 return -EINVAL;
1624 struct xe_oa_stream *stream = file->private_data;
1627 mutex_lock(&stream->stream_lock);
1629 mutex_unlock(&stream->stream_lock);
1636 if (stream->enabled)
1641 if (stream->exec_q)
1642 xe_exec_queue_put(stream->exec_q);
1644 drm_syncobj_put(stream->ufence_syncobj);
1650 struct xe_oa_stream *stream = file->private_data;
1651 struct xe_gt *gt = stream->gt;
1654 mutex_lock(&gt->oa.gt_lock);
1656 mutex_unlock(&gt->oa.gt_lock);
1660 drm_dev_put(&gt_to_xe(gt)->drm);
1667 struct xe_oa_stream *stream = file->private_data;
1668 struct xe_bo *bo = stream->oa_buffer.bo;
1669 unsigned long start = vma->vm_start;
1673 drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n");
1674 return -EACCES;
1678 if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) {
1679 drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n");
1680 return -EINVAL;
1687 if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_SHARED | VM_MAYSHARE)) {
1688 drm_dbg(&stream->oa->xe->drm, "mmap must be read only\n");
1689 return -EINVAL;
1694 xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma));
1695 for (i = 0; i < bo->ttm.ttm->num_pages; i++) {
1696 ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]),
1697 PAGE_SIZE, vma->vm_page_prot);
1719 struct xe_gt *gt = param->hwe->gt;
1722 stream->exec_q = param->exec_q;
1723 stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS;
1724 stream->oa_unit = param->oa_unit;
1725 stream->hwe = param->hwe;
1726 stream->gt = stream->hwe->gt;
1727 stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format];
1729 stream->sample = param->sample;
1730 stream->periodic = param->period_exponent >= 0;
1731 stream->period_exponent = param->period_exponent;
1732 stream->no_preempt = param->no_preempt;
1733 stream->wait_num_reports = param->wait_num_reports;
1735 stream->xef = xe_file_get(param->xef);
1736 stream->num_syncs = param->num_syncs;
1737 stream->syncs = param->syncs;
1741 * of buffer, making the OA buffer effectively a non-power-of-2 size circular
1744 if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
1745 stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
1746 stream->oa_buffer.circ_size =
1747 param->oa_buffer_size -
1748 param->oa_buffer_size % stream->oa_buffer.format->size;
1750 stream->oa_buffer.circ_size = param->oa_buffer_size;
1752 stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set);
1753 if (!stream->oa_config) {
1754 drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set);
1755 ret = -EINVAL;
1763 if (XE_GT_WA(stream->gt, 1509372804)) {
1764 ret = xe_guc_pc_override_gucrc_mode(&gt->uc.guc.pc,
1769 stream->override_gucrc = true;
1773 xe_pm_runtime_get(stream->oa->xe);
1774 stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
1775 if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FORCEWAKE_ALL)) {
1776 ret = -ETIMEDOUT;
1780 ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size);
1784 stream->k_exec_q = xe_exec_queue_create(stream->oa->xe, NULL,
1785 BIT(stream->hwe->logical_instance), 1,
1786 stream->hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
1787 if (IS_ERR(stream->k_exec_q)) {
1788 ret = PTR_ERR(stream->k_exec_q);
1789 drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_exec_queue_create failed=%d",
1790 stream->gt->info.id, stream->hwe->name, ret);
1796 drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n");
1800 drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n",
1801 stream->oa_config->uuid);
1803 WRITE_ONCE(stream->oa_unit->exclusive_stream, stream);
1805 hrtimer_setup(&stream->poll_check_timer, xe_oa_poll_check_timer_cb, CLOCK_MONOTONIC,
1807 init_waitqueue_head(&stream->poll_wq);
1809 spin_lock_init(&stream->oa_buffer.ptr_lock);
1810 mutex_init(&stream->stream_lock);
1816 xe_exec_queue_put(stream->k_exec_q);
1820 xe_force_wake_put(gt_to_fw(gt), stream->fw_ref);
1821 xe_pm_runtime_put(stream->oa->xe);
1822 if (stream->override_gucrc)
1823 xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
1827 xe_file_put(stream->xef);
1840 if (param->oa_unit->exclusive_stream) {
1841 drm_dbg(&oa->xe->drm, "OA unit already in use\n");
1842 ret = -EBUSY;
1853 ret = -ENOMEM;
1856 stream->ufence_syncobj = ufence_syncobj;
1857 stream->oa = oa;
1865 while (param->num_syncs--)
1866 xe_sync_entry_cleanup(&param->syncs[param->num_syncs]);
1867 kfree(param->syncs);
1871 if (!param->disabled) {
1884 drm_dev_get(&stream->oa->xe->drm);
1888 if (!param->disabled)
1901 * xe_oa_timestamp_frequency - Return OA timestamp frequency
1914 reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
1918 return gt->info.reference_clock << (3 - shift);
1920 return gt->info.reference_clock;
1929 return div_u64(nom + den - 1, den);
1934 switch (param->oa_unit->type) {
1947 * xe_oa_unit_id - Return OA unit ID for a hardware engine
1954 return hwe->oa_unit && hwe->oa_unit->num_engines ?
1955 hwe->oa_unit->oa_unit_id : U16_MAX;
1966 if (!param->oa_unit)
1967 param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0];
1970 if (param->exec_q) {
1971 param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class,
1972 param->engine_instance, true);
1973 if (!param->hwe || param->hwe->oa_unit != param->oa_unit)
1979 for_each_hw_engine(hwe, param->oa_unit->gt, id) {
1980 if (hwe->oa_unit == param->oa_unit) {
1981 param->hwe = hwe;
1987 for_each_hw_engine(hwe, param->oa_unit->gt, id) {
1988 if (!hwe->oa_unit)
1991 param->hwe = hwe;
1995 drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n",
1996 param->exec_q ? param->exec_q->class : -1,
1997 param->engine_instance, param->oa_unit->oa_unit_id);
1998 ret = -EINVAL;
2004 * xe_oa_stream_open_ioctl - Opens an OA stream
2016 struct xe_oa *oa = &xe->oa;
2019 const struct xe_oa_format *f;
2023 if (!oa->xe) {
2024 drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
2025 return -ENODEV;
2029 param.period_exponent = -1;
2036 if (XE_IOCTL_DBG(oa->xe, !param.exec_q))
2037 return -ENOENT;
2039 if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1))
2040 return -EOPNOTSUPP;
2052 drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n");
2053 ret = -EINVAL;
2060 drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n");
2061 ret = -EACCES;
2066 drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n");
2067 ret = -EINVAL;
2075 f = &oa->oa_formats[param.oa_format];
2076 if (!param.oa_format || !f->size ||
2077 !oa_unit_supports_oa_format(&param, f->type)) {
2078 drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n",
2079 param.oa_format, f->type, f->size, param.hwe->class);
2080 ret = -EINVAL;
2089 drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n");
2090 ret = -EINVAL;
2093 oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent);
2095 drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
2103 if (param.wait_num_reports > param.oa_buffer_size / f->size) {
2104 drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports);
2105 ret = -EINVAL;
2109 mutex_lock(&param.hwe->gt->oa.gt_lock);
2111 mutex_unlock(&param.hwe->gt->oa.gt_lock);
2145 while (table->start && table->end) {
2146 if (addr >= table->start && addr <= table->end)
2157 { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
2163 { .start = 0xd900, .end = 0xd91c }, /* OAG_OASTARTTRIG[1-8] */
2164 { .start = 0xd920, .end = 0xd93c }, /* OAG_OAREPORTTRIG1[1-8] */
2165 { .start = 0xd940, .end = 0xd97c }, /* OAG_CEC[0-7][0-1] */
2166 { .start = 0xdc00, .end = 0xdc3c }, /* OAG_SCEC[0-7][0-1] */
2173 { .start = 0x393000, .end = 0x39301c }, /* OAM_STARTTRIG1[1-8] */
2174 { .start = 0x393020, .end = 0x39303c }, /* OAM_REPORTTRIG1[1-8] */
2175 { .start = 0x393040, .end = 0x39307c }, /* OAM_CEC[0-7][0-1] */
2176 { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */
2181 { .start = 0x393200, .end = 0x39323C }, /* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */
2182 { .start = 0x394200, .end = 0x39423C }, /* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */
2183 { .start = 0x394A00, .end = 0x394A3C }, /* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */
2192 (GRAPHICS_VER(oa->xe) >= 20 &&
2197 { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */
2198 { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
2206 { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */
2207 { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
2222 { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */
2228 if (GRAPHICS_VER(oa->xe) >= 20)
2230 else if (GRAPHICS_VERx100(oa->xe) >= 1270)
2253 return ERR_PTR(-ENOMEM);
2263 drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr);
2264 err = -EINVAL;
2293 return sysfs_emit(buf, "%d\n", oa_config->id);
2299 sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
2300 oa_config->sysfs_metric_id.attr.name = "id";
2301 oa_config->sysfs_metric_id.attr.mode = 0444;
2302 oa_config->sysfs_metric_id.show = show_dynamic_id;
2303 oa_config->sysfs_metric_id.store = NULL;
2305 oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
2306 oa_config->attrs[1] = NULL;
2308 oa_config->sysfs_metric.name = oa_config->uuid;
2309 oa_config->sysfs_metric.attrs = oa_config->attrs;
2311 return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric);
2315 * xe_oa_add_config_ioctl - Adds one OA config
2327 struct xe_oa *oa = &xe->oa;
2334 if (!oa->xe) {
2335 drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
2336 return -ENODEV;
2340 drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n");
2341 return -EACCES;
2345 if (XE_IOCTL_DBG(oa->xe, err))
2346 return -EFAULT;
2348 if (XE_IOCTL_DBG(oa->xe, arg->extensions) ||
2349 XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) ||
2350 XE_IOCTL_DBG(oa->xe, !arg->n_regs))
2351 return -EINVAL;
2355 return -ENOMEM;
2357 oa_config->oa = oa;
2358 kref_init(&oa_config->ref);
2360 if (!uuid_is_valid(arg->uuid)) {
2361 drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n");
2362 err = -EINVAL;
2366 /* Last character in oa_config->uuid will be 0 because oa_config is kzalloc */
2367 memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid));
2369 oa_config->regs_len = arg->n_regs;
2371 u64_to_user_ptr(arg->regs_ptr),
2372 arg->n_regs);
2374 drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n");
2378 oa_config->regs = regs;
2380 err = mutex_lock_interruptible(&oa->metrics_lock);
2385 idr_for_each_entry(&oa->metrics_idr, tmp, id) {
2386 if (!strcmp(tmp->uuid, oa_config->uuid)) {
2387 drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n");
2388 err = -EADDRINUSE;
2395 drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n");
2399 oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL);
2400 if (oa_config->id < 0) {
2401 drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n");
2402 err = oa_config->id;
2406 id = oa_config->id;
2408 drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, id);
2410 mutex_unlock(&oa->metrics_lock);
2415 mutex_unlock(&oa->metrics_lock);
2418 drm_dbg(&oa->xe->drm, "Failed to add new OA config\n");
2423 * xe_oa_remove_config_ioctl - Removes one OA config
2431 struct xe_oa *oa = &xe->oa;
2436 if (!oa->xe) {
2437 drm_dbg(&xe->drm, "xe oa interface not available for this system\n");
2438 return -ENODEV;
2442 drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n");
2443 return -EACCES;
2447 if (XE_IOCTL_DBG(oa->xe, ret))
2450 ret = mutex_lock_interruptible(&oa->metrics_lock);
2454 oa_config = idr_find(&oa->metrics_idr, arg);
2456 drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n");
2457 ret = -ENOENT;
2461 WARN_ON(arg != oa_config->id);
2463 sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric);
2464 idr_remove(&oa->metrics_idr, arg);
2466 mutex_unlock(&oa->metrics_lock);
2468 drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
2475 mutex_unlock(&oa->metrics_lock);
2483 if (!oa->metrics_kobj)
2486 kobject_put(oa->metrics_kobj);
2487 oa->metrics_kobj = NULL;
2491 * xe_oa_register - Xe OA registration
2498 struct xe_oa *oa = &xe->oa;
2500 if (!oa->xe)
2503 oa->metrics_kobj = kobject_create_and_add("metrics",
2504 &xe->drm.primary->kdev->kobj);
2505 if (!oa->metrics_kobj)
2506 return -ENOMEM;
2508 return devm_add_action_or_reset(xe->drm.dev, xe_oa_unregister, oa);
2523 if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270)
2526 xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt));
2528 if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20)
2537 else if (hwe->engine_id == XE_HW_ENGINE_GSCCS0)
2539 else if (!IS_DGFX(gt_to_xe(hwe->gt)))
2541 else if (hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE)
2542 return (hwe->instance / 2 & 0x1) + 1;
2543 else if (hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE)
2544 return (hwe->instance & 0x1) + 1;
2551 switch (hwe->class) {
2604 int i, num_units = gt->oa.num_oa_units;
2607 struct xe_oa_unit *u = &gt->oa.oa_unit[i];
2610 u->regs = __oag_regs();
2611 u->type = DRM_XE_OA_UNIT_TYPE_OAG;
2614 u->regs = __oam_regs(oam_base_addr[i]);
2615 u->type = i == XE_OAM_UNIT_SAG && GRAPHICS_VER(gt_to_xe(gt)) >= 20 ?
2619 u->gt = gt;
2621 xe_mmio_write32(&gt->mmio, u->regs.oa_ctrl, 0);
2624 xe_mmio_write32(&gt->mmio, u->regs.oa_debug,
2628 u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++;
2639 u = drmm_kcalloc(&gt_to_xe(gt)->drm, num_oa_units, sizeof(*u), GFP_KERNEL);
2641 return -ENOMEM;
2646 hwe->oa_unit = NULL;
2649 hwe->oa_unit = &u[index];
2653 gt->oa.num_oa_units = num_oa_units;
2654 gt->oa.oa_unit = u;
2658 drmm_mutex_init(&gt_to_xe(gt)->drm, &gt->oa.gt_lock);
2671 for (i = 0; i < gt->oa.num_oa_units; i++) {
2672 u = &gt->oa.oa_unit[i];
2677 if (xe_oa_unit_id(hwe) == u->oa_unit_id)
2678 n += scnprintf(buf + n, sizeof(buf) - n, "%s ", hwe->name);
2680 xe_gt_dbg(gt, "oa_unit %d, type %d, Engines: %s\n", u->oa_unit_id, u->type, buf);
2689 for_each_gt(gt, oa->xe, gt_id)
2703 for_each_gt(gt, oa->xe, i) {
2716 __set_bit(format, oa->format_mask);
2721 if (GRAPHICS_VER(oa->xe) >= 20) {
2734 } else if (GRAPHICS_VERx100(oa->xe) >= 1270) {
2742 } else if (GRAPHICS_VERx100(oa->xe) >= 1255) {
2750 xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12);
2768 struct xe_oa *oa = &xe->oa;
2770 if (!oa->xe)
2773 idr_for_each(&oa->metrics_idr, destroy_config, oa);
2774 idr_destroy(&oa->metrics_idr);
2776 oa->xe = NULL;
2780 * xe_oa_init - OA initialization during device probe
2787 struct xe_oa *oa = &xe->oa;
2797 oa->xe = xe;
2798 oa->oa_formats = oa_formats;
2800 drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock);
2801 idr_init_base(&oa->metrics_idr, 1);
2805 drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret));
2811 return devm_add_action_or_reset(xe->drm.dev, xe_oa_fini, xe);
2814 oa->xe = NULL;