1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_gt_pagefault.h" 7 8 #include <linux/bitfield.h> 9 #include <linux/circ_buf.h> 10 11 #include <drm/drm_exec.h> 12 #include <drm/drm_managed.h> 13 14 #include "abi/guc_actions_abi.h" 15 #include "xe_bo.h" 16 #include "xe_gt.h" 17 #include "xe_gt_printk.h" 18 #include "xe_gt_stats.h" 19 #include "xe_gt_tlb_invalidation.h" 20 #include "xe_guc.h" 21 #include "xe_guc_ct.h" 22 #include "xe_migrate.h" 23 #include "xe_svm.h" 24 #include "xe_trace_bo.h" 25 #include "xe_vm.h" 26 27 struct pagefault { 28 u64 page_addr; 29 u32 asid; 30 u16 pdata; 31 u8 vfid; 32 u8 access_type; 33 u8 fault_type; 34 u8 fault_level; 35 u8 engine_class; 36 u8 engine_instance; 37 u8 fault_unsuccessful; 38 bool trva_fault; 39 }; 40 41 enum access_type { 42 ACCESS_TYPE_READ = 0, 43 ACCESS_TYPE_WRITE = 1, 44 ACCESS_TYPE_ATOMIC = 2, 45 ACCESS_TYPE_RESERVED = 3, 46 }; 47 48 enum fault_type { 49 NOT_PRESENT = 0, 50 WRITE_ACCESS_VIOLATION = 1, 51 ATOMIC_ACCESS_VIOLATION = 2, 52 }; 53 54 struct acc { 55 u64 va_range_base; 56 u32 asid; 57 u32 sub_granularity; 58 u8 granularity; 59 u8 vfid; 60 u8 access_type; 61 u8 engine_class; 62 u8 engine_instance; 63 }; 64 65 static bool access_is_atomic(enum access_type access_type) 66 { 67 return access_type == ACCESS_TYPE_ATOMIC; 68 } 69 70 static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) 71 { 72 return xe_vm_has_valid_gpu_mapping(tile, vma->tile_present, 73 vma->tile_invalidated); 74 } 75 76 static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, 77 bool atomic, unsigned int id) 78 { 79 struct xe_bo *bo = xe_vma_bo(vma); 80 struct xe_vm *vm = xe_vma_vm(vma); 81 int err; 82 83 err = xe_vm_lock_vma(exec, vma); 84 if (err) 85 return err; 86 87 if (atomic && IS_DGFX(vm->xe)) { 88 if (xe_vma_is_userptr(vma)) { 89 err = -EACCES; 90 return err; 91 } 92 93 /* Migrate to VRAM, move should invalidate the VMA first */ 94 err = xe_bo_migrate(bo, XE_PL_VRAM0 + id); 95 if (err) 96 return err; 97 } else if (bo) { 98 /* Create backing store if needed */ 99 err = xe_bo_validate(bo, vm, true); 100 if (err) 101 return err; 102 } 103 104 return 0; 105 } 106 107 static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma, 108 bool atomic) 109 { 110 struct xe_vm *vm = xe_vma_vm(vma); 111 struct xe_tile *tile = gt_to_tile(gt); 112 struct drm_exec exec; 113 struct dma_fence *fence; 114 ktime_t end = 0; 115 int err; 116 117 lockdep_assert_held_write(&vm->lock); 118 119 xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1); 120 xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024); 121 122 trace_xe_vma_pagefault(vma); 123 124 /* Check if VMA is valid, opportunistic check only */ 125 if (vma_is_valid(tile, vma) && !atomic) 126 return 0; 127 128 retry_userptr: 129 if (xe_vma_is_userptr(vma) && 130 xe_vma_userptr_check_repin(to_userptr_vma(vma))) { 131 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 132 133 err = xe_vma_userptr_pin_pages(uvma); 134 if (err) 135 return err; 136 } 137 138 /* Lock VM and BOs dma-resv */ 139 drm_exec_init(&exec, 0, 0); 140 drm_exec_until_all_locked(&exec) { 141 err = xe_pf_begin(&exec, vma, atomic, tile->id); 142 drm_exec_retry_on_contention(&exec); 143 if (xe_vm_validate_should_retry(&exec, err, &end)) 144 err = -EAGAIN; 145 if (err) 146 goto unlock_dma_resv; 147 148 /* Bind VMA only to the GT that has faulted */ 149 trace_xe_vma_pf_bind(vma); 150 fence = xe_vma_rebind(vm, vma, BIT(tile->id)); 151 if (IS_ERR(fence)) { 152 err = PTR_ERR(fence); 153 if (xe_vm_validate_should_retry(&exec, err, &end)) 154 err = -EAGAIN; 155 goto unlock_dma_resv; 156 } 157 } 158 159 dma_fence_wait(fence, false); 160 dma_fence_put(fence); 161 162 unlock_dma_resv: 163 drm_exec_fini(&exec); 164 if (err == -EAGAIN) 165 goto retry_userptr; 166 167 return err; 168 } 169 170 static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid) 171 { 172 struct xe_vm *vm; 173 174 down_read(&xe->usm.lock); 175 vm = xa_load(&xe->usm.asid_to_vm, asid); 176 if (vm && xe_vm_in_fault_mode(vm)) 177 xe_vm_get(vm); 178 else 179 vm = ERR_PTR(-EINVAL); 180 up_read(&xe->usm.lock); 181 182 return vm; 183 } 184 185 static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) 186 { 187 struct xe_device *xe = gt_to_xe(gt); 188 struct xe_vm *vm; 189 struct xe_vma *vma = NULL; 190 int err; 191 bool atomic; 192 193 /* SW isn't expected to handle TRTT faults */ 194 if (pf->trva_fault) 195 return -EFAULT; 196 197 vm = asid_to_vm(xe, pf->asid); 198 if (IS_ERR(vm)) 199 return PTR_ERR(vm); 200 201 /* 202 * TODO: Change to read lock? Using write lock for simplicity. 203 */ 204 down_write(&vm->lock); 205 206 if (xe_vm_is_closed(vm)) { 207 err = -ENOENT; 208 goto unlock_vm; 209 } 210 211 vma = xe_vm_find_vma_by_addr(vm, pf->page_addr); 212 if (!vma) { 213 err = -EINVAL; 214 goto unlock_vm; 215 } 216 217 atomic = access_is_atomic(pf->access_type); 218 219 if (xe_vma_is_cpu_addr_mirror(vma)) 220 err = xe_svm_handle_pagefault(vm, vma, gt, 221 pf->page_addr, atomic); 222 else 223 err = handle_vma_pagefault(gt, vma, atomic); 224 225 unlock_vm: 226 if (!err) 227 vm->usm.last_fault_vma = vma; 228 up_write(&vm->lock); 229 xe_vm_put(vm); 230 231 return err; 232 } 233 234 static int send_pagefault_reply(struct xe_guc *guc, 235 struct xe_guc_pagefault_reply *reply) 236 { 237 u32 action[] = { 238 XE_GUC_ACTION_PAGE_FAULT_RES_DESC, 239 reply->dw0, 240 reply->dw1, 241 }; 242 243 return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 244 } 245 246 static void print_pagefault(struct xe_gt *gt, struct pagefault *pf) 247 { 248 xe_gt_dbg(gt, "\n\tASID: %d\n" 249 "\tVFID: %d\n" 250 "\tPDATA: 0x%04x\n" 251 "\tFaulted Address: 0x%08x%08x\n" 252 "\tFaultType: %d\n" 253 "\tAccessType: %d\n" 254 "\tFaultLevel: %d\n" 255 "\tEngineClass: %d %s\n" 256 "\tEngineInstance: %d\n", 257 pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), 258 lower_32_bits(pf->page_addr), 259 pf->fault_type, pf->access_type, pf->fault_level, 260 pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class), 261 pf->engine_instance); 262 } 263 264 #define PF_MSG_LEN_DW 4 265 266 static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) 267 { 268 const struct xe_guc_pagefault_desc *desc; 269 bool ret = false; 270 271 spin_lock_irq(&pf_queue->lock); 272 if (pf_queue->tail != pf_queue->head) { 273 desc = (const struct xe_guc_pagefault_desc *) 274 (pf_queue->data + pf_queue->tail); 275 276 pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); 277 pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0); 278 pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0); 279 pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0); 280 pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) << 281 PFD_PDATA_HI_SHIFT; 282 pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0); 283 pf->asid = FIELD_GET(PFD_ASID, desc->dw1); 284 pf->vfid = FIELD_GET(PFD_VFID, desc->dw2); 285 pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2); 286 pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2); 287 pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) << 288 PFD_VIRTUAL_ADDR_HI_SHIFT; 289 pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << 290 PFD_VIRTUAL_ADDR_LO_SHIFT; 291 292 pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) % 293 pf_queue->num_dw; 294 ret = true; 295 } 296 spin_unlock_irq(&pf_queue->lock); 297 298 return ret; 299 } 300 301 static bool pf_queue_full(struct pf_queue *pf_queue) 302 { 303 lockdep_assert_held(&pf_queue->lock); 304 305 return CIRC_SPACE(pf_queue->head, pf_queue->tail, 306 pf_queue->num_dw) <= 307 PF_MSG_LEN_DW; 308 } 309 310 int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) 311 { 312 struct xe_gt *gt = guc_to_gt(guc); 313 struct pf_queue *pf_queue; 314 unsigned long flags; 315 u32 asid; 316 bool full; 317 318 if (unlikely(len != PF_MSG_LEN_DW)) 319 return -EPROTO; 320 321 asid = FIELD_GET(PFD_ASID, msg[1]); 322 pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); 323 324 /* 325 * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0 326 */ 327 xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW)); 328 329 spin_lock_irqsave(&pf_queue->lock, flags); 330 full = pf_queue_full(pf_queue); 331 if (!full) { 332 memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32)); 333 pf_queue->head = (pf_queue->head + len) % 334 pf_queue->num_dw; 335 queue_work(gt->usm.pf_wq, &pf_queue->worker); 336 } else { 337 xe_gt_warn(gt, "PageFault Queue full, shouldn't be possible\n"); 338 } 339 spin_unlock_irqrestore(&pf_queue->lock, flags); 340 341 return full ? -ENOSPC : 0; 342 } 343 344 #define USM_QUEUE_MAX_RUNTIME_MS 20 345 346 static void pf_queue_work_func(struct work_struct *w) 347 { 348 struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); 349 struct xe_gt *gt = pf_queue->gt; 350 struct xe_guc_pagefault_reply reply = {}; 351 struct pagefault pf = {}; 352 unsigned long threshold; 353 int ret; 354 355 threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); 356 357 while (get_pagefault(pf_queue, &pf)) { 358 ret = handle_pagefault(gt, &pf); 359 if (unlikely(ret)) { 360 print_pagefault(gt, &pf); 361 pf.fault_unsuccessful = 1; 362 xe_gt_dbg(gt, "Fault response: Unsuccessful %pe\n", ERR_PTR(ret)); 363 } 364 365 reply.dw0 = FIELD_PREP(PFR_VALID, 1) | 366 FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | 367 FIELD_PREP(PFR_REPLY, PFR_ACCESS) | 368 FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | 369 FIELD_PREP(PFR_ASID, pf.asid); 370 371 reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | 372 FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | 373 FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | 374 FIELD_PREP(PFR_PDATA, pf.pdata); 375 376 send_pagefault_reply(>->uc.guc, &reply); 377 378 if (time_after(jiffies, threshold) && 379 pf_queue->tail != pf_queue->head) { 380 queue_work(gt->usm.pf_wq, w); 381 break; 382 } 383 } 384 } 385 386 static void acc_queue_work_func(struct work_struct *w); 387 388 static void pagefault_fini(void *arg) 389 { 390 struct xe_gt *gt = arg; 391 struct xe_device *xe = gt_to_xe(gt); 392 393 if (!xe->info.has_usm) 394 return; 395 396 destroy_workqueue(gt->usm.acc_wq); 397 destroy_workqueue(gt->usm.pf_wq); 398 } 399 400 static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) 401 { 402 struct xe_device *xe = gt_to_xe(gt); 403 xe_dss_mask_t all_dss; 404 int num_dss, num_eus; 405 406 bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask, 407 XE_MAX_DSS_FUSE_BITS); 408 409 num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS); 410 num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss, 411 XE_MAX_EU_FUSE_BITS) * num_dss; 412 413 /* 414 * user can issue separate page faults per EU and per CS 415 * 416 * XXX: Multiplier required as compute UMD are getting PF queue errors 417 * without it. Follow on why this multiplier is required. 418 */ 419 #define PF_MULTIPLIER 8 420 pf_queue->num_dw = 421 (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; 422 pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); 423 #undef PF_MULTIPLIER 424 425 pf_queue->gt = gt; 426 pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw, 427 sizeof(u32), GFP_KERNEL); 428 if (!pf_queue->data) 429 return -ENOMEM; 430 431 spin_lock_init(&pf_queue->lock); 432 INIT_WORK(&pf_queue->worker, pf_queue_work_func); 433 434 return 0; 435 } 436 437 int xe_gt_pagefault_init(struct xe_gt *gt) 438 { 439 struct xe_device *xe = gt_to_xe(gt); 440 int i, ret = 0; 441 442 if (!xe->info.has_usm) 443 return 0; 444 445 for (i = 0; i < NUM_PF_QUEUE; ++i) { 446 ret = xe_alloc_pf_queue(gt, >->usm.pf_queue[i]); 447 if (ret) 448 return ret; 449 } 450 for (i = 0; i < NUM_ACC_QUEUE; ++i) { 451 gt->usm.acc_queue[i].gt = gt; 452 spin_lock_init(>->usm.acc_queue[i].lock); 453 INIT_WORK(>->usm.acc_queue[i].worker, acc_queue_work_func); 454 } 455 456 gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue", 457 WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE); 458 if (!gt->usm.pf_wq) 459 return -ENOMEM; 460 461 gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", 462 WQ_UNBOUND | WQ_HIGHPRI, 463 NUM_ACC_QUEUE); 464 if (!gt->usm.acc_wq) { 465 destroy_workqueue(gt->usm.pf_wq); 466 return -ENOMEM; 467 } 468 469 return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); 470 } 471 472 void xe_gt_pagefault_reset(struct xe_gt *gt) 473 { 474 struct xe_device *xe = gt_to_xe(gt); 475 int i; 476 477 if (!xe->info.has_usm) 478 return; 479 480 for (i = 0; i < NUM_PF_QUEUE; ++i) { 481 spin_lock_irq(>->usm.pf_queue[i].lock); 482 gt->usm.pf_queue[i].head = 0; 483 gt->usm.pf_queue[i].tail = 0; 484 spin_unlock_irq(>->usm.pf_queue[i].lock); 485 } 486 487 for (i = 0; i < NUM_ACC_QUEUE; ++i) { 488 spin_lock(>->usm.acc_queue[i].lock); 489 gt->usm.acc_queue[i].head = 0; 490 gt->usm.acc_queue[i].tail = 0; 491 spin_unlock(>->usm.acc_queue[i].lock); 492 } 493 } 494 495 static int granularity_in_byte(int val) 496 { 497 switch (val) { 498 case 0: 499 return SZ_128K; 500 case 1: 501 return SZ_2M; 502 case 2: 503 return SZ_16M; 504 case 3: 505 return SZ_64M; 506 default: 507 return 0; 508 } 509 } 510 511 static int sub_granularity_in_byte(int val) 512 { 513 return (granularity_in_byte(val) / 32); 514 } 515 516 static void print_acc(struct xe_gt *gt, struct acc *acc) 517 { 518 xe_gt_warn(gt, "Access counter request:\n" 519 "\tType: %s\n" 520 "\tASID: %d\n" 521 "\tVFID: %d\n" 522 "\tEngine: %d:%d\n" 523 "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" 524 "\tSub_Granularity Vector: 0x%08x\n" 525 "\tVA Range base: 0x%016llx\n", 526 acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL", 527 acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, 528 granularity_in_byte(acc->granularity) / SZ_1K, 529 sub_granularity_in_byte(acc->granularity) / SZ_1K, 530 acc->sub_granularity, acc->va_range_base); 531 } 532 533 static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) 534 { 535 u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * 536 sub_granularity_in_byte(acc->granularity); 537 538 return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K); 539 } 540 541 static int handle_acc(struct xe_gt *gt, struct acc *acc) 542 { 543 struct xe_device *xe = gt_to_xe(gt); 544 struct xe_tile *tile = gt_to_tile(gt); 545 struct drm_exec exec; 546 struct xe_vm *vm; 547 struct xe_vma *vma; 548 int ret = 0; 549 550 /* We only support ACC_TRIGGER at the moment */ 551 if (acc->access_type != ACC_TRIGGER) 552 return -EINVAL; 553 554 vm = asid_to_vm(xe, acc->asid); 555 if (IS_ERR(vm)) 556 return PTR_ERR(vm); 557 558 down_read(&vm->lock); 559 560 /* Lookup VMA */ 561 vma = get_acc_vma(vm, acc); 562 if (!vma) { 563 ret = -EINVAL; 564 goto unlock_vm; 565 } 566 567 trace_xe_vma_acc(vma); 568 569 /* Userptr or null can't be migrated, nothing to do */ 570 if (xe_vma_has_no_bo(vma)) 571 goto unlock_vm; 572 573 /* Lock VM and BOs dma-resv */ 574 drm_exec_init(&exec, 0, 0); 575 drm_exec_until_all_locked(&exec) { 576 ret = xe_pf_begin(&exec, vma, true, tile->id); 577 drm_exec_retry_on_contention(&exec); 578 if (ret) 579 break; 580 } 581 582 drm_exec_fini(&exec); 583 unlock_vm: 584 up_read(&vm->lock); 585 xe_vm_put(vm); 586 587 return ret; 588 } 589 590 #define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) 591 592 #define ACC_MSG_LEN_DW 4 593 594 static bool get_acc(struct acc_queue *acc_queue, struct acc *acc) 595 { 596 const struct xe_guc_acc_desc *desc; 597 bool ret = false; 598 599 spin_lock(&acc_queue->lock); 600 if (acc_queue->tail != acc_queue->head) { 601 desc = (const struct xe_guc_acc_desc *) 602 (acc_queue->data + acc_queue->tail); 603 604 acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); 605 acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | 606 FIELD_GET(ACC_SUBG_LO, desc->dw0); 607 acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1); 608 acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1); 609 acc->asid = FIELD_GET(ACC_ASID, desc->dw1); 610 acc->vfid = FIELD_GET(ACC_VFID, desc->dw2); 611 acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); 612 acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, 613 desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); 614 615 acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) % 616 ACC_QUEUE_NUM_DW; 617 ret = true; 618 } 619 spin_unlock(&acc_queue->lock); 620 621 return ret; 622 } 623 624 static void acc_queue_work_func(struct work_struct *w) 625 { 626 struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); 627 struct xe_gt *gt = acc_queue->gt; 628 struct acc acc = {}; 629 unsigned long threshold; 630 int ret; 631 632 threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); 633 634 while (get_acc(acc_queue, &acc)) { 635 ret = handle_acc(gt, &acc); 636 if (unlikely(ret)) { 637 print_acc(gt, &acc); 638 xe_gt_warn(gt, "ACC: Unsuccessful %pe\n", ERR_PTR(ret)); 639 } 640 641 if (time_after(jiffies, threshold) && 642 acc_queue->tail != acc_queue->head) { 643 queue_work(gt->usm.acc_wq, w); 644 break; 645 } 646 } 647 } 648 649 static bool acc_queue_full(struct acc_queue *acc_queue) 650 { 651 lockdep_assert_held(&acc_queue->lock); 652 653 return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <= 654 ACC_MSG_LEN_DW; 655 } 656 657 int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) 658 { 659 struct xe_gt *gt = guc_to_gt(guc); 660 struct acc_queue *acc_queue; 661 u32 asid; 662 bool full; 663 664 /* 665 * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0 666 */ 667 BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW); 668 669 if (unlikely(len != ACC_MSG_LEN_DW)) 670 return -EPROTO; 671 672 asid = FIELD_GET(ACC_ASID, msg[1]); 673 acc_queue = >->usm.acc_queue[asid % NUM_ACC_QUEUE]; 674 675 spin_lock(&acc_queue->lock); 676 full = acc_queue_full(acc_queue); 677 if (!full) { 678 memcpy(acc_queue->data + acc_queue->head, msg, 679 len * sizeof(u32)); 680 acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW; 681 queue_work(gt->usm.acc_wq, &acc_queue->worker); 682 } else { 683 xe_gt_warn(gt, "ACC Queue full, dropping ACC\n"); 684 } 685 spin_unlock(&acc_queue->lock); 686 687 return full ? -ENOSPC : 0; 688 } 689