// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_gt_pagefault.h"

#include <linux/bitfield.h>
#include <linux/circ_buf.h>

#include <drm/drm_exec.h>
#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "xe_bo.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_gt_stats.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_migrate.h"
#include "xe_svm.h"
#include "xe_trace_bo.h"
#include "xe_vm.h"
#include "xe_vram_types.h"

struct pagefault {
	u64 page_addr;
	u32 asid;
	u16 pdata;
	u8 vfid;
	u8 access_type;
	u8 fault_type;
	u8 fault_level;
	u8 engine_class;
	u8 engine_instance;
	u8 fault_unsuccessful;
	bool trva_fault;
};

enum access_type {
	ACCESS_TYPE_READ = 0,
	ACCESS_TYPE_WRITE = 1,
	ACCESS_TYPE_ATOMIC = 2,
	ACCESS_TYPE_RESERVED = 3,
};

enum fault_type {
	NOT_PRESENT = 0,
	WRITE_ACCESS_VIOLATION = 1,
	ATOMIC_ACCESS_VIOLATION = 2,
};

struct acc {
	u64 va_range_base;
	u32 asid;
	u32 sub_granularity;
	u8 granularity;
	u8 vfid;
	u8 access_type;
	u8 engine_class;
	u8 engine_instance;
};

static bool access_is_atomic(enum access_type access_type)
{
	return access_type == ACCESS_TYPE_ATOMIC;
}

static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
{
	return xe_vm_has_valid_gpu_mapping(tile, vma->tile_present,
					   vma->tile_invalidated);
}

static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
		       bool need_vram_move, struct xe_vram_region *vram)
{
	struct xe_bo *bo = xe_vma_bo(vma);
	struct xe_vm *vm = xe_vma_vm(vma);
	int err;

	err = xe_vm_lock_vma(exec, vma);
	if (err)
		return err;

	if (!bo)
		return 0;

	return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) :
				xe_bo_validate(bo, vm, true, exec);
}
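
/*
 * Handle a fault on a regular (non CPU-address-mirror) VMA: pin userptr
 * pages if needed, lock the VM and BO dma-resv, migrate the BO to VRAM when
 * an atomic access requires it, and rebind the VMA on the faulting tile only.
 */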
static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma,
				bool atomic)
{
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct dma_fence *fence;
	int err, needs_vram;

	lockdep_assert_held_write(&vm->lock);

	needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
	if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma)))
		return needs_vram < 0 ? needs_vram : -EACCES;

	xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1);
	xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024);

	trace_xe_vma_pagefault(vma);

	/* Check if VMA is valid, opportunistic check only */
	if (vma_is_valid(tile, vma) && !atomic)
		return 0;

retry_userptr:
	if (xe_vma_is_userptr(vma) &&
	    xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
		struct xe_userptr_vma *uvma = to_userptr_vma(vma);

		err = xe_vma_userptr_pin_pages(uvma);
		if (err)
			return err;
	}

	/* Lock VM and BOs dma-resv */
	xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {});
	drm_exec_until_all_locked(&exec) {
		err = xe_pf_begin(&exec, vma, needs_vram == 1, tile->mem.vram);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err)
			goto unlock_dma_resv;

		/* Bind VMA only to the GT that has faulted */
		trace_xe_vma_pf_bind(vma);
		xe_vm_set_validation_exec(vm, &exec);
		fence = xe_vma_rebind(vm, vma, BIT(tile->id));
		xe_vm_set_validation_exec(vm, NULL);
		if (IS_ERR(fence)) {
			err = PTR_ERR(fence);
			xe_validation_retry_on_oom(&ctx, &err);
			goto unlock_dma_resv;
		}
	}

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

unlock_dma_resv:
	xe_validation_ctx_fini(&ctx);
	if (err == -EAGAIN)
		goto retry_userptr;

	return err;
}

static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid)
{
	struct xe_vm *vm;

	down_read(&xe->usm.lock);
	vm = xa_load(&xe->usm.asid_to_vm, asid);
	if (vm && xe_vm_in_fault_mode(vm))
		xe_vm_get(vm);
	else
		vm = ERR_PTR(-EINVAL);
	up_read(&xe->usm.lock);

	return vm;
}
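
/*
 * Top-level handler for one fault descriptor: resolve the ASID to a
 * fault-mode VM, look up the VMA covering the faulted address and dispatch
 * to either the SVM path or the VMA rebind path above.
 */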
static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_vm *vm;
	struct xe_vma *vma = NULL;
	int err;
	bool atomic;

	/* SW isn't expected to handle TRTT faults */
	if (pf->trva_fault)
		return -EFAULT;

	vm = asid_to_vm(xe, pf->asid);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

	/*
	 * TODO: Change to read lock? Using write lock for simplicity.
	 */
	down_write(&vm->lock);

	if (xe_vm_is_closed(vm)) {
		err = -ENOENT;
		goto unlock_vm;
	}

	vma = xe_vm_find_vma_by_addr(vm, pf->page_addr);
	if (!vma) {
		err = -EINVAL;
		goto unlock_vm;
	}

	atomic = access_is_atomic(pf->access_type);

	if (xe_vma_is_cpu_addr_mirror(vma))
		err = xe_svm_handle_pagefault(vm, vma, gt,
					      pf->page_addr, atomic);
	else
		err = handle_vma_pagefault(gt, vma, atomic);

unlock_vm:
	if (!err)
		vm->usm.last_fault_vma = vma;
	up_write(&vm->lock);
	xe_vm_put(vm);

	return err;
}

static int send_pagefault_reply(struct xe_guc *guc,
				struct xe_guc_pagefault_reply *reply)
{
	u32 action[] = {
		XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
		reply->dw0,
		reply->dw1,
	};

	return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

static void print_pagefault(struct xe_gt *gt, struct pagefault *pf)
{
	xe_gt_dbg(gt, "\n\tASID: %d\n"
		  "\tVFID: %d\n"
		  "\tPDATA: 0x%04x\n"
		  "\tFaulted Address: 0x%08x%08x\n"
		  "\tFaultType: %d\n"
		  "\tAccessType: %d\n"
		  "\tFaultLevel: %d\n"
		  "\tEngineClass: %d %s\n"
		  "\tEngineInstance: %d\n",
		  pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
		  lower_32_bits(pf->page_addr),
		  pf->fault_type, pf->access_type, pf->fault_level,
		  pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class),
		  pf->engine_instance);
}

#define PF_MSG_LEN_DW	4

static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
{
	const struct xe_guc_pagefault_desc *desc;
	bool ret = false;

	spin_lock_irq(&pf_queue->lock);
	if (pf_queue->tail != pf_queue->head) {
		desc = (const struct xe_guc_pagefault_desc *)
			(pf_queue->data + pf_queue->tail);

		pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
		pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
		pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
		pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
		pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
			PFD_PDATA_HI_SHIFT;
		pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
		pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
		pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
		pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
		pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
		pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
			PFD_VIRTUAL_ADDR_HI_SHIFT;
		pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
			PFD_VIRTUAL_ADDR_LO_SHIFT;

		pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) %
			pf_queue->num_dw;
		ret = true;
	}
	spin_unlock_irq(&pf_queue->lock);

	return ret;
}

static bool pf_queue_full(struct pf_queue *pf_queue)
{
	lockdep_assert_held(&pf_queue->lock);

	return CIRC_SPACE(pf_queue->head, pf_queue->tail,
			  pf_queue->num_dw) <=
		PF_MSG_LEN_DW;
}
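
/*
 * G2H page fault notification handler, invoked from the GuC CT layer: stash
 * the 4-dword fault descriptor in a circular buffer selected by ASID and
 * kick the worker that drains it.
 */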
int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct pf_queue *pf_queue;
	unsigned long flags;
	u32 asid;
	bool full;

	if (unlikely(len != PF_MSG_LEN_DW))
		return -EPROTO;

	asid = FIELD_GET(PFD_ASID, msg[1]);
	pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE);

	/*
	 * The below logic doesn't work unless pf_queue->num_dw % PF_MSG_LEN_DW == 0
	 */
	xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW));

	spin_lock_irqsave(&pf_queue->lock, flags);
	full = pf_queue_full(pf_queue);
	if (!full) {
		memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32));
		pf_queue->head = (pf_queue->head + len) %
			pf_queue->num_dw;
		queue_work(gt->usm.pf_wq, &pf_queue->worker);
	} else {
		xe_gt_warn(gt, "PageFault Queue full, shouldn't be possible\n");
	}
	spin_unlock_irqrestore(&pf_queue->lock, flags);

	return full ? -ENOSPC : 0;
}

#define USM_QUEUE_MAX_RUNTIME_MS	20

static void pf_queue_work_func(struct work_struct *w)
{
	struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
	struct xe_gt *gt = pf_queue->gt;
	struct xe_guc_pagefault_reply reply = {};
	struct pagefault pf = {};
	unsigned long threshold;
	int ret;

	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);

	while (get_pagefault(pf_queue, &pf)) {
		ret = handle_pagefault(gt, &pf);
		if (unlikely(ret)) {
			print_pagefault(gt, &pf);
			pf.fault_unsuccessful = 1;
			xe_gt_dbg(gt, "Fault response: Unsuccessful %pe\n", ERR_PTR(ret));
		}

		reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
			FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
			FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
			FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
			FIELD_PREP(PFR_ASID, pf.asid);

		reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
			FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
			FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
			FIELD_PREP(PFR_PDATA, pf.pdata);

		send_pagefault_reply(&gt->uc.guc, &reply);

		if (time_after(jiffies, threshold) &&
		    pf_queue->tail != pf_queue->head) {
			queue_work(gt->usm.pf_wq, w);
			break;
		}
	}
}

static void acc_queue_work_func(struct work_struct *w);

static void pagefault_fini(void *arg)
{
	struct xe_gt *gt = arg;
	struct xe_device *xe = gt_to_xe(gt);

	if (!xe->info.has_usm)
		return;

	destroy_workqueue(gt->usm.acc_wq);
	destroy_workqueue(gt->usm.pf_wq);
}
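
/*
 * Size the fault queue from the fused topology: room for one outstanding
 * fault per EU and per hardware engine, scaled by PF_MULTIPLIER and rounded
 * up to a power of two dwords.
 */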
static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
{
	struct xe_device *xe = gt_to_xe(gt);
	xe_dss_mask_t all_dss;
	int num_dss, num_eus;

	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
		  XE_MAX_DSS_FUSE_BITS);

	num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS);
	num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
				XE_MAX_EU_FUSE_BITS) * num_dss;

	/*
	 * user can issue separate page faults per EU and per CS
	 *
	 * XXX: Multiplier required as compute UMDs are getting PF queue errors
	 * without it. Follow up on why this multiplier is required.
	 */
#define PF_MULTIPLIER	8
	pf_queue->num_dw =
		(num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER;
	pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw);
#undef PF_MULTIPLIER

	pf_queue->gt = gt;
	pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw,
				      sizeof(u32), GFP_KERNEL);
	if (!pf_queue->data)
		return -ENOMEM;

	spin_lock_init(&pf_queue->lock);
	INIT_WORK(&pf_queue->worker, pf_queue_work_func);

	return 0;
}

int xe_gt_pagefault_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	int i, ret = 0;

	if (!xe->info.has_usm)
		return 0;

	for (i = 0; i < NUM_PF_QUEUE; ++i) {
		ret = xe_alloc_pf_queue(gt, &gt->usm.pf_queue[i]);
		if (ret)
			return ret;
	}
	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
		gt->usm.acc_queue[i].gt = gt;
		spin_lock_init(&gt->usm.acc_queue[i].lock);
		INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
	}

	gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
					WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
	if (!gt->usm.pf_wq)
		return -ENOMEM;

	gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
					 WQ_UNBOUND | WQ_HIGHPRI,
					 NUM_ACC_QUEUE);
	if (!gt->usm.acc_wq) {
		destroy_workqueue(gt->usm.pf_wq);
		return -ENOMEM;
	}

	return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt);
}

void xe_gt_pagefault_reset(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	int i;

	if (!xe->info.has_usm)
		return;

	for (i = 0; i < NUM_PF_QUEUE; ++i) {
		spin_lock_irq(&gt->usm.pf_queue[i].lock);
		gt->usm.pf_queue[i].head = 0;
		gt->usm.pf_queue[i].tail = 0;
		spin_unlock_irq(&gt->usm.pf_queue[i].lock);
	}

	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
		spin_lock(&gt->usm.acc_queue[i].lock);
		gt->usm.acc_queue[i].head = 0;
		gt->usm.acc_queue[i].tail = 0;
		spin_unlock(&gt->usm.acc_queue[i].lock);
	}
}

static int granularity_in_byte(int val)
{
	switch (val) {
	case 0:
		return SZ_128K;
	case 1:
		return SZ_2M;
	case 2:
		return SZ_16M;
	case 3:
		return SZ_64M;
	default:
		return 0;
	}
}

static int sub_granularity_in_byte(int val)
{
	return (granularity_in_byte(val) / 32);
}

static void print_acc(struct xe_gt *gt, struct acc *acc)
{
	xe_gt_warn(gt, "Access counter request:\n"
		   "\tType: %s\n"
		   "\tASID: %d\n"
		   "\tVFID: %d\n"
		   "\tEngine: %d:%d\n"
		   "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
		   "\tSub_Granularity Vector: 0x%08x\n"
		   "\tVA Range base: 0x%016llx\n",
		   acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL",
		   acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
		   granularity_in_byte(acc->granularity) / SZ_1K,
		   sub_granularity_in_byte(acc->granularity) / SZ_1K,
		   acc->sub_granularity, acc->va_range_base);
}

static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
{
	u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
		sub_granularity_in_byte(acc->granularity);

	return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
}

static int handle_acc(struct xe_gt *gt, struct acc *acc)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	struct xe_vm *vm;
	struct xe_vma *vma;
	int ret = 0;

	/* We only support ACC_TRIGGER at the moment */
	if (acc->access_type != ACC_TRIGGER)
		return -EINVAL;

	vm = asid_to_vm(xe, acc->asid);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

	down_read(&vm->lock);

	/* Lookup VMA */
	vma = get_acc_vma(vm, acc);
	if (!vma) {
		ret = -EINVAL;
		goto unlock_vm;
	}

	trace_xe_vma_acc(vma);

	/* Userptr or null can't be migrated, nothing to do */
	if (xe_vma_has_no_bo(vma))
		goto unlock_vm;

	/* Lock VM and BOs dma-resv */
	xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {});
	drm_exec_until_all_locked(&exec) {
		ret = xe_pf_begin(&exec, vma, IS_DGFX(vm->xe), tile->mem.vram);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &ret);
	}

	xe_validation_ctx_fini(&ctx);
unlock_vm:
	up_read(&vm->lock);
	xe_vm_put(vm);

	return ret;
}

#define make_u64(hi__, low__)  ((u64)(hi__) << 32 | (u64)(low__))

#define ACC_MSG_LEN_DW	4

static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
{
	const struct xe_guc_acc_desc *desc;
	bool ret = false;

	spin_lock(&acc_queue->lock);
	if (acc_queue->tail != acc_queue->head) {
		desc = (const struct xe_guc_acc_desc *)
			(acc_queue->data + acc_queue->tail);

		acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
		acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
			FIELD_GET(ACC_SUBG_LO, desc->dw0);
		acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1);
		acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1);
		acc->asid = FIELD_GET(ACC_ASID, desc->dw1);
		acc->vfid = FIELD_GET(ACC_VFID, desc->dw2);
		acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
		acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
					      desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);

		acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) %
			ACC_QUEUE_NUM_DW;
		ret = true;
	}
	spin_unlock(&acc_queue->lock);

	return ret;
}

static void acc_queue_work_func(struct work_struct *w)
{
	struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
	struct xe_gt *gt = acc_queue->gt;
	struct acc acc = {};
	unsigned long threshold;
	int ret;

	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);

	while (get_acc(acc_queue, &acc)) {
		ret = handle_acc(gt, &acc);
		if (unlikely(ret)) {
			print_acc(gt, &acc);
			xe_gt_warn(gt, "ACC: Unsuccessful %pe\n", ERR_PTR(ret));
		}

		if (time_after(jiffies, threshold) &&
		    acc_queue->tail != acc_queue->head) {
			queue_work(gt->usm.acc_wq, w);
			break;
		}
	}
}

static bool acc_queue_full(struct acc_queue *acc_queue)
{
	lockdep_assert_held(&acc_queue->lock);

	return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <=
		ACC_MSG_LEN_DW;
}
int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct acc_queue *acc_queue;
	u32 asid;
	bool full;

	/*
	 * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0
	 */
	BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW);

	if (unlikely(len != ACC_MSG_LEN_DW))
		return -EPROTO;

	asid = FIELD_GET(ACC_ASID, msg[1]);
	acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE];

	spin_lock(&acc_queue->lock);
	full = acc_queue_full(acc_queue);
	if (!full) {
		memcpy(acc_queue->data + acc_queue->head, msg,
		       len * sizeof(u32));
		acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW;
		queue_work(gt->usm.acc_wq, &acc_queue->worker);
	} else {
		xe_gt_warn(gt, "ACC Queue full, dropping ACC\n");
	}
	spin_unlock(&acc_queue->lock);

	return full ? -ENOSPC : 0;
}