1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2022 Intel Corporation 4 */ 5 6 #include "xe_gt_pagefault.h" 7 8 #include <linux/bitfield.h> 9 #include <linux/circ_buf.h> 10 11 #include <drm/drm_exec.h> 12 #include <drm/drm_managed.h> 13 #include <drm/ttm/ttm_execbuf_util.h> 14 15 #include "abi/guc_actions_abi.h" 16 #include "xe_bo.h" 17 #include "xe_gt.h" 18 #include "xe_gt_tlb_invalidation.h" 19 #include "xe_guc.h" 20 #include "xe_guc_ct.h" 21 #include "xe_migrate.h" 22 #include "xe_pt.h" 23 #include "xe_trace.h" 24 #include "xe_vm.h" 25 26 struct pagefault { 27 u64 page_addr; 28 u32 asid; 29 u16 pdata; 30 u8 vfid; 31 u8 access_type; 32 u8 fault_type; 33 u8 fault_level; 34 u8 engine_class; 35 u8 engine_instance; 36 u8 fault_unsuccessful; 37 bool trva_fault; 38 }; 39 40 enum access_type { 41 ACCESS_TYPE_READ = 0, 42 ACCESS_TYPE_WRITE = 1, 43 ACCESS_TYPE_ATOMIC = 2, 44 ACCESS_TYPE_RESERVED = 3, 45 }; 46 47 enum fault_type { 48 NOT_PRESENT = 0, 49 WRITE_ACCESS_VIOLATION = 1, 50 ATOMIC_ACCESS_VIOLATION = 2, 51 }; 52 53 struct acc { 54 u64 va_range_base; 55 u32 asid; 56 u32 sub_granularity; 57 u8 granularity; 58 u8 vfid; 59 u8 access_type; 60 u8 engine_class; 61 u8 engine_instance; 62 }; 63 64 static bool access_is_atomic(enum access_type access_type) 65 { 66 return access_type == ACCESS_TYPE_ATOMIC; 67 } 68 69 static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma) 70 { 71 return BIT(tile->id) & vma->tile_present && 72 !(BIT(tile->id) & vma->usm.tile_invalidated); 73 } 74 75 static bool vma_matches(struct xe_vma *vma, u64 page_addr) 76 { 77 if (page_addr > xe_vma_end(vma) - 1 || 78 page_addr + SZ_4K - 1 < xe_vma_start(vma)) 79 return false; 80 81 return true; 82 } 83 84 static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr) 85 { 86 struct xe_vma *vma = NULL; 87 88 if (vm->usm.last_fault_vma) { /* Fast lookup */ 89 if (vma_matches(vm->usm.last_fault_vma, page_addr)) 90 vma = vm->usm.last_fault_vma; 91 } 92 if (!vma) 93 vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K); 94 95 return vma; 96 } 97 98 static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma, 99 bool atomic, unsigned int id) 100 { 101 struct xe_bo *bo = xe_vma_bo(vma); 102 struct xe_vm *vm = xe_vma_vm(vma); 103 unsigned int num_shared = 2; /* slots for bind + move */ 104 int err; 105 106 err = xe_vm_prepare_vma(exec, vma, num_shared); 107 if (err) 108 return err; 109 110 if (atomic && IS_DGFX(vm->xe)) { 111 if (xe_vma_is_userptr(vma)) { 112 err = -EACCES; 113 return err; 114 } 115 116 /* Migrate to VRAM, move should invalidate the VMA first */ 117 err = xe_bo_migrate(bo, XE_PL_VRAM0 + id); 118 if (err) 119 return err; 120 } else if (bo) { 121 /* Create backing store if needed */ 122 err = xe_bo_validate(bo, vm, true); 123 if (err) 124 return err; 125 } 126 127 return 0; 128 } 129 130 static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf) 131 { 132 struct xe_device *xe = gt_to_xe(gt); 133 struct xe_tile *tile = gt_to_tile(gt); 134 struct drm_exec exec; 135 struct xe_vm *vm; 136 struct xe_vma *vma = NULL; 137 struct dma_fence *fence; 138 bool write_locked; 139 int ret = 0; 140 bool atomic; 141 142 /* SW isn't expected to handle TRTT faults */ 143 if (pf->trva_fault) 144 return -EFAULT; 145 146 /* ASID to VM */ 147 mutex_lock(&xe->usm.lock); 148 vm = xa_load(&xe->usm.asid_to_vm, pf->asid); 149 if (vm) 150 xe_vm_get(vm); 151 mutex_unlock(&xe->usm.lock); 152 if (!vm || !xe_vm_in_fault_mode(vm)) 153 return -EINVAL; 154 155 retry_userptr: 156 /* 157 * TODO: Avoid exclusive lock if VM doesn't have userptrs, or 158 * start out read-locked? 159 */ 160 down_write(&vm->lock); 161 write_locked = true; 162 vma = lookup_vma(vm, pf->page_addr); 163 if (!vma) { 164 ret = -EINVAL; 165 goto unlock_vm; 166 } 167 168 if (!xe_vma_is_userptr(vma) || 169 !xe_vma_userptr_check_repin(to_userptr_vma(vma))) { 170 downgrade_write(&vm->lock); 171 write_locked = false; 172 } 173 174 trace_xe_vma_pagefault(vma); 175 176 atomic = access_is_atomic(pf->access_type); 177 178 /* Check if VMA is valid */ 179 if (vma_is_valid(tile, vma) && !atomic) 180 goto unlock_vm; 181 182 /* TODO: Validate fault */ 183 184 if (xe_vma_is_userptr(vma) && write_locked) { 185 struct xe_userptr_vma *uvma = to_userptr_vma(vma); 186 187 spin_lock(&vm->userptr.invalidated_lock); 188 list_del_init(&uvma->userptr.invalidate_link); 189 spin_unlock(&vm->userptr.invalidated_lock); 190 191 ret = xe_vma_userptr_pin_pages(uvma); 192 if (ret) 193 goto unlock_vm; 194 195 downgrade_write(&vm->lock); 196 write_locked = false; 197 } 198 199 /* Lock VM and BOs dma-resv */ 200 drm_exec_init(&exec, 0, 0); 201 drm_exec_until_all_locked(&exec) { 202 ret = xe_pf_begin(&exec, vma, atomic, tile->id); 203 drm_exec_retry_on_contention(&exec); 204 if (ret) 205 goto unlock_dma_resv; 206 } 207 208 /* Bind VMA only to the GT that has faulted */ 209 trace_xe_vma_pf_bind(vma); 210 fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0, 211 vma->tile_present & BIT(tile->id)); 212 if (IS_ERR(fence)) { 213 ret = PTR_ERR(fence); 214 goto unlock_dma_resv; 215 } 216 217 /* 218 * XXX: Should we drop the lock before waiting? This only helps if doing 219 * GPU binds which is currently only done if we have to wait for more 220 * than 10ms on a move. 221 */ 222 dma_fence_wait(fence, false); 223 dma_fence_put(fence); 224 225 if (xe_vma_is_userptr(vma)) 226 ret = xe_vma_userptr_check_repin(to_userptr_vma(vma)); 227 vma->usm.tile_invalidated &= ~BIT(tile->id); 228 229 unlock_dma_resv: 230 drm_exec_fini(&exec); 231 unlock_vm: 232 if (!ret) 233 vm->usm.last_fault_vma = vma; 234 if (write_locked) 235 up_write(&vm->lock); 236 else 237 up_read(&vm->lock); 238 if (ret == -EAGAIN) 239 goto retry_userptr; 240 241 if (!ret) { 242 ret = xe_gt_tlb_invalidation_vma(gt, NULL, vma); 243 if (ret >= 0) 244 ret = 0; 245 } 246 xe_vm_put(vm); 247 248 return ret; 249 } 250 251 static int send_pagefault_reply(struct xe_guc *guc, 252 struct xe_guc_pagefault_reply *reply) 253 { 254 u32 action[] = { 255 XE_GUC_ACTION_PAGE_FAULT_RES_DESC, 256 reply->dw0, 257 reply->dw1, 258 }; 259 260 return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); 261 } 262 263 static void print_pagefault(struct xe_device *xe, struct pagefault *pf) 264 { 265 drm_dbg(&xe->drm, "\n\tASID: %d\n" 266 "\tVFID: %d\n" 267 "\tPDATA: 0x%04x\n" 268 "\tFaulted Address: 0x%08x%08x\n" 269 "\tFaultType: %d\n" 270 "\tAccessType: %d\n" 271 "\tFaultLevel: %d\n" 272 "\tEngineClass: %d\n" 273 "\tEngineInstance: %d\n", 274 pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr), 275 lower_32_bits(pf->page_addr), 276 pf->fault_type, pf->access_type, pf->fault_level, 277 pf->engine_class, pf->engine_instance); 278 } 279 280 #define PF_MSG_LEN_DW 4 281 282 static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf) 283 { 284 const struct xe_guc_pagefault_desc *desc; 285 bool ret = false; 286 287 spin_lock_irq(&pf_queue->lock); 288 if (pf_queue->head != pf_queue->tail) { 289 desc = (const struct xe_guc_pagefault_desc *) 290 (pf_queue->data + pf_queue->head); 291 292 pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0); 293 pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0); 294 pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0); 295 pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0); 296 pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) << 297 PFD_PDATA_HI_SHIFT; 298 pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0); 299 pf->asid = FIELD_GET(PFD_ASID, desc->dw1); 300 pf->vfid = FIELD_GET(PFD_VFID, desc->dw2); 301 pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2); 302 pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2); 303 pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) << 304 PFD_VIRTUAL_ADDR_HI_SHIFT; 305 pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) << 306 PFD_VIRTUAL_ADDR_LO_SHIFT; 307 308 pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) % 309 PF_QUEUE_NUM_DW; 310 ret = true; 311 } 312 spin_unlock_irq(&pf_queue->lock); 313 314 return ret; 315 } 316 317 static bool pf_queue_full(struct pf_queue *pf_queue) 318 { 319 lockdep_assert_held(&pf_queue->lock); 320 321 return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <= 322 PF_MSG_LEN_DW; 323 } 324 325 int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len) 326 { 327 struct xe_gt *gt = guc_to_gt(guc); 328 struct xe_device *xe = gt_to_xe(gt); 329 struct pf_queue *pf_queue; 330 unsigned long flags; 331 u32 asid; 332 bool full; 333 334 if (unlikely(len != PF_MSG_LEN_DW)) 335 return -EPROTO; 336 337 asid = FIELD_GET(PFD_ASID, msg[1]); 338 pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE); 339 340 spin_lock_irqsave(&pf_queue->lock, flags); 341 full = pf_queue_full(pf_queue); 342 if (!full) { 343 memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32)); 344 pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW; 345 queue_work(gt->usm.pf_wq, &pf_queue->worker); 346 } else { 347 drm_warn(&xe->drm, "PF Queue full, shouldn't be possible"); 348 } 349 spin_unlock_irqrestore(&pf_queue->lock, flags); 350 351 return full ? -ENOSPC : 0; 352 } 353 354 #define USM_QUEUE_MAX_RUNTIME_MS 20 355 356 static void pf_queue_work_func(struct work_struct *w) 357 { 358 struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker); 359 struct xe_gt *gt = pf_queue->gt; 360 struct xe_device *xe = gt_to_xe(gt); 361 struct xe_guc_pagefault_reply reply = {}; 362 struct pagefault pf = {}; 363 unsigned long threshold; 364 int ret; 365 366 threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); 367 368 while (get_pagefault(pf_queue, &pf)) { 369 ret = handle_pagefault(gt, &pf); 370 if (unlikely(ret)) { 371 print_pagefault(xe, &pf); 372 pf.fault_unsuccessful = 1; 373 drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret); 374 } 375 376 reply.dw0 = FIELD_PREP(PFR_VALID, 1) | 377 FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) | 378 FIELD_PREP(PFR_REPLY, PFR_ACCESS) | 379 FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) | 380 FIELD_PREP(PFR_ASID, pf.asid); 381 382 reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) | 383 FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) | 384 FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) | 385 FIELD_PREP(PFR_PDATA, pf.pdata); 386 387 send_pagefault_reply(>->uc.guc, &reply); 388 389 if (time_after(jiffies, threshold) && 390 pf_queue->head != pf_queue->tail) { 391 queue_work(gt->usm.pf_wq, w); 392 break; 393 } 394 } 395 } 396 397 static void acc_queue_work_func(struct work_struct *w); 398 399 int xe_gt_pagefault_init(struct xe_gt *gt) 400 { 401 struct xe_device *xe = gt_to_xe(gt); 402 int i; 403 404 if (!xe->info.has_usm) 405 return 0; 406 407 for (i = 0; i < NUM_PF_QUEUE; ++i) { 408 gt->usm.pf_queue[i].gt = gt; 409 spin_lock_init(>->usm.pf_queue[i].lock); 410 INIT_WORK(>->usm.pf_queue[i].worker, pf_queue_work_func); 411 } 412 for (i = 0; i < NUM_ACC_QUEUE; ++i) { 413 gt->usm.acc_queue[i].gt = gt; 414 spin_lock_init(>->usm.acc_queue[i].lock); 415 INIT_WORK(>->usm.acc_queue[i].worker, acc_queue_work_func); 416 } 417 418 gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue", 419 WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE); 420 if (!gt->usm.pf_wq) 421 return -ENOMEM; 422 423 gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", 424 WQ_UNBOUND | WQ_HIGHPRI, 425 NUM_ACC_QUEUE); 426 if (!gt->usm.acc_wq) 427 return -ENOMEM; 428 429 return 0; 430 } 431 432 void xe_gt_pagefault_reset(struct xe_gt *gt) 433 { 434 struct xe_device *xe = gt_to_xe(gt); 435 int i; 436 437 if (!xe->info.has_usm) 438 return; 439 440 for (i = 0; i < NUM_PF_QUEUE; ++i) { 441 spin_lock_irq(>->usm.pf_queue[i].lock); 442 gt->usm.pf_queue[i].head = 0; 443 gt->usm.pf_queue[i].tail = 0; 444 spin_unlock_irq(>->usm.pf_queue[i].lock); 445 } 446 447 for (i = 0; i < NUM_ACC_QUEUE; ++i) { 448 spin_lock(>->usm.acc_queue[i].lock); 449 gt->usm.acc_queue[i].head = 0; 450 gt->usm.acc_queue[i].tail = 0; 451 spin_unlock(>->usm.acc_queue[i].lock); 452 } 453 } 454 455 static int granularity_in_byte(int val) 456 { 457 switch (val) { 458 case 0: 459 return SZ_128K; 460 case 1: 461 return SZ_2M; 462 case 2: 463 return SZ_16M; 464 case 3: 465 return SZ_64M; 466 default: 467 return 0; 468 } 469 } 470 471 static int sub_granularity_in_byte(int val) 472 { 473 return (granularity_in_byte(val) / 32); 474 } 475 476 static void print_acc(struct xe_device *xe, struct acc *acc) 477 { 478 drm_warn(&xe->drm, "Access counter request:\n" 479 "\tType: %s\n" 480 "\tASID: %d\n" 481 "\tVFID: %d\n" 482 "\tEngine: %d:%d\n" 483 "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n" 484 "\tSub_Granularity Vector: 0x%08x\n" 485 "\tVA Range base: 0x%016llx\n", 486 acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL", 487 acc->asid, acc->vfid, acc->engine_class, acc->engine_instance, 488 granularity_in_byte(acc->granularity) / SZ_1K, 489 sub_granularity_in_byte(acc->granularity) / SZ_1K, 490 acc->sub_granularity, acc->va_range_base); 491 } 492 493 static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc) 494 { 495 u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) * 496 sub_granularity_in_byte(acc->granularity); 497 498 return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K); 499 } 500 501 static int handle_acc(struct xe_gt *gt, struct acc *acc) 502 { 503 struct xe_device *xe = gt_to_xe(gt); 504 struct xe_tile *tile = gt_to_tile(gt); 505 struct drm_exec exec; 506 struct xe_vm *vm; 507 struct xe_vma *vma; 508 int ret = 0; 509 510 /* We only support ACC_TRIGGER at the moment */ 511 if (acc->access_type != ACC_TRIGGER) 512 return -EINVAL; 513 514 /* ASID to VM */ 515 mutex_lock(&xe->usm.lock); 516 vm = xa_load(&xe->usm.asid_to_vm, acc->asid); 517 if (vm) 518 xe_vm_get(vm); 519 mutex_unlock(&xe->usm.lock); 520 if (!vm || !xe_vm_in_fault_mode(vm)) 521 return -EINVAL; 522 523 down_read(&vm->lock); 524 525 /* Lookup VMA */ 526 vma = get_acc_vma(vm, acc); 527 if (!vma) { 528 ret = -EINVAL; 529 goto unlock_vm; 530 } 531 532 trace_xe_vma_acc(vma); 533 534 /* Userptr or null can't be migrated, nothing to do */ 535 if (xe_vma_has_no_bo(vma)) 536 goto unlock_vm; 537 538 /* Lock VM and BOs dma-resv */ 539 drm_exec_init(&exec, 0, 0); 540 drm_exec_until_all_locked(&exec) { 541 ret = xe_pf_begin(&exec, vma, true, tile->id); 542 drm_exec_retry_on_contention(&exec); 543 if (ret) 544 break; 545 } 546 547 drm_exec_fini(&exec); 548 unlock_vm: 549 up_read(&vm->lock); 550 xe_vm_put(vm); 551 552 return ret; 553 } 554 555 #define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__)) 556 557 #define ACC_MSG_LEN_DW 4 558 559 static bool get_acc(struct acc_queue *acc_queue, struct acc *acc) 560 { 561 const struct xe_guc_acc_desc *desc; 562 bool ret = false; 563 564 spin_lock(&acc_queue->lock); 565 if (acc_queue->head != acc_queue->tail) { 566 desc = (const struct xe_guc_acc_desc *) 567 (acc_queue->data + acc_queue->head); 568 569 acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2); 570 acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 | 571 FIELD_GET(ACC_SUBG_LO, desc->dw0); 572 acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1); 573 acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1); 574 acc->asid = FIELD_GET(ACC_ASID, desc->dw1); 575 acc->vfid = FIELD_GET(ACC_VFID, desc->dw2); 576 acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0); 577 acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI, 578 desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO); 579 580 acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) % 581 ACC_QUEUE_NUM_DW; 582 ret = true; 583 } 584 spin_unlock(&acc_queue->lock); 585 586 return ret; 587 } 588 589 static void acc_queue_work_func(struct work_struct *w) 590 { 591 struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker); 592 struct xe_gt *gt = acc_queue->gt; 593 struct xe_device *xe = gt_to_xe(gt); 594 struct acc acc = {}; 595 unsigned long threshold; 596 int ret; 597 598 threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS); 599 600 while (get_acc(acc_queue, &acc)) { 601 ret = handle_acc(gt, &acc); 602 if (unlikely(ret)) { 603 print_acc(xe, &acc); 604 drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret); 605 } 606 607 if (time_after(jiffies, threshold) && 608 acc_queue->head != acc_queue->tail) { 609 queue_work(gt->usm.acc_wq, w); 610 break; 611 } 612 } 613 } 614 615 static bool acc_queue_full(struct acc_queue *acc_queue) 616 { 617 lockdep_assert_held(&acc_queue->lock); 618 619 return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <= 620 ACC_MSG_LEN_DW; 621 } 622 623 int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len) 624 { 625 struct xe_gt *gt = guc_to_gt(guc); 626 struct acc_queue *acc_queue; 627 u32 asid; 628 bool full; 629 630 if (unlikely(len != ACC_MSG_LEN_DW)) 631 return -EPROTO; 632 633 asid = FIELD_GET(ACC_ASID, msg[1]); 634 acc_queue = >->usm.acc_queue[asid % NUM_ACC_QUEUE]; 635 636 spin_lock(&acc_queue->lock); 637 full = acc_queue_full(acc_queue); 638 if (!full) { 639 memcpy(acc_queue->data + acc_queue->tail, msg, 640 len * sizeof(u32)); 641 acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW; 642 queue_work(gt->usm.acc_wq, &acc_queue->worker); 643 } else { 644 drm_warn(>_to_xe(gt)->drm, "ACC Queue full, dropping ACC"); 645 } 646 spin_unlock(&acc_queue->lock); 647 648 return full ? -ENOSPC : 0; 649 } 650