// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_gt_pagefault.h"

#include <linux/bitfield.h>
#include <linux/circ_buf.h>

#include <drm/drm_exec.h>
#include <drm/drm_managed.h>
#include <drm/ttm/ttm_execbuf_util.h>

#include "abi/guc_actions_abi.h"
#include "xe_bo.h"
#include "xe_gt.h"
#include "xe_gt_tlb_invalidation.h"
#include "xe_guc.h"
#include "xe_guc_ct.h"
#include "xe_migrate.h"
#include "xe_pt.h"
#include "xe_trace.h"
#include "xe_vm.h"

struct pagefault {
	u64 page_addr;
	u32 asid;
	u16 pdata;
	u8 vfid;
	u8 access_type;
	u8 fault_type;
	u8 fault_level;
	u8 engine_class;
	u8 engine_instance;
	u8 fault_unsuccessful;
	bool trva_fault;
};

enum access_type {
	ACCESS_TYPE_READ = 0,
	ACCESS_TYPE_WRITE = 1,
	ACCESS_TYPE_ATOMIC = 2,
	ACCESS_TYPE_RESERVED = 3,
};

enum fault_type {
	NOT_PRESENT = 0,
	WRITE_ACCESS_VIOLATION = 1,
	ATOMIC_ACCESS_VIOLATION = 2,
};

struct acc {
	u64 va_range_base;
	u32 asid;
	u32 sub_granularity;
	u8 granularity;
	u8 vfid;
	u8 access_type;
	u8 engine_class;
	u8 engine_instance;
};

static bool access_is_atomic(enum access_type access_type)
{
	return access_type == ACCESS_TYPE_ATOMIC;
}

static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
{
	return BIT(tile->id) & vma->tile_present &&
		!(BIT(tile->id) & vma->usm.tile_invalidated);
}

static bool vma_matches(struct xe_vma *vma, u64 page_addr)
{
	if (page_addr > xe_vma_end(vma) - 1 ||
	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
		return false;

	return true;
}

static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
{
	struct xe_vma *vma = NULL;

	if (vm->usm.last_fault_vma) {	/* Fast lookup */
		if (vma_matches(vm->usm.last_fault_vma, page_addr))
			vma = vm->usm.last_fault_vma;
	}
	if (!vma)
		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);

	return vma;
}

static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
		       bool atomic, unsigned int id)
{
	struct xe_bo *bo = xe_vma_bo(vma);
	struct xe_vm *vm = xe_vma_vm(vma);
	unsigned int num_shared = 2; /* slots for bind + move */
	int err;

	err = xe_vm_prepare_vma(exec, vma, num_shared);
	if (err)
		return err;

	if (atomic && IS_DGFX(vm->xe)) {
		if (xe_vma_is_userptr(vma)) {
			err = -EACCES;
			return err;
		}

		/* Migrate to VRAM, move should invalidate the VMA first */
		err = xe_bo_migrate(bo, XE_PL_VRAM0 + id);
		if (err)
			return err;
	} else if (bo) {
		/* Create backing store if needed */
		err = xe_bo_validate(bo, vm, true);
		if (err)
			return err;
	}

	return 0;
}
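
/*
 * handle_pagefault() flow, summarised from the code below:
 *  1. Translate the fault ASID to a VM via xe->usm.asid_to_vm.
 *  2. Take vm->lock (write, downgraded to read once no userptr repin is
 *     needed) and look up the faulting VMA.
 *  3. Pin userptr pages if required, then lock the VM/BO dma-resv with
 *     drm_exec and validate or migrate the backing store (xe_pf_begin()).
 *  4. (Re)bind the VMA on the faulting tile and wait for the bind fence.
 *  5. On success, issue a TLB invalidation for the VMA and cache it as
 *     vm->usm.last_fault_vma for the fast lookup path.
 */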

static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	struct drm_exec exec;
	struct xe_vm *vm;
	struct xe_vma *vma = NULL;
	struct dma_fence *fence;
	bool write_locked;
	int ret = 0;
	bool atomic;

	/* SW isn't expected to handle TRTT faults */
	if (pf->trva_fault)
		return -EFAULT;

	/* ASID to VM */
	mutex_lock(&xe->usm.lock);
	vm = xa_load(&xe->usm.asid_to_vm, pf->asid);
	if (vm)
		xe_vm_get(vm);
	mutex_unlock(&xe->usm.lock);
	if (!vm || !xe_vm_in_fault_mode(vm))
		return -EINVAL;

retry_userptr:
	/*
	 * TODO: Avoid exclusive lock if VM doesn't have userptrs, or
	 * start out read-locked?
	 */
	down_write(&vm->lock);
	write_locked = true;
	vma = lookup_vma(vm, pf->page_addr);
	if (!vma) {
		ret = -EINVAL;
		goto unlock_vm;
	}

	if (!xe_vma_is_userptr(vma) || !xe_vma_userptr_check_repin(vma)) {
		downgrade_write(&vm->lock);
		write_locked = false;
	}

	trace_xe_vma_pagefault(vma);

	atomic = access_is_atomic(pf->access_type);

	/* Check if VMA is valid */
	if (vma_is_valid(tile, vma) && !atomic)
		goto unlock_vm;

	/* TODO: Validate fault */

	if (xe_vma_is_userptr(vma) && write_locked) {
		spin_lock(&vm->userptr.invalidated_lock);
		list_del_init(&vma->userptr.invalidate_link);
		spin_unlock(&vm->userptr.invalidated_lock);

		ret = xe_vma_userptr_pin_pages(vma);
		if (ret)
			goto unlock_vm;

		downgrade_write(&vm->lock);
		write_locked = false;
	}

	/* Lock VM and BOs dma-resv */
	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		ret = xe_pf_begin(&exec, vma, atomic, tile->id);
		drm_exec_retry_on_contention(&exec);
		if (ret)
			goto unlock_dma_resv;
	}

	/* Bind VMA only to the GT that has faulted */
	trace_xe_vma_pf_bind(vma);
	fence = __xe_pt_bind_vma(tile, vma, xe_tile_migrate_engine(tile), NULL, 0,
				 vma->tile_present & BIT(tile->id));
	if (IS_ERR(fence)) {
		ret = PTR_ERR(fence);
		goto unlock_dma_resv;
	}

	/*
	 * XXX: Should we drop the lock before waiting? This only helps if doing
	 * GPU binds which is currently only done if we have to wait for more
	 * than 10ms on a move.
	 */
	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	if (xe_vma_is_userptr(vma))
		ret = xe_vma_userptr_check_repin(vma);
	vma->usm.tile_invalidated &= ~BIT(tile->id);

unlock_dma_resv:
	drm_exec_fini(&exec);
unlock_vm:
	if (!ret)
		vm->usm.last_fault_vma = vma;
	if (write_locked)
		up_write(&vm->lock);
	else
		up_read(&vm->lock);
	if (ret == -EAGAIN)
		goto retry_userptr;

	if (!ret) {
		ret = xe_gt_tlb_invalidation_vma(gt, NULL, vma);
		if (ret >= 0)
			ret = 0;
	}
	xe_vm_put(vm);

	return ret;
}
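
/*
 * Every fault pulled off a pf_queue is acknowledged back to the GuC with a
 * XE_GUC_ACTION_PAGE_FAULT_RES_DESC message; the reply descriptor (dw0/dw1)
 * is assembled in pf_queue_work_func() below.
 */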

static int send_pagefault_reply(struct xe_guc *guc,
				struct xe_guc_pagefault_reply *reply)
{
	u32 action[] = {
		XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
		reply->dw0,
		reply->dw1,
	};

	return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
{
	drm_dbg(&xe->drm, "\n\tASID: %d\n"
		 "\tVFID: %d\n"
		 "\tPDATA: 0x%04x\n"
		 "\tFaulted Address: 0x%08x%08x\n"
		 "\tFaultType: %d\n"
		 "\tAccessType: %d\n"
		 "\tFaultLevel: %d\n"
		 "\tEngineClass: %d\n"
		 "\tEngineInstance: %d\n",
		 pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
		 lower_32_bits(pf->page_addr),
		 pf->fault_type, pf->access_type, pf->fault_level,
		 pf->engine_class, pf->engine_instance);
}

#define PF_MSG_LEN_DW 4

static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
{
	const struct xe_guc_pagefault_desc *desc;
	bool ret = false;

	spin_lock_irq(&pf_queue->lock);
	if (pf_queue->head != pf_queue->tail) {
		desc = (const struct xe_guc_pagefault_desc *)
			(pf_queue->data + pf_queue->head);

		pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
		pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
		pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
		pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
		pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
			PFD_PDATA_HI_SHIFT;
		pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
		pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
		pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
		pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
		pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
		pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
			PFD_VIRTUAL_ADDR_HI_SHIFT;
		pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
			PFD_VIRTUAL_ADDR_LO_SHIFT;

		pf_queue->head = (pf_queue->head + PF_MSG_LEN_DW) %
			PF_QUEUE_NUM_DW;
		ret = true;
	}
	spin_unlock_irq(&pf_queue->lock);

	return ret;
}

static bool pf_queue_full(struct pf_queue *pf_queue)
{
	lockdep_assert_held(&pf_queue->lock);

	return CIRC_SPACE(pf_queue->tail, pf_queue->head, PF_QUEUE_NUM_DW) <=
		PF_MSG_LEN_DW;
}

int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_device *xe = gt_to_xe(gt);
	struct pf_queue *pf_queue;
	unsigned long flags;
	u32 asid;
	bool full;

	if (unlikely(len != PF_MSG_LEN_DW))
		return -EPROTO;

	asid = FIELD_GET(PFD_ASID, msg[1]);
	pf_queue = &gt->usm.pf_queue[asid % NUM_PF_QUEUE];

	spin_lock_irqsave(&pf_queue->lock, flags);
	full = pf_queue_full(pf_queue);
	if (!full) {
		memcpy(pf_queue->data + pf_queue->tail, msg, len * sizeof(u32));
		pf_queue->tail = (pf_queue->tail + len) % PF_QUEUE_NUM_DW;
		queue_work(gt->usm.pf_wq, &pf_queue->worker);
	} else {
		drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
	}
	spin_unlock_irqrestore(&pf_queue->lock, flags);

	return full ? -ENOSPC : 0;
}
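
/*
 * G2H page fault messages are copied into a pf_queue selected by hashing the
 * ASID above and drained by a worker.  The worker processes faults for at
 * most USM_QUEUE_MAX_RUNTIME_MS before re-queueing itself, so a long backlog
 * on one queue does not monopolise the workqueue.
 */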

#define USM_QUEUE_MAX_RUNTIME_MS 20

static void pf_queue_work_func(struct work_struct *w)
{
	struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
	struct xe_gt *gt = pf_queue->gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_guc_pagefault_reply reply = {};
	struct pagefault pf = {};
	unsigned long threshold;
	int ret;

	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);

	while (get_pagefault(pf_queue, &pf)) {
		ret = handle_pagefault(gt, &pf);
		if (unlikely(ret)) {
			print_pagefault(xe, &pf);
			pf.fault_unsuccessful = 1;
			drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
		}

		reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
			FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
			FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
			FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
			FIELD_PREP(PFR_ASID, pf.asid);

		reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
			FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
			FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
			FIELD_PREP(PFR_PDATA, pf.pdata);

		send_pagefault_reply(&gt->uc.guc, &reply);

		if (time_after(jiffies, threshold) &&
		    pf_queue->head != pf_queue->tail) {
			queue_work(gt->usm.pf_wq, w);
			break;
		}
	}
}

static void acc_queue_work_func(struct work_struct *w);

int xe_gt_pagefault_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	int i;

	if (!xe->info.has_usm)
		return 0;

	for (i = 0; i < NUM_PF_QUEUE; ++i) {
		gt->usm.pf_queue[i].gt = gt;
		spin_lock_init(&gt->usm.pf_queue[i].lock);
		INIT_WORK(&gt->usm.pf_queue[i].worker, pf_queue_work_func);
	}
	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
		gt->usm.acc_queue[i].gt = gt;
		spin_lock_init(&gt->usm.acc_queue[i].lock);
		INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
	}

	gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
					WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
	if (!gt->usm.pf_wq)
		return -ENOMEM;

	gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
					 WQ_UNBOUND | WQ_HIGHPRI,
					 NUM_ACC_QUEUE);
	if (!gt->usm.acc_wq)
		return -ENOMEM;

	return 0;
}

void xe_gt_pagefault_reset(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	int i;

	if (!xe->info.has_usm)
		return;

	for (i = 0; i < NUM_PF_QUEUE; ++i) {
		spin_lock_irq(&gt->usm.pf_queue[i].lock);
		gt->usm.pf_queue[i].head = 0;
		gt->usm.pf_queue[i].tail = 0;
		spin_unlock_irq(&gt->usm.pf_queue[i].lock);
	}

	for (i = 0; i < NUM_ACC_QUEUE; ++i) {
		spin_lock(&gt->usm.acc_queue[i].lock);
		gt->usm.acc_queue[i].head = 0;
		gt->usm.acc_queue[i].tail = 0;
		spin_unlock(&gt->usm.acc_queue[i].lock);
	}
}

static int granularity_in_byte(int val)
{
	switch (val) {
	case 0:
		return SZ_128K;
	case 1:
		return SZ_2M;
	case 2:
		return SZ_16M;
	case 3:
		return SZ_64M;
	default:
		return 0;
	}
}

static int sub_granularity_in_byte(int val)
{
	return (granularity_in_byte(val) / 32);
}
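
/*
 * Access counter granularity encoding, per granularity_in_byte() and
 * sub_granularity_in_byte() above (the sub-granularity vector splits the
 * region into 32 equal chunks):
 *
 *   value  region size  sub-granularity chunk
 *     0        128K              4K
 *     1          2M             64K
 *     2         16M            512K
 *     3         64M              2M
 */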

static void print_acc(struct xe_device *xe, struct acc *acc)
{
	drm_warn(&xe->drm, "Access counter request:\n"
		 "\tType: %s\n"
		 "\tASID: %d\n"
		 "\tVFID: %d\n"
		 "\tEngine: %d:%d\n"
		 "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
		 "\tSub_Granularity Vector: 0x%08x\n"
		 "\tVA Range base: 0x%016llx\n",
		 acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL",
		 acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
		 granularity_in_byte(acc->granularity) / SZ_1K,
		 sub_granularity_in_byte(acc->granularity) / SZ_1K,
		 acc->sub_granularity, acc->va_range_base);
}

static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
{
	u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
		sub_granularity_in_byte(acc->granularity);

	return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
}

static int handle_acc(struct xe_gt *gt, struct acc *acc)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_tile *tile = gt_to_tile(gt);
	struct drm_exec exec;
	struct xe_vm *vm;
	struct xe_vma *vma;
	int ret = 0;

	/* We only support ACC_TRIGGER at the moment */
	if (acc->access_type != ACC_TRIGGER)
		return -EINVAL;

	/* ASID to VM */
	mutex_lock(&xe->usm.lock);
	vm = xa_load(&xe->usm.asid_to_vm, acc->asid);
	if (vm)
		xe_vm_get(vm);
	mutex_unlock(&xe->usm.lock);
	if (!vm || !xe_vm_in_fault_mode(vm))
		return -EINVAL;

	down_read(&vm->lock);

	/* Lookup VMA */
	vma = get_acc_vma(vm, acc);
	if (!vma) {
		ret = -EINVAL;
		goto unlock_vm;
	}

	trace_xe_vma_acc(vma);

	/* Userptr or null can't be migrated, nothing to do */
	if (xe_vma_has_no_bo(vma))
		goto unlock_vm;

	/* Lock VM and BOs dma-resv */
	drm_exec_init(&exec, 0, 0);
	drm_exec_until_all_locked(&exec) {
		ret = xe_pf_begin(&exec, vma, true, tile->id);
		drm_exec_retry_on_contention(&exec);
		if (ret)
			break;
	}

	drm_exec_fini(&exec);
unlock_vm:
	up_read(&vm->lock);
	xe_vm_put(vm);

	return ret;
}

#define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__))

#define ACC_MSG_LEN_DW 4

static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
{
	const struct xe_guc_acc_desc *desc;
	bool ret = false;

	spin_lock(&acc_queue->lock);
	if (acc_queue->head != acc_queue->tail) {
		desc = (const struct xe_guc_acc_desc *)
			(acc_queue->data + acc_queue->head);

		acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
		acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
			FIELD_GET(ACC_SUBG_LO, desc->dw0);
		acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1);
		acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1);
		acc->asid = FIELD_GET(ACC_ASID, desc->dw1);
		acc->vfid = FIELD_GET(ACC_VFID, desc->dw2);
		acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
		acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
					      desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);

		acc_queue->head = (acc_queue->head + ACC_MSG_LEN_DW) %
				  ACC_QUEUE_NUM_DW;
		ret = true;
	}
	spin_unlock(&acc_queue->lock);

	return ret;
}
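
/*
 * The access counter worker mirrors pf_queue_work_func(): it drains its
 * acc_queue, lets handle_acc() revalidate or migrate the BO backing the
 * touched VMA (xe_pf_begin() with atomic = true), and re-queues itself once
 * USM_QUEUE_MAX_RUNTIME_MS has elapsed with entries still pending.
 */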

static void acc_queue_work_func(struct work_struct *w)
{
	struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
	struct xe_gt *gt = acc_queue->gt;
	struct xe_device *xe = gt_to_xe(gt);
	struct acc acc = {};
	unsigned long threshold;
	int ret;

	threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);

	while (get_acc(acc_queue, &acc)) {
		ret = handle_acc(gt, &acc);
		if (unlikely(ret)) {
			print_acc(xe, &acc);
			drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
		}

		if (time_after(jiffies, threshold) &&
		    acc_queue->head != acc_queue->tail) {
			queue_work(gt->usm.acc_wq, w);
			break;
		}
	}
}

static bool acc_queue_full(struct acc_queue *acc_queue)
{
	lockdep_assert_held(&acc_queue->lock);

	return CIRC_SPACE(acc_queue->tail, acc_queue->head, ACC_QUEUE_NUM_DW) <=
		ACC_MSG_LEN_DW;
}

int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct acc_queue *acc_queue;
	u32 asid;
	bool full;

	if (unlikely(len != ACC_MSG_LEN_DW))
		return -EPROTO;

	asid = FIELD_GET(ACC_ASID, msg[1]);
	acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE];

	spin_lock(&acc_queue->lock);
	full = acc_queue_full(acc_queue);
	if (!full) {
		memcpy(acc_queue->data + acc_queue->tail, msg,
		       len * sizeof(u32));
		acc_queue->tail = (acc_queue->tail + len) % ACC_QUEUE_NUM_DW;
		queue_work(gt->usm.acc_wq, &acc_queue->worker);
	} else {
		drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
	}
	spin_unlock(&acc_queue->lock);

	return full ? -ENOSPC : 0;
}