// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/overflow.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_svm.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "amdgpu_dma_buf.h"
#include "kfd_debug.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_release(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.open = kfd_open,
	.release = kfd_release,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
struct device *kfd_device;
static const struct class kfd_class = {
	.name = kfd_dev_name,
};

static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
{
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, gpu_id);

	if (pdd)
		return pdd;

	mutex_unlock(&p->mutex);
	return NULL;
}

static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
{
	mutex_unlock(&pdd->process->mutex);
}

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	err = class_register(&kfd_class);
	if (err)
		goto err_class_create;

	kfd_device = device_create(&kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_unregister(&kfd_class);
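	/* Each error label below also unwinds the successful steps above it */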
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(&kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_unregister(&kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
	kfd_device = NULL;
}


static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
			"Process %d (32-bit) failed to open /dev/kfd\n"
			"32-bit processes are not supported by amdkfd\n",
			current->pid);
		return -EPERM;
	}

	process = kfd_create_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	if (kfd_process_init_cwsr_apu(process, filep)) {
		kfd_unref_process(process);
		return -EFAULT;
	}

	/* filep now owns the reference returned by kfd_create_process */
	filep->private_data = process;

	dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n",
		process->lead_thread->pid, process->is_32bit_user_mode);

	return 0;
}

static int kfd_release(struct inode *inode, struct file *filep)
{
	struct kfd_process *process = filep->private_data;

	if (!process)
		return 0;

	if (process->context_id != KFD_CONTEXT_ID_PRIMARY)
		kfd_process_notifier_release_internal(process);

	kfd_unref_process(process);

	return 0;
}

static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_get_version_args *args = data;

	args->major_version = KFD_IOCTL_MAJOR_VERSION;
	args->minor_version = KFD_IOCTL_MINOR_VERSION;

	return 0;
}

static int set_queue_properties_from_user(struct queue_properties *q_properties,
				struct kfd_ioctl_create_queue_args *args)
{
	/*
	 * Repurpose queue percentage to accommodate new features:
	 * bit 0-7: queue percentage
	 * bit 8-15: pm4_target_xcc
	 */
	if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok((const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
		args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
		pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
	}

	if ((args->metadata_ring_size != 0) && !is_power_of_2(args->metadata_ring_size)) {
		pr_err("Metadata ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (!access_ok((const void __user *) args->read_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access read pointer\n");
		return -EFAULT;
	}

	if (!access_ok((const void __user *) args->write_pointer_address,
			sizeof(uint32_t))) {
		pr_err("Can't access write pointer\n");
		return -EFAULT;
	}

	if (args->eop_buffer_address &&
		!access_ok((const void __user *) args->eop_buffer_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer");
		return -EFAULT;
	}

	if (args->ctx_save_restore_address &&
		!access_ok((const void __user *) args->ctx_save_restore_address,
			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer");
		return -EFAULT;
	}

	q_properties->is_interop = false;
	q_properties->is_gws = false;
	q_properties->queue_percent = args->queue_percentage & 0xFF;
	/* bit 8-15 are repurposed to be PM4 target XCC */
	q_properties->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
	q_properties->priority = args->queue_priority;
	q_properties->queue_address = args->ring_base_address;
	q_properties->queue_size = args->ring_size;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->metadata_queue_size = args->metadata_ring_size;

	q_properties->read_ptr = (void __user *)args->read_pointer_address;
	q_properties->write_ptr = (void __user *)args->write_pointer_address;
	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
	q_properties->ctx_save_restore_area_address =
			args->ctx_save_restore_address;
	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
	q_properties->ctl_stack_size = args->ctl_stack_size;
	q_properties->sdma_engine_id = args->sdma_engine_id;
	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
		q_properties->type = KFD_QUEUE_TYPE_SDMA;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID)
		q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID;
	else
		return -ENOTSUPP;

	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
		q_properties->format = KFD_QUEUE_FORMAT_AQL;
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

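	/* Validation above uses access_ok() only; the queue buffers are
	 * actually pinned and mapped later via kfd_queue_acquire_buffers().
	 */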
	return 0;
}

static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_node *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;
	uint32_t doorbell_offset_in_process = 0;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);

	mutex_lock(&p->mutex);

	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		err = -EINVAL;
		goto err_pdd;
	}
	dev = pdd->dev;

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
		int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) +
				      kfd_get_num_xgmi_sdma_engines(dev) - 1;

		if (q_properties.sdma_engine_id > max_sdma_eng_id) {
			err = -EINVAL;
			pr_err("sdma_engine_id %i exceeds maximum id of %i\n",
			       q_properties.sdma_engine_id, max_sdma_eng_id);
			goto err_sdma_engine_id;
		}
	}

	if (!pdd->qpd.proc_doorbells) {
		err = kfd_alloc_process_doorbells(dev->kfd, pdd);
		if (err) {
			pr_debug("failed to allocate process doorbells\n");
			goto err_bind_process;
		}
	}

	err = kfd_queue_acquire_buffers(pdd, &q_properties);
	if (err) {
		pr_debug("failed to acquire user queue buffers\n");
		goto err_acquire_queue_buf;
	}

	pr_debug("Creating queue for process pid %d on gpu 0x%x\n",
			p->lead_thread->pid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id,
			NULL, NULL, NULL, &doorbell_offset_in_process);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;


	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	if (KFD_IS_SOC15(dev))
		/* On SOC15 ASICs, include the doorbell offset within the
		 * process doorbell frame, which is 2 pages.
		 */
		args->doorbell_offset |= doorbell_offset_in_process;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_NEW), p, dev, queue_id, false, NULL, 0);
	return 0;

err_create_queue:
	kfd_queue_unref_bo_vas(pdd, &q_properties);
	kfd_queue_release_buffers(pdd, &q_properties);
err_acquire_queue_buf:
err_sdma_engine_id:
err_bind_process:
err_pdd:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for process pid %d\n",
			args->queue_id,
			p->lead_thread->pid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	/*
	 * Repurpose queue percentage to accommodate new features:
	 * bit 0-7: queue percentage
	 * bit 8-15: pm4_target_xcc
	 */
	if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok((const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) {
		args->ring_size = KFD_MIN_QUEUE_RING_SIZE;
		pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage & 0xFF;
	/* bit 8-15 are repurposed to be PM4 target XCC */
	properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for process pid %d\n",
			args->queue_id, p->lead_thread->pid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue_properties(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}

static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct mqd_update_info minfo = {0};
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	minfo.cu_mask.count = args->num_cu_mask;
	if (minfo.cu_mask.count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits. We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
	 */
	if (minfo.cu_mask.count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		minfo.cu_mask.count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size);
	if (IS_ERR(minfo.cu_mask.ptr)) {
		pr_debug("Could not copy CU mask from userspace");
		return PTR_ERR(minfo.cu_mask.ptr);
	}

	mutex_lock(&p->mutex);

	retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);

	mutex_unlock(&p->mutex);

	kfree(minfo.cu_mask.ptr);
	return retval;
}

static int kfd_ioctl_get_queue_wave_state(struct file *filep,
					  struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_queue_wave_state_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	r = pqm_get_wave_state(&p->pqm, args->queue_id,
			       (void __user *)args->ctl_stack_address,
			       &args->ctl_stack_used_size,
			       &args->save_area_used_size);

	mutex_unlock(&p->mutex);

	return r;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		err = -EINVAL;
		goto err_pdd;
	}

	pdd = kfd_bind_process_to_device(pdd->dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

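	/* Translate the uapi cache-policy values into the driver's internal
	 * cache_policy enum before handing them to the device queue manager.
	 */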
	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size,
				args->misc_process_flag))
		err = -EINVAL;

out:
err_pdd:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	int err = 0;
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);

	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		err = -EINVAL;
		goto err_pdd;
	}

	pdd = kfd_bind_process_to_device(pdd->dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);

out:
err_pdd:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	return -EPERM;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	return -EPERM;
}

static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	return -EPERM;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	return -EPERM;
}

static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	mutex_unlock(&p->mutex);
	if (pdd)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	args->cpu_clock_counter = ktime_get_raw_ns();
	args->system_clock_counter = ktime_get_boottime_ns();

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	int i;

	dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);
	/* Run over all pdd of the process */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		pAperture =
			&args->process_apertures[args->num_of_nodes];
		pAperture->gpu_id = pdd->dev->id;
		pAperture->lds_base = pdd->lds_base;
		pAperture->lds_limit = pdd->lds_limit;
		pAperture->gpuvm_base = pdd->gpuvm_base;
		pAperture->gpuvm_limit = pdd->gpuvm_limit;
		pAperture->scratch_base = pdd->scratch_base;
		pAperture->scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"node id %u\n", args->num_of_nodes);
		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);

		if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
			break;
	}
	mutex_unlock(&p->mutex);

	return 0;
}

static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	int ret;
	int i;

	dev_dbg(kfd_device, "get apertures for process pid %d",
			p->lead_thread->pid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);
		args->num_of_nodes = p->n_pdds;
		goto out_unlock;
	}

	if (args->num_of_nodes > kfd_topology_get_num_devices())
		return -EINVAL;

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc_objs(struct kfd_process_device_apertures,
			  args->num_of_nodes);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!p->n_pdds) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		pa[i].gpu_id = pdd->dev->id;
		pa[i].lds_base = pdd->lds_base;
		pa[i].lds_limit = pdd->lds_limit;
		pa[i].gpuvm_base = pdd->gpuvm_base;
		pa[i].gpuvm_limit = pdd->gpuvm_limit;
		pa[i].scratch_base = pdd->scratch_base;
		pa[i].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
	}
	mutex_unlock(&p->mutex);

	args->num_of_nodes = i;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(i * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		mutex_lock(&p->mutex);
		err = kfd_kmap_event_page(p, args->event_page_offset);
		mutex_unlock(&p->mutex);
		if (err)
			return err;
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;

	return kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			&args->timeout, &args->wait_result);
}

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_node *dev;
	long err;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		err = -EINVAL;
		goto err_pdd;
	}
	dev = pdd->dev;

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->adev, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
err_pdd:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_process_device *pdd;
	struct tile_config config;
	int err = 0;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	mutex_unlock(&p->mutex);
	if (!pdd)
		return -EINVAL;

	amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct file *drm_file;
	int ret;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		ret = -EINVAL;
		goto err_pdd;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_drm_file;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;

	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
err_pdd:
err_drm_file:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}

bool kfd_dev_is_large_bar(struct kfd_node *dev)
{
	if (dev->kfd->adev->debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->local_mem_info.local_mem_size_private == 0 &&
	    dev->local_mem_info.local_mem_size_public > 0)
		return true;

	if (dev->local_mem_info.local_mem_size_public == 0 &&
	    dev->kfd->adev->gmc.is_app_apu) {
		pr_debug("APP APU, Consider like a large bar system\n");
		return true;
	}

	return false;
}

static int kfd_ioctl_get_available_memory(struct file *filep,
					  struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_available_memory_args *args = data;
	struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);

	if (!pdd)
		return -EINVAL;
	args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev,
							     pdd->dev->node_id);
	kfd_unlock_pdd(pdd);
	return 0;
}

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_node *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;
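	/* Reject userptr allocations from non-primary contexts up front */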
	if (p->context_id != KFD_CONTEXT_ID_PRIMARY && (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
		pr_debug("USERPTR is not supported on non-primary kfd_process\n");

		return -EOPNOTSUPP;
	}

#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
	/* Flush pending deferred work to avoid racing with deferred actions
	 * from previous memory map changes (e.g. munmap).
	 */
	svm_range_list_lock_and_flush_work(&p->svms, current->mm);
	mutex_lock(&p->svms.lock);
	mmap_write_unlock(current->mm);

	/* Skip the special case that allocates VRAM without a VA;
	 * in that case the VA is invalid (0).
	 */
	if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) &&
	    interval_tree_iter_first(&p->svms.objects,
				     args->va_addr >> PAGE_SHIFT,
				     (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
		pr_err("Address: 0x%llx already allocated by SVM\n",
			args->va_addr);
		mutex_unlock(&p->svms.lock);
		return -EADDRINUSE;
	}

	/* When registering a user buffer, check whether it has already been
	 * registered by SVM via its CPU virtual address.
	 */
	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&
	    interval_tree_iter_first(&p->svms.objects,
				     args->mmap_offset >> PAGE_SHIFT,
				     (args->mmap_offset + args->size - 1) >> PAGE_SHIFT)) {
		pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",
			args->mmap_offset);
		mutex_unlock(&p->svms.lock);
		return -EADDRINUSE;
	}

	mutex_unlock(&p->svms.lock);
#endif
	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
	if (!pdd) {
		err = -EINVAL;
		goto err_pdd;
	}

	dev = pdd->dev;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		err = -EINVAL;
		goto err_large_bar;
	}

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
		if (args->size != kfd_doorbell_process_slice(dev->kfd)) {
			err = -EINVAL;
			goto err_unlock;
		}
		offset = kfd_get_process_doorbells(pdd);
		if (!offset) {
			err = -ENOMEM;
			goto err_unlock;
		}
	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
		if (args->size != PAGE_SIZE) {
			err = -EINVAL;
			goto err_unlock;
		}
		offset = dev->adev->rmmio_remap.bus_addr;
		if (!offset || (PAGE_SIZE > 4096)) {
			err = -ENOMEM;
			goto err_unlock;
		}
	}

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		dev->adev, args->va_addr, args->size,
		pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
		flags, false);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	/* Update the VRAM usage count */
	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
		uint64_t size = args->size;

		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
			size >>= 1;
		atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage);
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	/* MMIO is mapped through kfd device
	 * Generate a kfd mmap offset
	 */
	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
		args->mmap_offset = KFD_MMAP_TYPE_MMIO
					| KFD_MMAP_GPU_ID(args->gpu_id);

	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
					       pdd->drm_priv, NULL);
err_unlock:
err_pdd:
err_large_bar:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	int ret;
	uint64_t size = 0;

	mutex_lock(&p->mutex);
	/*
	 * Safeguard to prevent user space from freeing signal BO.
	 * It will be freed at process termination.
	 */
	if (p->signal_handle && (p->signal_handle == args->handle)) {
		pr_err("Free signal BO is not allowed\n");
		ret = -EPERM;
		goto err_unlock;
	}

	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_pdd;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,
				(struct kgd_mem *)mem, pdd->drm_priv, &size);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

	atomic64_sub(size, &pdd->vram_usage);

err_unlock:
err_pdd:
	mutex_unlock(&p->mutex);
	return ret;
}

static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_node *dev;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
	if (!pdd) {
		err = -EINVAL;
		goto get_process_device_data_failed;
	}
	dev = pdd->dev;

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
						GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
		if (!peer_pdd) {
			pr_debug("Getting device by id failed for 0x%x\n",
				 devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

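		/* Bind the peer device to the process before mapping the BO
		 * into that device's GPUVM.
		 */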
1326 peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p); 1327 if (IS_ERR(peer_pdd)) { 1328 err = PTR_ERR(peer_pdd); 1329 goto get_mem_obj_from_handle_failed; 1330 } 1331 1332 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( 1333 peer_pdd->dev->adev, (struct kgd_mem *)mem, 1334 peer_pdd->drm_priv); 1335 if (err) { 1336 struct pci_dev *pdev = peer_pdd->dev->adev->pdev; 1337 1338 dev_err(dev->adev->dev, 1339 "Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n", 1340 pci_domain_nr(pdev->bus), 1341 pdev->bus->number, 1342 PCI_SLOT(pdev->devfn), 1343 PCI_FUNC(pdev->devfn), 1344 ((struct kgd_mem *)mem)->domain); 1345 goto map_memory_to_gpu_failed; 1346 } 1347 args->n_success = i+1; 1348 } 1349 1350 err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); 1351 if (err) { 1352 pr_debug("Sync memory failed, wait interrupted by user signal\n"); 1353 goto sync_memory_failed; 1354 } 1355 1356 mutex_unlock(&p->mutex); 1357 1358 /* Flush TLBs after waiting for the page table updates to complete */ 1359 for (i = 0; i < args->n_devices; i++) { 1360 peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); 1361 if (WARN_ON_ONCE(!peer_pdd)) 1362 continue; 1363 kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); 1364 } 1365 kfree(devices_arr); 1366 1367 return err; 1368 1369 get_process_device_data_failed: 1370 bind_process_to_device_failed: 1371 get_mem_obj_from_handle_failed: 1372 map_memory_to_gpu_failed: 1373 sync_memory_failed: 1374 mutex_unlock(&p->mutex); 1375 copy_from_user_failed: 1376 kfree(devices_arr); 1377 1378 return err; 1379 } 1380 1381 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, 1382 struct kfd_process *p, void *data) 1383 { 1384 struct kfd_ioctl_unmap_memory_from_gpu_args *args = data; 1385 struct kfd_process_device *pdd, *peer_pdd; 1386 void *mem; 1387 long err = 0; 1388 uint32_t *devices_arr = NULL, i; 1389 bool flush_tlb; 1390 1391 if (!args->n_devices) { 1392 pr_debug("Device IDs array empty\n"); 1393 return -EINVAL; 1394 } 1395 if (args->n_success > args->n_devices) { 1396 pr_debug("n_success exceeds n_devices\n"); 1397 return -EINVAL; 1398 } 1399 1400 devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), 1401 GFP_KERNEL); 1402 if (!devices_arr) 1403 return -ENOMEM; 1404 1405 err = copy_from_user(devices_arr, 1406 (void __user *)args->device_ids_array_ptr, 1407 args->n_devices * sizeof(*devices_arr)); 1408 if (err != 0) { 1409 err = -EFAULT; 1410 goto copy_from_user_failed; 1411 } 1412 1413 mutex_lock(&p->mutex); 1414 pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle)); 1415 if (!pdd) { 1416 err = -EINVAL; 1417 goto bind_process_to_device_failed; 1418 } 1419 1420 mem = kfd_process_device_translate_handle(pdd, 1421 GET_IDR_HANDLE(args->handle)); 1422 if (!mem) { 1423 err = -ENOMEM; 1424 goto get_mem_obj_from_handle_failed; 1425 } 1426 1427 for (i = args->n_success; i < args->n_devices; i++) { 1428 peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); 1429 if (!peer_pdd) { 1430 err = -EINVAL; 1431 goto get_mem_obj_from_handle_failed; 1432 } 1433 err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( 1434 peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv); 1435 if (err) { 1436 pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices); 1437 goto unmap_memory_from_gpu_failed; 1438 } 1439 args->n_success = i+1; 1440 } 1441 1442 flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd); 1443 if (flush_tlb) { 1444 err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev, 1445 (struct kgd_mem *) mem, true); 1446 if 
(err) { 1447 pr_debug("Sync memory failed, wait interrupted by user signal\n"); 1448 goto sync_memory_failed; 1449 } 1450 } 1451 1452 /* Flush TLBs after waiting for the page table updates to complete */ 1453 for (i = 0; i < args->n_devices; i++) { 1454 peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); 1455 if (WARN_ON_ONCE(!peer_pdd)) 1456 continue; 1457 if (flush_tlb) 1458 kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); 1459 1460 /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */ 1461 err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); 1462 if (err) 1463 goto sync_memory_failed; 1464 } 1465 1466 mutex_unlock(&p->mutex); 1467 1468 kfree(devices_arr); 1469 1470 return 0; 1471 1472 bind_process_to_device_failed: 1473 get_mem_obj_from_handle_failed: 1474 unmap_memory_from_gpu_failed: 1475 sync_memory_failed: 1476 mutex_unlock(&p->mutex); 1477 copy_from_user_failed: 1478 kfree(devices_arr); 1479 return err; 1480 } 1481 1482 static int kfd_ioctl_alloc_queue_gws(struct file *filep, 1483 struct kfd_process *p, void *data) 1484 { 1485 int retval; 1486 struct kfd_ioctl_alloc_queue_gws_args *args = data; 1487 struct queue *q; 1488 struct kfd_node *dev; 1489 1490 mutex_lock(&p->mutex); 1491 q = pqm_get_user_queue(&p->pqm, args->queue_id); 1492 1493 if (q) { 1494 dev = q->device; 1495 } else { 1496 retval = -EINVAL; 1497 goto out_unlock; 1498 } 1499 1500 if (!dev->gws) { 1501 retval = -ENODEV; 1502 goto out_unlock; 1503 } 1504 1505 if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 1506 retval = -ENODEV; 1507 goto out_unlock; 1508 } 1509 1510 if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) || 1511 kfd_dbg_has_cwsr_workaround(dev))) { 1512 retval = -EBUSY; 1513 goto out_unlock; 1514 } 1515 1516 retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? 
dev->gws : NULL); 1517 mutex_unlock(&p->mutex); 1518 1519 args->first_gws = 0; 1520 return retval; 1521 1522 out_unlock: 1523 mutex_unlock(&p->mutex); 1524 return retval; 1525 } 1526 1527 static int kfd_ioctl_get_dmabuf_info(struct file *filep, 1528 struct kfd_process *p, void *data) 1529 { 1530 struct kfd_ioctl_get_dmabuf_info_args *args = data; 1531 struct kfd_node *dev = NULL; 1532 struct amdgpu_device *dmabuf_adev; 1533 void *metadata_buffer = NULL; 1534 uint32_t flags; 1535 int8_t xcp_id; 1536 unsigned int i; 1537 int r; 1538 1539 /* Find a KFD GPU device that supports the get_dmabuf_info query */ 1540 for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++) 1541 if (dev && !kfd_devcgroup_check_permission(dev)) 1542 break; 1543 if (!dev) 1544 return -EINVAL; 1545 1546 if (args->metadata_ptr) { 1547 metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL); 1548 if (!metadata_buffer) 1549 return -ENOMEM; 1550 } 1551 1552 /* Get dmabuf info from KGD */ 1553 r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd, 1554 &dmabuf_adev, &args->size, 1555 metadata_buffer, args->metadata_size, 1556 &args->metadata_size, &flags, &xcp_id); 1557 if (r) 1558 goto exit; 1559 1560 if (xcp_id >= 0) 1561 args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id; 1562 else 1563 args->gpu_id = dev->id; 1564 args->flags = flags; 1565 1566 /* Copy metadata buffer to user mode */ 1567 if (metadata_buffer) { 1568 r = copy_to_user((void __user *)args->metadata_ptr, 1569 metadata_buffer, args->metadata_size); 1570 if (r != 0) 1571 r = -EFAULT; 1572 } 1573 1574 exit: 1575 kfree(metadata_buffer); 1576 1577 return r; 1578 } 1579 1580 static int kfd_ioctl_import_dmabuf(struct file *filep, 1581 struct kfd_process *p, void *data) 1582 { 1583 struct kfd_ioctl_import_dmabuf_args *args = data; 1584 struct kfd_process_device *pdd; 1585 int idr_handle; 1586 uint64_t size; 1587 void *mem; 1588 int r; 1589 1590 mutex_lock(&p->mutex); 1591 pdd = kfd_process_device_data_by_id(p, args->gpu_id); 1592 if (!pdd) { 1593 r = -EINVAL; 1594 goto err_unlock; 1595 } 1596 1597 pdd = kfd_bind_process_to_device(pdd->dev, p); 1598 if (IS_ERR(pdd)) { 1599 r = PTR_ERR(pdd); 1600 goto err_unlock; 1601 } 1602 1603 r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd, 1604 args->va_addr, pdd->drm_priv, 1605 (struct kgd_mem **)&mem, &size, 1606 NULL); 1607 if (r) 1608 goto err_unlock; 1609 1610 idr_handle = kfd_process_device_create_obj_handle(pdd, mem); 1611 if (idr_handle < 0) { 1612 r = -EFAULT; 1613 goto err_free; 1614 } 1615 1616 mutex_unlock(&p->mutex); 1617 1618 args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); 1619 1620 return 0; 1621 1622 err_free: 1623 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem, 1624 pdd->drm_priv, NULL); 1625 err_unlock: 1626 mutex_unlock(&p->mutex); 1627 return r; 1628 } 1629 1630 static int kfd_ioctl_export_dmabuf(struct file *filep, 1631 struct kfd_process *p, void *data) 1632 { 1633 struct kfd_ioctl_export_dmabuf_args *args = data; 1634 struct kfd_process_device *pdd; 1635 struct dma_buf *dmabuf; 1636 struct kfd_node *dev; 1637 void *mem; 1638 int ret = 0; 1639 1640 dev = kfd_device_by_id(GET_GPU_ID(args->handle)); 1641 if (!dev) 1642 return -EINVAL; 1643 1644 mutex_lock(&p->mutex); 1645 1646 pdd = kfd_get_process_device_data(dev, p); 1647 if (!pdd) { 1648 ret = -EINVAL; 1649 goto err_unlock; 1650 } 1651 1652 mem = kfd_process_device_translate_handle(pdd, 1653 GET_IDR_HANDLE(args->handle)); 1654 if (!mem) { 1655 ret = -EINVAL; 1656 goto 
err_unlock; 1657 } 1658 1659 ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf); 1660 mutex_unlock(&p->mutex); 1661 if (ret) 1662 goto err_out; 1663 1664 ret = dma_buf_fd(dmabuf, args->flags); 1665 if (ret < 0) { 1666 dma_buf_put(dmabuf); 1667 goto err_out; 1668 } 1669 /* dma_buf_fd assigns the reference count to the fd, no need to 1670 * put the reference here. 1671 */ 1672 args->dmabuf_fd = ret; 1673 1674 return 0; 1675 1676 err_unlock: 1677 mutex_unlock(&p->mutex); 1678 err_out: 1679 return ret; 1680 } 1681 1682 /* Handle requests for watching SMI events */ 1683 static int kfd_ioctl_smi_events(struct file *filep, 1684 struct kfd_process *p, void *data) 1685 { 1686 struct kfd_ioctl_smi_events_args *args = data; 1687 struct kfd_process_device *pdd; 1688 1689 mutex_lock(&p->mutex); 1690 1691 pdd = kfd_process_device_data_by_id(p, args->gpuid); 1692 mutex_unlock(&p->mutex); 1693 if (!pdd) 1694 return -EINVAL; 1695 1696 return kfd_smi_event_open(pdd->dev, &args->anon_fd); 1697 } 1698 1699 static int kfd_ioctl_svm_validate(void *kdata, unsigned int usize) 1700 { 1701 struct kfd_ioctl_svm_args *args = kdata; 1702 size_t expected = struct_size(args, attrs, args->nattr); 1703 1704 if (expected == SIZE_MAX || usize < expected) 1705 return -EINVAL; 1706 return 0; 1707 } 1708 1709 #if IS_ENABLED(CONFIG_HSA_AMD_SVM) 1710 1711 static int kfd_ioctl_set_xnack_mode(struct file *filep, 1712 struct kfd_process *p, void *data) 1713 { 1714 struct kfd_ioctl_set_xnack_mode_args *args = data; 1715 int r = 0; 1716 1717 mutex_lock(&p->mutex); 1718 if (args->xnack_enabled >= 0) { 1719 if (!list_empty(&p->pqm.queues)) { 1720 pr_debug("Process has user queues running\n"); 1721 r = -EBUSY; 1722 goto out_unlock; 1723 } 1724 1725 if (p->xnack_enabled == args->xnack_enabled) 1726 goto out_unlock; 1727 1728 if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) { 1729 r = -EPERM; 1730 goto out_unlock; 1731 } 1732 1733 r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled); 1734 } else { 1735 args->xnack_enabled = p->xnack_enabled; 1736 } 1737 1738 out_unlock: 1739 mutex_unlock(&p->mutex); 1740 1741 return r; 1742 } 1743 1744 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) 1745 { 1746 struct kfd_ioctl_svm_args *args = data; 1747 int r = 0; 1748 1749 if (p->context_id != KFD_CONTEXT_ID_PRIMARY) { 1750 pr_debug("SVM ioctl not supported on non-primary kfd process\n"); 1751 1752 return -EOPNOTSUPP; 1753 } 1754 1755 pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n", 1756 args->start_addr, args->size, args->op, args->nattr); 1757 1758 if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK)) 1759 return -EINVAL; 1760 if (!args->start_addr || !args->size) 1761 return -EINVAL; 1762 1763 r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr, 1764 args->attrs); 1765 1766 return r; 1767 } 1768 #else 1769 static int kfd_ioctl_set_xnack_mode(struct file *filep, 1770 struct kfd_process *p, void *data) 1771 { 1772 return -EPERM; 1773 } 1774 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) 1775 { 1776 return -EPERM; 1777 } 1778 #endif 1779 1780 static int criu_checkpoint_process(struct kfd_process *p, 1781 uint8_t __user *user_priv_data, 1782 uint64_t *priv_offset) 1783 { 1784 struct kfd_criu_process_priv_data process_priv; 1785 int ret; 1786 1787 memset(&process_priv, 0, sizeof(process_priv)); 1788 1789 process_priv.version = KFD_CRIU_PRIV_VERSION; 1790 /* For CR, we don't consider negative xnack mode which is used for 1791 * 
querying without changing it, here 0 simply means disabled and 1 1792 * means enabled so retry for finding a valid PTE. 1793 */ 1794 process_priv.xnack_mode = p->xnack_enabled ? 1 : 0; 1795 1796 ret = copy_to_user(user_priv_data + *priv_offset, 1797 &process_priv, sizeof(process_priv)); 1798 1799 if (ret) { 1800 pr_err("Failed to copy process information to user\n"); 1801 ret = -EFAULT; 1802 } 1803 1804 *priv_offset += sizeof(process_priv); 1805 return ret; 1806 } 1807 1808 static int criu_checkpoint_devices(struct kfd_process *p, 1809 uint32_t num_devices, 1810 uint8_t __user *user_addr, 1811 uint8_t __user *user_priv_data, 1812 uint64_t *priv_offset) 1813 { 1814 struct kfd_criu_device_priv_data *device_priv = NULL; 1815 struct kfd_criu_device_bucket *device_buckets = NULL; 1816 int ret = 0, i; 1817 1818 device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL); 1819 if (!device_buckets) { 1820 ret = -ENOMEM; 1821 goto exit; 1822 } 1823 1824 device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL); 1825 if (!device_priv) { 1826 ret = -ENOMEM; 1827 goto exit; 1828 } 1829 1830 for (i = 0; i < num_devices; i++) { 1831 struct kfd_process_device *pdd = p->pdds[i]; 1832 1833 device_buckets[i].user_gpu_id = pdd->user_gpu_id; 1834 device_buckets[i].actual_gpu_id = pdd->dev->id; 1835 1836 /* 1837 * priv_data does not contain useful information for now and is reserved for 1838 * future use, so we do not set its contents. 1839 */ 1840 } 1841 1842 ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets)); 1843 if (ret) { 1844 pr_err("Failed to copy device information to user\n"); 1845 ret = -EFAULT; 1846 goto exit; 1847 } 1848 1849 ret = copy_to_user(user_priv_data + *priv_offset, 1850 device_priv, 1851 num_devices * sizeof(*device_priv)); 1852 if (ret) { 1853 pr_err("Failed to copy device information to user\n"); 1854 ret = -EFAULT; 1855 } 1856 *priv_offset += num_devices * sizeof(*device_priv); 1857 1858 exit: 1859 kvfree(device_buckets); 1860 kvfree(device_priv); 1861 return ret; 1862 } 1863 1864 static uint32_t get_process_num_bos(struct kfd_process *p) 1865 { 1866 uint32_t num_of_bos = 0; 1867 int i; 1868 1869 /* Run over all PDDs of the process */ 1870 for (i = 0; i < p->n_pdds; i++) { 1871 struct kfd_process_device *pdd = p->pdds[i]; 1872 void *mem; 1873 int id; 1874 1875 idr_for_each_entry(&pdd->alloc_idr, mem, id) { 1876 struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; 1877 1878 if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base) 1879 num_of_bos++; 1880 } 1881 } 1882 return num_of_bos; 1883 } 1884 1885 static int criu_get_prime_handle(struct kgd_mem *mem, 1886 int flags, u32 *shared_fd, 1887 struct file **file) 1888 { 1889 struct dma_buf *dmabuf; 1890 int ret; 1891 1892 ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf); 1893 if (ret) { 1894 pr_err("dmabuf export failed for the BO\n"); 1895 return ret; 1896 } 1897 1898 ret = get_unused_fd_flags(flags); 1899 if (ret < 0) { 1900 pr_err("dmabuf create fd failed, ret:%d\n", ret); 1901 goto out_free_dmabuf; 1902 } 1903 1904 *shared_fd = ret; 1905 *file = dmabuf->file; 1906 return 0; 1907 1908 out_free_dmabuf: 1909 dma_buf_put(dmabuf); 1910 return ret; 1911 } 1912 1913 static void commit_files(struct file **files, 1914 struct kfd_criu_bo_bucket *bo_buckets, 1915 unsigned int count, 1916 int err) 1917 { 1918 while (count--) { 1919 struct file *file = files[count]; 1920 1921 if (!file) 1922 continue; 1923 if (err) { 1924 fput(file); 1925 put_unused_fd(bo_buckets[count].dmabuf_fd); 1926 } 
else { 1927 fd_install(bo_buckets[count].dmabuf_fd, file); 1928 } 1929 } 1930 } 1931 1932 static int criu_checkpoint_bos(struct kfd_process *p, 1933 uint32_t num_bos, 1934 uint8_t __user *user_bos, 1935 uint8_t __user *user_priv_data, 1936 uint64_t *priv_offset) 1937 { 1938 struct kfd_criu_bo_bucket *bo_buckets; 1939 struct kfd_criu_bo_priv_data *bo_privs; 1940 struct file **files = NULL; 1941 int ret = 0, pdd_index, bo_index = 0, id; 1942 void *mem; 1943 1944 bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL); 1945 if (!bo_buckets) 1946 return -ENOMEM; 1947 1948 bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL); 1949 if (!bo_privs) { 1950 ret = -ENOMEM; 1951 goto exit; 1952 } 1953 1954 files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL); 1955 if (!files) { 1956 ret = -ENOMEM; 1957 goto exit; 1958 } 1959 1960 for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) { 1961 struct kfd_process_device *pdd = p->pdds[pdd_index]; 1962 struct amdgpu_bo *dumper_bo; 1963 struct kgd_mem *kgd_mem; 1964 1965 idr_for_each_entry(&pdd->alloc_idr, mem, id) { 1966 struct kfd_criu_bo_bucket *bo_bucket; 1967 struct kfd_criu_bo_priv_data *bo_priv; 1968 int i, dev_idx = 0; 1969 1970 kgd_mem = (struct kgd_mem *)mem; 1971 dumper_bo = kgd_mem->bo; 1972 1973 /* Skip checkpointing BOs that are used for Trap handler 1974 * code and state. Currently, these BOs have a VA that 1975 * is less GPUVM Base 1976 */ 1977 if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base) 1978 continue; 1979 1980 bo_bucket = &bo_buckets[bo_index]; 1981 bo_priv = &bo_privs[bo_index]; 1982 1983 bo_bucket->gpu_id = pdd->user_gpu_id; 1984 bo_bucket->addr = (uint64_t)kgd_mem->va; 1985 bo_bucket->size = amdgpu_bo_size(dumper_bo); 1986 bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags; 1987 bo_priv->idr_handle = id; 1988 1989 if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { 1990 ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo, 1991 &bo_priv->user_addr); 1992 if (ret) { 1993 pr_err("Failed to obtain user address for user-pointer bo\n"); 1994 goto exit; 1995 } 1996 } 1997 if (bo_bucket->alloc_flags 1998 & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { 1999 ret = criu_get_prime_handle(kgd_mem, 2000 bo_bucket->alloc_flags & 2001 KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 
DRM_RDWR : 0, 2002 &bo_bucket->dmabuf_fd, &files[bo_index]); 2003 if (ret) 2004 goto exit; 2005 } else { 2006 bo_bucket->dmabuf_fd = KFD_INVALID_FD; 2007 } 2008 2009 if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) 2010 bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL | 2011 KFD_MMAP_GPU_ID(pdd->dev->id); 2012 else if (bo_bucket->alloc_flags & 2013 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) 2014 bo_bucket->offset = KFD_MMAP_TYPE_MMIO | 2015 KFD_MMAP_GPU_ID(pdd->dev->id); 2016 else 2017 bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo); 2018 2019 for (i = 0; i < p->n_pdds; i++) { 2020 if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem)) 2021 bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id; 2022 } 2023 2024 pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx, bo_offset = 0x%llx\n" 2025 "gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x\n", 2026 bo_bucket->size, 2027 bo_bucket->addr, 2028 bo_bucket->offset, 2029 bo_bucket->gpu_id, 2030 bo_bucket->alloc_flags, 2031 bo_priv->idr_handle); 2032 bo_index++; 2033 } 2034 } 2035 2036 ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets)); 2037 if (ret) { 2038 pr_err("Failed to copy BO information to user\n"); 2039 ret = -EFAULT; 2040 goto exit; 2041 } 2042 2043 ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs)); 2044 if (ret) { 2045 pr_err("Failed to copy BO priv information to user\n"); 2046 ret = -EFAULT; 2047 goto exit; 2048 } 2049 2050 *priv_offset += num_bos * sizeof(*bo_privs); 2051 2052 exit: 2053 commit_files(files, bo_buckets, bo_index, ret); 2054 kvfree(files); 2055 kvfree(bo_buckets); 2056 kvfree(bo_privs); 2057 return ret; 2058 } 2059 2060 static int criu_get_process_object_info(struct kfd_process *p, 2061 uint32_t *num_devices, 2062 uint32_t *num_bos, 2063 uint32_t *num_objects, 2064 uint64_t *objs_priv_size) 2065 { 2066 uint64_t queues_priv_data_size, svm_priv_data_size, priv_size; 2067 uint32_t num_queues, num_events, num_svm_ranges; 2068 int ret; 2069 2070 *num_devices = p->n_pdds; 2071 *num_bos = get_process_num_bos(p); 2072 2073 ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size); 2074 if (ret) 2075 return ret; 2076 2077 num_events = kfd_get_num_events(p); 2078 2079 svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size); 2080 2081 *num_objects = num_queues + num_events + num_svm_ranges; 2082 2083 if (objs_priv_size) { 2084 priv_size = sizeof(struct kfd_criu_process_priv_data); 2085 priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data); 2086 priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data); 2087 priv_size += queues_priv_data_size; 2088 priv_size += num_events * sizeof(struct kfd_criu_event_priv_data); 2089 priv_size += svm_priv_data_size; 2090 *objs_priv_size = priv_size; 2091 } 2092 return 0; 2093 } 2094 2095 static int criu_checkpoint(struct file *filep, 2096 struct kfd_process *p, 2097 struct kfd_ioctl_criu_args *args) 2098 { 2099 int ret; 2100 uint32_t num_devices, num_bos, num_objects; 2101 uint64_t priv_size, priv_offset = 0, bo_priv_offset; 2102 2103 if (!args->devices || !args->bos || !args->priv_data) 2104 return -EINVAL; 2105 2106 mutex_lock(&p->mutex); 2107 2108 if (!p->n_pdds) { 2109 pr_err("No pdd for given process\n"); 2110 ret = -ENODEV; 2111 goto exit_unlock; 2112 } 2113 2114 /* Confirm all process queues are evicted */ 2115 if (!p->queues_paused) { 2116 pr_err("Cannot dump process when queues are not in evicted state\n"); 2117 /* CRIU plugin did not call op PROCESS_INFO before checkpointing
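 * (PROCESS_INFO evicts all queues and sets queues_paused; see criu_process_info below)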
*/ 2118 ret = -EINVAL; 2119 goto exit_unlock; 2120 } 2121 2122 ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size); 2123 if (ret) 2124 goto exit_unlock; 2125 2126 if (num_devices != args->num_devices || 2127 num_bos != args->num_bos || 2128 num_objects != args->num_objects || 2129 priv_size != args->priv_data_size) { 2130 2131 ret = -EINVAL; 2132 goto exit_unlock; 2133 } 2134 2135 /* each function will store private data inside priv_data and adjust priv_offset */ 2136 ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset); 2137 if (ret) 2138 goto exit_unlock; 2139 2140 ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices, 2141 (uint8_t __user *)args->priv_data, &priv_offset); 2142 if (ret) 2143 goto exit_unlock; 2144 2145 /* Leave room for BOs in the private data. They need to be restored 2146 * before events, but we checkpoint them last to simplify the error 2147 * handling. 2148 */ 2149 bo_priv_offset = priv_offset; 2150 priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data); 2151 2152 if (num_objects) { 2153 ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data, 2154 &priv_offset); 2155 if (ret) 2156 goto exit_unlock; 2157 2158 ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data, 2159 &priv_offset); 2160 if (ret) 2161 goto exit_unlock; 2162 2163 ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset); 2164 if (ret) 2165 goto exit_unlock; 2166 } 2167 2168 /* This must be the last thing in this function that can fail. 2169 * Otherwise we leak dmabuf file descriptors. 2170 */ 2171 ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos, 2172 (uint8_t __user *)args->priv_data, &bo_priv_offset); 2173 2174 exit_unlock: 2175 mutex_unlock(&p->mutex); 2176 if (ret) 2177 pr_err("Failed to dump CRIU ret:%d\n", ret); 2178 else 2179 pr_debug("CRIU dump ret:%d\n", ret); 2180 2181 return ret; 2182 } 2183 2184 static int criu_restore_process(struct kfd_process *p, 2185 struct kfd_ioctl_criu_args *args, 2186 uint64_t *priv_offset, 2187 uint64_t max_priv_data_size) 2188 { 2189 int ret = 0; 2190 struct kfd_criu_process_priv_data process_priv; 2191 2192 if (*priv_offset + sizeof(process_priv) > max_priv_data_size) 2193 return -EINVAL; 2194 2195 ret = copy_from_user(&process_priv, 2196 (void __user *)(args->priv_data + *priv_offset), 2197 sizeof(process_priv)); 2198 if (ret) { 2199 pr_err("Failed to copy process private information from user\n"); 2200 ret = -EFAULT; 2201 goto exit; 2202 } 2203 *priv_offset += sizeof(process_priv); 2204 2205 if (process_priv.version != KFD_CRIU_PRIV_VERSION) { 2206 pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n", 2207 process_priv.version, KFD_CRIU_PRIV_VERSION); 2208 return -EINVAL; 2209 } 2210 2211 pr_debug("Setting XNACK mode\n"); 2212 if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) { 2213 pr_err("xnack mode cannot be set\n"); 2214 ret = -EPERM; 2215 goto exit; 2216 } else { 2217 pr_debug("set xnack mode: %d\n", process_priv.xnack_mode); 2218 p->xnack_enabled = process_priv.xnack_mode; 2219 } 2220 2221 exit: 2222 return ret; 2223 } 2224 2225 static int criu_restore_devices(struct kfd_process *p, 2226 struct kfd_ioctl_criu_args *args, 2227 uint64_t *priv_offset, 2228 uint64_t max_priv_data_size) 2229 { 2230 struct kfd_criu_device_bucket *device_buckets; 2231 struct kfd_criu_device_priv_data *device_privs; 2232 int ret = 0; 2233 uint32_t i; 2234 2235 if (args->num_devices != 
p->n_pdds) 2236 return -EINVAL; 2237 2238 if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size) 2239 return -EINVAL; 2240 2241 device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL); 2242 if (!device_buckets) 2243 return -ENOMEM; 2244 2245 ret = copy_from_user(device_buckets, (void __user *)args->devices, 2246 args->num_devices * sizeof(*device_buckets)); 2247 if (ret) { 2248 pr_err("Failed to copy device buckets from user\n"); 2249 ret = -EFAULT; 2250 goto exit; 2251 } 2252 2253 for (i = 0; i < args->num_devices; i++) { 2254 struct kfd_node *dev; 2255 struct kfd_process_device *pdd; 2256 struct file *drm_file; 2257 2258 /* device private data is not currently used */ 2259 2260 if (!device_buckets[i].user_gpu_id) { 2261 pr_err("Invalid user gpu_id\n"); 2262 ret = -EINVAL; 2263 goto exit; 2264 } 2265 2266 dev = kfd_device_by_id(device_buckets[i].actual_gpu_id); 2267 if (!dev) { 2268 pr_err("Failed to find device with gpu_id = %x\n", 2269 device_buckets[i].actual_gpu_id); 2270 ret = -EINVAL; 2271 goto exit; 2272 } 2273 2274 pdd = kfd_get_process_device_data(dev, p); 2275 if (!pdd) { 2276 pr_err("Failed to get pdd for gpu_id = %x\n", 2277 device_buckets[i].actual_gpu_id); 2278 ret = -EINVAL; 2279 goto exit; 2280 } 2281 pdd->user_gpu_id = device_buckets[i].user_gpu_id; 2282 2283 drm_file = fget(device_buckets[i].drm_fd); 2284 if (!drm_file) { 2285 pr_err("Invalid render node file descriptor sent from plugin (%d)\n", 2286 device_buckets[i].drm_fd); 2287 ret = -EINVAL; 2288 goto exit; 2289 } 2290 2291 if (pdd->drm_file) { 2292 fput(drm_file); 2293 ret = -EINVAL; goto exit; 2294 } 2295 2296 /* create the vm using render nodes for kfd pdd */ 2297 if (kfd_process_device_init_vm(pdd, drm_file)) { 2298 pr_err("could not init vm for given pdd\n"); 2299 /* On success the PDD keeps the drm_file reference, so only drop it on failure */ 2300 fput(drm_file); 2301 ret = -EINVAL; 2302 goto exit; 2303 } 2304 /* 2305 * pdd already has the vm bound to its render node, so the call below won't create a 2306 * new exclusive kfd mapping but will reuse the existing renderDXXX one; it is still 2307 * needed for iommu v2 binding and runtime pm. 2308 */ 2309 pdd = kfd_bind_process_to_device(dev, p); 2310 if (IS_ERR(pdd)) { 2311 ret = PTR_ERR(pdd); 2312 goto exit; 2313 } 2314 2315 if (!pdd->qpd.proc_doorbells) { 2316 ret = kfd_alloc_process_doorbells(dev->kfd, pdd); 2317 if (ret) 2318 goto exit; 2319 } 2320 } 2321 2322 /* 2323 * We are not copying device private data from user as we are not using the data for now, 2324 * but we still adjust for its private data.
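 * (priv_offset must still advance past the reserved device entries so the BO data that follows is read from the right place)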
2325 */ 2326 *priv_offset += args->num_devices * sizeof(*device_privs); 2327 2328 exit: 2329 kfree(device_buckets); 2330 return ret; 2331 } 2332 2333 static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, 2334 struct kfd_criu_bo_bucket *bo_bucket, 2335 struct kfd_criu_bo_priv_data *bo_priv, 2336 struct kgd_mem **kgd_mem) 2337 { 2338 int idr_handle; 2339 int ret; 2340 const bool criu_resume = true; 2341 u64 offset; 2342 2343 if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { 2344 if (bo_bucket->size != 2345 kfd_doorbell_process_slice(pdd->dev->kfd)) 2346 return -EINVAL; 2347 2348 offset = kfd_get_process_doorbells(pdd); 2349 if (!offset) 2350 return -ENOMEM; 2351 } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { 2352 /* MMIO BOs need remapped bus address */ 2353 if (bo_bucket->size != PAGE_SIZE) { 2354 pr_err("Invalid page size\n"); 2355 return -EINVAL; 2356 } 2357 offset = pdd->dev->adev->rmmio_remap.bus_addr; 2358 if (!offset || (PAGE_SIZE > 4096)) { 2359 pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n"); 2360 return -ENOMEM; 2361 } 2362 } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { 2363 offset = bo_priv->user_addr; 2364 } 2365 /* Create the BO */ 2366 ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr, 2367 bo_bucket->size, pdd->drm_priv, kgd_mem, 2368 &offset, bo_bucket->alloc_flags, criu_resume); 2369 if (ret) { 2370 pr_err("Could not create the BO\n"); 2371 return ret; 2372 } 2373 pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n", 2374 bo_bucket->size, bo_bucket->addr, offset); 2375 2376 /* Restore previous IDR handle */ 2377 pr_debug("Restoring old IDR handle for the BO\n"); 2378 idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle, 2379 bo_priv->idr_handle + 1, GFP_KERNEL); 2380 2381 if (idr_handle < 0) { 2382 pr_err("Could not allocate idr\n"); 2383 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv, 2384 NULL); 2385 return -ENOMEM; 2386 } 2387 2388 if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { 2389 bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id); 2390 } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { 2391 bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id); 2392 } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { 2393 bo_bucket->restored_offset = offset; 2394 } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { 2395 bo_bucket->restored_offset = offset; 2396 /* Update the VRAM usage count */ 2397 atomic64_add(bo_bucket->size, &pdd->vram_usage); 2398 } 2399 return 0; 2400 } 2401 2402 static int criu_restore_bo(struct kfd_process *p, 2403 struct kfd_criu_bo_bucket *bo_bucket, 2404 struct kfd_criu_bo_priv_data *bo_priv, 2405 struct file **file) 2406 { 2407 struct kfd_process_device *pdd; 2408 struct kgd_mem *kgd_mem; 2409 int ret; 2410 int j; 2411 2412 pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n", 2413 bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags, 2414 bo_priv->idr_handle); 2415 2416 pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id); 2417 if (!pdd) { 2418 pr_err("Failed to get pdd\n"); 2419 return -ENODEV; 2420 } 2421 2422 ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem); 2423 if (ret) 2424 return ret; 2425 2426 /* now map these BOs to GPU/s */ 2427 for (j = 0; j < p->n_pdds; j++) { 2428
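/* mapped_gpuids was filled densely at checkpoint time; the first zero entry ends the list of GPUs this BO was mapped on. */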
struct kfd_node *peer; 2429 struct kfd_process_device *peer_pdd; 2430 2431 if (!bo_priv->mapped_gpuids[j]) 2432 break; 2433 2434 peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]); 2435 if (!peer_pdd) 2436 return -EINVAL; 2437 2438 peer = peer_pdd->dev; 2439 2440 peer_pdd = kfd_bind_process_to_device(peer, p); 2441 if (IS_ERR(peer_pdd)) 2442 return PTR_ERR(peer_pdd); 2443 2444 ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem, 2445 peer_pdd->drm_priv); 2446 if (ret) { 2447 pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds); 2448 return ret; 2449 } 2450 } 2451 2452 pr_debug("map memory was successful for the BO\n"); 2453 /* create the dmabuf object and export the bo */ 2454 if (bo_bucket->alloc_flags 2455 & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { 2456 ret = criu_get_prime_handle(kgd_mem, DRM_RDWR, 2457 &bo_bucket->dmabuf_fd, file); 2458 if (ret) 2459 return ret; 2460 } else { 2461 bo_bucket->dmabuf_fd = KFD_INVALID_FD; 2462 } 2463 2464 return 0; 2465 } 2466 2467 static int criu_restore_bos(struct kfd_process *p, 2468 struct kfd_ioctl_criu_args *args, 2469 uint64_t *priv_offset, 2470 uint64_t max_priv_data_size) 2471 { 2472 struct kfd_criu_bo_bucket *bo_buckets = NULL; 2473 struct kfd_criu_bo_priv_data *bo_privs = NULL; 2474 struct file **files = NULL; 2475 int ret = 0; 2476 uint32_t i = 0; 2477 2478 if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size) 2479 return -EINVAL; 2480 2481 /* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */ 2482 amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info); 2483 2484 bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL); 2485 if (!bo_buckets) 2486 return -ENOMEM; 2487 2488 files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL); 2489 if (!files) { 2490 ret = -ENOMEM; 2491 goto exit; 2492 } 2493 2494 ret = copy_from_user(bo_buckets, (void __user *)args->bos, 2495 args->num_bos * sizeof(*bo_buckets)); 2496 if (ret) { 2497 pr_err("Failed to copy BOs information from user\n"); 2498 ret = -EFAULT; 2499 goto exit; 2500 } 2501 2502 bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL); 2503 if (!bo_privs) { 2504 ret = -ENOMEM; 2505 goto exit; 2506 } 2507 2508 ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset, 2509 args->num_bos * sizeof(*bo_privs)); 2510 if (ret) { 2511 pr_err("Failed to copy BO private information from user\n"); 2512 ret = -EFAULT; 2513 goto exit; 2514 } 2515 *priv_offset += args->num_bos * sizeof(*bo_privs); 2516 2517 /* Create and map new BOs */ 2518 for (; i < args->num_bos; i++) { 2519 ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]); 2520 if (ret) { 2521 pr_debug("Failed to restore BO[%d] ret:%d\n", i, ret); 2522 goto exit; 2523 } 2524 } /* done */ 2525 2526 /* Copy only the buckets back so user can read bo_buckets[N].restored_offset */ 2527 ret = copy_to_user((void __user *)args->bos, 2528 bo_buckets, 2529 (args->num_bos * sizeof(*bo_buckets))); 2530 if (ret) 2531 ret = -EFAULT; 2532 2533 exit: 2534 commit_files(files, bo_buckets, i, ret); 2535 kvfree(files); 2536 kvfree(bo_buckets); 2537 kvfree(bo_privs); 2538 return ret; 2539 } 2540 2541 static int criu_restore_objects(struct file *filep, 2542 struct kfd_process *p, 2543 struct kfd_ioctl_criu_args *args, 2544 uint64_t *priv_offset, 2545 uint64_t max_priv_data_size) 2546 { 2547 int ret = 0; 2548 uint32_t i; 2549 2550 BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type)); 2551 BUILD_BUG_ON(offsetof(struct
kfd_criu_event_priv_data, object_type)); 2552 BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type)); 2553 2554 for (i = 0; i < args->num_objects; i++) { 2555 uint32_t object_type; 2556 2557 if (*priv_offset + sizeof(object_type) > max_priv_data_size) { 2558 pr_err("Invalid private data size\n"); 2559 return -EINVAL; 2560 } 2561 2562 ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset)); 2563 if (ret) { 2564 pr_err("Failed to copy private information from user\n"); 2565 goto exit; 2566 } 2567 2568 switch (object_type) { 2569 case KFD_CRIU_OBJECT_TYPE_QUEUE: 2570 ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data, 2571 priv_offset, max_priv_data_size); 2572 if (ret) 2573 goto exit; 2574 break; 2575 case KFD_CRIU_OBJECT_TYPE_EVENT: 2576 ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data, 2577 priv_offset, max_priv_data_size); 2578 if (ret) 2579 goto exit; 2580 break; 2581 case KFD_CRIU_OBJECT_TYPE_SVM_RANGE: 2582 ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data, 2583 priv_offset, max_priv_data_size); 2584 if (ret) 2585 goto exit; 2586 break; 2587 default: 2588 pr_err("Invalid object type:%u at index:%d\n", object_type, i); 2589 ret = -EINVAL; 2590 goto exit; 2591 } 2592 } 2593 exit: 2594 return ret; 2595 } 2596 2597 static int criu_restore(struct file *filep, 2598 struct kfd_process *p, 2599 struct kfd_ioctl_criu_args *args) 2600 { 2601 uint64_t priv_offset = 0; 2602 int ret = 0; 2603 2604 pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n", 2605 args->num_devices, args->num_bos, args->num_objects, args->priv_data_size); 2606 2607 if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data || 2608 !args->priv_data_size || !args->num_devices) 2609 return -EINVAL; 2610 2611 mutex_lock(&p->mutex); 2612 2613 /* 2614 * Set the process to evicted state to avoid running any new queues before all the memory 2615 * mappings are ready. 
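 * The queues are expected to stay evicted until the later CRIU resume stage has made the restored mappings valid.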
2616 */ 2617 ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE); 2618 if (ret) 2619 goto exit_unlock; 2620 2621 /* Each function will adjust priv_offset based on how many bytes they consumed */ 2622 ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size); 2623 if (ret) 2624 goto exit_unlock; 2625 2626 ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size); 2627 if (ret) 2628 goto exit_unlock; 2629 2630 ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size); 2631 if (ret) 2632 goto exit_unlock; 2633 2634 ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size); 2635 if (ret) 2636 goto exit_unlock; 2637 2638 if (priv_offset != args->priv_data_size) { 2639 pr_err("Invalid private data size\n"); 2640 ret = -EINVAL; 2641 } 2642 2643 exit_unlock: 2644 mutex_unlock(&p->mutex); 2645 if (ret) 2646 pr_err("Failed to restore CRIU ret:%d\n", ret); 2647 else 2648 pr_debug("CRIU restore successful\n"); 2649 2650 return ret; 2651 } 2652 2653 static int criu_unpause(struct file *filep, 2654 struct kfd_process *p, 2655 struct kfd_ioctl_criu_args *args) 2656 { 2657 int ret; 2658 2659 mutex_lock(&p->mutex); 2660 2661 if (!p->queues_paused) { 2662 mutex_unlock(&p->mutex); 2663 return -EINVAL; 2664 } 2665 2666 ret = kfd_process_restore_queues(p); 2667 if (ret) 2668 pr_err("Failed to unpause queues ret:%d\n", ret); 2669 else 2670 p->queues_paused = false; 2671 2672 mutex_unlock(&p->mutex); 2673 2674 return ret; 2675 } 2676 2677 static int criu_resume(struct file *filep, 2678 struct kfd_process *p, 2679 struct kfd_ioctl_criu_args *args) 2680 { 2681 struct kfd_process *target = NULL; 2682 struct pid *pid = NULL; 2683 int ret = 0; 2684 2685 pr_debug("Inside %s, target pid for criu restore: %d\n", __func__, 2686 args->pid); 2687 2688 pid = find_get_pid(args->pid); 2689 if (!pid) { 2690 pr_err("Cannot find pid info for %i\n", args->pid); 2691 return -ESRCH; 2692 } 2693 2694 pr_debug("calling kfd_lookup_process_by_pid\n"); 2695 target = kfd_lookup_process_by_pid(pid); 2696 2697 put_pid(pid); 2698 2699 if (!target) { 2700 pr_debug("Cannot find process info for %i\n", args->pid); 2701 return -ESRCH; 2702 } 2703 2704 mutex_lock(&target->mutex); 2705 ret = kfd_criu_resume_svm(target); 2706 if (ret) { 2707 pr_err("kfd_criu_resume_svm failed for %i\n", args->pid); 2708 goto exit; 2709 } 2710 2711 ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info); 2712 if (ret) 2713 pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid); 2714 2715 exit: 2716 mutex_unlock(&target->mutex); 2717 2718 kfd_unref_process(target); 2719 return ret; 2720 } 2721 2722 static int criu_process_info(struct file *filep, 2723 struct kfd_process *p, 2724 struct kfd_ioctl_criu_args *args) 2725 { 2726 int ret = 0; 2727 2728 mutex_lock(&p->mutex); 2729 2730 if (!p->n_pdds) { 2731 pr_err("No pdd for given process\n"); 2732 ret = -ENODEV; 2733 goto err_unlock; 2734 } 2735 2736 ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT); 2737 if (ret) 2738 goto err_unlock; 2739 2740 p->queues_paused = true; 2741 2742 args->pid = task_pid_nr_ns(p->lead_thread, 2743 task_active_pid_ns(p->lead_thread)); 2744 2745 ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos, 2746 &args->num_objects, &args->priv_data_size); 2747 if (ret) 2748 goto err_unlock; 2749 2750 dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n", 2751 args->num_devices, args->num_bos, args->num_objects, 2752 args->priv_data_size); 2753 2754 
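/* On any failure after the eviction above, restore the queues so the target process can keep running. */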
err_unlock: 2755 if (ret) { 2756 kfd_process_restore_queues(p); 2757 p->queues_paused = false; 2758 } 2759 mutex_unlock(&p->mutex); 2760 return ret; 2761 } 2762 2763 static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data) 2764 { 2765 struct kfd_ioctl_criu_args *args = data; 2766 int ret; 2767 2768 dev_dbg(kfd_device, "CRIU operation: %d\n", args->op); 2769 switch (args->op) { 2770 case KFD_CRIU_OP_PROCESS_INFO: 2771 ret = criu_process_info(filep, p, args); 2772 break; 2773 case KFD_CRIU_OP_CHECKPOINT: 2774 ret = criu_checkpoint(filep, p, args); 2775 break; 2776 case KFD_CRIU_OP_UNPAUSE: 2777 ret = criu_unpause(filep, p, args); 2778 break; 2779 case KFD_CRIU_OP_RESTORE: 2780 ret = criu_restore(filep, p, args); 2781 break; 2782 case KFD_CRIU_OP_RESUME: 2783 ret = criu_resume(filep, p, args); 2784 break; 2785 default: 2786 dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op); 2787 ret = -EINVAL; 2788 break; 2789 } 2790 2791 if (ret) 2792 dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret); 2793 2794 return ret; 2795 } 2796 2797 static int runtime_enable(struct kfd_process *p, uint64_t r_debug, 2798 bool enable_ttmp_setup) 2799 { 2800 int i = 0, ret = 0; 2801 2802 if (p->is_runtime_retry) 2803 goto retry; 2804 2805 if (p->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED) 2806 return -EBUSY; 2807 2808 for (i = 0; i < p->n_pdds; i++) { 2809 struct kfd_process_device *pdd = p->pdds[i]; 2810 2811 if (pdd->qpd.queue_count) 2812 return -EEXIST; 2813 2814 /* 2815 * Setup TTMPs by default. 2816 * Note that this call must remain here for MES ADD QUEUE to 2817 * skip_process_ctx_clear unconditionally as the first call to 2818 * SET_SHADER_DEBUGGER clears any stale process context data 2819 * saved in MES. 
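 * (kfd_dbg_set_mes_debug_mode below issues that first SET_SHADER_DEBUGGER call)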
2820 */ 2821 if (pdd->dev->kfd->shared_resources.enable_mes) { 2822 ret = kfd_dbg_set_mes_debug_mode( 2823 pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev)); 2824 if (ret) 2825 return ret; 2826 } 2827 } 2828 2829 p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED; 2830 p->runtime_info.r_debug = r_debug; 2831 p->runtime_info.ttmp_setup = enable_ttmp_setup; 2832 2833 if (p->runtime_info.ttmp_setup) { 2834 for (i = 0; i < p->n_pdds; i++) { 2835 struct kfd_process_device *pdd = p->pdds[i]; 2836 2837 if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) { 2838 amdgpu_gfx_off_ctrl(pdd->dev->adev, false); 2839 pdd->dev->kfd2kgd->enable_debug_trap( 2840 pdd->dev->adev, 2841 true, 2842 pdd->dev->vm_info.last_vmid_kfd); 2843 } else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) { 2844 pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap( 2845 pdd->dev->adev, 2846 false, 2847 0); 2848 } 2849 } 2850 } 2851 2852 retry: 2853 if (p->debug_trap_enabled) { 2854 if (!p->is_runtime_retry) { 2855 kfd_dbg_trap_activate(p); 2856 kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME), 2857 p, NULL, 0, false, NULL, 0); 2858 } 2859 2860 mutex_unlock(&p->mutex); 2861 ret = down_interruptible(&p->runtime_enable_sema); 2862 mutex_lock(&p->mutex); 2863 2864 p->is_runtime_retry = !!ret; 2865 } 2866 2867 return ret; 2868 } 2869 2870 static int runtime_disable(struct kfd_process *p) 2871 { 2872 int i = 0, ret = 0; 2873 bool was_enabled = p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED; 2874 2875 p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_DISABLED; 2876 p->runtime_info.r_debug = 0; 2877 2878 if (p->debug_trap_enabled) { 2879 if (was_enabled) 2880 kfd_dbg_trap_deactivate(p, false, 0); 2881 2882 if (!p->is_runtime_retry) 2883 kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME), 2884 p, NULL, 0, false, NULL, 0); 2885 2886 mutex_unlock(&p->mutex); 2887 ret = down_interruptible(&p->runtime_enable_sema); 2888 mutex_lock(&p->mutex); 2889 2890 p->is_runtime_retry = !!ret; 2891 if (ret) 2892 return ret; 2893 } 2894 2895 if (was_enabled && p->runtime_info.ttmp_setup) { 2896 for (i = 0; i < p->n_pdds; i++) { 2897 struct kfd_process_device *pdd = p->pdds[i]; 2898 2899 if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) 2900 amdgpu_gfx_off_ctrl(pdd->dev->adev, true); 2901 } 2902 } 2903 2904 p->runtime_info.ttmp_setup = false; 2905 2906 /* disable ttmp setup */ 2907 for (i = 0; i < p->n_pdds; i++) { 2908 struct kfd_process_device *pdd = p->pdds[i]; 2909 int last_err = 0; 2910 2911 if (kfd_dbg_is_per_vmid_supported(pdd->dev)) { 2912 pdd->spi_dbg_override = 2913 pdd->dev->kfd2kgd->disable_debug_trap( 2914 pdd->dev->adev, 2915 false, 2916 pdd->dev->vm_info.last_vmid_kfd); 2917 2918 if (!pdd->dev->kfd->shared_resources.enable_mes) 2919 last_err = debug_refresh_runlist(pdd->dev->dqm); 2920 else 2921 last_err = kfd_dbg_set_mes_debug_mode(pdd, 2922 !kfd_dbg_has_cwsr_workaround(pdd->dev)); 2923 2924 if (last_err) 2925 ret = last_err; 2926 } 2927 } 2928 2929 return ret; 2930 } 2931 2932 static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data) 2933 { 2934 struct kfd_ioctl_runtime_enable_args *args = data; 2935 int r; 2936 2937 mutex_lock(&p->mutex); 2938 2939 if (args->mode_mask & KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK) 2940 r = runtime_enable(p, args->r_debug, 2941 !!(args->mode_mask & KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK)); 2942 else 2943 r = runtime_disable(p); 2944 2945 mutex_unlock(&p->mutex); 2946 2947 return r; 2948 } 2949 2950 static int kfd_ioctl_set_debug_trap(struct file *filep, struct 
kfd_process *p, void *data) 2951 { 2952 struct kfd_ioctl_dbg_trap_args *args = data; 2953 struct task_struct *thread = NULL; 2954 struct mm_struct *mm = NULL; 2955 struct pid *pid = NULL; 2956 struct kfd_process *target = NULL; 2957 struct kfd_process_device *pdd = NULL; 2958 int r = 0; 2959 2960 if (p->context_id != KFD_CONTEXT_ID_PRIMARY) { 2961 pr_debug("Set debug trap ioctl cannot be invoked on non-primary kfd process\n"); 2962 2963 return -EOPNOTSUPP; 2964 } 2965 2966 if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { 2967 pr_err("Debugging does not support sched_policy %i\n", sched_policy); 2968 return -EINVAL; 2969 } 2970 2971 pid = find_get_pid(args->pid); 2972 if (!pid) { 2973 pr_debug("Cannot find pid info for %i\n", args->pid); 2974 r = -ESRCH; 2975 goto out; 2976 } 2977 2978 thread = get_pid_task(pid, PIDTYPE_PID); 2979 if (!thread) { 2980 r = -ESRCH; 2981 goto out; 2982 } 2983 2984 mm = get_task_mm(thread); 2985 if (!mm) { 2986 r = -ESRCH; 2987 goto out; 2988 } 2989 2990 if (args->op == KFD_IOC_DBG_TRAP_ENABLE) { 2991 bool create_process; 2992 2993 rcu_read_lock(); 2994 create_process = thread && thread != current && ptrace_parent(thread) == current; 2995 rcu_read_unlock(); 2996 2997 target = create_process ? kfd_create_process(thread) : 2998 kfd_lookup_process_by_pid(pid); 2999 } else { 3000 target = kfd_lookup_process_by_pid(pid); 3001 } 3002 3003 if (IS_ERR_OR_NULL(target)) { 3004 pr_debug("Cannot find process PID %i to debug\n", args->pid); 3005 r = target ? PTR_ERR(target) : -ESRCH; 3006 target = NULL; 3007 goto out; 3008 } 3009 3010 if (target->context_id != KFD_CONTEXT_ID_PRIMARY) { 3011 pr_debug("Set debug trap ioctl not supported on non-primary kfd process\n"); 3012 r = -EOPNOTSUPP; 3013 goto out; 3014 } 3015 3016 /* Check if target is still PTRACED. */ 3017 rcu_read_lock(); 3018 if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE 3019 && ptrace_parent(target->lead_thread) != current) { 3020 pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid); 3021 r = -EPERM; 3022 } 3023 rcu_read_unlock(); 3024 3025 if (r) 3026 goto out; 3027 3028 mutex_lock(&target->mutex); 3029 3030 if (args->op != KFD_IOC_DBG_TRAP_ENABLE && !target->debug_trap_enabled) { 3031 pr_err("PID %i not debug enabled for op %i\n", args->pid, args->op); 3032 r = -EINVAL; 3033 goto unlock_out; 3034 } 3035 3036 if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_ENABLED && 3037 (args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE || 3038 args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE || 3039 args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES || 3040 args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES || 3041 args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH || 3042 args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH || 3043 args->op == KFD_IOC_DBG_TRAP_SET_FLAGS)) { 3044 r = -EPERM; 3045 goto unlock_out; 3046 } 3047 3048 if (args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH || 3049 args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) { 3050 int user_gpu_id = kfd_process_get_user_gpu_id(target, 3051 args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ?
3052 args->set_node_address_watch.gpu_id : 3053 args->clear_node_address_watch.gpu_id); 3054 3055 pdd = kfd_process_device_data_by_id(target, user_gpu_id); 3056 if (user_gpu_id == -EINVAL || !pdd) { 3057 r = -ENODEV; 3058 goto unlock_out; 3059 } 3060 } 3061 3062 switch (args->op) { 3063 case KFD_IOC_DBG_TRAP_ENABLE: 3064 if (target != p) 3065 target->debugger_process = p; 3066 3067 r = kfd_dbg_trap_enable(target, 3068 args->enable.dbg_fd, 3069 (void __user *)args->enable.rinfo_ptr, 3070 &args->enable.rinfo_size); 3071 if (!r) 3072 target->exception_enable_mask = args->enable.exception_mask; 3073 3074 break; 3075 case KFD_IOC_DBG_TRAP_DISABLE: 3076 r = kfd_dbg_trap_disable(target); 3077 break; 3078 case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: 3079 r = kfd_dbg_send_exception_to_runtime(target, 3080 args->send_runtime_event.gpu_id, 3081 args->send_runtime_event.queue_id, 3082 args->send_runtime_event.exception_mask); 3083 break; 3084 case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: 3085 kfd_dbg_set_enabled_debug_exception_mask(target, 3086 args->set_exceptions_enabled.exception_mask); 3087 break; 3088 case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: 3089 r = kfd_dbg_trap_set_wave_launch_override(target, 3090 args->launch_override.override_mode, 3091 args->launch_override.enable_mask, 3092 args->launch_override.support_request_mask, 3093 &args->launch_override.enable_mask, 3094 &args->launch_override.support_request_mask); 3095 break; 3096 case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: 3097 r = kfd_dbg_trap_set_wave_launch_mode(target, 3098 args->launch_mode.launch_mode); 3099 break; 3100 case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: 3101 r = suspend_queues(target, 3102 args->suspend_queues.num_queues, 3103 args->suspend_queues.grace_period, 3104 args->suspend_queues.exception_mask, 3105 (uint32_t *)args->suspend_queues.queue_array_ptr); 3106 3107 break; 3108 case KFD_IOC_DBG_TRAP_RESUME_QUEUES: 3109 r = resume_queues(target, args->resume_queues.num_queues, 3110 (uint32_t *)args->resume_queues.queue_array_ptr); 3111 break; 3112 case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: 3113 r = kfd_dbg_trap_set_dev_address_watch(pdd, 3114 args->set_node_address_watch.address, 3115 args->set_node_address_watch.mask, 3116 &args->set_node_address_watch.id, 3117 args->set_node_address_watch.mode); 3118 break; 3119 case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: 3120 r = kfd_dbg_trap_clear_dev_address_watch(pdd, 3121 args->clear_node_address_watch.id); 3122 break; 3123 case KFD_IOC_DBG_TRAP_SET_FLAGS: 3124 r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags); 3125 break; 3126 case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: 3127 r = kfd_dbg_ev_query_debug_event(target, 3128 &args->query_debug_event.queue_id, 3129 &args->query_debug_event.gpu_id, 3130 args->query_debug_event.exception_mask, 3131 &args->query_debug_event.exception_mask); 3132 break; 3133 case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: 3134 r = kfd_dbg_trap_query_exception_info(target, 3135 args->query_exception_info.source_id, 3136 args->query_exception_info.exception_code, 3137 args->query_exception_info.clear_exception, 3138 (void __user *)args->query_exception_info.info_ptr, 3139 &args->query_exception_info.info_size); 3140 break; 3141 case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: 3142 r = pqm_get_queue_snapshot(&target->pqm, 3143 args->queue_snapshot.exception_mask, 3144 (void __user *)args->queue_snapshot.snapshot_buf_ptr, 3145 &args->queue_snapshot.num_queues, 3146 &args->queue_snapshot.entry_size); 3147 break; 3148 case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: 3149 r = 
kfd_dbg_trap_device_snapshot(target, 3150 args->device_snapshot.exception_mask, 3151 (void __user *)args->device_snapshot.snapshot_buf_ptr, 3152 &args->device_snapshot.num_devices, 3153 &args->device_snapshot.entry_size); 3154 break; 3155 default: 3156 pr_err("Invalid option: %i\n", args->op); 3157 r = -EINVAL; 3158 } 3159 3160 unlock_out: 3161 mutex_unlock(&target->mutex); 3162 3163 out: 3164 if (thread) 3165 put_task_struct(thread); 3166 3167 if (mm) 3168 mmput(mm); 3169 3170 if (pid) 3171 put_pid(pid); 3172 3173 if (target) 3174 kfd_unref_process(target); 3175 3176 return r; 3177 } 3178 3179 /* Userspace programs need to invoke this ioctl explicitly on an FD to 3180 * create a secondary kfd_process that replaces the FD's primary kfd_process. 3181 */ 3182 static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, void *data) 3183 { 3184 struct kfd_process *process; 3185 int ret; 3186 3187 if (!filep->private_data || !p) 3188 return -EINVAL; 3189 3190 /* Each FD owns only one kfd_process */ 3191 if (p->context_id != KFD_CONTEXT_ID_PRIMARY) 3192 return -EINVAL; 3193 3194 mutex_lock(&kfd_processes_mutex); 3195 if (p != filep->private_data) { 3196 mutex_unlock(&kfd_processes_mutex); 3197 return -EINVAL; 3198 } 3199 3200 process = create_process(current, false); 3201 if (IS_ERR(process)) { 3202 mutex_unlock(&kfd_processes_mutex); 3203 return PTR_ERR(process); 3204 } 3205 3206 filep->private_data = process; 3207 mutex_unlock(&kfd_processes_mutex); 3208 3209 ret = kfd_create_process_sysfs(process); 3210 if (ret) 3211 pr_warn("Failed to create sysfs entry for the kfd_process\n"); 3212 3213 /* Each open() increases the kref of the primary kfd_process, 3214 * so drop that reference here now that a new secondary process replaces it. 3215 */ 3216 kfd_unref_process(p); 3217 3218 return 0; 3219 } 3220 3221 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ 3222 [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ 3223 .validate = NULL, .cmd_drv = 0, .name = #ioctl} 3224 3225 #define AMDKFD_IOCTL_DEF_V(ioctl, _func, _validate, _flags) \ 3226 [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ 3227 .validate = _validate, .cmd_drv = 0, .name = #ioctl} 3228 3229 /** Ioctl table */ 3230 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { 3231 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION, 3232 kfd_ioctl_get_version, 0), 3233 3234 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE, 3235 kfd_ioctl_create_queue, 0), 3236 3237 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE, 3238 kfd_ioctl_destroy_queue, 0), 3239 3240 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY, 3241 kfd_ioctl_set_memory_policy, 0), 3242 3243 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS, 3244 kfd_ioctl_get_clock_counters, 0), 3245 3246 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES, 3247 kfd_ioctl_get_process_apertures, 0), 3248 3249 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE, 3250 kfd_ioctl_update_queue, 0), 3251 3252 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT, 3253 kfd_ioctl_create_event, 0), 3254 3255 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT, 3256 kfd_ioctl_destroy_event, 0), 3257 3258 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT, 3259 kfd_ioctl_set_event, 0), 3260 3261 AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT, 3262 kfd_ioctl_reset_event, 0), 3263 3264 AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, 3265 kfd_ioctl_wait_events, 0), 3266 3267 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED, 3268 kfd_ioctl_dbg_register, 0), 3269 3270 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED, 3271 kfd_ioctl_dbg_unregister, 0),
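/* Entries are indexed by _IOC_NR(cmd), so each slot must line up with the command number from the uapi header; kfd_ioctl() relies on that for lookup. */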
3272 3273 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED, 3274 kfd_ioctl_dbg_address_watch, 0), 3275 3276 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED, 3277 kfd_ioctl_dbg_wave_control, 0), 3278 3279 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, 3280 kfd_ioctl_set_scratch_backing_va, 0), 3281 3282 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, 3283 kfd_ioctl_get_tile_config, 0), 3284 3285 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, 3286 kfd_ioctl_set_trap_handler, 0), 3287 3288 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, 3289 kfd_ioctl_get_process_apertures_new, 0), 3290 3291 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM, 3292 kfd_ioctl_acquire_vm, 0), 3293 3294 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, 3295 kfd_ioctl_alloc_memory_of_gpu, 0), 3296 3297 AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU, 3298 kfd_ioctl_free_memory_of_gpu, 0), 3299 3300 AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU, 3301 kfd_ioctl_map_memory_to_gpu, 0), 3302 3303 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, 3304 kfd_ioctl_unmap_memory_from_gpu, 0), 3305 3306 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK, 3307 kfd_ioctl_set_cu_mask, 0), 3308 3309 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE, 3310 kfd_ioctl_get_queue_wave_state, 0), 3311 3312 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO, 3313 kfd_ioctl_get_dmabuf_info, 0), 3314 3315 AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, 3316 kfd_ioctl_import_dmabuf, 0), 3317 3318 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, 3319 kfd_ioctl_alloc_queue_gws, 0), 3320 3321 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, 3322 kfd_ioctl_smi_events, 0), 3323 3324 AMDKFD_IOCTL_DEF_V(AMDKFD_IOC_SVM, kfd_ioctl_svm, 3325 kfd_ioctl_svm_validate, 0), 3326 3327 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, 3328 kfd_ioctl_set_xnack_mode, 0), 3329 3330 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP, 3331 kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE), 3332 3333 AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, 3334 kfd_ioctl_get_available_memory, 0), 3335 3336 AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF, 3337 kfd_ioctl_export_dmabuf, 0), 3338 3339 AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE, 3340 kfd_ioctl_runtime_enable, 0), 3341 3342 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP, 3343 kfd_ioctl_set_debug_trap, 0), 3344 3345 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_PROCESS, 3346 kfd_ioctl_create_process, 0), 3347 }; 3348 3349 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) 3350 3351 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) 3352 { 3353 struct kfd_process *process; 3354 amdkfd_ioctl_t *func; 3355 const struct amdkfd_ioctl_desc *ioctl = NULL; 3356 unsigned int nr = _IOC_NR(cmd); 3357 char stack_kdata[128]; 3358 char *kdata = NULL; 3359 unsigned int usize, asize; 3360 int retcode = -EINVAL; 3361 bool ptrace_attached = false; 3362 3363 if (nr >= AMDKFD_CORE_IOCTL_COUNT) { 3364 retcode = -ENOTTY; 3365 goto err_i1; 3366 } 3367 3368 if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { 3369 u32 amdkfd_size; 3370 3371 ioctl = &amdkfd_ioctls[nr]; 3372 3373 amdkfd_size = _IOC_SIZE(ioctl->cmd); 3374 usize = asize = _IOC_SIZE(cmd); 3375 if (amdkfd_size > asize) 3376 asize = amdkfd_size; 3377 3378 cmd = ioctl->cmd; 3379 } else { 3380 retcode = -ENOTTY; 3381 goto err_i1; 3382 } 3383 3384 dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg); 3385 3386 /* Get the process struct from the filep. Only the process 3387 * that opened /dev/kfd can use the file descriptor. 
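 * A ptrace-attached process issuing checkpoint-restore ioctls is the one exception (see the ptrace_attached check below).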
Child 3388 * processes need to create their own KFD device context. 3389 */ 3390 process = filep->private_data; 3391 3392 rcu_read_lock(); 3393 if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) && 3394 ptrace_parent(process->lead_thread) == current) 3395 ptrace_attached = true; 3396 rcu_read_unlock(); 3397 3398 if (process->lead_thread != current->group_leader 3399 && !ptrace_attached) { 3400 dev_dbg(kfd_device, "Using KFD FD in wrong process\n"); 3401 retcode = -EBADF; 3402 goto err_i1; 3403 } 3404 3405 /* Do not trust userspace, use our own definition */ 3406 func = ioctl->func; 3407 3408 if (unlikely(!func)) { 3409 dev_dbg(kfd_device, "no function\n"); 3410 retcode = -EINVAL; 3411 goto err_i1; 3412 } 3413 3414 /* 3415 * Versions of Docker shipped in Ubuntu 18.xx and 20.xx do not support 3416 * CAP_CHECKPOINT_RESTORE, so we also allow access with CAP_SYS_ADMIN, since 3417 * CAP_SYS_ADMIN is a more privileged capability. 3418 */ 3419 if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) { 3420 if (!capable(CAP_CHECKPOINT_RESTORE) && 3421 !capable(CAP_SYS_ADMIN)) { 3422 retcode = -EACCES; 3423 goto err_i1; 3424 } 3425 } 3426 3427 if (cmd & (IOC_IN | IOC_OUT)) { 3428 if (asize <= sizeof(stack_kdata)) { 3429 kdata = stack_kdata; 3430 } else { 3431 kdata = kmalloc(asize, GFP_KERNEL); 3432 if (!kdata) { 3433 retcode = -ENOMEM; 3434 goto err_i1; 3435 } 3436 } 3437 if (asize > usize) 3438 memset(kdata + usize, 0, asize - usize); 3439 } 3440 3441 if (cmd & IOC_IN) { 3442 if (copy_from_user(kdata, (void __user *)arg, usize) != 0) { 3443 retcode = -EFAULT; 3444 goto err_i1; 3445 } 3446 } else if (cmd & IOC_OUT) { 3447 memset(kdata, 0, usize); 3448 } 3449 3450 if (ioctl->validate) { 3451 retcode = ioctl->validate(kdata, usize); 3452 if (retcode) 3453 goto err_i1; 3454 } 3455 3456 retcode = func(filep, process, kdata); 3457 3458 if (cmd & IOC_OUT) 3459 if (copy_to_user((void __user *)arg, kdata, usize) != 0) 3460 retcode = -EFAULT; 3461 3462 err_i1: 3463 if (!ioctl) 3464 dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", 3465 task_pid_nr(current), cmd, nr); 3466 3467 if (kdata != stack_kdata) 3468 kfree(kdata); 3469 3470 if (retcode) 3471 dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n", 3472 nr, arg, retcode); 3473 3474 return retcode; 3475 } 3476 3477 static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process, 3478 struct vm_area_struct *vma) 3479 { 3480 phys_addr_t address; 3481 3482 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 3483 return -EINVAL; 3484 3485 if (PAGE_SIZE > 4096) 3486 return -EINVAL; 3487 3488 address = dev->adev->rmmio_remap.bus_addr; 3489 3490 vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | 3491 VM_DONTDUMP | VM_PFNMAP); 3492 3493 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 3494 3495 pr_debug("process pid %d mapping mmio page\n" 3496 " target user address == 0x%08llX\n" 3497 " physical address == 0x%08llX\n" 3498 " vm_flags == 0x%04lX\n" 3499 " size == 0x%04lX\n", 3500 process->lead_thread->pid, (unsigned long long) vma->vm_start, 3501 address, vma->vm_flags, PAGE_SIZE); 3502 3503 return io_remap_pfn_range(vma, 3504 vma->vm_start, 3505 address >> PAGE_SHIFT, 3506 PAGE_SIZE, 3507 vma->vm_page_prot); 3508 } 3509 3510 3511 static int kfd_mmap(struct file *filep, struct vm_area_struct *vma) 3512 { 3513 struct kfd_process *process; 3514 struct kfd_node *dev = NULL; 3515 unsigned long mmap_offset; 3516 unsigned int gpu_id; 3517 3518 process = filep->private_data; 3519 if (!process) 3520 return
-ESRCH; 3521 3522 if (process->lead_thread != current->group_leader) 3523 return -EBADF; 3524 3525 mmap_offset = vma->vm_pgoff << PAGE_SHIFT; 3526 gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset); 3527 if (gpu_id) 3528 dev = kfd_device_by_id(gpu_id); 3529 3530 switch (mmap_offset & KFD_MMAP_TYPE_MASK) { 3531 case KFD_MMAP_TYPE_DOORBELL: 3532 if (!dev) 3533 return -ENODEV; 3534 return kfd_doorbell_mmap(dev, process, vma); 3535 3536 case KFD_MMAP_TYPE_EVENTS: 3537 return kfd_event_mmap(process, vma); 3538 3539 case KFD_MMAP_TYPE_RESERVED_MEM: 3540 if (!dev) 3541 return -ENODEV; 3542 return kfd_reserved_mem_mmap(dev, process, vma); 3543 case KFD_MMAP_TYPE_MMIO: 3544 if (!dev) 3545 return -ENODEV; 3546 return kfd_mmio_mmap(dev, process, vma); 3547 } 3548 3549 return -EFAULT; 3550 } 3551