1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include <linux/device.h> 25 #include <linux/err.h> 26 #include <linux/fs.h> 27 #include <linux/file.h> 28 #include <linux/sched.h> 29 #include <linux/slab.h> 30 #include <linux/uaccess.h> 31 #include <linux/compat.h> 32 #include <uapi/linux/kfd_ioctl.h> 33 #include <linux/time.h> 34 #include <linux/mm.h> 35 #include <linux/mman.h> 36 #include <linux/ptrace.h> 37 #include <linux/dma-buf.h> 38 #include <linux/processor.h> 39 #include "kfd_priv.h" 40 #include "kfd_device_queue_manager.h" 41 #include "kfd_svm.h" 42 #include "amdgpu_amdkfd.h" 43 #include "kfd_smi_events.h" 44 #include "amdgpu_dma_buf.h" 45 #include "kfd_debug.h" 46 47 static long kfd_ioctl(struct file *, unsigned int, unsigned long); 48 static int kfd_open(struct inode *, struct file *); 49 static int kfd_release(struct inode *, struct file *); 50 static int kfd_mmap(struct file *, struct vm_area_struct *); 51 52 static const char kfd_dev_name[] = "kfd"; 53 54 static const struct file_operations kfd_fops = { 55 .owner = THIS_MODULE, 56 .unlocked_ioctl = kfd_ioctl, 57 .compat_ioctl = compat_ptr_ioctl, 58 .open = kfd_open, 59 .release = kfd_release, 60 .mmap = kfd_mmap, 61 }; 62 63 static int kfd_char_dev_major = -1; 64 struct device *kfd_device; 65 static const struct class kfd_class = { 66 .name = kfd_dev_name, 67 }; 68 69 static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id) 70 { 71 struct kfd_process_device *pdd; 72 73 mutex_lock(&p->mutex); 74 pdd = kfd_process_device_data_by_id(p, gpu_id); 75 76 if (pdd) 77 return pdd; 78 79 mutex_unlock(&p->mutex); 80 return NULL; 81 } 82 83 static inline void kfd_unlock_pdd(struct kfd_process_device *pdd) 84 { 85 mutex_unlock(&pdd->process->mutex); 86 } 87 88 int kfd_chardev_init(void) 89 { 90 int err = 0; 91 92 kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops); 93 err = kfd_char_dev_major; 94 if (err < 0) 95 goto err_register_chrdev; 96 97 err = class_register(&kfd_class); 98 if (err) 99 goto err_class_create; 100 101 kfd_device = device_create(&kfd_class, NULL, 102 MKDEV(kfd_char_dev_major, 0), 103 NULL, kfd_dev_name); 104 err = PTR_ERR(kfd_device); 105 if (IS_ERR(kfd_device)) 106 goto err_device_create; 107 108 return 0; 109 110 err_device_create: 111 class_unregister(&kfd_class); 112 err_class_create: 113 unregister_chrdev(kfd_char_dev_major, kfd_dev_name); 114 err_register_chrdev: 115 return err; 116 } 117 118 void kfd_chardev_exit(void) 119 { 120 device_destroy(&kfd_class, MKDEV(kfd_char_dev_major, 0)); 121 class_unregister(&kfd_class); 122 unregister_chrdev(kfd_char_dev_major, kfd_dev_name); 123 kfd_device = NULL; 124 } 125 126 127 static int kfd_open(struct inode *inode, struct file *filep) 128 { 129 struct kfd_process *process; 130 bool is_32bit_user_mode; 131 132 if (iminor(inode) != 0) 133 return -ENODEV; 134 135 is_32bit_user_mode = in_compat_syscall(); 136 137 if (is_32bit_user_mode) { 138 dev_warn(kfd_device, 139 "Process %d (32-bit) failed to open /dev/kfd\n" 140 "32-bit processes are not supported by amdkfd\n", 141 current->pid); 142 return -EPERM; 143 } 144 145 process = kfd_create_process(current); 146 if (IS_ERR(process)) 147 return PTR_ERR(process); 148 149 if (kfd_process_init_cwsr_apu(process, filep)) { 150 kfd_unref_process(process); 151 return -EFAULT; 152 } 153 154 /* filep now owns the reference returned by kfd_create_process */ 155 filep->private_data = process; 156 157 dev_dbg(kfd_device, "process pid %d opened kfd node, compat mode (32 bit) - %d\n", 158 process->lead_thread->pid, process->is_32bit_user_mode); 159 160 return 0; 161 } 162 163 static int kfd_release(struct inode *inode, struct file *filep) 164 { 165 struct kfd_process *process = filep->private_data; 166 167 if (!process) 168 return 0; 169 170 if (process->context_id != KFD_CONTEXT_ID_PRIMARY) 171 kfd_process_notifier_release_internal(process); 172 173 kfd_unref_process(process); 174 175 return 0; 176 } 177 178 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, 179 void *data) 180 { 181 struct kfd_ioctl_get_version_args *args = data; 182 183 args->major_version = KFD_IOCTL_MAJOR_VERSION; 184 args->minor_version = KFD_IOCTL_MINOR_VERSION; 185 186 return 0; 187 } 188 189 static int set_queue_properties_from_user(struct queue_properties *q_properties, 190 struct kfd_ioctl_create_queue_args *args) 191 { 192 /* 193 * Repurpose queue percentage to accommodate new features: 194 * bit 0-7: queue percentage 195 * bit 8-15: pm4_target_xcc 196 */ 197 if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) { 198 pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); 199 return -EINVAL; 200 } 201 202 if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { 203 pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); 204 return -EINVAL; 205 } 206 207 if ((args->ring_base_address) && 208 (!access_ok((const void __user *) args->ring_base_address, 209 sizeof(uint64_t)))) { 210 pr_err("Can't access ring base address\n"); 211 return -EFAULT; 212 } 213 214 if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { 215 pr_err("Ring size must be a power of 2 or 0\n"); 216 return -EINVAL; 217 } 218 219 if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) { 220 args->ring_size = KFD_MIN_QUEUE_RING_SIZE; 221 pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE"); 222 } 223 224 if ((args->metadata_ring_size != 0) && !is_power_of_2(args->metadata_ring_size)) { 225 pr_err("Metadata ring size must be a power of 2 or 0\n"); 226 return -EINVAL; 227 } 228 229 if (!access_ok((const void __user *) args->read_pointer_address, 230 sizeof(uint32_t))) { 231 pr_err("Can't access read pointer\n"); 232 return -EFAULT; 233 } 234 235 if (!access_ok((const void __user *) args->write_pointer_address, 236 sizeof(uint32_t))) { 237 pr_err("Can't access write pointer\n"); 238 return -EFAULT; 239 } 240 241 if (args->eop_buffer_address && 242 !access_ok((const void __user *) args->eop_buffer_address, 243 sizeof(uint32_t))) { 244 pr_debug("Can't access eop buffer"); 245 return -EFAULT; 246 } 247 248 if (args->ctx_save_restore_address && 249 !access_ok((const void __user *) args->ctx_save_restore_address, 250 sizeof(uint32_t))) { 251 pr_debug("Can't access ctx save restore buffer"); 252 return -EFAULT; 253 } 254 255 q_properties->is_interop = false; 256 q_properties->is_gws = false; 257 q_properties->queue_percent = args->queue_percentage & 0xFF; 258 /* bit 8-15 are repurposed to be PM4 target XCC */ 259 q_properties->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF; 260 q_properties->priority = args->queue_priority; 261 q_properties->queue_address = args->ring_base_address; 262 q_properties->queue_size = args->ring_size; 263 if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) 264 q_properties->metadata_queue_size = args->metadata_ring_size; 265 266 q_properties->read_ptr = (void __user *)args->read_pointer_address; 267 q_properties->write_ptr = (void __user *)args->write_pointer_address; 268 q_properties->eop_ring_buffer_address = args->eop_buffer_address; 269 q_properties->eop_ring_buffer_size = args->eop_buffer_size; 270 q_properties->ctx_save_restore_area_address = 271 args->ctx_save_restore_address; 272 q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; 273 q_properties->ctl_stack_size = args->ctl_stack_size; 274 q_properties->sdma_engine_id = args->sdma_engine_id; 275 if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || 276 args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) 277 q_properties->type = KFD_QUEUE_TYPE_COMPUTE; 278 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) 279 q_properties->type = KFD_QUEUE_TYPE_SDMA; 280 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) 281 q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; 282 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID) 283 q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID; 284 else 285 return -ENOTSUPP; 286 287 if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) 288 q_properties->format = KFD_QUEUE_FORMAT_AQL; 289 else 290 q_properties->format = KFD_QUEUE_FORMAT_PM4; 291 292 pr_debug("Queue Percentage: %d, %d\n", 293 q_properties->queue_percent, args->queue_percentage); 294 295 pr_debug("Queue Priority: %d, %d\n", 296 q_properties->priority, args->queue_priority); 297 298 pr_debug("Queue Address: 0x%llX, 0x%llX\n", 299 q_properties->queue_address, args->ring_base_address); 300 301 pr_debug("Queue Size: 0x%llX, %u\n", 302 q_properties->queue_size, args->ring_size); 303 304 pr_debug("Queue r/w Pointers: %px, %px\n", 305 q_properties->read_ptr, 306 q_properties->write_ptr); 307 308 pr_debug("Queue Format: %d\n", q_properties->format); 309 310 pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address); 311 312 pr_debug("Queue CTX save area: 0x%llX\n", 313 q_properties->ctx_save_restore_area_address); 314 315 return 0; 316 } 317 318 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, 319 void *data) 320 { 321 struct kfd_ioctl_create_queue_args *args = data; 322 struct kfd_node *dev; 323 int err = 0; 324 unsigned int queue_id; 325 struct kfd_process_device *pdd; 326 struct queue_properties q_properties; 327 uint32_t doorbell_offset_in_process = 0; 328 329 memset(&q_properties, 0, sizeof(struct queue_properties)); 330 331 pr_debug("Creating queue ioctl\n"); 332 333 err = set_queue_properties_from_user(&q_properties, args); 334 if (err) 335 return err; 336 337 pr_debug("Looking for gpu id 0x%x\n", args->gpu_id); 338 339 mutex_lock(&p->mutex); 340 341 pdd = kfd_process_device_data_by_id(p, args->gpu_id); 342 if (!pdd) { 343 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); 344 err = -EINVAL; 345 goto err_pdd; 346 } 347 dev = pdd->dev; 348 349 pdd = kfd_bind_process_to_device(dev, p); 350 if (IS_ERR(pdd)) { 351 err = -ESRCH; 352 goto err_bind_process; 353 } 354 355 if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 356 int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) + 357 kfd_get_num_xgmi_sdma_engines(dev) - 1; 358 359 if (q_properties.sdma_engine_id > max_sdma_eng_id) { 360 err = -EINVAL; 361 pr_err("sdma_engine_id %i exceeds maximum id of %i\n", 362 q_properties.sdma_engine_id, max_sdma_eng_id); 363 goto err_sdma_engine_id; 364 } 365 } 366 367 if (!pdd->qpd.proc_doorbells) { 368 err = kfd_alloc_process_doorbells(dev->kfd, pdd); 369 if (err) { 370 pr_debug("failed to allocate process doorbells\n"); 371 goto err_bind_process; 372 } 373 } 374 375 err = kfd_queue_acquire_buffers(pdd, &q_properties); 376 if (err) { 377 pr_debug("failed to acquire user queue buffers\n"); 378 goto err_acquire_queue_buf; 379 } 380 381 pr_debug("Creating queue for process pid %d on gpu 0x%x\n", 382 p->lead_thread->pid, 383 dev->id); 384 385 err = pqm_create_queue(&p->pqm, dev, &q_properties, &queue_id, 386 NULL, NULL, NULL, &doorbell_offset_in_process); 387 if (err != 0) 388 goto err_create_queue; 389 390 args->queue_id = queue_id; 391 392 393 /* Return gpu_id as doorbell offset for mmap usage */ 394 args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL; 395 args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); 396 if (KFD_IS_SOC15(dev)) 397 /* On SOC15 ASICs, include the doorbell offset within the 398 * process doorbell frame, which is 2 pages. 399 */ 400 args->doorbell_offset |= doorbell_offset_in_process; 401 402 mutex_unlock(&p->mutex); 403 404 pr_debug("Queue id %d was created successfully\n", args->queue_id); 405 406 pr_debug("Ring buffer address == 0x%016llX\n", 407 args->ring_base_address); 408 409 pr_debug("Read ptr address == 0x%016llX\n", 410 args->read_pointer_address); 411 412 pr_debug("Write ptr address == 0x%016llX\n", 413 args->write_pointer_address); 414 415 kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_NEW), p, dev, queue_id, false, NULL, 0); 416 return 0; 417 418 err_create_queue: 419 kfd_queue_unref_bo_vas(pdd, &q_properties); 420 kfd_queue_release_buffers(pdd, &q_properties); 421 err_acquire_queue_buf: 422 err_sdma_engine_id: 423 err_bind_process: 424 err_pdd: 425 mutex_unlock(&p->mutex); 426 return err; 427 } 428 429 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, 430 void *data) 431 { 432 int retval; 433 struct kfd_ioctl_destroy_queue_args *args = data; 434 435 pr_debug("Destroying queue id %d for process pid %d\n", 436 args->queue_id, 437 p->lead_thread->pid); 438 439 mutex_lock(&p->mutex); 440 441 retval = pqm_destroy_queue(&p->pqm, args->queue_id); 442 443 mutex_unlock(&p->mutex); 444 return retval; 445 } 446 447 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, 448 void *data) 449 { 450 int retval; 451 struct kfd_ioctl_update_queue_args *args = data; 452 struct queue_properties properties; 453 454 /* 455 * Repurpose queue percentage to accommodate new features: 456 * bit 0-7: queue percentage 457 * bit 8-15: pm4_target_xcc 458 */ 459 if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) { 460 pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); 461 return -EINVAL; 462 } 463 464 if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { 465 pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); 466 return -EINVAL; 467 } 468 469 if ((args->ring_base_address) && 470 (!access_ok((const void __user *) args->ring_base_address, 471 sizeof(uint64_t)))) { 472 pr_err("Can't access ring base address\n"); 473 return -EFAULT; 474 } 475 476 if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { 477 pr_err("Ring size must be a power of 2 or 0\n"); 478 return -EINVAL; 479 } 480 481 if (args->ring_size < KFD_MIN_QUEUE_RING_SIZE) { 482 args->ring_size = KFD_MIN_QUEUE_RING_SIZE; 483 pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE"); 484 } 485 486 properties.queue_address = args->ring_base_address; 487 properties.queue_size = args->ring_size; 488 properties.queue_percent = args->queue_percentage & 0xFF; 489 /* bit 8-15 are repurposed to be PM4 target XCC */ 490 properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF; 491 properties.priority = args->queue_priority; 492 493 pr_debug("Updating queue id %d for process pid %d\n", 494 args->queue_id, p->lead_thread->pid); 495 496 mutex_lock(&p->mutex); 497 498 retval = pqm_update_queue_properties(&p->pqm, args->queue_id, &properties); 499 500 mutex_unlock(&p->mutex); 501 502 return retval; 503 } 504 505 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, 506 void *data) 507 { 508 int retval; 509 const int max_num_cus = 1024; 510 struct kfd_ioctl_set_cu_mask_args *args = data; 511 struct mqd_update_info minfo = {0}; 512 uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr; 513 size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32); 514 515 if ((args->num_cu_mask % 32) != 0) { 516 pr_debug("num_cu_mask 0x%x must be a multiple of 32", 517 args->num_cu_mask); 518 return -EINVAL; 519 } 520 521 minfo.cu_mask.count = args->num_cu_mask; 522 if (minfo.cu_mask.count == 0) { 523 pr_debug("CU mask cannot be 0"); 524 return -EINVAL; 525 } 526 527 /* To prevent an unreasonably large CU mask size, set an arbitrary 528 * limit of max_num_cus bits. We can then just drop any CU mask bits 529 * past max_num_cus bits and just use the first max_num_cus bits. 530 */ 531 if (minfo.cu_mask.count > max_num_cus) { 532 pr_debug("CU mask cannot be greater than 1024 bits"); 533 minfo.cu_mask.count = max_num_cus; 534 cu_mask_size = sizeof(uint32_t) * (max_num_cus/32); 535 } 536 537 minfo.cu_mask.ptr = memdup_user(cu_mask_ptr, cu_mask_size); 538 if (IS_ERR(minfo.cu_mask.ptr)) { 539 pr_debug("Could not copy CU mask from userspace"); 540 return PTR_ERR(minfo.cu_mask.ptr); 541 } 542 543 mutex_lock(&p->mutex); 544 545 retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo); 546 547 mutex_unlock(&p->mutex); 548 549 kfree(minfo.cu_mask.ptr); 550 return retval; 551 } 552 553 static int kfd_ioctl_get_queue_wave_state(struct file *filep, 554 struct kfd_process *p, void *data) 555 { 556 struct kfd_ioctl_get_queue_wave_state_args *args = data; 557 int r; 558 559 mutex_lock(&p->mutex); 560 561 r = pqm_get_wave_state(&p->pqm, args->queue_id, 562 (void __user *)args->ctl_stack_address, 563 &args->ctl_stack_used_size, 564 &args->save_area_used_size); 565 566 mutex_unlock(&p->mutex); 567 568 return r; 569 } 570 571 static int kfd_ioctl_set_memory_policy(struct file *filep, 572 struct kfd_process *p, void *data) 573 { 574 struct kfd_ioctl_set_memory_policy_args *args = data; 575 int err = 0; 576 struct kfd_process_device *pdd; 577 enum cache_policy default_policy, alternate_policy; 578 579 if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT 580 && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { 581 return -EINVAL; 582 } 583 584 if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT 585 && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { 586 return -EINVAL; 587 } 588 589 mutex_lock(&p->mutex); 590 pdd = kfd_process_device_data_by_id(p, args->gpu_id); 591 if (!pdd) { 592 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); 593 err = -EINVAL; 594 goto err_pdd; 595 } 596 597 pdd = kfd_bind_process_to_device(pdd->dev, p); 598 if (IS_ERR(pdd)) { 599 err = -ESRCH; 600 goto out; 601 } 602 603 default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT) 604 ? cache_policy_coherent : cache_policy_noncoherent; 605 606 alternate_policy = 607 (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) 608 ? cache_policy_coherent : cache_policy_noncoherent; 609 610 if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm, 611 &pdd->qpd, 612 default_policy, 613 alternate_policy, 614 (void __user *)args->alternate_aperture_base, 615 args->alternate_aperture_size, 616 args->misc_process_flag)) 617 err = -EINVAL; 618 619 out: 620 err_pdd: 621 mutex_unlock(&p->mutex); 622 623 return err; 624 } 625 626 static int kfd_ioctl_set_trap_handler(struct file *filep, 627 struct kfd_process *p, void *data) 628 { 629 struct kfd_ioctl_set_trap_handler_args *args = data; 630 int err = 0; 631 struct kfd_process_device *pdd; 632 633 mutex_lock(&p->mutex); 634 635 pdd = kfd_process_device_data_by_id(p, args->gpu_id); 636 if (!pdd) { 637 err = -EINVAL; 638 goto err_pdd; 639 } 640 641 pdd = kfd_bind_process_to_device(pdd->dev, p); 642 if (IS_ERR(pdd)) { 643 err = -ESRCH; 644 goto out; 645 } 646 647 kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr); 648 649 out: 650 err_pdd: 651 mutex_unlock(&p->mutex); 652 653 return err; 654 } 655 656 static int kfd_ioctl_dbg_register(struct file *filep, 657 struct kfd_process *p, void *data) 658 { 659 return -EPERM; 660 } 661 662 static int kfd_ioctl_dbg_unregister(struct file *filep, 663 struct kfd_process *p, void *data) 664 { 665 return -EPERM; 666 } 667 668 static int kfd_ioctl_dbg_address_watch(struct file *filep, 669 struct kfd_process *p, void *data) 670 { 671 return -EPERM; 672 } 673 674 /* Parse and generate fixed size data structure for wave control */ 675 static int kfd_ioctl_dbg_wave_control(struct file *filep, 676 struct kfd_process *p, void *data) 677 { 678 return -EPERM; 679 } 680 681 static int kfd_ioctl_get_clock_counters(struct file *filep, 682 struct kfd_process *p, void *data) 683 { 684 struct kfd_ioctl_get_clock_counters_args *args = data; 685 struct kfd_process_device *pdd; 686 687 mutex_lock(&p->mutex); 688 pdd = kfd_process_device_data_by_id(p, args->gpu_id); 689 mutex_unlock(&p->mutex); 690 if (pdd) 691 /* Reading GPU clock counter from KGD */ 692 args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev); 693 else 694 /* Node without GPU resource */ 695 args->gpu_clock_counter = 0; 696 697 /* No access to rdtsc. Using raw monotonic time */ 698 args->cpu_clock_counter = ktime_get_raw_ns(); 699 args->system_clock_counter = ktime_get_boottime_ns(); 700 701 /* Since the counter is in nano-seconds we use 1GHz frequency */ 702 args->system_clock_freq = 1000000000; 703 704 return 0; 705 } 706 707 708 static int kfd_ioctl_get_process_apertures(struct file *filp, 709 struct kfd_process *p, void *data) 710 { 711 struct kfd_ioctl_get_process_apertures_args *args = data; 712 struct kfd_process_device_apertures *pAperture; 713 int i; 714 715 dev_dbg(kfd_device, "get apertures for process pid %d", p->lead_thread->pid); 716 717 args->num_of_nodes = 0; 718 719 mutex_lock(&p->mutex); 720 /* Run over all pdd of the process */ 721 for (i = 0; i < p->n_pdds; i++) { 722 struct kfd_process_device *pdd = p->pdds[i]; 723 724 pAperture = 725 &args->process_apertures[args->num_of_nodes]; 726 pAperture->gpu_id = pdd->dev->id; 727 pAperture->lds_base = pdd->lds_base; 728 pAperture->lds_limit = pdd->lds_limit; 729 pAperture->gpuvm_base = pdd->gpuvm_base; 730 pAperture->gpuvm_limit = pdd->gpuvm_limit; 731 pAperture->scratch_base = pdd->scratch_base; 732 pAperture->scratch_limit = pdd->scratch_limit; 733 734 dev_dbg(kfd_device, 735 "node id %u\n", args->num_of_nodes); 736 dev_dbg(kfd_device, 737 "gpu id %u\n", pdd->dev->id); 738 dev_dbg(kfd_device, 739 "lds_base %llX\n", pdd->lds_base); 740 dev_dbg(kfd_device, 741 "lds_limit %llX\n", pdd->lds_limit); 742 dev_dbg(kfd_device, 743 "gpuvm_base %llX\n", pdd->gpuvm_base); 744 dev_dbg(kfd_device, 745 "gpuvm_limit %llX\n", pdd->gpuvm_limit); 746 dev_dbg(kfd_device, 747 "scratch_base %llX\n", pdd->scratch_base); 748 dev_dbg(kfd_device, 749 "scratch_limit %llX\n", pdd->scratch_limit); 750 751 if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS) 752 break; 753 } 754 mutex_unlock(&p->mutex); 755 756 return 0; 757 } 758 759 static int kfd_ioctl_get_process_apertures_new(struct file *filp, 760 struct kfd_process *p, void *data) 761 { 762 struct kfd_ioctl_get_process_apertures_new_args *args = data; 763 struct kfd_process_device_apertures *pa; 764 int ret; 765 int i; 766 767 dev_dbg(kfd_device, "get apertures for process pid %d", 768 p->lead_thread->pid); 769 770 if (args->num_of_nodes == 0) { 771 /* Return number of nodes, so that user space can alloacate 772 * sufficient memory 773 */ 774 mutex_lock(&p->mutex); 775 args->num_of_nodes = p->n_pdds; 776 goto out_unlock; 777 } 778 779 if (args->num_of_nodes > kfd_topology_get_num_devices()) 780 return -EINVAL; 781 782 /* Fill in process-aperture information for all available 783 * nodes, but not more than args->num_of_nodes as that is 784 * the amount of memory allocated by user 785 */ 786 pa = kzalloc_objs(struct kfd_process_device_apertures, 787 args->num_of_nodes); 788 if (!pa) 789 return -ENOMEM; 790 791 mutex_lock(&p->mutex); 792 793 if (!p->n_pdds) { 794 args->num_of_nodes = 0; 795 kfree(pa); 796 goto out_unlock; 797 } 798 799 /* Run over all pdd of the process */ 800 for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) { 801 struct kfd_process_device *pdd = p->pdds[i]; 802 803 pa[i].gpu_id = pdd->dev->id; 804 pa[i].lds_base = pdd->lds_base; 805 pa[i].lds_limit = pdd->lds_limit; 806 pa[i].gpuvm_base = pdd->gpuvm_base; 807 pa[i].gpuvm_limit = pdd->gpuvm_limit; 808 pa[i].scratch_base = pdd->scratch_base; 809 pa[i].scratch_limit = pdd->scratch_limit; 810 811 dev_dbg(kfd_device, 812 "gpu id %u\n", pdd->dev->id); 813 dev_dbg(kfd_device, 814 "lds_base %llX\n", pdd->lds_base); 815 dev_dbg(kfd_device, 816 "lds_limit %llX\n", pdd->lds_limit); 817 dev_dbg(kfd_device, 818 "gpuvm_base %llX\n", pdd->gpuvm_base); 819 dev_dbg(kfd_device, 820 "gpuvm_limit %llX\n", pdd->gpuvm_limit); 821 dev_dbg(kfd_device, 822 "scratch_base %llX\n", pdd->scratch_base); 823 dev_dbg(kfd_device, 824 "scratch_limit %llX\n", pdd->scratch_limit); 825 } 826 mutex_unlock(&p->mutex); 827 828 args->num_of_nodes = i; 829 ret = copy_to_user( 830 (void __user *)args->kfd_process_device_apertures_ptr, 831 pa, 832 (i * sizeof(struct kfd_process_device_apertures))); 833 kfree(pa); 834 return ret ? -EFAULT : 0; 835 836 out_unlock: 837 mutex_unlock(&p->mutex); 838 return 0; 839 } 840 841 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, 842 void *data) 843 { 844 struct kfd_ioctl_create_event_args *args = data; 845 int err; 846 847 /* For dGPUs the event page is allocated in user mode. The 848 * handle is passed to KFD with the first call to this IOCTL 849 * through the event_page_offset field. 850 */ 851 if (args->event_page_offset) { 852 mutex_lock(&p->mutex); 853 err = kfd_kmap_event_page(p, args->event_page_offset); 854 mutex_unlock(&p->mutex); 855 if (err) 856 return err; 857 } 858 859 err = kfd_event_create(filp, p, args->event_type, 860 args->auto_reset != 0, args->node_id, 861 &args->event_id, &args->event_trigger_data, 862 &args->event_page_offset, 863 &args->event_slot_index); 864 865 pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__); 866 return err; 867 } 868 869 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p, 870 void *data) 871 { 872 struct kfd_ioctl_destroy_event_args *args = data; 873 874 return kfd_event_destroy(p, args->event_id); 875 } 876 877 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p, 878 void *data) 879 { 880 struct kfd_ioctl_set_event_args *args = data; 881 882 return kfd_set_event(p, args->event_id); 883 } 884 885 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p, 886 void *data) 887 { 888 struct kfd_ioctl_reset_event_args *args = data; 889 890 return kfd_reset_event(p, args->event_id); 891 } 892 893 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, 894 void *data) 895 { 896 struct kfd_ioctl_wait_events_args *args = data; 897 898 return kfd_wait_on_events(p, args->num_events, 899 (void __user *)args->events_ptr, 900 (args->wait_for_all != 0), 901 &args->timeout, &args->wait_result); 902 } 903 static int kfd_ioctl_set_scratch_backing_va(struct file *filep, 904 struct kfd_process *p, void *data) 905 { 906 struct kfd_ioctl_set_scratch_backing_va_args *args = data; 907 struct kfd_process_device *pdd; 908 struct kfd_node *dev; 909 long err; 910 911 mutex_lock(&p->mutex); 912 pdd = kfd_process_device_data_by_id(p, args->gpu_id); 913 if (!pdd) { 914 err = -EINVAL; 915 goto err_pdd; 916 } 917 dev = pdd->dev; 918 919 pdd = kfd_bind_process_to_device(dev, p); 920 if (IS_ERR(pdd)) { 921 err = PTR_ERR(pdd); 922 goto bind_process_to_device_fail; 923 } 924 925 pdd->qpd.sh_hidden_private_base = args->va_addr; 926 927 mutex_unlock(&p->mutex); 928 929 if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && 930 pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va) 931 dev->kfd2kgd->set_scratch_backing_va( 932 dev->adev, args->va_addr, pdd->qpd.vmid); 933 934 return 0; 935 936 bind_process_to_device_fail: 937 err_pdd: 938 mutex_unlock(&p->mutex); 939 return err; 940 } 941 942 static int kfd_ioctl_get_tile_config(struct file *filep, 943 struct kfd_process *p, void *data) 944 { 945 struct kfd_ioctl_get_tile_config_args *args = data; 946 struct kfd_process_device *pdd; 947 struct tile_config config; 948 int err = 0; 949 950 mutex_lock(&p->mutex); 951 pdd = kfd_process_device_data_by_id(p, args->gpu_id); 952 mutex_unlock(&p->mutex); 953 if (!pdd) 954 return -EINVAL; 955 956 amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config); 957 958 args->gb_addr_config = config.gb_addr_config; 959 args->num_banks = config.num_banks; 960 args->num_ranks = config.num_ranks; 961 962 if (args->num_tile_configs > config.num_tile_configs) 963 args->num_tile_configs = config.num_tile_configs; 964 err = copy_to_user((void __user *)args->tile_config_ptr, 965 config.tile_config_ptr, 966 args->num_tile_configs * sizeof(uint32_t)); 967 if (err) { 968 args->num_tile_configs = 0; 969 return -EFAULT; 970 } 971 972 if (args->num_macro_tile_configs > config.num_macro_tile_configs) 973 args->num_macro_tile_configs = 974 config.num_macro_tile_configs; 975 err = copy_to_user((void __user *)args->macro_tile_config_ptr, 976 config.macro_tile_config_ptr, 977 args->num_macro_tile_configs * sizeof(uint32_t)); 978 if (err) { 979 args->num_macro_tile_configs = 0; 980 return -EFAULT; 981 } 982 983 return 0; 984 } 985 986 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, 987 void *data) 988 { 989 struct kfd_ioctl_acquire_vm_args *args = data; 990 struct kfd_process_device *pdd; 991 struct file *drm_file; 992 int ret; 993 994 drm_file = fget(args->drm_fd); 995 if (!drm_file) 996 return -EINVAL; 997 998 mutex_lock(&p->mutex); 999 pdd = kfd_process_device_data_by_id(p, args->gpu_id); 1000 if (!pdd) { 1001 ret = -EINVAL; 1002 goto err_pdd; 1003 } 1004 1005 if (pdd->drm_file) { 1006 ret = pdd->drm_file == drm_file ? 0 : -EBUSY; 1007 goto err_drm_file; 1008 } 1009 1010 ret = kfd_process_device_init_vm(pdd, drm_file); 1011 if (ret) 1012 goto err_unlock; 1013 1014 /* On success, the PDD keeps the drm_file reference */ 1015 mutex_unlock(&p->mutex); 1016 1017 return 0; 1018 1019 err_unlock: 1020 err_pdd: 1021 err_drm_file: 1022 mutex_unlock(&p->mutex); 1023 fput(drm_file); 1024 return ret; 1025 } 1026 1027 bool kfd_dev_is_large_bar(struct kfd_node *dev) 1028 { 1029 if (dev->kfd->adev->debug_largebar) { 1030 pr_debug("Simulate large-bar allocation on non large-bar machine\n"); 1031 return true; 1032 } 1033 1034 if (dev->local_mem_info.local_mem_size_private == 0 && 1035 dev->local_mem_info.local_mem_size_public > 0) 1036 return true; 1037 1038 if (dev->local_mem_info.local_mem_size_public == 0 && 1039 dev->kfd->adev->gmc.is_app_apu) { 1040 pr_debug("APP APU, Consider like a large bar system\n"); 1041 return true; 1042 } 1043 1044 return false; 1045 } 1046 1047 static int kfd_ioctl_get_available_memory(struct file *filep, 1048 struct kfd_process *p, void *data) 1049 { 1050 struct kfd_ioctl_get_available_memory_args *args = data; 1051 struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id); 1052 1053 if (!pdd) 1054 return -EINVAL; 1055 args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev, 1056 pdd->dev->node_id); 1057 kfd_unlock_pdd(pdd); 1058 return 0; 1059 } 1060 1061 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, 1062 struct kfd_process *p, void *data) 1063 { 1064 struct kfd_ioctl_alloc_memory_of_gpu_args *args = data; 1065 struct kfd_process_device *pdd; 1066 void *mem; 1067 struct kfd_node *dev; 1068 int idr_handle; 1069 long err; 1070 uint64_t offset = args->mmap_offset; 1071 uint32_t flags = args->flags; 1072 1073 if (args->size == 0) 1074 return -EINVAL; 1075 1076 if (p->context_id != KFD_CONTEXT_ID_PRIMARY && (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) { 1077 pr_debug("USERPTR is not supported on non-primary kfd_process\n"); 1078 1079 return -EOPNOTSUPP; 1080 } 1081 1082 #if IS_ENABLED(CONFIG_HSA_AMD_SVM) 1083 /* Flush pending deferred work to avoid racing with deferred actions 1084 * from previous memory map changes (e.g. munmap). 1085 */ 1086 svm_range_list_lock_and_flush_work(&p->svms, current->mm); 1087 mutex_lock(&p->svms.lock); 1088 mmap_write_unlock(current->mm); 1089 1090 /* Skip a special case that allocates VRAM without VA, 1091 * VA will be invalid of 0. 1092 */ 1093 if (!(!args->va_addr && (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) && 1094 interval_tree_iter_first(&p->svms.objects, 1095 args->va_addr >> PAGE_SHIFT, 1096 (args->va_addr + args->size - 1) >> PAGE_SHIFT)) { 1097 pr_err("Address: 0x%llx already allocated by SVM\n", 1098 args->va_addr); 1099 mutex_unlock(&p->svms.lock); 1100 return -EADDRINUSE; 1101 } 1102 1103 /* When register user buffer check if it has been registered by svm by 1104 * buffer cpu virtual address. 1105 */ 1106 if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) && 1107 interval_tree_iter_first(&p->svms.objects, 1108 args->mmap_offset >> PAGE_SHIFT, 1109 (args->mmap_offset + args->size - 1) >> PAGE_SHIFT)) { 1110 pr_err("User Buffer Address: 0x%llx already allocated by SVM\n", 1111 args->mmap_offset); 1112 mutex_unlock(&p->svms.lock); 1113 return -EADDRINUSE; 1114 } 1115 1116 mutex_unlock(&p->svms.lock); 1117 #endif 1118 mutex_lock(&p->mutex); 1119 pdd = kfd_process_device_data_by_id(p, args->gpu_id); 1120 if (!pdd) { 1121 err = -EINVAL; 1122 goto err_pdd; 1123 } 1124 1125 dev = pdd->dev; 1126 1127 if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) && 1128 (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && 1129 !kfd_dev_is_large_bar(dev)) { 1130 pr_err("Alloc host visible vram on small bar is not allowed\n"); 1131 err = -EINVAL; 1132 goto err_large_bar; 1133 } 1134 1135 pdd = kfd_bind_process_to_device(dev, p); 1136 if (IS_ERR(pdd)) { 1137 err = PTR_ERR(pdd); 1138 goto err_unlock; 1139 } 1140 1141 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { 1142 if (args->size != kfd_doorbell_process_slice(dev->kfd)) { 1143 err = -EINVAL; 1144 goto err_unlock; 1145 } 1146 offset = kfd_get_process_doorbells(pdd); 1147 if (!offset) { 1148 err = -ENOMEM; 1149 goto err_unlock; 1150 } 1151 } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { 1152 if (args->size != PAGE_SIZE) { 1153 err = -EINVAL; 1154 goto err_unlock; 1155 } 1156 offset = dev->adev->rmmio_remap.bus_addr; 1157 if (!offset || (PAGE_SIZE > 4096)) { 1158 err = -ENOMEM; 1159 goto err_unlock; 1160 } 1161 } 1162 1163 err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( 1164 dev->adev, args->va_addr, args->size, 1165 pdd->drm_priv, (struct kgd_mem **) &mem, &offset, 1166 flags, false); 1167 1168 if (err) 1169 goto err_unlock; 1170 1171 idr_handle = kfd_process_device_create_obj_handle(pdd, mem); 1172 if (idr_handle < 0) { 1173 err = -EFAULT; 1174 goto err_free; 1175 } 1176 1177 /* Update the VRAM usage count */ 1178 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { 1179 uint64_t size = args->size; 1180 1181 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) 1182 size >>= 1; 1183 atomic64_add(PAGE_ALIGN(size), &pdd->vram_usage); 1184 } 1185 1186 mutex_unlock(&p->mutex); 1187 1188 args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); 1189 args->mmap_offset = offset; 1190 1191 /* MMIO is mapped through kfd device 1192 * Generate a kfd mmap offset 1193 */ 1194 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) 1195 args->mmap_offset = KFD_MMAP_TYPE_MMIO 1196 | KFD_MMAP_GPU_ID(args->gpu_id); 1197 1198 return 0; 1199 1200 err_free: 1201 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem, 1202 pdd->drm_priv, NULL); 1203 err_unlock: 1204 err_pdd: 1205 err_large_bar: 1206 mutex_unlock(&p->mutex); 1207 return err; 1208 } 1209 1210 static int kfd_ioctl_free_memory_of_gpu(struct file *filep, 1211 struct kfd_process *p, void *data) 1212 { 1213 struct kfd_ioctl_free_memory_of_gpu_args *args = data; 1214 struct kfd_process_device *pdd; 1215 void *mem; 1216 int ret; 1217 uint64_t size = 0; 1218 1219 mutex_lock(&p->mutex); 1220 /* 1221 * Safeguard to prevent user space from freeing signal BO. 1222 * It will be freed at process termination. 1223 */ 1224 if (p->signal_handle && (p->signal_handle == args->handle)) { 1225 pr_err("Free signal BO is not allowed\n"); 1226 ret = -EPERM; 1227 goto err_unlock; 1228 } 1229 1230 pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle)); 1231 if (!pdd) { 1232 pr_err("Process device data doesn't exist\n"); 1233 ret = -EINVAL; 1234 goto err_pdd; 1235 } 1236 1237 mem = kfd_process_device_translate_handle( 1238 pdd, GET_IDR_HANDLE(args->handle)); 1239 if (!mem) { 1240 ret = -EINVAL; 1241 goto err_unlock; 1242 } 1243 1244 ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, 1245 (struct kgd_mem *)mem, pdd->drm_priv, &size); 1246 1247 /* If freeing the buffer failed, leave the handle in place for 1248 * clean-up during process tear-down. 1249 */ 1250 if (!ret) 1251 kfd_process_device_remove_obj_handle( 1252 pdd, GET_IDR_HANDLE(args->handle)); 1253 1254 atomic64_sub(size, &pdd->vram_usage); 1255 1256 err_unlock: 1257 err_pdd: 1258 mutex_unlock(&p->mutex); 1259 return ret; 1260 } 1261 1262 static int kfd_ioctl_map_memory_to_gpu(struct file *filep, 1263 struct kfd_process *p, void *data) 1264 { 1265 struct kfd_ioctl_map_memory_to_gpu_args *args = data; 1266 struct kfd_process_device *pdd, *peer_pdd; 1267 void *mem; 1268 struct kfd_node *dev; 1269 long err = 0; 1270 int i; 1271 uint32_t *devices_arr = NULL; 1272 1273 if (!args->n_devices) { 1274 pr_debug("Device IDs array empty\n"); 1275 return -EINVAL; 1276 } 1277 if (args->n_success > args->n_devices) { 1278 pr_debug("n_success exceeds n_devices\n"); 1279 return -EINVAL; 1280 } 1281 1282 devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), 1283 GFP_KERNEL); 1284 if (!devices_arr) 1285 return -ENOMEM; 1286 1287 err = copy_from_user(devices_arr, 1288 (void __user *)args->device_ids_array_ptr, 1289 args->n_devices * sizeof(*devices_arr)); 1290 if (err != 0) { 1291 err = -EFAULT; 1292 goto copy_from_user_failed; 1293 } 1294 1295 mutex_lock(&p->mutex); 1296 pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle)); 1297 if (!pdd) { 1298 err = -EINVAL; 1299 goto get_process_device_data_failed; 1300 } 1301 dev = pdd->dev; 1302 1303 pdd = kfd_bind_process_to_device(dev, p); 1304 if (IS_ERR(pdd)) { 1305 err = PTR_ERR(pdd); 1306 goto bind_process_to_device_failed; 1307 } 1308 1309 mem = kfd_process_device_translate_handle(pdd, 1310 GET_IDR_HANDLE(args->handle)); 1311 if (!mem) { 1312 err = -ENOMEM; 1313 goto get_mem_obj_from_handle_failed; 1314 } 1315 1316 for (i = args->n_success; i < args->n_devices; i++) { 1317 peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); 1318 if (!peer_pdd) { 1319 pr_debug("Getting device by id failed for 0x%x\n", 1320 devices_arr[i]); 1321 err = -EINVAL; 1322 goto get_mem_obj_from_handle_failed; 1323 } 1324 1325 peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p); 1326 if (IS_ERR(peer_pdd)) { 1327 err = PTR_ERR(peer_pdd); 1328 goto get_mem_obj_from_handle_failed; 1329 } 1330 1331 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( 1332 peer_pdd->dev->adev, (struct kgd_mem *)mem, 1333 peer_pdd->drm_priv); 1334 if (err) { 1335 struct pci_dev *pdev = peer_pdd->dev->adev->pdev; 1336 1337 dev_err(dev->adev->dev, 1338 "Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n", 1339 pci_domain_nr(pdev->bus), 1340 pdev->bus->number, 1341 PCI_SLOT(pdev->devfn), 1342 PCI_FUNC(pdev->devfn), 1343 ((struct kgd_mem *)mem)->domain); 1344 goto map_memory_to_gpu_failed; 1345 } 1346 args->n_success = i+1; 1347 } 1348 1349 err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true); 1350 if (err) { 1351 pr_debug("Sync memory failed, wait interrupted by user signal\n"); 1352 goto sync_memory_failed; 1353 } 1354 1355 mutex_unlock(&p->mutex); 1356 1357 /* Flush TLBs after waiting for the page table updates to complete */ 1358 for (i = 0; i < args->n_devices; i++) { 1359 peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); 1360 if (WARN_ON_ONCE(!peer_pdd)) 1361 continue; 1362 kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); 1363 } 1364 kfree(devices_arr); 1365 1366 return err; 1367 1368 get_process_device_data_failed: 1369 bind_process_to_device_failed: 1370 get_mem_obj_from_handle_failed: 1371 map_memory_to_gpu_failed: 1372 sync_memory_failed: 1373 mutex_unlock(&p->mutex); 1374 copy_from_user_failed: 1375 kfree(devices_arr); 1376 1377 return err; 1378 } 1379 1380 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, 1381 struct kfd_process *p, void *data) 1382 { 1383 struct kfd_ioctl_unmap_memory_from_gpu_args *args = data; 1384 struct kfd_process_device *pdd, *peer_pdd; 1385 void *mem; 1386 long err = 0; 1387 uint32_t *devices_arr = NULL, i; 1388 bool flush_tlb; 1389 1390 if (!args->n_devices) { 1391 pr_debug("Device IDs array empty\n"); 1392 return -EINVAL; 1393 } 1394 if (args->n_success > args->n_devices) { 1395 pr_debug("n_success exceeds n_devices\n"); 1396 return -EINVAL; 1397 } 1398 1399 devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), 1400 GFP_KERNEL); 1401 if (!devices_arr) 1402 return -ENOMEM; 1403 1404 err = copy_from_user(devices_arr, 1405 (void __user *)args->device_ids_array_ptr, 1406 args->n_devices * sizeof(*devices_arr)); 1407 if (err != 0) { 1408 err = -EFAULT; 1409 goto copy_from_user_failed; 1410 } 1411 1412 mutex_lock(&p->mutex); 1413 pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle)); 1414 if (!pdd) { 1415 err = -EINVAL; 1416 goto bind_process_to_device_failed; 1417 } 1418 1419 mem = kfd_process_device_translate_handle(pdd, 1420 GET_IDR_HANDLE(args->handle)); 1421 if (!mem) { 1422 err = -ENOMEM; 1423 goto get_mem_obj_from_handle_failed; 1424 } 1425 1426 for (i = args->n_success; i < args->n_devices; i++) { 1427 peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); 1428 if (!peer_pdd) { 1429 err = -EINVAL; 1430 goto get_mem_obj_from_handle_failed; 1431 } 1432 err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( 1433 peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv); 1434 if (err) { 1435 pr_debug("Failed to unmap from gpu %d/%d\n", i, args->n_devices); 1436 goto unmap_memory_from_gpu_failed; 1437 } 1438 args->n_success = i+1; 1439 } 1440 1441 flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd); 1442 if (flush_tlb) { 1443 err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev, 1444 (struct kgd_mem *) mem, true); 1445 if (err) { 1446 pr_debug("Sync memory failed, wait interrupted by user signal\n"); 1447 goto sync_memory_failed; 1448 } 1449 } 1450 1451 /* Flush TLBs after waiting for the page table updates to complete */ 1452 for (i = 0; i < args->n_devices; i++) { 1453 peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); 1454 if (WARN_ON_ONCE(!peer_pdd)) 1455 continue; 1456 if (flush_tlb) 1457 kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT); 1458 1459 /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */ 1460 err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv); 1461 if (err) 1462 goto sync_memory_failed; 1463 } 1464 1465 mutex_unlock(&p->mutex); 1466 1467 kfree(devices_arr); 1468 1469 return 0; 1470 1471 bind_process_to_device_failed: 1472 get_mem_obj_from_handle_failed: 1473 unmap_memory_from_gpu_failed: 1474 sync_memory_failed: 1475 mutex_unlock(&p->mutex); 1476 copy_from_user_failed: 1477 kfree(devices_arr); 1478 return err; 1479 } 1480 1481 static int kfd_ioctl_alloc_queue_gws(struct file *filep, 1482 struct kfd_process *p, void *data) 1483 { 1484 int retval; 1485 struct kfd_ioctl_alloc_queue_gws_args *args = data; 1486 struct queue *q; 1487 struct kfd_node *dev; 1488 1489 mutex_lock(&p->mutex); 1490 q = pqm_get_user_queue(&p->pqm, args->queue_id); 1491 1492 if (q) { 1493 dev = q->device; 1494 } else { 1495 retval = -EINVAL; 1496 goto out_unlock; 1497 } 1498 1499 if (!dev->gws) { 1500 retval = -ENODEV; 1501 goto out_unlock; 1502 } 1503 1504 if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 1505 retval = -ENODEV; 1506 goto out_unlock; 1507 } 1508 1509 if (p->debug_trap_enabled && (!kfd_dbg_has_gws_support(dev) || 1510 kfd_dbg_has_cwsr_workaround(dev))) { 1511 retval = -EBUSY; 1512 goto out_unlock; 1513 } 1514 1515 retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL); 1516 mutex_unlock(&p->mutex); 1517 1518 args->first_gws = 0; 1519 return retval; 1520 1521 out_unlock: 1522 mutex_unlock(&p->mutex); 1523 return retval; 1524 } 1525 1526 static int kfd_ioctl_get_dmabuf_info(struct file *filep, 1527 struct kfd_process *p, void *data) 1528 { 1529 struct kfd_ioctl_get_dmabuf_info_args *args = data; 1530 struct kfd_node *dev = NULL; 1531 struct amdgpu_device *dmabuf_adev; 1532 void *metadata_buffer = NULL; 1533 uint32_t flags; 1534 int8_t xcp_id; 1535 unsigned int i; 1536 int r; 1537 1538 /* Find a KFD GPU device that supports the get_dmabuf_info query */ 1539 for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++) 1540 if (dev && !kfd_devcgroup_check_permission(dev)) 1541 break; 1542 if (!dev) 1543 return -EINVAL; 1544 1545 if (args->metadata_ptr) { 1546 metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL); 1547 if (!metadata_buffer) 1548 return -ENOMEM; 1549 } 1550 1551 /* Get dmabuf info from KGD */ 1552 r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd, 1553 &dmabuf_adev, &args->size, 1554 metadata_buffer, args->metadata_size, 1555 &args->metadata_size, &flags, &xcp_id); 1556 if (r) 1557 goto exit; 1558 1559 if (xcp_id >= 0) 1560 args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id; 1561 else 1562 args->gpu_id = dev->id; 1563 args->flags = flags; 1564 1565 /* Copy metadata buffer to user mode */ 1566 if (metadata_buffer) { 1567 r = copy_to_user((void __user *)args->metadata_ptr, 1568 metadata_buffer, args->metadata_size); 1569 if (r != 0) 1570 r = -EFAULT; 1571 } 1572 1573 exit: 1574 kfree(metadata_buffer); 1575 1576 return r; 1577 } 1578 1579 static int kfd_ioctl_import_dmabuf(struct file *filep, 1580 struct kfd_process *p, void *data) 1581 { 1582 struct kfd_ioctl_import_dmabuf_args *args = data; 1583 struct kfd_process_device *pdd; 1584 int idr_handle; 1585 uint64_t size; 1586 void *mem; 1587 int r; 1588 1589 mutex_lock(&p->mutex); 1590 pdd = kfd_process_device_data_by_id(p, args->gpu_id); 1591 if (!pdd) { 1592 r = -EINVAL; 1593 goto err_unlock; 1594 } 1595 1596 pdd = kfd_bind_process_to_device(pdd->dev, p); 1597 if (IS_ERR(pdd)) { 1598 r = PTR_ERR(pdd); 1599 goto err_unlock; 1600 } 1601 1602 r = amdgpu_amdkfd_gpuvm_import_dmabuf_fd(pdd->dev->adev, args->dmabuf_fd, 1603 args->va_addr, pdd->drm_priv, 1604 (struct kgd_mem **)&mem, &size, 1605 NULL); 1606 if (r) 1607 goto err_unlock; 1608 1609 idr_handle = kfd_process_device_create_obj_handle(pdd, mem); 1610 if (idr_handle < 0) { 1611 r = -EFAULT; 1612 goto err_free; 1613 } 1614 1615 mutex_unlock(&p->mutex); 1616 1617 args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); 1618 1619 return 0; 1620 1621 err_free: 1622 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem, 1623 pdd->drm_priv, NULL); 1624 err_unlock: 1625 mutex_unlock(&p->mutex); 1626 return r; 1627 } 1628 1629 static int kfd_ioctl_export_dmabuf(struct file *filep, 1630 struct kfd_process *p, void *data) 1631 { 1632 struct kfd_ioctl_export_dmabuf_args *args = data; 1633 struct kfd_process_device *pdd; 1634 struct dma_buf *dmabuf; 1635 struct kfd_node *dev; 1636 void *mem; 1637 int ret = 0; 1638 1639 dev = kfd_device_by_id(GET_GPU_ID(args->handle)); 1640 if (!dev) 1641 return -EINVAL; 1642 1643 mutex_lock(&p->mutex); 1644 1645 pdd = kfd_get_process_device_data(dev, p); 1646 if (!pdd) { 1647 ret = -EINVAL; 1648 goto err_unlock; 1649 } 1650 1651 mem = kfd_process_device_translate_handle(pdd, 1652 GET_IDR_HANDLE(args->handle)); 1653 if (!mem) { 1654 ret = -EINVAL; 1655 goto err_unlock; 1656 } 1657 1658 ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf); 1659 mutex_unlock(&p->mutex); 1660 if (ret) 1661 goto err_out; 1662 1663 ret = dma_buf_fd(dmabuf, args->flags); 1664 if (ret < 0) { 1665 dma_buf_put(dmabuf); 1666 goto err_out; 1667 } 1668 /* dma_buf_fd assigns the reference count to the fd, no need to 1669 * put the reference here. 1670 */ 1671 args->dmabuf_fd = ret; 1672 1673 return 0; 1674 1675 err_unlock: 1676 mutex_unlock(&p->mutex); 1677 err_out: 1678 return ret; 1679 } 1680 1681 /* Handle requests for watching SMI events */ 1682 static int kfd_ioctl_smi_events(struct file *filep, 1683 struct kfd_process *p, void *data) 1684 { 1685 struct kfd_ioctl_smi_events_args *args = data; 1686 struct kfd_process_device *pdd; 1687 1688 mutex_lock(&p->mutex); 1689 1690 pdd = kfd_process_device_data_by_id(p, args->gpuid); 1691 mutex_unlock(&p->mutex); 1692 if (!pdd) 1693 return -EINVAL; 1694 1695 return kfd_smi_event_open(pdd->dev, &args->anon_fd); 1696 } 1697 1698 #if IS_ENABLED(CONFIG_HSA_AMD_SVM) 1699 1700 static int kfd_ioctl_set_xnack_mode(struct file *filep, 1701 struct kfd_process *p, void *data) 1702 { 1703 struct kfd_ioctl_set_xnack_mode_args *args = data; 1704 int r = 0; 1705 1706 mutex_lock(&p->mutex); 1707 if (args->xnack_enabled >= 0) { 1708 if (!list_empty(&p->pqm.queues)) { 1709 pr_debug("Process has user queues running\n"); 1710 r = -EBUSY; 1711 goto out_unlock; 1712 } 1713 1714 if (p->xnack_enabled == args->xnack_enabled) 1715 goto out_unlock; 1716 1717 if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) { 1718 r = -EPERM; 1719 goto out_unlock; 1720 } 1721 1722 r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled); 1723 } else { 1724 args->xnack_enabled = p->xnack_enabled; 1725 } 1726 1727 out_unlock: 1728 mutex_unlock(&p->mutex); 1729 1730 return r; 1731 } 1732 1733 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) 1734 { 1735 struct kfd_ioctl_svm_args *args = data; 1736 int r = 0; 1737 1738 if (p->context_id != KFD_CONTEXT_ID_PRIMARY) { 1739 pr_debug("SVM ioctl not supported on non-primary kfd process\n"); 1740 1741 return -EOPNOTSUPP; 1742 } 1743 1744 pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n", 1745 args->start_addr, args->size, args->op, args->nattr); 1746 1747 if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK)) 1748 return -EINVAL; 1749 if (!args->start_addr || !args->size) 1750 return -EINVAL; 1751 1752 r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr, 1753 args->attrs); 1754 1755 return r; 1756 } 1757 #else 1758 static int kfd_ioctl_set_xnack_mode(struct file *filep, 1759 struct kfd_process *p, void *data) 1760 { 1761 return -EPERM; 1762 } 1763 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) 1764 { 1765 return -EPERM; 1766 } 1767 #endif 1768 1769 static int criu_checkpoint_process(struct kfd_process *p, 1770 uint8_t __user *user_priv_data, 1771 uint64_t *priv_offset) 1772 { 1773 struct kfd_criu_process_priv_data process_priv; 1774 int ret; 1775 1776 memset(&process_priv, 0, sizeof(process_priv)); 1777 1778 process_priv.version = KFD_CRIU_PRIV_VERSION; 1779 /* For CR, we don't consider negative xnack mode which is used for 1780 * querying without changing it, here 0 simply means disabled and 1 1781 * means enabled so retry for finding a valid PTE. 1782 */ 1783 process_priv.xnack_mode = p->xnack_enabled ? 1 : 0; 1784 1785 ret = copy_to_user(user_priv_data + *priv_offset, 1786 &process_priv, sizeof(process_priv)); 1787 1788 if (ret) { 1789 pr_err("Failed to copy process information to user\n"); 1790 ret = -EFAULT; 1791 } 1792 1793 *priv_offset += sizeof(process_priv); 1794 return ret; 1795 } 1796 1797 static int criu_checkpoint_devices(struct kfd_process *p, 1798 uint32_t num_devices, 1799 uint8_t __user *user_addr, 1800 uint8_t __user *user_priv_data, 1801 uint64_t *priv_offset) 1802 { 1803 struct kfd_criu_device_priv_data *device_priv = NULL; 1804 struct kfd_criu_device_bucket *device_buckets = NULL; 1805 int ret = 0, i; 1806 1807 device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL); 1808 if (!device_buckets) { 1809 ret = -ENOMEM; 1810 goto exit; 1811 } 1812 1813 device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL); 1814 if (!device_priv) { 1815 ret = -ENOMEM; 1816 goto exit; 1817 } 1818 1819 for (i = 0; i < num_devices; i++) { 1820 struct kfd_process_device *pdd = p->pdds[i]; 1821 1822 device_buckets[i].user_gpu_id = pdd->user_gpu_id; 1823 device_buckets[i].actual_gpu_id = pdd->dev->id; 1824 1825 /* 1826 * priv_data does not contain useful information for now and is reserved for 1827 * future use, so we do not set its contents. 1828 */ 1829 } 1830 1831 ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets)); 1832 if (ret) { 1833 pr_err("Failed to copy device information to user\n"); 1834 ret = -EFAULT; 1835 goto exit; 1836 } 1837 1838 ret = copy_to_user(user_priv_data + *priv_offset, 1839 device_priv, 1840 num_devices * sizeof(*device_priv)); 1841 if (ret) { 1842 pr_err("Failed to copy device information to user\n"); 1843 ret = -EFAULT; 1844 } 1845 *priv_offset += num_devices * sizeof(*device_priv); 1846 1847 exit: 1848 kvfree(device_buckets); 1849 kvfree(device_priv); 1850 return ret; 1851 } 1852 1853 static uint32_t get_process_num_bos(struct kfd_process *p) 1854 { 1855 uint32_t num_of_bos = 0; 1856 int i; 1857 1858 /* Run over all PDDs of the process */ 1859 for (i = 0; i < p->n_pdds; i++) { 1860 struct kfd_process_device *pdd = p->pdds[i]; 1861 void *mem; 1862 int id; 1863 1864 idr_for_each_entry(&pdd->alloc_idr, mem, id) { 1865 struct kgd_mem *kgd_mem = (struct kgd_mem *)mem; 1866 1867 if (!kgd_mem->va || kgd_mem->va > pdd->gpuvm_base) 1868 num_of_bos++; 1869 } 1870 } 1871 return num_of_bos; 1872 } 1873 1874 static int criu_get_prime_handle(struct kgd_mem *mem, 1875 int flags, u32 *shared_fd, 1876 struct file **file) 1877 { 1878 struct dma_buf *dmabuf; 1879 int ret; 1880 1881 ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf); 1882 if (ret) { 1883 pr_err("dmabuf export failed for the BO\n"); 1884 return ret; 1885 } 1886 1887 ret = get_unused_fd_flags(flags); 1888 if (ret < 0) { 1889 pr_err("dmabuf create fd failed, ret:%d\n", ret); 1890 goto out_free_dmabuf; 1891 } 1892 1893 *shared_fd = ret; 1894 *file = dmabuf->file; 1895 return 0; 1896 1897 out_free_dmabuf: 1898 dma_buf_put(dmabuf); 1899 return ret; 1900 } 1901 1902 static void commit_files(struct file **files, 1903 struct kfd_criu_bo_bucket *bo_buckets, 1904 unsigned int count, 1905 int err) 1906 { 1907 while (count--) { 1908 struct file *file = files[count]; 1909 1910 if (!file) 1911 continue; 1912 if (err) { 1913 fput(file); 1914 put_unused_fd(bo_buckets[count].dmabuf_fd); 1915 } else { 1916 fd_install(bo_buckets[count].dmabuf_fd, file); 1917 } 1918 } 1919 } 1920 1921 static int criu_checkpoint_bos(struct kfd_process *p, 1922 uint32_t num_bos, 1923 uint8_t __user *user_bos, 1924 uint8_t __user *user_priv_data, 1925 uint64_t *priv_offset) 1926 { 1927 struct kfd_criu_bo_bucket *bo_buckets; 1928 struct kfd_criu_bo_priv_data *bo_privs; 1929 struct file **files = NULL; 1930 int ret = 0, pdd_index, bo_index = 0, id; 1931 void *mem; 1932 1933 bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL); 1934 if (!bo_buckets) 1935 return -ENOMEM; 1936 1937 bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL); 1938 if (!bo_privs) { 1939 ret = -ENOMEM; 1940 goto exit; 1941 } 1942 1943 files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL); 1944 if (!files) { 1945 ret = -ENOMEM; 1946 goto exit; 1947 } 1948 1949 for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) { 1950 struct kfd_process_device *pdd = p->pdds[pdd_index]; 1951 struct amdgpu_bo *dumper_bo; 1952 struct kgd_mem *kgd_mem; 1953 1954 idr_for_each_entry(&pdd->alloc_idr, mem, id) { 1955 struct kfd_criu_bo_bucket *bo_bucket; 1956 struct kfd_criu_bo_priv_data *bo_priv; 1957 int i, dev_idx = 0; 1958 1959 kgd_mem = (struct kgd_mem *)mem; 1960 dumper_bo = kgd_mem->bo; 1961 1962 /* Skip checkpointing BOs that are used for Trap handler 1963 * code and state. Currently, these BOs have a VA that 1964 * is less GPUVM Base 1965 */ 1966 if (kgd_mem->va && kgd_mem->va <= pdd->gpuvm_base) 1967 continue; 1968 1969 bo_bucket = &bo_buckets[bo_index]; 1970 bo_priv = &bo_privs[bo_index]; 1971 1972 bo_bucket->gpu_id = pdd->user_gpu_id; 1973 bo_bucket->addr = (uint64_t)kgd_mem->va; 1974 bo_bucket->size = amdgpu_bo_size(dumper_bo); 1975 bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags; 1976 bo_priv->idr_handle = id; 1977 1978 if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { 1979 ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo, 1980 &bo_priv->user_addr); 1981 if (ret) { 1982 pr_err("Failed to obtain user address for user-pointer bo\n"); 1983 goto exit; 1984 } 1985 } 1986 if (bo_bucket->alloc_flags 1987 & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { 1988 ret = criu_get_prime_handle(kgd_mem, 1989 bo_bucket->alloc_flags & 1990 KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0, 1991 &bo_bucket->dmabuf_fd, &files[bo_index]); 1992 if (ret) 1993 goto exit; 1994 } else { 1995 bo_bucket->dmabuf_fd = KFD_INVALID_FD; 1996 } 1997 1998 if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) 1999 bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL | 2000 KFD_MMAP_GPU_ID(pdd->dev->id); 2001 else if (bo_bucket->alloc_flags & 2002 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) 2003 bo_bucket->offset = KFD_MMAP_TYPE_MMIO | 2004 KFD_MMAP_GPU_ID(pdd->dev->id); 2005 else 2006 bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo); 2007 2008 for (i = 0; i < p->n_pdds; i++) { 2009 if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->drm_priv, kgd_mem)) 2010 bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id; 2011 } 2012 2013 pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n" 2014 "gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x", 2015 bo_bucket->size, 2016 bo_bucket->addr, 2017 bo_bucket->offset, 2018 bo_bucket->gpu_id, 2019 bo_bucket->alloc_flags, 2020 bo_priv->idr_handle); 2021 bo_index++; 2022 } 2023 } 2024 2025 ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets)); 2026 if (ret) { 2027 pr_err("Failed to copy BO information to user\n"); 2028 ret = -EFAULT; 2029 goto exit; 2030 } 2031 2032 ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs)); 2033 if (ret) { 2034 pr_err("Failed to copy BO priv information to user\n"); 2035 ret = -EFAULT; 2036 goto exit; 2037 } 2038 2039 *priv_offset += num_bos * sizeof(*bo_privs); 2040 2041 exit: 2042 commit_files(files, bo_buckets, bo_index, ret); 2043 kvfree(files); 2044 kvfree(bo_buckets); 2045 kvfree(bo_privs); 2046 return ret; 2047 } 2048 2049 static int criu_get_process_object_info(struct kfd_process *p, 2050 uint32_t *num_devices, 2051 uint32_t *num_bos, 2052 uint32_t *num_objects, 2053 uint64_t *objs_priv_size) 2054 { 2055 uint64_t queues_priv_data_size, svm_priv_data_size, priv_size; 2056 uint32_t num_queues, num_events, num_svm_ranges; 2057 int ret; 2058 2059 *num_devices = p->n_pdds; 2060 *num_bos = get_process_num_bos(p); 2061 2062 ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size); 2063 if (ret) 2064 return ret; 2065 2066 num_events = kfd_get_num_events(p); 2067 2068 svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size); 2069 2070 *num_objects = num_queues + num_events + num_svm_ranges; 2071 2072 if (objs_priv_size) { 2073 priv_size = sizeof(struct kfd_criu_process_priv_data); 2074 priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data); 2075 priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data); 2076 priv_size += queues_priv_data_size; 2077 priv_size += num_events * sizeof(struct kfd_criu_event_priv_data); 2078 priv_size += svm_priv_data_size; 2079 *objs_priv_size = priv_size; 2080 } 2081 return 0; 2082 } 2083 2084 static int criu_checkpoint(struct file *filep, 2085 struct kfd_process *p, 2086 struct kfd_ioctl_criu_args *args) 2087 { 2088 int ret; 2089 uint32_t num_devices, num_bos, num_objects; 2090 uint64_t priv_size, priv_offset = 0, bo_priv_offset; 2091 2092 if (!args->devices || !args->bos || !args->priv_data) 2093 return -EINVAL; 2094 2095 mutex_lock(&p->mutex); 2096 2097 if (!p->n_pdds) { 2098 pr_err("No pdd for given process\n"); 2099 ret = -ENODEV; 2100 goto exit_unlock; 2101 } 2102 2103 /* Confirm all process queues are evicted */ 2104 if (!p->queues_paused) { 2105 pr_err("Cannot dump process when queues are not in evicted state\n"); 2106 /* CRIU plugin did not call op PROCESS_INFO before checkpointing */ 2107 ret = -EINVAL; 2108 goto exit_unlock; 2109 } 2110 2111 ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size); 2112 if (ret) 2113 goto exit_unlock; 2114 2115 if (num_devices != args->num_devices || 2116 num_bos != args->num_bos || 2117 num_objects != args->num_objects || 2118 priv_size != args->priv_data_size) { 2119 2120 ret = -EINVAL; 2121 goto exit_unlock; 2122 } 2123 2124 /* each function will store private data inside priv_data and adjust priv_offset */ 2125 ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset); 2126 if (ret) 2127 goto exit_unlock; 2128 2129 ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices, 2130 (uint8_t __user *)args->priv_data, &priv_offset); 2131 if (ret) 2132 goto exit_unlock; 2133 2134 /* Leave room for BOs in the private data. They need to be restored 2135 * before events, but we checkpoint them last to simplify the error 2136 * handling. 2137 */ 2138 bo_priv_offset = priv_offset; 2139 priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data); 2140 2141 if (num_objects) { 2142 ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data, 2143 &priv_offset); 2144 if (ret) 2145 goto exit_unlock; 2146 2147 ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data, 2148 &priv_offset); 2149 if (ret) 2150 goto exit_unlock; 2151 2152 ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset); 2153 if (ret) 2154 goto exit_unlock; 2155 } 2156 2157 /* This must be the last thing in this function that can fail. 2158 * Otherwise we leak dmabuf file descriptors. 2159 */ 2160 ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos, 2161 (uint8_t __user *)args->priv_data, &bo_priv_offset); 2162 2163 exit_unlock: 2164 mutex_unlock(&p->mutex); 2165 if (ret) 2166 pr_err("Failed to dump CRIU ret:%d\n", ret); 2167 else 2168 pr_debug("CRIU dump ret:%d\n", ret); 2169 2170 return ret; 2171 } 2172 2173 static int criu_restore_process(struct kfd_process *p, 2174 struct kfd_ioctl_criu_args *args, 2175 uint64_t *priv_offset, 2176 uint64_t max_priv_data_size) 2177 { 2178 int ret = 0; 2179 struct kfd_criu_process_priv_data process_priv; 2180 2181 if (*priv_offset + sizeof(process_priv) > max_priv_data_size) 2182 return -EINVAL; 2183 2184 ret = copy_from_user(&process_priv, 2185 (void __user *)(args->priv_data + *priv_offset), 2186 sizeof(process_priv)); 2187 if (ret) { 2188 pr_err("Failed to copy process private information from user\n"); 2189 ret = -EFAULT; 2190 goto exit; 2191 } 2192 *priv_offset += sizeof(process_priv); 2193 2194 if (process_priv.version != KFD_CRIU_PRIV_VERSION) { 2195 pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n", 2196 process_priv.version, KFD_CRIU_PRIV_VERSION); 2197 return -EINVAL; 2198 } 2199 2200 pr_debug("Setting XNACK mode\n"); 2201 if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) { 2202 pr_err("xnack mode cannot be set\n"); 2203 ret = -EPERM; 2204 goto exit; 2205 } else { 2206 pr_debug("set xnack mode: %d\n", process_priv.xnack_mode); 2207 p->xnack_enabled = process_priv.xnack_mode; 2208 } 2209 2210 exit: 2211 return ret; 2212 } 2213 2214 static int criu_restore_devices(struct kfd_process *p, 2215 struct kfd_ioctl_criu_args *args, 2216 uint64_t *priv_offset, 2217 uint64_t max_priv_data_size) 2218 { 2219 struct kfd_criu_device_bucket *device_buckets; 2220 struct kfd_criu_device_priv_data *device_privs; 2221 int ret = 0; 2222 uint32_t i; 2223 2224 if (args->num_devices != p->n_pdds) 2225 return -EINVAL; 2226 2227 if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size) 2228 return -EINVAL; 2229 2230 device_buckets = kmalloc_objs(*device_buckets, args->num_devices); 2231 if (!device_buckets) 2232 return -ENOMEM; 2233 2234 ret = copy_from_user(device_buckets, (void __user *)args->devices, 2235 args->num_devices * sizeof(*device_buckets)); 2236 if (ret) { 2237 pr_err("Failed to copy devices buckets from user\n"); 2238 ret = -EFAULT; 2239 goto exit; 2240 } 2241 2242 for (i = 0; i < args->num_devices; i++) { 2243 struct kfd_node *dev; 2244 struct kfd_process_device *pdd; 2245 struct file *drm_file; 2246 2247 /* device private data is not currently used */ 2248 2249 if (!device_buckets[i].user_gpu_id) { 2250 pr_err("Invalid user gpu_id\n"); 2251 ret = -EINVAL; 2252 goto exit; 2253 } 2254 2255 dev = kfd_device_by_id(device_buckets[i].actual_gpu_id); 2256 if (!dev) { 2257 pr_err("Failed to find device with gpu_id = %x\n", 2258 device_buckets[i].actual_gpu_id); 2259 ret = -EINVAL; 2260 goto exit; 2261 } 2262 2263 pdd = kfd_get_process_device_data(dev, p); 2264 if (!pdd) { 2265 pr_err("Failed to get pdd for gpu_id = %x\n", 2266 device_buckets[i].actual_gpu_id); 2267 ret = -EINVAL; 2268 goto exit; 2269 } 2270 pdd->user_gpu_id = device_buckets[i].user_gpu_id; 2271 2272 drm_file = fget(device_buckets[i].drm_fd); 2273 if (!drm_file) { 2274 pr_err("Invalid render node file descriptor sent from plugin (%d)\n", 2275 device_buckets[i].drm_fd); 2276 ret = -EINVAL; 2277 goto exit; 2278 } 2279 2280 if (pdd->drm_file) { 2281 ret = -EINVAL; 2282 goto exit; 2283 } 2284 2285 /* create the vm using render nodes for kfd pdd */ 2286 if (kfd_process_device_init_vm(pdd, drm_file)) { 2287 pr_err("could not init vm for given pdd\n"); 2288 /* On success, the PDD keeps the drm_file reference */ 2289 fput(drm_file); 2290 ret = -EINVAL; 2291 goto exit; 2292 } 2293 /* 2294 * pdd now already has the vm bound to render node so below api won't create a new 2295 * exclusive kfd mapping but use existing one with renderDXXX but is still needed 2296 * for iommu v2 binding and runtime pm. 2297 */ 2298 pdd = kfd_bind_process_to_device(dev, p); 2299 if (IS_ERR(pdd)) { 2300 ret = PTR_ERR(pdd); 2301 goto exit; 2302 } 2303 2304 if (!pdd->qpd.proc_doorbells) { 2305 ret = kfd_alloc_process_doorbells(dev->kfd, pdd); 2306 if (ret) 2307 goto exit; 2308 } 2309 } 2310 2311 /* 2312 * We are not copying device private data from user as we are not using the data for now, 2313 * but we still adjust for its private data. 2314 */ 2315 *priv_offset += args->num_devices * sizeof(*device_privs); 2316 2317 exit: 2318 kfree(device_buckets); 2319 return ret; 2320 } 2321 2322 static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, 2323 struct kfd_criu_bo_bucket *bo_bucket, 2324 struct kfd_criu_bo_priv_data *bo_priv, 2325 struct kgd_mem **kgd_mem) 2326 { 2327 int idr_handle; 2328 int ret; 2329 const bool criu_resume = true; 2330 u64 offset; 2331 2332 if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { 2333 if (bo_bucket->size != 2334 kfd_doorbell_process_slice(pdd->dev->kfd)) 2335 return -EINVAL; 2336 2337 offset = kfd_get_process_doorbells(pdd); 2338 if (!offset) 2339 return -ENOMEM; 2340 } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { 2341 /* MMIO BOs need remapped bus address */ 2342 if (bo_bucket->size != PAGE_SIZE) { 2343 pr_err("Invalid page size\n"); 2344 return -EINVAL; 2345 } 2346 offset = pdd->dev->adev->rmmio_remap.bus_addr; 2347 if (!offset || (PAGE_SIZE > 4096)) { 2348 pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n"); 2349 return -ENOMEM; 2350 } 2351 } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { 2352 offset = bo_priv->user_addr; 2353 } 2354 /* Create the BO */ 2355 ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr, 2356 bo_bucket->size, pdd->drm_priv, kgd_mem, 2357 &offset, bo_bucket->alloc_flags, criu_resume); 2358 if (ret) { 2359 pr_err("Could not create the BO\n"); 2360 return ret; 2361 } 2362 pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n", 2363 bo_bucket->size, bo_bucket->addr, offset); 2364 2365 /* Restore previous IDR handle */ 2366 pr_debug("Restoring old IDR handle for the BO"); 2367 idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle, 2368 bo_priv->idr_handle + 1, GFP_KERNEL); 2369 2370 if (idr_handle < 0) { 2371 pr_err("Could not allocate idr\n"); 2372 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv, 2373 NULL); 2374 return -ENOMEM; 2375 } 2376 2377 if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) 2378 bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id); 2379 if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { 2380 bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id); 2381 } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { 2382 bo_bucket->restored_offset = offset; 2383 } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { 2384 bo_bucket->restored_offset = offset; 2385 /* Update the VRAM usage count */ 2386 atomic64_add(bo_bucket->size, &pdd->vram_usage); 2387 } 2388 return 0; 2389 } 2390 2391 static int criu_restore_bo(struct kfd_process *p, 2392 struct kfd_criu_bo_bucket *bo_bucket, 2393 struct kfd_criu_bo_priv_data *bo_priv, 2394 struct file **file) 2395 { 2396 struct kfd_process_device *pdd; 2397 struct kgd_mem *kgd_mem; 2398 int ret; 2399 int j; 2400 2401 pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n", 2402 bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags, 2403 bo_priv->idr_handle); 2404 2405 pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id); 2406 if (!pdd) { 2407 pr_err("Failed to get pdd\n"); 2408 return -ENODEV; 2409 } 2410 2411 ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem); 2412 if (ret) 2413 return ret; 2414 2415 /* now map these BOs to GPU/s */ 2416 for (j = 0; j < p->n_pdds; j++) { 2417 struct kfd_node *peer; 2418 struct kfd_process_device *peer_pdd; 2419 2420 if (!bo_priv->mapped_gpuids[j]) 2421 break; 2422 2423 peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]); 2424 if (!peer_pdd) 2425 return -EINVAL; 2426 2427 peer = peer_pdd->dev; 2428 2429 peer_pdd = kfd_bind_process_to_device(peer, p); 2430 if (IS_ERR(peer_pdd)) 2431 return PTR_ERR(peer_pdd); 2432 2433 ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem, 2434 peer_pdd->drm_priv); 2435 if (ret) { 2436 pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds); 2437 return ret; 2438 } 2439 } 2440 2441 pr_debug("map memory was successful for the BO\n"); 2442 /* create the dmabuf object and export the bo */ 2443 if (bo_bucket->alloc_flags 2444 & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { 2445 ret = criu_get_prime_handle(kgd_mem, DRM_RDWR, 2446 &bo_bucket->dmabuf_fd, file); 2447 if (ret) 2448 return ret; 2449 } else { 2450 bo_bucket->dmabuf_fd = KFD_INVALID_FD; 2451 } 2452 2453 return 0; 2454 } 2455 2456 static int criu_restore_bos(struct kfd_process *p, 2457 struct kfd_ioctl_criu_args *args, 2458 uint64_t *priv_offset, 2459 uint64_t max_priv_data_size) 2460 { 2461 struct kfd_criu_bo_bucket *bo_buckets = NULL; 2462 struct kfd_criu_bo_priv_data *bo_privs = NULL; 2463 struct file **files = NULL; 2464 int ret = 0; 2465 uint32_t i = 0; 2466 2467 if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size) 2468 return -EINVAL; 2469 2470 /* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */ 2471 amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info); 2472 2473 bo_buckets = kvmalloc_objs(*bo_buckets, args->num_bos); 2474 if (!bo_buckets) 2475 return -ENOMEM; 2476 2477 files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL); 2478 if (!files) { 2479 ret = -ENOMEM; 2480 goto exit; 2481 } 2482 2483 ret = copy_from_user(bo_buckets, (void __user *)args->bos, 2484 args->num_bos * sizeof(*bo_buckets)); 2485 if (ret) { 2486 pr_err("Failed to copy BOs information from user\n"); 2487 ret = -EFAULT; 2488 goto exit; 2489 } 2490 2491 bo_privs = kvmalloc_objs(*bo_privs, args->num_bos); 2492 if (!bo_privs) { 2493 ret = -ENOMEM; 2494 goto exit; 2495 } 2496 2497 ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset, 2498 args->num_bos * sizeof(*bo_privs)); 2499 if (ret) { 2500 pr_err("Failed to copy BOs information from user\n"); 2501 ret = -EFAULT; 2502 goto exit; 2503 } 2504 *priv_offset += args->num_bos * sizeof(*bo_privs); 2505 2506 /* Create and map new BOs */ 2507 for (; i < args->num_bos; i++) { 2508 ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]); 2509 if (ret) { 2510 pr_debug("Failed to restore BO[%d] ret%d\n", i, ret); 2511 goto exit; 2512 } 2513 } /* done */ 2514 2515 /* Copy only the buckets back so user can read bo_buckets[N].restored_offset */ 2516 ret = copy_to_user((void __user *)args->bos, 2517 bo_buckets, 2518 (args->num_bos * sizeof(*bo_buckets))); 2519 if (ret) 2520 ret = -EFAULT; 2521 2522 exit: 2523 commit_files(files, bo_buckets, i, ret); 2524 kvfree(files); 2525 kvfree(bo_buckets); 2526 kvfree(bo_privs); 2527 return ret; 2528 } 2529 2530 static int criu_restore_objects(struct file *filep, 2531 struct kfd_process *p, 2532 struct kfd_ioctl_criu_args *args, 2533 uint64_t *priv_offset, 2534 uint64_t max_priv_data_size) 2535 { 2536 int ret = 0; 2537 uint32_t i; 2538 2539 BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type)); 2540 BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type)); 2541 BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type)); 2542 2543 for (i = 0; i < args->num_objects; i++) { 2544 uint32_t object_type; 2545 2546 if (*priv_offset + sizeof(object_type) > max_priv_data_size) { 2547 pr_err("Invalid private data size\n"); 2548 return -EINVAL; 2549 } 2550 2551 ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset)); 2552 if (ret) { 2553 pr_err("Failed to copy private information from user\n"); 2554 goto exit; 2555 } 2556 2557 switch (object_type) { 2558 case KFD_CRIU_OBJECT_TYPE_QUEUE: 2559 ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data, 2560 priv_offset, max_priv_data_size); 2561 if (ret) 2562 goto exit; 2563 break; 2564 case KFD_CRIU_OBJECT_TYPE_EVENT: 2565 ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data, 2566 priv_offset, max_priv_data_size); 2567 if (ret) 2568 goto exit; 2569 break; 2570 case KFD_CRIU_OBJECT_TYPE_SVM_RANGE: 2571 ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data, 2572 priv_offset, max_priv_data_size); 2573 if (ret) 2574 goto exit; 2575 break; 2576 default: 2577 pr_err("Invalid object type:%u at index:%d\n", object_type, i); 2578 ret = -EINVAL; 2579 goto exit; 2580 } 2581 } 2582 exit: 2583 return ret; 2584 } 2585 2586 static int criu_restore(struct file *filep, 2587 struct kfd_process *p, 2588 struct kfd_ioctl_criu_args *args) 2589 { 2590 uint64_t priv_offset = 0; 2591 int ret = 0; 2592 2593 pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n", 2594 args->num_devices, args->num_bos, args->num_objects, args->priv_data_size); 2595 2596 if ((args->num_bos > 0 && !args->bos) || !args->devices || !args->priv_data || 2597 !args->priv_data_size || !args->num_devices) 2598 return -EINVAL; 2599 2600 mutex_lock(&p->mutex); 2601 2602 /* 2603 * Set the process to evicted state to avoid running any new queues before all the memory 2604 * mappings are ready. 2605 */ 2606 ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE); 2607 if (ret) 2608 goto exit_unlock; 2609 2610 /* Each function will adjust priv_offset based on how many bytes they consumed */ 2611 ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size); 2612 if (ret) 2613 goto exit_unlock; 2614 2615 ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size); 2616 if (ret) 2617 goto exit_unlock; 2618 2619 ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size); 2620 if (ret) 2621 goto exit_unlock; 2622 2623 ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size); 2624 if (ret) 2625 goto exit_unlock; 2626 2627 if (priv_offset != args->priv_data_size) { 2628 pr_err("Invalid private data size\n"); 2629 ret = -EINVAL; 2630 } 2631 2632 exit_unlock: 2633 mutex_unlock(&p->mutex); 2634 if (ret) 2635 pr_err("Failed to restore CRIU ret:%d\n", ret); 2636 else 2637 pr_debug("CRIU restore successful\n"); 2638 2639 return ret; 2640 } 2641 2642 static int criu_unpause(struct file *filep, 2643 struct kfd_process *p, 2644 struct kfd_ioctl_criu_args *args) 2645 { 2646 int ret; 2647 2648 mutex_lock(&p->mutex); 2649 2650 if (!p->queues_paused) { 2651 mutex_unlock(&p->mutex); 2652 return -EINVAL; 2653 } 2654 2655 ret = kfd_process_restore_queues(p); 2656 if (ret) 2657 pr_err("Failed to unpause queues ret:%d\n", ret); 2658 else 2659 p->queues_paused = false; 2660 2661 mutex_unlock(&p->mutex); 2662 2663 return ret; 2664 } 2665 2666 static int criu_resume(struct file *filep, 2667 struct kfd_process *p, 2668 struct kfd_ioctl_criu_args *args) 2669 { 2670 struct kfd_process *target = NULL; 2671 struct pid *pid = NULL; 2672 int ret = 0; 2673 2674 pr_debug("Inside %s, target pid for criu restore: %d\n", __func__, 2675 args->pid); 2676 2677 pid = find_get_pid(args->pid); 2678 if (!pid) { 2679 pr_err("Cannot find pid info for %i\n", args->pid); 2680 return -ESRCH; 2681 } 2682 2683 pr_debug("calling kfd_lookup_process_by_pid\n"); 2684 target = kfd_lookup_process_by_pid(pid); 2685 2686 put_pid(pid); 2687 2688 if (!target) { 2689 pr_debug("Cannot find process info for %i\n", args->pid); 2690 return -ESRCH; 2691 } 2692 2693 mutex_lock(&target->mutex); 2694 ret = kfd_criu_resume_svm(target); 2695 if (ret) { 2696 pr_err("kfd_criu_resume_svm failed for %i\n", args->pid); 2697 goto exit; 2698 } 2699 2700 ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info); 2701 if (ret) 2702 pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid); 2703 2704 exit: 2705 mutex_unlock(&target->mutex); 2706 2707 kfd_unref_process(target); 2708 return ret; 2709 } 2710 2711 static int criu_process_info(struct file *filep, 2712 struct kfd_process *p, 2713 struct kfd_ioctl_criu_args *args) 2714 { 2715 int ret = 0; 2716 2717 mutex_lock(&p->mutex); 2718 2719 if (!p->n_pdds) { 2720 pr_err("No pdd for given process\n"); 2721 ret = -ENODEV; 2722 goto err_unlock; 2723 } 2724 2725 ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT); 2726 if (ret) 2727 goto err_unlock; 2728 2729 p->queues_paused = true; 2730 2731 args->pid = task_pid_nr_ns(p->lead_thread, 2732 task_active_pid_ns(p->lead_thread)); 2733 2734 ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos, 2735 &args->num_objects, &args->priv_data_size); 2736 if (ret) 2737 goto err_unlock; 2738 2739 dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n", 2740 args->num_devices, args->num_bos, args->num_objects, 2741 args->priv_data_size); 2742 2743 err_unlock: 2744 if (ret) { 2745 kfd_process_restore_queues(p); 2746 p->queues_paused = false; 2747 } 2748 mutex_unlock(&p->mutex); 2749 return ret; 2750 } 2751 2752 static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data) 2753 { 2754 struct kfd_ioctl_criu_args *args = data; 2755 int ret; 2756 2757 dev_dbg(kfd_device, "CRIU operation: %d\n", args->op); 2758 switch (args->op) { 2759 case KFD_CRIU_OP_PROCESS_INFO: 2760 ret = criu_process_info(filep, p, args); 2761 break; 2762 case KFD_CRIU_OP_CHECKPOINT: 2763 ret = criu_checkpoint(filep, p, args); 2764 break; 2765 case KFD_CRIU_OP_UNPAUSE: 2766 ret = criu_unpause(filep, p, args); 2767 break; 2768 case KFD_CRIU_OP_RESTORE: 2769 ret = criu_restore(filep, p, args); 2770 break; 2771 case KFD_CRIU_OP_RESUME: 2772 ret = criu_resume(filep, p, args); 2773 break; 2774 default: 2775 dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op); 2776 ret = -EINVAL; 2777 break; 2778 } 2779 2780 if (ret) 2781 dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret); 2782 2783 return ret; 2784 } 2785 2786 static int runtime_enable(struct kfd_process *p, uint64_t r_debug, 2787 bool enable_ttmp_setup) 2788 { 2789 int i = 0, ret = 0; 2790 2791 if (p->is_runtime_retry) 2792 goto retry; 2793 2794 if (p->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED) 2795 return -EBUSY; 2796 2797 for (i = 0; i < p->n_pdds; i++) { 2798 struct kfd_process_device *pdd = p->pdds[i]; 2799 2800 if (pdd->qpd.queue_count) 2801 return -EEXIST; 2802 2803 /* 2804 * Setup TTMPs by default. 2805 * Note that this call must remain here for MES ADD QUEUE to 2806 * skip_process_ctx_clear unconditionally as the first call to 2807 * SET_SHADER_DEBUGGER clears any stale process context data 2808 * saved in MES. 2809 */ 2810 if (pdd->dev->kfd->shared_resources.enable_mes) { 2811 ret = kfd_dbg_set_mes_debug_mode( 2812 pdd, !kfd_dbg_has_cwsr_workaround(pdd->dev)); 2813 if (ret) 2814 return ret; 2815 } 2816 } 2817 2818 p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED; 2819 p->runtime_info.r_debug = r_debug; 2820 p->runtime_info.ttmp_setup = enable_ttmp_setup; 2821 2822 if (p->runtime_info.ttmp_setup) { 2823 for (i = 0; i < p->n_pdds; i++) { 2824 struct kfd_process_device *pdd = p->pdds[i]; 2825 2826 if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) { 2827 amdgpu_gfx_off_ctrl(pdd->dev->adev, false); 2828 pdd->dev->kfd2kgd->enable_debug_trap( 2829 pdd->dev->adev, 2830 true, 2831 pdd->dev->vm_info.last_vmid_kfd); 2832 } else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) { 2833 pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap( 2834 pdd->dev->adev, 2835 false, 2836 0); 2837 } 2838 } 2839 } 2840 2841 retry: 2842 if (p->debug_trap_enabled) { 2843 if (!p->is_runtime_retry) { 2844 kfd_dbg_trap_activate(p); 2845 kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME), 2846 p, NULL, 0, false, NULL, 0); 2847 } 2848 2849 mutex_unlock(&p->mutex); 2850 ret = down_interruptible(&p->runtime_enable_sema); 2851 mutex_lock(&p->mutex); 2852 2853 p->is_runtime_retry = !!ret; 2854 } 2855 2856 return ret; 2857 } 2858 2859 static int runtime_disable(struct kfd_process *p) 2860 { 2861 int i = 0, ret = 0; 2862 bool was_enabled = p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED; 2863 2864 p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_DISABLED; 2865 p->runtime_info.r_debug = 0; 2866 2867 if (p->debug_trap_enabled) { 2868 if (was_enabled) 2869 kfd_dbg_trap_deactivate(p, false, 0); 2870 2871 if (!p->is_runtime_retry) 2872 kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME), 2873 p, NULL, 0, false, NULL, 0); 2874 2875 mutex_unlock(&p->mutex); 2876 ret = down_interruptible(&p->runtime_enable_sema); 2877 mutex_lock(&p->mutex); 2878 2879 p->is_runtime_retry = !!ret; 2880 if (ret) 2881 return ret; 2882 } 2883 2884 if (was_enabled && p->runtime_info.ttmp_setup) { 2885 for (i = 0; i < p->n_pdds; i++) { 2886 struct kfd_process_device *pdd = p->pdds[i]; 2887 2888 if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) 2889 amdgpu_gfx_off_ctrl(pdd->dev->adev, true); 2890 } 2891 } 2892 2893 p->runtime_info.ttmp_setup = false; 2894 2895 /* disable ttmp setup */ 2896 for (i = 0; i < p->n_pdds; i++) { 2897 struct kfd_process_device *pdd = p->pdds[i]; 2898 int last_err = 0; 2899 2900 if (kfd_dbg_is_per_vmid_supported(pdd->dev)) { 2901 pdd->spi_dbg_override = 2902 pdd->dev->kfd2kgd->disable_debug_trap( 2903 pdd->dev->adev, 2904 false, 2905 pdd->dev->vm_info.last_vmid_kfd); 2906 2907 if (!pdd->dev->kfd->shared_resources.enable_mes) 2908 last_err = debug_refresh_runlist(pdd->dev->dqm); 2909 else 2910 last_err = kfd_dbg_set_mes_debug_mode(pdd, 2911 !kfd_dbg_has_cwsr_workaround(pdd->dev)); 2912 2913 if (last_err) 2914 ret = last_err; 2915 } 2916 } 2917 2918 return ret; 2919 } 2920 2921 static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data) 2922 { 2923 struct kfd_ioctl_runtime_enable_args *args = data; 2924 int r; 2925 2926 mutex_lock(&p->mutex); 2927 2928 if (args->mode_mask & KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK) 2929 r = runtime_enable(p, args->r_debug, 2930 !!(args->mode_mask & KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK)); 2931 else 2932 r = runtime_disable(p); 2933 2934 mutex_unlock(&p->mutex); 2935 2936 return r; 2937 } 2938 2939 static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data) 2940 { 2941 struct kfd_ioctl_dbg_trap_args *args = data; 2942 struct task_struct *thread = NULL; 2943 struct mm_struct *mm = NULL; 2944 struct pid *pid = NULL; 2945 struct kfd_process *target = NULL; 2946 struct kfd_process_device *pdd = NULL; 2947 int r = 0; 2948 2949 if (p->context_id != KFD_CONTEXT_ID_PRIMARY) { 2950 pr_debug("Set debug trap ioctl can not be invoked on non-primary kfd process\n"); 2951 2952 return -EOPNOTSUPP; 2953 } 2954 2955 if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { 2956 pr_err("Debugging does not support sched_policy %i", sched_policy); 2957 return -EINVAL; 2958 } 2959 2960 pid = find_get_pid(args->pid); 2961 if (!pid) { 2962 pr_debug("Cannot find pid info for %i\n", args->pid); 2963 r = -ESRCH; 2964 goto out; 2965 } 2966 2967 thread = get_pid_task(pid, PIDTYPE_PID); 2968 if (!thread) { 2969 r = -ESRCH; 2970 goto out; 2971 } 2972 2973 mm = get_task_mm(thread); 2974 if (!mm) { 2975 r = -ESRCH; 2976 goto out; 2977 } 2978 2979 if (args->op == KFD_IOC_DBG_TRAP_ENABLE) { 2980 bool create_process; 2981 2982 rcu_read_lock(); 2983 create_process = thread && thread != current && ptrace_parent(thread) == current; 2984 rcu_read_unlock(); 2985 2986 target = create_process ? kfd_create_process(thread) : 2987 kfd_lookup_process_by_pid(pid); 2988 } else { 2989 target = kfd_lookup_process_by_pid(pid); 2990 } 2991 2992 if (IS_ERR_OR_NULL(target)) { 2993 pr_debug("Cannot find process PID %i to debug\n", args->pid); 2994 r = target ? PTR_ERR(target) : -ESRCH; 2995 target = NULL; 2996 goto out; 2997 } 2998 2999 if (target->context_id != KFD_CONTEXT_ID_PRIMARY) { 3000 pr_debug("Set debug trap ioctl not supported on non-primary kfd process\n"); 3001 r = -EOPNOTSUPP; 3002 goto out; 3003 } 3004 3005 /* Check if target is still PTRACED. */ 3006 rcu_read_lock(); 3007 if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE 3008 && ptrace_parent(target->lead_thread) != current) { 3009 pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid); 3010 r = -EPERM; 3011 } 3012 rcu_read_unlock(); 3013 3014 if (r) 3015 goto out; 3016 3017 mutex_lock(&target->mutex); 3018 3019 if (args->op != KFD_IOC_DBG_TRAP_ENABLE && !target->debug_trap_enabled) { 3020 pr_err("PID %i not debug enabled for op %i\n", args->pid, args->op); 3021 r = -EINVAL; 3022 goto unlock_out; 3023 } 3024 3025 if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_ENABLED && 3026 (args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE || 3027 args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE || 3028 args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES || 3029 args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES || 3030 args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH || 3031 args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH || 3032 args->op == KFD_IOC_DBG_TRAP_SET_FLAGS)) { 3033 r = -EPERM; 3034 goto unlock_out; 3035 } 3036 3037 if (args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH || 3038 args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) { 3039 int user_gpu_id = kfd_process_get_user_gpu_id(target, 3040 args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ? 3041 args->set_node_address_watch.gpu_id : 3042 args->clear_node_address_watch.gpu_id); 3043 3044 pdd = kfd_process_device_data_by_id(target, user_gpu_id); 3045 if (user_gpu_id == -EINVAL || !pdd) { 3046 r = -ENODEV; 3047 goto unlock_out; 3048 } 3049 } 3050 3051 switch (args->op) { 3052 case KFD_IOC_DBG_TRAP_ENABLE: 3053 if (target != p) 3054 target->debugger_process = p; 3055 3056 r = kfd_dbg_trap_enable(target, 3057 args->enable.dbg_fd, 3058 (void __user *)args->enable.rinfo_ptr, 3059 &args->enable.rinfo_size); 3060 if (!r) 3061 target->exception_enable_mask = args->enable.exception_mask; 3062 3063 break; 3064 case KFD_IOC_DBG_TRAP_DISABLE: 3065 r = kfd_dbg_trap_disable(target); 3066 break; 3067 case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: 3068 r = kfd_dbg_send_exception_to_runtime(target, 3069 args->send_runtime_event.gpu_id, 3070 args->send_runtime_event.queue_id, 3071 args->send_runtime_event.exception_mask); 3072 break; 3073 case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: 3074 kfd_dbg_set_enabled_debug_exception_mask(target, 3075 args->set_exceptions_enabled.exception_mask); 3076 break; 3077 case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: 3078 r = kfd_dbg_trap_set_wave_launch_override(target, 3079 args->launch_override.override_mode, 3080 args->launch_override.enable_mask, 3081 args->launch_override.support_request_mask, 3082 &args->launch_override.enable_mask, 3083 &args->launch_override.support_request_mask); 3084 break; 3085 case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: 3086 r = kfd_dbg_trap_set_wave_launch_mode(target, 3087 args->launch_mode.launch_mode); 3088 break; 3089 case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: 3090 r = suspend_queues(target, 3091 args->suspend_queues.num_queues, 3092 args->suspend_queues.grace_period, 3093 args->suspend_queues.exception_mask, 3094 (uint32_t *)args->suspend_queues.queue_array_ptr); 3095 3096 break; 3097 case KFD_IOC_DBG_TRAP_RESUME_QUEUES: 3098 r = resume_queues(target, args->resume_queues.num_queues, 3099 (uint32_t *)args->resume_queues.queue_array_ptr); 3100 break; 3101 case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: 3102 r = kfd_dbg_trap_set_dev_address_watch(pdd, 3103 args->set_node_address_watch.address, 3104 args->set_node_address_watch.mask, 3105 &args->set_node_address_watch.id, 3106 args->set_node_address_watch.mode); 3107 break; 3108 case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: 3109 r = kfd_dbg_trap_clear_dev_address_watch(pdd, 3110 args->clear_node_address_watch.id); 3111 break; 3112 case KFD_IOC_DBG_TRAP_SET_FLAGS: 3113 r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags); 3114 break; 3115 case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: 3116 r = kfd_dbg_ev_query_debug_event(target, 3117 &args->query_debug_event.queue_id, 3118 &args->query_debug_event.gpu_id, 3119 args->query_debug_event.exception_mask, 3120 &args->query_debug_event.exception_mask); 3121 break; 3122 case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: 3123 r = kfd_dbg_trap_query_exception_info(target, 3124 args->query_exception_info.source_id, 3125 args->query_exception_info.exception_code, 3126 args->query_exception_info.clear_exception, 3127 (void __user *)args->query_exception_info.info_ptr, 3128 &args->query_exception_info.info_size); 3129 break; 3130 case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: 3131 r = pqm_get_queue_snapshot(&target->pqm, 3132 args->queue_snapshot.exception_mask, 3133 (void __user *)args->queue_snapshot.snapshot_buf_ptr, 3134 &args->queue_snapshot.num_queues, 3135 &args->queue_snapshot.entry_size); 3136 break; 3137 case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: 3138 r = kfd_dbg_trap_device_snapshot(target, 3139 args->device_snapshot.exception_mask, 3140 (void __user *)args->device_snapshot.snapshot_buf_ptr, 3141 &args->device_snapshot.num_devices, 3142 &args->device_snapshot.entry_size); 3143 break; 3144 default: 3145 pr_err("Invalid option: %i\n", args->op); 3146 r = -EINVAL; 3147 } 3148 3149 unlock_out: 3150 mutex_unlock(&target->mutex); 3151 3152 out: 3153 if (thread) 3154 put_task_struct(thread); 3155 3156 if (mm) 3157 mmput(mm); 3158 3159 if (pid) 3160 put_pid(pid); 3161 3162 if (target) 3163 kfd_unref_process(target); 3164 3165 return r; 3166 } 3167 3168 /* userspace programs need to invoke this ioctl explicitly on a FD to 3169 * create a secondary kfd_process which replacing its primary kfd_process 3170 */ 3171 static int kfd_ioctl_create_process(struct file *filep, struct kfd_process *p, void *data) 3172 { 3173 struct kfd_process *process; 3174 int ret; 3175 3176 if (!filep->private_data || !p) 3177 return -EINVAL; 3178 3179 /* Each FD owns only one kfd_process */ 3180 if (p->context_id != KFD_CONTEXT_ID_PRIMARY) 3181 return -EINVAL; 3182 3183 mutex_lock(&kfd_processes_mutex); 3184 if (p != filep->private_data) { 3185 mutex_unlock(&kfd_processes_mutex); 3186 return -EINVAL; 3187 } 3188 3189 process = create_process(current, false); 3190 if (IS_ERR(process)) { 3191 mutex_unlock(&kfd_processes_mutex); 3192 return PTR_ERR(process); 3193 } 3194 3195 filep->private_data = process; 3196 mutex_unlock(&kfd_processes_mutex); 3197 3198 ret = kfd_create_process_sysfs(process); 3199 if (ret) 3200 pr_warn("Failed to create sysfs entry for the kfd_process"); 3201 3202 /* Each open() increases kref of the primary kfd_process, 3203 * so we need to reduce it here when we create a new secondary process replacing it 3204 */ 3205 kfd_unref_process(p); 3206 3207 return 0; 3208 } 3209 3210 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ 3211 [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ 3212 .cmd_drv = 0, .name = #ioctl} 3213 3214 /** Ioctl table */ 3215 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { 3216 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION, 3217 kfd_ioctl_get_version, 0), 3218 3219 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE, 3220 kfd_ioctl_create_queue, 0), 3221 3222 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE, 3223 kfd_ioctl_destroy_queue, 0), 3224 3225 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY, 3226 kfd_ioctl_set_memory_policy, 0), 3227 3228 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS, 3229 kfd_ioctl_get_clock_counters, 0), 3230 3231 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES, 3232 kfd_ioctl_get_process_apertures, 0), 3233 3234 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE, 3235 kfd_ioctl_update_queue, 0), 3236 3237 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT, 3238 kfd_ioctl_create_event, 0), 3239 3240 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT, 3241 kfd_ioctl_destroy_event, 0), 3242 3243 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT, 3244 kfd_ioctl_set_event, 0), 3245 3246 AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT, 3247 kfd_ioctl_reset_event, 0), 3248 3249 AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, 3250 kfd_ioctl_wait_events, 0), 3251 3252 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED, 3253 kfd_ioctl_dbg_register, 0), 3254 3255 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED, 3256 kfd_ioctl_dbg_unregister, 0), 3257 3258 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED, 3259 kfd_ioctl_dbg_address_watch, 0), 3260 3261 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED, 3262 kfd_ioctl_dbg_wave_control, 0), 3263 3264 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, 3265 kfd_ioctl_set_scratch_backing_va, 0), 3266 3267 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, 3268 kfd_ioctl_get_tile_config, 0), 3269 3270 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, 3271 kfd_ioctl_set_trap_handler, 0), 3272 3273 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, 3274 kfd_ioctl_get_process_apertures_new, 0), 3275 3276 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM, 3277 kfd_ioctl_acquire_vm, 0), 3278 3279 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, 3280 kfd_ioctl_alloc_memory_of_gpu, 0), 3281 3282 AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU, 3283 kfd_ioctl_free_memory_of_gpu, 0), 3284 3285 AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU, 3286 kfd_ioctl_map_memory_to_gpu, 0), 3287 3288 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, 3289 kfd_ioctl_unmap_memory_from_gpu, 0), 3290 3291 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK, 3292 kfd_ioctl_set_cu_mask, 0), 3293 3294 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE, 3295 kfd_ioctl_get_queue_wave_state, 0), 3296 3297 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO, 3298 kfd_ioctl_get_dmabuf_info, 0), 3299 3300 AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, 3301 kfd_ioctl_import_dmabuf, 0), 3302 3303 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, 3304 kfd_ioctl_alloc_queue_gws, 0), 3305 3306 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, 3307 kfd_ioctl_smi_events, 0), 3308 3309 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0), 3310 3311 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, 3312 kfd_ioctl_set_xnack_mode, 0), 3313 3314 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP, 3315 kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE), 3316 3317 AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, 3318 kfd_ioctl_get_available_memory, 0), 3319 3320 AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF, 3321 kfd_ioctl_export_dmabuf, 0), 3322 3323 AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE, 3324 kfd_ioctl_runtime_enable, 0), 3325 3326 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP, 3327 kfd_ioctl_set_debug_trap, 0), 3328 3329 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_PROCESS, 3330 kfd_ioctl_create_process, 0), 3331 }; 3332 3333 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) 3334 3335 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) 3336 { 3337 struct kfd_process *process; 3338 amdkfd_ioctl_t *func; 3339 const struct amdkfd_ioctl_desc *ioctl = NULL; 3340 unsigned int nr = _IOC_NR(cmd); 3341 char stack_kdata[128]; 3342 char *kdata = NULL; 3343 unsigned int usize, asize; 3344 int retcode = -EINVAL; 3345 bool ptrace_attached = false; 3346 3347 if (nr >= AMDKFD_CORE_IOCTL_COUNT) { 3348 retcode = -ENOTTY; 3349 goto err_i1; 3350 } 3351 3352 if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { 3353 u32 amdkfd_size; 3354 3355 ioctl = &amdkfd_ioctls[nr]; 3356 3357 amdkfd_size = _IOC_SIZE(ioctl->cmd); 3358 usize = asize = _IOC_SIZE(cmd); 3359 if (amdkfd_size > asize) 3360 asize = amdkfd_size; 3361 3362 cmd = ioctl->cmd; 3363 } else { 3364 retcode = -ENOTTY; 3365 goto err_i1; 3366 } 3367 3368 dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg); 3369 3370 /* Get the process struct from the filep. Only the process 3371 * that opened /dev/kfd can use the file descriptor. Child 3372 * processes need to create their own KFD device context. 3373 */ 3374 process = filep->private_data; 3375 3376 rcu_read_lock(); 3377 if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) && 3378 ptrace_parent(process->lead_thread) == current) 3379 ptrace_attached = true; 3380 rcu_read_unlock(); 3381 3382 if (process->lead_thread != current->group_leader 3383 && !ptrace_attached) { 3384 dev_dbg(kfd_device, "Using KFD FD in wrong process\n"); 3385 retcode = -EBADF; 3386 goto err_i1; 3387 } 3388 3389 /* Do not trust userspace, use our own definition */ 3390 func = ioctl->func; 3391 3392 if (unlikely(!func)) { 3393 dev_dbg(kfd_device, "no function\n"); 3394 retcode = -EINVAL; 3395 goto err_i1; 3396 } 3397 3398 /* 3399 * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support 3400 * CAP_CHECKPOINT_RESTORE, so we also allow access if CAP_SYS_ADMIN as CAP_SYS_ADMIN is a 3401 * more priviledged access. 3402 */ 3403 if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) { 3404 if (!capable(CAP_CHECKPOINT_RESTORE) && 3405 !capable(CAP_SYS_ADMIN)) { 3406 retcode = -EACCES; 3407 goto err_i1; 3408 } 3409 } 3410 3411 if (cmd & (IOC_IN | IOC_OUT)) { 3412 if (asize <= sizeof(stack_kdata)) { 3413 kdata = stack_kdata; 3414 } else { 3415 kdata = kmalloc(asize, GFP_KERNEL); 3416 if (!kdata) { 3417 retcode = -ENOMEM; 3418 goto err_i1; 3419 } 3420 } 3421 if (asize > usize) 3422 memset(kdata + usize, 0, asize - usize); 3423 } 3424 3425 if (cmd & IOC_IN) { 3426 if (copy_from_user(kdata, (void __user *)arg, usize) != 0) { 3427 retcode = -EFAULT; 3428 goto err_i1; 3429 } 3430 } else if (cmd & IOC_OUT) { 3431 memset(kdata, 0, usize); 3432 } 3433 3434 retcode = func(filep, process, kdata); 3435 3436 if (cmd & IOC_OUT) 3437 if (copy_to_user((void __user *)arg, kdata, usize) != 0) 3438 retcode = -EFAULT; 3439 3440 err_i1: 3441 if (!ioctl) 3442 dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", 3443 task_pid_nr(current), cmd, nr); 3444 3445 if (kdata != stack_kdata) 3446 kfree(kdata); 3447 3448 if (retcode) 3449 dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n", 3450 nr, arg, retcode); 3451 3452 return retcode; 3453 } 3454 3455 static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process, 3456 struct vm_area_struct *vma) 3457 { 3458 phys_addr_t address; 3459 3460 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 3461 return -EINVAL; 3462 3463 if (PAGE_SIZE > 4096) 3464 return -EINVAL; 3465 3466 address = dev->adev->rmmio_remap.bus_addr; 3467 3468 vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | 3469 VM_DONTDUMP | VM_PFNMAP); 3470 3471 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 3472 3473 pr_debug("process pid %d mapping mmio page\n" 3474 " target user address == 0x%08llX\n" 3475 " physical address == 0x%08llX\n" 3476 " vm_flags == 0x%04lX\n" 3477 " size == 0x%04lX\n", 3478 process->lead_thread->pid, (unsigned long long) vma->vm_start, 3479 address, vma->vm_flags, PAGE_SIZE); 3480 3481 return io_remap_pfn_range(vma, 3482 vma->vm_start, 3483 address >> PAGE_SHIFT, 3484 PAGE_SIZE, 3485 vma->vm_page_prot); 3486 } 3487 3488 3489 static int kfd_mmap(struct file *filep, struct vm_area_struct *vma) 3490 { 3491 struct kfd_process *process; 3492 struct kfd_node *dev = NULL; 3493 unsigned long mmap_offset; 3494 unsigned int gpu_id; 3495 3496 process = filep->private_data; 3497 if (!process) 3498 return -ESRCH; 3499 3500 if (process->lead_thread != current->group_leader) 3501 return -EBADF; 3502 3503 mmap_offset = vma->vm_pgoff << PAGE_SHIFT; 3504 gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset); 3505 if (gpu_id) 3506 dev = kfd_device_by_id(gpu_id); 3507 3508 switch (mmap_offset & KFD_MMAP_TYPE_MASK) { 3509 case KFD_MMAP_TYPE_DOORBELL: 3510 if (!dev) 3511 return -ENODEV; 3512 return kfd_doorbell_mmap(dev, process, vma); 3513 3514 case KFD_MMAP_TYPE_EVENTS: 3515 return kfd_event_mmap(process, vma); 3516 3517 case KFD_MMAP_TYPE_RESERVED_MEM: 3518 if (!dev) 3519 return -ENODEV; 3520 return kfd_reserved_mem_mmap(dev, process, vma); 3521 case KFD_MMAP_TYPE_MMIO: 3522 if (!dev) 3523 return -ENODEV; 3524 return kfd_mmio_mmap(dev, process, vma); 3525 } 3526 3527 return -EFAULT; 3528 } 3529