/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/mutex.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
#include <linux/compat.h>

struct mm_struct;

#include "kfd_priv.h"
#include "kfd_dbgmgr.h"

/*
 * Initial size for the array of queues.
 * The allocated size is doubled each time
 * it is exceeded up to MAX_PROCESS_QUEUES.
 */
#define INITIAL_QUEUE_ARRAY_SIZE 16

/*
 * List of struct kfd_process (field kfd_process).
 * Unique/indexed by mm_struct*
 */
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_STATIC_SRCU(kfd_processes_srcu);

static struct workqueue_struct *kfd_process_wq;

struct kfd_process_release_work {
	struct work_struct kfd_work;
	struct kfd_process *p;
};

static struct kfd_process *find_process(const struct task_struct *thread);
static struct kfd_process *create_process(const struct task_struct *thread);

void kfd_process_create_wq(void)
{
	if (!kfd_process_wq)
		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
}

void kfd_process_destroy_wq(void)
{
	if (kfd_process_wq) {
		destroy_workqueue(kfd_process_wq);
		kfd_process_wq = NULL;
	}
}

struct kfd_process *kfd_create_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	/* Take mmap_sem because we call __mmu_notifier_register inside */
	down_write(&thread->mm->mmap_sem);

	/*
	 * Take the kfd processes mutex before starting process creation
	 * so that two threads of the same process cannot create two
	 * kfd_process structures.
	 */
	mutex_lock(&kfd_processes_mutex);

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread);
	if (process)
		pr_debug("Process already found\n");

	if (!process)
		process = create_process(thread);

	mutex_unlock(&kfd_processes_mutex);

	up_write(&thread->mm->mmap_sem);

	return process;
}

struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
	struct kfd_process *process;

	if (!thread->mm)
		return ERR_PTR(-EINVAL);

	/* Only the pthreads threading model is supported. */
	if (thread->group_leader->mm != thread->mm)
		return ERR_PTR(-EINVAL);

	process = find_process(thread);

	return process;
}

static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
{
	struct kfd_process *process;

	hash_for_each_possible_rcu(kfd_processes_table, process,
					kfd_processes, (uintptr_t)mm)
		if (process->mm == mm)
			return process;

	return NULL;
}

static struct kfd_process *find_process(const struct task_struct *thread)
{
	struct kfd_process *p;
	int idx;

	idx = srcu_read_lock(&kfd_processes_srcu);
	p = find_process_by_mm(thread->mm);
	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}

static void kfd_process_wq_release(struct work_struct *work)
{
	struct kfd_process_release_work *my_work;
	struct kfd_process_device *pdd, *temp;
	struct kfd_process *p;

	my_work = (struct kfd_process_release_work *) work;

	p = my_work->p;

	pr_debug("Releasing process (pasid %d) in workqueue\n",
			p->pasid);

	mutex_lock(&p->mutex);

	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
							per_device_list) {
		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
				pdd->dev->id, p->pasid);

		if (pdd->reset_wavefronts)
			dbgdev_wave_reset_wavefronts(pdd->dev, p);

		amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
		list_del(&pdd->per_device_list);

		kfree(pdd);
	}

	kfd_event_free_process(p);

	kfd_pasid_free(p->pasid);
	kfd_free_process_doorbells(p);

	mutex_unlock(&p->mutex);

	mutex_destroy(&p->mutex);

	kfree(p->queues);

	kfree(p);

	kfree(work);
}

static void kfd_process_destroy_delayed(struct rcu_head *rcu)
{
	struct kfd_process_release_work *work;
	struct kfd_process *p;

	p = container_of(rcu, struct kfd_process, rcu);
	WARN_ON(atomic_read(&p->mm->mm_count) <= 0);

	mmdrop(p->mm);

	work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);

	if (work) {
		INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
		work->p = p;
		queue_work(kfd_process_wq, (struct work_struct *) work);
	}
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd = NULL;

	/*
	 * The kfd_process structure cannot be freed because the
	 * mmu_notifier SRCU is read-locked.
	 */
	p = container_of(mn, struct kfd_process, mmu_notifier);
	if (WARN_ON(p->mm != mm))
		return;

	mutex_lock(&kfd_processes_mutex);
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	mutex_lock(&p->mutex);

	/* In case our notifier is called before IOMMU notifier */
	pqm_uninit(&p->pqm);

	/*
	 * Iterate over all process device data structures and check
	 * if we should delete debug managers and reset all wavefronts.
	 */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		if ((pdd->dev->dbgmgr) &&
				(pdd->dev->dbgmgr->pasid == p->pasid))
			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);

		if (pdd->reset_wavefronts) {
			pr_warn("Resetting all wave fronts\n");
			dbgdev_wave_reset_wavefronts(pdd->dev, p);
			pdd->reset_wavefronts = false;
		}
	}

	mutex_unlock(&p->mutex);

	/*
	 * Because we drop mm_count inside kfd_process_destroy_delayed
	 * and because the mmu_notifier_unregister function also drops
	 * mm_count, we need to take an extra reference here.
	 */
	mmgrab(p->mm);
	mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.release = kfd_process_notifier_release,
};

static struct kfd_process *create_process(const struct task_struct *thread)
{
	struct kfd_process *process;
	int err = -ENOMEM;

	process = kzalloc(sizeof(*process), GFP_KERNEL);

	if (!process)
		goto err_alloc_process;

	process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
					sizeof(process->queues[0]), GFP_KERNEL);
	if (!process->queues)
		goto err_alloc_queues;

	process->pasid = kfd_pasid_alloc();
	if (process->pasid == 0)
		goto err_alloc_pasid;

	if (kfd_alloc_process_doorbells(process) < 0)
		goto err_alloc_doorbells;

	mutex_init(&process->mutex);

	process->mm = thread->mm;

	/* register notifier */
	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
	err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
	if (err)
		goto err_mmu_notifier;

	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
			(uintptr_t)process->mm);

	process->lead_thread = thread->group_leader;

	process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;

	INIT_LIST_HEAD(&process->per_device_data);

	kfd_event_init_process(process);

	err = pqm_init(&process->pqm, process);
	if (err != 0)
		goto err_process_pqm_init;

	/* init process apertures */
	process->is_32bit_user_mode = in_compat_syscall();
	err = kfd_init_apertures(process);
	if (err != 0)
		goto err_init_apertures;

	return process;

err_init_apertures:
	pqm_uninit(&process->pqm);
err_process_pqm_init:
	hash_del_rcu(&process->kfd_processes);
	synchronize_rcu();
	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
err_mmu_notifier:
	mutex_destroy(&process->mutex);
	kfd_free_process_doorbells(process);
err_alloc_doorbells:
	kfd_pasid_free(process->pasid);
err_alloc_pasid:
	kfree(process->queues);
err_alloc_queues:
	kfree(process);
err_alloc_process:
	return ERR_PTR(err);
}

/* Returns NULL if the device has no pdd on this process' list. */
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;

	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
		if (pdd->dev == dev)
			return pdd;

	return NULL;
}

struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd = NULL;

	pdd = kzalloc(sizeof(*pdd), GFP_KERNEL);
	if (pdd != NULL) {
		pdd->dev = dev;
		INIT_LIST_HEAD(&pdd->qpd.queues_list);
		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
		pdd->qpd.dqm = dev->dqm;
		pdd->reset_wavefronts = false;
		list_add(&pdd->per_device_list, &p->per_device_data);
	}

	return pdd;
}

/*
 * Direct the IOMMU to bind the process (specifically the pasid->mm)
 * to the device.
 * Unbinding occurs when the process dies or the device is removed.
 *
 * Assumes that the process lock is held.
 */
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
							struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int err;

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

	if (pdd->bound)
		return pdd;

	err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
	if (err < 0)
		return ERR_PTR(err);

	pdd->bound = true;

	return pdd;
}

void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
{
	struct kfd_process *p;
	struct kfd_process_device *pdd;

	/*
	 * Look for the process that matches the pasid. If there is no such
	 * process, we either released it in amdkfd's own notifier, or there
	 * is a bug. Unfortunately, there is no way to tell...
	 */
	p = kfd_lookup_process_by_pasid(pasid);
	if (!p)
		return;

	pr_debug("Unbinding process %d from IOMMU\n", pasid);

	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
		kfd_dbgmgr_destroy(dev->dbgmgr);

	pqm_uninit(&p->pqm);

	pdd = kfd_get_process_device_data(dev, p);

	if (!pdd) {
		mutex_unlock(&p->mutex);
		return;
	}

	if (pdd->reset_wavefronts) {
		dbgdev_wave_reset_wavefronts(pdd->dev, p);
		pdd->reset_wavefronts = false;
	}

	/*
	 * Just mark pdd as unbound, because we still need it
	 * to call amd_iommu_unbind_pasid() when the process exits.
	 * We don't call amd_iommu_unbind_pasid() here
	 * because the IOMMU called us.
	 */
	pdd->bound = false;

	mutex_unlock(&p->mutex);
}

struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p)
{
	return list_first_entry(&p->per_device_data,
				struct kfd_process_device,
				per_device_list);
}

struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd)
{
	if (list_is_last(&pdd->per_device_list, &p->per_device_data))
		return NULL;
	return list_next_entry(pdd, per_device_list);
}

bool kfd_has_process_device_data(struct kfd_process *p)
{
	return !(list_empty(&p->per_device_data));
}

/*
 * Returns the matching process with process->mutex locked,
 * or NULL if no process with this pasid is found.
 */
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
{
	struct kfd_process *p;
	unsigned int temp;

	int idx = srcu_read_lock(&kfd_processes_srcu);

	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
		if (p->pasid == pasid) {
			mutex_lock(&p->mutex);
			break;
		}
	}

	srcu_read_unlock(&kfd_processes_srcu, idx);

	return p;
}