1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2024 Advanced Micro Devices, Inc. 4 * 5 * Lockdep annotation for AMDGPU lock ordering 6 * 7 * This module teaches lockdep the correct lock ordering to catch 8 * potential deadlocks at development time rather than runtime. 9 * 10 * Based on dma-resv lockdep approach from: 11 * drivers/dma-buf/dma-resv.c:dma_resv_lockdep() 12 */ 13 14 #include "amdgpu.h" 15 #include "amdgpu_reset.h" 16 17 #ifdef CONFIG_LOCKDEP 18 19 /* Lock class keys for associating with real driver locks */ 20 static struct lock_class_key amdgpu_userq_sch_mutex_key; 21 static struct lock_class_key amdgpu_userq_mutex_key; 22 static struct lock_class_key amdgpu_notifier_lock_key; 23 static struct lock_class_key amdgpu_vram_lock_key; 24 static struct lock_class_key amdgpu_reset_sem_key; 25 static struct lock_class_key amdgpu_reset_lock_key; 26 static struct lock_class_key amdgpu_srbm_lock_key; 27 static struct lock_class_key amdgpu_grbm_lock_key; 28 static struct lock_class_key amdgpu_mmio_lock_key; 29 30 /** 31 * amdgpu_lockdep_set_class - Associate lock class keys with real locks 32 * @adev: AMDGPU device 33 * 34 * Call during device init to associate lock classes with actual locks 35 * so lockdep can track them properly. 36 */ 37 void amdgpu_lockdep_set_class(struct amdgpu_device *adev) 38 { 39 lockdep_set_class(&adev->gfx.userq_sch_mutex, 40 &amdgpu_userq_sch_mutex_key); 41 lockdep_set_class(&adev->notifier_lock, &amdgpu_notifier_lock_key); 42 lockdep_set_class(&adev->srbm_mutex, &amdgpu_srbm_lock_key); 43 lockdep_set_class(&adev->grbm_idx_mutex, &amdgpu_grbm_lock_key); 44 lockdep_set_class(&adev->mmio_idx_lock, &amdgpu_mmio_lock_key); 45 46 if (adev->reset_domain) 47 lockdep_set_class(&adev->reset_domain->sem, 48 &amdgpu_reset_sem_key); 49 } 50 51 /** 52 * amdgpu_lockdep_init - Teach lockdep the correct lock ordering 53 * 54 * Instantiates dummy objects and takes locks in the correct order to 55 * train lockdep. This helps catch lock ordering violations during 56 * development. 57 * 58 * Lock ordering hierarchy (outermost to innermost): 59 * 60 * 1. userq_sch_mutex - Global userq scheduler (enforce_isolation) 61 * 2. userq_mutex - Per-context userq (held across queue create/destroy) 62 * 3. notifier_lock - MMU notifier lock 63 * 4. vram_lock - VRAM allocator lock 64 * 5. reset_domain->sem - GPU reset synchronization 65 * 6. reset_lock - Reset control lock 66 * 7. srbm_mutex - SRBM register access 67 * 8. grbm_idx_mutex - GRBM index access 68 * 9. mmio_idx_lock - MMIO index access (spinlock) 69 * 70 * Evidence: 71 * - userq_sch_mutex -> userq_mutex: amdgpu_gfx_kfd_sch_ctrl() calls 72 * amdgpu_userq_stop_sched_for_enforce_isolation() which takes userq_mutex 73 * - userq_mutex -> notifier_lock: userq paths may trigger MMU notifier 74 * invalidation which acquires notifier_lock 75 * - notifier_lock -> reset_domain->sem: HMM invalidation callback holds 76 * notifier_lock and can wait for GPU reset completion, so notifier_lock 77 * must be outer to reset_domain->sem 78 * - vram_lock -> reset_domain->sem: VRAM management paths may need to 79 * wait for ongoing reset to complete 80 * 81 * Note: mmap_lock ordering relative to GPU locks is already taught 82 * by dma-resv (drivers/dma-buf/dma-resv.c). 83 */ 84 int amdgpu_lockdep_init(void) 85 { 86 struct amdgpu_reset_domain *reset_domain = NULL; 87 struct amdgpu_reset_control reset_ctl; 88 struct mutex userq_sch_mutex; 89 struct mutex userq_mutex; 90 struct mutex notifier_lock; 91 struct mutex vram_lock; 92 struct mutex srbm_mutex; 93 struct mutex grbm_idx_mutex; 94 spinlock_t mmio_idx_lock; 95 unsigned long flags; 96 97 /* 98 * Initialize dummy reset domain 99 */ 100 reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, 101 "lockdep_test"); 102 if (!reset_domain) 103 return -ENOMEM; 104 105 /* Initialize dummy locks */ 106 mutex_init(&userq_sch_mutex); 107 mutex_init(&userq_mutex); 108 mutex_init(¬ifier_lock); 109 mutex_init(&vram_lock); 110 mutex_init(&reset_ctl.reset_lock); 111 mutex_init(&srbm_mutex); 112 mutex_init(&grbm_idx_mutex); 113 spin_lock_init(&mmio_idx_lock); 114 115 /* 116 * Associate dummy locks with the same class keys used for real 117 * driver locks. This ensures lockdep connects the ordering learned 118 * here with the actual locks used at runtime. 119 */ 120 lockdep_set_class(&userq_sch_mutex, &amdgpu_userq_sch_mutex_key); 121 lockdep_set_class(&userq_mutex, &amdgpu_userq_mutex_key); 122 lockdep_set_class(¬ifier_lock, &amdgpu_notifier_lock_key); 123 lockdep_set_class(&vram_lock, &amdgpu_vram_lock_key); 124 lockdep_set_class(&reset_domain->sem, &amdgpu_reset_sem_key); 125 lockdep_set_class(&reset_ctl.reset_lock, &amdgpu_reset_lock_key); 126 lockdep_set_class(&srbm_mutex, &amdgpu_srbm_lock_key); 127 lockdep_set_class(&grbm_idx_mutex, &amdgpu_grbm_lock_key); 128 lockdep_set_class(&mmio_idx_lock, &amdgpu_mmio_lock_key); 129 130 /* 131 * Take locks in the correct order to train lockdep. 132 * This establishes the dependency chain. 133 */ 134 135 /* Level 1: Global userq scheduler mutex (outermost) */ 136 mutex_lock(&userq_sch_mutex); 137 138 /* Level 2: Per-context userq mutex */ 139 mutex_lock(&userq_mutex); 140 141 /* Level 3: MMU notifier lock */ 142 mutex_lock(¬ifier_lock); 143 144 /* Level 4: VRAM allocator lock */ 145 mutex_lock(&vram_lock); 146 147 /* Level 5: Reset domain semaphore */ 148 down_read(&reset_domain->sem); 149 150 /* Level 6: Reset control lock */ 151 mutex_lock(&reset_ctl.reset_lock); 152 153 /* 154 * Mark potential memory reclaim boundary. 155 * GPU operations might trigger memory allocation/reclaim. 156 */ 157 fs_reclaim_acquire(GFP_KERNEL); 158 159 /* Level 7: SRBM register access */ 160 mutex_lock(&srbm_mutex); 161 162 /* Level 8: GRBM index access */ 163 mutex_lock(&grbm_idx_mutex); 164 165 /* Level 9: MMIO index access (innermost lock, spinlock) */ 166 spin_lock_irqsave(&mmio_idx_lock, flags); 167 168 /* 169 * All locks acquired in order. 170 * Lockdep has now learned the valid dependency chain. 171 */ 172 173 /* Release in reverse order */ 174 spin_unlock_irqrestore(&mmio_idx_lock, flags); 175 mutex_unlock(&grbm_idx_mutex); 176 mutex_unlock(&srbm_mutex); 177 178 fs_reclaim_release(GFP_KERNEL); 179 180 mutex_unlock(&reset_ctl.reset_lock); 181 up_read(&reset_domain->sem); 182 mutex_unlock(&vram_lock); 183 mutex_unlock(¬ifier_lock); 184 mutex_unlock(&userq_mutex); 185 mutex_unlock(&userq_sch_mutex); 186 187 /* Cleanup */ 188 amdgpu_reset_put_reset_domain(reset_domain); 189 190 pr_info("AMDGPU: Lockdep annotations initialized (9 lock levels)\n"); 191 192 return 0; 193 } 194 195 #endif /* CONFIG_LOCKDEP */ 196