1 /* 2 * Regression1 3 * Description: 4 * Salman Qazi describes the following radix-tree bug: 5 * 6 * In the following case, we get can get a deadlock: 7 * 8 * 0. The radix tree contains two items, one has the index 0. 9 * 1. The reader (in this case find_get_pages) takes the rcu_read_lock. 10 * 2. The reader acquires slot(s) for item(s) including the index 0 item. 11 * 3. The non-zero index item is deleted, and as a consequence the other item 12 * is moved to the root of the tree. The place where it used to be is queued 13 * for deletion after the readers finish. 14 * 3b. The zero item is deleted, removing it from the direct slot, it remains in 15 * the rcu-delayed indirect node. 16 * 4. The reader looks at the index 0 slot, and finds that the page has 0 ref 17 * count 18 * 5. The reader looks at it again, hoping that the item will either be freed 19 * or the ref count will increase. This never happens, as the slot it is 20 * looking at will never be updated. Also, this slot can never be reclaimed 21 * because the reader is holding rcu_read_lock and is in an infinite loop. 22 * 23 * The fix is to re-use the same "indirect" pointer case that requires a slot 24 * lookup retry into a general "retry the lookup" bit. 25 * 26 * Running: 27 * This test should run to completion in a few seconds. The above bug would 28 * cause it to hang indefinitely. 29 * 30 * Upstream commit: 31 * Not yet 32 */ 33 #include <linux/kernel.h> 34 #include <linux/gfp.h> 35 #include <linux/slab.h> 36 #include <linux/radix-tree.h> 37 #include <linux/rcupdate.h> 38 #include <stdlib.h> 39 #include <pthread.h> 40 #include <stdio.h> 41 #include <assert.h> 42 43 #include "regression.h" 44 45 static RADIX_TREE(mt_tree, GFP_KERNEL); 46 static pthread_mutex_t mt_lock; 47 48 struct page { 49 pthread_mutex_t lock; 50 struct rcu_head rcu; 51 int count; 52 unsigned long index; 53 }; 54 55 static struct page *page_alloc(void) 56 { 57 struct page *p; 58 p = malloc(sizeof(struct page)); 59 p->count = 1; 60 p->index = 1; 61 pthread_mutex_init(&p->lock, NULL); 62 63 return p; 64 } 65 66 static void page_rcu_free(struct rcu_head *rcu) 67 { 68 struct page *p = container_of(rcu, struct page, rcu); 69 assert(!p->count); 70 pthread_mutex_destroy(&p->lock); 71 free(p); 72 } 73 74 static void page_free(struct page *p) 75 { 76 call_rcu(&p->rcu, page_rcu_free); 77 } 78 79 static unsigned find_get_pages(unsigned long start, 80 unsigned int nr_pages, struct page **pages) 81 { 82 unsigned int i; 83 unsigned int ret; 84 unsigned int nr_found; 85 86 rcu_read_lock(); 87 restart: 88 nr_found = radix_tree_gang_lookup_slot(&mt_tree, 89 (void ***)pages, NULL, start, nr_pages); 90 ret = 0; 91 for (i = 0; i < nr_found; i++) { 92 struct page *page; 93 repeat: 94 page = radix_tree_deref_slot((void **)pages[i]); 95 if (unlikely(!page)) 96 continue; 97 98 if (radix_tree_exception(page)) { 99 if (radix_tree_deref_retry(page)) { 100 /* 101 * Transient condition which can only trigger 102 * when entry at index 0 moves out of or back 103 * to root: none yet gotten, safe to restart. 104 */ 105 assert((start | i) == 0); 106 goto restart; 107 } 108 /* 109 * No exceptional entries are inserted in this test. 110 */ 111 assert(0); 112 } 113 114 pthread_mutex_lock(&page->lock); 115 if (!page->count) { 116 pthread_mutex_unlock(&page->lock); 117 goto repeat; 118 } 119 /* don't actually update page refcount */ 120 pthread_mutex_unlock(&page->lock); 121 122 /* Has the page moved? */ 123 if (unlikely(page != *((void **)pages[i]))) { 124 goto repeat; 125 } 126 127 pages[ret] = page; 128 ret++; 129 } 130 rcu_read_unlock(); 131 return ret; 132 } 133 134 static pthread_barrier_t worker_barrier; 135 136 static void *regression1_fn(void *arg) 137 { 138 rcu_register_thread(); 139 140 if (pthread_barrier_wait(&worker_barrier) == 141 PTHREAD_BARRIER_SERIAL_THREAD) { 142 int j; 143 144 for (j = 0; j < 1000000; j++) { 145 struct page *p; 146 147 p = page_alloc(); 148 pthread_mutex_lock(&mt_lock); 149 radix_tree_insert(&mt_tree, 0, p); 150 pthread_mutex_unlock(&mt_lock); 151 152 p = page_alloc(); 153 pthread_mutex_lock(&mt_lock); 154 radix_tree_insert(&mt_tree, 1, p); 155 pthread_mutex_unlock(&mt_lock); 156 157 pthread_mutex_lock(&mt_lock); 158 p = radix_tree_delete(&mt_tree, 1); 159 pthread_mutex_lock(&p->lock); 160 p->count--; 161 pthread_mutex_unlock(&p->lock); 162 pthread_mutex_unlock(&mt_lock); 163 page_free(p); 164 165 pthread_mutex_lock(&mt_lock); 166 p = radix_tree_delete(&mt_tree, 0); 167 pthread_mutex_lock(&p->lock); 168 p->count--; 169 pthread_mutex_unlock(&p->lock); 170 pthread_mutex_unlock(&mt_lock); 171 page_free(p); 172 } 173 } else { 174 int j; 175 176 for (j = 0; j < 100000000; j++) { 177 struct page *pages[10]; 178 179 find_get_pages(0, 10, pages); 180 } 181 } 182 183 rcu_unregister_thread(); 184 185 return NULL; 186 } 187 188 static pthread_t *threads; 189 void regression1_test(void) 190 { 191 int nr_threads; 192 int i; 193 long arg; 194 195 /* Regression #1 */ 196 printf("running regression test 1, should finish in under a minute\n"); 197 nr_threads = 2; 198 pthread_barrier_init(&worker_barrier, NULL, nr_threads); 199 200 threads = malloc(nr_threads * sizeof(pthread_t *)); 201 202 for (i = 0; i < nr_threads; i++) { 203 arg = i; 204 if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) { 205 perror("pthread_create"); 206 exit(1); 207 } 208 } 209 210 for (i = 0; i < nr_threads; i++) { 211 if (pthread_join(threads[i], NULL)) { 212 perror("pthread_join"); 213 exit(1); 214 } 215 } 216 217 free(threads); 218 219 printf("regression test 1, done\n"); 220 } 221