1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <pthread.h> 4 #include <sys/shm.h> 5 #include <sys/mman.h> 6 #include <fcntl.h> 7 #include <stdbool.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <time.h> 11 #include <assert.h> 12 #include "futextest.h" 13 #include "futex2test.h" 14 15 typedef u_int32_t u32; 16 typedef int32_t s32; 17 typedef u_int64_t u64; 18 19 static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE); 20 static int fnode = FUTEX_NO_NODE; 21 22 /* fairly stupid test-and-set lock with a waiter flag */ 23 24 #define N_LOCK 0x0000001 25 #define N_WAITERS 0x0001000 26 27 struct futex_numa_32 { 28 union { 29 u64 full; 30 struct { 31 u32 val; 32 u32 node; 33 }; 34 }; 35 }; 36 37 void futex_numa_32_lock(struct futex_numa_32 *lock) 38 { 39 for (;;) { 40 struct futex_numa_32 new, old = { 41 .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED), 42 }; 43 44 for (;;) { 45 new = old; 46 if (old.val == 0) { 47 /* no waiter, no lock -> first lock, set no-node */ 48 new.node = fnode; 49 } 50 if (old.val & N_LOCK) { 51 /* contention, set waiter */ 52 new.val |= N_WAITERS; 53 } 54 new.val |= N_LOCK; 55 56 /* nothing changed, ready to block */ 57 if (old.full == new.full) 58 break; 59 60 /* 61 * Use u64 cmpxchg to set the futex value and node in a 62 * consistent manner. 63 */ 64 if (__atomic_compare_exchange_n(&lock->full, 65 &old.full, new.full, 66 /* .weak */ false, 67 __ATOMIC_ACQUIRE, 68 __ATOMIC_RELAXED)) { 69 70 /* if we just set N_LOCK, we own it */ 71 if (!(old.val & N_LOCK)) 72 return; 73 74 /* go block */ 75 break; 76 } 77 } 78 79 futex2_wait(lock, new.val, fflags, NULL, 0); 80 } 81 } 82 83 void futex_numa_32_unlock(struct futex_numa_32 *lock) 84 { 85 u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE); 86 assert((s32)val >= 0); 87 if (val & N_WAITERS) { 88 int woken = futex2_wake(lock, 1, fflags); 89 assert(val == N_WAITERS); 90 if (!woken) { 91 __atomic_compare_exchange_n(&lock->val, &val, 0U, 92 false, __ATOMIC_RELAXED, 93 __ATOMIC_RELAXED); 94 } 95 } 96 } 97 98 static long nanos = 50000; 99 100 struct thread_args { 101 pthread_t tid; 102 volatile int * done; 103 struct futex_numa_32 *lock; 104 int val; 105 int *val1, *val2; 106 int node; 107 }; 108 109 static void *threadfn(void *_arg) 110 { 111 struct thread_args *args = _arg; 112 struct timespec ts = { 113 .tv_nsec = nanos, 114 }; 115 int node; 116 117 while (!*args->done) { 118 119 futex_numa_32_lock(args->lock); 120 args->val++; 121 122 assert(*args->val1 == *args->val2); 123 (*args->val1)++; 124 nanosleep(&ts, NULL); 125 (*args->val2)++; 126 127 node = args->lock->node; 128 futex_numa_32_unlock(args->lock); 129 130 if (node != args->node) { 131 args->node = node; 132 printf("node: %d\n", node); 133 } 134 135 nanosleep(&ts, NULL); 136 } 137 138 return NULL; 139 } 140 141 static void *contendfn(void *_arg) 142 { 143 struct thread_args *args = _arg; 144 145 while (!*args->done) { 146 /* 147 * futex2_wait() will take hb-lock, verify *var == val and 148 * queue/abort. By knowingly setting val 'wrong' this will 149 * abort and thereby generate hb-lock contention. 150 */ 151 futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0); 152 args->val++; 153 } 154 155 return NULL; 156 } 157 158 static volatile int done = 0; 159 static struct futex_numa_32 lock = { .val = 0, }; 160 static int val1, val2; 161 162 int main(int argc, char *argv[]) 163 { 164 struct thread_args *tas[512], *cas[512]; 165 int c, t, threads = 2, contenders = 0; 166 int sleeps = 10; 167 int total = 0; 168 169 while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) { 170 switch (c) { 171 case 'c': 172 contenders = atoi(optarg); 173 break; 174 case 't': 175 threads = atoi(optarg); 176 break; 177 case 's': 178 sleeps = atoi(optarg); 179 break; 180 case 'n': 181 nanos = atoi(optarg); 182 break; 183 case 'N': 184 fflags |= FUTEX2_NUMA; 185 if (optarg) 186 fnode = atoi(optarg); 187 break; 188 default: 189 exit(1); 190 break; 191 } 192 } 193 194 for (t = 0; t < contenders; t++) { 195 struct thread_args *args = calloc(1, sizeof(*args)); 196 if (!args) { 197 perror("thread_args"); 198 exit(-1); 199 } 200 201 args->done = &done; 202 args->lock = &lock; 203 args->val1 = &val1; 204 args->val2 = &val2; 205 args->node = -1; 206 207 if (pthread_create(&args->tid, NULL, contendfn, args)) { 208 perror("pthread_create"); 209 exit(-1); 210 } 211 212 cas[t] = args; 213 } 214 215 for (t = 0; t < threads; t++) { 216 struct thread_args *args = calloc(1, sizeof(*args)); 217 if (!args) { 218 perror("thread_args"); 219 exit(-1); 220 } 221 222 args->done = &done; 223 args->lock = &lock; 224 args->val1 = &val1; 225 args->val2 = &val2; 226 args->node = -1; 227 228 if (pthread_create(&args->tid, NULL, threadfn, args)) { 229 perror("pthread_create"); 230 exit(-1); 231 } 232 233 tas[t] = args; 234 } 235 236 sleep(sleeps); 237 238 done = true; 239 240 for (t = 0; t < threads; t++) { 241 struct thread_args *args = tas[t]; 242 243 pthread_join(args->tid, NULL); 244 total += args->val; 245 // printf("tval: %d\n", args->val); 246 } 247 printf("total: %d\n", total); 248 249 if (contenders) { 250 total = 0; 251 for (t = 0; t < contenders; t++) { 252 struct thread_args *args = cas[t]; 253 254 pthread_join(args->tid, NULL); 255 total += args->val; 256 // printf("tval: %d\n", args->val); 257 } 258 printf("contenders: %d\n", total); 259 } 260 261 return 0; 262 } 263 264