1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <pthread.h> 4 #include <sys/shm.h> 5 #include <sys/mman.h> 6 #include <fcntl.h> 7 #include <stdbool.h> 8 #include <time.h> 9 #include <assert.h> 10 #include "logging.h" 11 #include "futextest.h" 12 #include "futex2test.h" 13 14 typedef u_int32_t u32; 15 typedef int32_t s32; 16 typedef u_int64_t u64; 17 18 static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE); 19 static int fnode = FUTEX_NO_NODE; 20 21 /* fairly stupid test-and-set lock with a waiter flag */ 22 23 #define N_LOCK 0x0000001 24 #define N_WAITERS 0x0001000 25 26 struct futex_numa_32 { 27 union { 28 u64 full; 29 struct { 30 u32 val; 31 u32 node; 32 }; 33 }; 34 }; 35 36 void futex_numa_32_lock(struct futex_numa_32 *lock) 37 { 38 for (;;) { 39 struct futex_numa_32 new, old = { 40 .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED), 41 }; 42 43 for (;;) { 44 new = old; 45 if (old.val == 0) { 46 /* no waiter, no lock -> first lock, set no-node */ 47 new.node = fnode; 48 } 49 if (old.val & N_LOCK) { 50 /* contention, set waiter */ 51 new.val |= N_WAITERS; 52 } 53 new.val |= N_LOCK; 54 55 /* nothing changed, ready to block */ 56 if (old.full == new.full) 57 break; 58 59 /* 60 * Use u64 cmpxchg to set the futex value and node in a 61 * consistent manner. 62 */ 63 if (__atomic_compare_exchange_n(&lock->full, 64 &old.full, new.full, 65 /* .weak */ false, 66 __ATOMIC_ACQUIRE, 67 __ATOMIC_RELAXED)) { 68 69 /* if we just set N_LOCK, we own it */ 70 if (!(old.val & N_LOCK)) 71 return; 72 73 /* go block */ 74 break; 75 } 76 } 77 78 futex2_wait(lock, new.val, fflags, NULL, 0); 79 } 80 } 81 82 void futex_numa_32_unlock(struct futex_numa_32 *lock) 83 { 84 u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE); 85 assert((s32)val >= 0); 86 if (val & N_WAITERS) { 87 int woken = futex2_wake(lock, 1, fflags); 88 assert(val == N_WAITERS); 89 if (!woken) { 90 __atomic_compare_exchange_n(&lock->val, &val, 0U, 91 false, __ATOMIC_RELAXED, 92 __ATOMIC_RELAXED); 93 } 94 } 95 } 96 97 static long nanos = 50000; 98 99 struct thread_args { 100 pthread_t tid; 101 volatile int * done; 102 struct futex_numa_32 *lock; 103 int val; 104 int *val1, *val2; 105 int node; 106 }; 107 108 static void *threadfn(void *_arg) 109 { 110 struct thread_args *args = _arg; 111 struct timespec ts = { 112 .tv_nsec = nanos, 113 }; 114 int node; 115 116 while (!*args->done) { 117 118 futex_numa_32_lock(args->lock); 119 args->val++; 120 121 assert(*args->val1 == *args->val2); 122 (*args->val1)++; 123 nanosleep(&ts, NULL); 124 (*args->val2)++; 125 126 node = args->lock->node; 127 futex_numa_32_unlock(args->lock); 128 129 if (node != args->node) { 130 args->node = node; 131 printf("node: %d\n", node); 132 } 133 134 nanosleep(&ts, NULL); 135 } 136 137 return NULL; 138 } 139 140 static void *contendfn(void *_arg) 141 { 142 struct thread_args *args = _arg; 143 144 while (!*args->done) { 145 /* 146 * futex2_wait() will take hb-lock, verify *var == val and 147 * queue/abort. By knowingly setting val 'wrong' this will 148 * abort and thereby generate hb-lock contention. 149 */ 150 futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0); 151 args->val++; 152 } 153 154 return NULL; 155 } 156 157 static volatile int done = 0; 158 static struct futex_numa_32 lock = { .val = 0, }; 159 static int val1, val2; 160 161 int main(int argc, char *argv[]) 162 { 163 struct thread_args *tas[512], *cas[512]; 164 int c, t, threads = 2, contenders = 0; 165 int sleeps = 10; 166 int total = 0; 167 168 while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) { 169 switch (c) { 170 case 'c': 171 contenders = atoi(optarg); 172 break; 173 case 't': 174 threads = atoi(optarg); 175 break; 176 case 's': 177 sleeps = atoi(optarg); 178 break; 179 case 'n': 180 nanos = atoi(optarg); 181 break; 182 case 'N': 183 fflags |= FUTEX2_NUMA; 184 if (optarg) 185 fnode = atoi(optarg); 186 break; 187 default: 188 exit(1); 189 break; 190 } 191 } 192 193 for (t = 0; t < contenders; t++) { 194 struct thread_args *args = calloc(1, sizeof(*args)); 195 if (!args) { 196 perror("thread_args"); 197 exit(-1); 198 } 199 200 args->done = &done; 201 args->lock = &lock; 202 args->val1 = &val1; 203 args->val2 = &val2; 204 args->node = -1; 205 206 if (pthread_create(&args->tid, NULL, contendfn, args)) { 207 perror("pthread_create"); 208 exit(-1); 209 } 210 211 cas[t] = args; 212 } 213 214 for (t = 0; t < threads; t++) { 215 struct thread_args *args = calloc(1, sizeof(*args)); 216 if (!args) { 217 perror("thread_args"); 218 exit(-1); 219 } 220 221 args->done = &done; 222 args->lock = &lock; 223 args->val1 = &val1; 224 args->val2 = &val2; 225 args->node = -1; 226 227 if (pthread_create(&args->tid, NULL, threadfn, args)) { 228 perror("pthread_create"); 229 exit(-1); 230 } 231 232 tas[t] = args; 233 } 234 235 sleep(sleeps); 236 237 done = true; 238 239 for (t = 0; t < threads; t++) { 240 struct thread_args *args = tas[t]; 241 242 pthread_join(args->tid, NULL); 243 total += args->val; 244 // printf("tval: %d\n", args->val); 245 } 246 printf("total: %d\n", total); 247 248 if (contenders) { 249 total = 0; 250 for (t = 0; t < contenders; t++) { 251 struct thread_args *args = cas[t]; 252 253 pthread_join(args->tid, NULL); 254 total += args->val; 255 // printf("tval: %d\n", args->val); 256 } 257 printf("contenders: %d\n", total); 258 } 259 260 return 0; 261 } 262 263