1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <pthread.h>
4 #include <sys/shm.h>
5 #include <sys/mman.h>
6 #include <fcntl.h>
7 #include <stdbool.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <time.h>
11 #include <assert.h>
12 #include "futextest.h"
13 #include "futex2test.h"
14
15 typedef u_int32_t u32;
16 typedef int32_t s32;
17 typedef u_int64_t u64;
18
19 static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
20 static int fnode = FUTEX_NO_NODE;
21
22 /* fairly stupid test-and-set lock with a waiter flag */
23
24 #define N_LOCK 0x0000001
25 #define N_WAITERS 0x0001000
26
27 struct futex_numa_32 {
28 union {
29 u64 full;
30 struct {
31 u32 val;
32 u32 node;
33 };
34 };
35 };
36
futex_numa_32_lock(struct futex_numa_32 * lock)37 void futex_numa_32_lock(struct futex_numa_32 *lock)
38 {
39 for (;;) {
40 struct futex_numa_32 new, old = {
41 .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
42 };
43
44 for (;;) {
45 new = old;
46 if (old.val == 0) {
47 /* no waiter, no lock -> first lock, set no-node */
48 new.node = fnode;
49 }
50 if (old.val & N_LOCK) {
51 /* contention, set waiter */
52 new.val |= N_WAITERS;
53 }
54 new.val |= N_LOCK;
55
56 /* nothing changed, ready to block */
57 if (old.full == new.full)
58 break;
59
60 /*
61 * Use u64 cmpxchg to set the futex value and node in a
62 * consistent manner.
63 */
64 if (__atomic_compare_exchange_n(&lock->full,
65 &old.full, new.full,
66 /* .weak */ false,
67 __ATOMIC_ACQUIRE,
68 __ATOMIC_RELAXED)) {
69
70 /* if we just set N_LOCK, we own it */
71 if (!(old.val & N_LOCK))
72 return;
73
74 /* go block */
75 break;
76 }
77 }
78
79 futex2_wait(lock, new.val, fflags, NULL, 0);
80 }
81 }
82
futex_numa_32_unlock(struct futex_numa_32 * lock)83 void futex_numa_32_unlock(struct futex_numa_32 *lock)
84 {
85 u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
86 assert((s32)val >= 0);
87 if (val & N_WAITERS) {
88 int woken = futex2_wake(lock, 1, fflags);
89 assert(val == N_WAITERS);
90 if (!woken) {
91 __atomic_compare_exchange_n(&lock->val, &val, 0U,
92 false, __ATOMIC_RELAXED,
93 __ATOMIC_RELAXED);
94 }
95 }
96 }
97
98 static long nanos = 50000;
99
100 struct thread_args {
101 pthread_t tid;
102 volatile int * done;
103 struct futex_numa_32 *lock;
104 int val;
105 int *val1, *val2;
106 int node;
107 };
108
threadfn(void * _arg)109 static void *threadfn(void *_arg)
110 {
111 struct thread_args *args = _arg;
112 struct timespec ts = {
113 .tv_nsec = nanos,
114 };
115 int node;
116
117 while (!*args->done) {
118
119 futex_numa_32_lock(args->lock);
120 args->val++;
121
122 assert(*args->val1 == *args->val2);
123 (*args->val1)++;
124 nanosleep(&ts, NULL);
125 (*args->val2)++;
126
127 node = args->lock->node;
128 futex_numa_32_unlock(args->lock);
129
130 if (node != args->node) {
131 args->node = node;
132 printf("node: %d\n", node);
133 }
134
135 nanosleep(&ts, NULL);
136 }
137
138 return NULL;
139 }
140
contendfn(void * _arg)141 static void *contendfn(void *_arg)
142 {
143 struct thread_args *args = _arg;
144
145 while (!*args->done) {
146 /*
147 * futex2_wait() will take hb-lock, verify *var == val and
148 * queue/abort. By knowingly setting val 'wrong' this will
149 * abort and thereby generate hb-lock contention.
150 */
151 futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
152 args->val++;
153 }
154
155 return NULL;
156 }
157
158 static volatile int done = 0;
159 static struct futex_numa_32 lock = { .val = 0, };
160 static int val1, val2;
161
main(int argc,char * argv[])162 int main(int argc, char *argv[])
163 {
164 struct thread_args *tas[512], *cas[512];
165 int c, t, threads = 2, contenders = 0;
166 int sleeps = 10;
167 int total = 0;
168
169 while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
170 switch (c) {
171 case 'c':
172 contenders = atoi(optarg);
173 break;
174 case 't':
175 threads = atoi(optarg);
176 break;
177 case 's':
178 sleeps = atoi(optarg);
179 break;
180 case 'n':
181 nanos = atoi(optarg);
182 break;
183 case 'N':
184 fflags |= FUTEX2_NUMA;
185 if (optarg)
186 fnode = atoi(optarg);
187 break;
188 default:
189 exit(1);
190 break;
191 }
192 }
193
194 for (t = 0; t < contenders; t++) {
195 struct thread_args *args = calloc(1, sizeof(*args));
196 if (!args) {
197 perror("thread_args");
198 exit(-1);
199 }
200
201 args->done = &done;
202 args->lock = &lock;
203 args->val1 = &val1;
204 args->val2 = &val2;
205 args->node = -1;
206
207 if (pthread_create(&args->tid, NULL, contendfn, args)) {
208 perror("pthread_create");
209 exit(-1);
210 }
211
212 cas[t] = args;
213 }
214
215 for (t = 0; t < threads; t++) {
216 struct thread_args *args = calloc(1, sizeof(*args));
217 if (!args) {
218 perror("thread_args");
219 exit(-1);
220 }
221
222 args->done = &done;
223 args->lock = &lock;
224 args->val1 = &val1;
225 args->val2 = &val2;
226 args->node = -1;
227
228 if (pthread_create(&args->tid, NULL, threadfn, args)) {
229 perror("pthread_create");
230 exit(-1);
231 }
232
233 tas[t] = args;
234 }
235
236 sleep(sleeps);
237
238 done = true;
239
240 for (t = 0; t < threads; t++) {
241 struct thread_args *args = tas[t];
242
243 pthread_join(args->tid, NULL);
244 total += args->val;
245 // printf("tval: %d\n", args->val);
246 }
247 printf("total: %d\n", total);
248
249 if (contenders) {
250 total = 0;
251 for (t = 0; t < contenders; t++) {
252 struct thread_args *args = cas[t];
253
254 pthread_join(args->tid, NULL);
255 total += args->val;
256 // printf("tval: %d\n", args->val);
257 }
258 printf("contenders: %d\n", total);
259 }
260
261 return 0;
262 }
263
264