xref: /linux/tools/testing/selftests/futex/functional/futex_numa.c (revision 55a42f78ffd386e01a5404419f8c5ded7db70a21)
1 // SPDX-License-Identifier: GPL-2.0
2 
#include <pthread.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <assert.h>
#include "futextest.h"
#include "futex2test.h"
14 
/*
 * Short fixed-width aliases.  All three are built on the ISO C <stdint.h>
 * types so they do not depend on the BSD-style u_int*_t names, which are
 * only exposed under non-standard feature-test macros.
 */
typedef uint32_t u32;
typedef int32_t  s32;
typedef uint64_t u64;
18 
19 static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
20 static int fnode = FUTEX_NO_NODE;
21 
/*
 * A deliberately simple test-and-set lock with a "somebody is waiting" flag.
 * Both flags live in the 32-bit 'val' word of struct futex_numa_32.
 */

#define N_LOCK		0x0001	/* the lock is held */
#define N_WAITERS	0x1000	/* a locker found the lock held and may be blocked */
26 
27 struct futex_numa_32 {
28 	union {
29 		u64 full;
30 		struct {
31 			u32 val;
32 			u32 node;
33 		};
34 	};
35 };
36 
37 void futex_numa_32_lock(struct futex_numa_32 *lock)
38 {
39 	for (;;) {
40 		struct futex_numa_32 new, old = {
41 			.full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
42 		};
43 
44 		for (;;) {
45 			new = old;
46 			if (old.val == 0) {
47 				/* no waiter, no lock -> first lock, set no-node */
48 				new.node = fnode;
49 			}
50 			if (old.val & N_LOCK) {
51 				/* contention, set waiter */
52 				new.val |= N_WAITERS;
53 			}
54 			new.val |= N_LOCK;
55 
56 			/* nothing changed, ready to block */
57 			if (old.full == new.full)
58 				break;
59 
60 			/*
61 			 * Use u64 cmpxchg to set the futex value and node in a
62 			 * consistent manner.
63 			 */
64 			if (__atomic_compare_exchange_n(&lock->full,
65 							&old.full, new.full,
66 							/* .weak */ false,
67 							__ATOMIC_ACQUIRE,
68 							__ATOMIC_RELAXED)) {
69 
70 				/* if we just set N_LOCK, we own it */
71 				if (!(old.val & N_LOCK))
72 					return;
73 
74 				/* go block */
75 				break;
76 			}
77 		}
78 
79 		futex2_wait(lock, new.val, fflags, NULL, 0);
80 	}
81 }
82 
83 void futex_numa_32_unlock(struct futex_numa_32 *lock)
84 {
85 	u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
86 	assert((s32)val >= 0);
87 	if (val & N_WAITERS) {
88 		int woken = futex2_wake(lock, 1, fflags);
89 		assert(val == N_WAITERS);
90 		if (!woken) {
91 			__atomic_compare_exchange_n(&lock->val, &val, 0U,
92 						    false, __ATOMIC_RELAXED,
93 						    __ATOMIC_RELAXED);
94 		}
95 	}
96 }
97 
/* Length, in nanoseconds, of each nanosleep() in threadfn(); set by -n. */
static long nanos = 50 * 1000;
99 
/* Per-thread bookkeeping, allocated by main() and handed to the thread. */
struct thread_args {
	/* handle filled in by pthread_create() and used for pthread_join() */
	pthread_t tid;
	/* points at the shared stop flag polled by the thread loops */
	volatile int *done;
	/* the lock this thread operates on */
	struct futex_numa_32 *lock;
	/* per-thread iteration counter, summed up by main() */
	int val;
	/* two shared counters that threadfn() checks and bumps under the lock */
	int *val1;
	int *val2;
	/* last node value this thread printed; starts at -1 */
	int node;
};
108 
109 static void *threadfn(void *_arg)
110 {
111 	struct thread_args *args = _arg;
112 	struct timespec ts = {
113 		.tv_nsec = nanos,
114 	};
115 	int node;
116 
117 	while (!*args->done) {
118 
119 		futex_numa_32_lock(args->lock);
120 		args->val++;
121 
122 		assert(*args->val1 == *args->val2);
123 		(*args->val1)++;
124 		nanosleep(&ts, NULL);
125 		(*args->val2)++;
126 
127 		node = args->lock->node;
128 		futex_numa_32_unlock(args->lock);
129 
130 		if (node != args->node) {
131 			args->node = node;
132 			printf("node: %d\n", node);
133 		}
134 
135 		nanosleep(&ts, NULL);
136 	}
137 
138 	return NULL;
139 }
140 
141 static void *contendfn(void *_arg)
142 {
143 	struct thread_args *args = _arg;
144 
145 	while (!*args->done) {
146 		/*
147 		 * futex2_wait() will take hb-lock, verify *var == val and
148 		 * queue/abort.  By knowingly setting val 'wrong' this will
149 		 * abort and thereby generate hb-lock contention.
150 		 */
151 		futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
152 		args->val++;
153 	}
154 
155 	return NULL;
156 }
157 
158 static volatile int done = 0;
159 static struct futex_numa_32 lock = { .val = 0, };
160 static int val1, val2;
161 
162 int main(int argc, char *argv[])
163 {
164 	struct thread_args *tas[512], *cas[512];
165 	int c, t, threads = 2, contenders = 0;
166 	int sleeps = 10;
167 	int total = 0;
168 
169 	while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
170 		switch (c) {
171 		case 'c':
172 			contenders = atoi(optarg);
173 			break;
174 		case 't':
175 			threads = atoi(optarg);
176 			break;
177 		case 's':
178 			sleeps = atoi(optarg);
179 			break;
180 		case 'n':
181 			nanos = atoi(optarg);
182 			break;
183 		case 'N':
184 			fflags |= FUTEX2_NUMA;
185 			if (optarg)
186 				fnode = atoi(optarg);
187 			break;
188 		default:
189 			exit(1);
190 			break;
191 		}
192 	}
193 
194 	for (t = 0; t < contenders; t++) {
195 		struct thread_args *args = calloc(1, sizeof(*args));
196 		if (!args) {
197 			perror("thread_args");
198 			exit(-1);
199 		}
200 
201 		args->done = &done;
202 		args->lock = &lock;
203 		args->val1 = &val1;
204 		args->val2 = &val2;
205 		args->node = -1;
206 
207 		if (pthread_create(&args->tid, NULL, contendfn, args)) {
208 			perror("pthread_create");
209 			exit(-1);
210 		}
211 
212 		cas[t] = args;
213 	}
214 
215 	for (t = 0; t < threads; t++) {
216 		struct thread_args *args = calloc(1, sizeof(*args));
217 		if (!args) {
218 			perror("thread_args");
219 			exit(-1);
220 		}
221 
222 		args->done = &done;
223 		args->lock = &lock;
224 		args->val1 = &val1;
225 		args->val2 = &val2;
226 		args->node = -1;
227 
228 		if (pthread_create(&args->tid, NULL, threadfn, args)) {
229 			perror("pthread_create");
230 			exit(-1);
231 		}
232 
233 		tas[t] = args;
234 	}
235 
236 	sleep(sleeps);
237 
238 	done = true;
239 
240 	for (t = 0; t < threads; t++) {
241 		struct thread_args *args = tas[t];
242 
243 		pthread_join(args->tid, NULL);
244 		total += args->val;
245 //		printf("tval: %d\n", args->val);
246 	}
247 	printf("total: %d\n", total);
248 
249 	if (contenders) {
250 		total = 0;
251 		for (t = 0; t < contenders; t++) {
252 			struct thread_args *args = cas[t];
253 
254 			pthread_join(args->tid, NULL);
255 			total += args->val;
256 			//		printf("tval: %d\n", args->val);
257 		}
258 		printf("contenders: %d\n", total);
259 	}
260 
261 	return 0;
262 }
263 
264