/* $Id: avx_sig.c,v 1.12 2021/12/11 22:47:09 kostik Exp $ */
/*
 * Naive test to check that context switches and signal delivery do
 * not corrupt the extended register file (e.g. %xmm, %ymm).  Runs
 * until an inconsistency is detected, then aborts.
 *
 * FreeBSD:
 * ${CC} -Wall -Wextra -O -g -o avx_sig avx_sig.c -lpthread
 * Linux:
 * ${CC} -D_GNU_SOURCE -Wall -Wextra -O -g -o avx_sig avx_sig.c -lbsd -lpthread
 */
128fdc9ce9SDmitry Chagin
138fdc9ce9SDmitry Chagin #include <sys/param.h>
148fdc9ce9SDmitry Chagin #include <sys/time.h>
158fdc9ce9SDmitry Chagin #include <sys/resource.h>
168fdc9ce9SDmitry Chagin #include <sys/syscall.h>
178fdc9ce9SDmitry Chagin #include <errno.h>
188fdc9ce9SDmitry Chagin #include <pthread.h>
198fdc9ce9SDmitry Chagin #ifdef __FreeBSD__
208fdc9ce9SDmitry Chagin #include <pthread_np.h>
218fdc9ce9SDmitry Chagin #endif
228fdc9ce9SDmitry Chagin #ifdef __linux__
238fdc9ce9SDmitry Chagin #ifdef __GLIBC__
248fdc9ce9SDmitry Chagin #include <gnu/libc-version.h>
258fdc9ce9SDmitry Chagin #endif
268fdc9ce9SDmitry Chagin #if !defined(__GLIBC__) || (__GLIBC__ * 100 + __GLIBC_MINOR__) < 236
278fdc9ce9SDmitry Chagin #include <bsd/stdlib.h>
288fdc9ce9SDmitry Chagin #endif
298fdc9ce9SDmitry Chagin #endif
308fdc9ce9SDmitry Chagin #include <signal.h>
318fdc9ce9SDmitry Chagin #include <stdatomic.h>
328fdc9ce9SDmitry Chagin #include <stdbool.h>
338fdc9ce9SDmitry Chagin #include <stdint.h>
348fdc9ce9SDmitry Chagin #include <stdio.h>
358fdc9ce9SDmitry Chagin #include <stdlib.h>
368fdc9ce9SDmitry Chagin #include <string.h>
378fdc9ce9SDmitry Chagin #include <unistd.h>
388fdc9ce9SDmitry Chagin
/*
 * Interval, in seconds, passed to alarm() for the periodic SIGALRM.
 * Can be overridden at build time with -DTIMO=<seconds>.
 */
#if !defined(TIMO)
#define	TIMO	5
#endif

/* Provide __unused on platforms whose headers do not define it. */
#if !defined(__unused)
#define	__unused	__attribute__((__unused__))
#endif
470be13a45SDmitry Chagin
/*
 * Description of one bank of extended registers exercised by the test.
 * A bank occupies regs * bytes bytes when laid out in a buffer.
 */
struct xregs_bank {
	/* Bank name, printed when the worker threads are started. */
	const char	*b_name;
	/* Register name prefix, printed when a mismatch is reported. */
	const char	*r_name;
	/* Number of registers in the bank. */
	uint32_t	regs;
	/* Size of a single register, in bytes. */
	uint32_t	bytes;
	/* Called by the worker with the reference buffer, before pausing. */
	void		(*x2c)(uint8_t *);
	/* Called by the worker with the read-back buffer, after pausing. */
	void		(*c2x)(uint8_t *);
};
568fdc9ce9SDmitry Chagin
57*c8dbef44SDmitry Chagin int xregs_banks_max(void);
58*c8dbef44SDmitry Chagin
598fdc9ce9SDmitry Chagin #if defined(__amd64__)
600be13a45SDmitry Chagin void cpu_to_xmm(uint8_t *);
610be13a45SDmitry Chagin void xmm_to_cpu(uint8_t *);
62*c8dbef44SDmitry Chagin void cpu_to_avx(uint8_t *);
63*c8dbef44SDmitry Chagin void avx_to_cpu(uint8_t *);
640be13a45SDmitry Chagin
650be13a45SDmitry Chagin static const struct xregs_bank xregs_banks[] = {
660be13a45SDmitry Chagin {
670be13a45SDmitry Chagin .b_name = "SSE",
680be13a45SDmitry Chagin .r_name = "xmm",
690be13a45SDmitry Chagin .regs = 16,
700be13a45SDmitry Chagin .bytes = 16,
710be13a45SDmitry Chagin .x2c = xmm_to_cpu,
720be13a45SDmitry Chagin .c2x = cpu_to_xmm,
730be13a45SDmitry Chagin },
74*c8dbef44SDmitry Chagin {
75*c8dbef44SDmitry Chagin .b_name = "AVX",
76*c8dbef44SDmitry Chagin .r_name = "ymm",
77*c8dbef44SDmitry Chagin .regs = 16,
78*c8dbef44SDmitry Chagin .bytes = 32,
79*c8dbef44SDmitry Chagin .x2c = avx_to_cpu,
80*c8dbef44SDmitry Chagin .c2x = cpu_to_avx,
81*c8dbef44SDmitry Chagin },
820be13a45SDmitry Chagin };
838fdc9ce9SDmitry Chagin #elif defined(__aarch64__)
840be13a45SDmitry Chagin void cpu_to_vfp(uint8_t *);
850be13a45SDmitry Chagin void vfp_to_cpu(uint8_t *);
860be13a45SDmitry Chagin
870be13a45SDmitry Chagin static const struct xregs_bank xregs_banks[] = {
880be13a45SDmitry Chagin {
890be13a45SDmitry Chagin .b_name = "VFP",
900be13a45SDmitry Chagin .r_name = "q",
910be13a45SDmitry Chagin .regs = 32,
920be13a45SDmitry Chagin .bytes = 16,
930be13a45SDmitry Chagin .x2c = vfp_to_cpu,
940be13a45SDmitry Chagin .c2x = cpu_to_vfp,
950be13a45SDmitry Chagin },
960be13a45SDmitry Chagin };
978fdc9ce9SDmitry Chagin #endif
988fdc9ce9SDmitry Chagin
/* Count of SIGUSR1 deliveries; bumped by sigusr1_handler(). */
static atomic_uint sigs;

/* Highest register bank index to exercise; assigned once in main(). */
static int max_bank_idx;
1010be13a45SDmitry Chagin
1028fdc9ce9SDmitry Chagin
1038fdc9ce9SDmitry Chagin static void
sigusr1_handler(int sig __unused,siginfo_t * si __unused,void * m __unused)1048fdc9ce9SDmitry Chagin sigusr1_handler(int sig __unused, siginfo_t *si __unused, void *m __unused)
1058fdc9ce9SDmitry Chagin {
1068fdc9ce9SDmitry Chagin atomic_fetch_add_explicit(&sigs, 1, memory_order_relaxed);
1078fdc9ce9SDmitry Chagin }
1088fdc9ce9SDmitry Chagin
1098fdc9ce9SDmitry Chagin static void
sigalrm_handler(int sig __unused)1108fdc9ce9SDmitry Chagin sigalrm_handler(int sig __unused)
1118fdc9ce9SDmitry Chagin {
1128fdc9ce9SDmitry Chagin struct rusage r;
1138fdc9ce9SDmitry Chagin
1148fdc9ce9SDmitry Chagin if (getrusage(RUSAGE_SELF, &r) == 0) {
1158fdc9ce9SDmitry Chagin printf("%lu vctx %lu nvctx %lu nsigs %u SIGUSR1\n",
1168fdc9ce9SDmitry Chagin r.ru_nvcsw, r.ru_nivcsw, r.ru_nsignals, sigs);
1178fdc9ce9SDmitry Chagin }
1188fdc9ce9SDmitry Chagin alarm(TIMO);
1198fdc9ce9SDmitry Chagin }
1208fdc9ce9SDmitry Chagin
1218fdc9ce9SDmitry Chagin
1228fdc9ce9SDmitry Chagin static void
fill_xregs(uint8_t * xregs,int bank)1230be13a45SDmitry Chagin fill_xregs(uint8_t *xregs, int bank)
1248fdc9ce9SDmitry Chagin {
1250be13a45SDmitry Chagin arc4random_buf(xregs, xregs_banks[bank].regs * xregs_banks[bank].bytes);
1268fdc9ce9SDmitry Chagin }
1278fdc9ce9SDmitry Chagin
1288fdc9ce9SDmitry Chagin static void
dump_xregs(const uint8_t * r,int bank)1290be13a45SDmitry Chagin dump_xregs(const uint8_t *r, int bank)
1308fdc9ce9SDmitry Chagin {
1318fdc9ce9SDmitry Chagin unsigned k;
1328fdc9ce9SDmitry Chagin
1330be13a45SDmitry Chagin for (k = 0; k < xregs_banks[bank].bytes; k++) {
1348fdc9ce9SDmitry Chagin if (k != 0)
1358fdc9ce9SDmitry Chagin printf(" ");
1360be13a45SDmitry Chagin printf("%02x", r[k]);
1378fdc9ce9SDmitry Chagin }
1388fdc9ce9SDmitry Chagin printf("\n");
1398fdc9ce9SDmitry Chagin }
1408fdc9ce9SDmitry Chagin
1418fdc9ce9SDmitry Chagin static pthread_mutex_t show_lock;
1428fdc9ce9SDmitry Chagin
1438fdc9ce9SDmitry Chagin static void
show_diff(const uint8_t * xregs1,const uint8_t * xregs2,int bank)1440be13a45SDmitry Chagin show_diff(const uint8_t *xregs1, const uint8_t *xregs2, int bank)
1458fdc9ce9SDmitry Chagin {
1460be13a45SDmitry Chagin const uint8_t *r1, *r2;
1478fdc9ce9SDmitry Chagin unsigned i, j;
1488fdc9ce9SDmitry Chagin
1498fdc9ce9SDmitry Chagin #if defined(__FreeBSD__)
1508fdc9ce9SDmitry Chagin printf("thr %d\n", pthread_getthreadid_np());
1518fdc9ce9SDmitry Chagin #elif defined(__linux__)
1528fdc9ce9SDmitry Chagin printf("thr %ld\n", syscall(SYS_gettid));
1538fdc9ce9SDmitry Chagin #endif
1540be13a45SDmitry Chagin for (i = 0; i < xregs_banks[bank].regs; i++) {
1550be13a45SDmitry Chagin r1 = xregs1 + i * xregs_banks[bank].bytes;
1560be13a45SDmitry Chagin r2 = xregs2 + i * xregs_banks[bank].bytes;
1570be13a45SDmitry Chagin for (j = 0; j < xregs_banks[bank].bytes; j++) {
1580be13a45SDmitry Chagin if (r1[j] != r2[j]) {
1590be13a45SDmitry Chagin printf("%%%s%u\n", xregs_banks[bank].r_name, i);
1600be13a45SDmitry Chagin dump_xregs(r1, bank);
1610be13a45SDmitry Chagin dump_xregs(r2, bank);
1628fdc9ce9SDmitry Chagin break;
1638fdc9ce9SDmitry Chagin }
1648fdc9ce9SDmitry Chagin }
1658fdc9ce9SDmitry Chagin }
1668fdc9ce9SDmitry Chagin }
1678fdc9ce9SDmitry Chagin
/*
 * Issue a zero-length usleep().  Presumably this serves to enter the
 * kernel and give the scheduler a chance to switch threads -- confirm.
 */
static void
my_pause(void)
{
	(void)usleep(0);
}
1738fdc9ce9SDmitry Chagin
1748fdc9ce9SDmitry Chagin static void *
worker_thread(void * arg)1750be13a45SDmitry Chagin worker_thread(void *arg)
1768fdc9ce9SDmitry Chagin {
1770be13a45SDmitry Chagin int bank = (uintptr_t)arg;
1780be13a45SDmitry Chagin int sz = xregs_banks[bank].regs * xregs_banks[bank].bytes;
1790be13a45SDmitry Chagin uint8_t xregs[sz], xregs_cpu[sz], zero_xregs[sz];
1808fdc9ce9SDmitry Chagin
1810be13a45SDmitry Chagin memset(zero_xregs, 0, sz);
1820be13a45SDmitry Chagin
1830be13a45SDmitry Chagin fill_xregs(xregs, bank);
1848fdc9ce9SDmitry Chagin for (;;) {
1850be13a45SDmitry Chagin xregs_banks[bank].x2c(xregs);
1868fdc9ce9SDmitry Chagin my_pause();
1870be13a45SDmitry Chagin xregs_banks[bank].c2x(xregs_cpu);
1880be13a45SDmitry Chagin if (memcmp(xregs, xregs_cpu, sz) != 0) {
1898fdc9ce9SDmitry Chagin pthread_mutex_lock(&show_lock);
1900be13a45SDmitry Chagin show_diff(xregs, xregs_cpu, bank);
1918fdc9ce9SDmitry Chagin abort();
1928fdc9ce9SDmitry Chagin pthread_mutex_unlock(&show_lock);
1938fdc9ce9SDmitry Chagin }
1948fdc9ce9SDmitry Chagin
1950be13a45SDmitry Chagin xregs_banks[bank].x2c(zero_xregs);
1968fdc9ce9SDmitry Chagin my_pause();
1970be13a45SDmitry Chagin xregs_banks[bank].c2x(xregs_cpu);
1980be13a45SDmitry Chagin if (memcmp(zero_xregs, xregs_cpu, sz) != 0) {
1998fdc9ce9SDmitry Chagin pthread_mutex_lock(&show_lock);
2000be13a45SDmitry Chagin show_diff(zero_xregs, xregs_cpu, bank);
2018fdc9ce9SDmitry Chagin abort();
2028fdc9ce9SDmitry Chagin pthread_mutex_unlock(&show_lock);
2038fdc9ce9SDmitry Chagin }
2048fdc9ce9SDmitry Chagin }
2058fdc9ce9SDmitry Chagin return (NULL);
2068fdc9ce9SDmitry Chagin }
2078fdc9ce9SDmitry Chagin
2088fdc9ce9SDmitry Chagin int
main(void)2098fdc9ce9SDmitry Chagin main(void)
2108fdc9ce9SDmitry Chagin {
2118fdc9ce9SDmitry Chagin struct sigaction sa;
2120be13a45SDmitry Chagin int error, i, ncpu, bank;
2130be13a45SDmitry Chagin
214*c8dbef44SDmitry Chagin max_bank_idx = xregs_banks_max();
2158fdc9ce9SDmitry Chagin
2168fdc9ce9SDmitry Chagin bzero(&sa, sizeof(sa));
2178fdc9ce9SDmitry Chagin sa.sa_handler = sigalrm_handler;
2188fdc9ce9SDmitry Chagin if (sigaction(SIGALRM, &sa, NULL) == -1) {
2198fdc9ce9SDmitry Chagin fprintf(stderr, "sigaction SIGALRM %s\n", strerror(errno));
2208fdc9ce9SDmitry Chagin exit(1);
2218fdc9ce9SDmitry Chagin }
2228fdc9ce9SDmitry Chagin
2238fdc9ce9SDmitry Chagin bzero(&sa, sizeof(sa));
2248fdc9ce9SDmitry Chagin sa.sa_sigaction = sigusr1_handler;
2258fdc9ce9SDmitry Chagin sa.sa_flags = SA_SIGINFO;
2268fdc9ce9SDmitry Chagin if (sigaction(SIGUSR1, &sa, NULL) == -1) {
2278fdc9ce9SDmitry Chagin fprintf(stderr, "sigaction SIGUSR1 %s\n", strerror(errno));
2288fdc9ce9SDmitry Chagin exit(1);
2298fdc9ce9SDmitry Chagin }
2308fdc9ce9SDmitry Chagin
2318fdc9ce9SDmitry Chagin error = pthread_mutex_init(&show_lock, NULL);
2328fdc9ce9SDmitry Chagin if (error != 0) {
2338fdc9ce9SDmitry Chagin fprintf(stderr, "pthread_mutex_init %s\n", strerror(error));
2348fdc9ce9SDmitry Chagin exit(1);
2358fdc9ce9SDmitry Chagin }
2368fdc9ce9SDmitry Chagin
2378fdc9ce9SDmitry Chagin ncpu = sysconf(_SC_NPROCESSORS_ONLN);
2380be13a45SDmitry Chagin if (max_bank_idx == 0)
2398fdc9ce9SDmitry Chagin ncpu *= 2;
2400be13a45SDmitry Chagin bank = 0;
2418fdc9ce9SDmitry Chagin pthread_t wt[ncpu];
2420be13a45SDmitry Chagin nextbank:
2430be13a45SDmitry Chagin printf("Starting %d threads for registers bank %s sized [%d][%d]\n", ncpu,
2440be13a45SDmitry Chagin xregs_banks[bank].b_name, xregs_banks[bank].regs, xregs_banks[bank].bytes);
2458fdc9ce9SDmitry Chagin for (i = 0; i < ncpu; i++) {
2460be13a45SDmitry Chagin error = pthread_create(&wt[i], NULL, worker_thread,
2470be13a45SDmitry Chagin (void *)(uintptr_t)bank);
2488fdc9ce9SDmitry Chagin if (error != 0) {
2498fdc9ce9SDmitry Chagin fprintf(stderr, "pthread_create %s\n", strerror(error));
2508fdc9ce9SDmitry Chagin }
2518fdc9ce9SDmitry Chagin }
2520be13a45SDmitry Chagin if (++bank <= max_bank_idx)
2530be13a45SDmitry Chagin goto nextbank;
2548fdc9ce9SDmitry Chagin
2558fdc9ce9SDmitry Chagin alarm(TIMO);
2568fdc9ce9SDmitry Chagin for (;;) {
2578fdc9ce9SDmitry Chagin for (i = 0; i < ncpu; i++) {
2588fdc9ce9SDmitry Chagin my_pause();
2598fdc9ce9SDmitry Chagin pthread_kill(wt[i], SIGUSR1);
2608fdc9ce9SDmitry Chagin }
2618fdc9ce9SDmitry Chagin }
2628fdc9ce9SDmitry Chagin }
263