1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <sys/syscall.h>
7 #include <sys/ioctl.h>
8 #include <time.h>
9 #include <signal.h>
10 #include <setjmp.h>
11 #include <sys/mman.h>
12 #include <sys/utsname.h>
13 #include <sys/wait.h>
14 #include <sys/stat.h>
15 #include <fcntl.h>
16 #include <inttypes.h>
17 #include <sched.h>
18
19 #include <sys/uio.h>
20 #include <linux/io_uring.h>
21 #include "../kselftest.h"
22
23 #ifndef __x86_64__
24 # error This test is 64-bit only
25 #endif
26
/* LAM modes, these definitions were copied from kernel code */
#define LAM_NONE 0		/* tagging disabled */
#define LAM_U57_BITS 6		/* number of metadata bits in LAM_U57 mode */

/* Bits 62:57 carry the pointer metadata under LAM_U57 */
#define LAM_U57_MASK (0x3fULL << 57)
/* arch_prctl() sub-commands for LAM */
#define ARCH_GET_UNTAG_MASK 0x4001
#define ARCH_ENABLE_TAGGED_ADDR 0x4002
#define ARCH_GET_MAX_TAG_BITS 0x4003
#define ARCH_FORCE_TAGGED_SVA 0x4004

/* Specified test function bits (selected via the -t command line mask) */
#define FUNC_MALLOC 0x1
#define FUNC_BITS 0x2
#define FUNC_MMAP 0x4
#define FUNC_SYSCALL 0x8
#define FUNC_URING 0x10
#define FUNC_INHERITE 0x20
#define FUNC_PASID 0x40

/* get_user() pointer test cases (stored in testcases.later for get_user_syscall) */
#define GET_USER_USER 0
#define GET_USER_KERNEL_TOP 1
#define GET_USER_KERNEL_BOT 2
#define GET_USER_KERNEL 3

#define TEST_MASK 0x7f		/* all FUNC_* bits */
/* Sign-extension bits of a kernel address with 5-level / 4-level paging */
#define L5_SIGN_EXT_MASK (0xFFUL << 56)
#define L4_SIGN_EXT_MASK (0x1FFFFUL << 47)

#define LOW_ADDR (0x1UL << 30)	/* user address mappable with 4-level paging */
#define HIGH_ADDR (0x3UL << 48)	/* only mappable when LA57 is enabled */

#define MALLOC_LEN 32

#define PAGE_SIZE (4 << 10)

#define STACK_SIZE 65536	/* stack size for clone()'d threads */

/* Compiler barrier used around io_uring ring head/tail updates */
#define barrier() ({ \
        __asm__ __volatile__("" : : : "memory"); \
})

#define URING_QUEUE_SZ 1
#define URING_BLOCK_SZ 2048

/* Pasid test define */
#define LAM_CMD_BIT 0x1
#define PAS_CMD_BIT 0x2
#define SVA_CMD_BIT 0x4

/* Pack three 4-bit command slots into one test command word */
#define PAS_CMD(cmd1, cmd2, cmd3) (((cmd3) << 8) | ((cmd2) << 4) | ((cmd1) << 0))
79
struct testcases {
        unsigned int later;	/* 0: enable LAM before the access; else after (also reused as sub-case id) */
        int expected;		/* 2: SIGSEGV Error; 1: other errors */
        unsigned long lam;	/* LAM mode to enable (LAM_U57_BITS or LAM_NONE) */
        uint64_t addr;		/* fixed mmap address, used by the mmap tests */
        uint64_t cmd;		/* PAS_CMD() word, used by the pasid tests */
        int (*test_func)(struct testcases *test);
        const char *msg;	/* message printed with the ksft result */
};

/* Used by CQ of uring, source file handler and file's size */
struct file_io {
        int file_fd;
        off_t file_sz;
        struct iovec iovecs[];	/* flexible array: one entry per URING_BLOCK_SZ chunk */
};

/* One mapped io_uring ring (SQ or CQ); members point into the shared mmap */
struct io_uring_queue {
        unsigned int *head;
        unsigned int *tail;
        unsigned int *ring_mask;
        unsigned int *ring_entries;
        unsigned int *flags;
        unsigned int *array;
        union {
                struct io_uring_cqe *cqes;
                struct io_uring_sqe *sqes;
        } queue;
        size_t ring_sz;
};

struct io_ring {
        int ring_fd;
        struct io_uring_queue sq_ring;
        struct io_uring_queue cq_ring;
};

int tests_cnt;		/* number of cases run; reported via ksft_set_plan() */
jmp_buf segv_env;	/* siglongjmp target for the SIGSEGV handler */
119
segv_handler(int sig)120 static void segv_handler(int sig)
121 {
122 ksft_print_msg("Get segmentation fault(%d).", sig);
123
124 siglongjmp(segv_env, 1);
125 }
126
lam_is_available(void)127 static inline int lam_is_available(void)
128 {
129 unsigned int cpuinfo[4];
130 unsigned long bits = 0;
131 int ret;
132
133 __cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
134
135 /* Check if cpu supports LAM */
136 if (!(cpuinfo[0] & (1 << 26))) {
137 ksft_print_msg("LAM is not supported!\n");
138 return 0;
139 }
140
141 /* Return 0 if CONFIG_ADDRESS_MASKING is not set */
142 ret = syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits);
143 if (ret) {
144 ksft_print_msg("LAM is disabled in the kernel!\n");
145 return 0;
146 }
147
148 return 1;
149 }
150
la57_enabled(void)151 static inline int la57_enabled(void)
152 {
153 int ret;
154 void *p;
155
156 p = mmap((void *)HIGH_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE,
157 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
158
159 ret = p == MAP_FAILED ? 0 : 1;
160
161 munmap(p, PAGE_SIZE);
162 return ret;
163 }
164
165 /*
166 * Set tagged address and read back untag mask.
167 * check if the untagged mask is expected.
168 *
169 * @return:
170 * 0: Set LAM mode successfully
171 * others: failed to set LAM
172 */
set_lam(unsigned long lam)173 static int set_lam(unsigned long lam)
174 {
175 int ret = 0;
176 uint64_t ptr = 0;
177
178 if (lam != LAM_U57_BITS && lam != LAM_NONE)
179 return -1;
180
181 /* Skip check return */
182 syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, lam);
183
184 /* Get untagged mask */
185 syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr);
186
187 /* Check mask returned is expected */
188 if (lam == LAM_U57_BITS)
189 ret = (ptr != ~(LAM_U57_MASK));
190 else if (lam == LAM_NONE)
191 ret = (ptr != -1ULL);
192
193 return ret;
194 }
195
get_default_tag_bits(void)196 static unsigned long get_default_tag_bits(void)
197 {
198 pid_t pid;
199 int lam = LAM_NONE;
200 int ret = 0;
201
202 pid = fork();
203 if (pid < 0) {
204 perror("Fork failed.");
205 } else if (pid == 0) {
206 /* Set LAM mode in child process */
207 if (set_lam(LAM_U57_BITS) == 0)
208 lam = LAM_U57_BITS;
209 else
210 lam = LAM_NONE;
211 exit(lam);
212 } else {
213 wait(&ret);
214 lam = WEXITSTATUS(ret);
215 }
216
217 return lam;
218 }
219
220 /*
221 * Set tagged address and read back untag mask.
222 * check if the untag mask is expected.
223 */
get_lam(void)224 static int get_lam(void)
225 {
226 uint64_t ptr = 0;
227 int ret = -1;
228 /* Get untagged mask */
229 if (syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr) == -1)
230 return -1;
231
232 /* Check mask returned is expected */
233 if (ptr == ~(LAM_U57_MASK))
234 ret = LAM_U57_BITS;
235 else if (ptr == -1ULL)
236 ret = LAM_NONE;
237
238
239 return ret;
240 }
241
242 /* According to LAM mode, set metadata in high bits */
set_metadata(uint64_t src,unsigned long lam)243 static uint64_t set_metadata(uint64_t src, unsigned long lam)
244 {
245 uint64_t metadata;
246
247 srand(time(NULL));
248
249 switch (lam) {
250 case LAM_U57_BITS: /* Set metadata in bits 62:57 */
251 /* Get a random non-zero value as metadata */
252 metadata = (rand() % ((1UL << LAM_U57_BITS) - 1) + 1) << 57;
253 metadata |= (src & ~(LAM_U57_MASK));
254 break;
255 default:
256 metadata = src;
257 break;
258 }
259
260 return metadata;
261 }
262
/*
 * Write through the original pointer, then write through a tagged copy and
 * compare: both must alias the same memory when the CPU untags correctly.
 *
 * @return:
 * 0: value on the pointer with metadata and value on original are same
 * 1: not same.
 */
static int handle_lam_test(void *src, unsigned int lam)
{
        char *tagged;

        strcpy((char *)src, "USER POINTER");

        tagged = (char *)set_metadata((uint64_t)src, lam);
        if (tagged == (char *)src)
                return 0;

        /* Copy a string into the pointer with metadata */
        strcpy(tagged, "METADATA POINTER");

        return (!!strcmp((char *)src, tagged));
}
286
287
handle_max_bits(struct testcases * test)288 int handle_max_bits(struct testcases *test)
289 {
290 unsigned long exp_bits = get_default_tag_bits();
291 unsigned long bits = 0;
292
293 if (exp_bits != LAM_NONE)
294 exp_bits = LAM_U57_BITS;
295
296 /* Get LAM max tag bits */
297 if (syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits) == -1)
298 return 1;
299
300 return (exp_bits != bits);
301 }
302
/*
 * Test lam feature through dereference pointer get from malloc.
 * @return 0: Pass test. 1: Get failure during test 2: Get SIGSEGV
 */
static int handle_malloc(struct testcases *test)
{
        char *ptr = NULL;
        int ret = 0;

        /* later == 0: enable LAM before touching the tagged pointer */
        if (test->later == 0 && test->lam != 0)
                if (set_lam(test->lam) == -1)
                        return 1;

        ptr = (char *)malloc(MALLOC_LEN);
        if (ptr == NULL) {
                perror("malloc() failure\n");
                return 1;
        }

        /* Set signal handler: a tagged dereference faults while LAM is off */
        if (sigsetjmp(segv_env, 1) == 0) {
                signal(SIGSEGV, segv_handler);
                ret = handle_lam_test(ptr, test->lam);
        } else {
                ret = 2;
        }

        /* later != 0: LAM is enabled only after the (expected-to-fault) access */
        if (test->later != 0 && test->lam != 0)
                if (set_lam(test->lam) == -1 && ret == 0)
                        ret = 1;

        free(ptr);

        return ret;
}
338
/*
 * Like handle_malloc(), but the buffer comes from a MAP_FIXED mmap at
 * test->addr. Returns 0 pass, 1 failure, 2 SIGSEGV, 3 skip (no LA57).
 */
static int handle_mmap(struct testcases *test)
{
        void *ptr;
        unsigned int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
        int ret = 0;

        if (test->later == 0 && test->lam != 0)
                if (set_lam(test->lam) != 0)
                        return 1;

        ptr = mmap((void *)test->addr, PAGE_SIZE, PROT_READ | PROT_WRITE,
                   flags, -1, 0);
        if (ptr == MAP_FAILED) {
                /* HIGH_ADDR is only mappable with 5-level paging */
                if (test->addr == HIGH_ADDR)
                        if (!la57_enabled())
                                return 3; /* unsupport LA57 */
                return 1;
        }

        if (test->later != 0 && test->lam != 0)
                if (set_lam(test->lam) != 0)
                        ret = 1;

        if (ret == 0) {
                if (sigsetjmp(segv_env, 1) == 0) {
                        signal(SIGSEGV, segv_handler);
                        ret = handle_lam_test(ptr, test->lam);
                } else {
                        ret = 2;
                }
        }

        munmap(ptr, PAGE_SIZE);
        return ret;
}
374
/*
 * Pass a metadata-tagged struct utsname pointer to uname(2); the kernel
 * must untag it when LAM is on. Returns 0 pass, 1 failure, 2 SIGSEGV.
 */
static int handle_syscall(struct testcases *test)
{
        struct utsname unme, *pu;
        int ret = 0;

        if (test->later == 0 && test->lam != 0)
                if (set_lam(test->lam) != 0)
                        return 1;

        if (sigsetjmp(segv_env, 1) == 0) {
                signal(SIGSEGV, segv_handler);
                pu = (struct utsname *)set_metadata((uint64_t)&unme, test->lam);
                ret = uname(pu);
                if (ret < 0)
                        ret = 1;
        } else {
                ret = 2;
        }

        /*
         * Negative case: LAM was still off during the uname() above, so the
         * tagged pointer should have been rejected. If set_lam() now succeeds
         * (!= -1) but the syscall already passed (ret == 0), that is a failure.
         */
        if (test->later != 0 && test->lam != 0)
                if (set_lam(test->lam) != -1 && ret == 0)
                        ret = 1;

        return ret;
}
400
/*
 * Exercise the kernel's get_user() untagging via the FIOASYNC ioctl.
 * test->later selects the GET_USER_* pointer variant.
 * Returns 0 when the ioctl succeeds, 1 when it fails, 2 on set_lam() error.
 */
static int get_user_syscall(struct testcases *test)
{
        uint64_t ptr_address, bitmask;
        int fd, ret = 0;
        void *ptr;

        /* Pick an address and sign-extension mask matching the paging mode */
        if (la57_enabled()) {
                bitmask = L5_SIGN_EXT_MASK;
                ptr_address = HIGH_ADDR;
        } else {
                bitmask = L4_SIGN_EXT_MASK;
                ptr_address = LOW_ADDR;
        }

        ptr = mmap((void *)ptr_address, PAGE_SIZE, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);

        if (ptr == MAP_FAILED) {
                perror("failed to map byte to pass into get_user");
                return 1;
        }

        if (set_lam(test->lam) != 0) {
                ret = 2;
                goto error;
        }

        fd = memfd_create("lam_ioctl", 0);
        if (fd == -1) {
                munmap(ptr, PAGE_SIZE);
                exit(EXIT_FAILURE);
        }

        switch (test->later) {
        case GET_USER_USER:
                /* Control group - properly tagged user pointer */
                ptr = (void *)set_metadata((uint64_t)ptr, test->lam);
                break;
        case GET_USER_KERNEL_TOP:
                /* Kernel address with top bit cleared */
                bitmask &= (bitmask >> 1);
                ptr = (void *)((uint64_t)ptr | bitmask);
                break;
        case GET_USER_KERNEL_BOT:
                /* Kernel address with bottom sign-extension bit cleared */
                bitmask &= (bitmask << 1);
                ptr = (void *)((uint64_t)ptr | bitmask);
                break;
        case GET_USER_KERNEL:
                /* Try to pass a kernel address */
                ptr = (void *)((uint64_t)ptr | bitmask);
                break;
        default:
                printf("Invalid test case value passed!\n");
                break;
        }

        /*
         * Use FIOASYNC ioctl because it utilizes get_user() internally and is
         * very non-invasive to the system. Pass differently tagged pointers to
         * get_user() in order to verify that valid user pointers are going
         * through and invalid kernel/non-canonical pointers are not.
         */
        if (ioctl(fd, FIOASYNC, ptr) != 0)
                ret = 1;

        close(fd);
error:
        /*
         * NOTE(review): for the kernel-pointer variants ptr has had high bits
         * set above, so this munmap() targets the modified address and likely
         * fails silently — confirm whether the untagged address should be
         * unmapped instead.
         */
        munmap(ptr, PAGE_SIZE);
        return ret;
}
472
/* Thin wrapper over the io_uring_setup(2) syscall. */
int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
{
        long rc = syscall(__NR_io_uring_setup, entries, p);

        return (int)rc;
}
477
/* Thin wrapper over the io_uring_enter(2) syscall (no sigset argument). */
int sys_uring_enter(int fd, unsigned int to, unsigned int min, unsigned int flags)
{
        long rc = syscall(__NR_io_uring_enter, fd, to, min, flags, NULL, 0);

        return (int)rc;
}
482
483 /* Init submission queue and completion queue */
mmap_io_uring(struct io_uring_params p,struct io_ring * s)484 int mmap_io_uring(struct io_uring_params p, struct io_ring *s)
485 {
486 struct io_uring_queue *sring = &s->sq_ring;
487 struct io_uring_queue *cring = &s->cq_ring;
488
489 sring->ring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned int);
490 cring->ring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
491
492 if (p.features & IORING_FEAT_SINGLE_MMAP) {
493 if (cring->ring_sz > sring->ring_sz)
494 sring->ring_sz = cring->ring_sz;
495
496 cring->ring_sz = sring->ring_sz;
497 }
498
499 void *sq_ptr = mmap(0, sring->ring_sz, PROT_READ | PROT_WRITE,
500 MAP_SHARED | MAP_POPULATE, s->ring_fd,
501 IORING_OFF_SQ_RING);
502
503 if (sq_ptr == MAP_FAILED) {
504 perror("sub-queue!");
505 return 1;
506 }
507
508 void *cq_ptr = sq_ptr;
509
510 if (!(p.features & IORING_FEAT_SINGLE_MMAP)) {
511 cq_ptr = mmap(0, cring->ring_sz, PROT_READ | PROT_WRITE,
512 MAP_SHARED | MAP_POPULATE, s->ring_fd,
513 IORING_OFF_CQ_RING);
514 if (cq_ptr == MAP_FAILED) {
515 perror("cpl-queue!");
516 munmap(sq_ptr, sring->ring_sz);
517 return 1;
518 }
519 }
520
521 sring->head = sq_ptr + p.sq_off.head;
522 sring->tail = sq_ptr + p.sq_off.tail;
523 sring->ring_mask = sq_ptr + p.sq_off.ring_mask;
524 sring->ring_entries = sq_ptr + p.sq_off.ring_entries;
525 sring->flags = sq_ptr + p.sq_off.flags;
526 sring->array = sq_ptr + p.sq_off.array;
527
528 /* Map a queue as mem map */
529 s->sq_ring.queue.sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
530 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
531 s->ring_fd, IORING_OFF_SQES);
532 if (s->sq_ring.queue.sqes == MAP_FAILED) {
533 munmap(sq_ptr, sring->ring_sz);
534 if (sq_ptr != cq_ptr) {
535 ksft_print_msg("failed to mmap uring queue!");
536 munmap(cq_ptr, cring->ring_sz);
537 return 1;
538 }
539 }
540
541 cring->head = cq_ptr + p.cq_off.head;
542 cring->tail = cq_ptr + p.cq_off.tail;
543 cring->ring_mask = cq_ptr + p.cq_off.ring_mask;
544 cring->ring_entries = cq_ptr + p.cq_off.ring_entries;
545 cring->queue.cqes = cq_ptr + p.cq_off.cqes;
546
547 return 0;
548 }
549
550 /* Init io_uring queues */
setup_io_uring(struct io_ring * s)551 int setup_io_uring(struct io_ring *s)
552 {
553 struct io_uring_params para;
554
555 memset(¶, 0, sizeof(para));
556 s->ring_fd = sys_uring_setup(URING_QUEUE_SZ, ¶);
557 if (s->ring_fd < 0)
558 return 1;
559
560 return mmap_io_uring(para, s);
561 }
562
563 /*
564 * Get data from completion queue. the data buffer saved the file data
565 * return 0: success; others: error;
566 */
handle_uring_cq(struct io_ring * s)567 int handle_uring_cq(struct io_ring *s)
568 {
569 struct file_io *fi = NULL;
570 struct io_uring_queue *cring = &s->cq_ring;
571 struct io_uring_cqe *cqe;
572 unsigned int head;
573 off_t len = 0;
574
575 head = *cring->head;
576
577 do {
578 barrier();
579 if (head == *cring->tail)
580 break;
581 /* Get the entry */
582 cqe = &cring->queue.cqes[head & *s->cq_ring.ring_mask];
583 fi = (struct file_io *)cqe->user_data;
584 if (cqe->res < 0)
585 break;
586
587 int blocks = (int)(fi->file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
588
589 for (int i = 0; i < blocks; i++)
590 len += fi->iovecs[i].iov_len;
591
592 head++;
593 } while (1);
594
595 *cring->head = head;
596 barrier();
597
598 return (len != fi->file_sz);
599 }
600
/*
 * Submit squeue. specify via IORING_OP_READV.
 * the buffer need to be set metadata according to LAM mode
 */
int handle_uring_sq(struct io_ring *ring, struct file_io *fi, unsigned long lam)
{
        int file_fd = fi->file_fd;
        struct io_uring_queue *sring = &ring->sq_ring;
        unsigned int index = 0, cur_block = 0, tail = 0, next_tail = 0;
        struct io_uring_sqe *sqe;

        off_t remain = fi->file_sz;
        int blocks = (int)(remain + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;

        /* One aligned URING_BLOCK_SZ buffer per chunk, pointer stored tagged */
        while (remain) {
                off_t bytes = remain;
                void *buf;

                if (bytes > URING_BLOCK_SZ)
                        bytes = URING_BLOCK_SZ;

                fi->iovecs[cur_block].iov_len = bytes;

                /* NOTE(review): buffers allocated so far leak on this path */
                if (posix_memalign(&buf, URING_BLOCK_SZ, URING_BLOCK_SZ))
                        return 1;

                /* The kernel must untag this pointer while reading */
                fi->iovecs[cur_block].iov_base = (void *)set_metadata((uint64_t)buf, lam);
                remain -= bytes;
                cur_block++;
        }

        next_tail = *sring->tail;
        tail = next_tail;
        next_tail++;

        barrier();

        index = tail & *ring->sq_ring.ring_mask;

        /* Fill a single READV SQE covering the whole file */
        sqe = &ring->sq_ring.queue.sqes[index];
        sqe->fd = file_fd;
        sqe->flags = 0;
        sqe->opcode = IORING_OP_READV;
        sqe->addr = (unsigned long)fi->iovecs;
        sqe->len = blocks;
        sqe->off = 0;
        sqe->user_data = (uint64_t)fi;

        sring->array[index] = index;
        tail = next_tail;

        /* Publish the new tail so the kernel sees the SQE */
        if (*sring->tail != tail) {
                *sring->tail = tail;
                barrier();
        }

        /* Submit and wait for at least one completion */
        if (sys_uring_enter(ring->ring_fd, 1, 1, IORING_ENTER_GETEVENTS) < 0)
                return 1;

        return 0;
}
662
663 /*
664 * Test LAM in async I/O and io_uring, read current binery through io_uring
665 * Set metadata in pointers to iovecs buffer.
666 */
do_uring(unsigned long lam)667 int do_uring(unsigned long lam)
668 {
669 struct io_ring *ring;
670 struct file_io *fi;
671 struct stat st;
672 int ret = 1;
673 char path[PATH_MAX] = {0};
674
675 /* get current process path */
676 if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
677 return 1;
678
679 int file_fd = open(path, O_RDONLY);
680
681 if (file_fd < 0)
682 return 1;
683
684 if (fstat(file_fd, &st) < 0)
685 return 1;
686
687 off_t file_sz = st.st_size;
688
689 int blocks = (int)(file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
690
691 fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
692 if (!fi)
693 return 1;
694
695 fi->file_sz = file_sz;
696 fi->file_fd = file_fd;
697
698 ring = malloc(sizeof(*ring));
699 if (!ring) {
700 free(fi);
701 return 1;
702 }
703
704 memset(ring, 0, sizeof(struct io_ring));
705
706 if (setup_io_uring(ring))
707 goto out;
708
709 if (handle_uring_sq(ring, fi, lam))
710 goto out;
711
712 ret = handle_uring_cq(ring);
713
714 out:
715 free(ring);
716
717 for (int i = 0; i < blocks; i++) {
718 if (fi->iovecs[i].iov_base) {
719 uint64_t addr = ((uint64_t)fi->iovecs[i].iov_base);
720
721 switch (lam) {
722 case LAM_U57_BITS: /* Clear bits 62:57 */
723 addr = (addr & ~(LAM_U57_MASK));
724 break;
725 }
726 free((void *)addr);
727 fi->iovecs[i].iov_base = NULL;
728 }
729 }
730
731 free(fi);
732
733 return ret;
734 }
735
handle_uring(struct testcases * test)736 int handle_uring(struct testcases *test)
737 {
738 int ret = 0;
739
740 if (test->later == 0 && test->lam != 0)
741 if (set_lam(test->lam) != 0)
742 return 1;
743
744 if (sigsetjmp(segv_env, 1) == 0) {
745 signal(SIGSEGV, segv_handler);
746 ret = do_uring(test->lam);
747 } else {
748 ret = 2;
749 }
750
751 return ret;
752 }
753
fork_test(struct testcases * test)754 static int fork_test(struct testcases *test)
755 {
756 int ret, child_ret;
757 pid_t pid;
758
759 pid = fork();
760 if (pid < 0) {
761 perror("Fork failed.");
762 ret = 1;
763 } else if (pid == 0) {
764 ret = test->test_func(test);
765 exit(ret);
766 } else {
767 wait(&child_ret);
768 ret = WEXITSTATUS(child_ret);
769 }
770
771 return ret;
772 }
773
handle_execve(struct testcases * test)774 static int handle_execve(struct testcases *test)
775 {
776 int ret, child_ret;
777 int lam = test->lam;
778 pid_t pid;
779
780 pid = fork();
781 if (pid < 0) {
782 perror("Fork failed.");
783 ret = 1;
784 } else if (pid == 0) {
785 char path[PATH_MAX] = {0};
786
787 /* Set LAM mode in parent process */
788 if (set_lam(lam) != 0)
789 return 1;
790
791 /* Get current binary's path and the binary was run by execve */
792 if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
793 exit(-1);
794
795 /* run binary to get LAM mode and return to parent process */
796 if (execlp(path, path, "-t 0x0", NULL) < 0) {
797 perror("error on exec");
798 exit(-1);
799 }
800 } else {
801 wait(&child_ret);
802 ret = WEXITSTATUS(child_ret);
803 if (ret != LAM_NONE)
804 return 1;
805 }
806
807 return 0;
808 }
809
handle_inheritance(struct testcases * test)810 static int handle_inheritance(struct testcases *test)
811 {
812 int ret, child_ret;
813 int lam = test->lam;
814 pid_t pid;
815
816 /* Set LAM mode in parent process */
817 if (set_lam(lam) != 0)
818 return 1;
819
820 pid = fork();
821 if (pid < 0) {
822 perror("Fork failed.");
823 return 1;
824 } else if (pid == 0) {
825 /* Set LAM mode in parent process */
826 int child_lam = get_lam();
827
828 exit(child_lam);
829 } else {
830 wait(&child_ret);
831 ret = WEXITSTATUS(child_ret);
832
833 if (lam != ret)
834 return 1;
835 }
836
837 return 0;
838 }
839
/* clone() entry point: report this thread's current LAM mode. */
static int thread_fn_get_lam(void *arg)
{
        (void)arg;	/* unused */
        return get_lam();
}
844
thread_fn_set_lam(void * arg)845 static int thread_fn_set_lam(void *arg)
846 {
847 struct testcases *test = arg;
848
849 return set_lam(test->lam);
850 }
851
/*
 * Create a CLONE_VM thread and check its LAM mode.
 * later == 0: enable LAM first; the thread must inherit it.
 * later != 0: [negative] enabling LAM after the thread exists must fail.
 */
static int handle_thread(struct testcases *test)
{
        char stack[STACK_SIZE];
        int ret, child_ret;
        int lam = 0;
        pid_t pid;

        /* Set LAM mode in parent process */
        if (!test->later) {
                lam = test->lam;
                if (set_lam(lam) != 0)
                        return 1;
        }

        pid = clone(thread_fn_get_lam, stack + STACK_SIZE,
                    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, NULL);
        if (pid < 0) {
                perror("Clone failed.");
                return 1;
        }

        waitpid(pid, &child_ret, 0);
        ret = WEXITSTATUS(child_ret);

        /* Thread must report the mode the parent set (or LAM_NONE) */
        if (lam != ret)
                return 1;

        /* Negative: enabling LAM with a live thread is expected to fail */
        if (test->later) {
                if (set_lam(test->lam) != 0)
                        return 1;
        }

        return 0;
}
886
handle_thread_enable(struct testcases * test)887 static int handle_thread_enable(struct testcases *test)
888 {
889 char stack[STACK_SIZE];
890 int ret, child_ret;
891 int lam = test->lam;
892 pid_t pid;
893
894 pid = clone(thread_fn_set_lam, stack + STACK_SIZE,
895 SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, test);
896 if (pid < 0) {
897 perror("Clone failed.");
898 return 1;
899 }
900
901 waitpid(pid, &child_ret, 0);
902 ret = WEXITSTATUS(child_ret);
903
904 if (lam != ret)
905 return 1;
906
907 return 0;
908 }
/*
 * Run an array of test cases, each in its own forked child, and report
 * pass/fail/skip through the kselftest framework.
 */
static void run_test(struct testcases *test, int count)
{
        int i, ret = 0;

        for (i = 0; i < count; i++) {
                struct testcases *t = test + i;

                /* fork a process to run test case */
                tests_cnt++;
                ret = fork_test(t);

                /* return 3 is not support LA57, the case should be skipped */
                if (ret == 3) {
                        ksft_test_result_skip("%s", t->msg);
                        continue;
                }

                /* A non-zero result passes only when it matches the expectation */
                if (ret != 0)
                        ret = (t->expected == ret);
                else
                        ret = !(t->expected);

                ksft_test_result(ret, "%s", t->msg);
        }
}
934
/* io_uring cases: tagged iovec buffers with LAM on (pass) and off (fail) */
static struct testcases uring_cases[] = {
        {
                .later = 0,
                .lam = LAM_U57_BITS,
                .test_func = handle_uring,
                .msg = "URING: LAM_U57. Dereferencing pointer with metadata\n",
        },
        {
                .later = 1,
                .expected = 1,
                .lam = LAM_U57_BITS,
                .test_func = handle_uring,
                .msg = "URING:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
        },
};

/* malloc cases: tagged heap pointer; negative case expects SIGSEGV (2) */
static struct testcases malloc_cases[] = {
        {
                .later = 0,
                .lam = LAM_U57_BITS,
                .test_func = handle_malloc,
                .msg = "MALLOC: LAM_U57. Dereferencing pointer with metadata\n",
        },
        {
                .later = 1,
                .expected = 2,
                .lam = LAM_U57_BITS,
                .test_func = handle_malloc,
                .msg = "MALLOC:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
        },
};

/* ARCH_GET_MAX_TAG_BITS sanity check */
static struct testcases bits_cases[] = {
        {
                .test_func = handle_max_bits,
                .msg = "BITS: Check default tag bits\n",
        },
};

/* uname() with tagged pointer, plus the get_user()/FIOASYNC variants */
static struct testcases syscall_cases[] = {
        {
                .later = 0,
                .lam = LAM_U57_BITS,
                .test_func = handle_syscall,
                .msg = "SYSCALL: LAM_U57. syscall with metadata\n",
        },
        {
                .later = 1,
                .expected = 1,
                .lam = LAM_U57_BITS,
                .test_func = handle_syscall,
                .msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
        },
        {
                .later = GET_USER_USER,
                .lam = LAM_U57_BITS,
                .test_func = get_user_syscall,
                .msg = "GET_USER: get_user() and pass a properly tagged user pointer.\n",
        },
        {
                .later = GET_USER_KERNEL_TOP,
                .expected = 1,
                .lam = LAM_U57_BITS,
                .test_func = get_user_syscall,
                .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the top bit cleared.\n",
        },
        {
                .later = GET_USER_KERNEL_BOT,
                .expected = 1,
                .lam = LAM_U57_BITS,
                .test_func = get_user_syscall,
                .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the bottom sign-extension bit cleared.\n",
        },
        {
                .later = GET_USER_KERNEL,
                .expected = 1,
                .lam = LAM_U57_BITS,
                .test_func = get_user_syscall,
                .msg = "GET_USER:[Negative] get_user() and pass a kernel pointer.\n",
        },
};

/* mmap cases: LAM before/after mapping, at high (LA57) and low addresses */
static struct testcases mmap_cases[] = {
        {
                .later = 1,
                .expected = 0,
                .lam = LAM_U57_BITS,
                .addr = HIGH_ADDR,
                .test_func = handle_mmap,
                .msg = "MMAP: First mmap high address, then set LAM_U57.\n",
        },
        {
                .later = 0,
                .expected = 0,
                .lam = LAM_U57_BITS,
                .addr = HIGH_ADDR,
                .test_func = handle_mmap,
                .msg = "MMAP: First LAM_U57, then High address.\n",
        },
        {
                .later = 0,
                .expected = 0,
                .lam = LAM_U57_BITS,
                .addr = LOW_ADDR,
                .test_func = handle_mmap,
                .msg = "MMAP: First LAM_U57, then Low address.\n",
        },
};

/* fork/clone/execve inheritance semantics of the LAM mode */
static struct testcases inheritance_cases[] = {
        {
                .expected = 0,
                .lam = LAM_U57_BITS,
                .test_func = handle_inheritance,
                .msg = "FORK: LAM_U57, child process should get LAM mode same as parent\n",
        },
        {
                .expected = 0,
                .lam = LAM_U57_BITS,
                .test_func = handle_thread,
                .msg = "THREAD: LAM_U57, child thread should get LAM mode same as parent\n",
        },
        {
                .expected = 1,
                .lam = LAM_U57_BITS,
                .test_func = handle_thread_enable,
                .msg = "THREAD: [NEGATIVE] Enable LAM in child.\n",
        },
        {
                .expected = 1,
                .later = 1,
                .lam = LAM_U57_BITS,
                .test_func = handle_thread,
                .msg = "THREAD: [NEGATIVE] Enable LAM in parent after thread created.\n",
        },
        {
                .expected = 0,
                .lam = LAM_U57_BITS,
                .test_func = handle_execve,
                .msg = "EXECVE: LAM_U57, child process should get disabled LAM mode\n",
        },
};
1077
cmd_help(void)1078 static void cmd_help(void)
1079 {
1080 printf("usage: lam [-h] [-t test list]\n");
1081 printf("\t-t test list: run tests specified in the test list, default:0x%x\n", TEST_MASK);
1082 printf("\t\t0x1:malloc; 0x2:max_bits; 0x4:mmap; 0x8:syscall; 0x10:io_uring; 0x20:inherit;\n");
1083 printf("\t-h: help\n");
1084 }
1085
/* Check for file existence: 1 when stat() succeeds, 0 otherwise. */
uint8_t file_Exists(const char *fileName)
{
        struct stat sb;

        return (stat(fileName, &sb) == 0) ? 1 : 0;
}
1095
/* Sysfs idxd files: shell commands that configure dsa0/wq0.1 as a shared WQ */
const char *dsa_configs[] = {
        "echo 1 > /sys/bus/dsa/devices/dsa0/wq0.1/group_id",
        "echo shared > /sys/bus/dsa/devices/dsa0/wq0.1/mode",
        "echo 10 > /sys/bus/dsa/devices/dsa0/wq0.1/priority",
        "echo 16 > /sys/bus/dsa/devices/dsa0/wq0.1/size",
        "echo 15 > /sys/bus/dsa/devices/dsa0/wq0.1/threshold",
        "echo user > /sys/bus/dsa/devices/dsa0/wq0.1/type",
        "echo MyApp1 > /sys/bus/dsa/devices/dsa0/wq0.1/name",
        "echo 1 > /sys/bus/dsa/devices/dsa0/engine0.1/group_id",
        "echo dsa0 > /sys/bus/dsa/drivers/idxd/bind",
        /* bind files and devices, generated a device file in /dev */
        "echo wq0.1 > /sys/bus/dsa/drivers/user/bind",
};

/* DSA device file created once the sysfs configuration above succeeds */
const char *dsaDeviceFile = "/dev/dsa/wq0.1";
/* file for io: reports whether the idxd driver has PASID support enabled */
const char *dsaPasidEnable = "/sys/bus/dsa/devices/dsa0/pasid_enabled";
1115
1116 /*
1117 * DSA depends on kernel cmdline "intel_iommu=on,sm_on"
1118 * return pasid_enabled (0: disable 1:enable)
1119 */
Check_DSA_Kernel_Setting(void)1120 int Check_DSA_Kernel_Setting(void)
1121 {
1122 char command[256] = "";
1123 char buf[256] = "";
1124 char *ptr;
1125 int rv = -1;
1126
1127 snprintf(command, sizeof(command) - 1, "cat %s", dsaPasidEnable);
1128
1129 FILE *cmd = popen(command, "r");
1130
1131 if (cmd) {
1132 while (fgets(buf, sizeof(buf) - 1, cmd) != NULL);
1133
1134 pclose(cmd);
1135 rv = strtol(buf, &ptr, 16);
1136 }
1137
1138 return rv;
1139 }
1140
/*
 * Config DSA's sysfs files as shared DSA's WQ.
 * Generated a device file /dev/dsa/wq0.1
 * Return: 0 OK; 1 Failed; 3 Skip(SVA disabled).
 */
int Dsa_Init_Sysfs(void)
{
        uint len = ARRAY_SIZE(dsa_configs);
        const char **p = dsa_configs;

        /* Already configured by a previous run */
        if (file_Exists(dsaDeviceFile) == 1)
                return 0;

        /* check the idxd driver */
        if (file_Exists(dsaPasidEnable) != 1) {
                printf("Please make sure idxd driver was loaded\n");
                return 3;
        }

        /* Check SVA feature */
        if (Check_DSA_Kernel_Setting() != 1) {
                printf("Please enable SVA.(Add intel_iommu=on,sm_on in kernel cmdline)\n");
                return 3;
        }

        /* Apply each sysfs echo command; any failure aborts the setup */
        for (int i = 0; i < len; i++) {
                if (system(p[i]))
                        return 1;
        }

        /* After config, /dev/dsa/wq0.1 should be generated */
        return (file_Exists(dsaDeviceFile) != 1);
}
1175
1176 /*
1177 * Open DSA device file, triger API: iommu_sva_alloc_pasid
1178 */
allocate_dsa_pasid(void)1179 void *allocate_dsa_pasid(void)
1180 {
1181 int fd;
1182 void *wq;
1183
1184 fd = open(dsaDeviceFile, O_RDWR);
1185 if (fd < 0) {
1186 perror("open");
1187 return MAP_FAILED;
1188 }
1189
1190 wq = mmap(NULL, 0x1000, PROT_WRITE,
1191 MAP_SHARED | MAP_POPULATE, fd, 0);
1192 if (wq == MAP_FAILED)
1193 perror("mmap");
1194
1195 return wq;
1196 }
1197
set_force_svm(void)1198 int set_force_svm(void)
1199 {
1200 int ret = 0;
1201
1202 ret = syscall(SYS_arch_prctl, ARCH_FORCE_TAGGED_SVA);
1203
1204 return ret;
1205 }
1206
/*
 * Execute the LAM / PASID-alloc / force-SVA operations in the order encoded
 * in test->cmd (three 4-bit slots, see PAS_CMD()).
 * Returns 0 when the whole sequence succeeds, non-zero otherwise.
 */
int handle_pasid(struct testcases *test)
{
        uint tmp = test->cmd;
        uint runed = 0x0;	/* bitmask of operations already executed */
        int ret = 0;
        void *wq = NULL;

        ret = Dsa_Init_Sysfs();
        if (ret != 0)
                return ret;

        /* One command nibble per iteration, lowest slot first */
        for (int i = 0; i < 3; i++) {
                int err = 0;

                if (tmp & 0x1) {
                        /* run set lam mode — at most once */
                        if ((runed & 0x1) == 0)	{
                                err = set_lam(LAM_U57_BITS);
                                runed = runed | 0x1;
                        } else
                                err = 1;
                } else if (tmp & 0x4) {
                        /* run force svm — at most once */
                        if ((runed & 0x4) == 0)	{
                                err = set_force_svm();
                                runed = runed | 0x4;
                        } else
                                err = 1;
                } else if (tmp & 0x2) {
                        /* run allocate pasid — at most once */
                        if ((runed & 0x2) == 0) {
                                runed = runed | 0x2;
                                wq = allocate_dsa_pasid();
                                if (wq == MAP_FAILED)
                                        err = 1;
                        } else
                                err = 1;
                }

                ret = ret + err;
                if (ret > 0)
                        break;

                /* advance to the next command slot */
                tmp = tmp >> 4;
        }

        if (wq != MAP_FAILED && wq != NULL)
                if (munmap(wq, 0x1000))
                        printf("munmap failed %d\n", errno);

        /* All three operations must have run for the sequence to count */
        if (runed != 0x7)
                ret = 1;

        return (ret != 0);
}
1262
/*
 * Pasid test depends on idxd and SVA, kernel should enable iommu and sm.
 * command line(intel_iommu=on,sm_on)
 * Negative cases: enabling LAM before SVA binding must be rejected.
 */
static struct testcases pasid_cases[] = {
        {
                .expected = 1,
                .cmd = PAS_CMD(LAM_CMD_BIT, PAS_CMD_BIT, SVA_CMD_BIT),
                .test_func = handle_pasid,
                .msg = "PASID: [Negative] Execute LAM, PASID, SVA in sequence\n",
        },
        {
                .expected = 0,
                .cmd = PAS_CMD(LAM_CMD_BIT, SVA_CMD_BIT, PAS_CMD_BIT),
                .test_func = handle_pasid,
                .msg = "PASID: Execute LAM, SVA, PASID in sequence\n",
        },
        {
                .expected = 1,
                .cmd = PAS_CMD(PAS_CMD_BIT, LAM_CMD_BIT, SVA_CMD_BIT),
                .test_func = handle_pasid,
                .msg = "PASID: [Negative] Execute PASID, LAM, SVA in sequence\n",
        },
        {
                .expected = 0,
                .cmd = PAS_CMD(PAS_CMD_BIT, SVA_CMD_BIT, LAM_CMD_BIT),
                .test_func = handle_pasid,
                .msg = "PASID: Execute PASID, SVA, LAM in sequence\n",
        },
        {
                .expected = 0,
                .cmd = PAS_CMD(SVA_CMD_BIT, LAM_CMD_BIT, PAS_CMD_BIT),
                .test_func = handle_pasid,
                .msg = "PASID: Execute SVA, LAM, PASID in sequence\n",
        },
        {
                .expected = 0,
                .cmd = PAS_CMD(SVA_CMD_BIT, PAS_CMD_BIT, LAM_CMD_BIT),
                .test_func = handle_pasid,
                .msg = "PASID: Execute SVA, PASID, LAM in sequence\n",
        },
};
1305
/* Parse -t/-h, skip when LAM is unavailable, then run the selected suites. */
int main(int argc, char **argv)
{
        int c = 0;
        unsigned int tests = TEST_MASK;

        tests_cnt = 0;

        /* Skip the whole suite when the CPU or kernel lacks LAM */
        if (!lam_is_available())
                return KSFT_SKIP;

        while ((c = getopt(argc, argv, "ht:")) != -1) {
                switch (c) {
                case 't':
                        tests = strtoul(optarg, NULL, 16);
                        if (tests && !(tests & TEST_MASK)) {
                                ksft_print_msg("Invalid argument!\n");
                                return -1;
                        }
                        break;
                case 'h':
                        cmd_help();
                        return 0;
                default:
                        ksft_print_msg("Invalid argument\n");
                        return -1;
                }
        }

        /*
         * When tests is 0, it is not a real test case;
         * the option used by test case(execve) to check the lam mode in
         * process generated by execve, the process read back lam mode and
         * check with lam mode in parent process.
         */
        if (!tests)
                return (get_lam());

        /* Run test cases */
        if (tests & FUNC_MALLOC)
                run_test(malloc_cases, ARRAY_SIZE(malloc_cases));

        if (tests & FUNC_BITS)
                run_test(bits_cases, ARRAY_SIZE(bits_cases));

        if (tests & FUNC_MMAP)
                run_test(mmap_cases, ARRAY_SIZE(mmap_cases));

        if (tests & FUNC_SYSCALL)
                run_test(syscall_cases, ARRAY_SIZE(syscall_cases));

        if (tests & FUNC_URING)
                run_test(uring_cases, ARRAY_SIZE(uring_cases));

        if (tests & FUNC_INHERITE)
                run_test(inheritance_cases, ARRAY_SIZE(inheritance_cases));

        if (tests & FUNC_PASID)
                run_test(pasid_cases, ARRAY_SIZE(pasid_cases));

        /*
         * NOTE(review): the plan is declared after the results because the
         * case count is only known once run_test() has executed — confirm
         * the ksft framework tolerates set_plan at this point.
         */
        ksft_set_plan(tests_cnt);

        ksft_exit_pass();
}
1369