/*
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2021 Warner Losh
 * Copyright (c) 2023 Stormshield
 * Copyright (c) 2023 Klara, Inc.
 */

/*
 * SWP exerciser: the main thread plus two secondary threads repeatedly take
 * a SWP-based mutex, stamp a shared word with their thread ID and then verify
 * for a random number of iterations that nobody else overwrote it.  Any
 * mismatch means mutual exclusion was violated and the process exits with
 * status 1.
 *
 * Target: 32-bit ARM (A32), unified syntax, raw system calls via "swi 0" with
 * the syscall number in r7.
 */

#include <sys/syscall.h>

#define	STDOUT_FILENO	1
#define	MUTEX_LOCKED	0x01
#define	MUTEX_UNLOCKED	0x00

#define	STACK_SIZE	4096
#define	TLS_SIZE	4096

	.text
	.file "swp_test.S"
	.syntax unified

/*
 * main
 *
 * Sets up the three shared words on its own stack, starts two secondary
 * threads one at a time (waiting for each to acknowledge), then joins the
 * contention loop itself as thread ID 0.  Never returns.
 */
	.globl main
	.p2align 2
	.type main,%function
	.code 32
main:
	/*
	 * Stack slots:
	 * 0 - Sync word
	 * 1 - Thread id
	 * 2 - Shared word
	 */
	sub	sp, sp, #12

	/*
	 * The sync word is the mutex every thread will SWP on.  It must start
	 * out unlocked; left uninitialized, a stale MUTEX_LOCKED value on the
	 * stack would make every thread spin forever.  The shared word needs
	 * no initialization because thread_loop always writes it before
	 * reading it.
	 */
	mov	r0, #MUTEX_UNLOCKED
	str	r0, [sp]

	/* Print a message */
	movw	r0, :lower16:.L.mainmsg
	movt	r0, :upper16:.L.mainmsg
	ldr	r1, =(.L.mainmsgEnd - .L.mainmsg - 1)
	bl	print

	/* Create two secondary threads */
	mov	r0, #1
	str	r0, [sp, #4]			/* Thread ID */
	movw	r0, :lower16:secondary_thread
	movt	r0, :upper16:secondary_thread
	mov	r1, sp
	movw	r2, :lower16:stack1
	movt	r2, :upper16:stack1
	movw	r3, :lower16:tls1
	movt	r3, :upper16:tls1
	bl	create_thr

1:
	/*
	 * Wait for the first new thread to ack its existence by
	 * incrementing the thread id.
	 */
	ldr	r0, [sp, #4]
	cmp	r0, #1
	bne	2f
	ldr	r7, =SYS_sched_yield
	swi	0
	b	1b

2:
	/* Create thread #2 */
	movw	r0, :lower16:secondary_thread
	movt	r0, :upper16:secondary_thread
	mov	r1, sp
	movw	r2, :lower16:stack2
	movt	r2, :upper16:stack2
	movw	r3, :lower16:tls2
	movt	r3, :upper16:tls2
	bl	create_thr

3:
	/*
	 * Wait for the second new thread to ack its existence by
	 * incrementing the thread id.
	 */
	ldr	r0, [sp, #4]
	cmp	r0, #2
	bne	4f
	ldr	r7, =SYS_sched_yield
	swi	0
	b	3b

	/* Loop */
4:
	mov	r0, sp				/* Sync word */
	mov	r1, #0				/* Thread ID 0 (main) */
	add	r2, sp, #8			/* Shared word */
	bl	thread_loop
	b	4b

	/* UNREACHABLE */
	mov	r0, #0
	ldr	r7, =SYS_exit
	swi	0

/*
 * secondary_thread
 *
 * Entry point handed to create_thr.  Reads its thread ID from the ack slot,
 * acknowledges startup by storing ID + 1 back, then runs thread_loop forever.
 *
 * In:	r0 = base of main's three-word block (sync, thread id, shared)
 */
	.p2align 2
	.type secondary_thread,%function
	.code 32
secondary_thread:
	/*
	 * On entry, r0 is where we stashed our sync word and
	 * ack word (thread ID).
	 *
	 * Stash the sync word in r4, thread ID in r5.
	 */
	mov	r4, r0
	ldr	r5, [r0, #4]

	/* Print a message */
	movw	r0, :lower16:.L.secondarymsg
	movt	r0, :upper16:.L.secondarymsg
	ldr	r1, =(.L.secondarymsgEnd - .L.secondarymsg - 1)
	bl	print

	/* Acknowledge that we started */
	add	r0, r5, #1
	str	r0, [r4, #4]

1:
	mov	r0, r4
	mov	r1, r5
	add	r2, r4, #8
	bl	thread_loop
	b	1b

/*
 * thread_loop
 *
 * One round of the test: take the lock, stamp the shared word, print
 * "<id> - cycles <n>", re-check the shared word n + 1 times, release the
 * lock and yield.  Exits the process with status 1 if the shared word ever
 * holds a value other than our thread ID while we own the lock.
 *
 * In:	r0 = address of sync word
 *	r1 = thread ID
 *	r2 = address of shared word
 * Saves and restores r4-r8 and lr.
 */
	.p2align 2
	.type thread_loop,%function
	.code 32
thread_loop:
	push	{r4, r5, r6, r7, r8, lr}

	/*
	 * r0 == sync word
	 * r1 == thread ID
	 * r2 == shared word
	 */
	mov	r4, r0
	mov	r5, r1
	mov	r6, r2

	/* r0 still holds the sync word address here */
	bl	lock_mutex_swp

	str	r5, [r6]			/* Write the thread ID */
	bl	random_cycles

	/* Save off the cycle count */
	mov	r8, r0

	/* Print the thread ID and cycle count */
	mov	r0, r5
	mov	r1, #0
	bl	printnum

	/* Separator */
	movw	r0, :lower16:.L.idsep
	movt	r0, :upper16:.L.idsep
	ldr	r1, =(.L.idsepEnd - .L.idsep - 1)
	bl	print

	/* Cycle count */
	mov	r0, r8
	mov	r1, #1
	bl	printnum

1:
	ldr	r0, [r6]
	cmp	r0, r5				/* Check against the thread ID */
	bne	2f
	str	r5, [r6]

	/*
	 * Check if the count hit 0, otherwise go again.
	 */
	cmp	r8, #0
	beq	3f
	sub	r8, r8, #1
	b	1b

2:
	/* exit(1) */
	mov	r0, #1
	ldr	r7, =SYS_exit
	swi	0

3:
	mov	r0, r4
	bl	unlock_mutex_swp

	/*
	 * Yield to lower the chance that we end up re-acquiring, the other two
	 * threads are still actively trying to acquire the lock.
	 */
	ldr	r7, =SYS_sched_yield
	swi	0

	pop	{r4, r5, r6, r7, r8, lr}
	bx	lr

/*
 * random_cycles
 *
 * Out:	r0 = 4 bytes from getrandom(2), masked to 0..0xfff
 * Clobbers r1, r2 and r7 (r7 is not preserved; thread_loop, the only caller,
 * saves it itself).  The getrandom result is not checked.
 */
	.p2align 2
	.type random_cycles,%function
	.code 32
random_cycles:
	/* Return a random number < 4k */
	sub	sp, sp, #4
	mov	r0, sp				/* buf */
	mov	r1, #4				/* buflen */
	mov	r2, #0				/* flags */
	ldr	r7, =SYS_getrandom
	swi	0

	/*
	 * Just truncate the result of getrandom(2)
	 * to put us within range.  Naive, but functional.
	 */
	ldr	r0, [sp]
	mov	r1, #0xfff
	and	r0, r0, r1
	add	sp, sp, #4
	bx	lr

/*
 * lock_mutex_swp and unlock_mutex_swp lifted from
 * ARM documentation on SWP/SWPB.
 */

/*
 * lock_mutex_swp
 *
 * Spins until the lock word is acquired.
 *
 * In:	r0 = address of lock word
 * Clobbers r1, r2 and the condition flags.
 */
	.p2align 2
	.type lock_mutex_swp,%function
	.code 32
lock_mutex_swp:
	mov	r2, #MUTEX_LOCKED
	swp	r1, r2, [r0]			/* Swap in lock value. */
	cmp	r1, r2				/* Check if we were locked already. */
	beq	lock_mutex_swp			/* Retry if so */
	bx	lr				/* Return locked */

/*
 * unlock_mutex_swp
 *
 * In:	r0 = address of lock word
 * Clobbers r1.
 */
	.p2align 2
	.type unlock_mutex_swp,%function
	.code 32
unlock_mutex_swp:
	mov	r1, #MUTEX_UNLOCKED
	str	r1, [r0]			/* Move in unlocked */
	bx	lr

/*
 * create_thr
 *
 * Builds a 52-byte thread parameter block at sp + 4 and issues thr_new.
 *
 * In:	r0 = start_func
 *	r1 = arg
 *	r2 = stack_base
 *	r3 = tls_base
 * Clobbers r0, r1 and r7.  The syscall result is not checked; r0 holds
 * whatever the kernel left there.
 */
	.p2align 2
	.type create_thr,%function
	.code 32
create_thr:
	/*
	 * r0 == start_func
	 * r1 == arg
	 * r2 == stack_base
	 * r3 == tls_base
	 */
	sub	sp, sp, #56
	str	r0, [sp, #4]			/* start_func */
	str	r1, [sp, #8]			/* arg */
	str	r2, [sp, #12]			/* stack_base */
	mov	r0, #STACK_SIZE
	str	r0, [sp, #16]			/* stack_size */
	str	r3, [sp, #20]			/* tls_base */
	mov	r0, #TLS_SIZE
	str	r0, [sp, #24]			/* tls_size */

	/*
	 * Zero the rest of the block.  Per thr_new(2) these are child_tid,
	 * parent_tid, flags, rtp and three spare words; the spare words are
	 * cleared too so no stale stack contents are handed to the kernel.
	 */
	mov	r0, #0
	str	r0, [sp, #28]
	str	r0, [sp, #32]
	str	r0, [sp, #36]
	str	r0, [sp, #40]
	str	r0, [sp, #44]
	str	r0, [sp, #48]
	str	r0, [sp, #52]

	add	r0, sp, #4			/* &thrp */
	mov	r1, #52				/* sizeof(thrp) */
	ldr	r7, =SYS_thr_new
	swi	0

	add	sp, sp, #56
	bx	lr

/*
 * printnum
 *
 * Prints r0 as unsigned decimal without leading zeros (a lone "0" for zero),
 * optionally followed by a newline.
 *
 * In:	r0 = value
 *	r1 = non-zero to append a newline
 * Saves and restores r4-r8, r10 and lr; uses one stack word as the
 * single-character output buffer.
 */
	.p2align 2
	.type printnum,%function
	.code 32
printnum:
	push	{r4, r5, r6, r7, r8, r10, lr}
	sub	sp, #4

	/* 1000000000 */
	movw	r6, #0xca00
	movt	r6, #0x3b9a

	/* Sanity check: the leading digit must be a single decimal digit */
	udiv	r5, r0, r6
	cmp	r5, #9
	bhi	abort

	/* r4 is our accumulator */
	mov	r4, r0
	/* r5 to be used as our "significant bit" */
	mov	r5, #0
	/* r10 is "output_newline" */
	mov	r10, r1

1:
	/* r6 is the current place value; it reaches 0 after the ones place */
	cmp	r6, #0
	beq	4f

	/* Divide by current place */
	udiv	r0, r4, r6
	/* Significant already? print anyways */
	cmp	r5, #0
	bne	2f

	/*
	 * Not significant, maybe print.  If we made it all the way to 1, we
	 * need to just print the 0 anyways.
	 */
	cmp	r6, #1
	beq	2f

	cmp	r0, #0
	bne	2f
	b	3f				/* Proceed */

	/* Print */
2:
	mov	r5, #1
	mov	r8, r0				/* Keep the digit across print */
	add	r0, r0, #0x30			/* Digit to ASCII */
	str	r0, [sp]
	mov	r0, sp
	mov	r1, #1
	bl	print

	/* Multiply back into place and subtract from accumulator */
	mul	r0, r8, r6
	sub	r4, r4, r0

3:
	/* Move down one decimal place */
	mov	r3, #10
	udiv	r6, r6, r3
	b	1b

4:
	cmp	r10, #0
	beq	5f

	/* newline */
	mov	r0, #0x0a
	str	r0, [sp]
	mov	r0, sp
	mov	r1, #1
	bl	print

5:
	add	sp, sp, #4
	pop	{r4, r5, r6, r7, r8, r10, lr}
	bx	lr

/* printnum failure path: report and exit(1) */
abort:
	movw	r0, :lower16:.L.badnum
	movt	r0, :upper16:.L.badnum
	ldr	r1, =(.L.badnumEnd - .L.badnum - 1)
	bl	print

	mov	r0, #1
	ldr	r7, =SYS_exit
	swi	0

/*
 * print
 *
 * Issues write(STDOUT_FILENO, r0, r1).
 *
 * In:	r0 = buffer address
 *	r1 = byte count
 * Clobbers r0, r1, r2 and r7 (r7 is not preserved).  The result is not
 * checked.
 */
	.p2align 2
	.type print,%function
	.code 32
print:
	/* r0 - string, r1 = size */
	mov	r2, r1
	mov	r1, r0
	ldr	r0, =STDOUT_FILENO
	ldr	r7, =SYS_write
	swi	0

	bx	lr

/*
 * Messages.  Each is NUL-terminated by .asciz; callers pass
 * (End - start - 1) as the length so the NUL is not written.
 */
.L.mainmsg:
	.asciz "Main thread\n"
.L.mainmsgEnd:
	.size .L.mainmsg, .L.mainmsgEnd - .L.mainmsg
.L.secondarymsg:
	.asciz "Secondary thread\n"
.L.secondarymsgEnd:
	.size .L.secondarymsg, .L.secondarymsgEnd - .L.secondarymsg
.L.badnum:
	.asciz "Bad number\n"
.L.badnumEnd:
	.size .L.badnum, .L.badnumEnd - .L.badnum
.L.idsep:
	.asciz " - cycles "
.L.idsepEnd:
	.size .L.idsep, .L.idsepEnd - .L.idsep

/* Stack and TLS areas for the two secondary threads */
	.type stack1,%object
	.local stack1
	.comm stack1,STACK_SIZE,1
	.type tls1,%object
	.local tls1
	.comm tls1,TLS_SIZE,1

	.type stack2,%object
	.local stack2
	.comm stack2,STACK_SIZE,1
	.type tls2,%object
	.local tls2
	.comm tls2,TLS_SIZE,1