From da834a28df28324de48c6c8e810e92cff94e47d4 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Thu, 14 Jul 2022 06:03:02 -0400 Subject: [PATCH] adding support for TLS global variables --- runtime/linker/vx_link32.ld | 14 ++++++++-- runtime/linker/vx_link64.ld | 14 ++++++++-- runtime/src/vx_start.S | 55 +++++++++++++++++++++++++------------ runtime/src/vx_syscalls.c | 47 ++++++++++++++++++++----------- 4 files changed, 93 insertions(+), 37 deletions(-) diff --git a/runtime/linker/vx_link32.ld b/runtime/linker/vx_link32.ld index 53fe7521..7461e516 100644 --- a/runtime/linker/vx_link32.ld +++ b/runtime/linker/vx_link32.ld @@ -89,12 +89,22 @@ SECTIONS .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } .exception_ranges : ONLY_IF_RW { *(.exception_ranges*) } /* Thread Local Storage sections */ - .tdata : + .tdata : { PROVIDE_HIDDEN (__tdata_start = .); *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE_HIDDEN (__tdata_end = .); } - .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } + PROVIDE (__tdata_size = SIZEOF (.tdata)); + .tbss : + { + PROVIDE_HIDDEN (__tbss_start = .); + PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start)); + *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) + PROVIDE_HIDDEN (__tbss_end = .); + } + PROVIDE (__tbss_size = SIZEOF (.tbss)); + PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64)); .preinit_array : { PROVIDE_HIDDEN (__preinit_array_start = .); diff --git a/runtime/linker/vx_link64.ld b/runtime/linker/vx_link64.ld index f67c67cc..18b3669b 100644 --- a/runtime/linker/vx_link64.ld +++ b/runtime/linker/vx_link64.ld @@ -89,12 +89,22 @@ SECTIONS .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } .exception_ranges : ONLY_IF_RW { *(.exception_ranges*) } /* Thread Local Storage sections */ - .tdata : + .tdata : { PROVIDE_HIDDEN (__tdata_start = .); *(.tdata .tdata.* .gnu.linkonce.td.*) + PROVIDE_HIDDEN (__tdata_end = .); } - .tbss : { *(.tbss .tbss.* 
.gnu.linkonce.tb.*) *(.tcommon) } + PROVIDE (__tdata_size = SIZEOF (.tdata)); + .tbss : + { + PROVIDE_HIDDEN (__tbss_start = .); + PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start)); + *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) + PROVIDE_HIDDEN (__tbss_end = .); + } + PROVIDE (__tbss_size = SIZEOF (.tbss)); + PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64)); .preinit_array : { PROVIDE_HIDDEN (__preinit_array_start = .); diff --git a/runtime/src/vx_start.S b/runtime/src/vx_start.S index 16e91a15..3f35207a 100644 --- a/runtime/src/vx_start.S +++ b/runtime/src/vx_start.S @@ -4,32 +4,44 @@ .global _start .type _start, @function _start: - - # execute stack initialization on all warps - la a1, vx_set_sp - csrr a0, CSR_NW # get num warps - .insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1 - jal vx_set_sp + # initialize per-thread registers + csrr a0, CSR_NW # get num warps + la a1, init_regs + .insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1 + jal init_regs + # return back to single thread execution + li a0, 1 + .insn s 0x6b, 0, x0, 0(a0) # tmc a0 + + # initialize TLS for all warps + csrr a0, CSR_NW # get num warps + la a1, __init_tls + .insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1 + call __init_tls # return back to single thread execution li a0, 1 .insn s 0x6b, 0, x0, 0(a0) # tmc a0 - # Clear the bss segment + # clear BSS segment la a0, _edata la a2, _end sub a2, a2, a0 li a1, 0 call memset + # Initialize trap vector + # la t0, trap_entry + # csrw mtvec, t0 + # Register global termination functions - la a0, __libc_fini_array + la a0, __libc_fini_array # to be called upon exit - call atexit + call atexit # Run global initialization functions - call __libc_init_array + call __libc_init_array # call main program routine call main @@ -49,9 +61,9 @@ _exit: .insn s 0x6b, 0, x0, 0(a0) # tmc a0 .section .text -.type vx_set_sp, @function -.global vx_set_sp -vx_set_sp: +.type init_regs, @function +.global init_regs +init_regs: # activate all threads 
li a0, -1 .insn s 0x6b, 0, x0, 0(a0) # tmc a0 @@ -66,12 +78,21 @@ vx_set_sp: # set stack pointer li sp, SMEM_BASE_ADDR # load stack base address #if SM_ENABLE - csrr a2, CSR_LTID # get local thread id + csrr a0, CSR_LTID # get local thread id #else - csrr a2, CSR_GTID # get global thread id + csrr a0, CSR_GTID # get global thread id #endif - slli a1, a2, STACK_LOG2_SIZE - sub sp, sp, a1 # sub thread block + sll a1, a0, STACK_LOG2_SIZE + sub sp, sp, a1 + + # set thread pointer register + # use address space after BSS region + # ensure cacheline alignment + la a1, __tcb_aligned_size + mul a0, a0, a1 + la tp, _end + 63 + add tp, tp, a0 + and tp, tp, -64 # disable active warps except warp0 csrr a3, CSR_LWID # get local wid diff --git a/runtime/src/vx_syscalls.c b/runtime/src/vx_syscalls.c index 37e4d193..fcd8b26f 100644 --- a/runtime/src/vx_syscalls.c +++ b/runtime/src/vx_syscalls.c @@ -3,6 +3,7 @@ #include #include #include +#include int _close(int file) { return -1; } @@ -22,18 +23,36 @@ caddr_t _sbrk(int incr) { } int _write(int file, char *ptr, int len) { - int i; - for (i = 0; i < len; ++i) { - vx_putchar(*ptr++); - } - return len; - } + int i; + for (i = 0; i < len; ++i) { + vx_putchar(*ptr++); + } + return len; +} - int _kill(int pid, int sig) { return -1; } +int _kill(int pid, int sig) { return -1; } - int _getpid() { - return vx_warp_gid(); - } +int _getpid() { + return vx_warp_gid(); +} + +void __init_tls(void) { + extern char __tdata_start[]; + extern char __tbss_offset[]; + extern char __tdata_size[]; + extern char __tbss_size[]; + + // activate all threads + vx_tmc(-1); + + // TLS memory initialization + register char *__thread_self __asm__ ("tp"); + memcpy(__thread_self, __tdata_start, (size_t)__tdata_size); + memset(__thread_self + (size_t)__tbss_offset, 0, (size_t)__tbss_size); + + // back to single thread execution + vx_tmc(0 == vx_warp_id()); +} #ifdef HAVE_INITFINI_ARRAY @@ -48,9 +67,7 @@ extern void _init (void); #endif /* Iterate over all the init 
routines. */ -void -__libc_init_array (void) -{ +void __libc_init_array (void) { size_t count; size_t i; @@ -77,9 +94,7 @@ extern void _fini (void); #endif /* Run all the cleanup routines. */ -void -__libc_fini_array (void) -{ +void __libc_fini_array (void) { size_t count; size_t i;