adding support for TLS global variables

This commit is contained in:
Blaise Tine
2022-07-14 06:03:02 -04:00
parent 77002dd06a
commit da834a28df
4 changed files with 93 additions and 37 deletions

View File

@@ -89,12 +89,22 @@ SECTIONS
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) } .exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
/* Thread Local Storage sections */ /* Thread Local Storage sections */
.tdata : .tdata :
{ {
PROVIDE_HIDDEN (__tdata_start = .); PROVIDE_HIDDEN (__tdata_start = .);
*(.tdata .tdata.* .gnu.linkonce.td.*) *(.tdata .tdata.* .gnu.linkonce.td.*)
PROVIDE_HIDDEN (__tdata_end = .);
} }
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } PROVIDE (__tdata_size = SIZEOF (.tdata));
/* Output section collecting the thread-local BSS input sections. */
.tbss :
{
/* Address at which .tbss begins. */
PROVIDE_HIDDEN (__tbss_start = .);
/* Distance from __tdata_start to __tbss_start, wrapped in ABSOLUTE so the
   symbol carries that difference as an absolute value. */
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
/* Gather .tbss, .tbss.*, .gnu.linkonce.tb.* and .tcommon input sections. */
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
/* Address just past the last gathered input section. */
PROVIDE_HIDDEN (__tbss_end = .);
}
PROVIDE (__tbss_size = SIZEOF (.tbss));
PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64));
.preinit_array : .preinit_array :
{ {
PROVIDE_HIDDEN (__preinit_array_start = .); PROVIDE_HIDDEN (__preinit_array_start = .);

View File

@@ -89,12 +89,22 @@ SECTIONS
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) } .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) } .exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
/* Thread Local Storage sections */ /* Thread Local Storage sections */
.tdata : .tdata :
{ {
PROVIDE_HIDDEN (__tdata_start = .); PROVIDE_HIDDEN (__tdata_start = .);
*(.tdata .tdata.* .gnu.linkonce.td.*) *(.tdata .tdata.* .gnu.linkonce.td.*)
PROVIDE_HIDDEN (__tdata_end = .);
} }
.tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } PROVIDE (__tdata_size = SIZEOF (.tdata));
/* Output section collecting the thread-local BSS input sections. */
.tbss :
{
/* Address at which .tbss begins. */
PROVIDE_HIDDEN (__tbss_start = .);
/* Distance from __tdata_start to __tbss_start, wrapped in ABSOLUTE so the
   symbol carries that difference as an absolute value. */
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
/* Gather .tbss, .tbss.*, .gnu.linkonce.tb.* and .tcommon input sections. */
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
/* Address just past the last gathered input section. */
PROVIDE_HIDDEN (__tbss_end = .);
}
PROVIDE (__tbss_size = SIZEOF (.tbss));
PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64));
.preinit_array : .preinit_array :
{ {
PROVIDE_HIDDEN (__preinit_array_start = .); PROVIDE_HIDDEN (__preinit_array_start = .);

View File

@@ -5,23 +5,35 @@
.type _start, @function .type _start, @function
_start: _start:
# execute stack initialization on all warps # initialize per-thread registers
la a1, vx_set_sp
csrr a0, CSR_NW # get num warps csrr a0, CSR_NW # get num warps
la a1, init_regs
.insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1 .insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1
jal vx_set_sp jal init_regs
# return back to single thread execution # return back to single thread execution
li a0, 1 li a0, 1
.insn s 0x6b, 0, x0, 0(a0) # tmc a0 .insn s 0x6b, 0, x0, 0(a0) # tmc a0
# Clear the bss segment # initialize TLS for all warps
csrr a0, CSR_NW # get num warps
la a1, __init_tls
.insn s 0x6b, 1, a1, 0(a0) # wspawn a0, a1
call __init_tls
# return back to single thread execution
li a0, 1
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
# clear BSS segment
la a0, _edata la a0, _edata
la a2, _end la a2, _end
sub a2, a2, a0 sub a2, a2, a0
li a1, 0 li a1, 0
call memset call memset
# Initialize trap vector
# la t0, trap_entry
# csrw mtvec, t0
# Register global termination functions # Register global termination functions
la a0, __libc_fini_array la a0, __libc_fini_array
@@ -49,9 +61,9 @@ _exit:
.insn s 0x6b, 0, x0, 0(a0) # tmc a0 .insn s 0x6b, 0, x0, 0(a0) # tmc a0
.section .text .section .text
.type vx_set_sp, @function .type init_regs, @function
.global vx_set_sp .global init_regs
vx_set_sp: init_regs:
# activate all threads # activate all threads
li a0, -1 li a0, -1
.insn s 0x6b, 0, x0, 0(a0) # tmc a0 .insn s 0x6b, 0, x0, 0(a0) # tmc a0
@@ -66,12 +78,21 @@ vx_set_sp:
# set stack pointer # set stack pointer
li sp, SMEM_BASE_ADDR # load stack base address li sp, SMEM_BASE_ADDR # load stack base address
#if SM_ENABLE #if SM_ENABLE
csrr a2, CSR_LTID # get local thread id csrr a0, CSR_LTID # get local thread id
#else #else
csrr a2, CSR_GTID # get global thread id csrr a0, CSR_GTID # get global thread id
#endif #endif
slli a1, a2, STACK_LOG2_SIZE sll a1, a0, STACK_LOG2_SIZE
sub sp, sp, a1 # sub thread block sub sp, sp, a1
# set thread pointer register
# use address space after BSS region
# ensure cacheline alignment
la a1, __tcb_aligned_size
mul a0, a0, a1
la tp, _end + 63
add tp, tp, a0
and tp, tp, -64
# disable active warps except warp0 # disable active warps except warp0
csrr a3, CSR_LWID # get local wid csrr a3, CSR_LWID # get local wid

View File

@@ -3,6 +3,7 @@
#include <unistd.h> #include <unistd.h>
#include <vx_intrinsics.h> #include <vx_intrinsics.h>
#include <vx_print.h> #include <vx_print.h>
#include <string.h>
int _close(int file) { return -1; } int _close(int file) { return -1; }
@@ -22,18 +23,36 @@ caddr_t _sbrk(int incr) {
} }
int _write(int file, char *ptr, int len) { int _write(int file, char *ptr, int len) {
int i; int i;
for (i = 0; i < len; ++i) { for (i = 0; i < len; ++i) {
vx_putchar(*ptr++); vx_putchar(*ptr++);
} }
return len; return len;
} }
int _kill(int pid, int sig) { return -1; } int _kill(int pid, int sig) { return -1; }
int _getpid() { int _getpid() {
return vx_warp_gid(); return vx_warp_gid();
} }
// Initialize the TLS block addressed by the tp register.
//
// Copies __tdata_size bytes from __tdata_start to tp, then zeroes
// __tbss_size bytes starting at tp + __tbss_offset. Takes no arguments and
// returns nothing.
//
// NOTE(review): assumes tp was already set to this thread's TLS block before
// the call - confirm against the startup code.
void __init_tls(void) {
// Symbols presumably provided by the linker script. They are declared as
// arrays so that the symbol address itself can be used: __tdata_start as a
// pointer, the other three cast to size_t as plain integer values.
extern char __tdata_start[];
extern char __tbss_offset[];
extern char __tdata_size[];
extern char __tbss_size[];
// activate all threads
// NOTE(review): presumably each activated thread then runs the copy below
// against its own tp value - confirm against vx_tmc semantics.
vx_tmc(-1);
// TLS memory initialization
// Local register variable bound to tp; it is read without being assigned,
// so the value used is whatever tp currently holds.
register char *__thread_self __asm__ ("tp");
// Copy the initialized thread-local data image to the start of the block.
memcpy(__thread_self, __tdata_start, (size_t)__tdata_size);
// Zero the thread-local BSS region that sits __tbss_offset bytes into the block.
memset(__thread_self + (size_t)__tbss_offset, 0, (size_t)__tbss_size);
// back to single thread execution
// The argument is 1 when vx_warp_id() returns 0 and 0 otherwise.
// NOTE(review): a 0 argument presumably leaves warps other than warp 0 with
// no active threads - confirm against vx_tmc semantics.
vx_tmc(0 == vx_warp_id());
}
#ifdef HAVE_INITFINI_ARRAY #ifdef HAVE_INITFINI_ARRAY
@@ -48,9 +67,7 @@ extern void _init (void);
#endif #endif
/* Iterate over all the init routines. */ /* Iterate over all the init routines. */
void void __libc_init_array (void) {
__libc_init_array (void)
{
size_t count; size_t count;
size_t i; size_t i;
@@ -77,9 +94,7 @@ extern void _fini (void);
#endif #endif
/* Run all the cleanup routines. */ /* Run all the cleanup routines. */
void void __libc_fini_array (void) {
__libc_fini_array (void)
{
size_t count; size_t count;
size_t i; size_t i;