fixed global object sharing between cores

This commit is contained in:
Blaise Tine
2020-12-24 19:36:07 -05:00
parent 703a861fe9
commit 4f689c4ce9
46 changed files with 6710 additions and 6792 deletions

View File

@@ -6,35 +6,61 @@
extern "C" {
#endif
#define NUM_CORES_MAX 8
// Descriptor for one spawn request: the routine to run, the pointer handed to
// it, and a thread count. Field order matters: the descriptor is built with a
// positional initializer { func_ptr, args, num_threads }.
typedef struct {
func_t function; // routine invoked as function(arguments)
void * arguments; // pointer passed unchanged to function
int nthreads; // thread count recorded when the descriptor is built
} spawn_t;
// NOTE(review): two declarations of g_spawn appear back to back (a single
// pointer, then an array of NUM_CORES_MAX pointers). This looks like the
// removed and added lines of a diff hunk shown without +/- markers; confirm
// which declaration is live.
spawn_t* g_spawn = NULL;
// One descriptor pointer per core; the code using it indexes with vx_core_id().
spawn_t* g_spawn[NUM_CORES_MAX];
// Warp entry routine (its address is what vx_spawn_warps passes to vx_wspawn):
// sets the thread mask, calls the registered user routine, then issues
// vx_tmc(0x1) on warp 0 and vx_tmc(0x0) on every other warp.
// NOTE(review): two function headers and two versions of several statements
// appear interleaved here (one reading the single g_spawn pointer, one reading
// g_spawn[core_id]). This looks like the removed and added lines of a diff
// hunk shown without +/- markers; confirm which version is live.
void spawn_warp_runonce() {
void spawn_warp_all() {
// activate all threads
vx_tmc(g_spawn->nthreads);
// variant using the value reported by vx_num_threads() rather than the
// count stored in the descriptor
int num_threads = vx_num_threads();
vx_tmc(num_threads);
// look up the descriptor registered for the core this warp runs on
int core_id = vx_core_id();
spawn_t* p_spawn = g_spawn[core_id];
// call user routine
g_spawn->function(g_spawn->arguments);
p_spawn->function(p_spawn->arguments);
// resume single-thread execution on exit
// resume single-warp execution on exit
// warp 0 gets mask 0x1, every other warp gets 0x0
int wid = vx_warp_id();
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
vx_tmc(tmask);
}
void spawn_warp_threads(int num_threads) {
// active all threads
vx_tmc(num_threads);
int core_id = vx_core_id();
spawn_t* p_spawn = g_spawn[core_id];
// call user routine
p_spawn->function(p_spawn->arguments);
// resume single-warp execution on exit
int wid = vx_warp_id();
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
vx_tmc(tmask);
}
// Launches func_ptr(args) from the calling core.
//   num_warps   - when greater than 1, vx_wspawn is called with this count and
//                 the address of the warp entry routine
//   num_threads - stored in the descriptor and passed to the routine that runs
//                 on the calling warp
//   func_ptr    - routine to run
//   args        - pointer handed to func_ptr
// Returns without doing anything when vx_core_id() >= NUM_CORES_MAX.
// NOTE(review): several statements appear in two versions (single g_spawn
// pointer vs. g_spawn[core_id], spawn_warp_runonce vs. spawn_warp_all /
// spawn_warp_threads). This looks like the removed and added lines of a diff
// hunk shown without +/- markers; confirm which version is live.
void vx_spawn_warps(int num_warps, int num_threads, func_t func_ptr , void * args) {
spawn_t spawn = { func_ptr, args, num_threads };
g_spawn = &spawn;
// guard the per-core table against an out-of-range core id
int core_id = vx_core_id();
if (core_id >= NUM_CORES_MAX)
return;
spawn_t spawn = { func_ptr, args, num_threads };
// NOTE(review): the published descriptor is a local of this call; presumably
// it has to stay live until every spawned warp has read it - confirm.
g_spawn[core_id] = &spawn;
if (num_warps > 1) {
vx_wspawn(num_warps, (unsigned)spawn_warp_runonce);
vx_wspawn(num_warps, (unsigned)spawn_warp_all);
}
// the calling warp runs the routine itself
spawn_warp_runonce();
spawn_warp_threads(num_threads);
}
#ifdef __cplusplus

View File

@@ -4,24 +4,37 @@
# _start: program entry point.
# Steps, in order: run vx_set_sp on every warp (wspawn plus a direct jal for
# the current warp), return to one active thread, zero the range
# [_edata, _end) with memset, register __libc_fini_array through atexit,
# call __libc_init_array, call main, then tail-call exit.
# NOTE(review): some instructions below appear twice in a row with different
# comments (both wspawn words, both tmc words, and the atexit /
# __libc_init_array sequence). This looks like the removed and added lines of
# a diff hunk shown without +/- markers; confirm which copy is live.
.global _start
.type _start, @function
_start:
# execute stack initialization on all warps
la a1, vx_set_sp
csrr a0, CSR_NW # get num warps
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
.word 0x00b5106b # wspawn a0, a1
jal vx_set_sp
# return back to single thread execution
li a0, 1
.word 0x0005006b # back to single thread
# Initialize global pointer
# call __cxx_global_var_init
.word 0x0005006b # tmc a0
# Clear the bss segment
# memset arguments: a0 = _edata, a1 = 0, a2 = _end - _edata
la a0, _edata
la a2, _end
sub a2, a2, a0
li a1, 0
call memset
la a0, __libc_fini_array # Register global termination functions
call atexit # to be called upon exit
call __libc_init_array # Run global initialization functions
# Register global termination functions
la a0, __libc_fini_array
# to be called upon exit
call atexit
# Run global initialization functions
call __libc_init_array
# call main program routine
call main
# call exit routine
tail exit
.size _start, .-_start
@@ -29,34 +42,39 @@ _start:
# _exit: loads 0 into a0 and issues the custom tmc encoding, which (per the
# comments below) disables all threads in the current warp.
# NOTE(review): the tmc word appears twice in a row with different comments;
# this looks like the removed and added lines of a diff hunk shown without
# +/- markers - confirm that only one copy is live.
.type _exit, @function
.global _exit
_exit:
# disable all threads in current warp
li a0, 0
.word 0x0005006b # disable all threads
.word 0x0005006b # tmc a0
# vx_set_sp: per-warp register set-up.
# Steps, in order: set the thread mask from CSR_NT, load gp with
# __global_pointer$, compute sp = base - (CSR_GTID << 10) + (CSR_LTID << 2),
# then issue tmc with a0 = 0 on every warp whose CSR_LWID is non-zero. Warp 0
# skips that tmc and returns directly.
# Registers written: a0, a1, a2, a3, gp, sp.
# NOTE(review): several instructions appear in two versions one after the other
# (two tmc words after the CSR_NT read, auipc/addi vs. la for gp, lui vs. la
# for the sp base, two CSR_LWID reads, two tmc words before RETURN). This
# looks like the removed and added lines of a diff hunk shown without +/-
# markers; confirm which version is live.
.section .text
.type vx_set_sp, @function
.global vx_set_sp
vx_set_sp:
# activate all threads
csrr a0, CSR_NT # get num threads
.word 0x0005006b # activate all threads
.word 0x0005006b # set thread mask
# set global pointer register
# linker relaxation is switched off around the gp load (push / norelax / pop)
.option push
.option norelax
1:auipc gp, %pcrel_hi(__global_pointer$)
addi gp, gp, %pcrel_lo(1b)
la gp, __global_pointer$
.option pop
# allocate a stack region for each thread on the processor
# set stack pointer
csrr a1, CSR_GTID # get global thread id
slli a1, a1, 10 # multiply by 1024
csrr a2, CSR_LTID # get local thread id
slli a2, a2, 2 # multiply by 4
lui sp, (SHARED_MEM_BASE_ADDR>>12) # load base sp
la sp, __stack_top$ # load stack base address
sub sp, sp, a1 # sub thread block
add sp, sp, a2 # reduce addr collision for perf
csrr a3, CSR_LWID # get wid
# disable active warps except warp0
csrr a3, CSR_LWID # get local wid
beqz a3, RETURN
li a0, 0
.word 0x0005006b # tmc 0
.word 0x0005006b # tmc a0
RETURN:
ret