diff --git a/runtime/Makefile b/runtime/Makefile
index ab9d150a..fe486999 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -5,12 +5,12 @@ AR = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc-ar
 DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
 CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
 
-CFLAGS += -O3 -march=rv32imf -mabi=ilp32f -fno-exceptions -fdata-sections -ffunction-sections
+CFLAGS += -O3 -march=rv32imf -mabi=ilp32f -Wstack-usage=1024 -fno-exceptions -fdata-sections -ffunction-sections
 CFLAGS += -I./include -I../hw
 
 PROJECT = libvortexrt
 
-SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c ./src/vx_perf.c
+SRCS = ./src/vx_start.S ./src/vx_syscalls.c ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c ./src/vx_spawn.S ./src/vx_perf.c
 
 OBJS := $(addsuffix .o, $(notdir $(SRCS)))
 
diff --git a/runtime/include/vx_spawn.h b/runtime/include/vx_spawn.h
index 0071eba7..301a2b0d 100644
--- a/runtime/include/vx_spawn.h
+++ b/runtime/include/vx_spawn.h
@@ -26,11 +26,13 @@ typedef void (*pfn_workgroup_func) (
   uint32_t /* group_z */
 );
 
-typedef void (*pfn_callback)(int task_id, const void *arg);
+typedef void (*pfn_callback)(int task_id, void *arg);
 
-void vx_spawn_kernel(struct context_t * ctx, pfn_workgroup_func wg_func, const void * args);
+void vx_spawn_kernel(struct context_t * ctx, pfn_workgroup_func wg_func, void * arg);
 
-void vx_spawn_tasks(int num_tasks, pfn_callback callback, const void * args);
+void vx_spawn_tasks(int num_tasks, pfn_callback callback, void * arg);
+
+void vx_serial(pfn_callback callback, void * arg);
 
 #ifdef __cplusplus
 }
diff --git a/runtime/src/vx_print.c b/runtime/src/vx_print.c
index 80d2cb59..e3e93190 100644
--- a/runtime/src/vx_print.c
+++ b/runtime/src/vx_print.c
@@ -1,4 +1,5 @@
 #include
+#include
 #include
 #include
 #include
@@ -8,45 +9,36 @@ extern "C" {
 #endif
 
 
-int __attribute__((noinline)) __vprintf(int index, int tid, const char* format, va_list va) {
-  __if (index == tid) {
-    return vprintf(format, va);
-  }__endif
-  return 0;
+// Arguments handed to __printf_callback through vx_serial.
+struct printf_arg_t {
+  const char* format;
+  va_list va;
+  int ret;
+};
+
+// vx_serial callback: forwards the captured format and va_list to vprintf.
+static void __printf_callback(int task_id, void* arg) {
+  struct printf_arg_t* p_arg = (struct printf_arg_t*)(arg);
+  p_arg->ret = vprintf(p_arg->format, p_arg->va);
 }
 
 int vx_vprintf(const char* format, va_list va) {
-  int ret = 0;
-
-  // need to execute single-threaded due to potential thread-data dependency
-  // use manual goto loop to disable compiler optimizations affceting split/join placement
-
-  volatile int nt = vx_num_threads();
-  int tid = vx_thread_id();
-
-  for (int i = 0; i < nt; ++i) {
-    ret |= __vprintf(i, tid, format, va);
-  }
-
-  return ret;
+  // need to execute 'vprintf' single-threaded due to potential thread-data dependency
+  struct printf_arg_t arg;
+  arg.format = format;
+  // va_list may be an array type, so duplicate it with va_copy instead of assigning
+  va_copy(arg.va, va);
+  vx_serial(__printf_callback, &arg);
+  va_end(arg.va);
+  return arg.ret;
 }
 
 int vx_printf(const char * format, ...) {
-  int ret = 0;
-
-  // need to execute single-threaded due to potential thread-data dependency
-  // use manual goto loop to disable compiler optimizations affceting split/join placement
-
-  volatile int nt = vx_num_threads();
-  int tid = vx_thread_id();
-
+  int ret;
   va_list va;
   va_start(va, format);
-  for (int i = 0; i < nt; ++i) {
-    ret |= __vprintf(i, tid, format, va);
-  }
-  va_end(va);
-
+  ret = vx_vprintf(format, va);
+  va_end(va);
   return ret;
 }
 
diff --git a/runtime/src/vx_spawn.S b/runtime/src/vx_spawn.S
new file mode 100644
index 00000000..cf9caa48
--- /dev/null
+++ b/runtime/src/vx_spawn.S
@@ -0,0 +1,38 @@
+# vx_serial(callback, arg): run callback(index, arg) for index = 0..NT-1,
+# with only the thread whose id equals index taking the call on each pass.
+.type vx_serial, @function
+.global vx_serial
+vx_serial:
+    addi sp, sp, -32            # 24 bytes of saves, padded to keep sp a multiple of 16
+    sw   ra, 20(sp)
+    sw   s4, 16(sp)
+    sw   s3, 12(sp)
+    sw   s2, 8(sp)
+    sw   s1, 4(sp)
+    sw   s0, 0(sp)
+    mv   s4, a0                 # callback
+    mv   s3, a1                 # arg
+    csrr s2, 0xfc0              # NT
+    csrr s1, 0xcc0              # tid
+    li   s0, 0                  # index
+label_loop:
+    sub  t0, s0, s1
+    snez t0, t0                 # t0 = (index != tid)
+    .insn s 0x6b, 2, x0, 0(t0)  # split t0
+    bnez t0, label_join         # threads other than 'index' skip the call
+    mv   a0, s0                 # a0 <- index
+    mv   a1, s3                 # a1 <- arg
+    jalr s4                     # callback(index, arg)
+label_join:
+    .insn s 0x6b, 3, x0, 0(x0)  # join
+    addi s0, s0, 1
+    blt  s0, s2, label_loop
+    lw   ra, 20(sp)
+    lw   s4, 16(sp)
+    lw   s3, 12(sp)
+    lw   s2, 8(sp)
+    lw   s1, 4(sp)
+    lw   s0, 0(sp)
+    addi sp, sp, 32
+
+    ret
\ No newline at end of file
diff --git a/runtime/src/vx_spawn.c b/runtime/src/vx_spawn.c
index 0fc8184d..8d077099 100644
--- a/runtime/src/vx_spawn.c
+++ b/runtime/src/vx_spawn.c
@@ -12,7 +12,7 @@ extern "C" {
 
 typedef struct {
   pfn_callback callback;
-  const void * args;
+  void * arg;
   int offset;
   int N;
   int R;
@@ -21,7 +21,7 @@ typedef struct {
 typedef struct {
   struct context_t * ctx;
   pfn_workgroup_func wg_func;
-  const void * args;
+  void * arg;
   int offset;
   int N;
   int R;
@@ -57,7 +57,7 @@ static void spawn_tasks_callback() {
   int offset = p_wspawn_args->offset + (wK * NT) + (tid * tK);
 
   for (int task_id = offset, N = task_id + tK; task_id < N; ++task_id) {
-    (p_wspawn_args->callback)(task_id, p_wspawn_args->args);
+    (p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
   }
 
   vx_tmc(0 == wid);
@@ -72,12 +72,12 @@ void spawn_remaining_tasks_callback(int nthreads) {
   wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[core_id];
 
   int task_id = p_wspawn_args->offset + tid;
-  (p_wspawn_args->callback)(task_id, p_wspawn_args->args);
+  (p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
 
   vx_tmc(1);
 }
 
-void vx_spawn_tasks(int num_tasks, pfn_callback callback , const void * args) {
+void vx_spawn_tasks(int num_tasks, pfn_callback callback , void * arg) {
   // device specs
   int NC = vx_num_cores();
   int NW = vx_num_warps();
@@ -112,7 +112,7 @@ void vx_spawn_tasks(int num_tasks, pfn_callback callback , const void * args) {
     fW = 1;
 
   //--
-  wspawn_tasks_args_t wspawn_args = { callback, args, core_id * tasks_per_core, fW, rW };
+  wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW };
   g_wspawn_args[core_id] = &wspawn_args;
 
   //--
@@ -159,7 +159,7 @@ static void spawn_kernel_callback() {
     int gid1 = p_wspawn_args->ctx->global_offset[1] + j;
     int gid2 = p_wspawn_args->ctx->global_offset[2] + k;
 
-    (p_wspawn_args->wg_func)(p_wspawn_args->args, p_wspawn_args->ctx, gid0, gid1, gid2);
+    (p_wspawn_args->wg_func)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
   }
 
   vx_tmc(0 == wid);
@@ -188,12 +188,12 @@ static void spawn_kernel_remaining_callback(int nthreads) {
   int gid1 = p_wspawn_args->ctx->global_offset[1] + j;
   int gid2 = p_wspawn_args->ctx->global_offset[2] + k;
 
-  (p_wspawn_args->wg_func)(p_wspawn_args->args, p_wspawn_args->ctx, gid0, gid1, gid2);
+  (p_wspawn_args->wg_func)(p_wspawn_args->arg, p_wspawn_args->ctx, gid0, gid1, gid2);
 
   vx_tmc(1);
 }
 
-void vx_spawn_kernel(struct context_t * ctx, pfn_workgroup_func wg_func, const void * args) {
+void vx_spawn_kernel(struct context_t * ctx, pfn_workgroup_func wg_func, void * arg) {
   // total number of WGs
   int X = ctx->num_groups[0];
   int Y = ctx->num_groups[1];
@@ -241,7 +241,7 @@ void vx_spawn_kernel(struct context_t * ctx, pfn_workgroup_func wg_func, const v
   char log2X = fast_log2(X);
 
   //--
-  wspawn_kernel_args_t wspawn_args = { ctx, wg_func, args, core_id * wgs_per_core, fW, rW, isXYpow2, isXpow2, log2XY, log2X };
+  wspawn_kernel_args_t wspawn_args = { ctx, wg_func, arg, core_id * wgs_per_core, fW, rW, isXYpow2, isXpow2, log2XY, log2X };
   g_wspawn_args[core_id] = &wspawn_args;
 
   //--
diff --git a/runtime/src/vx_syscalls.c b/runtime/src/vx_syscalls.c
new file mode 100644
index 00000000..37d60b8d
--- /dev/null
+++ b/runtime/src/vx_syscalls.c
@@ -0,0 +1,91 @@
+#include
+#include
+#include
+#include
+#include
+
+int _close(int file) { return -1; }
+
+int _fstat(int file, struct stat *st) { return -1; }
+
+int _isatty(int file) { return 0; }
+
+int _lseek(int file, int ptr, int dir) { return 0; }
+
+int _open(const char *name, int flags, int mode) { return -1; }
+
+int _read(int file, char *ptr, int len) { return -1; }
+
+caddr_t _sbrk(int incr) { return 0; }
+
+int _write(int file, char *ptr, int len) {
+  int i;
+  for (i = 0; i < len; ++i) {
+    vx_putchar(*ptr++);
+  }
+  return len;
+}
+
+int _kill(int pid, int sig) { return -1; }
+
+int _getpid() {
+  return vx_warp_gid();
+}
+
+#ifdef HAVE_INITFINI_ARRAY
+
+/* These magic symbols are provided by the linker. */
+extern void (*__preinit_array_start []) (void) __attribute__((weak));
+extern void (*__preinit_array_end []) (void) __attribute__((weak));
+extern void (*__init_array_start []) (void) __attribute__((weak));
+extern void (*__init_array_end []) (void) __attribute__((weak));
+
+#ifdef HAVE_INIT_FINI
+extern void _init (void);
+#endif
+
+/* Iterate over all the init routines. */
+void
+__libc_init_array (void)
+{
+  size_t count;
+  size_t i;
+
+  count = __preinit_array_end - __preinit_array_start;
+  for (i = 0; i < count; i++)
+    __preinit_array_start[i] ();
+
+#ifdef HAVE_INIT_FINI
+  _init ();
+#endif
+
+  count = __init_array_end - __init_array_start;
+  for (i = 0; i < count; i++)
+    __init_array_start[i] ();
+}
+#endif
+
+#ifdef HAVE_INITFINI_ARRAY
+extern void (*__fini_array_start []) (void) __attribute__((weak));
+extern void (*__fini_array_end []) (void) __attribute__((weak));
+
+#ifdef HAVE_INIT_FINI
+extern void _fini (void);
+#endif
+
+/* Run all the cleanup routines. */
+void
+__libc_fini_array (void)
+{
+  size_t count;
+  size_t i;
+
+  count = __fini_array_end - __fini_array_start;
+  for (i = count; i > 0; i--)
+    __fini_array_start[i-1] ();
+
+#ifdef HAVE_INIT_FINI
+  _fini ();
+#endif
+}
+#endif
\ No newline at end of file