Vortex 2.0 changes:

+ Microarchitecture optimizations
+ 64-bit support
+ Xilinx FPGA support
+ LLVM-16 support
+ Refactoring and quality control fixes
This commit is contained in:
Blaise Tine
2023-10-19 20:51:22 -07:00
parent d69a64c32c
commit d47cccc157
1300 changed files with 247321 additions and 311189 deletions

0
kernel/.gitignore vendored Normal file
View File

66
kernel/Makefile Normal file
View File

@@ -0,0 +1,66 @@
XLEN ?= 32
ifeq ($(XLEN),64)
RISCV_TOOLCHAIN_PATH ?= /opt/riscv64-gnu-toolchain
CFLAGS += -march=rv64imafd -mabi=lp64d
else
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
CFLAGS += -march=rv32imaf -mabi=ilp32f
endif
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
LLVM_VORTEX ?= /opt/llvm-vortex
LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT)
LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH)
LLVM_CFLAGS += -Xclang -target-feature -Xclang +vortex -mllvm -vortex-branch-divergence=0
#LLVM_CFLAGS += -I$(RISCV_SYSROOT)/include/c++/9.2.0/$(RISCV_PREFIX)
#LLVM_CFLAGS += -I$(RISCV_SYSROOT)/include/c++/9.2.0
#LLVM_CFLAGS += -Wl,-L$(RISCV_TOOLCHAIN_PATH)/lib/gcc/$(RISCV_PREFIX)/9.2.0
#LLVM_CFLAGS += --rtlib=libgcc
#CC = $(LLVM_VORTEX)/bin/clang $(LLVM_CFLAGS)
#CXX = $(LLVM_VORTEX)/bin/clang++ $(LLVM_CFLAGS)
#DP = $(LLVM_VORTEX)/bin/llvm-objdump
#CP = $(LLVM_VORTEX)/bin/llvm-objcopy
CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc
CXX = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-g++
AR = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc-ar
DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump
CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy
CFLAGS += -O3 -mcmodel=medany -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections
CFLAGS += -I./include -I../hw
CFLAGS += -DXLEN_$(XLEN)
PROJECT = libvortexrt
SRCS = ./src/vx_start.S ./src/vx_syscalls.c ./src/vx_print.S ./src/tinyprintf.c ./src/vx_print.c ./src/vx_spawn.c ./src/vx_serial.S ./src/vx_perf.c
OBJS := $(addsuffix .o, $(notdir $(SRCS)))
all: $(PROJECT).a $(PROJECT).dump
$(PROJECT).dump: $(PROJECT).a
$(DP) -D $(PROJECT).a > $(PROJECT).dump
%.S.o: src/%.S
$(CC) $(CFLAGS) -c $< -o $@
%.cpp.o: src/%.cpp
$(CXX) $(CFLAGS) -c $< -o $@
%.c.o: src/%.c
$(CC) $(CFLAGS) -c $< -o $@
$(PROJECT).a: $(OBJS)
$(AR) rcs $@ $^
.depend: $(SRCS)
$(CC) $(CFLAGS) -MM $^ > .depend;
clean:
rm -rf *.a *.o *.dump .depend

View File

@@ -0,0 +1,247 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __VX_INTRINSICS_H__
#define __VX_INTRINSICS_H__
#include <VX_config.h>
#include <VX_types.h>
#if defined(__clang__)
#define __UNIFORM__ __attribute__((annotate("vortex.uniform")))
#else
#define __UNIFORM__
#endif
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __ASSEMBLY__
#define __ASM_STR(x) x
#else
#define __ASM_STR(x) #x
#endif
#define RISCV_CUSTOM0 0x0B
#define RISCV_CUSTOM1 0x2B
#define RISCV_CUSTOM2 0x5B
#define RISCV_CUSTOM3 0x7B
#define csr_read(csr) ({ \
unsigned __r; \
__asm__ __volatile__ ("csrr %0, %1" : "=r" (__r) : "i" (csr)); \
__r; \
})
#define csr_write(csr, val) ({ \
unsigned __v = (unsigned)(val); \
if (__builtin_constant_p(val) && __v < 32) \
__asm__ __volatile__ ("csrw %0, %1" :: "i" (csr), "i" (__v)); \
else \
__asm__ __volatile__ ("csrw %0, %1" :: "i" (csr), "r" (__v)); \
})
#define csr_swap(csr, val) ({ \
unsigned __r; \
unsigned __v = (unsigned)(val); \
if (__builtin_constant_p(val) && __v < 32) \
__asm__ __volatile__ ("csrrw %0, %1, %2" : "=r" (__r) : "i" (csr), "i" (__v)); \
else \
__asm__ __volatile__ ("csrrw %0, %1, %2" : "=r" (__r) : "i" (csr), "r" (__v)); \
__r; \
})
#define csr_read_set(csr, val) ({ \
unsigned __r; \
unsigned __v = (unsigned)(val); \
if (__builtin_constant_p(val) && __v < 32) \
__asm__ __volatile__ ("csrrs %0, %1, %2" : "=r" (__r) : "i" (csr), "i" (__v)); \
else \
__asm__ __volatile__ ("csrrs %0, %1, %2" : "=r" (__r) : "i" (csr), "r" (__v)); \
__r; \
})
#define csr_set(csr, val) ({ \
unsigned __v = (unsigned)(val); \
if (__builtin_constant_p(val) && __v < 32) \
__asm__ __volatile__ ("csrs %0, %1" :: "i" (csr), "i" (__v)); \
else \
__asm__ __volatile__ ("csrs %0, %1" :: "i" (csr), "r" (__v)); \
})
#define csr_read_clear(csr, val) ({ \
unsigned __r; \
unsigned __v = (unsigned)(val); \
if (__builtin_constant_p(val) && __v < 32) \
__asm__ __volatile__ ("csrrc %0, %1, %2" : "=r" (__r) : "i" (csr), "i" (__v)); \
else \
__asm__ __volatile__ ("csrrc %0, %1, %2" : "=r" (__r) : "i" (csr), "r" (__v)); \
__r; \
})
#define csr_clear(csr, val) ({ \
unsigned __v = (unsigned)(val); \
if (__builtin_constant_p(val) && __v < 32) \
__asm__ __volatile__ ("csrc %0, %1" :: "i" (csr), "i" (__v)); \
else \
__asm__ __volatile__ ("csrc %0, %1" :: "i" (csr), "r" (__v)); \
})
// Texture load
inline unsigned vx_tex(unsigned stage, unsigned u, unsigned v, unsigned lod) {
unsigned ret;
asm volatile (".insn r4 %1, 0, %2, %0, %3, %4, %5" : "=r"(ret) : "i"(RISCV_CUSTOM1), "i"(stage), "r"(u), "r"(v), "r"(lod));
return ret;
}
// Conditional move
inline unsigned vx_cmov(unsigned c, unsigned t, unsigned f) {
unsigned ret;
asm volatile (".insn r4 %1, 1, 0, %0, %2, %3, %4" : "=r"(ret) : "i"(RISCV_CUSTOM1), "r"(c), "r"(t), "r"(f));
return ret;
}
// Rop write
inline void vx_rop(unsigned x, unsigned y, unsigned face, unsigned color, unsigned depth) {
unsigned pos_face = (y << 16) | (x << 1) | face;
asm volatile (".insn r4 %0, 1, 1, x0, %1, %2, %3" :: "i"(RISCV_CUSTOM1), "r"(pos_face), "r"(color), "r"(depth));
}
// Raster load
inline unsigned vx_rast() {
unsigned ret;
asm volatile (".insn r %1, 0, 1, %0, x0, x0" : "=r"(ret) : "i"(RISCV_CUSTOM0));
return ret;
}
// Set thread mask
inline void vx_tmc(unsigned thread_mask) {
asm volatile (".insn r %0, 0, 0, x0, %1, x0" :: "i"(RISCV_CUSTOM0), "r"(thread_mask));
}
// disable all threads in the current warp
inline void vx_tmc_zero() {
asm volatile (".insn r %0, 0, 0, x0, x0, x0" :: "i"(RISCV_CUSTOM0));
}
// switch execution to single thread zero
inline void vx_tmc_one() {
asm volatile (
"li a0, 1\n\t" // Load immediate value 1 into a0 (x10) register
".insn r %0, 0, 0, x0, a0, x0" :: "i"(RISCV_CUSTOM0)
: "a0" // Indicate that a0 (x10) is clobbered
);
}
// Set thread predicate
inline void vx_pred(unsigned condition, unsigned thread_mask) {
asm volatile (".insn r %0, 5, 0, x0, %1, %2" :: "i"(RISCV_CUSTOM0), "r"(condition), "r"(thread_mask));
}
typedef void (*vx_wspawn_pfn)();
// Spawn warps
inline void vx_wspawn(unsigned num_warps, vx_wspawn_pfn func_ptr) {
asm volatile (".insn r %0, 1, 0, x0, %1, %2" :: "i"(RISCV_CUSTOM0), "r"(num_warps), "r"(func_ptr));
}
// Split on a predicate
inline unsigned vx_split(unsigned predicate) {
unsigned ret;
asm volatile (".insn r %1, 2, 0, %0, %2, x0" : "=r"(ret) : "i"(RISCV_CUSTOM0), "r"(predicate));
return ret;
}
// Join
inline void vx_join(unsigned stack_ptr) {
asm volatile (".insn r %0, 3, 0, x0, %1, x0" :: "i"(RISCV_CUSTOM0), "r"(stack_ptr));
}
// Warp Barrier
inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
asm volatile (".insn r %0, 4, 0, x0, %1, %2" :: "i"(RISCV_CUSTOM0), "r"(barried_id), "r"(num_warps));
}
// Return current thread identifier
inline int vx_thread_id() {
int ret;
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_THREAD_ID));
return ret;
}
// Return current warp identifier
inline int vx_warp_id() {
int ret;
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_WARP_ID));
return ret;
}
// Return current core identifier
inline int vx_core_id() {
int ret;
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_CORE_ID));
return ret;
}
// Return current thread mask
inline int vx_thread_mask() {
int ret;
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_THREAD_MASK));
return ret;
}
// Return number of active warps
inline int vx_active_warps() {
int ret;
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_WARP_MASK));
return ret;
}
// Return the number of threads per warp
inline int vx_num_threads() {
int ret;
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_NUM_THREADS));
return ret;
}
// Return the number of warps per core
inline int vx_num_warps() {
int ret;
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_NUM_WARPS));
return ret;
}
// Return the number of cores per cluster
inline int vx_num_cores() {
int ret;
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_NUM_CORES));
return ret;
}
// Return the hart identifier (thread id accross the processor)
inline int vx_hart_id() {
int ret;
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_MHARTID));
return ret;
}
inline void vx_fence() {
asm volatile ("fence iorw, iorw");
}
#ifdef __cplusplus
}
#endif
#endif // __VX_INTRINSICS_H__

34
kernel/include/vx_print.h Normal file
View File

@@ -0,0 +1,34 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __VX_PRINT_H__
#define __VX_PRINT_H__
#include <stdarg.h>
#ifdef __cplusplus
extern "C" {
#endif
int vx_vprintf(const char* format, va_list va);
int vx_printf(const char * format, ...);
void vx_putchar(int c);
void vx_putint(int value, int base);
void vx_putfloat(float value, int precision);
#ifdef __cplusplus
}
#endif
#endif // __VX_PRINT_H__

58
kernel/include/vx_spawn.h Normal file
View File

@@ -0,0 +1,58 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __VX_SPAWN_H__
#define __VX_SPAWN_H__
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
uint32_t num_groups[3];
uint32_t global_offset[3];
uint32_t local_size[3];
char * printf_buffer;
uint32_t *printf_buffer_position;
uint32_t printf_buffer_capacity;
uint32_t work_dim;
} context_t;
typedef void (*vx_spawn_kernel_cb) (
const void * /* arg */,
const context_t * /* context */,
uint32_t /* group_x */,
uint32_t /* group_y */,
uint32_t /* group_z */
);
typedef void (*vx_spawn_tasks_cb)(int task_id, void *arg);
typedef void (*vx_serial_cb)(void *arg);
void vx_wspawn_wait();
void vx_spawn_kernel(context_t * ctx, vx_spawn_kernel_cb callback, void * arg);
void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback, void * arg);
void vx_serial(vx_serial_cb callback, void * arg);
#ifdef __cplusplus
}
#endif
#endif // __VX_SPAWN_H__

252
kernel/linker/vx_link32.ld Normal file
View File

@@ -0,0 +1,252 @@
/* Default linker script, for normal executables */
/* Copyright (C) 2014-2020 Free Software Foundation, Inc.
Copying and distribution of this script, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. */
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv",
"elf32-littleriscv")
OUTPUT_ARCH(riscv)
ENTRY(_start)
SECTIONS
{
. = STARTUP_ADDR;
.interp : { *(.interp) }
.note.gnu.build-id : { *(.note.gnu.build-id) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.rela.init : { *(.rela.init) }
.rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
.rela.fini : { *(.rela.fini) }
.rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
.rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
.rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
.rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
.rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
.rela.ctors : { *(.rela.ctors) }
.rela.dtors : { *(.rela.dtors) }
.rela.got : { *(.rela.got) }
.rela.sdata : { *(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*) }
.rela.sbss : { *(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*) }
.rela.sdata2 : { *(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*) }
.rela.sbss2 : { *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*) }
.rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
.rela.iplt :
{
PROVIDE_HIDDEN (__rela_iplt_start = .);
*(.rela.iplt)
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.rela.plt :
{
*(.rela.plt)
}
.init :
{
KEEP (*(SORT_NONE(.init)))
}
.plt : { *(.plt) }
.iplt : { *(.iplt) }
.text :
{
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
*(.text.hot .text.hot.*)
*(SORT(.text.sorted.*))
*(.text .stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf.em. */
*(.gnu.warning)
}
.fini :
{
KEEP (*(SORT_NONE(.fini)))
}
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.sdata2 :
{
*(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
}
.sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) }
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) }
/* These sections are generated by the Sun/Oracle C++ compiler. */
.exception_ranges : ONLY_IF_RO { *(.exception_ranges*) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gnu_extab : ONLY_IF_RW { *(.gnu_extab) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
/* Thread Local Storage sections */
.tdata :
{
PROVIDE_HIDDEN (__tdata_start = .);
*(.tdata .tdata.* .gnu.linkonce.td.*)
PROVIDE_HIDDEN (__tdata_end = .);
}
PROVIDE (__tdata_size = SIZEOF (.tdata));
.tbss :
{
PROVIDE_HIDDEN (__tbss_start = .);
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
PROVIDE_HIDDEN (__tbss_end = .);
}
PROVIDE (__tbss_size = SIZEOF (.tbss));
PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64));
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP (*(.preinit_array))
PROVIDE_HIDDEN (__preinit_array_end = .);
}
.init_array :
{
PROVIDE_HIDDEN (__init_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
PROVIDE_HIDDEN (__init_array_end = .);
}
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
PROVIDE_HIDDEN (__fini_array_end = .);
}
.ctors :
{
/* gcc uses crtbegin.o to find the start of
the constructors, so we make sure it is
first. Because this is a wildcard, it
doesn't matter if the user does not
actually link against crtbegin.o; the
linker won't look for a file to match a
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
KEEP (*crtbegin.o(.ctors))
KEEP (*crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
}
.dtors :
{
KEEP (*crtbegin.o(.dtors))
KEEP (*crtbegin?.o(.dtors))
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
}
.jcr : { KEEP (*(.jcr)) }
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
.dynamic : { *(.dynamic) }
. = DATA_SEGMENT_RELRO_END (0, .);
.data :
{
__DATA_BEGIN__ = .;
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
}
.data1 : { *(.data1) }
.got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
.sdata :
{
__SDATA_BEGIN__ = .;
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*)
*(.sdata .sdata.* .gnu.linkonce.s.*)
}
_edata = .; PROVIDE (edata = .);
. = .;
__bss_start = .;
.sbss :
{
*(.dynsbss)
*(.sbss .sbss.* .gnu.linkonce.sb.*)
*(.scommon)
}
.bss :
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON)
/* Align here to ensure that the .bss section occupies space up to
_end. Align after .bss to ensure correct alignment even if the
.bss section disappears because there are no input sections.
FIXME: Why do we need it? When there is no .bss section, we do not
pad the .data section. */
. = ALIGN(. != 0 ? 32 / 8 : 1);
}
. = ALIGN(32 / 8);
. = SEGMENT_START("ldata-segment", .);
. = ALIGN(32 / 8);
__BSS_END__ = .;
__global_pointer = MIN(__SDATA_BEGIN__ + 0x800,
MAX(__DATA_BEGIN__ + 0x800, __BSS_END__ - 0x800));
_end = .; PROVIDE (end = .);
. = DATA_SEGMENT_END (.);
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
.gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* DWARF 3 */
.debug_pubtypes 0 : { *(.debug_pubtypes) }
.debug_ranges 0 : { *(.debug_ranges) }
/* DWARF Extension. */
.debug_macro 0 : { *(.debug_macro) }
.debug_addr 0 : { *(.debug_addr) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
}

252
kernel/linker/vx_link64.ld Normal file
View File

@@ -0,0 +1,252 @@
/* Default linker script, for normal executables */
/* Copyright (C) 2014-2020 Free Software Foundation, Inc.
Copying and distribution of this script, with or without modification,
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved. */
OUTPUT_FORMAT("elf64-littleriscv", "elf64-littleriscv",
"elf64-littleriscv")
OUTPUT_ARCH(riscv)
ENTRY(_start)
SECTIONS
{
. = STARTUP_ADDR;
.interp : { *(.interp) }
.note.gnu.build-id : { *(.note.gnu.build-id) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
.rela.init : { *(.rela.init) }
.rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
.rela.fini : { *(.rela.fini) }
.rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
.rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
.rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
.rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
.rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
.rela.ctors : { *(.rela.ctors) }
.rela.dtors : { *(.rela.dtors) }
.rela.got : { *(.rela.got) }
.rela.sdata : { *(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*) }
.rela.sbss : { *(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*) }
.rela.sdata2 : { *(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*) }
.rela.sbss2 : { *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*) }
.rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
.rela.iplt :
{
PROVIDE_HIDDEN (__rela_iplt_start = .);
*(.rela.iplt)
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.rela.plt :
{
*(.rela.plt)
}
.init :
{
KEEP (*(SORT_NONE(.init)))
}
.plt : { *(.plt) }
.iplt : { *(.iplt) }
.text :
{
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
*(.text.exit .text.exit.*)
*(.text.startup .text.startup.*)
*(.text.hot .text.hot.*)
*(SORT(.text.sorted.*))
*(.text .stub .text.* .gnu.linkonce.t.*)
/* .gnu.warning sections are handled specially by elf.em. */
*(.gnu.warning)
}
.fini :
{
KEEP (*(SORT_NONE(.fini)))
}
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.sdata2 :
{
*(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
}
.sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) }
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) }
/* These sections are generated by the Sun/Oracle C++ compiler. */
.exception_ranges : ONLY_IF_RO { *(.exception_ranges*) }
/* Adjust the address for the data segment. We want to adjust up to
the same address within the page on the next page up. */
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
/* Exception handling */
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) }
.gnu_extab : ONLY_IF_RW { *(.gnu_extab) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
/* Thread Local Storage sections */
.tdata :
{
PROVIDE_HIDDEN (__tdata_start = .);
*(.tdata .tdata.* .gnu.linkonce.td.*)
PROVIDE_HIDDEN (__tdata_end = .);
}
PROVIDE (__tdata_size = SIZEOF (.tdata));
.tbss :
{
PROVIDE_HIDDEN (__tbss_start = .);
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
PROVIDE_HIDDEN (__tbss_end = .);
}
PROVIDE (__tbss_size = SIZEOF (.tbss));
PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64));
.preinit_array :
{
PROVIDE_HIDDEN (__preinit_array_start = .);
KEEP (*(.preinit_array))
PROVIDE_HIDDEN (__preinit_array_end = .);
}
.init_array :
{
PROVIDE_HIDDEN (__init_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
PROVIDE_HIDDEN (__init_array_end = .);
}
.fini_array :
{
PROVIDE_HIDDEN (__fini_array_start = .);
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
PROVIDE_HIDDEN (__fini_array_end = .);
}
.ctors :
{
/* gcc uses crtbegin.o to find the start of
the constructors, so we make sure it is
first. Because this is a wildcard, it
doesn't matter if the user does not
actually link against crtbegin.o; the
linker won't look for a file to match a
wildcard. The wildcard also means that it
doesn't matter which directory crtbegin.o
is in. */
KEEP (*crtbegin.o(.ctors))
KEEP (*crtbegin?.o(.ctors))
/* We don't want to include the .ctor section from
the crtend.o file until after the sorted ctors.
The .ctor section from the crtend file contains the
end of ctors marker and it must be last */
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
KEEP (*(SORT(.ctors.*)))
KEEP (*(.ctors))
}
.dtors :
{
KEEP (*crtbegin.o(.dtors))
KEEP (*crtbegin?.o(.dtors))
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
KEEP (*(SORT(.dtors.*)))
KEEP (*(.dtors))
}
.jcr : { KEEP (*(.jcr)) }
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
.dynamic : { *(.dynamic) }
. = DATA_SEGMENT_RELRO_END (0, .);
.data :
{
__DATA_BEGIN__ = .;
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
}
.data1 : { *(.data1) }
.got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
.sdata :
{
__SDATA_BEGIN__ = .;
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*)
*(.sdata .sdata.* .gnu.linkonce.s.*)
}
_edata = .; PROVIDE (edata = .);
. = .;
__bss_start = .;
.sbss :
{
*(.dynsbss)
*(.sbss .sbss.* .gnu.linkonce.sb.*)
*(.scommon)
}
.bss :
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)
*(COMMON)
/* Align here to ensure that the .bss section occupies space up to
_end. Align after .bss to ensure correct alignment even if the
.bss section disappears because there are no input sections.
FIXME: Why do we need it? When there is no .bss section, we do not
pad the .data section. */
. = ALIGN(. != 0 ? 64 / 8 : 1);
}
. = ALIGN(64 / 8);
. = SEGMENT_START("ldata-segment", .);
. = ALIGN(64 / 8);
__BSS_END__ = .;
__global_pointer = MIN(__SDATA_BEGIN__ + 0x800,
MAX(__DATA_BEGIN__ + 0x800, __BSS_END__ - 0x800));
_end = .; PROVIDE (end = .);
. = DATA_SEGMENT_END (.);
/* Stabs debugging sections. */
.stab 0 : { *(.stab) }
.stabstr 0 : { *(.stabstr) }
.stab.excl 0 : { *(.stab.excl) }
.stab.exclstr 0 : { *(.stab.exclstr) }
.stab.index 0 : { *(.stab.index) }
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
.gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) }
/* DWARF debug sections.
Symbols in the DWARF debugging sections are relative to the beginning
of the section so we begin them at 0. */
/* DWARF 1 */
.debug 0 : { *(.debug) }
.line 0 : { *(.line) }
/* GNU DWARF 1 extensions */
.debug_srcinfo 0 : { *(.debug_srcinfo) }
.debug_sfnames 0 : { *(.debug_sfnames) }
/* DWARF 1.1 and DWARF 2 */
.debug_aranges 0 : { *(.debug_aranges) }
.debug_pubnames 0 : { *(.debug_pubnames) }
/* DWARF 2 */
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev 0 : { *(.debug_abbrev) }
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) }
.debug_frame 0 : { *(.debug_frame) }
.debug_str 0 : { *(.debug_str) }
.debug_loc 0 : { *(.debug_loc) }
.debug_macinfo 0 : { *(.debug_macinfo) }
/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames 0 : { *(.debug_varnames) }
/* DWARF 3 */
.debug_pubtypes 0 : { *(.debug_pubtypes) }
.debug_ranges 0 : { *(.debug_ranges) }
/* DWARF Extension. */
.debug_macro 0 : { *(.debug_macro) }
.debug_addr 0 : { *(.debug_addr) }
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
}

890
kernel/src/tinyprintf.c Normal file
View File

@@ -0,0 +1,890 @@
///////////////////////////////////////////////////////////////////////////////
// \author (c) Marco Paland (info@paland.com)
// 2014-2019, PALANDesign Hannover, Germany
//
// \license The MIT License (MIT)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// \brief Tiny printf, sprintf and (v)snprintf implementation, optimized for speed on
// embedded systems with a very limited resources. These routines are thread
// safe and reentrant!
// Use this instead of the bloated standard/newlib printf cause these use
// malloc for printf (and may not be thread safe).
//
///////////////////////////////////////////////////////////////////////////////
#include <stdbool.h>
#include <stdint.h>
#include "tinyprintf.h"
#include "vx_print.h"
// define this globally (e.g. gcc -DPRINTF_INCLUDE_CONFIG_H ...) to include the
// printf_config.h header file
// default: undefined
#ifdef PRINTF_INCLUDE_CONFIG_H
#include "printf_config.h"
#endif
// 'ntoa' conversion buffer size, this must be big enough to hold one converted
// numeric number including padded zeros (dynamically created on stack)
// default: 32 byte
#ifndef PRINTF_NTOA_BUFFER_SIZE
#define PRINTF_NTOA_BUFFER_SIZE 32U
#endif
// 'ftoa' conversion buffer size, this must be big enough to hold one converted
// float number including padded zeros (dynamically created on stack)
// default: 32 byte
#ifndef PRINTF_FTOA_BUFFER_SIZE
#define PRINTF_FTOA_BUFFER_SIZE 32U
#endif
// support for the floating point type (%f)
// default: activated
#ifndef PRINTF_DISABLE_SUPPORT_FLOAT
#define PRINTF_SUPPORT_FLOAT
#endif
// support for exponential floating point notation (%e/%g)
// default: activated
#ifndef PRINTF_DISABLE_SUPPORT_EXPONENTIAL
#define PRINTF_SUPPORT_EXPONENTIAL
#endif
// define the default floating point precision
// default: 6 digits
#ifndef PRINTF_DEFAULT_FLOAT_PRECISION
#define PRINTF_DEFAULT_FLOAT_PRECISION 6U
#endif
// define the largest float suitable to print with %f
// default: 1e9
#ifndef PRINTF_MAX_FLOAT
#define PRINTF_MAX_FLOAT 1e9
#endif
// support for the long long types (%llu or %p)
// default: activated
#ifndef PRINTF_DISABLE_SUPPORT_LONG_LONG
#define PRINTF_SUPPORT_LONG_LONG
#endif
// support for the ptrdiff_t type (%t)
// ptrdiff_t is normally defined in <stddef.h> as long or long long type
// default: activated
#ifndef PRINTF_DISABLE_SUPPORT_PTRDIFF_T
#define PRINTF_SUPPORT_PTRDIFF_T
#endif
///////////////////////////////////////////////////////////////////////////////
// internal flag definitions
#define FLAGS_ZEROPAD (1U << 0U)
#define FLAGS_LEFT (1U << 1U)
#define FLAGS_PLUS (1U << 2U)
#define FLAGS_SPACE (1U << 3U)
#define FLAGS_HASH (1U << 4U)
#define FLAGS_UPPERCASE (1U << 5U)
#define FLAGS_CHAR (1U << 6U)
#define FLAGS_SHORT (1U << 7U)
#define FLAGS_LONG (1U << 8U)
#define FLAGS_LONG_LONG (1U << 9U)
#define FLAGS_PRECISION (1U << 10U)
#define FLAGS_ADAPT_EXP (1U << 11U)
// import float.h for DBL_MAX
#if defined(PRINTF_SUPPORT_FLOAT)
#include <float.h>
#endif
// output function type
typedef void (*out_fct_type)(char character, void* buffer, size_t idx, size_t maxlen);
// wrapper (used as buffer) for output function type
typedef struct {
void (*fct)(char character, void* arg);
void* arg;
} out_fct_wrap_type;
// internal buffer output
static inline void _out_buffer(char character, void* buffer, size_t idx, size_t maxlen)
{
if (idx < maxlen) {
((char*)buffer)[idx] = character;
}
}
// internal null output
static inline void _out_null(char character, void* buffer, size_t idx, size_t maxlen)
{
(void)character; (void)buffer; (void)idx; (void)maxlen;
}
// internal _putchar wrapper
static inline void _out_char(char character, void* buffer, size_t idx, size_t maxlen)
{
(void)buffer; (void)idx; (void)maxlen;
if (character) {
vx_putchar(character);
}
}
// internal output function wrapper
static inline void _out_fct(char character, void* buffer, size_t idx, size_t maxlen)
{
(void)idx; (void)maxlen;
if (character) {
// buffer is the output fct pointer
((out_fct_wrap_type*)buffer)->fct(character, ((out_fct_wrap_type*)buffer)->arg);
}
}
// internal secure strlen
// \return The length of the string (excluding the terminating 0) limited by 'maxsize'
static inline unsigned int _strnlen_s(const char* str, size_t maxsize)
{
const char* s;
for (s = str; *s && maxsize--; ++s);
return (unsigned int)(s - str);
}
// internal test if char is a digit (0-9)
// \return true if char is a digit
static inline bool _is_digit(char ch)
{
return (ch >= '0') && (ch <= '9');
}
// internal ASCII string to unsigned int conversion
static unsigned int _atoi(const char** str)
{
unsigned int i = 0U;
while (_is_digit(**str)) {
i = i * 10U + (unsigned int)(*((*str)++) - '0');
}
return i;
}
// output the specified string in reverse, taking care of any zero-padding
static size_t _out_rev(out_fct_type out, char* buffer, size_t idx, size_t maxlen, const char* buf, size_t len, unsigned int width, unsigned int flags)
{
const size_t start_idx = idx;
// pad spaces up to given width
if (!(flags & FLAGS_LEFT) && !(flags & FLAGS_ZEROPAD)) {
for (size_t i = len; i < width; i++) {
out(' ', buffer, idx++, maxlen);
}
}
// reverse string
while (len) {
out(buf[--len], buffer, idx++, maxlen);
}
// append pad spaces up to given width
if (flags & FLAGS_LEFT) {
while (idx - start_idx < width) {
out(' ', buffer, idx++, maxlen);
}
}
return idx;
}
// internal itoa format
static size_t _ntoa_format(out_fct_type out, char* buffer, size_t idx, size_t maxlen, char* buf, size_t len, bool negative, unsigned int base, unsigned int prec, unsigned int width, unsigned int flags)
{
// pad leading zeros
if (!(flags & FLAGS_LEFT)) {
if (width && (flags & FLAGS_ZEROPAD) && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) {
width--;
}
while ((len < prec) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
buf[len++] = '0';
}
while ((flags & FLAGS_ZEROPAD) && (len < width) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
buf[len++] = '0';
}
}
// handle hash
if (flags & FLAGS_HASH) {
if (!(flags & FLAGS_PRECISION) && len && ((len == prec) || (len == width))) {
len--;
if (len && (base == 16U)) {
len--;
}
}
if ((base == 16U) && !(flags & FLAGS_UPPERCASE) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
buf[len++] = 'x';
}
else if ((base == 16U) && (flags & FLAGS_UPPERCASE) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
buf[len++] = 'X';
}
else if ((base == 2U) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
buf[len++] = 'b';
}
if (len < PRINTF_NTOA_BUFFER_SIZE) {
buf[len++] = '0';
}
}
if (len < PRINTF_NTOA_BUFFER_SIZE) {
if (negative) {
buf[len++] = '-';
}
else if (flags & FLAGS_PLUS) {
buf[len++] = '+'; // ignore the space if the '+' exists
}
else if (flags & FLAGS_SPACE) {
buf[len++] = ' ';
}
}
return _out_rev(out, buffer, idx, maxlen, buf, len, width, flags);
}
// internal itoa for 'long' type
static size_t _ntoa_long(out_fct_type out, char* buffer, size_t idx, size_t maxlen, unsigned long value, bool negative, unsigned long base, unsigned int prec, unsigned int width, unsigned int flags)
{
char buf[PRINTF_NTOA_BUFFER_SIZE];
size_t len = 0U;
// no hash for 0 values
if (!value) {
flags &= ~FLAGS_HASH;
}
// write if precision != 0 and value is != 0
if (!(flags & FLAGS_PRECISION) || value) {
do {
const char digit = (char)(value % base);
buf[len++] = digit < 10 ? '0' + digit : (flags & FLAGS_UPPERCASE ? 'A' : 'a') + digit - 10;
value /= base;
} while (value && (len < PRINTF_NTOA_BUFFER_SIZE));
}
return _ntoa_format(out, buffer, idx, maxlen, buf, len, negative, (unsigned int)base, prec, width, flags);
}
// internal itoa for 'long long' type
#if defined(PRINTF_SUPPORT_LONG_LONG)
static size_t _ntoa_long_long(out_fct_type out, char* buffer, size_t idx, size_t maxlen, unsigned long long value, bool negative, unsigned long long base, unsigned int prec, unsigned int width, unsigned int flags)
{
char buf[PRINTF_NTOA_BUFFER_SIZE];
size_t len = 0U;
// no hash for 0 values
if (!value) {
flags &= ~FLAGS_HASH;
}
// write if precision != 0 and value is != 0
if (!(flags & FLAGS_PRECISION) || value) {
do {
const char digit = (char)(value % base);
buf[len++] = digit < 10 ? '0' + digit : (flags & FLAGS_UPPERCASE ? 'A' : 'a') + digit - 10;
value /= base;
} while (value && (len < PRINTF_NTOA_BUFFER_SIZE));
}
return _ntoa_format(out, buffer, idx, maxlen, buf, len, negative, (unsigned int)base, prec, width, flags);
}
#endif // PRINTF_SUPPORT_LONG_LONG
#if defined(PRINTF_SUPPORT_FLOAT)
#if defined(PRINTF_SUPPORT_EXPONENTIAL)
// forward declaration so that _ftoa can switch to exp notation for values > PRINTF_MAX_FLOAT
static size_t _etoa(out_fct_type out, char* buffer, size_t idx, size_t maxlen, double value, unsigned int prec, unsigned int width, unsigned int flags);
#endif
// internal ftoa for fixed decimal floating point
static size_t _ftoa(out_fct_type out, char* buffer, size_t idx, size_t maxlen, double value, unsigned int prec, unsigned int width, unsigned int flags)
{
char buf[PRINTF_FTOA_BUFFER_SIZE];
size_t len = 0U;
double diff = 0.0;
// powers of 10
static const double pow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
// test for special values
if (value != value)
return _out_rev(out, buffer, idx, maxlen, "nan", 3, width, flags);
if (value < -DBL_MAX)
return _out_rev(out, buffer, idx, maxlen, "fni-", 4, width, flags);
if (value > DBL_MAX)
return _out_rev(out, buffer, idx, maxlen, (flags & FLAGS_PLUS) ? "fni+" : "fni", (flags & FLAGS_PLUS) ? 4U : 3U, width, flags);
// test for very large values
// standard printf behavior is to print EVERY whole number digit -- which could be 100s of characters overflowing your buffers == bad
if ((value > PRINTF_MAX_FLOAT) || (value < -PRINTF_MAX_FLOAT)) {
#if defined(PRINTF_SUPPORT_EXPONENTIAL)
return _etoa(out, buffer, idx, maxlen, value, prec, width, flags);
#else
return 0U;
#endif
}
// test for negative
bool negative = false;
if (value < 0) {
negative = true;
value = 0 - value;
}
// set default precision, if not set explicitly
if (!(flags & FLAGS_PRECISION)) {
prec = PRINTF_DEFAULT_FLOAT_PRECISION;
}
// limit precision to 9, cause a prec >= 10 can lead to overflow errors
while ((len < PRINTF_FTOA_BUFFER_SIZE) && (prec > 9U)) {
buf[len++] = '0';
prec--;
}
int whole = (int)value;
double tmp = (value - whole) * pow10[prec];
unsigned long frac = (unsigned long)tmp;
diff = tmp - frac;
if (diff > 0.5) {
++frac;
// handle rollover, e.g. case 0.99 with prec 1 is 1.0
if (frac >= pow10[prec]) {
frac = 0;
++whole;
}
}
else if (diff < 0.5) {
}
else if ((frac == 0U) || (frac & 1U)) {
// if halfway, round up if odd OR if last digit is 0
++frac;
}
if (prec == 0U) {
diff = value - (double)whole;
if ((!(diff < 0.5) || (diff > 0.5)) && (whole & 1)) {
// exactly 0.5 and ODD, then round up
// 1.5 -> 2, but 2.5 -> 2
++whole;
}
}
else {
unsigned int count = prec;
// now do fractional part, as an unsigned number
while (len < PRINTF_FTOA_BUFFER_SIZE) {
--count;
buf[len++] = (char)(48U + (frac % 10U));
if (!(frac /= 10U)) {
break;
}
}
// add extra 0s
while ((len < PRINTF_FTOA_BUFFER_SIZE) && (count-- > 0U)) {
buf[len++] = '0';
}
if (len < PRINTF_FTOA_BUFFER_SIZE) {
// add decimal
buf[len++] = '.';
}
}
// do whole part, number is reversed
while (len < PRINTF_FTOA_BUFFER_SIZE) {
buf[len++] = (char)(48 + (whole % 10));
if (!(whole /= 10)) {
break;
}
}
// pad leading zeros
if (!(flags & FLAGS_LEFT) && (flags & FLAGS_ZEROPAD)) {
if (width && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) {
width--;
}
while ((len < width) && (len < PRINTF_FTOA_BUFFER_SIZE)) {
buf[len++] = '0';
}
}
if (len < PRINTF_FTOA_BUFFER_SIZE) {
if (negative) {
buf[len++] = '-';
}
else if (flags & FLAGS_PLUS) {
buf[len++] = '+'; // ignore the space if the '+' exists
}
else if (flags & FLAGS_SPACE) {
buf[len++] = ' ';
}
}
return _out_rev(out, buffer, idx, maxlen, buf, len, width, flags);
}
#if defined(PRINTF_SUPPORT_EXPONENTIAL)
// internal ftoa variant for exponential floating-point type, contributed by Martijn Jasperse <m.jasperse@gmail.com>
static size_t _etoa(out_fct_type out, char* buffer, size_t idx, size_t maxlen, double value, unsigned int prec, unsigned int width, unsigned int flags)
{
// check for NaN and special values
if ((value != value) || (value > DBL_MAX) || (value < -DBL_MAX)) {
return _ftoa(out, buffer, idx, maxlen, value, prec, width, flags);
}
// determine the sign
const bool negative = value < 0;
if (negative) {
value = -value;
}
// default precision
if (!(flags & FLAGS_PRECISION)) {
prec = PRINTF_DEFAULT_FLOAT_PRECISION;
}
// determine the decimal exponent
// based on the algorithm by David Gay (https://www.ampl.com/netlib/fp/dtoa.c)
union {
uint64_t U;
double F;
} conv;
conv.F = value;
int exp2 = (int)((conv.U >> 52U) & 0x07FFU) - 1023; // effectively log2
conv.U = (conv.U & ((1ULL << 52U) - 1U)) | (1023ULL << 52U); // drop the exponent so conv.F is now in [1,2)
// now approximate log10 from the log2 integer part and an expansion of ln around 1.5
int expval = (int)(0.1760912590558 + exp2 * 0.301029995663981 + (conv.F - 1.5) * 0.289529654602168);
// now we want to compute 10^expval but we want to be sure it won't overflow
exp2 = (int)(expval * 3.321928094887362 + 0.5);
const double z = expval * 2.302585092994046 - exp2 * 0.6931471805599453;
const double z2 = z * z;
conv.U = (uint64_t)(exp2 + 1023) << 52U;
// compute exp(z) using continued fractions, see https://en.wikipedia.org/wiki/Exponential_function#Continued_fractions_for_ex
conv.F *= 1 + 2 * z / (2 - z + (z2 / (6 + (z2 / (10 + z2 / 14)))));
// correct for rounding errors
if (value < conv.F) {
expval--;
conv.F /= 10;
}
// the exponent format is "%+03d" and largest value is "307", so set aside 4-5 characters
unsigned int minwidth = ((expval < 100) && (expval > -100)) ? 4U : 5U;
// in "%g" mode, "prec" is the number of *significant figures* not decimals
if (flags & FLAGS_ADAPT_EXP) {
// do we want to fall-back to "%f" mode?
if ((value >= 1e-4) && (value < 1e6)) {
if ((int)prec > expval) {
prec = (unsigned)((int)prec - expval - 1);
}
else {
prec = 0;
}
flags |= FLAGS_PRECISION; // make sure _ftoa respects precision
// no characters in exponent
minwidth = 0U;
expval = 0;
}
else {
// we use one sigfig for the whole part
if ((prec > 0) && (flags & FLAGS_PRECISION)) {
--prec;
}
}
}
// will everything fit?
unsigned int fwidth = width;
if (width > minwidth) {
// we didn't fall-back so subtract the characters required for the exponent
fwidth -= minwidth;
} else {
// not enough characters, so go back to default sizing
fwidth = 0U;
}
if ((flags & FLAGS_LEFT) && minwidth) {
// if we're padding on the right, DON'T pad the floating part
fwidth = 0U;
}
// rescale the float value
if (expval) {
value /= conv.F;
}
// output the floating part
const size_t start_idx = idx;
idx = _ftoa(out, buffer, idx, maxlen, negative ? -value : value, prec, fwidth, flags & ~FLAGS_ADAPT_EXP);
// output the exponent part
if (minwidth) {
// output the exponential symbol
out((flags & FLAGS_UPPERCASE) ? 'E' : 'e', buffer, idx++, maxlen);
// output the exponent value
idx = _ntoa_long(out, buffer, idx, maxlen, (expval < 0) ? -expval : expval, expval < 0, 10, 0, minwidth-1, FLAGS_ZEROPAD | FLAGS_PLUS);
// might need to right-pad spaces
if (flags & FLAGS_LEFT) {
while (idx - start_idx < width) out(' ', buffer, idx++, maxlen);
}
}
return idx;
}
#endif // PRINTF_SUPPORT_EXPONENTIAL
#endif // PRINTF_SUPPORT_FLOAT
// internal vsnprintf
static int _vsnprintf(out_fct_type out, char* buffer, const size_t maxlen, const char* format, va_list va) {
unsigned int flags, width, precision, n;
size_t idx = 0U;
if (!buffer) {
// use null output function
out = _out_null;
}
while (*format)
{
// format specifier? %[flags][width][.precision][length]
if (*format != '%') {
// no
out(*format, buffer, idx++, maxlen);
format++;
continue;
}
else {
// yes, evaluate it
format++;
}
// evaluate flags
flags = 0U;
do {
switch (*format) {
case '0': flags |= FLAGS_ZEROPAD; format++; n = 1U; break;
case '-': flags |= FLAGS_LEFT; format++; n = 1U; break;
case '+': flags |= FLAGS_PLUS; format++; n = 1U; break;
case ' ': flags |= FLAGS_SPACE; format++; n = 1U; break;
case '#': flags |= FLAGS_HASH; format++; n = 1U; break;
default : n = 0U; break;
}
} while (n);
// evaluate width field
width = 0U;
if (_is_digit(*format)) {
width = _atoi(&format);
}
else if (*format == '*') {
const int w = va_arg(va, int);
if (w < 0) {
flags |= FLAGS_LEFT; // reverse padding
width = (unsigned int)-w;
}
else {
width = (unsigned int)w;
}
format++;
}
// evaluate precision field
precision = 0U;
if (*format == '.') {
flags |= FLAGS_PRECISION;
format++;
if (_is_digit(*format)) {
precision = _atoi(&format);
}
else if (*format == '*') {
const int prec = (int)va_arg(va, int);
precision = prec > 0 ? (unsigned int)prec : 0U;
format++;
}
}
// evaluate length field
switch (*format) {
case 'l' :
flags |= FLAGS_LONG;
format++;
if (*format == 'l') {
flags |= FLAGS_LONG_LONG;
format++;
}
break;
case 'h' :
flags |= FLAGS_SHORT;
format++;
if (*format == 'h') {
flags |= FLAGS_CHAR;
format++;
}
break;
#if defined(PRINTF_SUPPORT_PTRDIFF_T)
case 't' :
flags |= (sizeof(ptrdiff_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG);
format++;
break;
#endif
case 'j' :
flags |= (sizeof(intmax_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG);
format++;
break;
case 'z' :
flags |= (sizeof(size_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG);
format++;
break;
default :
break;
}
// evaluate specifier
switch (*format) {
case 'd' :
case 'i' :
case 'u' :
case 'x' :
case 'X' :
case 'o' :
case 'b' : {
// set the base
unsigned int base;
if (*format == 'x' || *format == 'X') {
base = 16U;
}
else if (*format == 'o') {
base = 8U;
}
else if (*format == 'b') {
base = 2U;
}
else {
base = 10U;
flags &= ~FLAGS_HASH; // no hash for dec format
}
// uppercase
if (*format == 'X') {
flags |= FLAGS_UPPERCASE;
}
// no plus or space flag for u, x, X, o, b
if ((*format != 'i') && (*format != 'd')) {
flags &= ~(FLAGS_PLUS | FLAGS_SPACE);
}
// ignore '0' flag when precision is given
if (flags & FLAGS_PRECISION) {
flags &= ~FLAGS_ZEROPAD;
}
// convert the integer
if ((*format == 'i') || (*format == 'd')) {
// signed
if (flags & FLAGS_LONG_LONG) {
#if defined(PRINTF_SUPPORT_LONG_LONG)
const long long value = va_arg(va, long long);
idx = _ntoa_long_long(out, buffer, idx, maxlen, (unsigned long long)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags);
#endif
}
else if (flags & FLAGS_LONG) {
const long value = va_arg(va, long);
idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned long)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags);
}
else {
const int value = (flags & FLAGS_CHAR) ? (char)va_arg(va, int) : (flags & FLAGS_SHORT) ? (short int)va_arg(va, int) : va_arg(va, int);
idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned int)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags);
}
}
else {
// unsigned
if (flags & FLAGS_LONG_LONG) {
#if defined(PRINTF_SUPPORT_LONG_LONG)
idx = _ntoa_long_long(out, buffer, idx, maxlen, va_arg(va, unsigned long long), false, base, precision, width, flags);
#endif
}
else if (flags & FLAGS_LONG) {
idx = _ntoa_long(out, buffer, idx, maxlen, va_arg(va, unsigned long), false, base, precision, width, flags);
}
else {
const unsigned int value = (flags & FLAGS_CHAR) ? (unsigned char)va_arg(va, unsigned int) : (flags & FLAGS_SHORT) ? (unsigned short int)va_arg(va, unsigned int) : va_arg(va, unsigned int);
idx = _ntoa_long(out, buffer, idx, maxlen, value, false, base, precision, width, flags);
}
}
format++;
break;
}
#if defined(PRINTF_SUPPORT_FLOAT)
case 'f' :
case 'F' :
if (*format == 'F') flags |= FLAGS_UPPERCASE;
idx = _ftoa(out, buffer, idx, maxlen, va_arg(va, double), precision, width, flags);
format++;
break;
#if defined(PRINTF_SUPPORT_EXPONENTIAL)
case 'e':
case 'E':
case 'g':
case 'G':
if ((*format == 'g')||(*format == 'G')) flags |= FLAGS_ADAPT_EXP;
if ((*format == 'E')||(*format == 'G')) flags |= FLAGS_UPPERCASE;
idx = _etoa(out, buffer, idx, maxlen, va_arg(va, double), precision, width, flags);
format++;
break;
#endif // PRINTF_SUPPORT_EXPONENTIAL
#endif // PRINTF_SUPPORT_FLOAT
case 'c' : {
unsigned int l = 1U;
// pre padding
if (!(flags & FLAGS_LEFT)) {
while (l++ < width) {
out(' ', buffer, idx++, maxlen);
}
}
// char output
out((char)va_arg(va, int), buffer, idx++, maxlen);
// post padding
if (flags & FLAGS_LEFT) {
while (l++ < width) {
out(' ', buffer, idx++, maxlen);
}
}
format++;
break;
}
case 's' : {
const char* p = va_arg(va, char*);
unsigned int l = _strnlen_s(p, precision ? precision : (size_t)-1);
// pre padding
if (flags & FLAGS_PRECISION) {
l = (l < precision ? l : precision);
}
if (!(flags & FLAGS_LEFT)) {
while (l++ < width) {
out(' ', buffer, idx++, maxlen);
}
}
// string output
while ((*p != 0) && (!(flags & FLAGS_PRECISION) || precision--)) {
out(*(p++), buffer, idx++, maxlen);
}
// post padding
if (flags & FLAGS_LEFT) {
while (l++ < width) {
out(' ', buffer, idx++, maxlen);
}
}
format++;
break;
}
case 'p' : {
width = sizeof(void*) * 2U;
flags |= FLAGS_ZEROPAD | FLAGS_UPPERCASE;
#if defined(PRINTF_SUPPORT_LONG_LONG)
const bool is_ll = sizeof(uintptr_t) == sizeof(long long);
if (is_ll) {
idx = _ntoa_long_long(out, buffer, idx, maxlen, (uintptr_t)va_arg(va, void*), false, 16U, precision, width, flags);
}
else {
#endif
idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned long)((uintptr_t)va_arg(va, void*)), false, 16U, precision, width, flags);
#if defined(PRINTF_SUPPORT_LONG_LONG)
}
#endif
format++;
break;
}
case '%' :
out('%', buffer, idx++, maxlen);
format++;
break;
default :
out(*format, buffer, idx++, maxlen);
format++;
break;
}
}
// termination
out((char)0, buffer, idx < maxlen ? idx : maxlen - 1U, maxlen);
// return written chars without terminating \0
return (int)idx;
}
int tiny_printf(const char* format, ...) {
va_list va;
va_start(va, format);
char buffer[1];
const int ret = _vsnprintf(_out_char, buffer, (size_t)-1, format, va);
va_end(va);
return ret;
}
int tiny_sprintf(char* buffer, const char* format, ...) {
va_list va;
va_start(va, format);
const int ret = _vsnprintf(_out_buffer, buffer, (size_t)-1, format, va);
va_end(va);
return ret;
}
int tiny_snprintf(char* buffer, size_t count, const char* format, ...) {
va_list va;
va_start(va, format);
const int ret = _vsnprintf(_out_buffer, buffer, count, format, va);
va_end(va);
return ret;
}
int tiny_vprintf(const char* format, va_list va) {
char buffer[1];
return _vsnprintf(_out_char, buffer, (size_t)-1, format, va);
}
int tiny_vsnprintf(char* buffer, size_t count, const char* format, va_list va) {
return _vsnprintf(_out_buffer, buffer, count, format, va);
}

86
kernel/src/tinyprintf.h Normal file
View File

@@ -0,0 +1,86 @@
///////////////////////////////////////////////////////////////////////////////
// \author (c) Marco Paland (info@paland.com)
// 2014-2019, PALANDesign Hannover, Germany
//
// \license The MIT License (MIT)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// \brief Tiny printf, sprintf and snprintf implementation, optimized for speed on
// embedded systems with a very limited resources.
// Use this instead of bloated standard/newlib printf.
// These routines are thread safe and reentrant.
//
///////////////////////////////////////////////////////////////////////////////
#ifndef __TINYPRINTF_H__
#define __TINYPRINTF_H__
#include <stdarg.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Tiny printf implementation
* You have to implement _putchar if you use printf()
* To avoid conflicts with the regular printf() API it is overridden by macro defines
* and internal underscore-appended functions like printf_() are used
* \param format A string that specifies the format of the output
* \return The number of characters that are written into the array, not counting the terminating null character
*/
int tiny_printf(const char* format, ...);
/**
* Tiny sprintf implementation
* Due to security reasons (buffer overflow) YOU SHOULD CONSIDER USING (V)SNPRINTF INSTEAD!
* \param buffer A pointer to the buffer where to store the formatted string. MUST be big enough to store the output!
* \param format A string that specifies the format of the output
* \return The number of characters that are WRITTEN into the buffer, not counting the terminating null character
*/
int tiny_sprintf(char* buffer, const char* format, ...);
/**
* Tiny snprintf/vsnprintf implementation
* \param buffer A pointer to the buffer where to store the formatted string
* \param count The maximum number of characters to store in the buffer, including a terminating null character
* \param format A string that specifies the format of the output
* \param va A value identifying a variable arguments list
* \return The number of characters that COULD have been written into the buffer, not counting the terminating
* null character. A value equal or larger than count indicates truncation. Only when the returned value
* is non-negative and less than count, the string has been completely written.
*/
int tiny_snprintf(char* buffer, size_t count, const char* format, ...);
int tiny_vsnprintf(char* buffer, size_t count, const char* format, va_list va);
/**
* Tiny vprintf implementation
* \param format A string that specifies the format of the output
* \param va A value identifying a variable arguments list
* \return The number of characters that are WRITTEN into the buffer, not counting the terminating null character
*/
int tiny_vprintf(const char* format, va_list va);
#ifdef __cplusplus
}
#endif
#endif // __TINYPRINTF_H__

49
kernel/src/vx_perf.c Normal file
View File

@@ -0,0 +1,49 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <VX_config.h>
#include <VX_types.h>
#include <vx_intrinsics.h>
#include <stdint.h>
#define DUMP_CSR_4(d, s) \
csr_mem[d + 0] = csr_read(s + 0); \
csr_mem[d + 1] = csr_read(s + 1); \
csr_mem[d + 2] = csr_read(s + 2); \
csr_mem[d + 3] = csr_read(s + 3);
#define DUMP_CSR_32(d, s) \
DUMP_CSR_4(d + 0, s + 0) \
DUMP_CSR_4(d + 4, s + 4) \
DUMP_CSR_4(d + 8, s + 8) \
DUMP_CSR_4(d + 12, s + 12) \
DUMP_CSR_4(d + 16, s + 16) \
DUMP_CSR_4(d + 20, s + 20) \
DUMP_CSR_4(d + 24, s + 24) \
DUMP_CSR_4(d + 28, s + 28)
#ifdef __cplusplus
extern "C" {
#endif
void vx_perf_dump() {
int core_id = vx_core_id();
uint32_t* const csr_mem = (uint32_t*)(IO_CSR_ADDR + 64 * sizeof(uint32_t) * core_id);
DUMP_CSR_32(0, VX_CSR_MPM_BASE)
DUMP_CSR_32(32, VX_CSR_MPM_BASE_H)
}
#ifdef __cplusplus
}
#endif

32
kernel/src/vx_print.S Normal file
View File

@@ -0,0 +1,32 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <VX_config.h>
#include <VX_types.h>
.type vx_putchar, @function
.global vx_putchar
vx_putchar:
csrr t0, VX_CSR_MHARTID
andi t0, t0, %lo(IO_COUT_SIZE-1)
#if (XLEN == 64)
li t1, (IO_COUT_ADDR >> 32)
slli t1, t1, 32
li t2, (IO_COUT_ADDR & 0xffffffff)
or t1, t1, t2
#else
li t1, IO_COUT_ADDR
#endif
add t0, t0, t1
sb a0, 0(t0)
ret

107
kernel/src/vx_print.c Normal file
View File

@@ -0,0 +1,107 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vx_print.h>
#include <vx_spawn.h>
#include <vx_intrinsics.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "tinyprintf.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
const char* format;
va_list* va;
int ret;
} printf_arg_t;
typedef struct {
int value;
int base;
} putint_arg_t;
typedef struct {
float value;
int precision;
} putfloat_arg_t;
static void __putint_cb(const putint_arg_t* arg) {
char tmp[33];
float value = arg->value;
int base = arg->base;
itoa(value, tmp, base);
for (int i = 0; i < 33; ++i) {
int c = tmp[i];
if (!c)
break;
vx_putchar(c);
}
}
static void __putfloat_cb(const putfloat_arg_t* arg) {
float value = arg->value;
int precision = arg->precision;
int ipart = (int)value;
vx_putint(ipart, 10);
if (precision != 0) {
vx_putchar('.');
float frac = value - (float)ipart;
float fscaled = frac * pow(10, precision);
vx_putint((int)fscaled, 10);
}
}
static void __vprintf_cb(printf_arg_t* arg) {
arg->ret = tiny_vprintf(arg->format, *arg->va);
}
void vx_putint(int value, int base) {
putint_arg_t arg;
arg.value = value;
arg.base = base;
vx_serial((vx_serial_cb)__putint_cb, &arg);
}
void vx_putfloat(float value, int precision) {
putfloat_arg_t arg;
arg.value = value;
arg.precision = precision;
vx_serial((vx_serial_cb)__putfloat_cb, &arg);
}
int vx_vprintf(const char* format, va_list va) {
printf_arg_t arg;
arg.format = format;
arg.va = &va;
vx_serial((vx_serial_cb)__vprintf_cb, &arg);
return arg.ret;
}
int vx_printf(const char * format, ...) {
int ret;
va_list va;
va_start(va, format);
ret = vx_vprintf(format, va);
va_end(va);
return ret;
}
#ifdef __cplusplus
}
#endif

77
kernel/src/vx_serial.S Normal file
View File

@@ -0,0 +1,77 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <VX_config.h>
#include <VX_types.h>
#define RISCV_CUSTOM0 0x0B
.type vx_serial, @function
.global vx_serial
vx_serial:
#if (XLEN == 64)
addi sp, sp, -56
sd ra, 48(sp)
sd s5, 40(sp)
sd s4, 32(sp)
sd s3, 24(sp)
sd s2, 16(sp)
sd s1, 8(sp)
sd s0, 0(sp)
#else
addi sp, sp, -28
sw ra, 24(sp)
sw s5, 20(sp)
sw s4, 16(sp)
sw s3, 12(sp)
sw s2, 8(sp)
sw s1, 4(sp)
sw s0, 0(sp)
#endif
mv s4, a0 # s4 <- callback
mv s3, a1 # s3 <- arg
csrr s2, VX_CSR_NUM_THREADS # s2 <- NT
csrr s1, VX_CSR_THREAD_ID # s1 <- tid
li s0, 0 # s0 <- index
label_loop:
sub t0, s0, s1
seqz t1, t0 # (index != tid)
.insn r RISCV_CUSTOM0, 2, 0, s5, t1, x0 # split s5, t0
bnez t0, label_join
mv a0, s3 # a0 <- arg
jalr s4 # callback(arg)
label_join:
.insn r RISCV_CUSTOM0, 3, 0, x0, s5, x0 # join s5
addi s0, s0, 1 # index++
blt s0, s2, label_loop # loop back
#if (XLEN == 64)
ld ra, 48(sp)
ld s5, 40(sp)
ld s4, 32(sp)
ld s3, 24(sp)
ld s2, 16(sp)
ld s1, 8(sp)
ld s0, 0(sp)
addi sp, sp, 56
#else
lw ra, 24(sp)
lw s5, 20(sp)
lw s4, 16(sp)
lw s3, 12(sp)
lw s2, 8(sp)
lw s1, 4(sp)
lw s0, 0(sp)
addi sp, sp, 28
#endif
ret

334
kernel/src/vx_spawn.c Normal file
View File

@@ -0,0 +1,334 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vx_spawn.h>
#include <vx_intrinsics.h>
#include <inttypes.h>
#ifdef __cplusplus
extern "C" {
#endif
#define NUM_CORES_MAX 1024
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif
typedef struct {
vx_spawn_tasks_cb callback;
void* arg;
int offset; // task offset
int NWs; // number of NW batches where NW=<total warps per core>.
int RWs; // number of remaining warps in the core
} wspawn_tasks_args_t;
typedef struct {
context_t * ctx;
vx_spawn_kernel_cb callback;
void* arg;
int offset; // task offset
int NWs; // number of NW batches where NW=<total warps per core>.
int RWs; // number of remaining warps in the core
char isXYpow2;
char log2XY;
char log2X;
} wspawn_kernel_args_t;
void* g_wspawn_args[NUM_CORES_MAX];
inline char is_log2(int x) {
return ((x & (x-1)) == 0);
}
inline int fast_log2(int x) {
float f = x;
return (*(int*)(&f)>>23) - 127;
}
static void __attribute__ ((noinline)) spawn_tasks_all_stub() {
int NT = vx_num_threads();
int cid = vx_core_id();
int wid = vx_warp_id();
int tid = vx_thread_id();
wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[cid];
int wK = (p_wspawn_args->NWs * wid) + MIN(p_wspawn_args->RWs, wid);
int tK = p_wspawn_args->NWs + (wid < p_wspawn_args->RWs);
int offset = p_wspawn_args->offset + (wK * NT) + (tid * tK);
vx_spawn_tasks_cb callback = p_wspawn_args->callback;
void* arg = p_wspawn_args->arg;
for (int task_id = offset, N = task_id + tK; task_id < N; ++task_id) {
callback(task_id, arg);
}
}
static void __attribute__ ((noinline)) spawn_tasks_rem_stub() {
int cid = vx_core_id();
int tid = vx_thread_id();
wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[cid];
int task_id = p_wspawn_args->offset + tid;
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
}
static void __attribute__ ((noinline)) spawn_tasks_all_cb() {
// activate all threads
vx_tmc(-1);
// call stub routine
spawn_tasks_all_stub();
// disable warp
vx_tmc_zero();
}
void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
// device specs
int NC = vx_num_cores();
int NW = vx_num_warps();
int NT = vx_num_threads();
// current core id
int core_id = vx_core_id();
if (core_id >= NUM_CORES_MAX)
return;
// calculate necessary active cores
int WT = NW * NT;
int nC = (num_tasks > WT) ? (num_tasks / WT) : 1;
int nc = MIN(nC, NC);
if (core_id >= nc)
return; // terminate extra cores
// number of tasks per core
int tasks_per_core = num_tasks / nc;
int tasks_per_core_n1 = tasks_per_core;
if (core_id == (nc-1)) {
int rem = num_tasks - (nc * tasks_per_core);
tasks_per_core_n1 += rem; // last core also executes remaining tasks
}
// number of tasks per warp
int TW = tasks_per_core_n1 / NT; // occupied warps
int rT = tasks_per_core_n1 - TW * NT; // remaining threads
int fW = 1, rW = 0;
if (TW >= NW) {
fW = TW / NW; // full warps iterations
rW = TW - fW * NW; // remaining warps
}
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW };
g_wspawn_args[core_id] = &wspawn_args;
if (TW >= 1) {
// execute callback on other warps
int nw = MIN(TW, NW);
vx_wspawn(nw, spawn_tasks_all_cb);
// activate all threads
vx_tmc(-1);
// call stub routine
spawn_tasks_all_stub();
// back to single-threaded
vx_tmc_one();
// wait for spawn warps to terminate
vx_wspawn_wait();
}
if (rT != 0) {
// adjust offset
wspawn_args.offset += (tasks_per_core_n1 - rT);
// activate remaining threads
int tmask = (1 << rT) - 1;
vx_tmc(tmask);
// call stub routine
spawn_tasks_rem_stub();
// back to single-threaded
vx_tmc_one();
}
}
///////////////////////////////////////////////////////////////////////////////
static void __attribute__ ((noinline)) spawn_kernel_all_stub() {
int NT = vx_num_threads();
int cid = vx_core_id();
int wid = vx_warp_id();
int tid = vx_thread_id();
wspawn_kernel_args_t* p_wspawn_args = (wspawn_kernel_args_t*)g_wspawn_args[cid];
int wK = (p_wspawn_args->NWs * wid) + MIN(p_wspawn_args->RWs, wid);
int tK = p_wspawn_args->NWs + (wid < p_wspawn_args->RWs);
int offset = p_wspawn_args->offset + (wK * NT) + (tid * tK);
int X = p_wspawn_args->ctx->num_groups[0];
int Y = p_wspawn_args->ctx->num_groups[1];
int XY = X * Y;
if (p_wspawn_args->isXYpow2) {
for (int wg_id = offset, N = wg_id + tK; wg_id < N; ++wg_id) {
int k = wg_id >> p_wspawn_args->log2XY;
int wg_2d = wg_id - k * XY;
int j = wg_2d >> p_wspawn_args->log2X;
int i = wg_2d - j * X;
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, i, j, k);
}
} else {
for (int wg_id = offset, N = wg_id + tK; wg_id < N; ++wg_id) {
int k = wg_id / XY;
int wg_2d = wg_id - k * XY;
int j = wg_2d / X;
int i = wg_2d - j * X;
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, i, j, k);
}
}
}
static void __attribute__ ((noinline)) spawn_kernel_rem_stub() {
int cid = vx_core_id();
int tid = vx_thread_id();
wspawn_kernel_args_t* p_wspawn_args = (wspawn_kernel_args_t*)g_wspawn_args[cid];
int wg_id = p_wspawn_args->offset + tid;
int X = p_wspawn_args->ctx->num_groups[0];
int Y = p_wspawn_args->ctx->num_groups[1];
int XY = X * Y;
if (p_wspawn_args->isXYpow2) {
int k = wg_id >> p_wspawn_args->log2XY;
int wg_2d = wg_id - k * XY;
int j = wg_2d >> p_wspawn_args->log2X;
int i = wg_2d - j * X;
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, i, j, k);
} else {
int k = wg_id / XY;
int wg_2d = wg_id - k * XY;
int j = wg_2d / X;
int i = wg_2d - j * X;
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, i, j, k);
}
}
static void __attribute__ ((noinline)) spawn_kernel_all_cb() {
// activate all threads
vx_tmc(-1);
// call stub routine
spawn_kernel_all_stub();
// disable warp
vx_tmc_zero();
}
void vx_spawn_kernel(context_t * ctx, vx_spawn_kernel_cb callback, void * arg) {
// total number of WGs
int X = ctx->num_groups[0];
int Y = ctx->num_groups[1];
int Z = ctx->num_groups[2];
int XY = X * Y;
int num_tasks = XY * Z;
// device specs
int NC = vx_num_cores();
int NW = vx_num_warps();
int NT = vx_num_threads();
// current core id
int core_id = vx_core_id();
if (core_id >= NUM_CORES_MAX)
return;
// calculate necessary active cores
int WT = NW * NT;
int nC = (num_tasks > WT) ? (num_tasks / WT) : 1;
int nc = MIN(nC, NC);
if (core_id >= nc)
return; // terminate extra cores
// number of tasks per core
int tasks_per_core = num_tasks / nc;
int tasks_per_core_n1 = tasks_per_core;
if (core_id == (nc-1)) {
int rem = num_tasks - (nc * tasks_per_core);
tasks_per_core_n1 += rem; // last core also executes remaining WGs
}
// number of tasks per warp
int TW = tasks_per_core_n1 / NT; // occupied warps
int rT = tasks_per_core_n1 - TW * NT; // remaining threads
int fW = 1, rW = 0;
if (TW >= NW) {
fW = TW / NW; // full warps iterations
rW = TW - fW * NW; // remaining warps
}
// fast path handling
char isXYpow2 = is_log2(XY);
char log2XY = fast_log2(XY);
char log2X = fast_log2(X);
wspawn_kernel_args_t wspawn_args = {
ctx, callback, arg, core_id * tasks_per_core, fW, rW, isXYpow2, log2XY, log2X
};
g_wspawn_args[core_id] = &wspawn_args;
if (TW >= 1) {
// execute callback on other warps
int nw = MIN(TW, NW);
vx_wspawn(nw, spawn_kernel_all_cb);
// activate all threads
vx_tmc(-1);
// call stub routine
asm volatile("" ::: "memory");
spawn_kernel_all_stub();
// back to single-threaded
vx_tmc_one();
// wait for spawn warps to terminate
vx_wspawn_wait();
}
if (rT != 0) {
// adjust offset
wspawn_args.offset += (tasks_per_core_n1 - rT);
// activate remaining threads
int tmask = (1 << rT) - 1;
vx_tmc(tmask);
// call stub routine
spawn_kernel_rem_stub();
// back to single-threaded
vx_tmc_one();
}
}
#ifdef __cplusplus
}
#endif

150
kernel/src/vx_start.S Normal file
View File

@@ -0,0 +1,150 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <VX_config.h>
#include <VX_types.h>
#define RISCV_CUSTOM0 0x0B
.section .init, "ax"
.global _start
.type _start, @function
_start:
# initialize per-thread registers
csrr t0, VX_CSR_NUM_WARPS # get num warps
la t1, init_regs_all
.insn r RISCV_CUSTOM0, 1, 0, x0, t0, t1 # wspawn t0, t1
li t0, -1
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
jal init_regs
li t0, 1
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
# wait for spawn warps to terminate
jal vx_wspawn_wait
# initialize TLS for all warps
csrr t0, VX_CSR_NUM_WARPS # get num warps
la t1, init_tls_all
.insn r RISCV_CUSTOM0, 1, 0, x0, t0, t1 # wspawn t0, t1
li t0, -1
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
call __init_tls
li t0, 1
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
# wait for spawn warps to terminate
jal vx_wspawn_wait
# clear BSS segment
la a0, _edata
la a2, _end
sub a2, a2, a0
li a1, 0
call memset
# initialize trap vector
# la t0, trap_entry
# csrw mtvec, t0
# register global termination functions
la a0, __libc_fini_array
call atexit
# run global initialization functions
call __libc_init_array
# call main program routine
call main
# call exit routine
tail exit
.size _start, .-_start
.section .text
.type _exit, @function
.global _exit
_exit:
mv s0, a0
call vx_perf_dump
mv gp, s0
.insn r RISCV_CUSTOM0, 0, 0, x0, x0, x0 # tmc x0
.section .text
.type init_regs, @function
.local init_regs
init_regs:
# set global pointer register
.option push
.option norelax
la gp, __global_pointer
.option pop
# set stack pointer register
#if (XLEN == 64)
li t0, (STACK_BASE_ADDR >> 32)
slli t0, t0, 32
li sp, (STACK_BASE_ADDR & 0xffffffff)
or sp, sp, t0
#else
li sp, STACK_BASE_ADDR # load stack base address
#endif
csrr t0, VX_CSR_MHARTID
sll t1, t0, STACK_LOG2_SIZE
sub sp, sp, t1
# set thread pointer register
# use address space after BSS region
# ensure cache line alignment
la t1, __tcb_aligned_size
mul t0, t0, t1
la tp, _end + 63
add tp, tp, t0
and tp, tp, -64
ret
.section .text
.type init_regs_all, @function
.local init_regs_all
init_regs_all:
li t0, -1
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
jal init_regs
.insn r RISCV_CUSTOM0, 0, 0, x0, x0, x0 # tmc x0
ret
.section .text
.type init_tls_all, @function
.local init_tls_all
init_tls_all:
li t0, -1
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
call __init_tls
.insn r RISCV_CUSTOM0, 0, 0, x0, x0, x0 # tmc x0
ret
.section .text
.type vx_wspawn_wait, @function
.global vx_wspawn_wait
vx_wspawn_wait:
csrr t0, VX_CSR_WARP_MASK
li t1, 1
bne t0, t1, vx_wspawn_wait
ret
.section .data
.global __dso_handle
.weak __dso_handle
__dso_handle:
.long 0

124
kernel/src/vx_syscalls.c Normal file
View File

@@ -0,0 +1,124 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sys/stat.h>
#include <newlib.h>
#include <unistd.h>
#include <vx_intrinsics.h>
#include <vx_print.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
int _close(int file) { return -1; }
int _fstat(int file, struct stat *st) { return -1; }
int _isatty(int file) { return 0; }
int _lseek(int file, int ptr, int dir) { return 0; }
int _open(const char *name, int flags, int mode) { return -1; }
int _read(int file, char *ptr, int len) { return -1; }
caddr_t _sbrk(int incr) {
__asm__ __volatile__("ebreak");
return 0;
}
int _write(int file, char *ptr, int len) {
int i;
for (i = 0; i < len; ++i) {
vx_putchar(*ptr++);
}
return len;
}
int _kill(int pid, int sig) { return -1; }
int _getpid() {
return vx_hart_id();
}
void __init_tls(void) {
extern char __tdata_start[];
extern char __tbss_offset[];
extern char __tdata_size[];
extern char __tbss_size[];
// TLS memory initialization
register char *__thread_self __asm__ ("tp");
memcpy(__thread_self, __tdata_start, (size_t)__tdata_size);
memset(__thread_self + (size_t)__tbss_offset, 0, (size_t)__tbss_size);
}
#ifdef HAVE_INITFINI_ARRAY
/* These magic symbols are provided by the linker. */
extern void (*__preinit_array_start []) (void) __attribute__((weak));
extern void (*__preinit_array_end []) (void) __attribute__((weak));
extern void (*__init_array_start []) (void) __attribute__((weak));
extern void (*__init_array_end []) (void) __attribute__((weak));
#ifdef HAVE_INIT_FINI
extern void _init (void);
#endif
/* Iterate over all the init routines. */
void __libc_init_array (void) {
size_t count;
size_t i;
count = __preinit_array_end - __preinit_array_start;
for (i = 0; i < count; i++)
__preinit_array_start[i] ();
#ifdef HAVE_INIT_FINI
_init ();
#endif
count = __init_array_end - __init_array_start;
for (i = 0; i < count; i++)
__init_array_start[i] ();
}
#endif
#ifdef HAVE_INITFINI_ARRAY
extern void (*__fini_array_start []) (void) __attribute__((weak));
extern void (*__fini_array_end []) (void) __attribute__((weak));
#ifdef HAVE_INIT_FINI
extern void _fini (void);
#endif
/* Run all the cleanup routines. */
void __libc_fini_array (void) {
size_t count;
size_t i;
count = __fini_array_end - __fini_array_start;
for (i = count; i > 0; i--)
__fini_array_start[i-1] ();
#ifdef HAVE_INIT_FINI
_fini ();
#endif
}
#endif
#ifdef __cplusplus
}
#endif