Vortex 2.0 changes:
+ Microarchitecture optimizations + 64-bit support + Xilinx FPGA support + LLVM-16 support + Refactoring and quality control fixes
This commit is contained in:
0
kernel/.gitignore
vendored
Normal file
0
kernel/.gitignore
vendored
Normal file
66
kernel/Makefile
Normal file
66
kernel/Makefile
Normal file
@@ -0,0 +1,66 @@
|
||||
# Target register width in bits. Selects the toolchain install prefix and the
# -march/-mabi pair, and is reused further down for RISCV_PREFIX and -DXLEN_<n>.
XLEN ?= 32

# Only 32 and 64 are supported. Anything else used to fall through to the
# 32-bit flags while still being pasted into the toolchain prefix, producing a
# confusing "command not found" much later; fail immediately instead.
ifeq ($(XLEN),64)
RISCV_TOOLCHAIN_PATH ?= /opt/riscv64-gnu-toolchain
CFLAGS += -march=rv64imafd -mabi=lp64d
else ifeq ($(XLEN),32)
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
CFLAGS += -march=rv32imaf -mabi=ilp32f
else
$(error Unsupported XLEN '$(XLEN)': expected 32 or 64)
endif
|
||||
|
||||
# GNU target triple and the sysroot that lives under the toolchain install.
RISCV_PREFIX  ?= riscv$(XLEN)-unknown-elf
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)

# Install location of the Vortex LLVM build.
LLVM_VORTEX ?= /opt/llvm-vortex

# Flags for the clang-based toolchain. They are only consumed by the disabled
# CC/CXX definitions below; the GNU toolchain build does not read them.
LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT) \
               --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) \
               -Xclang -target-feature -Xclang +vortex -mllvm -vortex-branch-divergence=0
#LLVM_CFLAGS += -I$(RISCV_SYSROOT)/include/c++/9.2.0/$(RISCV_PREFIX)
#LLVM_CFLAGS += -I$(RISCV_SYSROOT)/include/c++/9.2.0
#LLVM_CFLAGS += -Wl,-L$(RISCV_TOOLCHAIN_PATH)/lib/gcc/$(RISCV_PREFIX)/9.2.0
#LLVM_CFLAGS += --rtlib=libgcc

# Alternative (disabled): build with the Vortex LLVM toolchain.
#CC  = $(LLVM_VORTEX)/bin/clang $(LLVM_CFLAGS)
#CXX = $(LLVM_VORTEX)/bin/clang++ $(LLVM_CFLAGS)
#DP  = $(LLVM_VORTEX)/bin/llvm-objdump
#CP  = $(LLVM_VORTEX)/bin/llvm-objcopy
|
||||
|
||||
# GNU cross tools. Every binary shares the "<toolchain>/bin/<triple>-" prefix,
# so it is spelled once; the variable is recursive so later overrides of
# RISCV_TOOLCHAIN_PATH or RISCV_PREFIX still take effect.
riscv_tool = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-

CC  = $(riscv_tool)gcc
CXX = $(riscv_tool)g++
AR  = $(riscv_tool)gcc-ar
DP  = $(riscv_tool)objdump
CP  = $(riscv_tool)objcopy
|
||||
|
||||
# Common compile flags, appended after the -march/-mabi pair chosen from XLEN:
# optimisation and code model, per-symbol sections, include paths, and the
# XLEN_<n> preprocessor define.
CFLAGS += -O3 -mcmodel=medany -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections \
          -I./include -I../hw \
          -DXLEN_$(XLEN)
|
||||
|
||||
# Base name of the static library produced by this Makefile.
PROJECT = libvortexrt

# Runtime sources, one per line. The order is kept as-is because it is the
# order objects are handed to the archiver.
SRCS = ./src/vx_start.S \
       ./src/vx_syscalls.c \
       ./src/vx_print.S \
       ./src/tinyprintf.c \
       ./src/vx_print.c \
       ./src/vx_spawn.c \
       ./src/vx_serial.S \
       ./src/vx_perf.c

# Objects are written to the current directory and keep the source extension
# (foo.c -> foo.c.o) so same-named .c and .S files do not collide.
OBJS := $(patsubst %,%.o,$(notdir $(SRCS)))
|
||||
|
||||
# Delete a target whose recipe failed, so a truncated file (for example a
# half-written .dump from a failed objdump redirect) is never seen as current.
.DELETE_ON_ERROR:

# These names are commands, not files.
.PHONY: all clean

# Default goal: the runtime archive plus its disassembly listing.
all: $(PROJECT).a $(PROJECT).dump

# Full disassembly of the archive.
$(PROJECT).dump: $(PROJECT).a
	$(DP) -D $< > $@

# Compile rules. -MMD writes a <object>.d dependency file next to each object
# and -MP adds phony targets for the headers, so editing or deleting a header
# triggers the right rebuilds.
%.S.o: src/%.S
	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@

%.cpp.o: src/%.cpp
	$(CXX) $(CFLAGS) -MMD -MP -c $< -o $@

%.c.o: src/%.c
	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@

# Static library holding all runtime objects.
$(PROJECT).a: $(OBJS)
	$(AR) rcs $@ $^

# Kept for anyone who invokes it by name. Its output is not included because
# -MM names targets "foo.o", which never match the "foo.c.o" objects built here.
.depend: $(SRCS)
	$(CC) $(CFLAGS) -MM $^ > $@

# Pull in the per-object dependency files; absent on a first build, hence "-".
-include $(OBJS:.o=.d)

# Remove everything this Makefile generates.
clean:
	rm -rf *.a *.o *.d *.dump .depend
|
||||
247
kernel/include/vx_intrinsics.h
Normal file
247
kernel/include/vx_intrinsics.h
Normal file
@@ -0,0 +1,247 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __VX_INTRINSICS_H__
|
||||
#define __VX_INTRINSICS_H__
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <VX_types.h>
|
||||
|
||||
// __UNIFORM__ tags a declaration with the "vortex.uniform" annotation when the
// compiler is clang; with any other compiler it expands to nothing.
#ifdef __clang__
#define __UNIFORM__ __attribute__((annotate("vortex.uniform")))
#else
#define __UNIFORM__
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// __ASM_STR(x): in C/C++ sources the argument is stringified; when the header
// is pulled into an assembly file (__ASSEMBLY__ defined) it is passed through
// untouched.
#ifndef __ASSEMBLY__
#define __ASM_STR(x)    #x
#else
#define __ASM_STR(x)    x
#endif
|
||||
|
||||
// Major opcodes (instruction bits [6:0]) that the RISC-V base ISA reserves for
// custom extensions; used as the opcode operand of the .insn directives below.
#define RISCV_CUSTOM0   0x0B    // custom-0
#define RISCV_CUSTOM1   0x2B    // custom-1
#define RISCV_CUSTOM2   0x5B    // custom-2
#define RISCV_CUSTOM3   0x7B    // custom-3
|
||||
|
||||
// Read a control/status register.
//   csr: CSR address; passed with the "i" constraint, so it must be a
//        compile-time constant.
// Evaluates to the value read. The temporary uses __SIZE_TYPE__ (the
// compiler's built-in spelling of size_t, no header needed) so that it is
// register-wide: the previous `unsigned` dropped the upper 32 bits on RV64,
// while on RV32 the type is unchanged.
#define csr_read(csr) ({                                                \
    __SIZE_TYPE__ __r;                                                  \
    __asm__ __volatile__ ("csrr %0, %1" : "=r" (__r) : "i" (csr));      \
    __r;                                                                \
})
|
||||
|
||||
// Write `val` to a control/status register.
//   csr: CSR address; compile-time constant ("i" constraint).
//   val: value to write. A compile-time constant below 32 is emitted as an
//        immediate operand; anything else goes through a register.
// The temporary is register-wide (__SIZE_TYPE__) so a 64-bit value is not
// truncated on RV64; on RV32 the type is the same `unsigned int` as before.
#define csr_write(csr, val) ({                                          \
    __SIZE_TYPE__ __v = (__SIZE_TYPE__)(val);                           \
    if (__builtin_constant_p(val) && __v < 32)                          \
        __asm__ __volatile__ ("csrw %0, %1" :: "i" (csr), "i" (__v));   \
    else                                                                \
        __asm__ __volatile__ ("csrw %0, %1" :: "i" (csr), "r" (__v));   \
})
|
||||
|
||||
// Atomically write `val` to a CSR and evaluate to the value it held before
// (csrrw).
//   csr: CSR address; compile-time constant ("i" constraint).
//   val: value to write. A compile-time constant below 32 is emitted as an
//        immediate operand; anything else goes through a register.
// Both temporaries are register-wide (__SIZE_TYPE__) so neither the written
// nor the returned value is truncated on RV64; RV32 types are unchanged.
#define csr_swap(csr, val) ({                                           \
    __SIZE_TYPE__ __r;                                                  \
    __SIZE_TYPE__ __v = (__SIZE_TYPE__)(val);                           \
    if (__builtin_constant_p(val) && __v < 32)                          \
        __asm__ __volatile__ ("csrrw %0, %1, %2" : "=r" (__r) : "i" (csr), "i" (__v)); \
    else                                                                \
        __asm__ __volatile__ ("csrrw %0, %1, %2" : "=r" (__r) : "i" (csr), "r" (__v)); \
    __r;                                                                \
})
|
||||
|
||||
// Set the bits of `val` in a CSR and evaluate to the value it held before
// (csrrs).
//   csr: CSR address; compile-time constant ("i" constraint).
//   val: bit mask to set. A compile-time constant below 32 is emitted as an
//        immediate operand; anything else goes through a register.
// Both temporaries are register-wide (__SIZE_TYPE__) so nothing is truncated
// on RV64; RV32 types are unchanged.
#define csr_read_set(csr, val) ({                                       \
    __SIZE_TYPE__ __r;                                                  \
    __SIZE_TYPE__ __v = (__SIZE_TYPE__)(val);                           \
    if (__builtin_constant_p(val) && __v < 32)                          \
        __asm__ __volatile__ ("csrrs %0, %1, %2" : "=r" (__r) : "i" (csr), "i" (__v)); \
    else                                                                \
        __asm__ __volatile__ ("csrrs %0, %1, %2" : "=r" (__r) : "i" (csr), "r" (__v)); \
    __r;                                                                \
})
|
||||
|
||||
// Set the bits of `val` in a CSR (csrs); no value is produced.
//   csr: CSR address; compile-time constant ("i" constraint).
//   val: bit mask to set. A compile-time constant below 32 is emitted as an
//        immediate operand; anything else goes through a register.
// The temporary is register-wide (__SIZE_TYPE__) so a 64-bit mask is not
// truncated on RV64; on RV32 the type is unchanged.
#define csr_set(csr, val) ({                                            \
    __SIZE_TYPE__ __v = (__SIZE_TYPE__)(val);                           \
    if (__builtin_constant_p(val) && __v < 32)                          \
        __asm__ __volatile__ ("csrs %0, %1" :: "i" (csr), "i" (__v));   \
    else                                                                \
        __asm__ __volatile__ ("csrs %0, %1" :: "i" (csr), "r" (__v));   \
})
|
||||
|
||||
// Clear the bits of `val` in a CSR and evaluate to the value it held before
// (csrrc).
//   csr: CSR address; compile-time constant ("i" constraint).
//   val: bit mask to clear. A compile-time constant below 32 is emitted as an
//        immediate operand; anything else goes through a register.
// Both temporaries are register-wide (__SIZE_TYPE__) so nothing is truncated
// on RV64; RV32 types are unchanged.
#define csr_read_clear(csr, val) ({                                     \
    __SIZE_TYPE__ __r;                                                  \
    __SIZE_TYPE__ __v = (__SIZE_TYPE__)(val);                           \
    if (__builtin_constant_p(val) && __v < 32)                          \
        __asm__ __volatile__ ("csrrc %0, %1, %2" : "=r" (__r) : "i" (csr), "i" (__v)); \
    else                                                                \
        __asm__ __volatile__ ("csrrc %0, %1, %2" : "=r" (__r) : "i" (csr), "r" (__v)); \
    __r;                                                                \
})
|
||||
|
||||
// Clear the bits of `val` in a CSR (csrc); no value is produced.
//   csr: CSR address; compile-time constant ("i" constraint).
//   val: bit mask to clear. A compile-time constant below 32 is emitted as an
//        immediate operand; anything else goes through a register.
// The temporary is register-wide (__SIZE_TYPE__) so a 64-bit mask is not
// truncated on RV64; on RV32 the type is unchanged.
#define csr_clear(csr, val) ({                                          \
    __SIZE_TYPE__ __v = (__SIZE_TYPE__)(val);                           \
    if (__builtin_constant_p(val) && __v < 32)                          \
        __asm__ __volatile__ ("csrc %0, %1" :: "i" (csr), "i" (__v));   \
    else                                                                \
        __asm__ __volatile__ ("csrc %0, %1" :: "i" (csr), "r" (__v));   \
})
|
||||
|
||||
// Texture load
|
||||
inline unsigned vx_tex(unsigned stage, unsigned u, unsigned v, unsigned lod) {
|
||||
unsigned ret;
|
||||
asm volatile (".insn r4 %1, 0, %2, %0, %3, %4, %5" : "=r"(ret) : "i"(RISCV_CUSTOM1), "i"(stage), "r"(u), "r"(v), "r"(lod));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Conditional move
|
||||
inline unsigned vx_cmov(unsigned c, unsigned t, unsigned f) {
|
||||
unsigned ret;
|
||||
asm volatile (".insn r4 %1, 1, 0, %0, %2, %3, %4" : "=r"(ret) : "i"(RISCV_CUSTOM1), "r"(c), "r"(t), "r"(f));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Rop write
|
||||
inline void vx_rop(unsigned x, unsigned y, unsigned face, unsigned color, unsigned depth) {
|
||||
unsigned pos_face = (y << 16) | (x << 1) | face;
|
||||
asm volatile (".insn r4 %0, 1, 1, x0, %1, %2, %3" :: "i"(RISCV_CUSTOM1), "r"(pos_face), "r"(color), "r"(depth));
|
||||
}
|
||||
|
||||
// Raster load
|
||||
inline unsigned vx_rast() {
|
||||
unsigned ret;
|
||||
asm volatile (".insn r %1, 0, 1, %0, x0, x0" : "=r"(ret) : "i"(RISCV_CUSTOM0));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Set thread mask
|
||||
inline void vx_tmc(unsigned thread_mask) {
|
||||
asm volatile (".insn r %0, 0, 0, x0, %1, x0" :: "i"(RISCV_CUSTOM0), "r"(thread_mask));
|
||||
}
|
||||
|
||||
// disable all threads in the current warp
|
||||
inline void vx_tmc_zero() {
|
||||
asm volatile (".insn r %0, 0, 0, x0, x0, x0" :: "i"(RISCV_CUSTOM0));
|
||||
}
|
||||
|
||||
// switch execution to single thread zero
|
||||
inline void vx_tmc_one() {
|
||||
asm volatile (
|
||||
"li a0, 1\n\t" // Load immediate value 1 into a0 (x10) register
|
||||
".insn r %0, 0, 0, x0, a0, x0" :: "i"(RISCV_CUSTOM0)
|
||||
: "a0" // Indicate that a0 (x10) is clobbered
|
||||
);
|
||||
}
|
||||
|
||||
// Set thread predicate
|
||||
inline void vx_pred(unsigned condition, unsigned thread_mask) {
|
||||
asm volatile (".insn r %0, 5, 0, x0, %1, %2" :: "i"(RISCV_CUSTOM0), "r"(condition), "r"(thread_mask));
|
||||
}
|
||||
|
||||
typedef void (*vx_wspawn_pfn)();
|
||||
|
||||
// Spawn warps
|
||||
inline void vx_wspawn(unsigned num_warps, vx_wspawn_pfn func_ptr) {
|
||||
asm volatile (".insn r %0, 1, 0, x0, %1, %2" :: "i"(RISCV_CUSTOM0), "r"(num_warps), "r"(func_ptr));
|
||||
}
|
||||
|
||||
// Split on a predicate
|
||||
inline unsigned vx_split(unsigned predicate) {
|
||||
unsigned ret;
|
||||
asm volatile (".insn r %1, 2, 0, %0, %2, x0" : "=r"(ret) : "i"(RISCV_CUSTOM0), "r"(predicate));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Join
|
||||
inline void vx_join(unsigned stack_ptr) {
|
||||
asm volatile (".insn r %0, 3, 0, x0, %1, x0" :: "i"(RISCV_CUSTOM0), "r"(stack_ptr));
|
||||
}
|
||||
|
||||
// Warp Barrier
|
||||
inline void vx_barrier(unsigned barried_id, unsigned num_warps) {
|
||||
asm volatile (".insn r %0, 4, 0, x0, %1, %2" :: "i"(RISCV_CUSTOM0), "r"(barried_id), "r"(num_warps));
|
||||
}
|
||||
|
||||
// Return current thread identifier
|
||||
inline int vx_thread_id() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_THREAD_ID));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return current warp identifier
|
||||
inline int vx_warp_id() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_WARP_ID));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return current core identifier
|
||||
inline int vx_core_id() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_CORE_ID));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return current thread mask
|
||||
inline int vx_thread_mask() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_THREAD_MASK));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return number of active warps
|
||||
inline int vx_active_warps() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_WARP_MASK));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return the number of threads per warp
|
||||
inline int vx_num_threads() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_NUM_THREADS));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return the number of warps per core
|
||||
inline int vx_num_warps() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_NUM_WARPS));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return the number of cores per cluster
|
||||
inline int vx_num_cores() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_NUM_CORES));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return the hart identifier (thread id accross the processor)
|
||||
inline int vx_hart_id() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_MHARTID));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Full memory fence: orders all prior device I/O and memory accesses before
// all later ones (fence iorw, iorw).
// The "memory" clobber makes this a compiler barrier as well. Without it the
// compiler may still move ordinary loads and stores across the asm statement
// (`volatile` only keeps the asm itself from being deleted or reordered
// against other volatile asm), so the hardware fence would not order the
// accesses the caller wrote.
// static inline: plain C99 `inline` provides no out-of-line definition, which
// breaks the link whenever a call is not inlined. (void) makes this a real
// prototype in C.
static inline void vx_fence(void) {
    __asm__ __volatile__ ("fence iorw, iorw" ::: "memory");
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __VX_INTRINSICS_H__
|
||||
34
kernel/include/vx_print.h
Normal file
34
kernel/include/vx_print.h
Normal file
@@ -0,0 +1,34 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __VX_PRINT_H__
|
||||
#define __VX_PRINT_H__
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Formatted output. vx_vprintf takes the arguments as an already-initialised
// va_list; vx_printf is the variadic form.
// NOTE(review): the int result is presumably the number of characters
// written, as with the standard printf family — confirm in the implementation.
int vx_vprintf(const char *format, va_list va);
int vx_printf(const char *format, ...);

// Primitive output helpers.
// NOTE(review): judging by the names, these emit one character, an integer in
// the given numeric base, and a float with the given number of fractional
// digits — confirm in the implementation.
void vx_putchar(int c);
void vx_putint(int value, int base);
void vx_putfloat(float value, int precision);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __VX_PRINT_H__
|
||||
58
kernel/include/vx_spawn.h
Normal file
58
kernel/include/vx_spawn.h
Normal file
@@ -0,0 +1,58 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef __VX_SPAWN_H__
|
||||
#define __VX_SPAWN_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Launch context handed to kernels started through vx_spawn_kernel.
// NOTE(review): the field names mirror an OpenCL-style NDRange launch
// (groups / offsets / local sizes per dimension plus a device-side printf
// buffer) — confirm against the host runtime that fills this structure in.
// Field order and types are part of the layout and must not change.
typedef struct {
    uint32_t  num_groups[3];            // one entry per dimension
    uint32_t  global_offset[3];         // one entry per dimension
    uint32_t  local_size[3];            // one entry per dimension
    char     *printf_buffer;
    uint32_t *printf_buffer_position;
    uint32_t  printf_buffer_capacity;
    uint32_t  work_dim;
} context_t;
|
||||
|
||||
typedef void (*vx_spawn_kernel_cb) (
|
||||
const void * /* arg */,
|
||||
const context_t * /* context */,
|
||||
uint32_t /* group_x */,
|
||||
uint32_t /* group_y */,
|
||||
uint32_t /* group_z */
|
||||
);
|
||||
|
||||
typedef void (*vx_spawn_tasks_cb)(int task_id, void *arg);
|
||||
|
||||
typedef void (*vx_serial_cb)(void *arg);
|
||||
|
||||
void vx_wspawn_wait();
|
||||
|
||||
void vx_spawn_kernel(context_t * ctx, vx_spawn_kernel_cb callback, void * arg);
|
||||
|
||||
void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback, void * arg);
|
||||
|
||||
void vx_serial(vx_serial_cb callback, void * arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __VX_SPAWN_H__
|
||||
252
kernel/linker/vx_link32.ld
Normal file
252
kernel/linker/vx_link32.ld
Normal file
@@ -0,0 +1,252 @@
|
||||
/* Default linker script, for normal executables */
|
||||
/* Copyright (C) 2014-2020 Free Software Foundation, Inc.
|
||||
Copying and distribution of this script, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved. */
|
||||
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv",
|
||||
"elf32-littleriscv")
|
||||
OUTPUT_ARCH(riscv)
|
||||
ENTRY(_start)
|
||||
SECTIONS
|
||||
{
|
||||
. = STARTUP_ADDR;
|
||||
.interp : { *(.interp) }
|
||||
.note.gnu.build-id : { *(.note.gnu.build-id) }
|
||||
.hash : { *(.hash) }
|
||||
.gnu.hash : { *(.gnu.hash) }
|
||||
.dynsym : { *(.dynsym) }
|
||||
.dynstr : { *(.dynstr) }
|
||||
.gnu.version : { *(.gnu.version) }
|
||||
.gnu.version_d : { *(.gnu.version_d) }
|
||||
.gnu.version_r : { *(.gnu.version_r) }
|
||||
.rela.init : { *(.rela.init) }
|
||||
.rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
|
||||
.rela.fini : { *(.rela.fini) }
|
||||
.rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
|
||||
.rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
|
||||
.rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
|
||||
.rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
|
||||
.rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
|
||||
.rela.ctors : { *(.rela.ctors) }
|
||||
.rela.dtors : { *(.rela.dtors) }
|
||||
.rela.got : { *(.rela.got) }
|
||||
.rela.sdata : { *(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*) }
|
||||
.rela.sbss : { *(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*) }
|
||||
.rela.sdata2 : { *(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*) }
|
||||
.rela.sbss2 : { *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*) }
|
||||
.rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
|
||||
.rela.iplt :
|
||||
{
|
||||
PROVIDE_HIDDEN (__rela_iplt_start = .);
|
||||
*(.rela.iplt)
|
||||
PROVIDE_HIDDEN (__rela_iplt_end = .);
|
||||
}
|
||||
.rela.plt :
|
||||
{
|
||||
*(.rela.plt)
|
||||
}
|
||||
.init :
|
||||
{
|
||||
KEEP (*(SORT_NONE(.init)))
|
||||
}
|
||||
.plt : { *(.plt) }
|
||||
.iplt : { *(.iplt) }
|
||||
.text :
|
||||
{
|
||||
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
|
||||
*(.text.exit .text.exit.*)
|
||||
*(.text.startup .text.startup.*)
|
||||
*(.text.hot .text.hot.*)
|
||||
*(SORT(.text.sorted.*))
|
||||
*(.text .stub .text.* .gnu.linkonce.t.*)
|
||||
/* .gnu.warning sections are handled specially by elf.em. */
|
||||
*(.gnu.warning)
|
||||
}
|
||||
.fini :
|
||||
{
|
||||
KEEP (*(SORT_NONE(.fini)))
|
||||
}
|
||||
PROVIDE (__etext = .);
|
||||
PROVIDE (_etext = .);
|
||||
PROVIDE (etext = .);
|
||||
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
|
||||
.rodata1 : { *(.rodata1) }
|
||||
.sdata2 :
|
||||
{
|
||||
*(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
|
||||
}
|
||||
.sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
|
||||
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
|
||||
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) }
|
||||
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) }
|
||||
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) }
|
||||
/* These sections are generated by the Sun/Oracle C++ compiler. */
|
||||
.exception_ranges : ONLY_IF_RO { *(.exception_ranges*) }
|
||||
/* Adjust the address for the data segment. We want to adjust up to
|
||||
the same address within the page on the next page up. */
|
||||
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
|
||||
/* Exception handling */
|
||||
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) }
|
||||
.gnu_extab : ONLY_IF_RW { *(.gnu_extab) }
|
||||
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
|
||||
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
|
||||
/* Thread Local Storage sections */
|
||||
.tdata :
|
||||
{
|
||||
PROVIDE_HIDDEN (__tdata_start = .);
|
||||
*(.tdata .tdata.* .gnu.linkonce.td.*)
|
||||
PROVIDE_HIDDEN (__tdata_end = .);
|
||||
}
|
||||
PROVIDE (__tdata_size = SIZEOF (.tdata));
|
||||
.tbss :
|
||||
{
|
||||
PROVIDE_HIDDEN (__tbss_start = .);
|
||||
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
|
||||
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
|
||||
PROVIDE_HIDDEN (__tbss_end = .);
|
||||
}
|
||||
PROVIDE (__tbss_size = SIZEOF (.tbss));
|
||||
PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64));
|
||||
.preinit_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__preinit_array_start = .);
|
||||
KEEP (*(.preinit_array))
|
||||
PROVIDE_HIDDEN (__preinit_array_end = .);
|
||||
}
|
||||
.init_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__init_array_start = .);
|
||||
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
|
||||
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
|
||||
PROVIDE_HIDDEN (__init_array_end = .);
|
||||
}
|
||||
.fini_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__fini_array_start = .);
|
||||
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
|
||||
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
|
||||
PROVIDE_HIDDEN (__fini_array_end = .);
|
||||
}
|
||||
.ctors :
|
||||
{
|
||||
/* gcc uses crtbegin.o to find the start of
|
||||
the constructors, so we make sure it is
|
||||
first. Because this is a wildcard, it
|
||||
doesn't matter if the user does not
|
||||
actually link against crtbegin.o; the
|
||||
linker won't look for a file to match a
|
||||
wildcard. The wildcard also means that it
|
||||
doesn't matter which directory crtbegin.o
|
||||
is in. */
|
||||
KEEP (*crtbegin.o(.ctors))
|
||||
KEEP (*crtbegin?.o(.ctors))
|
||||
/* We don't want to include the .ctor section from
|
||||
the crtend.o file until after the sorted ctors.
|
||||
The .ctor section from the crtend file contains the
|
||||
end of ctors marker and it must be last */
|
||||
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
|
||||
KEEP (*(SORT(.ctors.*)))
|
||||
KEEP (*(.ctors))
|
||||
}
|
||||
.dtors :
|
||||
{
|
||||
KEEP (*crtbegin.o(.dtors))
|
||||
KEEP (*crtbegin?.o(.dtors))
|
||||
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
|
||||
KEEP (*(SORT(.dtors.*)))
|
||||
KEEP (*(.dtors))
|
||||
}
|
||||
.jcr : { KEEP (*(.jcr)) }
|
||||
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
|
||||
.dynamic : { *(.dynamic) }
|
||||
. = DATA_SEGMENT_RELRO_END (0, .);
|
||||
.data :
|
||||
{
|
||||
__DATA_BEGIN__ = .;
|
||||
*(.data .data.* .gnu.linkonce.d.*)
|
||||
SORT(CONSTRUCTORS)
|
||||
}
|
||||
.data1 : { *(.data1) }
|
||||
.got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
|
||||
/* We want the small data sections together, so single-instruction offsets
|
||||
can access them all, and initialized data all before uninitialized, so
|
||||
we can shorten the on-disk segment size. */
|
||||
.sdata :
|
||||
{
|
||||
__SDATA_BEGIN__ = .;
|
||||
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*)
|
||||
*(.sdata .sdata.* .gnu.linkonce.s.*)
|
||||
}
|
||||
_edata = .; PROVIDE (edata = .);
|
||||
. = .;
|
||||
__bss_start = .;
|
||||
.sbss :
|
||||
{
|
||||
*(.dynsbss)
|
||||
*(.sbss .sbss.* .gnu.linkonce.sb.*)
|
||||
*(.scommon)
|
||||
}
|
||||
.bss :
|
||||
{
|
||||
*(.dynbss)
|
||||
*(.bss .bss.* .gnu.linkonce.b.*)
|
||||
*(COMMON)
|
||||
/* Align here to ensure that the .bss section occupies space up to
|
||||
_end. Align after .bss to ensure correct alignment even if the
|
||||
.bss section disappears because there are no input sections.
|
||||
FIXME: Why do we need it? When there is no .bss section, we do not
|
||||
pad the .data section. */
|
||||
. = ALIGN(. != 0 ? 32 / 8 : 1);
|
||||
}
|
||||
. = ALIGN(32 / 8);
|
||||
. = SEGMENT_START("ldata-segment", .);
|
||||
. = ALIGN(32 / 8);
|
||||
__BSS_END__ = .;
|
||||
__global_pointer = MIN(__SDATA_BEGIN__ + 0x800,
|
||||
MAX(__DATA_BEGIN__ + 0x800, __BSS_END__ - 0x800));
|
||||
_end = .; PROVIDE (end = .);
|
||||
. = DATA_SEGMENT_END (.);
|
||||
/* Stabs debugging sections. */
|
||||
.stab 0 : { *(.stab) }
|
||||
.stabstr 0 : { *(.stabstr) }
|
||||
.stab.excl 0 : { *(.stab.excl) }
|
||||
.stab.exclstr 0 : { *(.stab.exclstr) }
|
||||
.stab.index 0 : { *(.stab.index) }
|
||||
.stab.indexstr 0 : { *(.stab.indexstr) }
|
||||
.comment 0 : { *(.comment) }
|
||||
.gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) }
|
||||
/* DWARF debug sections.
|
||||
Symbols in the DWARF debugging sections are relative to the beginning
|
||||
of the section so we begin them at 0. */
|
||||
/* DWARF 1 */
|
||||
.debug 0 : { *(.debug) }
|
||||
.line 0 : { *(.line) }
|
||||
/* GNU DWARF 1 extensions */
|
||||
.debug_srcinfo 0 : { *(.debug_srcinfo) }
|
||||
.debug_sfnames 0 : { *(.debug_sfnames) }
|
||||
/* DWARF 1.1 and DWARF 2 */
|
||||
.debug_aranges 0 : { *(.debug_aranges) }
|
||||
.debug_pubnames 0 : { *(.debug_pubnames) }
|
||||
/* DWARF 2 */
|
||||
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
|
||||
.debug_abbrev 0 : { *(.debug_abbrev) }
|
||||
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) }
|
||||
.debug_frame 0 : { *(.debug_frame) }
|
||||
.debug_str 0 : { *(.debug_str) }
|
||||
.debug_loc 0 : { *(.debug_loc) }
|
||||
.debug_macinfo 0 : { *(.debug_macinfo) }
|
||||
/* SGI/MIPS DWARF 2 extensions */
|
||||
.debug_weaknames 0 : { *(.debug_weaknames) }
|
||||
.debug_funcnames 0 : { *(.debug_funcnames) }
|
||||
.debug_typenames 0 : { *(.debug_typenames) }
|
||||
.debug_varnames 0 : { *(.debug_varnames) }
|
||||
/* DWARF 3 */
|
||||
.debug_pubtypes 0 : { *(.debug_pubtypes) }
|
||||
.debug_ranges 0 : { *(.debug_ranges) }
|
||||
/* DWARF Extension. */
|
||||
.debug_macro 0 : { *(.debug_macro) }
|
||||
.debug_addr 0 : { *(.debug_addr) }
|
||||
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
|
||||
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
|
||||
|
||||
}
|
||||
252
kernel/linker/vx_link64.ld
Normal file
252
kernel/linker/vx_link64.ld
Normal file
@@ -0,0 +1,252 @@
|
||||
/* Default linker script, for normal executables */
|
||||
/* Copyright (C) 2014-2020 Free Software Foundation, Inc.
|
||||
Copying and distribution of this script, with or without modification,
|
||||
are permitted in any medium without royalty provided the copyright
|
||||
notice and this notice are preserved. */
|
||||
OUTPUT_FORMAT("elf64-littleriscv", "elf64-littleriscv",
|
||||
"elf64-littleriscv")
|
||||
OUTPUT_ARCH(riscv)
|
||||
ENTRY(_start)
|
||||
SECTIONS
|
||||
{
|
||||
. = STARTUP_ADDR;
|
||||
.interp : { *(.interp) }
|
||||
.note.gnu.build-id : { *(.note.gnu.build-id) }
|
||||
.hash : { *(.hash) }
|
||||
.gnu.hash : { *(.gnu.hash) }
|
||||
.dynsym : { *(.dynsym) }
|
||||
.dynstr : { *(.dynstr) }
|
||||
.gnu.version : { *(.gnu.version) }
|
||||
.gnu.version_d : { *(.gnu.version_d) }
|
||||
.gnu.version_r : { *(.gnu.version_r) }
|
||||
.rela.init : { *(.rela.init) }
|
||||
.rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
|
||||
.rela.fini : { *(.rela.fini) }
|
||||
.rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
|
||||
.rela.data.rel.ro : { *(.rela.data.rel.ro .rela.data.rel.ro.* .rela.gnu.linkonce.d.rel.ro.*) }
|
||||
.rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
|
||||
.rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
|
||||
.rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
|
||||
.rela.ctors : { *(.rela.ctors) }
|
||||
.rela.dtors : { *(.rela.dtors) }
|
||||
.rela.got : { *(.rela.got) }
|
||||
.rela.sdata : { *(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*) }
|
||||
.rela.sbss : { *(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*) }
|
||||
.rela.sdata2 : { *(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*) }
|
||||
.rela.sbss2 : { *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*) }
|
||||
.rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
|
||||
.rela.iplt :
|
||||
{
|
||||
PROVIDE_HIDDEN (__rela_iplt_start = .);
|
||||
*(.rela.iplt)
|
||||
PROVIDE_HIDDEN (__rela_iplt_end = .);
|
||||
}
|
||||
.rela.plt :
|
||||
{
|
||||
*(.rela.plt)
|
||||
}
|
||||
.init :
|
||||
{
|
||||
KEEP (*(SORT_NONE(.init)))
|
||||
}
|
||||
.plt : { *(.plt) }
|
||||
.iplt : { *(.iplt) }
|
||||
.text :
|
||||
{
|
||||
*(.text.unlikely .text.*_unlikely .text.unlikely.*)
|
||||
*(.text.exit .text.exit.*)
|
||||
*(.text.startup .text.startup.*)
|
||||
*(.text.hot .text.hot.*)
|
||||
*(SORT(.text.sorted.*))
|
||||
*(.text .stub .text.* .gnu.linkonce.t.*)
|
||||
/* .gnu.warning sections are handled specially by elf.em. */
|
||||
*(.gnu.warning)
|
||||
}
|
||||
.fini :
|
||||
{
|
||||
KEEP (*(SORT_NONE(.fini)))
|
||||
}
|
||||
PROVIDE (__etext = .);
|
||||
PROVIDE (_etext = .);
|
||||
PROVIDE (etext = .);
|
||||
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
|
||||
.rodata1 : { *(.rodata1) }
|
||||
.sdata2 :
|
||||
{
|
||||
*(.sdata2 .sdata2.* .gnu.linkonce.s2.*)
|
||||
}
|
||||
.sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
|
||||
.eh_frame_hdr : { *(.eh_frame_hdr) *(.eh_frame_entry .eh_frame_entry.*) }
|
||||
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) *(.eh_frame.*) }
|
||||
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) }
|
||||
.gnu_extab : ONLY_IF_RO { *(.gnu_extab*) }
|
||||
/* These sections are generated by the Sun/Oracle C++ compiler. */
|
||||
.exception_ranges : ONLY_IF_RO { *(.exception_ranges*) }
|
||||
/* Adjust the address for the data segment. We want to adjust up to
|
||||
the same address within the page on the next page up. */
|
||||
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
|
||||
/* Exception handling */
|
||||
.eh_frame : ONLY_IF_RW { KEEP (*(.eh_frame)) *(.eh_frame.*) }
|
||||
.gnu_extab : ONLY_IF_RW { *(.gnu_extab) }
|
||||
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
|
||||
.exception_ranges : ONLY_IF_RW { *(.exception_ranges*) }
|
||||
/* Thread Local Storage sections */
|
||||
.tdata :
|
||||
{
|
||||
PROVIDE_HIDDEN (__tdata_start = .);
|
||||
*(.tdata .tdata.* .gnu.linkonce.td.*)
|
||||
PROVIDE_HIDDEN (__tdata_end = .);
|
||||
}
|
||||
PROVIDE (__tdata_size = SIZEOF (.tdata));
|
||||
.tbss :
|
||||
{
|
||||
PROVIDE_HIDDEN (__tbss_start = .);
|
||||
PROVIDE_HIDDEN (__tbss_offset = ABSOLUTE (__tbss_start - __tdata_start));
|
||||
*(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon)
|
||||
PROVIDE_HIDDEN (__tbss_end = .);
|
||||
}
|
||||
PROVIDE (__tbss_size = SIZEOF (.tbss));
|
||||
PROVIDE (__tcb_aligned_size = ALIGN(__tbss_end - __tdata_start, 64));
|
||||
.preinit_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__preinit_array_start = .);
|
||||
KEEP (*(.preinit_array))
|
||||
PROVIDE_HIDDEN (__preinit_array_end = .);
|
||||
}
|
||||
.init_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__init_array_start = .);
|
||||
KEEP (*(SORT_BY_INIT_PRIORITY(.init_array.*) SORT_BY_INIT_PRIORITY(.ctors.*)))
|
||||
KEEP (*(.init_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors))
|
||||
PROVIDE_HIDDEN (__init_array_end = .);
|
||||
}
|
||||
.fini_array :
|
||||
{
|
||||
PROVIDE_HIDDEN (__fini_array_start = .);
|
||||
KEEP (*(SORT_BY_INIT_PRIORITY(.fini_array.*) SORT_BY_INIT_PRIORITY(.dtors.*)))
|
||||
KEEP (*(.fini_array EXCLUDE_FILE (*crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors))
|
||||
PROVIDE_HIDDEN (__fini_array_end = .);
|
||||
}
|
||||
.ctors :
|
||||
{
|
||||
/* gcc uses crtbegin.o to find the start of
|
||||
the constructors, so we make sure it is
|
||||
first. Because this is a wildcard, it
|
||||
doesn't matter if the user does not
|
||||
actually link against crtbegin.o; the
|
||||
linker won't look for a file to match a
|
||||
wildcard. The wildcard also means that it
|
||||
doesn't matter which directory crtbegin.o
|
||||
is in. */
|
||||
KEEP (*crtbegin.o(.ctors))
|
||||
KEEP (*crtbegin?.o(.ctors))
|
||||
/* We don't want to include the .ctor section from
|
||||
the crtend.o file until after the sorted ctors.
|
||||
The .ctor section from the crtend file contains the
|
||||
end of ctors marker and it must be last */
|
||||
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
|
||||
KEEP (*(SORT(.ctors.*)))
|
||||
KEEP (*(.ctors))
|
||||
}
|
||||
.dtors :
|
||||
{
|
||||
KEEP (*crtbegin.o(.dtors))
|
||||
KEEP (*crtbegin?.o(.dtors))
|
||||
KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
|
||||
KEEP (*(SORT(.dtors.*)))
|
||||
KEEP (*(.dtors))
|
||||
}
|
||||
.jcr : { KEEP (*(.jcr)) }
|
||||
.data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.*) }
|
||||
.dynamic : { *(.dynamic) }
|
||||
. = DATA_SEGMENT_RELRO_END (0, .);
|
||||
.data :
|
||||
{
|
||||
__DATA_BEGIN__ = .;
|
||||
*(.data .data.* .gnu.linkonce.d.*)
|
||||
SORT(CONSTRUCTORS)
|
||||
}
|
||||
.data1 : { *(.data1) }
|
||||
.got : { *(.got.plt) *(.igot.plt) *(.got) *(.igot) }
|
||||
/* We want the small data sections together, so single-instruction offsets
|
||||
can access them all, and initialized data all before uninitialized, so
|
||||
we can shorten the on-disk segment size. */
|
||||
.sdata :
|
||||
{
|
||||
__SDATA_BEGIN__ = .;
|
||||
*(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata .srodata.*)
|
||||
*(.sdata .sdata.* .gnu.linkonce.s.*)
|
||||
}
|
||||
_edata = .; PROVIDE (edata = .);
|
||||
. = .;
|
||||
__bss_start = .;
|
||||
.sbss :
|
||||
{
|
||||
*(.dynsbss)
|
||||
*(.sbss .sbss.* .gnu.linkonce.sb.*)
|
||||
*(.scommon)
|
||||
}
|
||||
.bss :
|
||||
{
|
||||
*(.dynbss)
|
||||
*(.bss .bss.* .gnu.linkonce.b.*)
|
||||
*(COMMON)
|
||||
/* Align here to ensure that the .bss section occupies space up to
|
||||
_end. Align after .bss to ensure correct alignment even if the
|
||||
.bss section disappears because there are no input sections.
|
||||
FIXME: Why do we need it? When there is no .bss section, we do not
|
||||
pad the .data section. */
|
||||
. = ALIGN(. != 0 ? 64 / 8 : 1);
|
||||
}
|
||||
. = ALIGN(64 / 8);
|
||||
. = SEGMENT_START("ldata-segment", .);
|
||||
. = ALIGN(64 / 8);
|
||||
__BSS_END__ = .;
|
||||
__global_pointer = MIN(__SDATA_BEGIN__ + 0x800,
|
||||
MAX(__DATA_BEGIN__ + 0x800, __BSS_END__ - 0x800));
|
||||
_end = .; PROVIDE (end = .);
|
||||
. = DATA_SEGMENT_END (.);
|
||||
/* Stabs debugging sections. */
|
||||
.stab 0 : { *(.stab) }
|
||||
.stabstr 0 : { *(.stabstr) }
|
||||
.stab.excl 0 : { *(.stab.excl) }
|
||||
.stab.exclstr 0 : { *(.stab.exclstr) }
|
||||
.stab.index 0 : { *(.stab.index) }
|
||||
.stab.indexstr 0 : { *(.stab.indexstr) }
|
||||
.comment 0 : { *(.comment) }
|
||||
.gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) }
|
||||
/* DWARF debug sections.
|
||||
Symbols in the DWARF debugging sections are relative to the beginning
|
||||
of the section so we begin them at 0. */
|
||||
/* DWARF 1 */
|
||||
.debug 0 : { *(.debug) }
|
||||
.line 0 : { *(.line) }
|
||||
/* GNU DWARF 1 extensions */
|
||||
.debug_srcinfo 0 : { *(.debug_srcinfo) }
|
||||
.debug_sfnames 0 : { *(.debug_sfnames) }
|
||||
/* DWARF 1.1 and DWARF 2 */
|
||||
.debug_aranges 0 : { *(.debug_aranges) }
|
||||
.debug_pubnames 0 : { *(.debug_pubnames) }
|
||||
/* DWARF 2 */
|
||||
.debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
|
||||
.debug_abbrev 0 : { *(.debug_abbrev) }
|
||||
.debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) }
|
||||
.debug_frame 0 : { *(.debug_frame) }
|
||||
.debug_str 0 : { *(.debug_str) }
|
||||
.debug_loc 0 : { *(.debug_loc) }
|
||||
.debug_macinfo 0 : { *(.debug_macinfo) }
|
||||
/* SGI/MIPS DWARF 2 extensions */
|
||||
.debug_weaknames 0 : { *(.debug_weaknames) }
|
||||
.debug_funcnames 0 : { *(.debug_funcnames) }
|
||||
.debug_typenames 0 : { *(.debug_typenames) }
|
||||
.debug_varnames 0 : { *(.debug_varnames) }
|
||||
/* DWARF 3 */
|
||||
.debug_pubtypes 0 : { *(.debug_pubtypes) }
|
||||
.debug_ranges 0 : { *(.debug_ranges) }
|
||||
/* DWARF Extension. */
|
||||
.debug_macro 0 : { *(.debug_macro) }
|
||||
.debug_addr 0 : { *(.debug_addr) }
|
||||
.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
|
||||
/DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_*) }
|
||||
|
||||
}
|
||||
890
kernel/src/tinyprintf.c
Normal file
890
kernel/src/tinyprintf.c
Normal file
@@ -0,0 +1,890 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// \author (c) Marco Paland (info@paland.com)
|
||||
// 2014-2019, PALANDesign Hannover, Germany
|
||||
//
|
||||
// \license The MIT License (MIT)
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
// \brief Tiny printf, sprintf and (v)snprintf implementation, optimized for speed on
|
||||
// embedded systems with a very limited resources. These routines are thread
|
||||
// safe and reentrant!
|
||||
// Use this instead of the bloated standard/newlib printf because these use
|
||||
// malloc for printf (and may not be thread safe).
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "tinyprintf.h"
|
||||
#include "vx_print.h"
|
||||
|
||||
|
||||
// Optional project-wide configuration: build with -DPRINTF_INCLUDE_CONFIG_H
// to pull in printf_config.h before the defaults below are applied.
// default: undefined
#if defined(PRINTF_INCLUDE_CONFIG_H)
#include "printf_config.h"
#endif


// Size of the on-stack 'ntoa' conversion buffer. It has to hold one converted
// integer including any padded zeros.
// default: 32 byte
#if !defined(PRINTF_NTOA_BUFFER_SIZE)
#define PRINTF_NTOA_BUFFER_SIZE    32U
#endif

// Size of the on-stack 'ftoa' conversion buffer. It has to hold one converted
// float including any padded zeros.
// default: 32 byte
#if !defined(PRINTF_FTOA_BUFFER_SIZE)
#define PRINTF_FTOA_BUFFER_SIZE    32U
#endif

// Floating point support (%f); switch off with PRINTF_DISABLE_SUPPORT_FLOAT.
// default: activated
#if !defined(PRINTF_DISABLE_SUPPORT_FLOAT)
#define PRINTF_SUPPORT_FLOAT
#endif

// Exponential notation support (%e/%g); switch off with
// PRINTF_DISABLE_SUPPORT_EXPONENTIAL.
// default: activated
#if !defined(PRINTF_DISABLE_SUPPORT_EXPONENTIAL)
#define PRINTF_SUPPORT_EXPONENTIAL
#endif

// Precision used for floating point output when the format gives none.
// default: 6 digits
#if !defined(PRINTF_DEFAULT_FLOAT_PRECISION)
#define PRINTF_DEFAULT_FLOAT_PRECISION  6U
#endif

// Largest magnitude that is still printed with %f.
// default: 1e9
#if !defined(PRINTF_MAX_FLOAT)
#define PRINTF_MAX_FLOAT  1e9
#endif

// long long support (%llu or %p); switch off with
// PRINTF_DISABLE_SUPPORT_LONG_LONG.
// default: activated
#if !defined(PRINTF_DISABLE_SUPPORT_LONG_LONG)
#define PRINTF_SUPPORT_LONG_LONG
#endif

// ptrdiff_t support (%t); switch off with PRINTF_DISABLE_SUPPORT_PTRDIFF_T.
// ptrdiff_t is normally defined in <stddef.h> as long or long long type
// default: activated
#if !defined(PRINTF_DISABLE_SUPPORT_PTRDIFF_T)
#define PRINTF_SUPPORT_PTRDIFF_T
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Internal format-state bits, one bit per flag, OR-ed together into the
// 'flags' word that is passed between the conversion helpers.
#define FLAGS_ZEROPAD   0x0001U
#define FLAGS_LEFT      0x0002U
#define FLAGS_PLUS      0x0004U
#define FLAGS_SPACE     0x0008U
#define FLAGS_HASH      0x0010U
#define FLAGS_UPPERCASE 0x0020U
#define FLAGS_CHAR      0x0040U
#define FLAGS_SHORT     0x0080U
#define FLAGS_LONG      0x0100U
#define FLAGS_LONG_LONG 0x0200U
#define FLAGS_PRECISION 0x0400U
#define FLAGS_ADAPT_EXP 0x0800U
|
||||
|
||||
|
||||
// import float.h for DBL_MAX
|
||||
#if defined(PRINTF_SUPPORT_FLOAT)
|
||||
#include <float.h>
|
||||
#endif
|
||||
|
||||
|
||||
// Signature shared by all output sinks: receives one character together with
// the destination, the running output index and the destination capacity.
typedef void (*out_fct_type)(char ch, void* dest, size_t pos, size_t capacity);


// Pairs a user callback with its opaque argument; a pointer to this struct is
// passed in place of the buffer when _out_fct is the sink.
typedef struct {
  void  (*fct)(char ch, void* user);
  void* arg;
} out_fct_wrap_type;
|
||||
|
||||
|
||||
// Buffer sink: stores 'character' at position 'idx' of 'buffer'.
// Positions at or beyond 'maxlen' are dropped without touching memory.
static inline void _out_buffer(char character, void* buffer, size_t idx, size_t maxlen)
{
  if (idx >= maxlen) {
    return;
  }
  char* const dst = (char*)buffer;
  dst[idx] = character;
}
|
||||
|
||||
|
||||
// Null sink: discards the character. Every parameter is intentionally unused.
static inline void _out_null(char character, void* buffer, size_t idx, size_t maxlen)
{
  (void)maxlen;
  (void)idx;
  (void)buffer;
  (void)character;
}
|
||||
|
||||
|
||||
// Character sink: forwards every non-NUL character to vx_putchar().
// 'buffer', 'idx' and 'maxlen' are not used by this sink.
static inline void _out_char(char character, void* buffer, size_t idx, size_t maxlen)
{
  (void)buffer;
  (void)idx;
  (void)maxlen;
  if (character == '\0') {
    // the terminating NUL is never forwarded
    return;
  }
  vx_putchar(character);
}
|
||||
|
||||
|
||||
// internal output function wrapper
|
||||
static inline void _out_fct(char character, void* buffer, size_t idx, size_t maxlen)
|
||||
{
|
||||
(void)idx; (void)maxlen;
|
||||
if (character) {
|
||||
// buffer is the output fct pointer
|
||||
((out_fct_wrap_type*)buffer)->fct(character, ((out_fct_wrap_type*)buffer)->arg);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// internal secure strlen
// \param str     String to measure; it need not be NUL-terminated within 'maxsize' bytes
// \param maxsize Upper bound on the number of characters that are inspected
// \return The length of the string (excluding the terminating 0) limited by 'maxsize'
static inline unsigned int _strnlen_s(const char* str, size_t maxsize)
{
  size_t n = 0U;
  // Test the bound BEFORE dereferencing so that str[maxsize] is never read;
  // this keeps "%.Ns" safe for an N-byte buffer without a terminator.
  while ((n < maxsize) && (str[n] != '\0')) {
    ++n;
  }
  return (unsigned int)n;
}
|
||||
|
||||
|
||||
// Decimal digit test.
// \return true if 'ch' is one of '0'..'9'
static inline bool _is_digit(char ch)
{
  // characters below '0' wrap to a large unsigned value and fail the comparison
  return (unsigned int)(ch - '0') <= 9U;
}
|
||||
|
||||
|
||||
// Parses the run of decimal digits at *str into an unsigned int and advances
// *str to the first character after that run.
static unsigned int _atoi(const char** str)
{
  const char* cursor = *str;
  unsigned int value = 0U;
  for (; _is_digit(*cursor); ++cursor) {
    value = value * 10U + (unsigned int)(*cursor - '0');
  }
  *str = cursor;
  return value;
}
|
||||
|
||||
|
||||
// output the specified string in reverse, taking care of any zero-padding
|
||||
static size_t _out_rev(out_fct_type out, char* buffer, size_t idx, size_t maxlen, const char* buf, size_t len, unsigned int width, unsigned int flags)
|
||||
{
|
||||
const size_t start_idx = idx;
|
||||
|
||||
// pad spaces up to given width
|
||||
if (!(flags & FLAGS_LEFT) && !(flags & FLAGS_ZEROPAD)) {
|
||||
for (size_t i = len; i < width; i++) {
|
||||
out(' ', buffer, idx++, maxlen);
|
||||
}
|
||||
}
|
||||
|
||||
// reverse string
|
||||
while (len) {
|
||||
out(buf[--len], buffer, idx++, maxlen);
|
||||
}
|
||||
|
||||
// append pad spaces up to given width
|
||||
if (flags & FLAGS_LEFT) {
|
||||
while (idx - start_idx < width) {
|
||||
out(' ', buffer, idx++, maxlen);
|
||||
}
|
||||
}
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
|
||||
// internal itoa format
|
||||
static size_t _ntoa_format(out_fct_type out, char* buffer, size_t idx, size_t maxlen, char* buf, size_t len, bool negative, unsigned int base, unsigned int prec, unsigned int width, unsigned int flags)
|
||||
{
|
||||
// pad leading zeros
|
||||
if (!(flags & FLAGS_LEFT)) {
|
||||
if (width && (flags & FLAGS_ZEROPAD) && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) {
|
||||
width--;
|
||||
}
|
||||
while ((len < prec) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
|
||||
buf[len++] = '0';
|
||||
}
|
||||
while ((flags & FLAGS_ZEROPAD) && (len < width) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
|
||||
buf[len++] = '0';
|
||||
}
|
||||
}
|
||||
|
||||
// handle hash
|
||||
if (flags & FLAGS_HASH) {
|
||||
if (!(flags & FLAGS_PRECISION) && len && ((len == prec) || (len == width))) {
|
||||
len--;
|
||||
if (len && (base == 16U)) {
|
||||
len--;
|
||||
}
|
||||
}
|
||||
if ((base == 16U) && !(flags & FLAGS_UPPERCASE) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
|
||||
buf[len++] = 'x';
|
||||
}
|
||||
else if ((base == 16U) && (flags & FLAGS_UPPERCASE) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
|
||||
buf[len++] = 'X';
|
||||
}
|
||||
else if ((base == 2U) && (len < PRINTF_NTOA_BUFFER_SIZE)) {
|
||||
buf[len++] = 'b';
|
||||
}
|
||||
if (len < PRINTF_NTOA_BUFFER_SIZE) {
|
||||
buf[len++] = '0';
|
||||
}
|
||||
}
|
||||
|
||||
if (len < PRINTF_NTOA_BUFFER_SIZE) {
|
||||
if (negative) {
|
||||
buf[len++] = '-';
|
||||
}
|
||||
else if (flags & FLAGS_PLUS) {
|
||||
buf[len++] = '+'; // ignore the space if the '+' exists
|
||||
}
|
||||
else if (flags & FLAGS_SPACE) {
|
||||
buf[len++] = ' ';
|
||||
}
|
||||
}
|
||||
|
||||
return _out_rev(out, buffer, idx, maxlen, buf, len, width, flags);
|
||||
}
|
||||
|
||||
|
||||
// internal itoa for 'long' type
|
||||
static size_t _ntoa_long(out_fct_type out, char* buffer, size_t idx, size_t maxlen, unsigned long value, bool negative, unsigned long base, unsigned int prec, unsigned int width, unsigned int flags)
|
||||
{
|
||||
char buf[PRINTF_NTOA_BUFFER_SIZE];
|
||||
size_t len = 0U;
|
||||
|
||||
// no hash for 0 values
|
||||
if (!value) {
|
||||
flags &= ~FLAGS_HASH;
|
||||
}
|
||||
|
||||
// write if precision != 0 and value is != 0
|
||||
if (!(flags & FLAGS_PRECISION) || value) {
|
||||
do {
|
||||
const char digit = (char)(value % base);
|
||||
buf[len++] = digit < 10 ? '0' + digit : (flags & FLAGS_UPPERCASE ? 'A' : 'a') + digit - 10;
|
||||
value /= base;
|
||||
} while (value && (len < PRINTF_NTOA_BUFFER_SIZE));
|
||||
}
|
||||
|
||||
return _ntoa_format(out, buffer, idx, maxlen, buf, len, negative, (unsigned int)base, prec, width, flags);
|
||||
}
|
||||
|
||||
|
||||
// Same as _ntoa_long, but for an unsigned long long magnitude.
// Only compiled when long long support is enabled.
#if defined(PRINTF_SUPPORT_LONG_LONG)
static size_t _ntoa_long_long(out_fct_type out, char* buffer, size_t idx, size_t maxlen, unsigned long long value, bool negative, unsigned long long base, unsigned int prec, unsigned int width, unsigned int flags)
{
  char digits[PRINTF_NTOA_BUFFER_SIZE];
  size_t count = 0U;

  // no hash for 0 values
  if (value == 0ULL) {
    flags &= ~FLAGS_HASH;
  }

  // an explicit precision together with a zero value produces no digits at all
  if (value || !(flags & FLAGS_PRECISION)) {
    const char alpha = (flags & FLAGS_UPPERCASE) ? 'A' : 'a';
    do {
      const char digit = (char)(value % base);
      if (digit < 10) {
        digits[count++] = '0' + digit;
      }
      else {
        digits[count++] = alpha + digit - 10;
      }
      value /= base;
    } while (value && (count < PRINTF_NTOA_BUFFER_SIZE));
  }

  return _ntoa_format(out, buffer, idx, maxlen, digits, count, negative, (unsigned int)base, prec, width, flags);
}
#endif  // PRINTF_SUPPORT_LONG_LONG
|
||||
|
||||
|
||||
#if defined(PRINTF_SUPPORT_FLOAT)
|
||||
|
||||
#if defined(PRINTF_SUPPORT_EXPONENTIAL)
|
||||
// forward declaration so that _ftoa can switch to exp notation for values > PRINTF_MAX_FLOAT
|
||||
static size_t _etoa(out_fct_type out, char* buffer, size_t idx, size_t maxlen, double value, unsigned int prec, unsigned int width, unsigned int flags);
|
||||
#endif
|
||||
|
||||
|
||||
// internal ftoa for fixed decimal floating point
|
||||
static size_t _ftoa(out_fct_type out, char* buffer, size_t idx, size_t maxlen, double value, unsigned int prec, unsigned int width, unsigned int flags)
|
||||
{
|
||||
char buf[PRINTF_FTOA_BUFFER_SIZE];
|
||||
size_t len = 0U;
|
||||
double diff = 0.0;
|
||||
|
||||
// powers of 10
|
||||
static const double pow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
|
||||
|
||||
// test for special values
|
||||
if (value != value)
|
||||
return _out_rev(out, buffer, idx, maxlen, "nan", 3, width, flags);
|
||||
if (value < -DBL_MAX)
|
||||
return _out_rev(out, buffer, idx, maxlen, "fni-", 4, width, flags);
|
||||
if (value > DBL_MAX)
|
||||
return _out_rev(out, buffer, idx, maxlen, (flags & FLAGS_PLUS) ? "fni+" : "fni", (flags & FLAGS_PLUS) ? 4U : 3U, width, flags);
|
||||
|
||||
// test for very large values
|
||||
// standard printf behavior is to print EVERY whole number digit -- which could be 100s of characters overflowing your buffers == bad
|
||||
if ((value > PRINTF_MAX_FLOAT) || (value < -PRINTF_MAX_FLOAT)) {
|
||||
#if defined(PRINTF_SUPPORT_EXPONENTIAL)
|
||||
return _etoa(out, buffer, idx, maxlen, value, prec, width, flags);
|
||||
#else
|
||||
return 0U;
|
||||
#endif
|
||||
}
|
||||
|
||||
// test for negative
|
||||
bool negative = false;
|
||||
if (value < 0) {
|
||||
negative = true;
|
||||
value = 0 - value;
|
||||
}
|
||||
|
||||
// set default precision, if not set explicitly
|
||||
if (!(flags & FLAGS_PRECISION)) {
|
||||
prec = PRINTF_DEFAULT_FLOAT_PRECISION;
|
||||
}
|
||||
// limit precision to 9, cause a prec >= 10 can lead to overflow errors
|
||||
while ((len < PRINTF_FTOA_BUFFER_SIZE) && (prec > 9U)) {
|
||||
buf[len++] = '0';
|
||||
prec--;
|
||||
}
|
||||
|
||||
int whole = (int)value;
|
||||
double tmp = (value - whole) * pow10[prec];
|
||||
unsigned long frac = (unsigned long)tmp;
|
||||
diff = tmp - frac;
|
||||
|
||||
if (diff > 0.5) {
|
||||
++frac;
|
||||
// handle rollover, e.g. case 0.99 with prec 1 is 1.0
|
||||
if (frac >= pow10[prec]) {
|
||||
frac = 0;
|
||||
++whole;
|
||||
}
|
||||
}
|
||||
else if (diff < 0.5) {
|
||||
}
|
||||
else if ((frac == 0U) || (frac & 1U)) {
|
||||
// if halfway, round up if odd OR if last digit is 0
|
||||
++frac;
|
||||
}
|
||||
|
||||
if (prec == 0U) {
|
||||
diff = value - (double)whole;
|
||||
if ((!(diff < 0.5) || (diff > 0.5)) && (whole & 1)) {
|
||||
// exactly 0.5 and ODD, then round up
|
||||
// 1.5 -> 2, but 2.5 -> 2
|
||||
++whole;
|
||||
}
|
||||
}
|
||||
else {
|
||||
unsigned int count = prec;
|
||||
// now do fractional part, as an unsigned number
|
||||
while (len < PRINTF_FTOA_BUFFER_SIZE) {
|
||||
--count;
|
||||
buf[len++] = (char)(48U + (frac % 10U));
|
||||
if (!(frac /= 10U)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// add extra 0s
|
||||
while ((len < PRINTF_FTOA_BUFFER_SIZE) && (count-- > 0U)) {
|
||||
buf[len++] = '0';
|
||||
}
|
||||
if (len < PRINTF_FTOA_BUFFER_SIZE) {
|
||||
// add decimal
|
||||
buf[len++] = '.';
|
||||
}
|
||||
}
|
||||
|
||||
// do whole part, number is reversed
|
||||
while (len < PRINTF_FTOA_BUFFER_SIZE) {
|
||||
buf[len++] = (char)(48 + (whole % 10));
|
||||
if (!(whole /= 10)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// pad leading zeros
|
||||
if (!(flags & FLAGS_LEFT) && (flags & FLAGS_ZEROPAD)) {
|
||||
if (width && (negative || (flags & (FLAGS_PLUS | FLAGS_SPACE)))) {
|
||||
width--;
|
||||
}
|
||||
while ((len < width) && (len < PRINTF_FTOA_BUFFER_SIZE)) {
|
||||
buf[len++] = '0';
|
||||
}
|
||||
}
|
||||
|
||||
if (len < PRINTF_FTOA_BUFFER_SIZE) {
|
||||
if (negative) {
|
||||
buf[len++] = '-';
|
||||
}
|
||||
else if (flags & FLAGS_PLUS) {
|
||||
buf[len++] = '+'; // ignore the space if the '+' exists
|
||||
}
|
||||
else if (flags & FLAGS_SPACE) {
|
||||
buf[len++] = ' ';
|
||||
}
|
||||
}
|
||||
|
||||
return _out_rev(out, buffer, idx, maxlen, buf, len, width, flags);
|
||||
}
|
||||
|
||||
|
||||
#if defined(PRINTF_SUPPORT_EXPONENTIAL)
// internal ftoa variant for exponential floating-point type, contributed by Martijn Jasperse <m.jasperse@gmail.com>
//
// Prints 'value' as mantissa + "e+XX" exponent (%e), or chooses between that
// and fixed notation when FLAGS_ADAPT_EXP is set (%g). The mantissa is printed
// by _ftoa, the exponent by _ntoa_long.
// \param prec   digits after the decimal point (%e) or significant digits (%g);
//               used only with FLAGS_PRECISION, otherwise PRINTF_DEFAULT_FLOAT_PRECISION
// \param width  minimum field width of the complete output
// \return the output index after the last emitted character
static size_t _etoa(out_fct_type out, char* buffer, size_t idx, size_t maxlen, double value, unsigned int prec, unsigned int width, unsigned int flags)
{
  // check for NaN and special values
  if ((value != value) || (value > DBL_MAX) || (value < -DBL_MAX)) {
    return _ftoa(out, buffer, idx, maxlen, value, prec, width, flags);
  }

  // determine the sign
  const bool negative = value < 0;
  if (negative) {
    value = -value;
  }

  // default precision
  if (!(flags & FLAGS_PRECISION)) {
    prec = PRINTF_DEFAULT_FLOAT_PRECISION;
  }

  union {
    uint64_t U;
    double   F;
  } conv;
  int expval;

  if (value == 0.0) {
    // Zero has no meaningful binary exponent: running it through the log10
    // estimate below would yield an exponent of about -308 ("0.000000e-308").
    // Its decimal exponent is 0 and no rescaling is needed.
    expval = 0;
    conv.F = 1.0;
  }
  else {
    // determine the decimal exponent
    // based on the algorithm by David Gay (https://www.ampl.com/netlib/fp/dtoa.c)
    conv.F = value;
    int exp2 = (int)((conv.U >> 52U) & 0x07FFU) - 1023;           // effectively log2
    conv.U = (conv.U & ((1ULL << 52U) - 1U)) | (1023ULL << 52U);  // drop the exponent so conv.F is now in [1,2)
    // now approximate log10 from the log2 integer part and an expansion of ln around 1.5
    expval = (int)(0.1760912590558 + exp2 * 0.301029995663981 + (conv.F - 1.5) * 0.289529654602168);
    // now we want to compute 10^expval but we want to be sure it won't overflow
    exp2 = (int)(expval * 3.321928094887362 + 0.5);
    const double z  = expval * 2.302585092994046 - exp2 * 0.6931471805599453;
    const double z2 = z * z;
    conv.U = (uint64_t)(exp2 + 1023) << 52U;
    // compute exp(z) using continued fractions, see https://en.wikipedia.org/wiki/Exponential_function#Continued_fractions_for_ex
    conv.F *= 1 + 2 * z / (2 - z + (z2 / (6 + (z2 / (10 + z2 / 14)))));
    // correct for rounding errors
    if (value < conv.F) {
      expval--;
      conv.F /= 10;
    }
  }

  // the exponent format is "%+03d" and largest value is "307", so set aside 4-5 characters
  unsigned int minwidth = ((expval < 100) && (expval > -100)) ? 4U : 5U;

  // in "%g" mode, "prec" is the number of *significant figures* not decimals
  if (flags & FLAGS_ADAPT_EXP) {
    // do we want to fall-back to "%f" mode? (zero has decimal exponent 0, so it does as well)
    if ((value == 0.0) || ((value >= 1e-4) && (value < 1e6))) {
      if ((int)prec > expval) {
        prec = (unsigned)((int)prec - expval - 1);
      }
      else {
        prec = 0;
      }
      flags |= FLAGS_PRECISION;   // make sure _ftoa respects precision
      // no characters in exponent
      minwidth = 0U;
      expval   = 0;
    }
    else {
      // we use one sigfig for the whole part
      if ((prec > 0) && (flags & FLAGS_PRECISION)) {
        --prec;
      }
    }
  }

  // will everything fit?
  unsigned int fwidth = width;
  if (width > minwidth) {
    // we didn't fall-back so subtract the characters required for the exponent
    fwidth -= minwidth;
  } else {
    // not enough characters, so go back to default sizing
    fwidth = 0U;
  }
  if ((flags & FLAGS_LEFT) && minwidth) {
    // if we're padding on the right, DON'T pad the floating part
    fwidth = 0U;
  }

  // rescale the float value
  if (expval) {
    value /= conv.F;
  }

  // output the floating part
  const size_t start_idx = idx;
  idx = _ftoa(out, buffer, idx, maxlen, negative ? -value : value, prec, fwidth, flags & ~FLAGS_ADAPT_EXP);

  // output the exponent part
  if (minwidth) {
    // output the exponential symbol
    out((flags & FLAGS_UPPERCASE) ? 'E' : 'e', buffer, idx++, maxlen);
    // output the exponent value
    idx = _ntoa_long(out, buffer, idx, maxlen, (expval < 0) ? -expval : expval, expval < 0, 10, 0, minwidth-1, FLAGS_ZEROPAD | FLAGS_PLUS);
    // might need to right-pad spaces
    if (flags & FLAGS_LEFT) {
      while (idx - start_idx < width) out(' ', buffer, idx++, maxlen);
    }
  }
  return idx;
}
#endif  // PRINTF_SUPPORT_EXPONENTIAL
|
||||
#endif // PRINTF_SUPPORT_FLOAT
|
||||
|
||||
|
||||
// internal vsnprintf
|
||||
static int _vsnprintf(out_fct_type out, char* buffer, const size_t maxlen, const char* format, va_list va) {
|
||||
unsigned int flags, width, precision, n;
|
||||
size_t idx = 0U;
|
||||
|
||||
if (!buffer) {
|
||||
// use null output function
|
||||
out = _out_null;
|
||||
}
|
||||
|
||||
while (*format)
|
||||
{
|
||||
// format specifier? %[flags][width][.precision][length]
|
||||
if (*format != '%') {
|
||||
// no
|
||||
out(*format, buffer, idx++, maxlen);
|
||||
format++;
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
// yes, evaluate it
|
||||
format++;
|
||||
}
|
||||
|
||||
// evaluate flags
|
||||
flags = 0U;
|
||||
do {
|
||||
switch (*format) {
|
||||
case '0': flags |= FLAGS_ZEROPAD; format++; n = 1U; break;
|
||||
case '-': flags |= FLAGS_LEFT; format++; n = 1U; break;
|
||||
case '+': flags |= FLAGS_PLUS; format++; n = 1U; break;
|
||||
case ' ': flags |= FLAGS_SPACE; format++; n = 1U; break;
|
||||
case '#': flags |= FLAGS_HASH; format++; n = 1U; break;
|
||||
default : n = 0U; break;
|
||||
}
|
||||
} while (n);
|
||||
|
||||
// evaluate width field
|
||||
width = 0U;
|
||||
if (_is_digit(*format)) {
|
||||
width = _atoi(&format);
|
||||
}
|
||||
else if (*format == '*') {
|
||||
const int w = va_arg(va, int);
|
||||
if (w < 0) {
|
||||
flags |= FLAGS_LEFT; // reverse padding
|
||||
width = (unsigned int)-w;
|
||||
}
|
||||
else {
|
||||
width = (unsigned int)w;
|
||||
}
|
||||
format++;
|
||||
}
|
||||
|
||||
// evaluate precision field
|
||||
precision = 0U;
|
||||
if (*format == '.') {
|
||||
flags |= FLAGS_PRECISION;
|
||||
format++;
|
||||
if (_is_digit(*format)) {
|
||||
precision = _atoi(&format);
|
||||
}
|
||||
else if (*format == '*') {
|
||||
const int prec = (int)va_arg(va, int);
|
||||
precision = prec > 0 ? (unsigned int)prec : 0U;
|
||||
format++;
|
||||
}
|
||||
}
|
||||
|
||||
// evaluate length field
|
||||
switch (*format) {
|
||||
case 'l' :
|
||||
flags |= FLAGS_LONG;
|
||||
format++;
|
||||
if (*format == 'l') {
|
||||
flags |= FLAGS_LONG_LONG;
|
||||
format++;
|
||||
}
|
||||
break;
|
||||
case 'h' :
|
||||
flags |= FLAGS_SHORT;
|
||||
format++;
|
||||
if (*format == 'h') {
|
||||
flags |= FLAGS_CHAR;
|
||||
format++;
|
||||
}
|
||||
break;
|
||||
#if defined(PRINTF_SUPPORT_PTRDIFF_T)
|
||||
case 't' :
|
||||
flags |= (sizeof(ptrdiff_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG);
|
||||
format++;
|
||||
break;
|
||||
#endif
|
||||
case 'j' :
|
||||
flags |= (sizeof(intmax_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG);
|
||||
format++;
|
||||
break;
|
||||
case 'z' :
|
||||
flags |= (sizeof(size_t) == sizeof(long) ? FLAGS_LONG : FLAGS_LONG_LONG);
|
||||
format++;
|
||||
break;
|
||||
default :
|
||||
break;
|
||||
}
|
||||
|
||||
// evaluate specifier
|
||||
switch (*format) {
|
||||
case 'd' :
|
||||
case 'i' :
|
||||
case 'u' :
|
||||
case 'x' :
|
||||
case 'X' :
|
||||
case 'o' :
|
||||
case 'b' : {
|
||||
// set the base
|
||||
unsigned int base;
|
||||
if (*format == 'x' || *format == 'X') {
|
||||
base = 16U;
|
||||
}
|
||||
else if (*format == 'o') {
|
||||
base = 8U;
|
||||
}
|
||||
else if (*format == 'b') {
|
||||
base = 2U;
|
||||
}
|
||||
else {
|
||||
base = 10U;
|
||||
flags &= ~FLAGS_HASH; // no hash for dec format
|
||||
}
|
||||
// uppercase
|
||||
if (*format == 'X') {
|
||||
flags |= FLAGS_UPPERCASE;
|
||||
}
|
||||
|
||||
// no plus or space flag for u, x, X, o, b
|
||||
if ((*format != 'i') && (*format != 'd')) {
|
||||
flags &= ~(FLAGS_PLUS | FLAGS_SPACE);
|
||||
}
|
||||
|
||||
// ignore '0' flag when precision is given
|
||||
if (flags & FLAGS_PRECISION) {
|
||||
flags &= ~FLAGS_ZEROPAD;
|
||||
}
|
||||
|
||||
// convert the integer
|
||||
if ((*format == 'i') || (*format == 'd')) {
|
||||
// signed
|
||||
if (flags & FLAGS_LONG_LONG) {
|
||||
#if defined(PRINTF_SUPPORT_LONG_LONG)
|
||||
const long long value = va_arg(va, long long);
|
||||
idx = _ntoa_long_long(out, buffer, idx, maxlen, (unsigned long long)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags);
|
||||
#endif
|
||||
}
|
||||
else if (flags & FLAGS_LONG) {
|
||||
const long value = va_arg(va, long);
|
||||
idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned long)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags);
|
||||
}
|
||||
else {
|
||||
const int value = (flags & FLAGS_CHAR) ? (char)va_arg(va, int) : (flags & FLAGS_SHORT) ? (short int)va_arg(va, int) : va_arg(va, int);
|
||||
idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned int)(value > 0 ? value : 0 - value), value < 0, base, precision, width, flags);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// unsigned
|
||||
if (flags & FLAGS_LONG_LONG) {
|
||||
#if defined(PRINTF_SUPPORT_LONG_LONG)
|
||||
idx = _ntoa_long_long(out, buffer, idx, maxlen, va_arg(va, unsigned long long), false, base, precision, width, flags);
|
||||
#endif
|
||||
}
|
||||
else if (flags & FLAGS_LONG) {
|
||||
idx = _ntoa_long(out, buffer, idx, maxlen, va_arg(va, unsigned long), false, base, precision, width, flags);
|
||||
}
|
||||
else {
|
||||
const unsigned int value = (flags & FLAGS_CHAR) ? (unsigned char)va_arg(va, unsigned int) : (flags & FLAGS_SHORT) ? (unsigned short int)va_arg(va, unsigned int) : va_arg(va, unsigned int);
|
||||
idx = _ntoa_long(out, buffer, idx, maxlen, value, false, base, precision, width, flags);
|
||||
}
|
||||
}
|
||||
format++;
|
||||
break;
|
||||
}
|
||||
#if defined(PRINTF_SUPPORT_FLOAT)
|
||||
case 'f' :
|
||||
case 'F' :
|
||||
if (*format == 'F') flags |= FLAGS_UPPERCASE;
|
||||
idx = _ftoa(out, buffer, idx, maxlen, va_arg(va, double), precision, width, flags);
|
||||
format++;
|
||||
break;
|
||||
#if defined(PRINTF_SUPPORT_EXPONENTIAL)
|
||||
case 'e':
|
||||
case 'E':
|
||||
case 'g':
|
||||
case 'G':
|
||||
if ((*format == 'g')||(*format == 'G')) flags |= FLAGS_ADAPT_EXP;
|
||||
if ((*format == 'E')||(*format == 'G')) flags |= FLAGS_UPPERCASE;
|
||||
idx = _etoa(out, buffer, idx, maxlen, va_arg(va, double), precision, width, flags);
|
||||
format++;
|
||||
break;
|
||||
#endif // PRINTF_SUPPORT_EXPONENTIAL
|
||||
#endif // PRINTF_SUPPORT_FLOAT
|
||||
case 'c' : {
|
||||
unsigned int l = 1U;
|
||||
// pre padding
|
||||
if (!(flags & FLAGS_LEFT)) {
|
||||
while (l++ < width) {
|
||||
out(' ', buffer, idx++, maxlen);
|
||||
}
|
||||
}
|
||||
// char output
|
||||
out((char)va_arg(va, int), buffer, idx++, maxlen);
|
||||
// post padding
|
||||
if (flags & FLAGS_LEFT) {
|
||||
while (l++ < width) {
|
||||
out(' ', buffer, idx++, maxlen);
|
||||
}
|
||||
}
|
||||
format++;
|
||||
break;
|
||||
}
|
||||
|
||||
case 's' : {
|
||||
const char* p = va_arg(va, char*);
|
||||
unsigned int l = _strnlen_s(p, precision ? precision : (size_t)-1);
|
||||
// pre padding
|
||||
if (flags & FLAGS_PRECISION) {
|
||||
l = (l < precision ? l : precision);
|
||||
}
|
||||
if (!(flags & FLAGS_LEFT)) {
|
||||
while (l++ < width) {
|
||||
out(' ', buffer, idx++, maxlen);
|
||||
}
|
||||
}
|
||||
// string output
|
||||
while ((*p != 0) && (!(flags & FLAGS_PRECISION) || precision--)) {
|
||||
out(*(p++), buffer, idx++, maxlen);
|
||||
}
|
||||
// post padding
|
||||
if (flags & FLAGS_LEFT) {
|
||||
while (l++ < width) {
|
||||
out(' ', buffer, idx++, maxlen);
|
||||
}
|
||||
}
|
||||
format++;
|
||||
break;
|
||||
}
|
||||
|
||||
case 'p' : {
|
||||
width = sizeof(void*) * 2U;
|
||||
flags |= FLAGS_ZEROPAD | FLAGS_UPPERCASE;
|
||||
#if defined(PRINTF_SUPPORT_LONG_LONG)
|
||||
const bool is_ll = sizeof(uintptr_t) == sizeof(long long);
|
||||
if (is_ll) {
|
||||
idx = _ntoa_long_long(out, buffer, idx, maxlen, (uintptr_t)va_arg(va, void*), false, 16U, precision, width, flags);
|
||||
}
|
||||
else {
|
||||
#endif
|
||||
idx = _ntoa_long(out, buffer, idx, maxlen, (unsigned long)((uintptr_t)va_arg(va, void*)), false, 16U, precision, width, flags);
|
||||
#if defined(PRINTF_SUPPORT_LONG_LONG)
|
||||
}
|
||||
#endif
|
||||
format++;
|
||||
break;
|
||||
}
|
||||
|
||||
case '%' :
|
||||
out('%', buffer, idx++, maxlen);
|
||||
format++;
|
||||
break;
|
||||
|
||||
default :
|
||||
out(*format, buffer, idx++, maxlen);
|
||||
format++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// termination
|
||||
out((char)0, buffer, idx < maxlen ? idx : maxlen - 1U, maxlen);
|
||||
|
||||
// return written chars without terminating \0
|
||||
return (int)idx;
|
||||
}
|
||||
|
||||
int tiny_printf(const char* format, ...) {
|
||||
va_list va;
|
||||
va_start(va, format);
|
||||
char buffer[1];
|
||||
const int ret = _vsnprintf(_out_char, buffer, (size_t)-1, format, va);
|
||||
va_end(va);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int tiny_sprintf(char* buffer, const char* format, ...) {
|
||||
va_list va;
|
||||
va_start(va, format);
|
||||
const int ret = _vsnprintf(_out_buffer, buffer, (size_t)-1, format, va);
|
||||
va_end(va);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int tiny_snprintf(char* buffer, size_t count, const char* format, ...) {
|
||||
va_list va;
|
||||
va_start(va, format);
|
||||
const int ret = _vsnprintf(_out_buffer, buffer, count, format, va);
|
||||
va_end(va);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int tiny_vprintf(const char* format, va_list va) {
|
||||
char buffer[1];
|
||||
return _vsnprintf(_out_char, buffer, (size_t)-1, format, va);
|
||||
}
|
||||
|
||||
int tiny_vsnprintf(char* buffer, size_t count, const char* format, va_list va) {
|
||||
return _vsnprintf(_out_buffer, buffer, count, format, va);
|
||||
}
|
||||
86
kernel/src/tinyprintf.h
Normal file
86
kernel/src/tinyprintf.h
Normal file
@@ -0,0 +1,86 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// \author (c) Marco Paland (info@paland.com)
|
||||
// 2014-2019, PALANDesign Hannover, Germany
|
||||
//
|
||||
// \license The MIT License (MIT)
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
// \brief Tiny printf, sprintf and snprintf implementation, optimized for speed on
|
||||
// embedded systems with a very limited resources.
|
||||
// Use this instead of bloated standard/newlib printf.
|
||||
// These routines are thread safe and reentrant.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef __TINYPRINTF_H__
|
||||
#define __TINYPRINTF_H__
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Tiny printf implementation
|
||||
* You have to implement _putchar if you use printf()
|
||||
* The functions are declared with a tiny_ prefix, so they do not conflict with the regular printf() API
|
||||
* and no macro overrides of the standard names are needed
|
||||
* \param format A string that specifies the format of the output
|
||||
* \return The number of characters that are written to the output, not counting the terminating null character
|
||||
*/
|
||||
int tiny_printf(const char* format, ...);
|
||||
|
||||
/**
|
||||
* Tiny sprintf implementation
|
||||
* Due to security reasons (buffer overflow) YOU SHOULD CONSIDER USING (V)SNPRINTF INSTEAD!
|
||||
* \param buffer A pointer to the buffer where to store the formatted string. MUST be big enough to store the output!
|
||||
* \param format A string that specifies the format of the output
|
||||
* \return The number of characters that are WRITTEN into the buffer, not counting the terminating null character
|
||||
*/
|
||||
int tiny_sprintf(char* buffer, const char* format, ...);
|
||||
|
||||
/**
|
||||
* Tiny snprintf/vsnprintf implementation
|
||||
* \param buffer A pointer to the buffer where to store the formatted string
|
||||
* \param count The maximum number of characters to store in the buffer, including a terminating null character
|
||||
* \param format A string that specifies the format of the output
|
||||
* \param va A value identifying a variable arguments list
|
||||
* \return The number of characters that COULD have been written into the buffer, not counting the terminating
|
||||
* null character. A value equal or larger than count indicates truncation. Only when the returned value
|
||||
* is non-negative and less than count, the string has been completely written.
|
||||
*/
|
||||
int tiny_snprintf(char* buffer, size_t count, const char* format, ...);
|
||||
int tiny_vsnprintf(char* buffer, size_t count, const char* format, va_list va);
|
||||
|
||||
/**
|
||||
* Tiny vprintf implementation
|
||||
* \param format A string that specifies the format of the output
|
||||
* \param va A value identifying a variable arguments list
|
||||
* \return The number of characters that are WRITTEN to the output, not counting the terminating null character
|
||||
*/
|
||||
int tiny_vprintf(const char* format, va_list va);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __TINYPRINTF_H__
|
||||
49
kernel/src/vx_perf.c
Normal file
49
kernel/src/vx_perf.c
Normal file
@@ -0,0 +1,49 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <VX_types.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Copies four consecutive CSRs, starting at CSR number `s`, into
// csr_mem[d .. d+3]. Expands to four assignment statements and relies on a
// variable named `csr_mem` and on `csr_read` being visible at the point of
// use. Both parameters are parenthesized so that expression arguments
// (for example `a | b`) keep their intended value.
#define DUMP_CSR_4(d, s) \
  csr_mem[(d) + 0] = csr_read((s) + 0); \
  csr_mem[(d) + 1] = csr_read((s) + 1); \
  csr_mem[(d) + 2] = csr_read((s) + 2); \
  csr_mem[(d) + 3] = csr_read((s) + 3);
|
||||
|
||||
// Copies 32 consecutive CSRs, starting at CSR number `s`, into
// csr_mem[d .. d+31] by expanding DUMP_CSR_4 eight times. Parameters are
// parenthesized before the offsets are added so expression arguments are
// not re-associated by operator precedence.
#define DUMP_CSR_32(d, s) \
  DUMP_CSR_4((d) + 0,  (s) + 0)  \
  DUMP_CSR_4((d) + 4,  (s) + 4)  \
  DUMP_CSR_4((d) + 8,  (s) + 8)  \
  DUMP_CSR_4((d) + 12, (s) + 12) \
  DUMP_CSR_4((d) + 16, (s) + 16) \
  DUMP_CSR_4((d) + 20, (s) + 20) \
  DUMP_CSR_4((d) + 24, (s) + 24) \
  DUMP_CSR_4((d) + 28, (s) + 28)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vx_perf_dump() {
|
||||
int core_id = vx_core_id();
|
||||
uint32_t* const csr_mem = (uint32_t*)(IO_CSR_ADDR + 64 * sizeof(uint32_t) * core_id);
|
||||
DUMP_CSR_32(0, VX_CSR_MPM_BASE)
|
||||
DUMP_CSR_32(32, VX_CSR_MPM_BASE_H)
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
32
kernel/src/vx_print.S
Normal file
32
kernel/src/vx_print.S
Normal file
@@ -0,0 +1,32 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <VX_types.h>
|
||||
|
||||
.type vx_putchar, @function
|
||||
.global vx_putchar
|
||||
vx_putchar:
|
||||
csrr t0, VX_CSR_MHARTID
|
||||
andi t0, t0, %lo(IO_COUT_SIZE-1)
|
||||
#if (XLEN == 64)
|
||||
li t1, (IO_COUT_ADDR >> 32)
|
||||
slli t1, t1, 32
|
||||
li t2, (IO_COUT_ADDR & 0xffffffff)
|
||||
or t1, t1, t2
|
||||
#else
|
||||
li t1, IO_COUT_ADDR
|
||||
#endif
|
||||
add t0, t0, t1
|
||||
sb a0, 0(t0)
|
||||
ret
|
||||
107
kernel/src/vx_print.c
Normal file
107
kernel/src/vx_print.c
Normal file
@@ -0,0 +1,107 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <vx_print.h>
|
||||
#include <vx_spawn.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include "tinyprintf.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Argument bundle passed to __vprintf_cb through vx_serial.
typedef struct {
|
||||
// format string forwarded to tiny_vprintf
const char* format;
|
||||
// pointer to the variadic argument list of the caller
va_list* va;
|
||||
// result of tiny_vprintf, stored by __vprintf_cb
int ret;
|
||||
} printf_arg_t;
|
||||
|
||||
// Argument bundle passed to __putint_cb through vx_serial.
typedef struct {
|
||||
// integer to print
int value;
|
||||
// numeric base handed to itoa
int base;
|
||||
} putint_arg_t;
|
||||
|
||||
// Argument bundle passed to __putfloat_cb through vx_serial.
typedef struct {
|
||||
// value to print
float value;
|
||||
// number of fractional digits requested by the caller
int precision;
|
||||
} putfloat_arg_t;
|
||||
|
||||
static void __putint_cb(const putint_arg_t* arg) {
|
||||
char tmp[33];
|
||||
float value = arg->value;
|
||||
int base = arg->base;
|
||||
itoa(value, tmp, base);
|
||||
for (int i = 0; i < 33; ++i) {
|
||||
int c = tmp[i];
|
||||
if (!c)
|
||||
break;
|
||||
vx_putchar(c);
|
||||
}
|
||||
}
|
||||
|
||||
static void __putfloat_cb(const putfloat_arg_t* arg) {
|
||||
float value = arg->value;
|
||||
int precision = arg->precision;
|
||||
int ipart = (int)value;
|
||||
vx_putint(ipart, 10);
|
||||
if (precision != 0) {
|
||||
vx_putchar('.');
|
||||
float frac = value - (float)ipart;
|
||||
float fscaled = frac * pow(10, precision);
|
||||
vx_putint((int)fscaled, 10);
|
||||
}
|
||||
}
|
||||
|
||||
// vx_serial callback: runs tiny_vprintf on the bundled format and argument
// list and records its return value in arg->ret.
static void __vprintf_cb(printf_arg_t* arg) {
  va_list* args = arg->va;
  const int written = tiny_vprintf(arg->format, *args);
  arg->ret = written;
}
|
||||
|
||||
void vx_putint(int value, int base) {
|
||||
putint_arg_t arg;
|
||||
arg.value = value;
|
||||
arg.base = base;
|
||||
vx_serial((vx_serial_cb)__putint_cb, &arg);
|
||||
}
|
||||
|
||||
void vx_putfloat(float value, int precision) {
|
||||
putfloat_arg_t arg;
|
||||
arg.value = value;
|
||||
arg.precision = precision;
|
||||
vx_serial((vx_serial_cb)__putfloat_cb, &arg);
|
||||
}
|
||||
|
||||
int vx_vprintf(const char* format, va_list va) {
|
||||
printf_arg_t arg;
|
||||
arg.format = format;
|
||||
arg.va = &va;
|
||||
vx_serial((vx_serial_cb)__vprintf_cb, &arg);
|
||||
return arg.ret;
|
||||
}
|
||||
|
||||
// Variadic front end of vx_vprintf: collects the arguments into a va_list,
// forwards them and returns whatever vx_vprintf returned.
int vx_printf(const char * format, ...) {
  va_list args;
  va_start(args, format);
  const int result = vx_vprintf(format, args);
  va_end(args);
  return result;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
77
kernel/src/vx_serial.S
Normal file
77
kernel/src/vx_serial.S
Normal file
@@ -0,0 +1,77 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <VX_types.h>
|
||||
|
||||
#define RISCV_CUSTOM0 0x0B
|
||||
|
||||
.type vx_serial, @function
|
||||
.global vx_serial
|
||||
vx_serial:
|
||||
#if (XLEN == 64)
|
||||
addi sp, sp, -56
|
||||
sd ra, 48(sp)
|
||||
sd s5, 40(sp)
|
||||
sd s4, 32(sp)
|
||||
sd s3, 24(sp)
|
||||
sd s2, 16(sp)
|
||||
sd s1, 8(sp)
|
||||
sd s0, 0(sp)
|
||||
#else
|
||||
addi sp, sp, -28
|
||||
sw ra, 24(sp)
|
||||
sw s5, 20(sp)
|
||||
sw s4, 16(sp)
|
||||
sw s3, 12(sp)
|
||||
sw s2, 8(sp)
|
||||
sw s1, 4(sp)
|
||||
sw s0, 0(sp)
|
||||
#endif
|
||||
mv s4, a0 # s4 <- callback
|
||||
mv s3, a1 # s3 <- arg
|
||||
csrr s2, VX_CSR_NUM_THREADS # s2 <- NT
|
||||
csrr s1, VX_CSR_THREAD_ID # s1 <- tid
|
||||
li s0, 0 # s0 <- index
|
||||
label_loop:
|
||||
sub t0, s0, s1
|
||||
seqz t1, t0 # (index != tid)
|
||||
.insn r RISCV_CUSTOM0, 2, 0, s5, t1, x0 # split s5, t0
|
||||
bnez t0, label_join
|
||||
mv a0, s3 # a0 <- arg
|
||||
jalr s4 # callback(arg)
|
||||
label_join:
|
||||
.insn r RISCV_CUSTOM0, 3, 0, x0, s5, x0 # join s5
|
||||
addi s0, s0, 1 # index++
|
||||
blt s0, s2, label_loop # loop back
|
||||
#if (XLEN == 64)
|
||||
ld ra, 48(sp)
|
||||
ld s5, 40(sp)
|
||||
ld s4, 32(sp)
|
||||
ld s3, 24(sp)
|
||||
ld s2, 16(sp)
|
||||
ld s1, 8(sp)
|
||||
ld s0, 0(sp)
|
||||
addi sp, sp, 56
|
||||
#else
|
||||
lw ra, 24(sp)
|
||||
lw s5, 20(sp)
|
||||
lw s4, 16(sp)
|
||||
lw s3, 12(sp)
|
||||
lw s2, 8(sp)
|
||||
lw s1, 4(sp)
|
||||
lw s0, 0(sp)
|
||||
addi sp, sp, 28
|
||||
#endif
|
||||
ret
|
||||
|
||||
334
kernel/src/vx_spawn.c
Normal file
334
kernel/src/vx_spawn.c
Normal file
@@ -0,0 +1,334 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <vx_spawn.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Capacity of g_wspawn_args; the spawn entry points return early on cores
// whose id is not below this value.
#define NUM_CORES_MAX 1024
|
||||
|
||||
// Smaller of two values. Each argument appears twice in the expansion, so
// arguments with side effects are evaluated more than once.
#ifndef MIN
|
||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
// Per-core launch descriptor read by spawn_tasks_all_stub and
// spawn_tasks_rem_stub. vx_spawn_tasks fills it with a positional
// initializer, so the field order must not change.
typedef struct {
|
||||
// user callback, invoked as callback(task_id, arg)
vx_spawn_tasks_cb callback;
|
||||
// opaque pointer passed through to the callback
void* arg;
|
||||
int offset; // task offset
|
||||
int NWs; // number of NW batches where NW=<total warps per core>.
|
||||
int RWs; // number of remaining warps in the core
|
||||
} wspawn_tasks_args_t;
|
||||
|
||||
// Per-core launch descriptor read by spawn_kernel_all_stub and
// spawn_kernel_rem_stub. vx_spawn_kernel fills it with a positional
// initializer, so the field order must not change.
typedef struct {
|
||||
// kernel context; the stubs read ctx->num_groups[0] and [1] from it
context_t * ctx;
|
||||
// user callback, invoked as callback(arg, ctx, i, j, k)
vx_spawn_kernel_cb callback;
|
||||
// opaque pointer passed through to the callback
void* arg;
|
||||
int offset; // task offset
|
||||
int NWs; // number of NW batches where NW=<total warps per core>.
|
||||
int RWs; // number of remaining warps in the core
|
||||
// non-zero selects the shift-based index decomposition in the stubs
char isXYpow2;
|
||||
// shift amount used in place of dividing by X*Y
char log2XY;
|
||||
// shift amount used in place of dividing by X
char log2X;
|
||||
} wspawn_kernel_args_t;
|
||||
|
||||
// One slot per core id. The spawn entry points store the address of their
// stack-local launch descriptor here and the stub routines read it back.
void* g_wspawn_args[NUM_CORES_MAX];
|
||||
|
||||
// Returns 1 when x is a positive power of two, 0 otherwise.
//
// Zero and negative inputs are rejected explicitly: the bare bit trick
// (x & (x-1)) == 0 also accepts 0. The function is `static inline` because a
// plain `inline` definition in C provides no external definition, which
// breaks the link whenever the compiler chooses not to inline the call.
static inline char is_log2(int x) {
  return (x > 0) && ((x & (x - 1)) == 0);
}
|
||||
|
||||
// Returns the unbiased binary exponent of (float)x, i.e. log2(x) when x is a
// power of two that float represents exactly. For x == 0 the result is -127.
//
// The float bits are read through a union rather than a pointer cast, which
// violated the strict-aliasing rule. `static inline` is used because a plain
// `inline` definition in C provides no external definition to link against.
static inline int fast_log2(int x) {
  union {
    float f;
    int   i;
  } bits;
  bits.f = (float)x;
  // exponent field starts at bit 23 and is biased by 127
  return (bits.i >> 23) - 127;
}
|
||||
|
||||
static void __attribute__ ((noinline)) spawn_tasks_all_stub() {
|
||||
int NT = vx_num_threads();
|
||||
int cid = vx_core_id();
|
||||
int wid = vx_warp_id();
|
||||
int tid = vx_thread_id();
|
||||
|
||||
wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[cid];
|
||||
|
||||
int wK = (p_wspawn_args->NWs * wid) + MIN(p_wspawn_args->RWs, wid);
|
||||
int tK = p_wspawn_args->NWs + (wid < p_wspawn_args->RWs);
|
||||
int offset = p_wspawn_args->offset + (wK * NT) + (tid * tK);
|
||||
|
||||
vx_spawn_tasks_cb callback = p_wspawn_args->callback;
|
||||
void* arg = p_wspawn_args->arg;
|
||||
for (int task_id = offset, N = task_id + tK; task_id < N; ++task_id) {
|
||||
callback(task_id, arg);
|
||||
}
|
||||
}
|
||||
|
||||
static void __attribute__ ((noinline)) spawn_tasks_rem_stub() {
|
||||
int cid = vx_core_id();
|
||||
int tid = vx_thread_id();
|
||||
|
||||
wspawn_tasks_args_t* p_wspawn_args = (wspawn_tasks_args_t*)g_wspawn_args[cid];
|
||||
int task_id = p_wspawn_args->offset + tid;
|
||||
(p_wspawn_args->callback)(task_id, p_wspawn_args->arg);
|
||||
}
|
||||
|
||||
// Routine handed to vx_wspawn by vx_spawn_tasks. The three calls must stay
// in this order: widen the thread mask, run the stub, then disable the warp.
static void __attribute__ ((noinline)) spawn_tasks_all_cb() {
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
// call stub routine
|
||||
spawn_tasks_all_stub();
|
||||
|
||||
// disable warp
|
||||
vx_tmc_zero();
|
||||
}
|
||||
|
||||
// Distributes task ids 0 .. num_tasks-1 over cores, warps and threads and
// calls callback(task_id, arg) for each of them.
//   num_tasks  total number of tasks
//   callback   function invoked once per task id
//   arg        opaque pointer forwarded to every callback invocation
// Work is split evenly across the participating cores; the last of them
// also takes the remainder. Inside a core, whole warps are run first through
// spawn_tasks_all_stub, then the leftover tasks (fewer than NT) are run on a
// partial thread mask through spawn_tasks_rem_stub.
// NOTE(review): the stubs re-read everything from g_wspawn_args instead of
// using locals of this function, presumably because threads enabled by
// vx_tmc do not share these locals - confirm before restructuring.
void vx_spawn_tasks(int num_tasks, vx_spawn_tasks_cb callback , void * arg) {
|
||||
// device specs
|
||||
int NC = vx_num_cores();
|
||||
int NW = vx_num_warps();
|
||||
int NT = vx_num_threads();
|
||||
|
||||
// current core id
|
||||
int core_id = vx_core_id();
|
||||
// g_wspawn_args has only NUM_CORES_MAX slots
if (core_id >= NUM_CORES_MAX)
|
||||
return;
|
||||
|
||||
// calculate necessary active cores
|
||||
int WT = NW * NT;
|
||||
int nC = (num_tasks > WT) ? (num_tasks / WT) : 1;
|
||||
int nc = MIN(nC, NC);
|
||||
if (core_id >= nc)
|
||||
return; // terminate extra cores
|
||||
|
||||
// number of tasks per core
|
||||
int tasks_per_core = num_tasks / nc;
|
||||
int tasks_per_core_n1 = tasks_per_core;
|
||||
if (core_id == (nc-1)) {
|
||||
int rem = num_tasks - (nc * tasks_per_core);
|
||||
tasks_per_core_n1 += rem; // last core also executes remaining tasks
|
||||
}
|
||||
|
||||
// number of tasks per warp
|
||||
int TW = tasks_per_core_n1 / NT; // occupied warps
|
||||
int rT = tasks_per_core_n1 - TW * NT; // remaining threads
|
||||
int fW = 1, rW = 0;
|
||||
if (TW >= NW) {
|
||||
fW = TW / NW; // full warps iterations
|
||||
rW = TW - fW * NW; // remaining warps
|
||||
}
|
||||
|
||||
// positional initializer: callback, arg, offset, NWs, RWs
wspawn_tasks_args_t wspawn_args = { callback, arg, core_id * tasks_per_core, fW, rW };
|
||||
// publish the descriptor; it lives on this stack frame until return
g_wspawn_args[core_id] = &wspawn_args;
|
||||
|
||||
if (TW >= 1) {
|
||||
// execute callback on other warps
|
||||
int nw = MIN(TW, NW);
|
||||
vx_wspawn(nw, spawn_tasks_all_cb);
|
||||
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
// call stub routine
|
||||
spawn_tasks_all_stub();
|
||||
|
||||
// back to single-threaded
|
||||
vx_tmc_one();
|
||||
|
||||
// wait for spawn warps to terminate
|
||||
vx_wspawn_wait();
|
||||
}
|
||||
|
||||
if (rT != 0) {
|
||||
// adjust offset
|
||||
wspawn_args.offset += (tasks_per_core_n1 - rT);
|
||||
|
||||
// activate remaining threads
|
||||
// mask with the low rT bits set
int tmask = (1 << rT) - 1;
|
||||
vx_tmc(tmask);
|
||||
|
||||
// call stub routine
|
||||
spawn_tasks_rem_stub();
|
||||
|
||||
// back to single-threaded
|
||||
vx_tmc_one();
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void __attribute__ ((noinline)) spawn_kernel_all_stub() {
|
||||
int NT = vx_num_threads();
|
||||
int cid = vx_core_id();
|
||||
int wid = vx_warp_id();
|
||||
int tid = vx_thread_id();
|
||||
|
||||
wspawn_kernel_args_t* p_wspawn_args = (wspawn_kernel_args_t*)g_wspawn_args[cid];
|
||||
|
||||
int wK = (p_wspawn_args->NWs * wid) + MIN(p_wspawn_args->RWs, wid);
|
||||
int tK = p_wspawn_args->NWs + (wid < p_wspawn_args->RWs);
|
||||
int offset = p_wspawn_args->offset + (wK * NT) + (tid * tK);
|
||||
|
||||
int X = p_wspawn_args->ctx->num_groups[0];
|
||||
int Y = p_wspawn_args->ctx->num_groups[1];
|
||||
int XY = X * Y;
|
||||
|
||||
if (p_wspawn_args->isXYpow2) {
|
||||
for (int wg_id = offset, N = wg_id + tK; wg_id < N; ++wg_id) {
|
||||
int k = wg_id >> p_wspawn_args->log2XY;
|
||||
int wg_2d = wg_id - k * XY;
|
||||
int j = wg_2d >> p_wspawn_args->log2X;
|
||||
int i = wg_2d - j * X;
|
||||
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, i, j, k);
|
||||
}
|
||||
} else {
|
||||
for (int wg_id = offset, N = wg_id + tK; wg_id < N; ++wg_id) {
|
||||
int k = wg_id / XY;
|
||||
int wg_2d = wg_id - k * XY;
|
||||
int j = wg_2d / X;
|
||||
int i = wg_2d - j * X;
|
||||
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, i, j, k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void __attribute__ ((noinline)) spawn_kernel_rem_stub() {
|
||||
int cid = vx_core_id();
|
||||
int tid = vx_thread_id();
|
||||
|
||||
wspawn_kernel_args_t* p_wspawn_args = (wspawn_kernel_args_t*)g_wspawn_args[cid];
|
||||
|
||||
int wg_id = p_wspawn_args->offset + tid;
|
||||
|
||||
int X = p_wspawn_args->ctx->num_groups[0];
|
||||
int Y = p_wspawn_args->ctx->num_groups[1];
|
||||
int XY = X * Y;
|
||||
|
||||
if (p_wspawn_args->isXYpow2) {
|
||||
int k = wg_id >> p_wspawn_args->log2XY;
|
||||
int wg_2d = wg_id - k * XY;
|
||||
int j = wg_2d >> p_wspawn_args->log2X;
|
||||
int i = wg_2d - j * X;
|
||||
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, i, j, k);
|
||||
} else {
|
||||
int k = wg_id / XY;
|
||||
int wg_2d = wg_id - k * XY;
|
||||
int j = wg_2d / X;
|
||||
int i = wg_2d - j * X;
|
||||
(p_wspawn_args->callback)(p_wspawn_args->arg, p_wspawn_args->ctx, i, j, k);
|
||||
}
|
||||
}
|
||||
|
||||
// Routine handed to vx_wspawn by vx_spawn_kernel. The three calls must stay
// in this order: widen the thread mask, run the stub, then disable the warp.
static void __attribute__ ((noinline)) spawn_kernel_all_cb() {
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
// call stub routine
|
||||
spawn_kernel_all_stub();
|
||||
|
||||
// disable warp
|
||||
vx_tmc_zero();
|
||||
}
|
||||
|
||||
// Distributes the X*Y*Z work groups described by ctx->num_groups over cores,
// warps and threads and calls callback(arg, ctx, i, j, k) for each of them.
//   ctx       kernel context; num_groups[0..2] give the X, Y, Z group counts
//   callback  function invoked once per work group
//   arg       opaque pointer forwarded to every callback invocation
// The partitioning mirrors vx_spawn_tasks: an even split across the
// participating cores with the remainder on the last one, whole warps first
// (spawn_kernel_all_stub), then fewer than NT leftover groups on a partial
// thread mask (spawn_kernel_rem_stub).
// NOTE(review): the stubs re-read everything from g_wspawn_args instead of
// using locals of this function, presumably because threads enabled by
// vx_tmc do not share these locals - confirm before restructuring.
void vx_spawn_kernel(context_t * ctx, vx_spawn_kernel_cb callback, void * arg) {
|
||||
// total number of WGs
|
||||
int X = ctx->num_groups[0];
|
||||
int Y = ctx->num_groups[1];
|
||||
int Z = ctx->num_groups[2];
|
||||
int XY = X * Y;
|
||||
int num_tasks = XY * Z;
|
||||
|
||||
// device specs
|
||||
int NC = vx_num_cores();
|
||||
int NW = vx_num_warps();
|
||||
int NT = vx_num_threads();
|
||||
|
||||
// current core id
|
||||
int core_id = vx_core_id();
|
||||
// g_wspawn_args has only NUM_CORES_MAX slots
if (core_id >= NUM_CORES_MAX)
|
||||
return;
|
||||
|
||||
// calculate necessary active cores
|
||||
int WT = NW * NT;
|
||||
int nC = (num_tasks > WT) ? (num_tasks / WT) : 1;
|
||||
int nc = MIN(nC, NC);
|
||||
if (core_id >= nc)
|
||||
return; // terminate extra cores
|
||||
|
||||
// number of tasks per core
|
||||
int tasks_per_core = num_tasks / nc;
|
||||
int tasks_per_core_n1 = tasks_per_core;
|
||||
if (core_id == (nc-1)) {
|
||||
int rem = num_tasks - (nc * tasks_per_core);
|
||||
tasks_per_core_n1 += rem; // last core also executes remaining WGs
|
||||
}
|
||||
|
||||
// number of tasks per warp
|
||||
int TW = tasks_per_core_n1 / NT; // occupied warps
|
||||
int rT = tasks_per_core_n1 - TW * NT; // remaining threads
|
||||
int fW = 1, rW = 0;
|
||||
if (TW >= NW) {
|
||||
fW = TW / NW; // full warps iterations
|
||||
rW = TW - fW * NW; // remaining warps
|
||||
}
|
||||
|
||||
// fast path handling
|
||||
// the two shift amounts are only consumed by the stubs when isXYpow2 is set
char isXYpow2 = is_log2(XY);
|
||||
char log2XY = fast_log2(XY);
|
||||
char log2X = fast_log2(X);
|
||||
|
||||
// positional initializer: field order of wspawn_kernel_args_t
wspawn_kernel_args_t wspawn_args = {
|
||||
ctx, callback, arg, core_id * tasks_per_core, fW, rW, isXYpow2, log2XY, log2X
|
||||
};
|
||||
// publish the descriptor; it lives on this stack frame until return
g_wspawn_args[core_id] = &wspawn_args;
|
||||
|
||||
if (TW >= 1) {
|
||||
// execute callback on other warps
|
||||
int nw = MIN(TW, NW);
|
||||
vx_wspawn(nw, spawn_kernel_all_cb);
|
||||
|
||||
// activate all threads
|
||||
vx_tmc(-1);
|
||||
|
||||
// call stub routine
|
||||
// compiler-only memory barrier (empty asm with a memory clobber)
asm volatile("" ::: "memory");
|
||||
spawn_kernel_all_stub();
|
||||
|
||||
// back to single-threaded
|
||||
vx_tmc_one();
|
||||
|
||||
// wait for spawn warps to terminate
|
||||
vx_wspawn_wait();
|
||||
}
|
||||
|
||||
if (rT != 0) {
|
||||
// adjust offset
|
||||
wspawn_args.offset += (tasks_per_core_n1 - rT);
|
||||
|
||||
// activate remaining threads
|
||||
// mask with the low rT bits set
int tmask = (1 << rT) - 1;
|
||||
vx_tmc(tmask);
|
||||
|
||||
// call stub routine
|
||||
spawn_kernel_rem_stub();
|
||||
|
||||
// back to single-threaded
|
||||
vx_tmc_one();
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
150
kernel/src/vx_start.S
Normal file
150
kernel/src/vx_start.S
Normal file
@@ -0,0 +1,150 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <VX_config.h>
|
||||
#include <VX_types.h>
|
||||
|
||||
#define RISCV_CUSTOM0 0x0B
|
||||
|
||||
# Program entry point, placed in the .init section.
# Sequence: per-thread register setup on every warp (init_regs), TLS setup
# on every warp (__init_tls), zeroing of the range _edata .. _end, atexit
# registration of __libc_fini_array, __libc_init_array, then main. The
# value main leaves in a0 is passed unchanged to exit via the final tail call.
.section .init, "ax"
|
||||
.global _start
|
||||
.type _start, @function
|
||||
_start:
|
||||
|
||||
# initialize per-thread registers
|
||||
csrr t0, VX_CSR_NUM_WARPS # get num warps
|
||||
la t1, init_regs_all
|
||||
.insn r RISCV_CUSTOM0, 1, 0, x0, t0, t1 # wspawn t0, t1
|
||||
# thread mask of all ones for the current warp
li t0, -1
|
||||
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
|
||||
jal init_regs
|
||||
# thread mask 1 (presumably only thread 0 stays enabled - confirm)
li t0, 1
|
||||
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
|
||||
|
||||
# wait for spawn warps to terminate
|
||||
jal vx_wspawn_wait
|
||||
|
||||
# initialize TLS for all warps
|
||||
csrr t0, VX_CSR_NUM_WARPS # get num warps
|
||||
la t1, init_tls_all
|
||||
.insn r RISCV_CUSTOM0, 1, 0, x0, t0, t1 # wspawn t0, t1
|
||||
li t0, -1
|
||||
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
|
||||
call __init_tls
|
||||
li t0, 1
|
||||
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
|
||||
|
||||
# wait for spawn warps to terminate
|
||||
jal vx_wspawn_wait
|
||||
|
||||
# clear BSS segment
|
||||
# memset(_edata, 0, _end - _edata)
la a0, _edata
|
||||
la a2, _end
|
||||
sub a2, a2, a0
|
||||
li a1, 0
|
||||
call memset
|
||||
|
||||
# initialize trap vector
|
||||
# (currently disabled: both instructions below are commented out)
# la t0, trap_entry
|
||||
# csrw mtvec, t0
|
||||
|
||||
# register global termination functions
|
||||
la a0, __libc_fini_array
|
||||
call atexit
|
||||
|
||||
# run global initialization functions
|
||||
call __libc_init_array
|
||||
|
||||
# call main program routine
|
||||
call main
|
||||
|
||||
# call exit routine
|
||||
tail exit
|
||||
.size _start, .-_start
|
||||
|
||||
# _exit: a0 = exit status.
# Keeps the status in s0 across the call to vx_perf_dump, copies it into gp,
# then issues tmc with x0 (thread mask zero). No ret follows; presumably the
# warp stops executing at the tmc - confirm against the ISA extension.
.section .text
|
||||
.type _exit, @function
|
||||
.global _exit
|
||||
_exit:
|
||||
mv s0, a0
|
||||
call vx_perf_dump
|
||||
mv gp, s0
|
||||
.insn r RISCV_CUSTOM0, 0, 0, x0, x0, x0 # tmc x0
|
||||
|
||||
# init_regs: sets gp, sp and tp for the calling thread. Clobbers t0 and t1.
#   gp = __global_pointer
#   sp = STACK_BASE_ADDR - (hartid << STACK_LOG2_SIZE)
#   tp = (_end + 63 + hartid * __tcb_aligned_size) & -64
.section .text
|
||||
.type init_regs, @function
|
||||
.local init_regs
|
||||
init_regs:
|
||||
# set global pointer register
|
||||
# relaxation is switched off around the load of gp itself
.option push
|
||||
.option norelax
|
||||
la gp, __global_pointer
|
||||
.option pop
|
||||
|
||||
# set stack pointer register
|
||||
#if (XLEN == 64)
|
||||
li t0, (STACK_BASE_ADDR >> 32)
|
||||
slli t0, t0, 32
|
||||
li sp, (STACK_BASE_ADDR & 0xffffffff)
|
||||
or sp, sp, t0
|
||||
#else
|
||||
li sp, STACK_BASE_ADDR # load stack base address
|
||||
#endif
|
||||
# each hart gets its own stack slot below the base address
csrr t0, VX_CSR_MHARTID
|
||||
sll t1, t0, STACK_LOG2_SIZE
|
||||
sub sp, sp, t1
|
||||
|
||||
# set thread pointer register
|
||||
# use address space after BSS region
|
||||
# ensure cache line alignment
|
||||
# t1 receives the address of the symbol, which is used as a size value
la t1, __tcb_aligned_size
|
||||
mul t0, t0, t1
|
||||
la tp, _end + 63
|
||||
add tp, tp, t0
|
||||
and tp, tp, -64
|
||||
ret
|
||||
|
||||
# init_regs_all: routine passed to wspawn by _start. Sets the thread mask to
# all ones, runs init_regs, then sets the thread mask to zero.
# NOTE(review): jal overwrites ra, so the trailing ret presumably never
# executes because the warp is disabled by the tmc before it - confirm.
.section .text
|
||||
.type init_regs_all, @function
|
||||
.local init_regs_all
|
||||
init_regs_all:
|
||||
li t0, -1
|
||||
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
|
||||
jal init_regs
|
||||
.insn r RISCV_CUSTOM0, 0, 0, x0, x0, x0 # tmc x0
|
||||
ret
|
||||
|
||||
# init_tls_all: routine passed to wspawn by _start. Sets the thread mask to
# all ones, calls __init_tls, then sets the thread mask to zero.
# NOTE(review): call overwrites ra, so the trailing ret presumably never
# executes because the warp is disabled by the tmc before it - confirm.
.section .text
|
||||
.type init_tls_all, @function
|
||||
.local init_tls_all
|
||||
init_tls_all:
|
||||
li t0, -1
|
||||
.insn r RISCV_CUSTOM0, 0, 0, x0, t0, x0 # tmc t0
|
||||
call __init_tls
|
||||
.insn r RISCV_CUSTOM0, 0, 0, x0, x0, x0 # tmc x0
|
||||
ret
|
||||
|
||||
# vx_wspawn_wait: busy-waits until VX_CSR_WARP_MASK reads exactly 1, then
# returns. Clobbers t0 and t1.
# NOTE(review): a mask of 1 presumably means only warp 0 is still active -
# confirm against the CSR definition.
.section .text
|
||||
.type vx_wspawn_wait, @function
|
||||
.global vx_wspawn_wait
|
||||
vx_wspawn_wait:
|
||||
csrr t0, VX_CSR_WARP_MASK
|
||||
li t1, 1
|
||||
bne t0, t1, vx_wspawn_wait
|
||||
ret
|
||||
|
||||
# __dso_handle: weak global symbol in .data backed by one zeroed .long, so a
# definition supplied by another object takes precedence at link time.
.section .data
|
||||
.global __dso_handle
|
||||
.weak __dso_handle
|
||||
__dso_handle:
|
||||
.long 0
|
||||
124
kernel/src/vx_syscalls.c
Normal file
124
kernel/src/vx_syscalls.c
Normal file
@@ -0,0 +1,124 @@
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <newlib.h>
|
||||
#include <unistd.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <vx_print.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Stub: closing a descriptor is not supported; always returns -1.
int _close(int file) {
  (void)file;
  return -1;
}
|
||||
|
||||
// Stub: no file status is available; always returns -1 and leaves *st untouched.
int _fstat(int file, struct stat *st) {
  (void)file;
  (void)st;
  return -1;
}
|
||||
|
||||
// Stub: reports that no descriptor is a terminal; always returns 0.
int _isatty(int file) {
  (void)file;
  return 0;
}
|
||||
|
||||
// Stub: seeking is a no-op; always returns 0.
int _lseek(int file, int ptr, int dir) {
  (void)file;
  (void)ptr;
  (void)dir;
  return 0;
}
|
||||
|
||||
// Stub: opening files is not supported; always returns -1.
int _open(const char *name, int flags, int mode) {
  (void)name;
  (void)flags;
  (void)mode;
  return -1;
}
|
||||
|
||||
// Stub: reading is not supported; always returns -1 and leaves the buffer untouched.
int _read(int file, char *ptr, int len) {
  (void)file;
  (void)ptr;
  (void)len;
  return -1;
}
|
||||
|
||||
caddr_t _sbrk(int incr) {
|
||||
__asm__ __volatile__("ebreak");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Sends `len` bytes starting at `ptr` to vx_putchar, one byte at a time, and
// returns `len`. The descriptor number is ignored.
int _write(int file, char *ptr, int len) {
  (void)file;
  for (int remaining = len; remaining > 0; --remaining) {
    vx_putchar(*ptr);
    ++ptr;
  }
  return len;
}
|
||||
|
||||
// Stub: signals are not supported; always returns -1.
int _kill(int pid, int sig) {
  (void)pid;
  (void)sig;
  return -1;
}
|
||||
|
||||
// Returns the value of vx_hart_id() as the process id.
int _getpid() {
  const int hart = vx_hart_id();
  return hart;
}
|
||||
|
||||
// Initializes the thread-local storage block addressed by the tp register:
// copies the .tdata image to the start of the block and zero-fills the .tbss
// part. The sizes and the .tbss offset are encoded as the addresses of the
// external symbols declared below.
void __init_tls(void) {
  extern char __tdata_start[];
  extern char __tdata_size[];
  extern char __tbss_offset[];
  extern char __tbss_size[];

  // TLS base of the calling thread, read directly from tp
  register char *__thread_self __asm__ ("tp");

  const size_t tdata_bytes = (size_t)__tdata_size;
  const size_t tbss_start  = (size_t)__tbss_offset;
  const size_t tbss_bytes  = (size_t)__tbss_size;

  memcpy(__thread_self, __tdata_start, tdata_bytes);
  memset(__thread_self + tbss_start, 0, tbss_bytes);
}
|
||||
|
||||
#ifdef HAVE_INITFINI_ARRAY
|
||||
|
||||
/* These magic symbols are provided by the linker. */
|
||||
extern void (*__preinit_array_start []) (void) __attribute__((weak));
|
||||
extern void (*__preinit_array_end []) (void) __attribute__((weak));
|
||||
extern void (*__init_array_start []) (void) __attribute__((weak));
|
||||
extern void (*__init_array_end []) (void) __attribute__((weak));
|
||||
|
||||
#ifdef HAVE_INIT_FINI
|
||||
extern void _init (void);
|
||||
#endif
|
||||
|
||||
/* Iterate over all the init routines. */
|
||||
void __libc_init_array (void) {
|
||||
size_t count;
|
||||
size_t i;
|
||||
|
||||
count = __preinit_array_end - __preinit_array_start;
|
||||
for (i = 0; i < count; i++)
|
||||
__preinit_array_start[i] ();
|
||||
|
||||
#ifdef HAVE_INIT_FINI
|
||||
_init ();
|
||||
#endif
|
||||
|
||||
count = __init_array_end - __init_array_start;
|
||||
for (i = 0; i < count; i++)
|
||||
__init_array_start[i] ();
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_INITFINI_ARRAY
|
||||
extern void (*__fini_array_start []) (void) __attribute__((weak));
|
||||
extern void (*__fini_array_end []) (void) __attribute__((weak));
|
||||
|
||||
#ifdef HAVE_INIT_FINI
|
||||
extern void _fini (void);
|
||||
#endif
|
||||
|
||||
/* Run all the cleanup routines. */
|
||||
void __libc_fini_array (void) {
|
||||
size_t count;
|
||||
size_t i;
|
||||
|
||||
count = __fini_array_end - __fini_array_start;
|
||||
for (i = count; i > 0; i--)
|
||||
__fini_array_start[i-1] ();
|
||||
|
||||
#ifdef HAVE_INIT_FINI
|
||||
_fini ();
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user