diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 3564e60e..e9e57b03 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -50,15 +50,15 @@ `endif `ifndef IO_BASE_ADDR -`define IO_BASE_ADDR 64'hFFFFFFFFFF000000 +`define IO_BASE_ADDR 32'hFF000000 `endif `ifndef IO_ADDR_SIZE -`define IO_ADDR_SIZE (64'hFFFFFFFFFFFFFFFF - `IO_BASE_ADDR + 1) +`define IO_ADDR_SIZE (32'hFFFFFFFF - `IO_BASE_ADDR + 1) `endif `ifndef IO_COUT_ADDR -`define IO_COUT_ADDR (64'hFFFFFFFFFFFFFFFF - `MEM_BLOCK_SIZE + 1) +`define IO_COUT_ADDR (32'hFFFFFFFF - `MEM_BLOCK_SIZE + 1) `endif `ifndef IO_COUT_SIZE diff --git a/runtime/Makefile b/runtime/Makefile index 9373fd0d..8e1ec0a5 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -1,12 +1,12 @@ RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain RISCV64_TOOLCHAIN_PATH ?= /nethome/ssrivatsan/riscv/ -CC = $(RISCV64_TOOLCHAIN_PATH)/bin/riscv64-unknown-elf-gcc -AR = $(RISCV64_TOOLCHAIN_PATH)/bin/riscv64-unknown-elf-gcc-ar -DP = $(RISCV64_TOOLCHAIN_PATH)/bin/riscv64-unknown-elf-objdump -CP = $(RISCV64_TOOLCHAIN_PATH)/bin/riscv64-unknown-elf-objcopy +CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc +AR = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc-ar +DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump +CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy -CFLAGS += -O3 -march=rv64imfd -mabi=lp64d -mcmodel=medany -Wstack-usage=1024 -fno-exceptions -fdata-sections -ffunction-sections +CFLAGS += -O3 -march=rv32imf -mabi=ilp32f -mcmodel=medany -Wstack-usage=1024 -fno-exceptions -fdata-sections -ffunction-sections CFLAGS += -I./include -I../hw PROJECT = libvortexrt diff --git a/runtime/linker/vx_link64.ld b/runtime/linker/vx_link64.ld index 3c18eb40..10566d49 100644 --- a/runtime/linker/vx_link64.ld +++ b/runtime/linker/vx_link64.ld @@ -207,7 +207,7 @@ SECTIONS KEEP(*(.stack*)) } __stack_usage = SIZEOF(.stack_dummy); - PROVIDE(__stack_top = 0xFFFFFFFFFF000000); + PROVIDE(__stack_top = 0xFF000000); PROVIDE(__stack_size = 0x400); PROVIDE(__stack = __stack_top); ASSERT(__stack_usage <= __stack_size, "stack overflow") diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h index bd199319..cd76a7ce 100644 --- a/sim/common/bitmanip.h +++ b/sim/common/bitmanip.h @@ -92,4 +92,8 @@ inline __uint128_t sext128(__uint128_t word, uint32_t width) { __uint128_t unity = 1; __uint128_t mask = (unity << width) - 1; return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : word; +} + +inline uint64_t nan_box(uint32_t word) { + return word | 0xFFFFFFFF00000000; } \ No newline at end of file diff --git a/sim/common/rvfloats.cpp b/sim/common/rvfloats.cpp index 13b7c149..f82223e8 100644 --- a/sim/common/rvfloats.cpp +++ b/sim/common/rvfloats.cpp @@ -169,7 +169,7 @@ uint32_t rv_ftoi_s(uint32_t a, uint32_t frm, uint32_t* fflags) { return r; } -uint64_t rv_ftoi_d(uint64_t a, uint64_t frm, uint32_t* fflags) { +uint64_t rv_ftoi_d(uint64_t a, uint32_t frm, uint32_t* fflags) { softfloat_roundingMode = frm; auto r = f64_to_i32(to_float64_t(a), frm, true); if (fflags) { *fflags = get_fflags(); } @@ -183,7 +183,7 @@ uint32_t rv_ftou_s(uint32_t a, uint32_t frm, uint32_t* fflags) { return r; } -uint64_t rv_ftou_d(uint64_t a, uint64_t frm, uint32_t* fflags) { +uint64_t rv_ftou_d(uint64_t a, uint32_t frm, uint32_t* fflags) { softfloat_roundingMode = frm; auto r = f64_to_ui32(to_float64_t(a), frm, true); if (fflags) { *fflags = get_fflags(); } @@ -197,7 +197,7 @@ uint64_t rv_ftol_s(uint32_t a, uint32_t frm, uint32_t* fflags) { return r; } -uint64_t rv_ftol_d(uint64_t a, uint64_t frm, uint32_t* fflags) { +uint64_t rv_ftol_d(uint64_t a, uint32_t frm, uint32_t* fflags) { softfloat_roundingMode = frm; auto r = f64_to_i64(to_float64_t(a), frm, true); if (fflags) { *fflags = get_fflags(); } @@ -211,7 +211,7 @@ uint64_t rv_ftolu_s(uint32_t a, uint32_t frm, uint32_t* fflags) { return r; } -uint64_t rv_ftolu_d(uint64_t a, uint64_t frm, uint32_t* fflags) { +uint64_t rv_ftolu_d(uint64_t a, uint32_t frm, uint32_t* fflags) { softfloat_roundingMode = frm; auto r = f64_to_ui64(to_float64_t(a), frm, true); if (fflags) { *fflags = get_fflags(); } @@ -225,7 +225,7 @@ uint32_t rv_itof_s(uint32_t a, uint32_t frm, uint32_t* fflags) { return from_float32_t(r); } -uint64_t rv_itof_d(uint32_t a, uint32_t frm, uint32_t* fflags) { +uint64_t rv_itof_d(uint64_t a, uint32_t frm, uint32_t* fflags) { softfloat_roundingMode = frm; auto r = i32_to_f64(a); if (fflags) { *fflags = get_fflags(); } @@ -239,7 +239,7 @@ uint32_t rv_utof_s(uint32_t a, uint32_t frm, uint32_t* fflags) { return from_float32_t(r); } -uint64_t rv_utof_d(uint32_t a, uint32_t frm, uint32_t* fflags) { +uint64_t rv_utof_d(uint64_t a, uint32_t frm, uint32_t* fflags) { softfloat_roundingMode = frm; auto r = ui32_to_f64(a); if (fflags) { *fflags = get_fflags(); } @@ -298,7 +298,13 @@ uint64_t rv_fle_d(uint64_t a, uint64_t b, uint32_t* fflags) { return r; } -uint32_t rv_feq_s(uint32_t a, uint32_t b, uint32_t* fflags) { +uint32_t rv_feq_s(uint64_t a, uint64_t b, uint32_t* fflags) { + + // Either a or b isn't NaN boxed + if ((a >> 32 != 0xffffffff) || (b >> 32 != 0xffffffff)) { + return 0; + } + auto r = f32_eq(to_float32_t(a), to_float32_t(b)); if (fflags) { *fflags = get_fflags(); } return r; @@ -428,8 +434,20 @@ uint64_t rv_fclss_d(uint64_t a) { return r; } -uint32_t rv_fsgnj_s(uint32_t a, uint32_t b) { - +uint32_t rv_fsgnj_s(uint64_t a, uint64_t b) { + + // Both a and b aren't NaN boxed + if ((a >> 32 != 0xffffffff) && (b >> 32 != 0xffffffff)) { + return 0x7fc00000; + } + // a is NaN boxed but b isn't + if (b >> 32 != 0xffffffff) + return a; + + // b is NaN boxed but a isn't + if(a >> 32 != 0xffffffff) + return 0xffc00000; + int sign = b & F32_SIGN; int r = sign | (a & ~F32_SIGN); @@ -444,8 +462,20 @@ uint64_t rv_fsgnj_d(uint64_t a, uint64_t b) { return r; } -uint32_t rv_fsgnjn_s(uint32_t a, uint32_t b) { +uint32_t rv_fsgnjn_s(uint64_t a, uint64_t b) { + + // Both a and b aren't NaN boxed + if ((a >> 32 != 0xffffffff) && (b >> 32 != 0xffffffff)) { + return 0x7fc00000; + } + // a is NaN boxed but b isn't + if (b >> 32 != 0xffffffff) + return a; + // b is NaN boxed but a isn't + if(a >> 32 != 0xffffffff) + return 0xffc00000; + int sign = ~b & F32_SIGN; int r = sign | (a & ~F32_SIGN); @@ -460,8 +490,20 @@ uint64_t rv_fsgnjn_d(uint64_t a, uint64_t b) { return r; } -uint32_t rv_fsgnjx_s(uint32_t a, uint32_t b) { +uint32_t rv_fsgnjx_s(uint64_t a, uint64_t b) { + + // Both a and b aren't NaN boxed + if ((a >> 32 != 0xffffffff) && (b >> 32 != 0xffffffff)) { + return 0x7fc00000; + } + // a is NaN boxed but b isn't + if (b >> 32 != 0xffffffff) + return a; + // b is NaN boxed but a isn't + if(a >> 32 != 0xffffffff) + return 0xffc00000; + int sign1 = a & F32_SIGN; int sign2 = b & F32_SIGN; int r = (sign1 ^ sign2) | (a & ~F32_SIGN); @@ -478,7 +520,7 @@ uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b) { return r; } -uint64_t rv_dtof(uint64_t a) { +uint32_t rv_dtof(uint64_t a) { auto r = f64_to_f32(to_float64_t(a)); return from_float32_t(r); diff --git a/sim/common/rvfloats.h b/sim/common/rvfloats.h index 9193d7bf..2a82858b 100644 --- a/sim/common/rvfloats.h +++ b/sim/common/rvfloats.h @@ -27,13 +27,13 @@ uint32_t rv_ltof_s(uint64_t a, uint32_t frm, uint32_t* fflags); uint32_t rv_lutof_s(uint64_t a, uint32_t frm, uint32_t* fflags); uint32_t rv_fclss_s(uint32_t a); -uint32_t rv_fsgnj_s(uint32_t a, uint32_t b); -uint32_t rv_fsgnjn_s(uint32_t a, uint32_t b); -uint32_t rv_fsgnjx_s(uint32_t a, uint32_t b); +uint32_t rv_fsgnj_s(uint64_t a, uint64_t b); +uint32_t rv_fsgnjn_s(uint64_t a, uint64_t b); +uint32_t rv_fsgnjx_s(uint64_t a, uint64_t b); uint32_t rv_flt_s(uint32_t a, uint32_t b, uint32_t* fflags); uint32_t rv_fle_s(uint32_t a, uint32_t b, uint32_t* fflags); -uint32_t rv_feq_s(uint32_t a, uint32_t b, uint32_t* fflags); +uint32_t rv_feq_s(uint64_t a, uint64_t b, uint32_t* fflags); uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags); uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags); @@ -49,12 +49,12 @@ uint64_t rv_fmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* uint64_t rv_fnmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags); uint64_t rv_fnmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags); -uint64_t rv_ftoi_d(uint64_t a, uint64_t frm, uint32_t* fflags); -uint64_t rv_ftou_d(uint64_t a, uint64_t frm, uint32_t* fflags); -uint64_t rv_ftol_d(uint64_t a, uint64_t frm, uint32_t* fflags); -uint64_t rv_ftolu_d(uint64_t a, uint64_t frm, uint32_t* fflags); -uint64_t rv_itof_d(uint32_t a, uint32_t frm, uint32_t* fflags); -uint64_t rv_utof_d(uint32_t a, uint32_t frm, uint32_t* fflags); +uint64_t rv_ftoi_d(uint64_t a, uint32_t frm, uint32_t* fflags); +uint64_t rv_ftou_d(uint64_t a, uint32_t frm, uint32_t* fflags); +uint64_t rv_ftol_d(uint64_t a, uint32_t frm, uint32_t* fflags); +uint64_t rv_ftolu_d(uint64_t a, uint32_t frm, uint32_t* fflags); +uint64_t rv_itof_d(uint64_t a, uint32_t frm, uint32_t* fflags); +uint64_t rv_utof_d(uint64_t a, uint32_t frm, uint32_t* fflags); uint64_t rv_ltof_d(uint64_t a, uint32_t frm, uint32_t* fflags); uint64_t rv_lutof_d(uint64_t a, uint32_t frm, uint32_t* fflags); @@ -69,7 +69,7 @@ uint64_t rv_feq_d(uint64_t a, uint64_t b, uint32_t* fflags); uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags); uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags); -uint64_t rv_dtof(uint64_t a); +uint32_t rv_dtof(uint64_t a); uint64_t rv_ftod(uint32_t a); #ifdef __cplusplus diff --git a/sim/simx/decode.cpp b/sim/simx/decode.cpp index 0a44147a..ea59b817 100644 --- a/sim/simx/decode.cpp +++ b/sim/simx/decode.cpp @@ -487,8 +487,8 @@ std::shared_ptr Decoder::decode(Word code) const { break; case Opcode::I_INST: if (func3 == 0x1 || func3 == 0x5) { - // int5 - instr->setImm(sext64(rs2, 5)); + // int6 + instr->setImm(sext64(((func7 & 0x1) << 5) | rs2, 6)); } else { // int12 instr->setImm(sext64(code >> shift_rs2_, 12)); @@ -496,8 +496,8 @@ std::shared_ptr Decoder::decode(Word code) const { break; case Opcode::I_INST_64: if (func3 == 0x1 || func3 == 0x5) { - // int4 - instr->setImm(sext64(rs2, 4)); + // int5 + instr->setImm(sext64(rs2, 5)); } else { // int12 instr->setImm(sext64(code >> shift_rs2_, 12)); diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index d8be2dac..8e7b71d6 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -512,7 +512,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { if (!tmask_.test(t)) continue; rddata[t] = nextPC; - nextPC = PC_ + immsrc; + nextPC = Word(PC_ + immsrc); trace->fetch_stall = true; break; // runonce } @@ -538,12 +538,13 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { trace->lsu.type = LsuType::LOAD; trace->used_iregs.set(rsrc0); if (opcode == L_INST - || (opcode == FL && func3 == 2)) { + || (opcode == FL && func3 == 2) + || (opcode == FL && func3 == 3)) { for (int t = 0; t < num_threads; ++t) { if (!tmask_.test(t)) continue; - DWord mem_addr = ((rsdata[t][0] + immsrc) & 0xFFFFFFFFFFFFFFF8); // double word aligned - DWord shift_by = ((rsdata[t][0] + immsrc) & 0x00000007) * 8; + DWord mem_addr = ((rsdata[t][0] + immsrc) & 0xFFFFFFFFFFFFFFFC); // double word aligned + DWord shift_by = ((rsdata[t][0] + immsrc) & 0x00000003) * 8; DWord data_read = core_->dcache_read(mem_addr, 8); trace->mem_addrs.at(t).push_back({mem_addr, 8}); DP(4, "LOAD MEM: ADDRESS=0x" << std::hex << mem_addr << ", DATA=0x" << data_read); @@ -557,11 +558,11 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { rddata[t] = sext64((data_read >> shift_by) & 0xFFFF, 16); break; case 2: - // RV32I: LW - rddata[t] = sext64((data_read >> shift_by) & 0xFFFFFFFF, 32); + // RV32I: LW / RV32F: FLW + rddata[t] = (opcode == FL) ? nan_box((data_read >> shift_by) & 0xFFFFFFFF) : sext64((data_read >> shift_by) & 0xFFFFFFFF, 32); break; case 3: - // RV64I: LD + // RV64I: LD / RV32D: FLD rddata[t] = data_read; break; case 4: @@ -628,11 +629,11 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { core_->dcache_write(mem_addr, rsdata[t][1] & 0x0000FFFF, 2); break; case 2: - // RV32I: SW + // RV32I: SW / RV32F: FSW core_->dcache_write(mem_addr, rsdata[t][1] & 0xFFFFFFFF, 4); break; case 3: - // RV64I: SD + // RV64I: SD / RV32D: FSD core_->dcache_write(mem_addr, rsdata[t][1], 8); break; default: @@ -742,7 +743,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { uint32_t fflags = 0; switch (func7) { case 0x00: // RV32F: FADD.S - rddata[t] = rv_fadd_s(rsdata[t][0], rsdata[t][1], frm, &fflags); + rddata[t] = nan_box(rv_fadd_s(rsdata[t][0], rsdata[t][1], frm, &fflags)); trace->fpu.type = FpuType::FMA; trace->used_fregs.set(rsrc0); trace->used_fregs.set(rsrc1); @@ -754,7 +755,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { trace->used_fregs.set(rsrc1); break; case 0x04: // RV32F: FSUB.S - rddata[t] = rv_fsub_s(rsdata[t][0], rsdata[t][1], frm, &fflags); + rddata[t] = nan_box(rv_fsub_s(rsdata[t][0], rsdata[t][1], frm, &fflags)); trace->fpu.type = FpuType::FMA; trace->used_fregs.set(rsrc0); trace->used_fregs.set(rsrc1); @@ -766,31 +767,31 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { trace->used_fregs.set(rsrc1); break; case 0x08: // RV32F: FMUL.S - rddata[t] = rv_fmul_s(rsdata[t][0], rsdata[t][1], frm, &fflags); + rddata[t] = nan_box(rv_fmul_s(rsdata[t][0], rsdata[t][1], frm, &fflags)); trace->fpu.type = FpuType::FMA; trace->used_fregs.set(rsrc0); trace->used_fregs.set(rsrc1); break; - case 0x09: // RV32F: FMUL.D + case 0x09: // RV32D: FMUL.D rddata[t] = rv_fmul_d(rsdata[t][0], rsdata[t][1], frm, &fflags); trace->fpu.type = FpuType::FMA; trace->used_fregs.set(rsrc0); trace->used_fregs.set(rsrc1); break; case 0x0c: // RV32F: FDIV.S - rddata[t] = rv_fdiv_s(rsdata[t][0], rsdata[t][1], frm, &fflags); + rddata[t] = nan_box(rv_fdiv_s(rsdata[t][0], rsdata[t][1], frm, &fflags)); trace->fpu.type = FpuType::FDIV; trace->used_fregs.set(rsrc0); trace->used_fregs.set(rsrc1); break; - case 0x0d: // RV32F: FDIV.D + case 0x0d: // RV32D: FDIV.D rddata[t] = rv_fdiv_d(rsdata[t][0], rsdata[t][1], frm, &fflags); trace->fpu.type = FpuType::FDIV; trace->used_fregs.set(rsrc0); trace->used_fregs.set(rsrc1); break; case 0x2c: // RV32F: FSQRT.S - rddata[t] = rv_fsqrt_s(rsdata[t][0], frm, &fflags); + rddata[t] = nan_box(rv_fsqrt_s(rsdata[t][0], frm, &fflags)); trace->fpu.type = FpuType::FSQRT; trace->used_fregs.set(rsrc0); break; @@ -802,24 +803,28 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { case 0x10: switch (func3) { case 0: // RV32F: FSGNJ.S - rddata[t] = rv_fsgnj_s(rsdata[t][0], rsdata[t][1]); + rddata[t] = nan_box(rv_fsgnj_s(rsdata[t][0], rsdata[t][1])); break; case 1: // RV32F: FSGNJN.S - rddata[t] = rv_fsgnjn_s(rsdata[t][0], rsdata[t][1]); + rddata[t] = nan_box(rv_fsgnjn_s(rsdata[t][0], rsdata[t][1])); break; case 2: // RV32F: FSGNJX.S - rddata[t] = rv_fsgnjx_s(rsdata[t][0], rsdata[t][1]); + rddata[t] = nan_box(rv_fsgnjx_s(rsdata[t][0], rsdata[t][1])); break; } + trace->fpu.type = FpuType::FNCP; + trace->used_fregs.set(rsrc0); + trace->used_fregs.set(rsrc1); + break; case 0x11: switch (func3) { - case 0: // RV32F: FSGNJ.D + case 0: // RV32D: FSGNJ.D rddata[t] = rv_fsgnj_d(rsdata[t][0], rsdata[t][1]); break; - case 1: // RV32F: FSGNJN.D + case 1: // RV32D: FSGNJN.D rddata[t] = rv_fsgnjn_d(rsdata[t][0], rsdata[t][1]); break; - case 2: // RV32F: FSGNJX.D + case 2: // RV32D: FSGNJX.D rddata[t] = rv_fsgnjx_d(rsdata[t][0], rsdata[t][1]); break; } @@ -830,10 +835,10 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { case 0x14: if (func3) { // RV32F: FMAX.S - rddata[t] = rv_fmax_s(rsdata[t][0], rsdata[t][1], &fflags); + rddata[t] = nan_box(rv_fmax_s(rsdata[t][0], rsdata[t][1], &fflags)); } else { // RV32F: FMIN.S - rddata[t] = rv_fmin_s(rsdata[t][0], rsdata[t][1], &fflags); + rddata[t] = nan_box(rv_fmin_s(rsdata[t][0], rsdata[t][1], &fflags)); } trace->fpu.type = FpuType::FNCP; trace->used_fregs.set(rsrc0); @@ -851,6 +856,20 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { trace->used_fregs.set(rsrc0); trace->used_fregs.set(rsrc1); break; + case 0x20: + // RV32D: FCVT.S.D + rddata[t] = nan_box(rv_dtof(rsdata[t][0])); + trace->fpu.type = FpuType::FNCP; + trace->used_fregs.set(rsrc0); + trace->used_fregs.set(rsrc1); + break; + case 0x21: + // RV32D: FCVT.D.S + rddata[t] = rv_ftod(rsdata[t][0]); + trace->fpu.type = FpuType::FNCP; + trace->used_fregs.set(rsrc0); + trace->used_fregs.set(rsrc1); + break; case 0x60: switch(rsrc1) { case 0: @@ -884,11 +903,11 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { rddata[t] = sext64(rv_ftou_d(rsdata[t][0], frm, &fflags), 32); break; case 2: - // RV64F: FCVT.L.D + // RV64D: FCVT.L.D rddata[t] = rv_ftol_d(rsdata[t][0], frm, &fflags); break; case 3: - // RV64F: FCVT.LU.D + // RV64D: FCVT.LU.D rddata[t] = rv_ftolu_d(rsdata[t][0], frm, &fflags); break; } @@ -901,7 +920,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { rddata[t] = rv_fclss_s(rsdata[t][0]); } else { // RV32F: FMV.X.W - rddata[t] = rsdata[t][0]; + rddata[t] = sext64((Word)rsdata[t][0],32); } trace->fpu.type = FpuType::FNCP; trace->used_fregs.set(rsrc0); @@ -959,19 +978,19 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { switch(rsrc1) { case 0: // RV32F: FCVT.S.W - rddata[t] = rv_itof_s(rsdata[t][0], frm, &fflags); + rddata[t] = nan_box(rv_itof_s(rsdata[t][0], frm, &fflags)); break; case 1: // RV32F: FCVT.S.WU - rddata[t] = rv_utof_s(rsdata[t][0], frm, &fflags); + rddata[t] = nan_box(rv_utof_s(rsdata[t][0], frm, &fflags)); break; case 2: // RV64F: FCVT.S.L - rddata[t] = rv_ltof_s(rsdata[t][0], frm, &fflags); + rddata[t] = nan_box(rv_ltof_s(rsdata[t][0], frm, &fflags)); break; case 3: // RV64F: FCVT.S.LU - rddata[t] = rv_lutof_s(rsdata[t][0], frm, &fflags); + rddata[t] = nan_box(rv_lutof_s(rsdata[t][0], frm, &fflags)); break; } trace->fpu.type = FpuType::FCVT; @@ -999,8 +1018,12 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { trace->fpu.type = FpuType::FCVT; trace->used_iregs.set(rsrc0); break; - case 0x78: // FMV.W.X - case 0x79: // FMV.D.X + case 0x78: // RV32F: FMV.W.X + rddata[t] = nan_box(rsdata[t][0]); + trace->fpu.type = FpuType::FNCP; + trace->used_iregs.set(rsrc0); + break; + case 0x79: // RV64D: FMV.D.X rddata[t] = rsdata[t][0]; trace->fpu.type = FpuType::FNCP; trace->used_iregs.set(rsrc0); @@ -1030,7 +1053,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { rddata[t] = rv_fmadd_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags); else // RV32F: FMADD.S - rddata[t] = rv_fmadd_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags); + rddata[t] = nan_box(rv_fmadd_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags)); break; case FMSUB: if (func2) @@ -1038,7 +1061,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { rddata[t] = rv_fmsub_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags); else // RV32F: FMSUB.S - rddata[t] = rv_fmsub_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags); + rddata[t] = nan_box(rv_fmsub_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags)); break; case FMNMADD: if (func2) @@ -1046,7 +1069,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { rddata[t] = rv_fnmadd_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags); else // RV32F: FNMADD.S - rddata[t] = rv_fnmadd_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags); + rddata[t] = nan_box(rv_fnmadd_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags)); break; case FMNMSUB: if (func2) @@ -1054,7 +1077,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) { rddata[t] = rv_fnmsub_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags); else // RV32F: FNMSUB.S - rddata[t] = rv_fnmsub_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags); + rddata[t] = nan_box(rv_fnmsub_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags)); break; default: break; diff --git a/sim/simx/warp.cpp b/sim/simx/warp.cpp index c684a924..70182d41 100644 --- a/sim/simx/warp.cpp +++ b/sim/simx/warp.cpp @@ -73,6 +73,10 @@ void Warp::eval(pipeline_trace_t *trace) { for (int j = 0; j < core_->arch().num_threads(); ++j) { DPN(4, ' ' << std::setfill('0') << std::setw(16) << std::hex << ireg_file_.at(j).at(i) << std::setfill(' ') << ' '); } + // delete later: printing floating point reg file + for (int j = 0; j < core_->arch().num_threads(); ++j) { + DPN(4, ' ' << std::setfill('0') << std::setw(16) << std::hex << freg_file_.at(j).at(i) << std::setfill(' ') << ' '); + } DPN(4, std::endl); } } \ No newline at end of file diff --git a/tests/riscv/isa/Makefile b/tests/riscv/isa/Makefile index 5f4f1cfa..e7df2118 100644 --- a/tests/riscv/isa/Makefile +++ b/tests/riscv/isa/Makefile @@ -4,9 +4,15 @@ ALL_TESTS_64 := $(wildcard rv64*.hex) D_TESTS := $(wildcard *ud-p-*.hex) V_TESTS := $(wildcard *-v-*.hex) +I_TESTS := $(wildcard rv64ui-p-*.hex) +M_TESTS := $(wildcard rv64um-p-*.hex) +F_TESTS := $(wildcard rv64uf-p-*.hex) +D_TESTS_64 := $(wildcard rv64ud-p-*.hex) + + EXCLUDED_TESTS_32 := $(V_TESTS) $(D_TESTS) rv32si-p-scall.hex rv32si-p-sbreak.hex rv32mi-p-breakpoint.hex rv32ua-p-amomax_w.hex rv32ua-p-amoxor_w.hex rv32ua-p-amoor_w.hex rv32mi-p-ma_addr.hex rv32mi-p-mcsr.hex rv32ua-p-amoswap_w.hex rv32mi-p-ma_fetch.hex rv32mi-p-csr.hex rv32ua-p-amoadd_w.hex rv32si-p-dirty.hex rv32ui-p-fence_i.hex rv32si-p-csr.hex rv32mi-p-shamt.hex rv32ua-p-amomin_w.hex rv32ua-p-lrsc.hex rv32si-p-wfi.hex rv32ua-p-amomaxu_w.hex rv32si-p-ma_fetch.hex rv32mi-p-illegal.hex rv32uc-p-rvc.hex rv32mi-p-sbreak.hex rv32ua-p-amominu_w.hex rv32ua-p-amoand_w.hex -EXCLUDED_TESTS_64 := rv64ud-p-move.hex +EXCLUDED_TESTS_64 := rv64ud-p-ldst.hex rv64ud-p-recoding.hex TESTS_32 := $(filter-out $(EXCLUDED_TESTS_32), $(ALL_TESTS_32)) TESTS_64 := $(filter-out $(EXCLUDED_TESTS_64), $(ALL_TESTS_64)) @@ -19,6 +25,18 @@ run-simx-32: run-simx-64: $(foreach test, $(TESTS_64), ../../../sim/simx/simx -r -a rv64imfd -c 1 -i $(test) || exit;) +run-simx-64-i: + $(foreach test, $(I_TESTS), ../../../sim/simx/simx -r -a rv64imfd -c 1 -i $(test) || exit;) + +run-simx-64-m: + $(foreach test, $(M_TESTS), ../../../sim/simx/simx -r -a rv64imfd -c 1 -i $(test) || exit;) + +run-simx-64-f: + $(foreach test, $(F_TESTS), ../../../sim/simx/simx -r -a rv64imfd -c 1 -i $(test) || exit;) + +run-simx-64-d: + $(foreach test, $(D_TESTS_64), ../../../sim/simx/simx -r -a rv64imfd -c 1 -i $(test) || exit;) + run-rtlsim: $(foreach test, $(TESTS), ../../../sim/rtlsim/rtlsim -r $(test) || exit;) diff --git a/tests/runtime/hello/Makefile b/tests/runtime/hello/Makefile index 9c83df0c..708dc7c3 100644 --- a/tests/runtime/hello/Makefile +++ b/tests/runtime/hello/Makefile @@ -6,8 +6,8 @@ AR = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc-ar DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy -CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections -CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw +CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -mcmodel=medany -ffreestanding -nostartfiles -fdata-sections -ffunction-sections +CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw --save-temps -v LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a