Merged branch xlen-parameterization into staging

This commit is contained in:
Santosh Srivatsan
2022-02-05 13:47:42 -05:00
332 changed files with 15010 additions and 32479 deletions

View File

@@ -71,9 +71,30 @@ inline uint64_t bit_getw(uint64_t bits, uint32_t start, uint32_t end) {
}
// Apply integer sign extension
// Sign-extends the low `width` bits of a 32-bit word.
// Preconditions (checked by assert only): 1 < width <= 32.
// If bit (width-1) is set, every bit above it is set in the result; otherwise
// the word is returned unchanged (bits above `width` are NOT cleared).
inline uint32_t sext(uint32_t word, uint32_t width) {
  assert(width > 1);
  assert(width <= 32);
  // A full-width value needs no extension, and shifting by 32 would be undefined.
  if (width == 32)
    return word;
  // Build the mask from an unsigned 1 so the shift never touches a signed sign bit.
  uint32_t mask = (uint32_t(1) << width) - 1;
  return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : word;
}
// 64-bit sign extension of the low `width` bits of `word`.
// Requires 1 < width <= 64 (assert-checked). When bit (width-1) is clear the
// input is returned untouched, including any bits above `width`.
inline uint64_t sext(uint64_t word, uint32_t width) {
  assert(width > 1);
  assert(width <= 64);
  if (width < 64) {
    const uint64_t low_bits = (uint64_t(1) << width) - 1;
    const bool negative = ((word >> (width - 1)) & 0x1) != 0;
    if (negative) {
      word |= ~low_bits;
    }
  }
  return word;
}
// 128-bit sign extension of the low `width` bits of `word`.
// Requires 1 < width <= 128 (assert-checked). When bit (width-1) is clear the
// input is returned untouched, including any bits above `width`.
inline __uint128_t sext(__uint128_t word, uint32_t width) {
  assert(width > 1);
  assert(width <= 128);
  if (width < 128) {
    const __uint128_t low_bits = (__uint128_t(1) << width) - 1;
    const bool negative = ((word >> (width - 1)) & 0x1) != 0;
    if (negative) {
      word |= ~low_bits;
    }
  }
  return word;
}

View File

@@ -226,9 +226,9 @@ void RAM::read(void *data, uint64_t addr, uint64_t size) {
}
// Copies `size` bytes from the buffer `data` into RAM starting at `addr`,
// one byte at a time through get().
void RAM::write(const void *data, uint64_t addr, uint64_t size) {
  const uint8_t* d = static_cast<const uint8_t*>(data);
  for (uint64_t i = 0; i < size; i++) {
    // Single store per byte; the source buffer is addressed through one pointer only.
    *this->get(addr + i) = d[i];
  }
}
@@ -276,7 +276,7 @@ void RAM::loadHexImage(const char* filename) {
ifs.seekg(0, ifs.beg);
ifs.read(content.data(), size);
int offset = 0;
uint32_t offset = 0;
char *line = content.data();
this->clear();

View File

@@ -1,5 +1,6 @@
#pragma once
#include <cstdint>
#include <vector>
#include <unordered_map>
#include <cstdint>

View File

@@ -8,10 +8,13 @@ extern "C" {
}
#define F32_SIGN 0x80000000
#define F64_SIGN 0x8000000000000000
// Wraps a raw 32-bit pattern in a float32_t.
inline float32_t to_float32_t(uint32_t x) {
  const float32_t wrapped{x};
  return wrapped;
}
// Wraps a raw 64-bit pattern in a float64_t.
inline float64_t to_float64_t(uint64_t x) {
  const float64_t wrapped{x};
  return wrapped;
}
// Extracts the raw 32-bit pattern held in the `v` member of a float32_t.
inline uint32_t from_float32_t(float32_t x) {
  return static_cast<uint32_t>(x.v);
}
// Extracts the raw 64-bit pattern held in the `v` member of a float64_t.
inline uint64_t from_float64_t(float64_t x) {
  return static_cast<uint64_t>(x.v);
}
inline uint32_t get_fflags() {
uint32_t fflags = softfloat_exceptionFlags;
@@ -25,121 +28,290 @@ inline uint32_t get_fflags() {
extern "C" {
#endif
uint32_t rv_fadd(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
uint32_t rv_fadd_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
softfloat_roundingMode = frm;
auto r = f32_add(to_float32_t(a), to_float32_t(b));
if (fflags) { *fflags = get_fflags(); }
return from_float32_t(r);
}
uint32_t rv_fsub(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
// Double-precision add on raw bit patterns: returns the bits of f64_add(a, b)
// evaluated with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fadd_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto sum = f64_add(to_float64_t(a), to_float64_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(sum);
}
// Single-precision subtract on raw bit patterns: returns the bits of
// f32_sub(a, b) evaluated with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_fsub_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto difference = f32_sub(to_float32_t(a), to_float32_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(difference);
}
uint32_t rv_fmul(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
// Double-precision subtract on raw bit patterns: returns the bits of
// f64_sub(a, b) evaluated with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fsub_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto difference = f64_sub(to_float64_t(a), to_float64_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(difference);
}
// Single-precision multiply on raw bit patterns: returns the bits of
// f32_mul(a, b) evaluated with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_fmul_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto product = f32_mul(to_float32_t(a), to_float32_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(product);
}
uint32_t rv_fmadd(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
// Double-precision multiply on raw bit patterns: returns the bits of
// f64_mul(a, b) evaluated with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fmul_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto product = f64_mul(to_float64_t(a), to_float64_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(product);
}
// Single-precision fused multiply-add on raw bit patterns: returns the bits of
// f32_mulAdd(a, b, c) evaluated with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_fmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto fused = f32_mulAdd(to_float32_t(a), to_float32_t(b), to_float32_t(c));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(fused);
}
uint32_t rv_fmsub(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
// Double-precision fused multiply-add on raw bit patterns: returns the bits of
// f64_mulAdd(a, b, c) evaluated with softfloat_roundingMode set to `frm`.
// No operand is negated here, so no sign-flipped temporary is needed.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto fused = f64_mulAdd(to_float64_t(a), to_float64_t(b), to_float64_t(c));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(fused);
}
// Single-precision a*b - c on raw bit patterns: flips the sign bit of `c`
// (XOR with F32_SIGN) and feeds the operands to f32_mulAdd with
// softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_fmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const uint32_t negated_c = c ^ F32_SIGN;
  const auto fused = f32_mulAdd(to_float32_t(a), to_float32_t(b), to_float32_t(negated_c));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(fused);
}
uint32_t rv_fnmadd(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
// Double-precision a*b - c on raw bit patterns: flips the sign bit of `c`
// (XOR with the 64-bit mask F64_SIGN) and feeds the operands to f64_mulAdd
// with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  // Only `c` is negated, and it must stay 64 bits wide with the 64-bit sign mask.
  const uint64_t c_neg = c ^ F64_SIGN;
  const auto fused = f64_mulAdd(to_float64_t(a), to_float64_t(b), to_float64_t(c_neg));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(fused);
}
// Single-precision -(a*b) - c on raw bit patterns: flips the sign bits of both
// `a` and `c` (XOR with F32_SIGN) and feeds the operands to f32_mulAdd with
// softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_fnmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const uint32_t negated_a = a ^ F32_SIGN;
  const uint32_t negated_c = c ^ F32_SIGN;
  const auto fused = f32_mulAdd(to_float32_t(negated_a), to_float32_t(b), to_float32_t(negated_c));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(fused);
}
uint32_t rv_fnmsub(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
// Double-precision -(a*b) - c on raw bit patterns: flips the sign bits of both
// `a` and `c` (XOR with the 64-bit mask F64_SIGN) and feeds the operands to
// f64_mulAdd with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fnmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  // Each negated operand is declared once, 64 bits wide, using the 64-bit sign mask.
  const uint64_t a_neg = a ^ F64_SIGN;
  const uint64_t c_neg = c ^ F64_SIGN;
  const auto fused = f64_mulAdd(to_float64_t(a_neg), to_float64_t(b), to_float64_t(c_neg));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(fused);
}
// Single-precision -(a*b) + c on raw bit patterns: flips the sign bit of `a`
// (XOR with F32_SIGN) and feeds the operands to f32_mulAdd with
// softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_fnmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const uint32_t negated_a = a ^ F32_SIGN;
  const auto fused = f32_mulAdd(to_float32_t(negated_a), to_float32_t(b), to_float32_t(c));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(fused);
}
uint32_t rv_fdiv(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
// Double-precision -(a*b) + c on raw bit patterns: flips the sign bit of `a`
// (XOR with F64_SIGN) and feeds the operands to f64_mulAdd with
// softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fnmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const uint64_t negated_a = a ^ F64_SIGN;
  const auto fused = f64_mulAdd(to_float64_t(negated_a), to_float64_t(b), to_float64_t(c));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(fused);
}
// Single-precision divide on raw bit patterns: returns the bits of
// f32_div(a, b) evaluated with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_fdiv_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto quotient = f32_div(to_float32_t(a), to_float32_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(quotient);
}
uint32_t rv_fsqrt(uint32_t a, uint32_t frm, uint32_t* fflags) {
// Double-precision divide on raw bit patterns: returns the bits of
// f64_div(a, b) evaluated with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto quotient = f64_div(to_float64_t(a), to_float64_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(quotient);
}
// Single-precision square root on a raw bit pattern: returns the bits of
// f32_sqrt(a) evaluated with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto root = f32_sqrt(to_float32_t(a));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(root);
}
uint32_t rv_ftoi(uint32_t a, uint32_t frm, uint32_t* fflags) {
// Double-precision square root on a raw bit pattern: returns the bits of
// f64_sqrt(a) evaluated with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fsqrt_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto root = f64_sqrt(to_float64_t(a));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(root);
}
// Converts a single-precision bit pattern to a 32-bit signed integer via
// f32_to_i32(a, frm, true); the result is returned in a uint32_t.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_ftoi_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = f32_to_i32(to_float32_t(a), frm, true);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return converted;
}
uint32_t rv_ftou(uint32_t a, uint32_t frm, uint32_t* fflags) {
// Converts a double-precision bit pattern to a 32-bit signed integer via
// f64_to_i32(a, frm, true); the result is returned in a uint32_t.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_ftoi_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = f64_to_i32(to_float64_t(a), frm, true);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return converted;
}
// Converts a single-precision bit pattern to a 32-bit unsigned integer via
// f32_to_ui32(a, frm, true).
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_ftou_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = f32_to_ui32(to_float32_t(a), frm, true);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return converted;
}
uint32_t rv_itof(uint32_t a, uint32_t frm, uint32_t* fflags) {
// Converts a double-precision bit pattern to a 32-bit unsigned integer via
// f64_to_ui32(a, frm, true).
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_ftou_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = f64_to_ui32(to_float64_t(a), frm, true);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return converted;
}
// Converts a single-precision bit pattern to a 64-bit signed integer via
// f32_to_i64(a, frm, true); the result is returned in a uint64_t.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_ftol_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = f32_to_i64(to_float32_t(a), frm, true);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return converted;
}
// Converts a double-precision bit pattern to a 64-bit signed integer via
// f64_to_i64(a, frm, true); the result is returned in a uint64_t.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_ftol_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = f64_to_i64(to_float64_t(a), frm, true);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return converted;
}
// Converts a single-precision bit pattern to a 64-bit unsigned integer via
// f32_to_ui64(a, frm, true).
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_ftolu_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = f32_to_ui64(to_float32_t(a), frm, true);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return converted;
}
// Converts a double-precision bit pattern to a 64-bit unsigned integer via
// f64_to_ui64(a, frm, true).
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_ftolu_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = f64_to_ui64(to_float64_t(a), frm, true);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return converted;
}
// Converts a 32-bit integer (passed as uint32_t, handed to i32_to_f32) to a
// single-precision bit pattern with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_itof_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = i32_to_f32(a);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(converted);
}
uint32_t rv_utof(uint32_t a, uint32_t frm, uint32_t* fflags) {
// Converts a 32-bit integer (passed as uint32_t, handed to i32_to_f64) to a
// double-precision bit pattern with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_itof_d(uint32_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = i32_to_f64(a);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(converted);
}
// Converts a 32-bit unsigned integer to a single-precision bit pattern via
// ui32_to_f32 with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_utof_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = ui32_to_f32(a);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(converted);
}
uint32_t rv_flt(uint32_t a, uint32_t b, uint32_t* fflags) {
// Converts a 32-bit unsigned integer to a double-precision bit pattern via
// ui32_to_f64 with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_utof_d(uint32_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = ui32_to_f64(a);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(converted);
}
// Converts a 64-bit integer (passed as uint64_t, handed to i64_to_f32) to a
// single-precision bit pattern with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_ltof_s(uint64_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = i64_to_f32(a);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(converted);
}
// Converts a 64-bit integer (passed as uint64_t, handed to i64_to_f64) to a
// double-precision bit pattern with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_ltof_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = i64_to_f64(a);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(converted);
}
// Converts a 64-bit unsigned integer to a single-precision bit pattern via
// ui64_to_f32 with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint32_t rv_lutof_s(uint64_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = ui64_to_f32(a);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float32_t(converted);
}
// Converts a 64-bit unsigned integer to a double-precision bit pattern via
// ui64_to_f64 with softfloat_roundingMode set to `frm`.
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_lutof_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
  softfloat_roundingMode = frm;
  const auto converted = ui64_to_f64(a);
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return from_float64_t(converted);
}
// Single-precision "a < b" on raw bit patterns, as decided by f32_lt.
// If `fflags` is non-null it receives the value returned by get_fflags().
bool rv_flt_s(uint32_t a, uint32_t b, uint32_t* fflags) {
  const auto is_less = f32_lt(to_float32_t(a), to_float32_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return is_less;
}
uint32_t rv_fle(uint32_t a, uint32_t b, uint32_t* fflags) {
// Double-precision "a < b" on raw bit patterns, as decided by f64_lt.
// If `fflags` is non-null it receives the value returned by get_fflags().
bool rv_flt_d(uint64_t a, uint64_t b, uint32_t* fflags) {
  const auto is_less = f64_lt(to_float64_t(a), to_float64_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return is_less;
}
// Single-precision "a <= b" on raw bit patterns, as decided by f32_le.
// If `fflags` is non-null it receives the value returned by get_fflags().
bool rv_fle_s(uint32_t a, uint32_t b, uint32_t* fflags) {
  const auto is_less_or_equal = f32_le(to_float32_t(a), to_float32_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return is_less_or_equal;
}
uint32_t rv_feq(uint32_t a, uint32_t b, uint32_t* fflags) {
// Double-precision "a <= b" on raw bit patterns, as decided by f64_le.
// If `fflags` is non-null it receives the value returned by get_fflags().
bool rv_fle_d(uint64_t a, uint64_t b, uint32_t* fflags) {
  const auto is_less_or_equal = f64_le(to_float64_t(a), to_float64_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return is_less_or_equal;
}
// Single-precision "a == b" on raw bit patterns, as decided by f32_eq.
// If `fflags` is non-null it receives the value returned by get_fflags().
bool rv_feq_s(uint32_t a, uint32_t b, uint32_t* fflags) {
  const auto is_equal = f32_eq(to_float32_t(a), to_float32_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return is_equal;
}
uint32_t rv_fmin(uint32_t a, uint32_t b, uint32_t* fflags) {
int r;
// Double-precision "a == b" on raw bit patterns, as decided by f64_eq.
// If `fflags` is non-null it receives the value returned by get_fflags().
bool rv_feq_d(uint64_t a, uint64_t b, uint32_t* fflags) {
  const auto is_equal = f64_eq(to_float64_t(a), to_float64_t(b));
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return is_equal;
}
uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
uint32_t r;
if (isNaNF32UI(a) && isNaNF32UI(b)) {
r = defaultNaNF32UI;
} else {
@@ -156,8 +328,26 @@ uint32_t rv_fmin(uint32_t a, uint32_t b, uint32_t* fflags) {
return r;
}
uint32_t rv_fmax(uint32_t a, uint32_t b, uint32_t* fflags) {
int r;
// Double-precision minimum on raw bit patterns.
//  - both operands NaN            -> defaultNaNF64UI
//  - a < b (quiet compare)        -> a
//  - a == b and a has its sign set-> a (so a negative zero wins over a positive one)
//  - b is NaN                     -> a
//  - otherwise                    -> b
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags) {
  uint64_t selected = defaultNaNF64UI;
  if (!(isNaNF64UI(a) && isNaNF64UI(b))) {
    const auto lhs = to_float64_t(a);
    const auto rhs = to_float64_t(b);
    // Same short-circuit order as a plain || chain: lt_quiet, then eq+sign, then NaN test.
    const bool take_a = f64_lt_quiet(lhs, rhs)
                     || (f64_eq(lhs, rhs) && (a & F64_SIGN))
                     || isNaNF64UI(b);
    selected = take_a ? a : b;
  }
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return selected;
}
uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags) {
uint32_t r;
if (isNaNF32UI(a) && isNaNF32UI(b)) {
r = defaultNaNF32UI;
} else {
@@ -174,7 +364,25 @@ uint32_t rv_fmax(uint32_t a, uint32_t b, uint32_t* fflags) {
return r;
}
uint32_t rv_fclss(uint32_t a) {
// Double-precision maximum on raw bit patterns.
//  - both operands NaN            -> defaultNaNF64UI
//  - b < a (quiet compare)        -> a
//  - a == b and b has its sign set-> a (so a positive zero wins over a negative one)
//  - b is NaN                     -> a
//  - otherwise                    -> b
// If `fflags` is non-null it receives the value returned by get_fflags().
uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags) {
  uint64_t selected = defaultNaNF64UI;
  if (!(isNaNF64UI(a) && isNaNF64UI(b))) {
    const auto lhs = to_float64_t(a);
    const auto rhs = to_float64_t(b);
    // Same short-circuit order as a plain || chain: lt_quiet, then eq+sign, then NaN test.
    const bool take_a = f64_lt_quiet(rhs, lhs)
                     || (f64_eq(rhs, lhs) && (b & F64_SIGN))
                     || isNaNF64UI(b);
    selected = take_a ? a : b;
  }
  if (fflags != nullptr) {
    *fflags = get_fflags();
  }
  return selected;
}
uint32_t rv_fclss_s(uint32_t a) {
auto infOrNaN = (0xff == expF32UI(a));
auto subnormOrZero = (0 == expF32UI(a));
bool sign = signF32UI(a);
@@ -182,7 +390,7 @@ uint32_t rv_fclss(uint32_t a) {
bool isNaN = isNaNF32UI(a);
bool isSNaN = softfloat_isSigNaNF32UI(a);
int r =
uint32_t r =
( sign && infOrNaN && fracZero ) << 0 |
( sign && !infOrNaN && !subnormOrZero ) << 1 |
( sign && subnormOrZero && !fracZero ) << 2 |
@@ -197,31 +405,77 @@ uint32_t rv_fclss(uint32_t a) {
return r;
}
uint32_t rv_fsgnj(uint32_t a, uint32_t b) {
int sign = b & F32_SIGN;
int r = sign | (a & ~F32_SIGN);
// Classifies a double-precision bit pattern and returns a mask with at most
// one of bits 0..7 set plus, for NaNs, bit 8 (signaling) or bit 9 (quiet):
//   sign set  : bit0 inf, bit1 normal, bit2 subnormal, bit3 zero
//   sign clear: bit7 inf, bit6 normal, bit5 subnormal, bit4 zero
uint32_t rv_fclss_d(uint64_t a) {
  auto infOrNaN = (0x7ff == expF64UI(a));
  auto subnormOrZero = (0 == expF64UI(a));
  bool sign = signF64UI(a);
  bool fracZero = (0 == fracF64UI(a));
  bool isNaN = isNaNF64UI(a);
  bool isSNaN = softfloat_isSigNaNF64UI(a);

  // Magnitude category: 0 = infinity, 1 = normal, 2 = subnormal, 3 = zero,
  // -1 = none (exponent all ones with a non-zero fraction).
  int category = -1;
  if (infOrNaN) {
    if (fracZero) {
      category = 0;
    }
  } else if (!subnormOrZero) {
    category = 1;
  } else {
    category = fracZero ? 3 : 2;
  }

  uint32_t mask = 0;
  if (category >= 0) {
    // Positive values mirror the negative bit positions around the middle of the byte.
    mask |= uint32_t(1) << (sign ? category : (7 - category));
  }
  if (isNaN) {
    mask |= uint32_t(1) << (isSNaN ? 8 : 9);
  }
  return mask;
}
uint32_t rv_fsgnjn(uint32_t a, uint32_t b) {
int sign = ~b & F32_SIGN;
int r = sign | (a & ~F32_SIGN);
// Returns `a` with its sign bit (F32_SIGN) replaced by the sign bit of `b`.
uint32_t rv_fsgnj_s(uint32_t a, uint32_t b) {
  const uint32_t magnitude = a & ~F32_SIGN;
  return (b & F32_SIGN) | magnitude;
}
uint32_t rv_fsgnjx(uint32_t a, uint32_t b) {
int sign1 = a & F32_SIGN;
int sign2 = b & F32_SIGN;
int r = (sign1 ^ sign2) | (a & ~F32_SIGN);
// Returns `a` with its sign bit (F64_SIGN) replaced by the sign bit of `b`.
uint64_t rv_fsgnj_d(uint64_t a, uint64_t b) {
  const uint64_t magnitude = a & ~F64_SIGN;
  return (b & F64_SIGN) | magnitude;
}
// Returns `a` with its sign bit (F32_SIGN) replaced by the inverted sign bit of `b`.
uint32_t rv_fsgnjn_s(uint32_t a, uint32_t b) {
  const uint32_t magnitude = a & ~F32_SIGN;
  return (~b & F32_SIGN) | magnitude;
}
// Returns `a` with its sign bit (F64_SIGN) replaced by the inverted sign bit of `b`.
uint64_t rv_fsgnjn_d(uint64_t a, uint64_t b) {
  const uint64_t magnitude = a & ~F64_SIGN;
  return (~b & F64_SIGN) | magnitude;
}
// Returns `a` with its sign bit (F32_SIGN) replaced by sign(a) XOR sign(b).
uint32_t rv_fsgnjx_s(uint32_t a, uint32_t b) {
  // XOR-ing only b's sign bit into `a` flips a's sign exactly when b's is set
  // and leaves every other bit of `a` as it was.
  return a ^ (b & F32_SIGN);
}
// Returns `a` with its sign bit (F64_SIGN) replaced by sign(a) XOR sign(b).
uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b) {
  // XOR-ing only b's sign bit into `a` flips a's sign exactly when b's is set
  // and leaves every other bit of `a` as it was.
  return a ^ (b & F64_SIGN);
}
// Narrows a double-precision bit pattern to single precision via f64_to_f32.
// This function neither assigns softfloat_roundingMode nor reports flags.
uint32_t rv_dtof(uint64_t a) {
  return from_float32_t(f64_to_f32(to_float64_t(a)));
}
// Widens a single-precision bit pattern to double precision via f32_to_f64.
// This function neither assigns softfloat_roundingMode nor reports flags.
uint64_t rv_ftod(uint32_t a) {
  return from_float64_t(f32_to_f64(to_float32_t(a)));
}
#ifdef __cplusplus
}
#endif

View File

@@ -7,32 +7,73 @@
extern "C" {
#endif
uint32_t rv_fadd(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
uint32_t rv_fsub(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
uint32_t rv_fmul(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
uint32_t rv_fmadd(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
uint32_t rv_fmsub(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
uint32_t rv_fnmadd(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
uint32_t rv_fnmsub(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
uint32_t rv_fadd_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
uint32_t rv_fsub_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
uint32_t rv_fmul_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
uint32_t rv_fmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
uint32_t rv_fmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
uint32_t rv_fnmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
uint32_t rv_fnmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
uint32_t rv_fdiv_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_fdiv(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
uint32_t rv_fsqrt(uint32_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_ftoi_s(uint32_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_ftou_s(uint32_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_itof_s(uint32_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_utof_s(uint32_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_ftoi(uint32_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_ftou(uint32_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_itof(uint32_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_utof(uint32_t a, uint32_t frm, uint32_t* fflags);
uint64_t rv_ftol_s(uint32_t a, uint32_t frm, uint32_t* fflags);
uint64_t rv_ftolu_s(uint32_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_ltof_s(uint64_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_lutof_s(uint64_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_fclss(uint32_t a);
uint32_t rv_fsgnj(uint32_t a, uint32_t b);
uint32_t rv_fsgnjn(uint32_t a, uint32_t b);
uint32_t rv_fsgnjx(uint32_t a, uint32_t b);
uint32_t rv_fclss_s(uint32_t a);
uint32_t rv_flt(uint32_t a, uint32_t b, uint32_t* fflags);
uint32_t rv_fle(uint32_t a, uint32_t b, uint32_t* fflags);
uint32_t rv_feq(uint32_t a, uint32_t b, uint32_t* fflags);
uint32_t rv_fmin(uint32_t a, uint32_t b, uint32_t* fflags);
uint32_t rv_fmax(uint32_t a, uint32_t b, uint32_t* fflags);
uint32_t rv_fsgnj_s(uint32_t a, uint32_t b);
uint32_t rv_fsgnjn_s(uint32_t a, uint32_t b);
uint32_t rv_fsgnjx_s(uint32_t a, uint32_t b);
bool rv_flt_s(uint32_t a, uint32_t b, uint32_t* fflags);
bool rv_fle_s(uint32_t a, uint32_t b, uint32_t* fflags);
bool rv_feq_s(uint32_t a, uint32_t b, uint32_t* fflags);
uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags);
uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags);
///////////////////////////////////////////////////////////////////////////////
uint64_t rv_fadd_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
uint64_t rv_fsub_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
uint64_t rv_fmul_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
uint64_t rv_fsqrt_d(uint64_t a, uint32_t frm, uint32_t* fflags);
uint64_t rv_fmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags);
uint64_t rv_fmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags);
uint64_t rv_fnmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags);
uint64_t rv_fnmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags);
uint32_t rv_ftoi_d(uint64_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_ftou_d(uint64_t a, uint32_t frm, uint32_t* fflags);
uint64_t rv_ftol_d(uint64_t a, uint32_t frm, uint32_t* fflags);
uint64_t rv_ftolu_d(uint64_t a, uint32_t frm, uint32_t* fflags);
uint64_t rv_itof_d(uint32_t a, uint32_t frm, uint32_t* fflags);
uint64_t rv_utof_d(uint32_t a, uint32_t frm, uint32_t* fflags);
uint64_t rv_ltof_d(uint64_t a, uint32_t frm, uint32_t* fflags);
uint64_t rv_lutof_d(uint64_t a, uint32_t frm, uint32_t* fflags);
uint32_t rv_fclss_d(uint64_t a);
uint64_t rv_fsgnj_d(uint64_t a, uint64_t b);
uint64_t rv_fsgnjn_d(uint64_t a, uint64_t b);
uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b);
bool rv_flt_d(uint64_t a, uint64_t b, uint32_t* fflags);
bool rv_fle_d(uint64_t a, uint64_t b, uint32_t* fflags);
bool rv_feq_d(uint64_t a, uint64_t b, uint32_t* fflags);
uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags);
uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags);
uint32_t rv_dtof(uint64_t a);
uint64_t rv_ftod(uint32_t a);
#ifdef __cplusplus
}

View File

@@ -56,7 +56,7 @@ inline void Unpack8888(TexFormat format,
uint32_t texel,
uint32_t* lo,
uint32_t* hi) {
int r, g, b, a;
uint32_t r, g, b, a;
switch (format) {
case TexFormat::A8R8G8B8:
r = (texel >> 16) & 0xff;

View File

@@ -29,6 +29,11 @@ else
CXXFLAGS += -O2 -DNDEBUG
endif
# XLEN parameterization
ifdef XLEN
CXXFLAGS += -DXLEN=$(XLEN)
endif
PROJECT = simx
all: $(DESTDIR)/$(PROJECT)
@@ -43,4 +48,4 @@ $(DESTDIR)/lib$(PROJECT).so: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
clean:
rm -rf obj_dir $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so
rm -rf obj_dir $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so

View File

@@ -21,8 +21,7 @@ private:
uint16_t num_barriers_;
public:
ArchDef(const std::string& /*arch*/,
uint16_t num_cores,
ArchDef(uint16_t num_cores,
uint16_t num_warps,
uint16_t num_threads)
: num_cores_(num_cores)

View File

@@ -488,11 +488,11 @@ private:
} else {
bool hit = false;
bool found_free_block = false;
int hit_block_id = 0;
int repl_block_id = 0;
uint32_t hit_block_id = 0;
uint32_t repl_block_id = 0;
uint32_t max_cnt = 0;
for (int i = 0, n = set.blocks.size(); i < n; ++i) {
for (uint32_t i = 0, n = set.blocks.size(); i < n; ++i) {
auto& block = set.blocks.at(i);
if (block.valid) {
if (block.tag == pipeline_req.tag) {

View File

@@ -16,7 +16,7 @@ namespace vortex {
enum Constants {
  // Sum of log2ceil of the 32-bit word size in bytes and log2ceil of the number
  // of 32-bit words in STACK_SIZE. Defined once, on a fixed-width type.
  SMEM_BANK_OFFSET = log2ceil(sizeof(uint32_t)) + log2ceil(STACK_SIZE / sizeof(uint32_t)),
};

View File

@@ -13,7 +13,7 @@
using namespace vortex;
Core::Core(const SimContext& ctx, const ArchDef &arch, Word id)
Core::Core(const SimContext& ctx, const ArchDef &arch, uint32_t id)
: SimObject(ctx, "Core")
, MemRspPort(this)
, MemReqPort(this)
@@ -73,7 +73,7 @@ Core::Core(const SimContext& ctx, const ArchDef &arch, Word id)
, decode_latch_("decode")
, pending_icache_(arch_.num_warps())
{
for (int i = 0; i < arch_.num_warps(); ++i) {
for (uint32_t i = 0; i < arch_.num_warps(); ++i) {
warps_.at(i) = std::make_shared<Warp>(this, i);
}
@@ -195,7 +195,7 @@ void Core::tick() {
void Core::schedule() {
bool foundSchedule = false;
int scheduled_warp = last_schedule_wid_;
uint32_t scheduled_warp = last_schedule_wid_;
// round robin scheduling
for (size_t wid = 0, nw = arch_.num_warps(); wid < nw; ++wid) {
@@ -367,11 +367,11 @@ void Core::commit() {
}
}
WarpMask Core::wspawn(int num_warps, int nextPC) {
WarpMask Core::wspawn(uint32_t num_warps, uint32_t nextPC) {
WarpMask ret(1);
int active_warps = std::min<int>(num_warps, arch_.num_warps());
uint32_t active_warps = std::min<uint32_t>(num_warps, arch_.num_warps());
DP(3, "*** Activate " << (active_warps-1) << " warps at PC: " << std::hex << nextPC);
for (int i = 1; i < active_warps; ++i) {
for (uint32_t i = 1; i < active_warps; ++i) {
auto warp = warps_.at(i);
warp->setPC(nextPC);
warp->setTmask(0, true);
@@ -380,7 +380,7 @@ WarpMask Core::wspawn(int num_warps, int nextPC) {
return ret;
}
WarpMask Core::barrier(int bar_id, int count, int warp_id) {
WarpMask Core::barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id) {
WarpMask ret(0);
auto& barrier = barriers_.at(bar_id);
barrier.set(warp_id);
@@ -389,7 +389,7 @@ WarpMask Core::barrier(int bar_id, int count, int warp_id) {
DP(3, "*** Suspend warp #" << warp_id << " at barrier #" << bar_id);
return ret;
}
for (int i = 0; i < arch_.num_warps(); ++i) {
for (uint32_t i = 0; i < arch_.num_warps(); ++i) {
if (barrier.test(i)) {
DP(3, "*** Resume warp #" << i << " at barrier #" << bar_id);
warps_.at(i)->activate();
@@ -400,45 +400,45 @@ WarpMask Core::barrier(int bar_id, int count, int warp_id) {
return ret;
}
Word Core::icache_read(Addr addr, Size size) {
Word data;
mmu_.read(&data, addr, size, 0);
return data;
void Core::icache_read(void *data, uint64_t addr, uint32_t size) {
mmu_.read(data, addr, size, 0);
}
Word Core::dcache_read(Addr addr, Size size) {
Word data;
void Core::dcache_read(void *data, uint64_t addr, uint32_t size) {
auto type = get_addr_type(addr, size);
if (type == AddrType::Shared) {
smem_.read(&data, addr & (SMEM_SIZE-1), size);
addr &= (SMEM_SIZE-1);
smem_.read(data, addr, size);
} else {
mmu_.read(&data, addr, size, 0);
mmu_.read(data, addr, size, 0);
}
return data;
}
void Core::dcache_write(Addr addr, Word data, Size size) {
void Core::dcache_write(const void* data, uint64_t addr, uint32_t size) {
if (addr >= IO_COUT_ADDR
&& addr < (IO_COUT_ADDR + IO_COUT_SIZE)) {
this->writeToStdOut(addr, data);
&& addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
this->writeToStdOut(data, addr, size);
} else {
auto type = get_addr_type(addr, size);
if (type == AddrType::Shared) {
smem_.write(&data, addr & (SMEM_SIZE-1), size);
addr &= (SMEM_SIZE-1);
smem_.write(data, addr, size);
} else {
mmu_.write(&data, addr, size, 0);
mmu_.write(data, addr, size, 0);
}
}
}
Word Core::tex_read(uint32_t unit, Word u, Word v, Word lod, std::vector<mem_addr_size_t>* mem_addrs) {
uint32_t Core::tex_read(uint32_t unit, uint32_t u, uint32_t v, uint32_t lod, std::vector<mem_addr_size_t>* mem_addrs) {
return tex_units_.at(unit).read(u, v, lod, mem_addrs);
}
void Core::writeToStdOut(Addr addr, Word data) {
void Core::writeToStdOut(const void* data, uint64_t addr, uint32_t size) {
if (size != 1)
std::abort();
uint32_t tid = (addr - IO_COUT_ADDR) & (IO_COUT_SIZE-1);
auto& ss_buf = print_bufs_[tid];
char c = (char)data;
char c = *(char*)data;
ss_buf << c;
if (c == '\n') {
std::cout << std::dec << "#" << tid << ": " << ss_buf.str() << std::flush;
@@ -446,7 +446,7 @@ void Core::writeToStdOut(Addr addr, Word data) {
}
}
Word Core::get_csr(Addr addr, int tid, int wid) {
uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
switch (addr) {
case CSR_SATP:
case CSR_PMPCFG0:
@@ -502,13 +502,13 @@ Word Core::get_csr(Addr addr, int tid, int wid) {
return perf_stats_.instrs & 0xffffffff;
case CSR_MINSTRET_H:
// NumInsts
return (Word)(perf_stats_.instrs >> 32);
return (uint32_t)(perf_stats_.instrs >> 32);
case CSR_MCYCLE:
// NumCycles
return (Word)SimPlatform::instance().cycles();
return (uint32_t)SimPlatform::instance().cycles();
case CSR_MCYCLE_H:
// NumCycles
return (Word)(SimPlatform::instance().cycles() >> 32);
return (uint32_t)(SimPlatform::instance().cycles() >> 32);
case CSR_MPM_IBUF_ST:
return perf_stats_.ibuf_stalls & 0xffffffff;
case CSR_MPM_IBUF_ST_H:
@@ -644,7 +644,7 @@ Word Core::get_csr(Addr addr, int tid, int wid) {
return 0;
}
void Core::set_csr(Addr addr, Word value, int /*tid*/, int wid) {
void Core::set_csr(uint32_t addr, uint32_t value, uint32_t /*tid*/, uint32_t wid) {
if (addr == CSR_FFLAGS) {
fcsrs_.at(wid) = (fcsrs_.at(wid) & ~0x1F) | (value & 0x1F);
} else if (addr == CSR_FRM) {

View File

@@ -68,7 +68,7 @@ public:
SimPort<MemRsp> MemRspPort;
SimPort<MemReq> MemReqPort;
Core(const SimContext& ctx, const ArchDef &arch, Word id);
Core(const SimContext& ctx, const ArchDef &arch, uint32_t id);
~Core();
void attach_ram(RAM* ram);
@@ -79,7 +79,7 @@ public:
void tick();
Word id() const {
uint32_t id() const {
return id_;
}
@@ -95,25 +95,25 @@ public:
return perf_stats_;
}
Word getIRegValue(int reg) const {
uint32_t getIRegValue(int reg) const {
return warps_.at(0)->getIRegValue(reg);
}
Word get_csr(Addr addr, int tid, int wid);
uint32_t get_csr(uint32_t addr, uint32_t tid, uint32_t wid);
void set_csr(Addr addr, Word value, int tid, int wid);
void set_csr(uint32_t addr, uint32_t value, uint32_t tid, uint32_t wid);
WarpMask wspawn(int num_warps, int nextPC);
WarpMask wspawn(uint32_t num_warps, uint32_t nextPC);
WarpMask barrier(int bar_id, int count, int warp_id);
WarpMask barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
Word icache_read(Addr, Size);
void icache_read(void* data, uint64_t addr, uint32_t size);
Word dcache_read(Addr, Size);
void dcache_read(void* data, uint64_t addr, uint32_t size);
void dcache_write(Addr, Word, Size);
void dcache_write(const void* data, uint64_t addr, uint32_t size);
// Texture read on unit `unit`. Parameter names follow the order used by the
// definition (u, v, lod) so the declaration does not mislabel the arguments.
uint32_t tex_read(uint32_t unit, uint32_t u, uint32_t v, uint32_t lod, std::vector<mem_addr_size_t>* mem_addrs);
void trigger_ecall();
@@ -129,11 +129,11 @@ private:
void execute();
void commit();
void writeToStdOut(Addr addr, Word data);
void writeToStdOut(const void* data, uint64_t addr, uint32_t size);
void cout_flush();
Word id_;
uint32_t id_;
const ArchDef arch_;
const Decoder decoder_;
MemoryUnit mmu_;
@@ -142,7 +142,7 @@ private:
std::vector<std::shared_ptr<Warp>> warps_;
std::vector<WarpMask> barriers_;
std::vector<Word> csrs_;
std::vector<uint32_t> csrs_;
std::vector<Byte> fcsrs_;
std::vector<IBuffer> ibuffers_;
Scoreboard scoreboard_;

View File

@@ -19,7 +19,7 @@ struct InstTableEntry_t {
InstType iType;
};
static const std::unordered_map<int, struct InstTableEntry_t> sc_instTable = {
static const std::unordered_map<Opcode, struct InstTableEntry_t> sc_instTable = {
{Opcode::NOP, {false, InstType::N_TYPE}},
{Opcode::R_INST, {false, InstType::R_TYPE}},
{Opcode::L_INST, {false, InstType::I_TYPE}},
@@ -42,15 +42,54 @@ static const std::unordered_map<int, struct InstTableEntry_t> sc_instTable = {
{Opcode::VSET, {false, InstType::V_TYPE}},
{Opcode::GPGPU, {false, InstType::R_TYPE}},
{Opcode::GPU, {false, InstType::R4_TYPE}},
{Opcode::R_INST_W, {false, InstType::R_TYPE}},
{Opcode::I_INST_W, {false, InstType::I_TYPE}},
};
// Field geometry used by the decoder: a bit width per field, a shift (bit
// offset) per field built by accumulating the widths of the fields before it,
// and an all-ones mask per width.
enum Constants {
  // --- field widths, in bits ---
  width_opcode = 7,
  width_reg    = 5,
  width_func2  = 2,
  width_func3  = 3,
  width_func6  = 6,
  width_func7  = 7,
  width_mop    = 3,
  width_vmask  = 1,
  width_i_imm  = 12,
  width_j_imm  = 20,
  width_v_imm  = 11,

  // --- field offsets, each derived from the preceding field ---
  shift_opcode = 0,
  shift_rd     = width_opcode,
  shift_func3  = shift_rd + width_reg,
  shift_rs1    = shift_func3 + width_func3,
  shift_rs2    = shift_rs1 + width_reg,
  shift_func2  = shift_rs2 + width_reg,
  shift_func7  = shift_rs2 + width_reg,   // same offset as shift_func2
  shift_rs3    = shift_func7 + width_func2,
  shift_vmop   = shift_func7 + width_vmask,
  shift_vnf    = shift_vmop + width_mop,
  shift_func6  = shift_func7 + width_vmask,
  shift_vset   = shift_func7 + width_func6,

  // --- masks: `width` low bits set ---
  mask_opcode  = (1 << width_opcode) - 1,
  mask_reg     = (1 << width_reg) - 1,
  mask_func2   = (1 << width_func2) - 1,
  mask_func3   = (1 << width_func3) - 1,
  mask_func6   = (1 << width_func6) - 1,
  mask_func7   = (1 << width_func7) - 1,
  mask_i_imm   = (1 << width_i_imm) - 1,
  mask_j_imm   = (1 << width_j_imm) - 1,
  mask_v_imm   = (1 << width_v_imm) - 1,
};
static const char* op_string(const Instr &instr) {
auto opcode = instr.getOpcode();
Word func2 = instr.getFunc2();
Word func3 = instr.getFunc3();
Word func7 = instr.getFunc7();
Word rs2 = instr.getRSrc(1);
Word imm = instr.getImm();
auto func2 = instr.getFunc2();
auto func3 = instr.getFunc3();
auto func7 = instr.getFunc7();
auto rs2 = instr.getRSrc(1);
auto imm = instr.getImm();
switch (opcode) {
case Opcode::NOP: return "NOP";
@@ -115,8 +154,10 @@ static const char* op_string(const Instr &instr) {
case 0: return "LBI";
case 1: return "LHI";
case 2: return "LW";
case 3: return "LD";
case 4: return "LBU";
case 5: return "LHU";
case 6: return "LWU";
default:
std::abort();
}
@@ -125,9 +166,38 @@ static const char* op_string(const Instr &instr) {
case 0: return "SB";
case 1: return "SH";
case 2: return "SW";
case 3: return "SD";
default:
std::abort();
}
case Opcode::R_INST_W:
if (func7 & 0x1){
switch (func3) {
case 0: return "MULW";
case 4: return "DIVW";
case 5: return "DIVUW";
case 6: return "REMW";
case 7: return "REMUW";
default:
std::abort();
}
} else {
switch (func3) {
case 0: return func7 ? "SUBW" : "ADDW";
case 1: return "SLLW";
case 5: return func7 ? "SRAW" : "SRLW";
default:
std::abort();
}
}
case Opcode::I_INST_W:
switch (func3) {
case 0: return "ADDIW";
case 1: return "SLLIW";
case 5: return func7 ? "SRAIW" : "SRLIW";
default:
std::abort();
}
case Opcode::SYS_INST:
switch (func3) {
case 0:
@@ -150,49 +220,129 @@ static const char* op_string(const Instr &instr) {
std::abort();
}
case Opcode::FENCE: return "FENCE";
case Opcode::FL: return (func3 == 0x2) ? "FL" : "VL";
case Opcode::FS: return (func3 == 0x2) ? "FS" : "VS";
case Opcode::FL:
switch (func3) {
case 0x1: return "VL";
case 0x2: return "FLW";
case 0x3: return "FLD";
default:
std::abort();
}
case Opcode::FS:
switch (func3) {
case 0x1: return "VS";
case 0x2: return "FSW";
case 0x3: return "FSD";
default:
std::abort();
}
case Opcode::FCI:
switch (func7) {
case 0x00: return "FADD";
case 0x04: return "FSUB";
case 0x08: return "FMUL";
case 0x0c: return "FDIV";
case 0x2c: return "FSQRT";
case 0x00: return "FADD.S";
case 0x01: return "FADD.D";
case 0x04: return "FSUB.S";
case 0x05: return "FSUB.D";
case 0x08: return "FMUL.S";
case 0x09: return "FMUL.D";
case 0x0c: return "FDIV.S";
case 0x0d: return "FDIV.D";
case 0x2c: return "FSQRT.S";
case 0x2d: return "FSQRT.D";
case 0x10:
switch (func3) {
case 0: return "FSGNJ";
case 1: return "FSGNJN";
case 2: return "FSGNJX";
case 0: return "FSGNJ.S";
case 1: return "FSGNJN.S";
case 2: return "FSGNJX.S";
default:
std::abort();
}
case 0x11:
switch (func3) {
case 0: return "FSGNJ.D";
case 1: return "FSGNJN.D";
case 2: return "FSGNJX.D";
default:
std::abort();
}
case 0x14:
switch (func3) {
case 0: return "FMIM";
case 1: return "FMAX";
case 0: return "FMIN.S";
case 1: return "FMAX.S";
default:
std::abort();
}
case 0x15:
switch (func3) {
case 0: return "FMIN.D";
case 1: return "FMAX.D";
default:
std::abort();
}
case 0x20: return "FCVT.S.D";
case 0x21: return "FCVT.D.S";
case 0x50:
switch (func3) {
case 0: return "FLE";
case 1: return "FLT";
case 2: return "FEQ";
case 0: return "FLE.S";
case 1: return "FLT.S";
case 2: return "FEQ.S";
default:
std::abort();
}
case 0x60: return rs2 ? "FCVT.WU.S" : "FCVT.W.S";
case 0x68: return rs2 ? "FCVT.S.WU" : "FCVT.S.W";
case 0x70: return func3 ? "FLASS" : "FMV.X.W";
case 0x51:
switch (func3) {
case 0: return "FLE.D";
case 1: return "FLT.D";
case 2: return "FEQ.D";
default:
std::abort();
}
case 0x60:
switch (rs2) {
case 0: return "FCVT.W.S";
case 1: return "FCVT.WU.S";
case 2: return "FCVT.L.S";
case 3: return "FCVT.LU.S";
default:
std::abort();
}
case 0x61:
switch (rs2) {
case 0: return "FCVT.W.D";
case 1: return "FCVT.WU.D";
case 2: return "FCVT.L.D";
case 3: return "FCVT.LU.D";
default:
std::abort();
}
case 0x68:
switch (rs2) {
case 0: return "FCVT.S.W";
case 1: return "FCVT.S.WU";
case 2: return "FCVT.S.L";
case 3: return "FCVT.S.LU";
default:
std::abort();
}
case 0x69:
switch (rs2) {
case 0: return "FCVT.D.W";
case 1: return "FCVT.D.WU";
case 2: return "FCVT.D.L";
case 3: return "FCVT.D.LU";
default:
std::abort();
}
case 0x70: return func3 ? "FCLASS.S" : "FMV.X.W";
case 0x71: return func3 ? "FCLASS.D" : "FMV.X.D";
case 0x78: return "FMV.W.X";
case 0x79: return "FMV.D.X";
default:
std::abort();
}
case Opcode::FMADD: return "FMADD";
case Opcode::FMSUB: return "FMSUB";
case Opcode::FMNMADD: return "FMNMADD";
case Opcode::FMNMSUB: return "FMNMSUB";
case Opcode::FMADD: return func2 ? "FMADD.D" : "FMADD.S";
case Opcode::FMSUB: return func2 ? "FMSUB.D" : "FMSUB.S";
case Opcode::FMNMADD: return func2 ? "FNMADD.D" : "FNMADD.S";
case Opcode::FMNMSUB: return func2 ? "FNMSUB.D" : "FNMSUB.S";
case Opcode::VSET: return "VSET";
case Opcode::GPGPU:
switch (func3) {
@@ -226,8 +376,8 @@ static const char* op_string(const Instr &instr) {
namespace vortex {
std::ostream &operator<<(std::ostream &os, const Instr &instr) {
auto opcode = instr.getOpcode();
Word func2 = instr.getFunc2();
Word func3 = instr.getFunc3();
auto func2 = instr.getFunc2();
auto func3 = instr.getFunc3();
os << op_string(instr) << ": ";
@@ -244,7 +394,7 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) {
if (instr.getRDType() != RegType::None) {
os << instr.getRDType() << std::dec << instr.getRDest() << " <- ";
}
int i = 0;
uint32_t i = 0;
for (; i < instr.getNRSrc(); ++i) {
if (i) os << ", ";
os << instr.getRSType(i) << std::dec << instr.getRSrc(i);
@@ -261,56 +411,22 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) {
}
}
Decoder::Decoder(const ArchDef &arch) {
inst_s_ = arch.wsize() * 8;
opcode_s_ = 7;
reg_s_ = 5;
func2_s_ = 2;
func3_s_ = 3;
mop_s_ = 3;
vmask_s_ = 1;
Decoder::Decoder(const ArchDef&) {}
shift_opcode_ = 0;
shift_rd_ = opcode_s_;
shift_func3_ = shift_rd_ + reg_s_;
shift_rs1_ = shift_func3_ + func3_s_;
shift_rs2_ = shift_rs1_ + reg_s_;
shift_func2_ = shift_rs2_ + reg_s_;
shift_func7_ = shift_rs2_ + reg_s_;
shift_rs3_ = shift_func7_ + func2_s_;
shift_vmop_ = shift_func7_ + vmask_s_;
shift_vnf_ = shift_vmop_ + mop_s_;
shift_func6_ = shift_func7_ + 1;
shift_vset_ = shift_func7_ + 6;
reg_mask_ = 0x1f;
func2_mask_ = 0x3;
func3_mask_ = 0x7;
func6_mask_ = 0x3f;
func7_mask_ = 0x7f;
opcode_mask_ = 0x7f;
i_imm_mask_ = 0xfff;
s_imm_mask_ = 0xfff;
b_imm_mask_ = 0x1fff;
u_imm_mask_ = 0xfffff;
j_imm_mask_ = 0xfffff;
v_imm_mask_ = 0x7ff;
}
std::shared_ptr<Instr> Decoder::decode(Word code) const {
std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
auto instr = std::make_shared<Instr>();
Opcode op = (Opcode)((code >> shift_opcode_) & opcode_mask_);
auto op = Opcode((code >> shift_opcode) & mask_opcode);
instr->setOpcode(op);
Word func2 = (code >> shift_func2_) & func2_mask_;
Word func3 = (code >> shift_func3_) & func3_mask_;
Word func6 = (code >> shift_func6_) & func6_mask_;
Word func7 = (code >> shift_func7_) & func7_mask_;
auto func2 = (code >> shift_func2) & mask_func2;
auto func3 = (code >> shift_func3) & mask_func3;
auto func6 = (code >> shift_func6) & mask_func6;
auto func7 = (code >> shift_func7) & mask_func7;
int rd = (code >> shift_rd_) & reg_mask_;
int rs1 = (code >> shift_rs1_) & reg_mask_;
int rs2 = (code >> shift_rs2_) & reg_mask_;
int rs3 = (code >> shift_rs3_) & reg_mask_;
auto rd = (code >> shift_rd) & mask_reg;
auto rs1 = (code >> shift_rs1) & mask_reg;
auto rs2 = (code >> shift_rs2) & mask_reg;
auto rs3 = (code >> shift_rs3) & mask_reg;
auto op_it = sc_instTable.find(op);
if (op_it == sc_instTable.end()) {
@@ -320,7 +436,7 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
auto iType = op_it->second.iType;
if (op == Opcode::FL || op == Opcode::FS) {
if (func3 != 0x2) {
if (func3 != 0x2 && func3 != 0x3) {
iType = InstType::V_TYPE;
}
}
@@ -330,40 +446,57 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
break;
case InstType::R_TYPE:
if (op == Opcode::FCI) {
switch (func7) {
case 0x68: // FCVT.S.W, FCVT.S.WU
if (op == Opcode::FCI) {
switch (func7) {
case 0x50: // FLE.S, FLT.S, FEQ.S
case 0x51: // FLE.D, FLT.D, FEQ.D
instr->setDestReg(rd, RegType::Integer);
instr->setSrcReg(rs1, RegType::Float);
instr->setSrcReg(rs2, RegType::Float);
break;
case 0x60: // FCVT.W.S, FCVT.WU.S, FCVT.L.S, FCVT.LU.S
case 0x61: // FCVT.W.D, FCVT.WU.D, FCVT.L.D, FCVT.LU.D
instr->setDestReg(rd, RegType::Integer);
instr->setSrcReg(rs1, RegType::Float);
instr->setSrcReg(rs2, RegType::Integer);
break;
case 0x68: // FCVT.S.W, FCVT.S.WU, FCVT.S.L, FCVT.S.LU
case 0x69: // FCVT.D.W, FCVT.D.WU, FCVT.D.L, FCVT.D.LU
instr->setDestReg(rd, RegType::Float);
instr->setSrcReg(rs1, RegType::Integer);
instr->setSrcReg(rs2, RegType::Integer);
break;
case 0x70: // FCLASS.S, FMV.X.W
case 0x71: // FCLASS.D, FMV.X.D
instr->setDestReg(rd, RegType::Integer);
instr->setSrcReg(rs1, RegType::Float);
break;
case 0x78: // FMV.W.X
instr->setSrcReg(rs1);
case 0x79: // FMV.D.X
instr->setDestReg(rd, RegType::Float);
instr->setSrcReg(rs1, RegType::Integer);
break;
default:
instr->setSrcFReg(rs1);
}
instr->setSrcFReg(rs2);
switch (func7) {
case 0x50: // FLE, FLT, FEQ
case 0x60: // FCVT.WU.S, FCVT.W.S
case 0x70: // FLASS, FMV.X.W
instr->setDestReg(rd);
instr->setDestReg(rd, RegType::Float);
instr->setSrcReg(rs1, RegType::Float);
instr->setSrcReg(rs2, RegType::Float);
break;
default:
instr->setDestFReg(rd);
}
} else {
instr->setDestReg(rd);
instr->setSrcReg(rs1);
instr->setSrcReg(rs2);
instr->setDestReg(rd, RegType::Integer);
instr->setSrcReg(rs1, RegType::Integer);
instr->setSrcReg(rs2, RegType::Integer);
}
instr->setFunc3(func3);
instr->setFunc7(func7);
break;
case InstType::I_TYPE: {
instr->setSrcReg(rs1);
instr->setSrcReg(rs1, RegType::Integer);
if (op == Opcode::FL) {
instr->setDestFReg(rd);
instr->setDestReg(rd, RegType::Float);
} else {
instr->setDestReg(rd);
instr->setDestReg(rd, RegType::Integer);
}
instr->setFunc3(func3);
instr->setFunc7(func7);
@@ -371,64 +504,71 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
case Opcode::SYS_INST:
case Opcode::FENCE:
// uint12
instr->setImm(code >> shift_rs2_);
instr->setImm(code >> shift_rs2);
break;
case Opcode::I_INST:
case Opcode::I_INST_W:
if (func3 == 0x1 || func3 == 0x5) {
// int5
instr->setImm(sext32(rs2, 5));
auto shamt = rs2; // uint5
#if (XLEN == 64)
if (op == Opcode::I_INST) {
// uint6
shamt |= ((func7 & 0x1) << 5);
}
#endif
instr->setImm(shamt);
} else {
// int12
instr->setImm(sext32(code >> shift_rs2_, 12));
auto imm = code >> shift_rs2;
instr->setImm(sext(imm, width_i_imm));
}
break;
default:
// int12
instr->setImm(sext32(code >> shift_rs2_, 12));
auto imm = code >> shift_rs2;
instr->setImm(sext(imm, width_i_imm));
break;
}
} break;
case InstType::S_TYPE: {
instr->setSrcReg(rs1);
instr->setSrcReg(rs1, RegType::Integer);
if (op == Opcode::FS) {
instr->setSrcFReg(rs2);
instr->setSrcReg(rs2, RegType::Float);
} else {
instr->setSrcReg(rs2);
instr->setSrcReg(rs2, RegType::Integer);
}
instr->setFunc3(func3);
Word imm = (func7 << reg_s_) | rd;
instr->setImm(sext32(imm, 12));
auto imm = (func7 << width_reg) | rd;
instr->setImm(sext(imm, width_i_imm));
} break;
case InstType::B_TYPE: {
instr->setSrcReg(rs1);
instr->setSrcReg(rs2);
instr->setSrcReg(rs1, RegType::Integer);
instr->setSrcReg(rs2, RegType::Integer);
instr->setFunc3(func3);
Word bit_11 = rd & 0x1;
Word bits_4_1 = rd >> 1;
Word bit_10_5 = func7 & 0x3f;
Word bit_12 = func7 >> 6;
Word imm = (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
instr->setImm(sext32(imm, 13));
auto bit_11 = rd & 0x1;
auto bits_4_1 = rd >> 1;
auto bit_10_5 = func7 & 0x3f;
auto bit_12 = func7 >> 6;
auto imm = (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
instr->setImm(sext(imm, width_i_imm+1));
} break;
case InstType::U_TYPE:
instr->setDestReg(rd);
instr->setImm(sext32(code >> shift_func3_, 20));
break;
case InstType::U_TYPE: {
instr->setDestReg(rd, RegType::Integer);
auto imm = code >> shift_func3;
instr->setImm(sext(imm, width_j_imm));
} break;
case InstType::J_TYPE: {
instr->setDestReg(rd);
Word unordered = code >> shift_func3_;
Word bits_19_12 = unordered & 0xff;
Word bit_11 = (unordered >> 8) & 0x1;
Word bits_10_1 = (unordered >> 9) & 0x3ff;
Word bit_20 = (unordered >> 19) & 0x1;
Word imm = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
if (bit_20) {
imm |= ~j_imm_mask_;
}
instr->setImm(imm);
instr->setDestReg(rd, RegType::Integer);
auto unordered = code >> shift_func3;
auto bits_19_12 = unordered & 0xff;
auto bit_11 = (unordered >> 8) & 0x1;
auto bits_10_1 = (unordered >> 9) & 0x3ff;
auto bit_20 = (unordered >> 19) & 0x1;
auto imm = (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
instr->setImm(sext(imm, width_j_imm+1));
} break;
case InstType::V_TYPE:
@@ -438,9 +578,9 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
instr->setSrcVReg(rs1);
instr->setFunc3(func3);
if (func3 == 7) {
instr->setImm(!(code >> shift_vset_));
instr->setImm(!(code >> shift_vset));
if (instr->getImm()) {
Word immed = (code >> shift_rs2_) & v_imm_mask_;
auto immed = (code >> shift_rs2) & mask_v_imm;
instr->setImm(immed);
instr->setVlmul(immed & 0x3);
instr->setVediv((immed >> 4) & 0x3);
@@ -450,7 +590,7 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
}
} else {
instr->setSrcVReg(rs2);
instr->setVmask((code >> shift_func7_) & 0x1);
instr->setVmask((code >> shift_func7) & 0x1);
instr->setFunc6(func6);
}
} break;
@@ -460,9 +600,9 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
instr->setSrcVReg(rs1);
instr->setVlsWidth(func3);
instr->setSrcVReg(rs2);
instr->setVmask(code >> shift_func7_);
instr->setVmop((code >> shift_vmop_) & func3_mask_);
instr->setVnf((code >> shift_vnf_) & func3_mask_);
instr->setVmask(code >> shift_func7);
instr->setVmop((code >> shift_vmop) & mask_func3);
instr->setVnf((code >> shift_vnf) & mask_func3);
break;
case Opcode::FS:
@@ -470,9 +610,9 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
instr->setSrcVReg(rs1);
instr->setVlsWidth(func3);
instr->setSrcVReg(rs2);
instr->setVmask(code >> shift_func7_);
instr->setVmop((code >> shift_vmop_) & func3_mask_);
instr->setVnf((code >> shift_vnf_) & func3_mask_);
instr->setVmask(code >> shift_func7);
instr->setVmop((code >> shift_vmop) & mask_func3);
instr->setVnf((code >> shift_vnf) & mask_func3);
break;
default:
@@ -481,15 +621,15 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
break;
case R4_TYPE:
if (op == Opcode::GPU) {
instr->setDestReg(rd);
instr->setSrcReg(rs1);
instr->setSrcReg(rs2);
instr->setSrcReg(rs3);
instr->setDestReg(rd, RegType::Integer);
instr->setSrcReg(rs1, RegType::Integer);
instr->setSrcReg(rs2, RegType::Integer);
instr->setSrcReg(rs3, RegType::Integer);
} else {
instr->setDestFReg(rd);
instr->setSrcFReg(rs1);
instr->setSrcFReg(rs2);
instr->setSrcFReg(rs3);
instr->setDestReg(rd, RegType::Float);
instr->setSrcReg(rs1, RegType::Float);
instr->setSrcReg(rs2, RegType::Float);
instr->setSrcReg(rs3, RegType::Float);
}
instr->setFunc2(func2);
instr->setFunc3(func3);

View File

@@ -7,55 +7,12 @@ namespace vortex {
class ArchDef;
class Instr;
class Pipeline;
class Decoder {
public:
Decoder(const ArchDef &);
std::shared_ptr<Instr> decode(Word code) const;
private:
Word inst_s_;
Word opcode_s_;
Word reg_s_;
Word func2_s_;
Word func3_s_;
Word shift_opcode_;
Word shift_rd_;
Word shift_rs1_;
Word shift_rs2_;
Word shift_rs3_;
Word shift_func2_;
Word shift_func3_;
Word shift_func7_;
Word shift_j_u_immed_;
Word shift_s_b_immed_;
Word shift_i_immed_;
Word reg_mask_;
Word func2_mask_;
Word func3_mask_;
Word func6_mask_;
Word func7_mask_;
Word opcode_mask_;
Word i_imm_mask_;
Word s_imm_mask_;
Word b_imm_mask_;
Word u_imm_mask_;
Word j_imm_mask_;
Word v_imm_mask_;
//Vector
Word shift_vset_;
Word shift_vset_immed_;
Word shift_vmask_;
Word shift_vmop_;
Word shift_vnf_;
Word shift_func6_;
Word vmask_s_;
Word mop_s_;
std::shared_ptr<Instr> decode(uint32_t code) const;
};
}

File diff suppressed because it is too large Load Diff

View File

@@ -114,8 +114,8 @@ void LsuUnit::tick() {
// duplicates detection
bool is_dup = false;
if (trace->tmask.test(0)) {
uint64_t addr_mask = sizeof(Word)-1;
Word addr0 = trace->mem_addrs.at(0).at(0).addr & ~addr_mask;
uint64_t addr_mask = sizeof(uint32_t)-1;
uint32_t addr0 = trace->mem_addrs.at(0).at(0).addr & ~addr_mask;
uint32_t matches = 1;
for (uint32_t t = 1; t < num_threads_; ++t) {
if (!trace->tmask.test(t))

View File

@@ -32,6 +32,9 @@ enum Opcode {
// GPGPU Extension
GPGPU = 0x6b,
GPU = 0x5b,
// RV64 Standard Extensions
R_INST_W = 0x3b,
I_INST_W = 0x1b,
};
enum InstType {
@@ -54,56 +57,54 @@ public:
, has_imm_(false)
, rdest_type_(RegType::None)
, rdest_(0)
, func2_(0)
, func3_(0)
, func6_(0)
, func7_(0) {
for (int i = 0; i < MAX_REG_SOURCES; ++i) {
for (uint32_t i = 0; i < MAX_REG_SOURCES; ++i) {
rsrc_type_[i] = RegType::None;
}
}
/* Setters used to "craft" the instruction. */
void setOpcode(Opcode opcode) { opcode_ = opcode; }
void setDestReg(int destReg) { rdest_type_ = RegType::Integer; rdest_ = destReg; }
void setSrcReg(int srcReg) { rsrc_type_[num_rsrcs_] = RegType::Integer; rsrc_[num_rsrcs_++] = srcReg; }
void setDestFReg(int destReg) { rdest_type_ = RegType::Float; rdest_ = destReg; }
void setSrcFReg(int srcReg) { rsrc_type_[num_rsrcs_] = RegType::Float; rsrc_[num_rsrcs_++] = srcReg; }
void setDestVReg(int destReg) { rdest_type_ = RegType::Vector; rdest_ = destReg; }
void setSrcVReg(int srcReg) { rsrc_type_[num_rsrcs_] = RegType::Vector; rsrc_[num_rsrcs_++] = srcReg; }
void setFunc2(Word func2) { func2_ = func2; }
void setFunc3(Word func3) { func3_ = func3; }
void setFunc7(Word func7) { func7_ = func7; }
void setImm(Word imm) { has_imm_ = true; imm_ = imm; }
void setVlsWidth(Word width) { vlsWidth_ = width; }
void setVmop(Word mop) { vMop_ = mop; }
void setVnf(Word nf) { vNf_ = nf; }
void setVmask(Word mask) { vmask_ = mask; }
void setVs3(Word vs) { vs3_ = vs; }
void setVlmul(Word lmul) { vlmul_ = 1 << lmul; }
void setVsew(Word sew) { vsew_ = 1 << (3+sew); }
void setVediv(Word ediv) { vediv_ = 1 << ediv; }
void setFunc6(Word func6) { func6_ = func6; }
void setDestReg(uint32_t destReg, RegType type) { rdest_type_ = type; rdest_ = destReg; }
void setSrcReg(uint32_t srcReg, RegType type) { rsrc_type_[num_rsrcs_] = type; rsrc_[num_rsrcs_++] = srcReg; }
void setDestVReg(uint32_t destReg) { rdest_type_ = RegType::Vector; rdest_ = destReg; }
void setSrcVReg(uint32_t srcReg) { rsrc_type_[num_rsrcs_] = RegType::Vector; rsrc_[num_rsrcs_++] = srcReg; }
void setFunc2(uint32_t func2) { func2_ = func2; }
void setFunc3(uint32_t func3) { func3_ = func3; }
void setFunc7(uint32_t func7) { func7_ = func7; }
void setImm(uint32_t imm) { has_imm_ = true; imm_ = imm; }
void setVlsWidth(uint32_t width) { vlsWidth_ = width; }
void setVmop(uint32_t mop) { vMop_ = mop; }
void setVnf(uint32_t nf) { vNf_ = nf; }
void setVmask(uint32_t mask) { vmask_ = mask; }
void setVs3(uint32_t vs) { vs3_ = vs; }
void setVlmul(uint32_t lmul) { vlmul_ = 1 << lmul; }
void setVsew(uint32_t sew) { vsew_ = 1 << (3+sew); }
void setVediv(uint32_t ediv) { vediv_ = 1 << ediv; }
void setFunc6(uint32_t func6) { func6_ = func6; }
/* Getters used by encoders. */
Opcode getOpcode() const { return opcode_; }
Word getFunc2() const { return func2_; }
Word getFunc3() const { return func3_; }
Word getFunc6() const { return func6_; }
Word getFunc7() const { return func7_; }
int getNRSrc() const { return num_rsrcs_; }
int getRSrc(int i) const { return rsrc_[i]; }
RegType getRSType(int i) const { return rsrc_type_[i]; }
int getRDest() const { return rdest_; }
uint32_t getFunc2() const { return func2_; }
uint32_t getFunc3() const { return func3_; }
uint32_t getFunc6() const { return func6_; }
uint32_t getFunc7() const { return func7_; }
uint32_t getNRSrc() const { return num_rsrcs_; }
uint32_t getRSrc(uint32_t i) const { return rsrc_[i]; }
RegType getRSType(uint32_t i) const { return rsrc_type_[i]; }
uint32_t getRDest() const { return rdest_; }
RegType getRDType() const { return rdest_type_; }
bool hasImm() const { return has_imm_; }
Word getImm() const { return imm_; }
Word getVlsWidth() const { return vlsWidth_; }
Word getVmop() const { return vMop_; }
Word getvNf() const { return vNf_; }
Word getVmask() const { return vmask_; }
Word getVs3() const { return vs3_; }
Word getVlmul() const { return vlmul_; }
Word getVsew() const { return vsew_; }
Word getVediv() const { return vediv_; }
uint32_t getImm() const { return imm_; }
uint32_t getVlsWidth() const { return vlsWidth_; }
uint32_t getVmop() const { return vMop_; }
uint32_t getvNf() const { return vNf_; }
uint32_t getVmask() const { return vmask_; }
uint32_t getVs3() const { return vs3_; }
uint32_t getVlmul() const { return vlmul_; }
uint32_t getVsew() const { return vsew_; }
uint32_t getVediv() const { return vediv_; }
private:
@@ -112,27 +113,27 @@ private:
};
Opcode opcode_;
int num_rsrcs_;
uint32_t num_rsrcs_;
bool has_imm_;
RegType rdest_type_;
Word imm_;
uint32_t imm_;
RegType rsrc_type_[MAX_REG_SOURCES];
int rsrc_[MAX_REG_SOURCES];
int rdest_;
Word func2_;
Word func3_;
Word func6_;
uint32_t rsrc_[MAX_REG_SOURCES];
uint32_t rdest_;
uint32_t func2_;
uint32_t func3_;
uint32_t func6_;
uint32_t func7_;
// Vector
Word vmask_;
Word vlsWidth_;
Word vMop_;
Word vNf_;
Word vs3_;
Word vlmul_;
Word vsew_;
Word vediv_;
Word func7_;
uint32_t vmask_;
uint32_t vlsWidth_;
uint32_t vMop_;
uint32_t vNf_;
uint32_t vs3_;
uint32_t vlmul_;
uint32_t vsew_;
uint32_t vediv_;
friend std::ostream &operator<<(std::ostream &, const Instr&);
};

View File

@@ -11,13 +11,13 @@
#include "constants.h"
#include <util.h>
#include "args.h"
#include "core.h"
using namespace vortex;
int main(int argc, char **argv) {
int exitcode = 0;
std::string archStr("rv32imf");
std::string imgFileName;
int num_cores(NUM_CORES * NUM_CLUSTERS);
int num_warps(NUM_WARPS);
@@ -26,15 +26,14 @@ int main(int argc, char **argv) {
bool showStats(false);
bool riscv_test(false);
/* Read the command line arguments. */
CommandLineArgFlag fh("-h", "--help", "", showHelp);
CommandLineArgSetter<std::string> fa("-a", "--arch", "", archStr);
CommandLineArgSetter<std::string> fi("-i", "--image", "", imgFileName);
CommandLineArgSetter<int> fc("-c", "--cores", "", num_cores);
CommandLineArgSetter<int> fw("-w", "--warps", "", num_warps);
CommandLineArgSetter<int> ft("-t", "--threads", "", num_threads);
CommandLineArgFlag fr("-r", "--riscv", "", riscv_test);
CommandLineArgFlag fs("-s", "--stats", "", showStats);
// parse the command line arguments
CommandLineArgFlag fh("-h", "--help", "show command line options", showHelp);
CommandLineArgSetter<std::string> fi("-i", "--image", "program binary", imgFileName);
CommandLineArgSetter<int> fc("-c", "--cores", "number of cores", num_cores);
CommandLineArgSetter<int> fw("-w", "--warps", "number of warps", num_warps);
CommandLineArgSetter<int> ft("-t", "--threads", "number of threads", num_threads);
CommandLineArgFlag fr("-r", "--riscv", "enable riscv tests", riscv_test);
CommandLineArgFlag fs("-s", "--stats", "show stats", showStats);
CommandLineArg::readArgs(argc - 1, argv + 1);
@@ -44,7 +43,6 @@ int main(int argc, char **argv) {
" -c, --cores <num> Number of cores\n"
" -w, --warps <num> Number of warps\n"
" -t, --threads <num> Number of threads\n"
" -a, --arch <arch string> Architecture string\n"
" -r, --riscv riscv test\n"
" -s, --stats Print stats on exit.\n";
return 0;
@@ -54,7 +52,7 @@ int main(int argc, char **argv) {
{
// create processor configuration
ArchDef arch(archStr, num_cores, num_warps, num_threads);
ArchDef arch(num_cores, num_warps, num_threads);
// create memory module
RAM ram(RAM_PAGE_SIZE);
@@ -79,7 +77,8 @@ int main(int argc, char **argv) {
processor.attach_ram(&ram);
// run simulation
processor.run();
exitcode = processor.run();
}
if (riscv_test) {

View File

@@ -15,8 +15,8 @@ struct pipeline_trace_t {
uint64_t uuid;
//--
int cid;
int wid;
uint32_t cid;
uint32_t wid;
ThreadMask tmask;
Word PC;
@@ -26,7 +26,7 @@ struct pipeline_trace_t {
//--
bool wb;
RegType rdest_type;
int rdest;
uint32_t rdest;
//--
RegMask used_iregs;

View File

@@ -28,7 +28,7 @@ public:
}
void clear() {
for (int i = 0, n = in_use_iregs_.size(); i < n; ++i) {
for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) {
in_use_iregs_.at(i).reset();
in_use_fregs_.at(i).reset();
in_use_vregs_.at(i).reset();

View File

@@ -61,10 +61,11 @@ uint32_t TexUnit::read(int32_t u,
uint32_t addr11 = base_addr + offset11 * stride;
// memory lookup
uint32_t texel00 = core_->dcache_read(addr00, stride);
uint32_t texel01 = core_->dcache_read(addr01, stride);
uint32_t texel10 = core_->dcache_read(addr10, stride);
uint32_t texel11 = core_->dcache_read(addr11, stride);
uint32_t texel00(0), texel01(0), texel10(0), texel11(0);
core_->dcache_read(&texel00, addr00, stride);
core_->dcache_read(&texel01, addr01, stride);
core_->dcache_read(&texel10, addr10, stride);
core_->dcache_read(&texel11, addr11, stride);
mem_addrs->push_back({addr00, stride});
mem_addrs->push_back({addr01, stride});
@@ -84,7 +85,8 @@ uint32_t TexUnit::read(int32_t u,
uint32_t addr = base_addr + offset * stride;
// memory lookup
uint32_t texel = core_->dcache_read(addr, stride);
uint32_t texel(0);
core_->dcache_read(&texel, addr, stride);
mem_addrs->push_back({addr, stride});
// filtering

View File

@@ -10,12 +10,22 @@
namespace vortex {
typedef uint8_t Byte;
typedef uint8_t Byte;
#if XLEN == 32
typedef uint32_t Word;
typedef int32_t WordI;
typedef uint64_t DWord;
typedef int64_t DWordI;
#elif XLEN == 64
typedef uint64_t Word;
typedef int64_t WordI;
typedef __uint128_t DWord;
typedef __int128_t DWordI;
#else
#error unsupported XLEN
#endif
typedef uint32_t Addr;
typedef uint32_t Size;
typedef uint64_t FWord;
typedef std::bitset<32> RegMask;
typedef std::bitset<32> ThreadMask;
@@ -30,12 +40,12 @@ enum class RegType {
Vector
};
inline std::ostream &operator<<(std::ostream &os, const RegType& type) {
switch (type) {
inline std::ostream &operator<<(std::ostream &os, const RegType& clss) {
switch (clss) {
case RegType::None: break;
case RegType::Integer: os << "r"; break;
case RegType::Float: os << "fr"; break;
case RegType::Vector: os << "vr"; break;
case RegType::Integer: os << "x"; break;
case RegType::Float: os << "f"; break;
case RegType::Vector: os << "v"; break;
}
return os;
}
@@ -232,7 +242,7 @@ struct MemReq {
inline std::ostream &operator<<(std::ostream &os, const MemReq& req) {
os << "mem-" << (req.write ? "wr" : "rd") << ": ";
os << "addr=" << req.addr << ", tag=" << req.tag << ", core_id=" << req.core_id;
os << "addr=" << std::hex << req.addr << std::dec << ", tag=" << req.tag << ", core_id=" << req.core_id;
os << " (#" << std::dec << req.uuid << ")";
return os;
}

View File

@@ -10,11 +10,11 @@
using namespace vortex;
Warp::Warp(Core *core, Word id)
Warp::Warp(Core *core, uint32_t id)
: id_(id)
, core_(core)
, ireg_file_(core->arch().num_threads(), std::vector<Word>(core->arch().num_regs()))
, freg_file_(core->arch().num_threads(), std::vector<Word>(core->arch().num_regs()))
, freg_file_(core->arch().num_threads(), std::vector<FWord>(core->arch().num_regs()))
, vreg_file_(core->arch().num_threads(), std::vector<Byte>(core->arch().vsize()))
{
this->clear();
@@ -24,7 +24,7 @@ void Warp::clear() {
active_ = false;
PC_ = STARTUP_ADDR;
tmask_.reset();
for (int i = 0, n = core_->arch().num_threads(); i < n; ++i) {
for (uint32_t i = 0, n = core_->arch().num_threads(); i < n; ++i) {
for (auto& reg : ireg_file_.at(i)) {
reg = 0;
}
@@ -41,13 +41,14 @@ void Warp::eval(pipeline_trace_t *trace) {
assert(tmask_.any());
DPH(2, "Fetch: coreid=" << core_->id() << ", wid=" << id_ << ", tmask=");
for (int i = 0, n = core_->arch().num_threads(); i < n; ++i)
for (uint32_t i = 0, n = core_->arch().num_threads(); i < n; ++i)
DPN(2, tmask_.test(n-i-1));
DPN(2, ", PC=0x" << std::hex << PC_ << " (#" << std::dec << trace->uuid << ")" << std::endl);
/* Fetch and decode. */
Word instr_code = core_->icache_read(PC_, sizeof(Word));
uint32_t instr_code = 0;
core_->icache_read(&instr_code, PC_, sizeof(uint32_t));
auto instr = core_->decoder().decode(instr_code);
if (!instr) {
std::cout << std::hex << "Error: invalid instruction 0x" << instr_code << ", at PC=" << PC_ << std::endl;
@@ -68,10 +69,16 @@ void Warp::eval(pipeline_trace_t *trace) {
this->execute(*instr, trace);
DP(4, "Register state:");
for (int i = 0; i < core_->arch().num_regs(); ++i) {
for (uint32_t i = 0; i < core_->arch().num_regs(); ++i) {
DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
for (int j = 0; j < core_->arch().num_threads(); ++j) {
DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << ireg_file_.at(j).at(i) << std::setfill(' ') << ' ');
// Integer register file
for (uint32_t j = 0; j < core_->arch().num_threads(); ++j) {
DPN(4, ' ' << std::setfill('0') << std::setw(XLEN/4) << std::hex << ireg_file_.at(j).at(i) << std::setfill(' ') << ' ');
}
DPN(4, '|');
// Floating point register file
for (uint32_t j = 0; j < core_->arch().num_threads(); ++j) {
DPN(4, ' ' << std::setfill('0') << std::setw(16) << std::hex << freg_file_.at(j).at(i) << std::setfill(' ') << ' ');
}
DPN(4, std::endl);
}

View File

@@ -32,15 +32,15 @@ struct DomStackEntry {
};
struct vtype {
int vill;
int vediv;
int vsew;
int vlmul;
uint32_t vill;
uint32_t vediv;
uint32_t vsew;
uint32_t vlmul;
};
class Warp {
public:
Warp(Core *core, Word id);
Warp(Core *core, uint32_t id);
void clear();
@@ -62,15 +62,15 @@ public:
return 0;
}
Word id() const {
uint32_t id() const {
return id_;
}
Word getPC() const {
uint32_t getPC() const {
return PC_;
}
void setPC(Word PC) {
void setPC(uint32_t PC) {
PC_ = PC;
}
@@ -79,13 +79,13 @@ public:
active_ = tmask_.any();
}
Word getTmask() const {
uint32_t getTmask() const {
if (active_)
return tmask_.to_ulong();
return 0;
}
Word getIRegValue(int reg) const {
uint32_t getIRegValue(uint32_t reg) const {
return ireg_file_.at(0).at(reg);
}
@@ -95,7 +95,7 @@ private:
void execute(const Instr &instr, pipeline_trace_t *trace);
Word id_;
uint32_t id_;
Core *core_;
bool active_;
@@ -103,12 +103,12 @@ private:
ThreadMask tmask_;
std::vector<std::vector<Word>> ireg_file_;
std::vector<std::vector<Word>> freg_file_;
std::vector<std::vector<FWord>> freg_file_;
std::vector<std::vector<Byte>> vreg_file_;
std::stack<DomStackEntry> dom_stack_;
struct vtype vtype_;
int vl_;
uint32_t vl_;
};
}