Merged branch xlen-parameterization into staging
This commit is contained in:
@@ -71,9 +71,30 @@ inline uint64_t bit_getw(uint64_t bits, uint32_t start, uint32_t end) {
|
||||
}
|
||||
|
||||
// Apply integer sign extension
|
||||
// Sign-extend the low `width` bits of `word` to a full 32-bit value.
// Bits above `width` in the input are ignored: the result's upper bits are
// copies of bit (width - 1), so stale upper bits in `word` cannot leak out.
// `width` must be in [2, 32]; this is enforced with assert.
inline uint32_t sext(uint32_t word, uint32_t width) {
  assert(width > 1);
  assert(width <= 32);
  // Shifting a 32-bit value by 32 is undefined, so handle full width first.
  if (width == 32)
    return word;
  const uint32_t mask = (uint32_t(1) << width) - 1;
  const bool negative = ((word >> (width - 1)) & 0x1) != 0;
  return negative ? (word | ~mask) : (word & mask);
}
|
||||
|
||||
// Sign-extend the low `width` bits of `word` to a full 64-bit value.
// Bits above `width` in the input are ignored: the result's upper bits are
// copies of bit (width - 1). This matters when a 64-bit intermediate is
// narrowed to 32 bits and its upper half still holds carry/garbage bits.
// `width` must be in [2, 64]; this is enforced with assert.
inline uint64_t sext(uint64_t word, uint32_t width) {
  assert(width > 1);
  assert(width <= 64);
  // Shifting a 64-bit value by 64 is undefined, so handle full width first.
  if (width == 64)
    return word;
  const uint64_t mask = (uint64_t(1) << width) - 1;
  const bool negative = ((word >> (width - 1)) & 0x1) != 0;
  return negative ? (word | ~mask) : (word & mask);
}
|
||||
|
||||
// Sign-extend the low `width` bits of `word` to a full 128-bit value.
// Bits above `width` in the input are ignored: the result's upper bits are
// copies of bit (width - 1).
// `width` must be in [2, 128]; this is enforced with assert.
inline __uint128_t sext(__uint128_t word, uint32_t width) {
  assert(width > 1);
  assert(width <= 128);
  // Shifting a 128-bit value by 128 is undefined, so handle full width first.
  if (width == 128)
    return word;
  const __uint128_t mask = (__uint128_t(1) << width) - 1;
  const bool negative = ((word >> (width - 1)) & 0x1) != 0;
  return negative ? (word | ~mask) : (word & mask);
}
|
||||
|
||||
|
||||
@@ -226,9 +226,9 @@ void RAM::read(void *data, uint64_t addr, uint64_t size) {
|
||||
}
|
||||
|
||||
void RAM::write(const void *data, uint64_t addr, uint64_t size) {
|
||||
const uint8_t* s = (const uint8_t*)data;
|
||||
const uint8_t* d = (const uint8_t*)data;
|
||||
for (uint64_t i = 0; i < size; i++) {
|
||||
*this->get(addr + i) = s[i];
|
||||
*this->get(addr + i) = d[i];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -276,7 +276,7 @@ void RAM::loadHexImage(const char* filename) {
|
||||
ifs.seekg(0, ifs.beg);
|
||||
ifs.read(content.data(), size);
|
||||
|
||||
int offset = 0;
|
||||
uint32_t offset = 0;
|
||||
char *line = content.data();
|
||||
|
||||
this->clear();
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <cstdint>
|
||||
|
||||
@@ -8,10 +8,13 @@ extern "C" {
|
||||
}
|
||||
|
||||
#define F32_SIGN 0x80000000
|
||||
#define F64_SIGN 0x8000000000000000
|
||||
|
||||
// Wrap a raw 32-bit pattern in a float32_t; the bits are stored unchanged.
inline float32_t to_float32_t(uint32_t x) {
  float32_t wrapped{x};
  return wrapped;
}
|
||||
// Wrap a raw 64-bit pattern in a float64_t; the bits are stored unchanged.
inline float64_t to_float64_t(uint64_t x) {
  float64_t wrapped{x};
  return wrapped;
}
|
||||
|
||||
// Extract the raw bit pattern held in a float32_t's `v` member.
inline uint32_t from_float32_t(float32_t x) {
  const uint32_t bits = uint32_t(x.v);
  return bits;
}
|
||||
// Extract the raw bit pattern held in a float64_t's `v` member.
inline uint64_t from_float64_t(float64_t x) {
  const uint64_t bits = uint64_t(x.v);
  return bits;
}
|
||||
|
||||
inline uint32_t get_fflags() {
|
||||
uint32_t fflags = softfloat_exceptionFlags;
|
||||
@@ -25,121 +28,290 @@ inline uint32_t get_fflags() {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
uint32_t rv_fadd(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
uint32_t rv_fadd_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_add(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fsub(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
uint64_t rv_fadd_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_add(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fsub_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_sub(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmul(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
uint64_t rv_fsub_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_sub(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmul_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_mul(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmadd(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
uint64_t rv_fmul_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_mul(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_mulAdd(to_float32_t(a), to_float32_t(b), to_float32_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmsub(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
uint64_t rv_fmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
int c_neg = c ^ F32_SIGN;
|
||||
auto r = f64_mulAdd(to_float64_t(a), to_float64_t(b), to_float64_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto c_neg = c ^ F32_SIGN;
|
||||
auto r = f32_mulAdd(to_float32_t(a), to_float32_t(b), to_float32_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fnmadd(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
uint64_t rv_fmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
int a_neg = a ^ F32_SIGN;
|
||||
int c_neg = c ^ F32_SIGN;
|
||||
auto c_neg = c ^ F64_SIGN;
|
||||
auto r = f64_mulAdd(to_float64_t(a), to_float64_t(b), to_float64_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fnmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto a_neg = a ^ F32_SIGN;
|
||||
auto c_neg = c ^ F32_SIGN;
|
||||
auto r = f32_mulAdd(to_float32_t(a_neg), to_float32_t(b), to_float32_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fnmsub(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
uint64_t rv_fnmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
int a_neg = a ^ F32_SIGN;
|
||||
auto a_neg = a ^ F64_SIGN;
|
||||
auto c_neg = c ^ F64_SIGN;
|
||||
auto r = f64_mulAdd(to_float64_t(a_neg), to_float64_t(b), to_float64_t(c_neg));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fnmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto a_neg = a ^ F32_SIGN;
|
||||
auto r = f32_mulAdd(to_float32_t(a_neg), to_float32_t(b), to_float32_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fdiv(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
uint64_t rv_fnmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto a_neg = a ^ F64_SIGN;
|
||||
auto r = f64_mulAdd(to_float64_t(a_neg), to_float64_t(b), to_float64_t(c));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fdiv_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_div(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fsqrt(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_div(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_sqrt(to_float32_t(a));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_ftoi(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
uint64_t rv_fsqrt_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_sqrt(to_float64_t(a));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_ftoi_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_to_i32(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_ftou(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
uint32_t rv_ftoi_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_to_i32(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_ftou_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_to_ui32(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_itof(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
uint32_t rv_ftou_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_to_ui32(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftol_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_to_i64(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftol_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_to_i64(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftolu_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_to_ui64(to_float32_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_ftolu_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_to_ui64(to_float64_t(a), frm, true);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_itof_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = i32_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_utof(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
uint64_t rv_itof_d(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = i32_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_utof_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = ui32_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_flt(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
uint64_t rv_utof_d(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = ui32_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_ltof_s(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = i64_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_ltof_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = i64_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_lutof_s(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = ui64_to_f32(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_lutof_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = ui64_to_f64(a);
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
bool rv_flt_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
auto r = f32_lt(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fle(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
bool rv_flt_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
auto r = f64_lt(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_fle_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
auto r = f32_le(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_feq(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
bool rv_fle_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
auto r = f64_le(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
bool rv_feq_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
auto r = f32_eq(to_float32_t(a), to_float32_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fmin(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
int r;
|
||||
bool rv_feq_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
auto r = f64_eq(to_float64_t(a), to_float64_t(b));
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
uint32_t r;
|
||||
if (isNaNF32UI(a) && isNaNF32UI(b)) {
|
||||
r = defaultNaNF32UI;
|
||||
} else {
|
||||
@@ -156,8 +328,26 @@ uint32_t rv_fmin(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fmax(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
int r;
|
||||
uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
uint64_t r;
|
||||
if (isNaNF64UI(a) && isNaNF64UI(b)) {
|
||||
r = defaultNaNF64UI;
|
||||
} else {
|
||||
auto fa = to_float64_t(a);
|
||||
auto fb = to_float64_t(b);
|
||||
if ((f64_lt_quiet(fa, fb) || (f64_eq(fa, fb) && (a & F64_SIGN)))
|
||||
|| isNaNF64UI(b)) {
|
||||
r = a;
|
||||
} else {
|
||||
r = b;
|
||||
}
|
||||
}
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
uint32_t r;
|
||||
if (isNaNF32UI(a) && isNaNF32UI(b)) {
|
||||
r = defaultNaNF32UI;
|
||||
} else {
|
||||
@@ -174,7 +364,25 @@ uint32_t rv_fmax(uint32_t a, uint32_t b, uint32_t* fflags) {
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fclss(uint32_t a) {
|
||||
uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags) {
|
||||
uint64_t r;
|
||||
if (isNaNF64UI(a) && isNaNF64UI(b)) {
|
||||
r = defaultNaNF64UI;
|
||||
} else {
|
||||
auto fa = to_float64_t(a);
|
||||
auto fb = to_float64_t(b);
|
||||
if ((f64_lt_quiet(fb, fa) || (f64_eq(fb, fa) && (b & F64_SIGN)))
|
||||
|| isNaNF64UI(b)) {
|
||||
r = a;
|
||||
} else {
|
||||
r = b;
|
||||
}
|
||||
}
|
||||
if (fflags) { *fflags = get_fflags(); }
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fclss_s(uint32_t a) {
|
||||
auto infOrNaN = (0xff == expF32UI(a));
|
||||
auto subnormOrZero = (0 == expF32UI(a));
|
||||
bool sign = signF32UI(a);
|
||||
@@ -182,7 +390,7 @@ uint32_t rv_fclss(uint32_t a) {
|
||||
bool isNaN = isNaNF32UI(a);
|
||||
bool isSNaN = softfloat_isSigNaNF32UI(a);
|
||||
|
||||
int r =
|
||||
uint32_t r =
|
||||
( sign && infOrNaN && fracZero ) << 0 |
|
||||
( sign && !infOrNaN && !subnormOrZero ) << 1 |
|
||||
( sign && subnormOrZero && !fracZero ) << 2 |
|
||||
@@ -197,31 +405,77 @@ uint32_t rv_fclss(uint32_t a) {
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fsgnj(uint32_t a, uint32_t b) {
|
||||
|
||||
int sign = b & F32_SIGN;
|
||||
int r = sign | (a & ~F32_SIGN);
|
||||
uint32_t rv_fclss_d(uint64_t a) {
|
||||
auto infOrNaN = (0x7ff == expF64UI(a));
|
||||
auto subnormOrZero = (0 == expF64UI(a));
|
||||
bool sign = signF64UI(a);
|
||||
bool fracZero = (0 == fracF64UI(a));
|
||||
bool isNaN = isNaNF64UI(a);
|
||||
bool isSNaN = softfloat_isSigNaNF64UI(a);
|
||||
|
||||
uint32_t r =
|
||||
( sign && infOrNaN && fracZero ) << 0 |
|
||||
( sign && !infOrNaN && !subnormOrZero ) << 1 |
|
||||
( sign && subnormOrZero && !fracZero ) << 2 |
|
||||
( sign && subnormOrZero && fracZero ) << 3 |
|
||||
( !sign && infOrNaN && fracZero ) << 7 |
|
||||
( !sign && !infOrNaN && !subnormOrZero ) << 6 |
|
||||
( !sign && subnormOrZero && !fracZero ) << 5 |
|
||||
( !sign && subnormOrZero && fracZero ) << 4 |
|
||||
( isNaN && isSNaN ) << 8 |
|
||||
( isNaN && !isSNaN ) << 9;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fsgnjn(uint32_t a, uint32_t b) {
|
||||
|
||||
int sign = ~b & F32_SIGN;
|
||||
int r = sign | (a & ~F32_SIGN);
|
||||
|
||||
uint32_t rv_fsgnj_s(uint32_t a, uint32_t b) {
|
||||
auto sign = b & F32_SIGN;
|
||||
auto r = sign | (a & ~F32_SIGN);
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fsgnjx(uint32_t a, uint32_t b) {
|
||||
|
||||
int sign1 = a & F32_SIGN;
|
||||
int sign2 = b & F32_SIGN;
|
||||
int r = (sign1 ^ sign2) | (a & ~F32_SIGN);
|
||||
|
||||
uint64_t rv_fsgnj_d(uint64_t a, uint64_t b) {
|
||||
auto sign = b & F64_SIGN;
|
||||
auto r = sign | (a & ~F64_SIGN);
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fsgnjn_s(uint32_t a, uint32_t b) {
|
||||
auto sign = ~b & F32_SIGN;
|
||||
auto r = sign | (a & ~F32_SIGN);
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_fsgnjn_d(uint64_t a, uint64_t b) {
|
||||
auto sign = ~b & F64_SIGN;
|
||||
auto r = sign | (a & ~F64_SIGN);
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_fsgnjx_s(uint32_t a, uint32_t b) {
|
||||
auto sign1 = a & F32_SIGN;
|
||||
auto sign2 = b & F32_SIGN;
|
||||
auto r = (sign1 ^ sign2) | (a & ~F32_SIGN);
|
||||
return r;
|
||||
}
|
||||
|
||||
uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b) {
|
||||
auto sign1 = a & F64_SIGN;
|
||||
auto sign2 = b & F64_SIGN;
|
||||
auto r = (sign1 ^ sign2) | (a & ~F64_SIGN);
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_dtof(uint64_t a) {
|
||||
auto r = f64_to_f32(to_float64_t(a));
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_ftod(uint32_t a) {
|
||||
auto r = f32_to_f64(to_float32_t(a));
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -7,32 +7,73 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
uint32_t rv_fadd(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fsub(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fmul(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fmadd(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fmsub(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fnmadd(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fnmsub(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fadd_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fsub_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fmul_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fnmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fnmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fdiv_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
|
||||
uint32_t rv_fdiv(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fsqrt(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_ftoi_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_ftou_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_itof_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_utof_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
|
||||
uint32_t rv_ftoi(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_ftou(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_itof(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_utof(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_ftol_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_ftolu_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_ltof_s(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_lutof_s(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
|
||||
uint32_t rv_fclss(uint32_t a);
|
||||
uint32_t rv_fsgnj(uint32_t a, uint32_t b);
|
||||
uint32_t rv_fsgnjn(uint32_t a, uint32_t b);
|
||||
uint32_t rv_fsgnjx(uint32_t a, uint32_t b);
|
||||
uint32_t rv_fclss_s(uint32_t a);
|
||||
|
||||
uint32_t rv_flt(uint32_t a, uint32_t b, uint32_t* fflags);
|
||||
uint32_t rv_fle(uint32_t a, uint32_t b, uint32_t* fflags);
|
||||
uint32_t rv_feq(uint32_t a, uint32_t b, uint32_t* fflags);
|
||||
uint32_t rv_fmin(uint32_t a, uint32_t b, uint32_t* fflags);
|
||||
uint32_t rv_fmax(uint32_t a, uint32_t b, uint32_t* fflags);
|
||||
uint32_t rv_fsgnj_s(uint32_t a, uint32_t b);
|
||||
uint32_t rv_fsgnjn_s(uint32_t a, uint32_t b);
|
||||
uint32_t rv_fsgnjx_s(uint32_t a, uint32_t b);
|
||||
|
||||
bool rv_flt_s(uint32_t a, uint32_t b, uint32_t* fflags);
|
||||
bool rv_fle_s(uint32_t a, uint32_t b, uint32_t* fflags);
|
||||
bool rv_feq_s(uint32_t a, uint32_t b, uint32_t* fflags);
|
||||
uint32_t rv_fmin_s(uint32_t a, uint32_t b, uint32_t* fflags);
|
||||
uint32_t rv_fmax_s(uint32_t a, uint32_t b, uint32_t* fflags);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
uint64_t rv_fadd_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_fsub_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_fmul_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_fsqrt_d(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
|
||||
uint64_t rv_fmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_fmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_fnmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_fnmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags);
|
||||
|
||||
uint32_t rv_ftoi_d(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_ftou_d(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_ftol_d(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_ftolu_d(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_itof_d(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_utof_d(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_ltof_d(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_lutof_d(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
|
||||
uint32_t rv_fclss_d(uint64_t a);
|
||||
uint64_t rv_fsgnj_d(uint64_t a, uint64_t b);
|
||||
uint64_t rv_fsgnjn_d(uint64_t a, uint64_t b);
|
||||
uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b);
|
||||
|
||||
bool rv_flt_d(uint64_t a, uint64_t b, uint32_t* fflags);
|
||||
bool rv_fle_d(uint64_t a, uint64_t b, uint32_t* fflags);
|
||||
bool rv_feq_d(uint64_t a, uint64_t b, uint32_t* fflags);
|
||||
uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags);
|
||||
uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags);
|
||||
|
||||
uint32_t rv_dtof(uint64_t a);
|
||||
uint64_t rv_ftod(uint32_t a);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ inline void Unpack8888(TexFormat format,
|
||||
uint32_t texel,
|
||||
uint32_t* lo,
|
||||
uint32_t* hi) {
|
||||
int r, g, b, a;
|
||||
uint32_t r, g, b, a;
|
||||
switch (format) {
|
||||
case TexFormat::A8R8G8B8:
|
||||
r = (texel >> 16) & 0xff;
|
||||
|
||||
@@ -29,6 +29,11 @@ else
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
# XLEN parameterization
|
||||
ifdef XLEN
|
||||
CXXFLAGS += -DXLEN=$(XLEN)
|
||||
endif
|
||||
|
||||
PROJECT = simx
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
@@ -43,4 +48,4 @@ $(DESTDIR)/lib$(PROJECT).so: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT) $(DESTDIR)/lib$(PROJECT).so
|
||||
|
||||
@@ -21,8 +21,7 @@ private:
|
||||
uint16_t num_barriers_;
|
||||
|
||||
public:
|
||||
ArchDef(const std::string& /*arch*/,
|
||||
uint16_t num_cores,
|
||||
ArchDef(uint16_t num_cores,
|
||||
uint16_t num_warps,
|
||||
uint16_t num_threads)
|
||||
: num_cores_(num_cores)
|
||||
|
||||
@@ -488,11 +488,11 @@ private:
|
||||
} else {
|
||||
bool hit = false;
|
||||
bool found_free_block = false;
|
||||
int hit_block_id = 0;
|
||||
int repl_block_id = 0;
|
||||
uint32_t hit_block_id = 0;
|
||||
uint32_t repl_block_id = 0;
|
||||
uint32_t max_cnt = 0;
|
||||
|
||||
for (int i = 0, n = set.blocks.size(); i < n; ++i) {
|
||||
for (uint32_t i = 0, n = set.blocks.size(); i < n; ++i) {
|
||||
auto& block = set.blocks.at(i);
|
||||
if (block.valid) {
|
||||
if (block.tag == pipeline_req.tag) {
|
||||
|
||||
@@ -16,7 +16,7 @@ namespace vortex {
|
||||
|
||||
enum Constants {
|
||||
|
||||
SMEM_BANK_OFFSET = log2ceil(sizeof(Word)) + log2ceil(STACK_SIZE / sizeof(Word)),
|
||||
SMEM_BANK_OFFSET = log2ceil(sizeof(uint32_t)) + log2ceil(STACK_SIZE / sizeof(uint32_t)),
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
Core::Core(const SimContext& ctx, const ArchDef &arch, Word id)
|
||||
Core::Core(const SimContext& ctx, const ArchDef &arch, uint32_t id)
|
||||
: SimObject(ctx, "Core")
|
||||
, MemRspPort(this)
|
||||
, MemReqPort(this)
|
||||
@@ -73,7 +73,7 @@ Core::Core(const SimContext& ctx, const ArchDef &arch, Word id)
|
||||
, decode_latch_("decode")
|
||||
, pending_icache_(arch_.num_warps())
|
||||
{
|
||||
for (int i = 0; i < arch_.num_warps(); ++i) {
|
||||
for (uint32_t i = 0; i < arch_.num_warps(); ++i) {
|
||||
warps_.at(i) = std::make_shared<Warp>(this, i);
|
||||
}
|
||||
|
||||
@@ -195,7 +195,7 @@ void Core::tick() {
|
||||
|
||||
void Core::schedule() {
|
||||
bool foundSchedule = false;
|
||||
int scheduled_warp = last_schedule_wid_;
|
||||
uint32_t scheduled_warp = last_schedule_wid_;
|
||||
|
||||
// round robin scheduling
|
||||
for (size_t wid = 0, nw = arch_.num_warps(); wid < nw; ++wid) {
|
||||
@@ -367,11 +367,11 @@ void Core::commit() {
|
||||
}
|
||||
}
|
||||
|
||||
WarpMask Core::wspawn(int num_warps, int nextPC) {
|
||||
WarpMask Core::wspawn(uint32_t num_warps, uint32_t nextPC) {
|
||||
WarpMask ret(1);
|
||||
int active_warps = std::min<int>(num_warps, arch_.num_warps());
|
||||
uint32_t active_warps = std::min<uint32_t>(num_warps, arch_.num_warps());
|
||||
DP(3, "*** Activate " << (active_warps-1) << " warps at PC: " << std::hex << nextPC);
|
||||
for (int i = 1; i < active_warps; ++i) {
|
||||
for (uint32_t i = 1; i < active_warps; ++i) {
|
||||
auto warp = warps_.at(i);
|
||||
warp->setPC(nextPC);
|
||||
warp->setTmask(0, true);
|
||||
@@ -380,7 +380,7 @@ WarpMask Core::wspawn(int num_warps, int nextPC) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
WarpMask Core::barrier(int bar_id, int count, int warp_id) {
|
||||
WarpMask Core::barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id) {
|
||||
WarpMask ret(0);
|
||||
auto& barrier = barriers_.at(bar_id);
|
||||
barrier.set(warp_id);
|
||||
@@ -389,7 +389,7 @@ WarpMask Core::barrier(int bar_id, int count, int warp_id) {
|
||||
DP(3, "*** Suspend warp #" << warp_id << " at barrier #" << bar_id);
|
||||
return ret;
|
||||
}
|
||||
for (int i = 0; i < arch_.num_warps(); ++i) {
|
||||
for (uint32_t i = 0; i < arch_.num_warps(); ++i) {
|
||||
if (barrier.test(i)) {
|
||||
DP(3, "*** Resume warp #" << i << " at barrier #" << bar_id);
|
||||
warps_.at(i)->activate();
|
||||
@@ -400,45 +400,45 @@ WarpMask Core::barrier(int bar_id, int count, int warp_id) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
Word Core::icache_read(Addr addr, Size size) {
|
||||
Word data;
|
||||
mmu_.read(&data, addr, size, 0);
|
||||
return data;
|
||||
void Core::icache_read(void *data, uint64_t addr, uint32_t size) {
|
||||
mmu_.read(data, addr, size, 0);
|
||||
}
|
||||
|
||||
Word Core::dcache_read(Addr addr, Size size) {
|
||||
Word data;
|
||||
void Core::dcache_read(void *data, uint64_t addr, uint32_t size) {
|
||||
auto type = get_addr_type(addr, size);
|
||||
if (type == AddrType::Shared) {
|
||||
smem_.read(&data, addr & (SMEM_SIZE-1), size);
|
||||
addr &= (SMEM_SIZE-1);
|
||||
smem_.read(data, addr, size);
|
||||
} else {
|
||||
mmu_.read(&data, addr, size, 0);
|
||||
mmu_.read(data, addr, size, 0);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
void Core::dcache_write(Addr addr, Word data, Size size) {
|
||||
void Core::dcache_write(const void* data, uint64_t addr, uint32_t size) {
|
||||
if (addr >= IO_COUT_ADDR
|
||||
&& addr < (IO_COUT_ADDR + IO_COUT_SIZE)) {
|
||||
this->writeToStdOut(addr, data);
|
||||
&& addr <= (IO_COUT_ADDR + IO_COUT_SIZE - 1)) {
|
||||
this->writeToStdOut(data, addr, size);
|
||||
} else {
|
||||
auto type = get_addr_type(addr, size);
|
||||
if (type == AddrType::Shared) {
|
||||
smem_.write(&data, addr & (SMEM_SIZE-1), size);
|
||||
addr &= (SMEM_SIZE-1);
|
||||
smem_.write(data, addr, size);
|
||||
} else {
|
||||
mmu_.write(&data, addr, size, 0);
|
||||
mmu_.write(data, addr, size, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Word Core::tex_read(uint32_t unit, Word u, Word v, Word lod, std::vector<mem_addr_size_t>* mem_addrs) {
|
||||
uint32_t Core::tex_read(uint32_t unit, uint32_t u, uint32_t v, uint32_t lod, std::vector<mem_addr_size_t>* mem_addrs) {
|
||||
return tex_units_.at(unit).read(u, v, lod, mem_addrs);
|
||||
}
|
||||
|
||||
void Core::writeToStdOut(Addr addr, Word data) {
|
||||
void Core::writeToStdOut(const void* data, uint64_t addr, uint32_t size) {
|
||||
if (size != 1)
|
||||
std::abort();
|
||||
uint32_t tid = (addr - IO_COUT_ADDR) & (IO_COUT_SIZE-1);
|
||||
auto& ss_buf = print_bufs_[tid];
|
||||
char c = (char)data;
|
||||
char c = *(char*)data;
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << tid << ": " << ss_buf.str() << std::flush;
|
||||
@@ -446,7 +446,7 @@ void Core::writeToStdOut(Addr addr, Word data) {
|
||||
}
|
||||
}
|
||||
|
||||
Word Core::get_csr(Addr addr, int tid, int wid) {
|
||||
uint32_t Core::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
||||
switch (addr) {
|
||||
case CSR_SATP:
|
||||
case CSR_PMPCFG0:
|
||||
@@ -502,13 +502,13 @@ Word Core::get_csr(Addr addr, int tid, int wid) {
|
||||
return perf_stats_.instrs & 0xffffffff;
|
||||
case CSR_MINSTRET_H:
|
||||
// NumInsts
|
||||
return (Word)(perf_stats_.instrs >> 32);
|
||||
return (uint32_t)(perf_stats_.instrs >> 32);
|
||||
case CSR_MCYCLE:
|
||||
// NumCycles
|
||||
return (Word)SimPlatform::instance().cycles();
|
||||
return (uint32_t)SimPlatform::instance().cycles();
|
||||
case CSR_MCYCLE_H:
|
||||
// NumCycles
|
||||
return (Word)(SimPlatform::instance().cycles() >> 32);
|
||||
return (uint32_t)(SimPlatform::instance().cycles() >> 32);
|
||||
case CSR_MPM_IBUF_ST:
|
||||
return perf_stats_.ibuf_stalls & 0xffffffff;
|
||||
case CSR_MPM_IBUF_ST_H:
|
||||
@@ -644,7 +644,7 @@ Word Core::get_csr(Addr addr, int tid, int wid) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Core::set_csr(Addr addr, Word value, int /*tid*/, int wid) {
|
||||
void Core::set_csr(uint32_t addr, uint32_t value, uint32_t /*tid*/, uint32_t wid) {
|
||||
if (addr == CSR_FFLAGS) {
|
||||
fcsrs_.at(wid) = (fcsrs_.at(wid) & ~0x1F) | (value & 0x1F);
|
||||
} else if (addr == CSR_FRM) {
|
||||
|
||||
@@ -68,7 +68,7 @@ public:
|
||||
SimPort<MemRsp> MemRspPort;
|
||||
SimPort<MemReq> MemReqPort;
|
||||
|
||||
Core(const SimContext& ctx, const ArchDef &arch, Word id);
|
||||
Core(const SimContext& ctx, const ArchDef &arch, uint32_t id);
|
||||
~Core();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
@@ -79,7 +79,7 @@ public:
|
||||
|
||||
void tick();
|
||||
|
||||
Word id() const {
|
||||
uint32_t id() const {
|
||||
return id_;
|
||||
}
|
||||
|
||||
@@ -95,25 +95,25 @@ public:
|
||||
return perf_stats_;
|
||||
}
|
||||
|
||||
Word getIRegValue(int reg) const {
|
||||
uint32_t getIRegValue(int reg) const {
|
||||
return warps_.at(0)->getIRegValue(reg);
|
||||
}
|
||||
|
||||
Word get_csr(Addr addr, int tid, int wid);
|
||||
uint32_t get_csr(uint32_t addr, uint32_t tid, uint32_t wid);
|
||||
|
||||
void set_csr(Addr addr, Word value, int tid, int wid);
|
||||
void set_csr(uint32_t addr, uint32_t value, uint32_t tid, uint32_t wid);
|
||||
|
||||
WarpMask wspawn(int num_warps, int nextPC);
|
||||
WarpMask wspawn(uint32_t num_warps, uint32_t nextPC);
|
||||
|
||||
WarpMask barrier(int bar_id, int count, int warp_id);
|
||||
WarpMask barrier(uint32_t bar_id, uint32_t count, uint32_t warp_id);
|
||||
|
||||
Word icache_read(Addr, Size);
|
||||
void icache_read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
Word dcache_read(Addr, Size);
|
||||
void dcache_read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
void dcache_write(Addr, Word, Size);
|
||||
void dcache_write(const void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
Word tex_read(uint32_t unit, Word lod, Word u, Word v, std::vector<mem_addr_size_t>* mem_addrs);
|
||||
uint32_t tex_read(uint32_t unit, uint32_t lod, uint32_t u, uint32_t v, std::vector<mem_addr_size_t>* mem_addrs);
|
||||
|
||||
void trigger_ecall();
|
||||
|
||||
@@ -129,11 +129,11 @@ private:
|
||||
void execute();
|
||||
void commit();
|
||||
|
||||
void writeToStdOut(Addr addr, Word data);
|
||||
void writeToStdOut(const void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
void cout_flush();
|
||||
|
||||
Word id_;
|
||||
uint32_t id_;
|
||||
const ArchDef arch_;
|
||||
const Decoder decoder_;
|
||||
MemoryUnit mmu_;
|
||||
@@ -142,7 +142,7 @@ private:
|
||||
|
||||
std::vector<std::shared_ptr<Warp>> warps_;
|
||||
std::vector<WarpMask> barriers_;
|
||||
std::vector<Word> csrs_;
|
||||
std::vector<uint32_t> csrs_;
|
||||
std::vector<Byte> fcsrs_;
|
||||
std::vector<IBuffer> ibuffers_;
|
||||
Scoreboard scoreboard_;
|
||||
|
||||
@@ -19,7 +19,7 @@ struct InstTableEntry_t {
|
||||
InstType iType;
|
||||
};
|
||||
|
||||
static const std::unordered_map<int, struct InstTableEntry_t> sc_instTable = {
|
||||
static const std::unordered_map<Opcode, struct InstTableEntry_t> sc_instTable = {
|
||||
{Opcode::NOP, {false, InstType::N_TYPE}},
|
||||
{Opcode::R_INST, {false, InstType::R_TYPE}},
|
||||
{Opcode::L_INST, {false, InstType::I_TYPE}},
|
||||
@@ -42,15 +42,54 @@ static const std::unordered_map<int, struct InstTableEntry_t> sc_instTable = {
|
||||
{Opcode::VSET, {false, InstType::V_TYPE}},
|
||||
{Opcode::GPGPU, {false, InstType::R_TYPE}},
|
||||
{Opcode::GPU, {false, InstType::R4_TYPE}},
|
||||
{Opcode::R_INST_W, {false, InstType::R_TYPE}},
|
||||
{Opcode::I_INST_W, {false, InstType::I_TYPE}},
|
||||
};
|
||||
|
||||
// Bit-field layout constants used by Decoder::decode() to pick fields out of
// an instruction word: `width_*` is a field size in bits, `shift_*` is the
// right-shift that brings a field's least-significant bit to bit 0, and
// `mask_*` keeps `width` low bits after shifting.
enum Constants {
|
||||
// Field widths, in bits.
width_opcode= 7,
|
||||
width_reg = 5,
|
||||
width_func2 = 2,
|
||||
width_func3 = 3,
|
||||
width_func6 = 6,
|
||||
width_func7 = 7,
|
||||
width_mop = 3,
|
||||
width_vmask = 1,
|
||||
width_i_imm = 12,
|
||||
width_j_imm = 20,
|
||||
width_v_imm = 11,
|
||||
|
||||
// Shift amounts. Each is derived from a lower field's shift plus that
// field's width; the resolved bit position is noted on each line.
shift_opcode= 0,
|
||||
shift_rd = width_opcode, // bit 7
|
||||
shift_func3 = shift_rd + width_reg, // bit 12
|
||||
shift_rs1 = shift_func3 + width_func3, // bit 15
|
||||
shift_rs2 = shift_rs1 + width_reg, // bit 20
|
||||
shift_func2 = shift_rs2 + width_reg, // bit 25
|
||||
shift_func7 = shift_rs2 + width_reg, // bit 25 (same position as func2)
|
||||
shift_rs3 = shift_func7 + width_func2, // bit 27
|
||||
shift_vmop = shift_func7 + width_vmask, // bit 26
|
||||
shift_vnf = shift_vmop + width_mop, // bit 29
|
||||
shift_func6 = shift_func7 + width_vmask, // bit 26
|
||||
shift_vset = shift_func7 + width_func6, // bit 31
|
||||
|
||||
// Masks: `width` one-bits in the low positions.
mask_opcode = (1<<width_opcode)-1,
|
||||
mask_reg = (1<<width_reg)-1,
|
||||
mask_func2 = (1<<width_func2)-1,
|
||||
mask_func3 = (1<<width_func3)-1,
|
||||
mask_func6 = (1<<width_func6)-1,
|
||||
mask_func7 = (1<<width_func7)-1,
|
||||
mask_i_imm = (1<<width_i_imm)-1,
|
||||
mask_j_imm = (1<<width_j_imm)-1,
|
||||
mask_v_imm = (1<<width_v_imm)-1,
|
||||
};
|
||||
|
||||
static const char* op_string(const Instr &instr) {
|
||||
auto opcode = instr.getOpcode();
|
||||
Word func2 = instr.getFunc2();
|
||||
Word func3 = instr.getFunc3();
|
||||
Word func7 = instr.getFunc7();
|
||||
Word rs2 = instr.getRSrc(1);
|
||||
Word imm = instr.getImm();
|
||||
auto func2 = instr.getFunc2();
|
||||
auto func3 = instr.getFunc3();
|
||||
auto func7 = instr.getFunc7();
|
||||
auto rs2 = instr.getRSrc(1);
|
||||
auto imm = instr.getImm();
|
||||
|
||||
switch (opcode) {
|
||||
case Opcode::NOP: return "NOP";
|
||||
@@ -115,8 +154,10 @@ static const char* op_string(const Instr &instr) {
|
||||
case 0: return "LBI";
|
||||
case 1: return "LHI";
|
||||
case 2: return "LW";
|
||||
case 3: return "LD";
|
||||
case 4: return "LBU";
|
||||
case 5: return "LHU";
|
||||
case 6: return "LWU";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
@@ -125,9 +166,38 @@ static const char* op_string(const Instr &instr) {
|
||||
case 0: return "SB";
|
||||
case 1: return "SH";
|
||||
case 2: return "SW";
|
||||
case 3: return "SD";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::R_INST_W:
|
||||
if (func7 & 0x1){
|
||||
switch (func3) {
|
||||
case 0: return "MULW";
|
||||
case 4: return "DIVW";
|
||||
case 5: return "DIVUW";
|
||||
case 6: return "REMW";
|
||||
case 7: return "REMUW";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
} else {
|
||||
switch (func3) {
|
||||
case 0: return func7 ? "SUBW" : "ADDW";
|
||||
case 1: return "SLLW";
|
||||
case 5: return func7 ? "SRAW" : "SRLW";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
case Opcode::I_INST_W:
|
||||
switch (func3) {
|
||||
case 0: return "ADDIW";
|
||||
case 1: return "SLLIW";
|
||||
case 5: return func7 ? "SRAIW" : "SRLIW";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::SYS_INST:
|
||||
switch (func3) {
|
||||
case 0:
|
||||
@@ -150,49 +220,129 @@ static const char* op_string(const Instr &instr) {
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::FENCE: return "FENCE";
|
||||
case Opcode::FL: return (func3 == 0x2) ? "FL" : "VL";
|
||||
case Opcode::FS: return (func3 == 0x2) ? "FS" : "VS";
|
||||
case Opcode::FL:
|
||||
switch (func3) {
|
||||
case 0x1: return "VL";
|
||||
case 0x2: return "FLW";
|
||||
case 0x3: return "FLD";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::FS:
|
||||
switch (func3) {
|
||||
case 0x1: return "VS";
|
||||
case 0x2: return "FSW";
|
||||
case 0x3: return "FSD";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::FCI:
|
||||
switch (func7) {
|
||||
case 0x00: return "FADD";
|
||||
case 0x04: return "FSUB";
|
||||
case 0x08: return "FMUL";
|
||||
case 0x0c: return "FDIV";
|
||||
case 0x2c: return "FSQRT";
|
||||
case 0x00: return "FADD.S";
|
||||
case 0x01: return "FADD.D";
|
||||
case 0x04: return "FSUB.S";
|
||||
case 0x05: return "FSUB.D";
|
||||
case 0x08: return "FMUL.S";
|
||||
case 0x09: return "FMUL.D";
|
||||
case 0x0c: return "FDIV.S";
|
||||
case 0x0d: return "FDIV.D";
|
||||
case 0x2c: return "FSQRT.S";
|
||||
case 0x2d: return "FSQRT.D";
|
||||
case 0x10:
|
||||
switch (func3) {
|
||||
case 0: return "FSGNJ";
|
||||
case 1: return "FSGNJN";
|
||||
case 2: return "FSGNJX";
|
||||
case 0: return "FSGNJ.S";
|
||||
case 1: return "FSGNJN.S";
|
||||
case 2: return "FSGNJX.S";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x11:
|
||||
switch (func3) {
|
||||
case 0: return "FSGNJ.D";
|
||||
case 1: return "FSGNJN.D";
|
||||
case 2: return "FSGNJX.D";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x14:
|
||||
switch (func3) {
|
||||
case 0: return "FMIM";
|
||||
case 1: return "FMAX";
|
||||
case 0: return "FMIN.S";
|
||||
case 1: return "FMAX.S";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x15:
|
||||
switch (func3) {
|
||||
case 0: return "FMIN.D";
|
||||
case 1: return "FMAX.D";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x20: return "FCVT.S.D";
|
||||
case 0x21: return "FCVT.D.S";
|
||||
case 0x50:
|
||||
switch (func3) {
|
||||
case 0: return "FLE";
|
||||
case 1: return "FLT";
|
||||
case 2: return "FEQ";
|
||||
case 0: return "FLE.S";
|
||||
case 1: return "FLT.S";
|
||||
case 2: return "FEQ.S";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x60: return rs2 ? "FCVT.WU.S" : "FCVT.W.S";
|
||||
case 0x68: return rs2 ? "FCVT.S.WU" : "FCVT.S.W";
|
||||
case 0x70: return func3 ? "FLASS" : "FMV.X.W";
|
||||
case 0x51:
|
||||
switch (func3) {
|
||||
case 0: return "FLE.D";
|
||||
case 1: return "FLT.D";
|
||||
case 2: return "FEQ.D";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x60:
|
||||
switch (rs2) {
|
||||
case 0: return "FCVT.W.S";
|
||||
case 1: return "FCVT.WU.S";
|
||||
case 2: return "FCVT.L.S";
|
||||
case 3: return "FCVT.LU.S";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x61:
|
||||
switch (rs2) {
|
||||
case 0: return "FCVT.W.D";
|
||||
case 1: return "FCVT.WU.D";
|
||||
case 2: return "FCVT.L.D";
|
||||
case 3: return "FCVT.LU.D";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x68:
|
||||
switch (rs2) {
|
||||
case 0: return "FCVT.S.W";
|
||||
case 1: return "FCVT.S.WU";
|
||||
case 2: return "FCVT.S.L";
|
||||
case 3: return "FCVT.S.LU";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x69:
|
||||
switch (rs2) {
|
||||
case 0: return "FCVT.D.W";
|
||||
case 1: return "FCVT.D.WU";
|
||||
case 2: return "FCVT.D.L";
|
||||
case 3: return "FCVT.D.LU";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case 0x70: return func3 ? "FCLASS.S" : "FMV.X.W";
|
||||
case 0x71: return func3 ? "FCLASS.D" : "FMV.X.D";
|
||||
case 0x78: return "FMV.W.X";
|
||||
case 0x79: return "FMV.D.X";
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::FMADD: return "FMADD";
|
||||
case Opcode::FMSUB: return "FMSUB";
|
||||
case Opcode::FMNMADD: return "FMNMADD";
|
||||
case Opcode::FMNMSUB: return "FMNMSUB";
|
||||
case Opcode::FMADD: return func2 ? "FMADD.D" : "FMADD.S";
|
||||
case Opcode::FMSUB: return func2 ? "FMSUB.D" : "FMSUB.S";
|
||||
case Opcode::FMNMADD: return func2 ? "FNMADD.D" : "FNMADD.S";
|
||||
case Opcode::FMNMSUB: return func2 ? "FNMSUB.D" : "FNMSUB.S";
|
||||
case Opcode::VSET: return "VSET";
|
||||
case Opcode::GPGPU:
|
||||
switch (func3) {
|
||||
@@ -226,8 +376,8 @@ static const char* op_string(const Instr &instr) {
|
||||
namespace vortex {
|
||||
std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
||||
auto opcode = instr.getOpcode();
|
||||
Word func2 = instr.getFunc2();
|
||||
Word func3 = instr.getFunc3();
|
||||
auto func2 = instr.getFunc2();
|
||||
auto func3 = instr.getFunc3();
|
||||
|
||||
os << op_string(instr) << ": ";
|
||||
|
||||
@@ -244,7 +394,7 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
||||
if (instr.getRDType() != RegType::None) {
|
||||
os << instr.getRDType() << std::dec << instr.getRDest() << " <- ";
|
||||
}
|
||||
int i = 0;
|
||||
uint32_t i = 0;
|
||||
for (; i < instr.getNRSrc(); ++i) {
|
||||
if (i) os << ", ";
|
||||
os << instr.getRSType(i) << std::dec << instr.getRSrc(i);
|
||||
@@ -261,56 +411,22 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
||||
}
|
||||
}
|
||||
|
||||
// Decoder constructor. Two forms appear in this span: one that fills the
// per-instance width/shift/mask members at run time, and an empty-bodied one
// that ignores its ArchDef argument.
Decoder::Decoder(const ArchDef &arch) {
|
||||
// Field widths, in bits; inst_s_ is the word size reported by arch, in bits.
inst_s_ = arch.wsize() * 8;
|
||||
opcode_s_ = 7;
|
||||
reg_s_ = 5;
|
||||
func2_s_ = 2;
|
||||
func3_s_ = 3;
|
||||
mop_s_ = 3;
|
||||
vmask_s_ = 1;
|
||||
// Empty-bodied form: the ArchDef parameter is unnamed and unused.
Decoder::Decoder(const ArchDef&) {}
|
||||
|
||||
// Shift amounts, each built from a lower field's shift plus its width.
shift_opcode_ = 0;
|
||||
shift_rd_ = opcode_s_;
|
||||
shift_func3_ = shift_rd_ + reg_s_;
|
||||
shift_rs1_ = shift_func3_ + func3_s_;
|
||||
shift_rs2_ = shift_rs1_ + reg_s_;
|
||||
shift_func2_ = shift_rs2_ + reg_s_;
|
||||
shift_func7_ = shift_rs2_ + reg_s_;
|
||||
shift_rs3_ = shift_func7_ + func2_s_;
|
||||
shift_vmop_ = shift_func7_ + vmask_s_;
|
||||
shift_vnf_ = shift_vmop_ + mop_s_;
|
||||
shift_func6_ = shift_func7_ + 1;
|
||||
shift_vset_ = shift_func7_ + 6;
|
||||
|
||||
// Field masks, applied after shifting.
reg_mask_ = 0x1f;
|
||||
func2_mask_ = 0x3;
|
||||
func3_mask_ = 0x7;
|
||||
func6_mask_ = 0x3f;
|
||||
func7_mask_ = 0x7f;
|
||||
opcode_mask_ = 0x7f;
|
||||
i_imm_mask_ = 0xfff;
|
||||
s_imm_mask_ = 0xfff;
|
||||
b_imm_mask_ = 0x1fff;
|
||||
u_imm_mask_ = 0xfffff;
|
||||
j_imm_mask_ = 0xfffff;
|
||||
v_imm_mask_ = 0x7ff;
|
||||
}
|
||||
|
||||
std::shared_ptr<Instr> Decoder::decode(Word code) const {
|
||||
std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
|
||||
auto instr = std::make_shared<Instr>();
|
||||
Opcode op = (Opcode)((code >> shift_opcode_) & opcode_mask_);
|
||||
auto op = Opcode((code >> shift_opcode) & mask_opcode);
|
||||
instr->setOpcode(op);
|
||||
|
||||
Word func2 = (code >> shift_func2_) & func2_mask_;
|
||||
Word func3 = (code >> shift_func3_) & func3_mask_;
|
||||
Word func6 = (code >> shift_func6_) & func6_mask_;
|
||||
Word func7 = (code >> shift_func7_) & func7_mask_;
|
||||
auto func2 = (code >> shift_func2) & mask_func2;
|
||||
auto func3 = (code >> shift_func3) & mask_func3;
|
||||
auto func6 = (code >> shift_func6) & mask_func6;
|
||||
auto func7 = (code >> shift_func7) & mask_func7;
|
||||
|
||||
int rd = (code >> shift_rd_) & reg_mask_;
|
||||
int rs1 = (code >> shift_rs1_) & reg_mask_;
|
||||
int rs2 = (code >> shift_rs2_) & reg_mask_;
|
||||
int rs3 = (code >> shift_rs3_) & reg_mask_;
|
||||
auto rd = (code >> shift_rd) & mask_reg;
|
||||
auto rs1 = (code >> shift_rs1) & mask_reg;
|
||||
auto rs2 = (code >> shift_rs2) & mask_reg;
|
||||
auto rs3 = (code >> shift_rs3) & mask_reg;
|
||||
|
||||
auto op_it = sc_instTable.find(op);
|
||||
if (op_it == sc_instTable.end()) {
|
||||
@@ -320,7 +436,7 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
|
||||
|
||||
auto iType = op_it->second.iType;
|
||||
if (op == Opcode::FL || op == Opcode::FS) {
|
||||
if (func3 != 0x2) {
|
||||
if (func3 != 0x2 && func3 != 0x3) {
|
||||
iType = InstType::V_TYPE;
|
||||
}
|
||||
}
|
||||
@@ -330,40 +446,57 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
|
||||
break;
|
||||
|
||||
case InstType::R_TYPE:
|
||||
if (op == Opcode::FCI) {
|
||||
switch (func7) {
|
||||
case 0x68: // FCVT.S.W, FCVT.S.WU
|
||||
if (op == Opcode::FCI) {
|
||||
switch (func7) {
|
||||
case 0x50: // FLE.S, FLT.S, FEQ.S
|
||||
case 0x51: // FLE.D, FLT.D, FEQ.D
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
break;
|
||||
case 0x60: // FCVT.W.S, FCVT.WU.S, FCVT.L.S, FCVT.LU.S
|
||||
case 0x61: // FCVT.W.D, FCVT.WU.D, FCVT.L.D, FCVT.LU.D
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
break;
|
||||
case 0x68: // FCVT.S.W, FCVT.S.WU, FCVT.S.L, FCVT.S.LU
|
||||
case 0x69: // FCVT.D.W, FCVT.D.WU, FCVT.D.L, FCVT.D.LU
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
break;
|
||||
case 0x70: // FCLASS.S, FMV.X.W
|
||||
case 0x71: // FCLASS.D, FMV.X.D
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
break;
|
||||
case 0x78: // FMV.W.X
|
||||
instr->setSrcReg(rs1);
|
||||
case 0x79: // FMV.D.X
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
break;
|
||||
default:
|
||||
instr->setSrcFReg(rs1);
|
||||
}
|
||||
instr->setSrcFReg(rs2);
|
||||
switch (func7) {
|
||||
case 0x50: // FLE, FLT, FEQ
|
||||
case 0x60: // FCVT.WU.S, FCVT.W.S
|
||||
case 0x70: // FLASS, FMV.X.W
|
||||
instr->setDestReg(rd);
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
break;
|
||||
default:
|
||||
instr->setDestFReg(rd);
|
||||
}
|
||||
} else {
|
||||
instr->setDestReg(rd);
|
||||
instr->setSrcReg(rs1);
|
||||
instr->setSrcReg(rs2);
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
}
|
||||
instr->setFunc3(func3);
|
||||
instr->setFunc7(func7);
|
||||
break;
|
||||
|
||||
case InstType::I_TYPE: {
|
||||
instr->setSrcReg(rs1);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
if (op == Opcode::FL) {
|
||||
instr->setDestFReg(rd);
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
} else {
|
||||
instr->setDestReg(rd);
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
}
|
||||
instr->setFunc3(func3);
|
||||
instr->setFunc7(func7);
|
||||
@@ -371,64 +504,71 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
|
||||
case Opcode::SYS_INST:
|
||||
case Opcode::FENCE:
|
||||
// uint12
|
||||
instr->setImm(code >> shift_rs2_);
|
||||
instr->setImm(code >> shift_rs2);
|
||||
break;
|
||||
case Opcode::I_INST:
|
||||
case Opcode::I_INST_W:
|
||||
if (func3 == 0x1 || func3 == 0x5) {
|
||||
// int5
|
||||
instr->setImm(sext32(rs2, 5));
|
||||
auto shamt = rs2; // uint5
|
||||
#if (XLEN == 64)
|
||||
if (op == Opcode::I_INST) {
|
||||
// uint6
|
||||
shamt |= ((func7 & 0x1) << 5);
|
||||
}
|
||||
#endif
|
||||
instr->setImm(shamt);
|
||||
} else {
|
||||
// int12
|
||||
instr->setImm(sext32(code >> shift_rs2_, 12));
|
||||
auto imm = code >> shift_rs2;
|
||||
instr->setImm(sext(imm, width_i_imm));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// int12
|
||||
instr->setImm(sext32(code >> shift_rs2_, 12));
|
||||
auto imm = code >> shift_rs2;
|
||||
instr->setImm(sext(imm, width_i_imm));
|
||||
break;
|
||||
}
|
||||
} break;
|
||||
case InstType::S_TYPE: {
|
||||
instr->setSrcReg(rs1);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
if (op == Opcode::FS) {
|
||||
instr->setSrcFReg(rs2);
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
} else {
|
||||
instr->setSrcReg(rs2);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
}
|
||||
instr->setFunc3(func3);
|
||||
Word imm = (func7 << reg_s_) | rd;
|
||||
instr->setImm(sext32(imm, 12));
|
||||
auto imm = (func7 << width_reg) | rd;
|
||||
instr->setImm(sext(imm, width_i_imm));
|
||||
} break;
|
||||
|
||||
case InstType::B_TYPE: {
|
||||
instr->setSrcReg(rs1);
|
||||
instr->setSrcReg(rs2);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->setFunc3(func3);
|
||||
Word bit_11 = rd & 0x1;
|
||||
Word bits_4_1 = rd >> 1;
|
||||
Word bit_10_5 = func7 & 0x3f;
|
||||
Word bit_12 = func7 >> 6;
|
||||
Word imm = (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
|
||||
instr->setImm(sext32(imm, 13));
|
||||
auto bit_11 = rd & 0x1;
|
||||
auto bits_4_1 = rd >> 1;
|
||||
auto bit_10_5 = func7 & 0x3f;
|
||||
auto bit_12 = func7 >> 6;
|
||||
auto imm = (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
|
||||
instr->setImm(sext(imm, width_i_imm+1));
|
||||
} break;
|
||||
|
||||
case InstType::U_TYPE:
|
||||
instr->setDestReg(rd);
|
||||
instr->setImm(sext32(code >> shift_func3_, 20));
|
||||
break;
|
||||
case InstType::U_TYPE: {
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
auto imm = code >> shift_func3;
|
||||
instr->setImm(sext(imm, width_j_imm));
|
||||
} break;
|
||||
|
||||
case InstType::J_TYPE: {
|
||||
instr->setDestReg(rd);
|
||||
Word unordered = code >> shift_func3_;
|
||||
Word bits_19_12 = unordered & 0xff;
|
||||
Word bit_11 = (unordered >> 8) & 0x1;
|
||||
Word bits_10_1 = (unordered >> 9) & 0x3ff;
|
||||
Word bit_20 = (unordered >> 19) & 0x1;
|
||||
Word imm = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
|
||||
if (bit_20) {
|
||||
imm |= ~j_imm_mask_;
|
||||
}
|
||||
instr->setImm(imm);
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
auto unordered = code >> shift_func3;
|
||||
auto bits_19_12 = unordered & 0xff;
|
||||
auto bit_11 = (unordered >> 8) & 0x1;
|
||||
auto bits_10_1 = (unordered >> 9) & 0x3ff;
|
||||
auto bit_20 = (unordered >> 19) & 0x1;
|
||||
auto imm = (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
|
||||
instr->setImm(sext(imm, width_j_imm+1));
|
||||
} break;
|
||||
|
||||
case InstType::V_TYPE:
|
||||
@@ -438,9 +578,9 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
|
||||
instr->setSrcVReg(rs1);
|
||||
instr->setFunc3(func3);
|
||||
if (func3 == 7) {
|
||||
instr->setImm(!(code >> shift_vset_));
|
||||
instr->setImm(!(code >> shift_vset));
|
||||
if (instr->getImm()) {
|
||||
Word immed = (code >> shift_rs2_) & v_imm_mask_;
|
||||
auto immed = (code >> shift_rs2) & mask_v_imm;
|
||||
instr->setImm(immed);
|
||||
instr->setVlmul(immed & 0x3);
|
||||
instr->setVediv((immed >> 4) & 0x3);
|
||||
@@ -450,7 +590,7 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
|
||||
}
|
||||
} else {
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->setVmask((code >> shift_func7_) & 0x1);
|
||||
instr->setVmask((code >> shift_func7) & 0x1);
|
||||
instr->setFunc6(func6);
|
||||
}
|
||||
} break;
|
||||
@@ -460,9 +600,9 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
|
||||
instr->setSrcVReg(rs1);
|
||||
instr->setVlsWidth(func3);
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->setVmask(code >> shift_func7_);
|
||||
instr->setVmop((code >> shift_vmop_) & func3_mask_);
|
||||
instr->setVnf((code >> shift_vnf_) & func3_mask_);
|
||||
instr->setVmask(code >> shift_func7);
|
||||
instr->setVmop((code >> shift_vmop) & mask_func3);
|
||||
instr->setVnf((code >> shift_vnf) & mask_func3);
|
||||
break;
|
||||
|
||||
case Opcode::FS:
|
||||
@@ -470,9 +610,9 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
|
||||
instr->setSrcVReg(rs1);
|
||||
instr->setVlsWidth(func3);
|
||||
instr->setSrcVReg(rs2);
|
||||
instr->setVmask(code >> shift_func7_);
|
||||
instr->setVmop((code >> shift_vmop_) & func3_mask_);
|
||||
instr->setVnf((code >> shift_vnf_) & func3_mask_);
|
||||
instr->setVmask(code >> shift_func7);
|
||||
instr->setVmop((code >> shift_vmop) & mask_func3);
|
||||
instr->setVnf((code >> shift_vnf) & mask_func3);
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -481,15 +621,15 @@ std::shared_ptr<Instr> Decoder::decode(Word code) const {
|
||||
break;
|
||||
case R4_TYPE:
|
||||
if (op == Opcode::GPU) {
|
||||
instr->setDestReg(rd);
|
||||
instr->setSrcReg(rs1);
|
||||
instr->setSrcReg(rs2);
|
||||
instr->setSrcReg(rs3);
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setSrcReg(rs1, RegType::Integer);
|
||||
instr->setSrcReg(rs2, RegType::Integer);
|
||||
instr->setSrcReg(rs3, RegType::Integer);
|
||||
} else {
|
||||
instr->setDestFReg(rd);
|
||||
instr->setSrcFReg(rs1);
|
||||
instr->setSrcFReg(rs2);
|
||||
instr->setSrcFReg(rs3);
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->setSrcReg(rs1, RegType::Float);
|
||||
instr->setSrcReg(rs2, RegType::Float);
|
||||
instr->setSrcReg(rs3, RegType::Float);
|
||||
}
|
||||
instr->setFunc2(func2);
|
||||
instr->setFunc3(func3);
|
||||
|
||||
@@ -7,55 +7,12 @@ namespace vortex {
|
||||
|
||||
class ArchDef;
|
||||
class Instr;
|
||||
class Pipeline;
|
||||
|
||||
// Instruction decoder: decode() takes an instruction word and returns a
// shared Instr describing it. This span shows both a form that carries the
// field widths, shifts and masks as private members and a form that declares
// only the constructor and decode().
class Decoder {
|
||||
public:
|
||||
Decoder(const ArchDef &);
|
||||
|
||||
std::shared_ptr<Instr> decode(Word code) const;
|
||||
|
||||
private:
|
||||
|
||||
// Field widths.
Word inst_s_;
|
||||
Word opcode_s_;
|
||||
Word reg_s_;
|
||||
Word func2_s_;
|
||||
Word func3_s_;
|
||||
// Field shift amounts.
Word shift_opcode_;
|
||||
Word shift_rd_;
|
||||
Word shift_rs1_;
|
||||
Word shift_rs2_;
|
||||
Word shift_rs3_;
|
||||
Word shift_func2_;
|
||||
Word shift_func3_;
|
||||
Word shift_func7_;
|
||||
Word shift_j_u_immed_;
|
||||
Word shift_s_b_immed_;
|
||||
Word shift_i_immed_;
|
||||
|
||||
// Field and immediate masks.
Word reg_mask_;
|
||||
Word func2_mask_;
|
||||
Word func3_mask_;
|
||||
Word func6_mask_;
|
||||
Word func7_mask_;
|
||||
Word opcode_mask_;
|
||||
Word i_imm_mask_;
|
||||
Word s_imm_mask_;
|
||||
Word b_imm_mask_;
|
||||
Word u_imm_mask_;
|
||||
Word j_imm_mask_;
|
||||
Word v_imm_mask_;
|
||||
|
||||
// Vector-instruction fields.
//Vector
|
||||
Word shift_vset_;
|
||||
Word shift_vset_immed_;
|
||||
Word shift_vmask_;
|
||||
Word shift_vmop_;
|
||||
Word shift_vnf_;
|
||||
Word shift_func6_;
|
||||
Word vmask_s_;
|
||||
Word mop_s_;
|
||||
// Decodes one 32-bit instruction word.
std::shared_ptr<Instr> decode(uint32_t code) const;
|
||||
};
|
||||
|
||||
}
|
||||
1487
sim/simx/execute.cpp
1487
sim/simx/execute.cpp
File diff suppressed because it is too large
Load Diff
@@ -114,8 +114,8 @@ void LsuUnit::tick() {
|
||||
// duplicates detection
|
||||
bool is_dup = false;
|
||||
if (trace->tmask.test(0)) {
|
||||
uint64_t addr_mask = sizeof(Word)-1;
|
||||
Word addr0 = trace->mem_addrs.at(0).at(0).addr & ~addr_mask;
|
||||
uint64_t addr_mask = sizeof(uint32_t)-1;
|
||||
uint32_t addr0 = trace->mem_addrs.at(0).at(0).addr & ~addr_mask;
|
||||
uint32_t matches = 1;
|
||||
for (uint32_t t = 1; t < num_threads_; ++t) {
|
||||
if (!trace->tmask.test(t))
|
||||
|
||||
111
sim/simx/instr.h
111
sim/simx/instr.h
@@ -32,6 +32,9 @@ enum Opcode {
|
||||
// GPGPU Extension
|
||||
GPGPU = 0x6b,
|
||||
GPU = 0x5b,
|
||||
// RV64 Standard Extensions
|
||||
R_INST_W = 0x3b,
|
||||
I_INST_W = 0x1b,
|
||||
};
|
||||
|
||||
enum InstType {
|
||||
@@ -54,56 +57,54 @@ public:
|
||||
, has_imm_(false)
|
||||
, rdest_type_(RegType::None)
|
||||
, rdest_(0)
|
||||
, func2_(0)
|
||||
, func3_(0)
|
||||
, func6_(0)
|
||||
, func7_(0) {
|
||||
for (int i = 0; i < MAX_REG_SOURCES; ++i) {
|
||||
for (uint32_t i = 0; i < MAX_REG_SOURCES; ++i) {
|
||||
rsrc_type_[i] = RegType::None;
|
||||
}
|
||||
}
|
||||
|
||||
/* Setters used to "craft" the instruction. */
|
||||
void setOpcode(Opcode opcode) { opcode_ = opcode; }
|
||||
void setDestReg(int destReg) { rdest_type_ = RegType::Integer; rdest_ = destReg; }
|
||||
void setSrcReg(int srcReg) { rsrc_type_[num_rsrcs_] = RegType::Integer; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setDestFReg(int destReg) { rdest_type_ = RegType::Float; rdest_ = destReg; }
|
||||
void setSrcFReg(int srcReg) { rsrc_type_[num_rsrcs_] = RegType::Float; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setDestVReg(int destReg) { rdest_type_ = RegType::Vector; rdest_ = destReg; }
|
||||
void setSrcVReg(int srcReg) { rsrc_type_[num_rsrcs_] = RegType::Vector; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setFunc2(Word func2) { func2_ = func2; }
|
||||
void setFunc3(Word func3) { func3_ = func3; }
|
||||
void setFunc7(Word func7) { func7_ = func7; }
|
||||
void setImm(Word imm) { has_imm_ = true; imm_ = imm; }
|
||||
void setVlsWidth(Word width) { vlsWidth_ = width; }
|
||||
void setVmop(Word mop) { vMop_ = mop; }
|
||||
void setVnf(Word nf) { vNf_ = nf; }
|
||||
void setVmask(Word mask) { vmask_ = mask; }
|
||||
void setVs3(Word vs) { vs3_ = vs; }
|
||||
void setVlmul(Word lmul) { vlmul_ = 1 << lmul; }
|
||||
void setVsew(Word sew) { vsew_ = 1 << (3+sew); }
|
||||
void setVediv(Word ediv) { vediv_ = 1 << ediv; }
|
||||
void setFunc6(Word func6) { func6_ = func6; }
|
||||
void setDestReg(uint32_t destReg, RegType type) { rdest_type_ = type; rdest_ = destReg; }
|
||||
void setSrcReg(uint32_t srcReg, RegType type) { rsrc_type_[num_rsrcs_] = type; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setDestVReg(uint32_t destReg) { rdest_type_ = RegType::Vector; rdest_ = destReg; }
|
||||
void setSrcVReg(uint32_t srcReg) { rsrc_type_[num_rsrcs_] = RegType::Vector; rsrc_[num_rsrcs_++] = srcReg; }
|
||||
void setFunc2(uint32_t func2) { func2_ = func2; }
|
||||
void setFunc3(uint32_t func3) { func3_ = func3; }
|
||||
void setFunc7(uint32_t func7) { func7_ = func7; }
|
||||
void setImm(uint32_t imm) { has_imm_ = true; imm_ = imm; }
|
||||
void setVlsWidth(uint32_t width) { vlsWidth_ = width; }
|
||||
void setVmop(uint32_t mop) { vMop_ = mop; }
|
||||
void setVnf(uint32_t nf) { vNf_ = nf; }
|
||||
void setVmask(uint32_t mask) { vmask_ = mask; }
|
||||
void setVs3(uint32_t vs) { vs3_ = vs; }
|
||||
void setVlmul(uint32_t lmul) { vlmul_ = 1 << lmul; }
|
||||
void setVsew(uint32_t sew) { vsew_ = 1 << (3+sew); }
|
||||
void setVediv(uint32_t ediv) { vediv_ = 1 << ediv; }
|
||||
void setFunc6(uint32_t func6) { func6_ = func6; }
|
||||
|
||||
/* Getters used by encoders. */
|
||||
Opcode getOpcode() const { return opcode_; }
|
||||
Word getFunc2() const { return func2_; }
|
||||
Word getFunc3() const { return func3_; }
|
||||
Word getFunc6() const { return func6_; }
|
||||
Word getFunc7() const { return func7_; }
|
||||
int getNRSrc() const { return num_rsrcs_; }
|
||||
int getRSrc(int i) const { return rsrc_[i]; }
|
||||
RegType getRSType(int i) const { return rsrc_type_[i]; }
|
||||
int getRDest() const { return rdest_; }
|
||||
uint32_t getFunc2() const { return func2_; }
|
||||
uint32_t getFunc3() const { return func3_; }
|
||||
uint32_t getFunc6() const { return func6_; }
|
||||
uint32_t getFunc7() const { return func7_; }
|
||||
uint32_t getNRSrc() const { return num_rsrcs_; }
|
||||
uint32_t getRSrc(uint32_t i) const { return rsrc_[i]; }
|
||||
RegType getRSType(uint32_t i) const { return rsrc_type_[i]; }
|
||||
uint32_t getRDest() const { return rdest_; }
|
||||
RegType getRDType() const { return rdest_type_; }
|
||||
bool hasImm() const { return has_imm_; }
|
||||
Word getImm() const { return imm_; }
|
||||
Word getVlsWidth() const { return vlsWidth_; }
|
||||
Word getVmop() const { return vMop_; }
|
||||
Word getvNf() const { return vNf_; }
|
||||
Word getVmask() const { return vmask_; }
|
||||
Word getVs3() const { return vs3_; }
|
||||
Word getVlmul() const { return vlmul_; }
|
||||
Word getVsew() const { return vsew_; }
|
||||
Word getVediv() const { return vediv_; }
|
||||
uint32_t getImm() const { return imm_; }
|
||||
uint32_t getVlsWidth() const { return vlsWidth_; }
|
||||
uint32_t getVmop() const { return vMop_; }
|
||||
uint32_t getvNf() const { return vNf_; }
|
||||
uint32_t getVmask() const { return vmask_; }
|
||||
uint32_t getVs3() const { return vs3_; }
|
||||
uint32_t getVlmul() const { return vlmul_; }
|
||||
uint32_t getVsew() const { return vsew_; }
|
||||
uint32_t getVediv() const { return vediv_; }
|
||||
|
||||
private:
|
||||
|
||||
@@ -112,27 +113,27 @@ private:
|
||||
};
|
||||
|
||||
Opcode opcode_;
|
||||
int num_rsrcs_;
|
||||
uint32_t num_rsrcs_;
|
||||
bool has_imm_;
|
||||
RegType rdest_type_;
|
||||
Word imm_;
|
||||
uint32_t imm_;
|
||||
RegType rsrc_type_[MAX_REG_SOURCES];
|
||||
int rsrc_[MAX_REG_SOURCES];
|
||||
int rdest_;
|
||||
Word func2_;
|
||||
Word func3_;
|
||||
Word func6_;
|
||||
uint32_t rsrc_[MAX_REG_SOURCES];
|
||||
uint32_t rdest_;
|
||||
uint32_t func2_;
|
||||
uint32_t func3_;
|
||||
uint32_t func6_;
|
||||
uint32_t func7_;
|
||||
|
||||
// Vector
|
||||
Word vmask_;
|
||||
Word vlsWidth_;
|
||||
Word vMop_;
|
||||
Word vNf_;
|
||||
Word vs3_;
|
||||
Word vlmul_;
|
||||
Word vsew_;
|
||||
Word vediv_;
|
||||
Word func7_;
|
||||
uint32_t vmask_;
|
||||
uint32_t vlsWidth_;
|
||||
uint32_t vMop_;
|
||||
uint32_t vNf_;
|
||||
uint32_t vs3_;
|
||||
uint32_t vlmul_;
|
||||
uint32_t vsew_;
|
||||
uint32_t vediv_;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &, const Instr&);
|
||||
};
|
||||
|
||||
@@ -11,13 +11,13 @@
|
||||
#include "constants.h"
|
||||
#include <util.h>
|
||||
#include "args.h"
|
||||
#include "core.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int exitcode = 0;
|
||||
|
||||
std::string archStr("rv32imf");
|
||||
std::string imgFileName;
|
||||
int num_cores(NUM_CORES * NUM_CLUSTERS);
|
||||
int num_warps(NUM_WARPS);
|
||||
@@ -26,15 +26,14 @@ int main(int argc, char **argv) {
|
||||
bool showStats(false);
|
||||
bool riscv_test(false);
|
||||
|
||||
/* Read the command line arguments. */
|
||||
CommandLineArgFlag fh("-h", "--help", "", showHelp);
|
||||
CommandLineArgSetter<std::string> fa("-a", "--arch", "", archStr);
|
||||
CommandLineArgSetter<std::string> fi("-i", "--image", "", imgFileName);
|
||||
CommandLineArgSetter<int> fc("-c", "--cores", "", num_cores);
|
||||
CommandLineArgSetter<int> fw("-w", "--warps", "", num_warps);
|
||||
CommandLineArgSetter<int> ft("-t", "--threads", "", num_threads);
|
||||
CommandLineArgFlag fr("-r", "--riscv", "", riscv_test);
|
||||
CommandLineArgFlag fs("-s", "--stats", "", showStats);
|
||||
// parse the command line arguments
|
||||
CommandLineArgFlag fh("-h", "--help", "show command line options", showHelp);
|
||||
CommandLineArgSetter<std::string> fi("-i", "--image", "program binary", imgFileName);
|
||||
CommandLineArgSetter<int> fc("-c", "--cores", "number of cores", num_cores);
|
||||
CommandLineArgSetter<int> fw("-w", "--warps", "number of warps", num_warps);
|
||||
CommandLineArgSetter<int> ft("-t", "--threads", "number of threads", num_threads);
|
||||
CommandLineArgFlag fr("-r", "--riscv", "enable riscv tests", riscv_test);
|
||||
CommandLineArgFlag fs("-s", "--stats", "show stats", showStats);
|
||||
|
||||
CommandLineArg::readArgs(argc - 1, argv + 1);
|
||||
|
||||
@@ -44,7 +43,6 @@ int main(int argc, char **argv) {
|
||||
" -c, --cores <num> Number of cores\n"
|
||||
" -w, --warps <num> Number of warps\n"
|
||||
" -t, --threads <num> Number of threads\n"
|
||||
" -a, --arch <arch string> Architecture string\n"
|
||||
" -r, --riscv riscv test\n"
|
||||
" -s, --stats Print stats on exit.\n";
|
||||
return 0;
|
||||
@@ -54,7 +52,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
{
|
||||
// create processor configuration
|
||||
ArchDef arch(archStr, num_cores, num_warps, num_threads);
|
||||
ArchDef arch(num_cores, num_warps, num_threads);
|
||||
|
||||
// create memory module
|
||||
RAM ram(RAM_PAGE_SIZE);
|
||||
@@ -79,7 +77,8 @@ int main(int argc, char **argv) {
|
||||
processor.attach_ram(&ram);
|
||||
|
||||
// run simulation
|
||||
processor.run();
|
||||
exitcode = processor.run();
|
||||
|
||||
}
|
||||
|
||||
if (riscv_test) {
|
||||
|
||||
@@ -15,8 +15,8 @@ struct pipeline_trace_t {
|
||||
uint64_t uuid;
|
||||
|
||||
//--
|
||||
int cid;
|
||||
int wid;
|
||||
uint32_t cid;
|
||||
uint32_t wid;
|
||||
ThreadMask tmask;
|
||||
Word PC;
|
||||
|
||||
@@ -26,7 +26,7 @@ struct pipeline_trace_t {
|
||||
//--
|
||||
bool wb;
|
||||
RegType rdest_type;
|
||||
int rdest;
|
||||
uint32_t rdest;
|
||||
|
||||
//--
|
||||
RegMask used_iregs;
|
||||
|
||||
@@ -28,7 +28,7 @@ public:
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (int i = 0, n = in_use_iregs_.size(); i < n; ++i) {
|
||||
for (uint32_t i = 0, n = in_use_iregs_.size(); i < n; ++i) {
|
||||
in_use_iregs_.at(i).reset();
|
||||
in_use_fregs_.at(i).reset();
|
||||
in_use_vregs_.at(i).reset();
|
||||
|
||||
@@ -61,10 +61,11 @@ uint32_t TexUnit::read(int32_t u,
|
||||
uint32_t addr11 = base_addr + offset11 * stride;
|
||||
|
||||
// memory lookup
|
||||
uint32_t texel00 = core_->dcache_read(addr00, stride);
|
||||
uint32_t texel01 = core_->dcache_read(addr01, stride);
|
||||
uint32_t texel10 = core_->dcache_read(addr10, stride);
|
||||
uint32_t texel11 = core_->dcache_read(addr11, stride);
|
||||
uint32_t texel00(0), texel01(0), texel10(0), texel11(0);
|
||||
core_->dcache_read(&texel00, addr00, stride);
|
||||
core_->dcache_read(&texel01, addr01, stride);
|
||||
core_->dcache_read(&texel10, addr10, stride);
|
||||
core_->dcache_read(&texel11, addr11, stride);
|
||||
|
||||
mem_addrs->push_back({addr00, stride});
|
||||
mem_addrs->push_back({addr01, stride});
|
||||
@@ -84,7 +85,8 @@ uint32_t TexUnit::read(int32_t u,
|
||||
uint32_t addr = base_addr + offset * stride;
|
||||
|
||||
// memory lookup
|
||||
uint32_t texel = core_->dcache_read(addr, stride);
|
||||
uint32_t texel(0);
|
||||
core_->dcache_read(&texel, addr, stride);
|
||||
mem_addrs->push_back({addr, stride});
|
||||
|
||||
// filtering
|
||||
|
||||
@@ -10,12 +10,22 @@
|
||||
|
||||
namespace vortex {
|
||||
|
||||
// Basic scalar types. Word / WordI are the unsigned / signed XLEN-bit integer
// types and DWord / DWordI are twice that width; XLEN must be 32 or 64.
typedef uint8_t Byte;
#if XLEN == 32
typedef uint32_t Word;
typedef int32_t WordI;
typedef uint64_t DWord;
typedef int64_t DWordI;
#elif XLEN == 64
typedef uint64_t Word;
typedef int64_t WordI;
// 128-bit integers are a compiler extension (GCC/Clang), not standard C++.
typedef __uint128_t DWord;
typedef __int128_t DWordI;
#else
#error unsupported XLEN
#endif

// These widths are fixed and do not depend on XLEN.
typedef uint32_t Addr;
typedef uint32_t Size;
typedef uint64_t FWord;

// Both masks hold exactly 32 bits.
typedef std::bitset<32> RegMask;
typedef std::bitset<32> ThreadMask;
|
||||
@@ -30,12 +40,12 @@ enum class RegType {
|
||||
Vector
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const RegType& type) {
|
||||
switch (type) {
|
||||
inline std::ostream &operator<<(std::ostream &os, const RegType& clss) {
|
||||
switch (clss) {
|
||||
case RegType::None: break;
|
||||
case RegType::Integer: os << "r"; break;
|
||||
case RegType::Float: os << "fr"; break;
|
||||
case RegType::Vector: os << "vr"; break;
|
||||
case RegType::Integer: os << "x"; break;
|
||||
case RegType::Float: os << "f"; break;
|
||||
case RegType::Vector: os << "v"; break;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
@@ -232,7 +242,7 @@ struct MemReq {
|
||||
|
||||
// Writes a one-line description of a memory request: "mem-wr" or "mem-rd",
// the address in hexadecimal, then tag, core id and uuid in decimal.
// The stream is left in decimal mode on return.
inline std::ostream &operator<<(std::ostream &os, const MemReq& req) {
  os << "mem-" << (req.write ? "wr" : "rd") << ": ";
  // Hex applies to the address only; switch back before the remaining fields.
  os << "addr=" << std::hex << req.addr << std::dec << ", tag=" << req.tag << ", core_id=" << req.core_id;
  os << " (#" << std::dec << req.uuid << ")";
  return os;
}
|
||||
|
||||
@@ -10,11 +10,11 @@
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
Warp::Warp(Core *core, Word id)
|
||||
Warp::Warp(Core *core, uint32_t id)
|
||||
: id_(id)
|
||||
, core_(core)
|
||||
, ireg_file_(core->arch().num_threads(), std::vector<Word>(core->arch().num_regs()))
|
||||
, freg_file_(core->arch().num_threads(), std::vector<Word>(core->arch().num_regs()))
|
||||
, freg_file_(core->arch().num_threads(), std::vector<FWord>(core->arch().num_regs()))
|
||||
, vreg_file_(core->arch().num_threads(), std::vector<Byte>(core->arch().vsize()))
|
||||
{
|
||||
this->clear();
|
||||
@@ -24,7 +24,7 @@ void Warp::clear() {
|
||||
active_ = false;
|
||||
PC_ = STARTUP_ADDR;
|
||||
tmask_.reset();
|
||||
for (int i = 0, n = core_->arch().num_threads(); i < n; ++i) {
|
||||
for (uint32_t i = 0, n = core_->arch().num_threads(); i < n; ++i) {
|
||||
for (auto& reg : ireg_file_.at(i)) {
|
||||
reg = 0;
|
||||
}
|
||||
@@ -41,13 +41,14 @@ void Warp::eval(pipeline_trace_t *trace) {
|
||||
assert(tmask_.any());
|
||||
|
||||
DPH(2, "Fetch: coreid=" << core_->id() << ", wid=" << id_ << ", tmask=");
|
||||
for (int i = 0, n = core_->arch().num_threads(); i < n; ++i)
|
||||
for (uint32_t i = 0, n = core_->arch().num_threads(); i < n; ++i)
|
||||
DPN(2, tmask_.test(n-i-1));
|
||||
DPN(2, ", PC=0x" << std::hex << PC_ << " (#" << std::dec << trace->uuid << ")" << std::endl);
|
||||
|
||||
/* Fetch and decode. */
|
||||
|
||||
Word instr_code = core_->icache_read(PC_, sizeof(Word));
|
||||
uint32_t instr_code = 0;
|
||||
core_->icache_read(&instr_code, PC_, sizeof(uint32_t));
|
||||
auto instr = core_->decoder().decode(instr_code);
|
||||
if (!instr) {
|
||||
std::cout << std::hex << "Error: invalid instruction 0x" << instr_code << ", at PC=" << PC_ << std::endl;
|
||||
@@ -68,10 +69,16 @@ void Warp::eval(pipeline_trace_t *trace) {
|
||||
this->execute(*instr, trace);
|
||||
|
||||
DP(4, "Register state:");
|
||||
for (int i = 0; i < core_->arch().num_regs(); ++i) {
|
||||
for (uint32_t i = 0; i < core_->arch().num_regs(); ++i) {
|
||||
DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
for (int j = 0; j < core_->arch().num_threads(); ++j) {
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << ireg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
// Integer register file
|
||||
for (uint32_t j = 0; j < core_->arch().num_threads(); ++j) {
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(XLEN/4) << std::hex << ireg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
}
|
||||
DPN(4, '|');
|
||||
// Floating point register file
|
||||
for (uint32_t j = 0; j < core_->arch().num_threads(); ++j) {
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(16) << std::hex << freg_file_.at(j).at(i) << std::setfill(' ') << ' ');
|
||||
}
|
||||
DPN(4, std::endl);
|
||||
}
|
||||
|
||||
@@ -32,15 +32,15 @@ struct DomStackEntry {
|
||||
};
|
||||
|
||||
// Plain aggregate of the four vtype fields, each stored as an unsigned
// 32-bit value. Member order is vill, vediv, vsew, vlmul.
struct vtype {
  uint32_t vill;
  uint32_t vediv;
  uint32_t vsew;
  uint32_t vlmul;
};
|
||||
|
||||
class Warp {
|
||||
public:
|
||||
Warp(Core *core, Word id);
|
||||
Warp(Core *core, uint32_t id);
|
||||
|
||||
void clear();
|
||||
|
||||
@@ -62,15 +62,15 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
Word id() const {
|
||||
uint32_t id() const {
|
||||
return id_;
|
||||
}
|
||||
|
||||
Word getPC() const {
|
||||
uint32_t getPC() const {
|
||||
return PC_;
|
||||
}
|
||||
|
||||
void setPC(Word PC) {
|
||||
void setPC(uint32_t PC) {
|
||||
PC_ = PC;
|
||||
}
|
||||
|
||||
@@ -79,13 +79,13 @@ public:
|
||||
active_ = tmask_.any();
|
||||
}
|
||||
|
||||
Word getTmask() const {
|
||||
uint32_t getTmask() const {
|
||||
if (active_)
|
||||
return tmask_.to_ulong();
|
||||
return 0;
|
||||
}
|
||||
|
||||
Word getIRegValue(int reg) const {
|
||||
uint32_t getIRegValue(uint32_t reg) const {
|
||||
return ireg_file_.at(0).at(reg);
|
||||
}
|
||||
|
||||
@@ -95,7 +95,7 @@ private:
|
||||
|
||||
void execute(const Instr &instr, pipeline_trace_t *trace);
|
||||
|
||||
Word id_;
|
||||
uint32_t id_;
|
||||
Core *core_;
|
||||
bool active_;
|
||||
|
||||
@@ -103,12 +103,12 @@ private:
|
||||
ThreadMask tmask_;
|
||||
|
||||
std::vector<std::vector<Word>> ireg_file_;
|
||||
std::vector<std::vector<Word>> freg_file_;
|
||||
std::vector<std::vector<FWord>> freg_file_;
|
||||
std::vector<std::vector<Byte>> vreg_file_;
|
||||
std::stack<DomStackEntry> dom_stack_;
|
||||
|
||||
struct vtype vtype_;
|
||||
int vl_;
|
||||
uint32_t vl_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user