simX debugging update

This commit is contained in:
Blaise Tine
2021-06-25 20:57:31 -04:00
parent 7ce0127e37
commit 7baa673817
4 changed files with 127 additions and 159 deletions

View File

@@ -94,16 +94,10 @@ void Core::clear() {
} }
void Core::step() { void Core::step() {
D(3, "###########################################################"); D(2, "###########################################################");
steps_++; steps_++;
D(3, std::dec << "Core" << id_ << ": cycle: " << steps_); D(2, std::dec << "Core" << id_ << ": cycle: " << steps_);
DPH(3, "stalled warps:");
for (int i = 0; i < arch_.num_warps(); i++) {
DPN(3, " " << stalled_warps_[i]);
}
DPN(3, "\n");
this->writeback(); this->writeback();
this->execute(); this->execute();
@@ -112,7 +106,7 @@ void Core::step() {
this->fetch(); this->fetch();
this->schedule(); this->schedule();
DPN(3, std::flush); DPN(2, std::flush);
} }
void Core::schedule() { void Core::schedule() {
@@ -136,7 +130,7 @@ void Core::schedule() {
if (!foundSchedule) if (!foundSchedule)
return; return;
D(3, "Schedule: wid=" << scheduled_warp); D(2, "Schedule: wid=" << scheduled_warp);
inst_in_schedule_.wid = scheduled_warp; inst_in_schedule_.wid = scheduled_warp;
// advance pipeline // advance pipeline
@@ -155,11 +149,11 @@ void Core::fetch() {
insts_ += active_threads_b; insts_ += active_threads_b;
if (active_threads_b != active_threads_a) { if (active_threads_b != active_threads_a) {
D(3, "** warp #" << wid << " active threads changed from " << active_threads_b << " to " << active_threads_a); D(3, "*** warp#" << wid << " active threads changed to " << active_threads_a);
} }
if (inst_in_fetch_.stall_warp) { if (inst_in_fetch_.stall_warp) {
D(3, "** warp #" << wid << " stalled"); D(3, "*** warp#" << wid << " fetch stalled");
stalled_warps_[wid] = true; stalled_warps_[wid] = true;
} }
@@ -186,7 +180,7 @@ void Core::issue() {
|| (inst_in_issue_.used_vregs & in_use_vregs_) != 0; || (inst_in_issue_.used_vregs & in_use_vregs_) != 0;
if (in_use_regs) { if (in_use_regs) {
D(3, "Issue: registers not ready!"); D(3, "*** Issue: registers not ready!");
inst_in_issue_.stalled = true; inst_in_issue_.stalled = true;
return; return;
} }
@@ -237,7 +231,8 @@ void Core::writeback() {
} }
if (inst_in_writeback_.stall_warp) { if (inst_in_writeback_.stall_warp) {
stalled_warps_[inst_in_writeback_.wid] = 0; stalled_warps_[inst_in_writeback_.wid] = false;
D(3, "*** warp#" << inst_in_writeback_.wid << " fetch released");
} }
// advance pipeline // advance pipeline

View File

@@ -15,30 +15,27 @@
using namespace vortex; using namespace vortex;
struct DivergentBranchException {}; static bool HasDivergentThreads(const ThreadMask &thread_mask,
const std::vector<std::vector<Word>> &reg_file,
static bool checkUnanimous(unsigned p, unsigned reg) {
const std::vector<std::vector<Word>> &m, bool cond;
const ThreadMask &tm) { size_t thread_idx = 0;
bool same; size_t num_threads = reg_file.size();
size_t i; for (; thread_idx < num_threads; ++thread_idx) {
for (i = 0; i < m.size(); ++i) { if (thread_mask[thread_idx]) {
if (tm[i]) { cond = bool(reg_file[thread_idx][reg]);
same = m[i][p];
break; break;
} }
} }
if (i == m.size()) assert(thread_idx != num_threads);
throw DivergentBranchException(); for (; thread_idx < num_threads; ++thread_idx) {
if (thread_mask[thread_idx]) {
for (; i < m.size(); ++i) { if (cond != (bool(reg_file[thread_idx][reg]))) {
if (tm[i]) { return true;
if (same != (bool(m[i][p]))) {
return false;
} }
} }
} }
return true; return false;
} }
static void update_fcrs(Core* core, int tid, int wid, bool outOfRange = false) { static void update_fcrs(Core* core, int tid, int wid, bool outOfRange = false) {
@@ -98,24 +95,24 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
int num_rsrcs = instr.getNRSrc(); int num_rsrcs = instr.getNRSrc();
if (num_rsrcs) { if (num_rsrcs) {
DPH(3, "[" << std::dec << t << "] Src Registers: "); DPH(2, "[" << std::dec << t << "] Src Regs: ");
for (int i = 0; i < num_rsrcs; ++i) { for (int i = 0; i < num_rsrcs; ++i) {
int rst = instr.getRSType(i); int rst = instr.getRSType(i);
int rs = instr.getRSrc(i); int rs = instr.getRSrc(i);
if (i) DPN(3, ", "); if (i) DPN(2, ", ");
switch (rst) { switch (rst) {
case 1: case 1:
rsdata[i] = iregs[rs]; rsdata[i] = iregs[rs];
DPN(3, "r" << std::dec << rs << "=0x" << std::hex << rsdata[i]); DPN(2, "r" << std::dec << rs << "=0x" << std::hex << rsdata[i]);
break; break;
case 2: case 2:
rsdata[i] = fregs[rs]; rsdata[i] = fregs[rs];
DPN(3, "fr" << std::dec << rs << "=0x" << std::hex << rsdata[i]); DPN(2, "fr" << std::dec << rs << "=0x" << std::hex << rsdata[i]);
break; break;
default: break; default: break;
} }
} }
DPN(3, std::endl); DPN(2, std::endl);
} }
switch (opcode) { switch (opcode) {
@@ -445,7 +442,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} }
} break; } break;
case FENCE: case FENCE:
D(3, "FENCE");
pipeline->stall_warp = true; pipeline->stall_warp = true;
runOnce = true; runOnce = true;
break; break;
@@ -457,10 +453,10 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
rddata = data_read; rddata = data_read;
} else { } else {
D(3, "Executing vector load"); D(3, "Executing vector load");
D(4, "lmul: " << vtype_.vlmul << " VLEN:" << (core_->arch().vsize() * 8) << "sew: " << vtype_.vsew); D(3, "lmul: " << vtype_.vlmul << " VLEN:" << (core_->arch().vsize() * 8) << "sew: " << vtype_.vsew);
D(4, "src: " << rsrc0 << " " << rsdata[0]); D(3, "src: " << rsrc0 << " " << rsdata[0]);
D(4, "dest" << rdest); D(3, "dest" << rdest);
D(4, "width" << instr.getVlsWidth()); D(3, "width" << instr.getVlsWidth());
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -471,7 +467,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
Word memAddr = ((rsdata[0]) & 0xFFFFFFFC) + (i * vtype_.vsew / 8); Word memAddr = ((rsdata[0]) & 0xFFFFFFFC) + (i * vtype_.vsew / 8);
D(3, "STORE MEM: ADDRESS=0x" << std::hex << memAddr); D(3, "STORE MEM: ADDRESS=0x" << std::hex << memAddr);
Word data_read = core_->dcache_read(memAddr, 4); Word data_read = core_->dcache_read(memAddr, 4);
D(4, "Mem addr: " << std::hex << memAddr << " Data read " << data_read); D(3, "Mem addr: " << std::hex << memAddr << " Data read " << data_read);
int *result_ptr = (int *)(vd.data() + i); int *result_ptr = (int *)(vd.data() + i);
*result_ptr = data_read; *result_ptr = data_read;
} }
@@ -496,7 +492,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
//store word and unit strided (not checking for unit stride) //store word and unit strided (not checking for unit stride)
uint32_t value = *(uint32_t *)(vRegFile_[instr.getVs3()].data() + i); uint32_t value = *(uint32_t *)(vRegFile_[instr.getVs3()].data() + i);
core_->dcache_write(memAddr, value, 4); core_->dcache_write(memAddr, value, 4);
D(4, "store: " << memAddr << " value:" << value); D(3, "store: " << memAddr << " value:" << value);
} break; } break;
default: default:
std::abort(); std::abort();
@@ -548,7 +544,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
// update fcsrs // update fcsrs
update_fcrs(core_, t, id_); update_fcrs(core_, t, id_);
D(4, "fpDest: " << fpDest); D(3, "fpDest: " << fpDest);
if (fpBinIsNan(floatToBin(fpDest)) == 0) { if (fpBinIsNan(floatToBin(fpDest)) == 0) {
rddata = floatToBin(fpDest); rddata = floatToBin(fpDest);
} else { } else {
@@ -835,7 +831,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
case 1: { case 1: {
// WSPAWN // WSPAWN
int active_warps = std::min<int>(rsdata[0], core_->arch().num_warps()); int active_warps = std::min<int>(rsdata[0], core_->arch().num_warps());
D(0, "Spawning " << (active_warps-1) << " warps at PC: " << std::hex << rsdata[1]); D(3, "*** Spawning " << (active_warps-1) << " warps at PC: " << std::hex << rsdata[1]);
for (int i = 1; i < active_warps; ++i) { for (int i = 1; i < active_warps; ++i) {
Warp &newWarp = core_->warp(i); Warp &newWarp = core_->warp(i);
newWarp.setPC(rsdata[1]); newWarp.setPC(rsdata[1]);
@@ -846,15 +842,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 2: { case 2: {
// SPLIT // SPLIT
if (checkUnanimous(rsrc0, iRegFile_, tmask_)) { if (HasDivergentThreads(tmask_, iRegFile_, rsrc0)) {
D(3, "Unanimous pred: " << rsrc0 << " val: " << rsdata[0] << "\n");
DomStackEntry e(tmask_);
e.unanimous = true;
domStack_.push(e);
} else {
D(3, "Split: Original TM: ");
DX( for (int i = 0; i < num_threads; ++i) D(3, tmask_[i] << " "); )
ThreadMask tmask; ThreadMask tmask;
for (int i = 0; i < num_threads; ++i) { for (int i = 0; i < num_threads; ++i) {
tmask[i] = tmask_[i] && !iRegFile_[i][rsrc0]; tmask[i] = tmask_[i] && !iRegFile_[i][rsrc0];
@@ -868,37 +856,39 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} }
active_ = tmask_.any(); active_ = tmask_.any();
D(3, "Split: New TM"); DPH(3, "*** Split: New TM=");
DX( for (int i = 0; i < num_threads; ++i) D(3, tmask_[i] << " "); ) for (int i = 0; i < num_threads; ++i) DPN(3, tmask_[num_threads-i-1]);
DPN(3, ", Pushed TM=");
D(3, "Split: Pushed TM PC: " << std::hex << e.PC << std::dec << "\n"); for (int i = 0; i < num_threads; ++i) DPN(3, e.tmask[num_threads-i-1]);
DX( for (int i = 0; i < num_threads; ++i) D(3, e.tmask[i] << " "); ) DPN(3, ", PC=0x" << std::hex << e.PC << "\n");
} else {
D(3, "*** Unanimous pred: r" << rsrc0 << ", val: " << rsdata[0]);
DomStackEntry e(tmask_);
e.unanimous = true;
domStack_.push(e);
} }
pipeline->stall_warp = true; pipeline->stall_warp = true;
runOnce = true; runOnce = true;
} break; } break;
case 3: { case 3: {
// JOIN // JOIN
D(3, "JOIN");
if (!domStack_.empty() && domStack_.top().unanimous) { if (!domStack_.empty() && domStack_.top().unanimous) {
D(2, "Uninimous branch at join"); D(3, "*** Uninimous branch at join");
tmask_ = domStack_.top().tmask; tmask_ = domStack_.top().tmask;
active_ = tmask_.any(); active_ = tmask_.any();
domStack_.pop(); domStack_.pop();
} else { } else {
if (!domStack_.top().fallThrough) { if (!domStack_.top().fallThrough) {
nextPC = domStack_.top().PC; nextPC = domStack_.top().PC;
D(3, "join: NOT FALLTHROUGH PC: " << std::hex << nextPC << std::dec); D(3, "*** Join: next PC: " << std::hex << nextPC << std::dec);
} }
D(3, "Join: Old TM: ");
DX( for (int i = 0; i < num_threads; ++i) D(3, tmask_[i] << " "); )
std::cout << "\n";
tmask_ = domStack_.top().tmask; tmask_ = domStack_.top().tmask;
active_ = tmask_.any(); active_ = tmask_.any();
D(3, "Join: New TM: "); DPH(3, "*** Join: New TM=");
DX( for (int i = 0; i < num_threads; ++i) D(3, tmask_[i] << " "); ) for (int i = 0; i < num_threads; ++i) DPN(3, tmask_[num_threads-i-1]);
DPN(3, "\n");
domStack_.pop(); domStack_.pop();
} }
@@ -917,7 +907,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} }
break; break;
case VSET: { case VSET: {
D(3, "VSET");
int VLEN = core_->arch().vsize() * 8; int VLEN = core_->arch().vsize() * 8;
int VLMAX = (instr.getVlmul() * VLEN) / instr.getVsew(); int VLMAX = (instr.getVlmul() * VLEN) / instr.getVsew();
switch (func3) { switch (func3) {
@@ -936,7 +925,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first = *(uint8_t *)(vr1.data() + i); uint8_t first = *(uint8_t *)(vr1.data() + i);
uint8_t second = *(uint8_t *)(vr2.data() + i); uint8_t second = *(uint8_t *)(vr2.data() + i);
uint8_t result = first + second; uint8_t result = first + second;
D(4, "Adding " << first << " + " << second << " = " << result); D(3, "Adding " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
} }
@@ -948,7 +937,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first = *(uint16_t *)(vr1.data() + i); uint16_t first = *(uint16_t *)(vr1.data() + i);
uint16_t second = *(uint16_t *)(vr2.data() + i); uint16_t second = *(uint16_t *)(vr2.data() + i);
uint16_t result = first + second; uint16_t result = first + second;
D(4, "Adding " << first << " + " << second << " = " << result); D(3, "Adding " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
} }
@@ -960,7 +949,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first = *(uint32_t *)(vr1.data() + i); uint32_t first = *(uint32_t *)(vr1.data() + i);
uint32_t second = *(uint32_t *)(vr2.data() + i); uint32_t second = *(uint32_t *)(vr2.data() + i);
uint32_t result = first + second; uint32_t result = first + second;
D(4, "Adding " << first << " + " << second << " = " << result); D(3, "Adding " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
} }
@@ -976,7 +965,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first = *(uint8_t *)(vr1.data() + i); uint8_t first = *(uint8_t *)(vr1.data() + i);
uint8_t second = *(uint8_t *)(vr2.data() + i); uint8_t second = *(uint8_t *)(vr2.data() + i);
uint8_t result = (first == second) ? 1 : 0; uint8_t result = (first == second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 16) { } else if (vtype_.vsew == 16) {
@@ -984,7 +973,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first = *(uint16_t *)(vr1.data() + i); uint16_t first = *(uint16_t *)(vr1.data() + i);
uint16_t second = *(uint16_t *)(vr2.data() + i); uint16_t second = *(uint16_t *)(vr2.data() + i);
uint16_t result = (first == second) ? 1 : 0; uint16_t result = (first == second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 32) { } else if (vtype_.vsew == 32) {
@@ -992,7 +981,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first = *(uint32_t *)(vr1.data() + i); uint32_t first = *(uint32_t *)(vr1.data() + i);
uint32_t second = *(uint32_t *)(vr2.data() + i); uint32_t second = *(uint32_t *)(vr2.data() + i);
uint32_t result = (first == second) ? 1 : 0; uint32_t result = (first == second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
} }
@@ -1007,7 +996,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first = *(uint8_t *)(vr1.data() + i); uint8_t first = *(uint8_t *)(vr1.data() + i);
uint8_t second = *(uint8_t *)(vr2.data() + i); uint8_t second = *(uint8_t *)(vr2.data() + i);
uint8_t result = (first != second) ? 1 : 0; uint8_t result = (first != second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 16) { } else if (vtype_.vsew == 16) {
@@ -1015,7 +1004,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first = *(uint16_t *)(vr1.data() + i); uint16_t first = *(uint16_t *)(vr1.data() + i);
uint16_t second = *(uint16_t *)(vr2.data() + i); uint16_t second = *(uint16_t *)(vr2.data() + i);
uint16_t result = (first != second) ? 1 : 0; uint16_t result = (first != second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 32) { } else if (vtype_.vsew == 32) {
@@ -1023,7 +1012,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first = *(uint32_t *)(vr1.data() + i); uint32_t first = *(uint32_t *)(vr1.data() + i);
uint32_t second = *(uint32_t *)(vr2.data() + i); uint32_t second = *(uint32_t *)(vr2.data() + i);
uint32_t result = (first != second) ? 1 : 0; uint32_t result = (first != second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
} }
@@ -1038,7 +1027,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first = *(uint8_t *)(vr1.data() + i); uint8_t first = *(uint8_t *)(vr1.data() + i);
uint8_t second = *(uint8_t *)(vr2.data() + i); uint8_t second = *(uint8_t *)(vr2.data() + i);
uint8_t result = (first < second) ? 1 : 0; uint8_t result = (first < second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 16) { } else if (vtype_.vsew == 16) {
@@ -1046,7 +1035,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first = *(uint16_t *)(vr1.data() + i); uint16_t first = *(uint16_t *)(vr1.data() + i);
uint16_t second = *(uint16_t *)(vr2.data() + i); uint16_t second = *(uint16_t *)(vr2.data() + i);
uint16_t result = (first < second) ? 1 : 0; uint16_t result = (first < second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 32) { } else if (vtype_.vsew == 32) {
@@ -1054,7 +1043,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first = *(uint32_t *)(vr1.data() + i); uint32_t first = *(uint32_t *)(vr1.data() + i);
uint32_t second = *(uint32_t *)(vr2.data() + i); uint32_t second = *(uint32_t *)(vr2.data() + i);
uint32_t result = (first < second) ? 1 : 0; uint32_t result = (first < second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
} }
@@ -1069,7 +1058,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
int8_t first = *(int8_t *)(vr1.data() + i); int8_t first = *(int8_t *)(vr1.data() + i);
int8_t second = *(int8_t *)(vr2.data() + i); int8_t second = *(int8_t *)(vr2.data() + i);
int8_t result = (first < second) ? 1 : 0; int8_t result = (first < second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 16) { } else if (vtype_.vsew == 16) {
@@ -1077,7 +1066,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
int16_t first = *(int16_t *)(vr1.data() + i); int16_t first = *(int16_t *)(vr1.data() + i);
int16_t second = *(int16_t *)(vr2.data() + i); int16_t second = *(int16_t *)(vr2.data() + i);
int16_t result = (first < second) ? 1 : 0; int16_t result = (first < second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(int16_t *)(vd.data() + i) = result; *(int16_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 32) { } else if (vtype_.vsew == 32) {
@@ -1085,7 +1074,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
int32_t first = *(int32_t *)(vr1.data() + i); int32_t first = *(int32_t *)(vr1.data() + i);
int32_t second = *(int32_t *)(vr2.data() + i); int32_t second = *(int32_t *)(vr2.data() + i);
int32_t result = (first < second) ? 1 : 0; int32_t result = (first < second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(int32_t *)(vd.data() + i) = result; *(int32_t *)(vd.data() + i) = result;
} }
} }
@@ -1100,7 +1089,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first = *(uint8_t *)(vr1.data() + i); uint8_t first = *(uint8_t *)(vr1.data() + i);
uint8_t second = *(uint8_t *)(vr2.data() + i); uint8_t second = *(uint8_t *)(vr2.data() + i);
uint8_t result = (first <= second) ? 1 : 0; uint8_t result = (first <= second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 16) { } else if (vtype_.vsew == 16) {
@@ -1108,7 +1097,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first = *(uint16_t *)(vr1.data() + i); uint16_t first = *(uint16_t *)(vr1.data() + i);
uint16_t second = *(uint16_t *)(vr2.data() + i); uint16_t second = *(uint16_t *)(vr2.data() + i);
uint16_t result = (first <= second) ? 1 : 0; uint16_t result = (first <= second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 32) { } else if (vtype_.vsew == 32) {
@@ -1116,7 +1105,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first = *(uint32_t *)(vr1.data() + i); uint32_t first = *(uint32_t *)(vr1.data() + i);
uint32_t second = *(uint32_t *)(vr2.data() + i); uint32_t second = *(uint32_t *)(vr2.data() + i);
uint32_t result = (first <= second) ? 1 : 0; uint32_t result = (first <= second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
} }
@@ -1131,7 +1120,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
int8_t first = *(int8_t *)(vr1.data() + i); int8_t first = *(int8_t *)(vr1.data() + i);
int8_t second = *(int8_t *)(vr2.data() + i); int8_t second = *(int8_t *)(vr2.data() + i);
int8_t result = (first <= second) ? 1 : 0; int8_t result = (first <= second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 16) { } else if (vtype_.vsew == 16) {
@@ -1139,7 +1128,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
int16_t first = *(int16_t *)(vr1.data() + i); int16_t first = *(int16_t *)(vr1.data() + i);
int16_t second = *(int16_t *)(vr2.data() + i); int16_t second = *(int16_t *)(vr2.data() + i);
int16_t result = (first <= second) ? 1 : 0; int16_t result = (first <= second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(int16_t *)(vd.data() + i) = result; *(int16_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 32) { } else if (vtype_.vsew == 32) {
@@ -1147,7 +1136,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
int32_t first = *(int32_t *)(vr1.data() + i); int32_t first = *(int32_t *)(vr1.data() + i);
int32_t second = *(int32_t *)(vr2.data() + i); int32_t second = *(int32_t *)(vr2.data() + i);
int32_t result = (first <= second) ? 1 : 0; int32_t result = (first <= second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(int32_t *)(vd.data() + i) = result; *(int32_t *)(vd.data() + i) = result;
} }
} }
@@ -1162,7 +1151,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first = *(uint8_t *)(vr1.data() + i); uint8_t first = *(uint8_t *)(vr1.data() + i);
uint8_t second = *(uint8_t *)(vr2.data() + i); uint8_t second = *(uint8_t *)(vr2.data() + i);
uint8_t result = (first > second) ? 1 : 0; uint8_t result = (first > second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 16) { } else if (vtype_.vsew == 16) {
@@ -1170,7 +1159,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first = *(uint16_t *)(vr1.data() + i); uint16_t first = *(uint16_t *)(vr1.data() + i);
uint16_t second = *(uint16_t *)(vr2.data() + i); uint16_t second = *(uint16_t *)(vr2.data() + i);
uint16_t result = (first > second) ? 1 : 0; uint16_t result = (first > second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 32) { } else if (vtype_.vsew == 32) {
@@ -1178,7 +1167,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first = *(uint32_t *)(vr1.data() + i); uint32_t first = *(uint32_t *)(vr1.data() + i);
uint32_t second = *(uint32_t *)(vr2.data() + i); uint32_t second = *(uint32_t *)(vr2.data() + i);
uint32_t result = (first > second) ? 1 : 0; uint32_t result = (first > second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
} }
@@ -1193,7 +1182,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
int8_t first = *(int8_t *)(vr1.data() + i); int8_t first = *(int8_t *)(vr1.data() + i);
int8_t second = *(int8_t *)(vr2.data() + i); int8_t second = *(int8_t *)(vr2.data() + i);
int8_t result = (first > second) ? 1 : 0; int8_t result = (first > second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 16) { } else if (vtype_.vsew == 16) {
@@ -1201,7 +1190,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
int16_t first = *(int16_t *)(vr1.data() + i); int16_t first = *(int16_t *)(vr1.data() + i);
int16_t second = *(int16_t *)(vr2.data() + i); int16_t second = *(int16_t *)(vr2.data() + i);
int16_t result = (first > second) ? 1 : 0; int16_t result = (first > second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(int16_t *)(vd.data() + i) = result; *(int16_t *)(vd.data() + i) = result;
} }
} else if (vtype_.vsew == 32) { } else if (vtype_.vsew == 32) {
@@ -1209,7 +1198,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
int32_t first = *(int32_t *)(vr1.data() + i); int32_t first = *(int32_t *)(vr1.data() + i);
int32_t second = *(int32_t *)(vr2.data() + i); int32_t second = *(int32_t *)(vr2.data() + i);
int32_t result = (first > second) ? 1 : 0; int32_t result = (first > second) ? 1 : 0;
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(int32_t *)(vd.data() + i) = result; *(int32_t *)(vd.data() + i) = result;
} }
} }
@@ -1220,7 +1209,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
switch (func6) { switch (func6) {
case 24: { case 24: {
// vmandnot // vmandnot
D(3, "vmandnot");
auto &vr1 = vRegFile_[rsrc0]; auto &vr1 = vRegFile_[rsrc0];
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -1231,7 +1219,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first_value = (first & 0x1); uint8_t first_value = (first & 0x1);
uint8_t second_value = (second & 0x1); uint8_t second_value = (second & 0x1);
uint8_t result = (first_value & !second_value); uint8_t result = (first_value & !second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1244,7 +1232,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first_value = (first & 0x1); uint16_t first_value = (first & 0x1);
uint16_t second_value = (second & 0x1); uint16_t second_value = (second & 0x1);
uint16_t result = (first_value & !second_value); uint16_t result = (first_value & !second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1257,7 +1245,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first_value = (first & 0x1); uint32_t first_value = (first & 0x1);
uint32_t second_value = (second & 0x1); uint32_t second_value = (second & 0x1);
uint32_t result = (first_value & !second_value); uint32_t result = (first_value & !second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1267,7 +1255,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 25: { case 25: {
// vmand // vmand
D(3, "vmand");
auto &vr1 = vRegFile_[rsrc0]; auto &vr1 = vRegFile_[rsrc0];
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -1278,7 +1265,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first_value = (first & 0x1); uint8_t first_value = (first & 0x1);
uint8_t second_value = (second & 0x1); uint8_t second_value = (second & 0x1);
uint8_t result = (first_value & second_value); uint8_t result = (first_value & second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1291,7 +1278,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first_value = (first & 0x1); uint16_t first_value = (first & 0x1);
uint16_t second_value = (second & 0x1); uint16_t second_value = (second & 0x1);
uint16_t result = (first_value & second_value); uint16_t result = (first_value & second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1304,7 +1291,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first_value = (first & 0x1); uint32_t first_value = (first & 0x1);
uint32_t second_value = (second & 0x1); uint32_t second_value = (second & 0x1);
uint32_t result = (first_value & second_value); uint32_t result = (first_value & second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1314,7 +1301,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 26: { case 26: {
// vmor // vmor
D(3, "vmor");
auto &vr1 = vRegFile_[rsrc0]; auto &vr1 = vRegFile_[rsrc0];
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -1325,7 +1311,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first_value = (first & 0x1); uint8_t first_value = (first & 0x1);
uint8_t second_value = (second & 0x1); uint8_t second_value = (second & 0x1);
uint8_t result = (first_value | second_value); uint8_t result = (first_value | second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1338,7 +1324,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first_value = (first & 0x1); uint16_t first_value = (first & 0x1);
uint16_t second_value = (second & 0x1); uint16_t second_value = (second & 0x1);
uint16_t result = (first_value | second_value); uint16_t result = (first_value | second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1351,7 +1337,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first_value = (first & 0x1); uint32_t first_value = (first & 0x1);
uint32_t second_value = (second & 0x1); uint32_t second_value = (second & 0x1);
uint32_t result = (first_value | second_value); uint32_t result = (first_value | second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1361,7 +1347,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 27: { case 27: {
//vmxor //vmxor
D(3, "vmxor");
auto &vr1 = vRegFile_[rsrc0]; auto &vr1 = vRegFile_[rsrc0];
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -1372,7 +1357,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first_value = (first & 0x1); uint8_t first_value = (first & 0x1);
uint8_t second_value = (second & 0x1); uint8_t second_value = (second & 0x1);
uint8_t result = (first_value ^ second_value); uint8_t result = (first_value ^ second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1385,7 +1370,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first_value = (first & 0x1); uint16_t first_value = (first & 0x1);
uint16_t second_value = (second & 0x1); uint16_t second_value = (second & 0x1);
uint16_t result = (first_value ^ second_value); uint16_t result = (first_value ^ second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1398,7 +1383,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first_value = (first & 0x1); uint32_t first_value = (first & 0x1);
uint32_t second_value = (second & 0x1); uint32_t second_value = (second & 0x1);
uint32_t result = (first_value ^ second_value); uint32_t result = (first_value ^ second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1408,7 +1393,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 28: { case 28: {
//vmornot //vmornot
D(3, "vmornot");
auto &vr1 = vRegFile_[rsrc0]; auto &vr1 = vRegFile_[rsrc0];
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -1419,7 +1403,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first_value = (first & 0x1); uint8_t first_value = (first & 0x1);
uint8_t second_value = (second & 0x1); uint8_t second_value = (second & 0x1);
uint8_t result = (first_value | !second_value); uint8_t result = (first_value | !second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1432,7 +1416,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first_value = (first & 0x1); uint16_t first_value = (first & 0x1);
uint16_t second_value = (second & 0x1); uint16_t second_value = (second & 0x1);
uint16_t result = (first_value | !second_value); uint16_t result = (first_value | !second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1445,7 +1429,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first_value = (first & 0x1); uint32_t first_value = (first & 0x1);
uint32_t second_value = (second & 0x1); uint32_t second_value = (second & 0x1);
uint32_t result = (first_value | !second_value); uint32_t result = (first_value | !second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1455,7 +1439,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 29: { case 29: {
//vmnand //vmnand
D(3, "vmnand");
auto &vr1 = vRegFile_[rsrc0]; auto &vr1 = vRegFile_[rsrc0];
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -1466,7 +1449,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first_value = (first & 0x1); uint8_t first_value = (first & 0x1);
uint8_t second_value = (second & 0x1); uint8_t second_value = (second & 0x1);
uint8_t result = !(first_value & second_value); uint8_t result = !(first_value & second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1479,7 +1462,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first_value = (first & 0x1); uint16_t first_value = (first & 0x1);
uint16_t second_value = (second & 0x1); uint16_t second_value = (second & 0x1);
uint16_t result = !(first_value & second_value); uint16_t result = !(first_value & second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1492,7 +1475,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first_value = (first & 0x1); uint32_t first_value = (first & 0x1);
uint32_t second_value = (second & 0x1); uint32_t second_value = (second & 0x1);
uint32_t result = !(first_value & second_value); uint32_t result = !(first_value & second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1502,7 +1485,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 30: { case 30: {
//vmnor //vmnor
D(3, "vmnor");
auto &vr1 = vRegFile_[rsrc0]; auto &vr1 = vRegFile_[rsrc0];
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -1513,7 +1495,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first_value = (first & 0x1); uint8_t first_value = (first & 0x1);
uint8_t second_value = (second & 0x1); uint8_t second_value = (second & 0x1);
uint8_t result = !(first_value | second_value); uint8_t result = !(first_value | second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1526,7 +1508,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first_value = (first & 0x1); uint16_t first_value = (first & 0x1);
uint16_t second_value = (second & 0x1); uint16_t second_value = (second & 0x1);
uint16_t result = !(first_value | second_value); uint16_t result = !(first_value | second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1539,7 +1521,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first_value = (first & 0x1); uint32_t first_value = (first & 0x1);
uint32_t second_value = (second & 0x1); uint32_t second_value = (second & 0x1);
uint32_t result = !(first_value | second_value); uint32_t result = !(first_value | second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1549,7 +1531,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 31: { case 31: {
//vmxnor //vmxnor
D(3, "vmxnor");
auto &vr1 = vRegFile_[rsrc0]; auto &vr1 = vRegFile_[rsrc0];
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -1560,7 +1541,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first_value = (first & 0x1); uint8_t first_value = (first & 0x1);
uint8_t second_value = (second & 0x1); uint8_t second_value = (second & 0x1);
uint8_t result = !(first_value ^ second_value); uint8_t result = !(first_value ^ second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1573,7 +1554,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first_value = (first & 0x1); uint16_t first_value = (first & 0x1);
uint16_t second_value = (second & 0x1); uint16_t second_value = (second & 0x1);
uint16_t result = !(first_value ^ second_value); uint16_t result = !(first_value ^ second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1586,7 +1567,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first_value = (first & 0x1); uint32_t first_value = (first & 0x1);
uint32_t second_value = (second & 0x1); uint32_t second_value = (second & 0x1);
uint32_t result = !(first_value ^ second_value); uint32_t result = !(first_value ^ second_value);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1596,7 +1577,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 37: { case 37: {
//vmul //vmul
D(3, "vmul");
auto &vr1 = vRegFile_[rsrc0]; auto &vr1 = vRegFile_[rsrc0];
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -1605,7 +1585,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first = *(uint8_t *)(vr1.data() + i); uint8_t first = *(uint8_t *)(vr1.data() + i);
uint8_t second = *(uint8_t *)(vr2.data() + i); uint8_t second = *(uint8_t *)(vr2.data() + i);
uint8_t result = (first * second); uint8_t result = (first * second);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1616,7 +1596,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first = *(uint16_t *)(vr1.data() + i); uint16_t first = *(uint16_t *)(vr1.data() + i);
uint16_t second = *(uint16_t *)(vr2.data() + i); uint16_t second = *(uint16_t *)(vr2.data() + i);
uint16_t result = (first * second); uint16_t result = (first * second);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1627,7 +1607,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first = *(uint32_t *)(vr1.data() + i); uint32_t first = *(uint32_t *)(vr1.data() + i);
uint32_t second = *(uint32_t *)(vr2.data() + i); uint32_t second = *(uint32_t *)(vr2.data() + i);
uint32_t result = (first * second); uint32_t result = (first * second);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1637,7 +1617,6 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 45: { case 45: {
// vmacc // vmacc
D(3, "vmacc");
auto &vr1 = vRegFile_[rsrc0]; auto &vr1 = vRegFile_[rsrc0];
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
@@ -1646,7 +1625,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint8_t first = *(uint8_t *)(vr1.data() + i); uint8_t first = *(uint8_t *)(vr1.data() + i);
uint8_t second = *(uint8_t *)(vr2.data() + i); uint8_t second = *(uint8_t *)(vr2.data() + i);
uint8_t result = (first * second); uint8_t result = (first * second);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) += result; *(uint8_t *)(vd.data() + i) += result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1657,7 +1636,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint16_t first = *(uint16_t *)(vr1.data() + i); uint16_t first = *(uint16_t *)(vr1.data() + i);
uint16_t second = *(uint16_t *)(vr2.data() + i); uint16_t second = *(uint16_t *)(vr2.data() + i);
uint16_t result = (first * second); uint16_t result = (first * second);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) += result; *(uint16_t *)(vd.data() + i) += result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1668,7 +1647,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
uint32_t first = *(uint32_t *)(vr1.data() + i); uint32_t first = *(uint32_t *)(vr1.data() + i);
uint32_t second = *(uint32_t *)(vr2.data() + i); uint32_t second = *(uint32_t *)(vr2.data() + i);
uint32_t result = (first * second); uint32_t result = (first * second);
D(4, "Comparing " << first << " + " << second << " = " << result); D(3, "Comparing " << first << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) += result; *(uint32_t *)(vd.data() + i) += result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1681,14 +1660,13 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
case 6: { case 6: {
switch (func6) { switch (func6) {
case 0: { case 0: {
D(3, "vmadd.vx");
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
if (vtype_.vsew == 8) { if (vtype_.vsew == 8) {
for (int i = 0; i < vl_; i++) { for (int i = 0; i < vl_; i++) {
uint8_t second = *(uint8_t *)(vr2.data() + i); uint8_t second = *(uint8_t *)(vr2.data() + i);
uint8_t result = (rsdata[0] + second); uint8_t result = (rsdata[0] + second);
D(4, "Comparing " << rsdata[0] << " + " << second << " = " << result); D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1698,7 +1676,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
for (int i = 0; i < vl_; i++) { for (int i = 0; i < vl_; i++) {
uint16_t second = *(uint16_t *)(vr2.data() + i); uint16_t second = *(uint16_t *)(vr2.data() + i);
uint16_t result = (rsdata[0] + second); uint16_t result = (rsdata[0] + second);
D(4, "Comparing " << rsdata[0] << " + " << second << " = " << result); D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1708,7 +1686,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
for (int i = 0; i < vl_; i++) { for (int i = 0; i < vl_; i++) {
uint32_t second = *(uint32_t *)(vr2.data() + i); uint32_t second = *(uint32_t *)(vr2.data() + i);
uint32_t result = (rsdata[0] + second); uint32_t result = (rsdata[0] + second);
D(4, "Comparing " << rsdata[0] << " + " << second << " = " << result); D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1718,14 +1696,13 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
} break; } break;
case 37: { case 37: {
// vmul.vx // vmul.vx
D(3, "vmul.vx");
auto &vr2 = vRegFile_[rsrc1]; auto &vr2 = vRegFile_[rsrc1];
auto &vd = vRegFile_[rdest]; auto &vd = vRegFile_[rdest];
if (vtype_.vsew == 8) { if (vtype_.vsew == 8) {
for (int i = 0; i < vl_; i++) { for (int i = 0; i < vl_; i++) {
uint8_t second = *(uint8_t *)(vr2.data() + i); uint8_t second = *(uint8_t *)(vr2.data() + i);
uint8_t result = (rsdata[0] * second); uint8_t result = (rsdata[0] * second);
D(4, "Comparing " << rsdata[0] << " + " << second << " = " << result); D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result);
*(uint8_t *)(vd.data() + i) = result; *(uint8_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1735,7 +1712,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
for (int i = 0; i < vl_; i++) { for (int i = 0; i < vl_; i++) {
uint16_t second = *(uint16_t *)(vr2.data() + i); uint16_t second = *(uint16_t *)(vr2.data() + i);
uint16_t result = (rsdata[0] * second); uint16_t result = (rsdata[0] * second);
D(4, "Comparing " << rsdata[0] << " + " << second << " = " << result); D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result);
*(uint16_t *)(vd.data() + i) = result; *(uint16_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1745,7 +1722,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
for (int i = 0; i < vl_; i++) { for (int i = 0; i < vl_; i++) {
uint32_t second = *(uint32_t *)(vr2.data() + i); uint32_t second = *(uint32_t *)(vr2.data() + i);
uint32_t result = (rsdata[0] * second); uint32_t result = (rsdata[0] * second);
D(4, "Comparing " << rsdata[0] << " + " << second << " = " << result); D(3, "Comparing " << rsdata[0] << " + " << second << " = " << result);
*(uint32_t *)(vd.data() + i) = result; *(uint32_t *)(vd.data() + i) = result;
} }
for (int i = vl_; i < VLMAX; i++) { for (int i = vl_; i < VLMAX; i++) {
@@ -1785,12 +1762,12 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
switch (rdt) { switch (rdt) {
case 1: case 1:
if (rdest) { if (rdest) {
D(3, "[" << std::dec << t << "] Dest Register: r" << rdest << "=0x" << std::hex << std::hex << rddata); D(2, "[" << std::dec << t << "] Dest Regs: r" << rdest << "=0x" << std::hex << std::hex << rddata);
iregs[rdest] = rddata; iregs[rdest] = rddata;
} }
break; break;
case 2: case 2:
D(3, "[" << std::dec << t << "] Dest Register: fr" << rdest << "=0x" << std::hex << std::hex << rddata); D(2, "[" << std::dec << t << "] Dest Regs: fr" << rdest << "=0x" << std::hex << std::hex << rddata);
fregs[rdest] = rddata; fregs[rdest] = rddata;
break; break;
default: default:
@@ -1800,7 +1777,7 @@ void Warp::execute(const Instr &instr, Pipeline *pipeline) {
PC_ += core_->arch().wsize(); PC_ += core_->arch().wsize();
if (PC_ != nextPC) { if (PC_ != nextPC) {
D(3, "Next PC: " << std::hex << nextPC << std::dec); D(3, "*** Next PC: " << std::hex << nextPC << std::dec);
PC_ = nextPC; PC_ = nextPC;
} }
} }

View File

@@ -134,11 +134,11 @@ MemoryUnit::TLBEntry MemoryUnit::tlbLookup(Addr vAddr, Word flagMask) {
if (iter->second.flags & flagMask) if (iter->second.flags & flagMask)
return iter->second; return iter->second;
else { else {
D(2, "Page fault on addr 0x" << std::hex << vAddr << "(bad flags)"); D(3, "*** Page fault on addr 0x" << std::hex << vAddr << "(bad flags)");
throw PageFault(vAddr, false); throw PageFault(vAddr, false);
} }
} else { } else {
D(2, "Page fault on addr 0x" << std::hex << vAddr << "(not in TLB)"); D(3, "*** Page fault on addr 0x" << std::hex << vAddr << "(not in TLB)");
throw PageFault(vAddr, true); throw PageFault(vAddr, true);
} }
} }
@@ -168,7 +168,6 @@ void MemoryUnit::write(Addr addr, const void *data, Size size, bool sup) {
} }
void MemoryUnit::tlbAdd(Addr virt, Addr phys, Word flags) { void MemoryUnit::tlbAdd(Addr virt, Addr phys, Word flags) {
D(1, "tlbAdd(0x" << std::hex << virt << ", 0x" << phys << ", 0x" << flags << ')');
tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags); tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags);
} }

View File

@@ -28,7 +28,10 @@ void Warp::clear() {
void Warp::step(Pipeline *pipeline) { void Warp::step(Pipeline *pipeline) {
assert(tmask_.any()); assert(tmask_.any());
D(3, "Step: wid=" << id_ << ", PC=0x" << std::hex << PC_); DPH(2, "Step: wid=" << id_ << ", PC=0x" << std::hex << PC_ << ", tmask=");
for (int i = 0, n = core_->arch().num_threads(); i < n; ++i)
DPN(2, tmask_[n-i-1]);
DPN(2, "\n");
/* Fetch and decode. */ /* Fetch and decode. */
@@ -79,7 +82,6 @@ void Warp::step(Pipeline *pipeline) {
// Execute // Execute
this->execute(*instr, pipeline); this->execute(*instr, pipeline);
// At Debug Level 3, print debug info after each instruction.
D(4, "Register state:"); D(4, "Register state:");
for (int i = 0; i < core_->arch().num_regs(); ++i) { for (int i = 0; i < core_->arch().num_regs(); ++i) {
DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':'); DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
@@ -87,10 +89,5 @@ void Warp::step(Pipeline *pipeline) {
DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' '); DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' ');
} }
DPN(4, std::endl); DPN(4, std::endl);
} }
DPH(3, "Thread mask:");
for (int i = 0; i < core_->arch().num_threads(); ++i)
DPN(3, " " << tmask_[i]);
DPN(3, "\n");
} }