Fixed emulator

This commit is contained in:
felsabbagh3
2019-11-06 23:30:07 -05:00
parent 60e6ff0b42
commit 87ae5c8cdf
23 changed files with 947 additions and 793 deletions

Binary file not shown.

View File

@@ -3,6 +3,7 @@
*******************************************************************************/ *******************************************************************************/
#include <iostream> #include <iostream>
#include <iomanip>
// #define USE_DEBUG 7 // #define USE_DEBUG 7
// #define PRINT_ACTIVE_THREADS // #define PRINT_ACTIVE_THREADS
@@ -36,7 +37,7 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id):
a(a), iDec(d), mem(mem), steps(0) a(a), iDec(d), mem(mem), steps(0)
{ {
for (unsigned i = 0; i < a.getNWarps(); ++i) for (unsigned i = 0; i < a.getNWarps(); ++i)
w.push_back(Warp(this)); w.push_back(Warp(this, i));
w[0].activeThreads = 1; w[0].activeThreads = 1;
w[0].spawned = true; w[0].spawned = true;
@@ -53,6 +54,7 @@ void Core::step() {
cout << endl << "Threads:"; cout << endl << "Threads:";
#endif #endif
for (unsigned i = 0; i < w.size(); ++i) { for (unsigned i = 0; i < w.size(); ++i) {
if (w[i].activeThreads) { if (w[i].activeThreads) {
D(3, "Core step stepping warp " << i << '[' << w[i].activeThreads << ']'); D(3, "Core step stepping warp " << i << '[' << w[i].activeThreads << ']');
@@ -93,11 +95,12 @@ void Core::printStats() const {
} }
Warp::Warp(Core *c, Word id) : Warp::Warp(Core *c, Word id) :
core(c), pc(0), interruptEnable(true), core(c), pc(0x80000000), interruptEnable(true),
supervisorMode(true), activeThreads(0), reg(0), pred(0), supervisorMode(true), activeThreads(0), reg(0), pred(0),
shadowReg(core->a.getNRegs()), shadowPReg(core->a.getNPRegs()), id(id), shadowReg(core->a.getNRegs()), shadowPReg(core->a.getNPRegs()), id(id),
spawned(false), steps(0), insts(0), loads(0), stores(0) spawned(false), steps(0), insts(0), loads(0), stores(0)
{ {
D(3, "Creating a new thread with PC: " << hex << this->pc << '\n');
/* Build the register file. */ /* Build the register file. */
Word regNum(0); Word regNum(0);
for (Word j = 0; j < core->a.getNThds(); ++j) { for (Word j = 0; j < core->a.getNThds(); ++j) {
@@ -111,8 +114,10 @@ Warp::Warp(Core *c, Word id) :
pred[j].push_back(Reg<bool>(id, regNum++)); pred[j].push_back(Reg<bool>(id, regNum++));
} }
tmask.push_back(true); bool act = false;
shadowTmask.push_back(true); if (j == 0) act = true;
tmask.push_back(act);
shadowTmask.push_back(act);
} }
Word csrNum(0); Word csrNum(0);
@@ -197,9 +202,9 @@ void Warp::step() {
if (USE_DEBUG >= 3) { if (USE_DEBUG >= 3) {
D(3, "Register state:"); D(3, "Register state:");
for (unsigned i = 0; i < reg[0].size(); ++i) { for (unsigned i = 0; i < reg[0].size(); ++i) {
D_RAW(" %r" << dec << i << ':'); D_RAW(" %r" << setfill(' ') << setw(2) << dec << i << ':');
for (unsigned j = 0; j < reg.size(); ++j) for (unsigned j = 0; j < reg.size(); ++j)
D_RAW(' ' << hex << reg[j][i] << ' '); D_RAW(' ' << setfill('0') << setw(8) << hex << reg[j][i] << setfill(' ') << ' ');
D_RAW('(' << shadowReg[i] << ')' << endl); D_RAW('(' << shadowReg[i] << ')' << endl);
} }
// D(3, "Predicate state:"); // D(3, "Predicate state:");

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -4,7 +4,7 @@
#ifndef __DEBUG_H #ifndef __DEBUG_H
#define __DEBUG_H #define __DEBUG_H
// #define USE_DEBUG 9 #define USE_DEBUG 9
#ifdef USE_DEBUG #ifdef USE_DEBUG
#include <iostream> #include <iostream>

View File

@@ -138,18 +138,38 @@ void Instruction::executeOn(Warp &c) {
bool join = (op == GPGPU) && (func3 == 3); bool join = (op == GPGPU) && (func3 == 3);
predicated = (op == GPGPU) && ((func3 == 7) || (func3 == 2)); // predicated = (op == GPGPU) && ((func3 == 7) || (func3 == 2));
// bool is_branch = (op == B_INST);
// bool is_jump = (op == JAL_INST) || (op == JALR_INST);
bool is_gpgpu = (op == GPGPU);
bool is_tmc = is_gpgpu && (func3 == 0);
bool is_wspawn = is_gpgpu && (func3 == 1);
bool is_barrier = is_gpgpu && (func3 == 4);
bool is_split = is_gpgpu && (func3 == 2);
bool is_join = is_gpgpu && (func3 == 3);
bool gpgpu_zero = (is_tmc || is_barrier || is_wspawn) && (t != 0);
bool not_active = !c.tmask[t];
if (not_active || gpgpu_zero)
{
continue;
}
// printf("Predicated: %d, split: %d, join: %d\n",predicated, split, join ); // printf("Predicated: %d, split: %d, join: %d\n",predicated, split, join );
// printf("%d && ((%d) || (%d))\n",(op == GPGPU), (func3 == 7), (func3 == 2) ); // printf("%d && ((%d) || (%d))\n",(op == GPGPU), (func3 == 7), (func3 == 2) );
// cout << "before " << op << " = " << GPGPU << "\n"; // cout << "before " << op << " = " << GPGPU << "\n";
if (((predicated && !reg[pred]) || !c.tmask[t]) && !split && !join) // if (((predicated && !reg[pred]) || !c.tmask[t]) && !split && !join)
{ // {
// cout << "about to continue\n"; // // cout << "about to continue\n";
continue; // continue;
} // }
// cout << "after\n"; // cout << "after\n";
++c.insts; ++c.insts;
@@ -163,6 +183,7 @@ void Instruction::executeOn(Warp &c) {
bool m_exten; bool m_exten;
// std::cout << "op = " << op << "\n"; // std::cout << "op = " << op << "\n";
// std::cout << "R_INST: " << R_INST << "\n"; // std::cout << "R_INST: " << R_INST << "\n";
int num_to_wspawn;
switch (op) { switch (op) {
case NOP: case NOP:
@@ -462,11 +483,11 @@ void Instruction::executeOn(Warp &c) {
//std::cout << "S_INST\n"; //std::cout << "S_INST\n";
++c.stores; ++c.stores;
memAddr = reg[rsrc[0]] + immsrc; memAddr = reg[rsrc[0]] + immsrc;
// //std::cout << "STORE MEM ADDRESS: " << std::hex << reg[rsrc[0]] << " + " << immsrc << "\n"; std::cout << "STORE MEM ADDRESS: " << std::hex << reg[rsrc[0]] << " + " << immsrc << "\n";
// //std::cout << "FUNC3: " << func3 << "\n"; // //std::cout << "FUNC3: " << func3 << "\n";
if (memAddr == 0x00010000) if ((memAddr == 0x00010000) && (t == 0))
{ {
std::cout << (char) reg[rsrc[1]]; fprintf(stderr, "%c", (char) reg[rsrc[1]]);
break; break;
} }
switch (func3) switch (func3)
@@ -558,7 +579,7 @@ void Instruction::executeOn(Warp &c) {
case JAL_INST: case JAL_INST:
//std::cout << "JAL_INST\n"; //std::cout << "JAL_INST\n";
if (!pcSet) nextPc = (c.pc - 4) + immsrc; if (!pcSet) nextPc = (c.pc - 4) + immsrc;
if (!pcSet) //std::cout << "JAL... SETTING PC: " << nextPc << "\n"; if (!pcSet) {/*std::cout << "JAL... SETTING PC: " << nextPc << "\n"; */}
if (rdest != 0) if (rdest != 0)
{ {
reg[rdest] = c.pc; reg[rdest] = c.pc;
@@ -566,9 +587,9 @@ void Instruction::executeOn(Warp &c) {
pcSet = true; pcSet = true;
break; break;
case JALR_INST: case JALR_INST:
//std::cout << "JALR_INST\n"; std::cout << "JALR_INST\n";
if (!pcSet) nextPc = reg[rsrc[0]] + immsrc; if (!pcSet) nextPc = reg[rsrc[0]] + immsrc;
if (!pcSet) //std::cout << "JALR... SETTING PC: " << nextPc << "\n"; if (!pcSet) {/*std::cout << "JALR... SETTING PC: " << nextPc << "\n";*/ }
if (rdest != 0) if (rdest != 0)
{ {
reg[rdest] = c.pc; reg[rdest] = c.pc;
@@ -578,76 +599,85 @@ void Instruction::executeOn(Warp &c) {
case SYS_INST: case SYS_INST:
//std::cout << "SYS_INST\n"; //std::cout << "SYS_INST\n";
temp = reg[rsrc[0]]; temp = reg[rsrc[0]];
switch (func3) if (immsrc == 0x20) // ThreadID
{ {
case 1: reg[rdest] = t;
// printf("Case 1\n"); D(2, "CSR Reading tid " << hex << immsrc << dec << " and returning " << reg[rdest]);
if (rdest != 0) } else if (immsrc == 0x21) // WarpID
{ {
reg[rdest] = c.csr[immsrc & 0x00000FFF]; reg[rdest] = c.id;
} D(2, "CSR Reading wid " << hex << immsrc << dec << " and returning " << reg[rdest]);
c.csr[immsrc & 0x00000FFF] = temp;
break;
case 2:
// printf("Case 2\n");
if (rdest != 0)
{
// printf("Reading from CSR: %d = %d\n", (immsrc & 0x00000FFF), c.csr[immsrc & 0x00000FFF]);
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
// printf("Writing to CSR --> %d = %d\n", immsrc, (temp | c.csr[immsrc & 0x00000FFF]));
c.csr[immsrc & 0x00000FFF] = temp | c.csr[immsrc & 0x00000FFF];
break;
case 3:
// printf("Case 3\n");
if (rdest != 0)
{
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
c.csr[immsrc & 0x00000FFF] = temp & (~c.csr[immsrc & 0x00000FFF]);
break;
case 5:
// printf("Case 5\n");
if (rdest != 0)
{
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
c.csr[immsrc & 0x00000FFF] = rsrc[0];
break;
case 6:
// printf("Case 6\n");
if (rdest != 0)
{
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
c.csr[immsrc & 0x00000FFF] = rsrc[0] | c.csr[immsrc & 0x00000FFF];
break;
case 7:
// printf("Case 7\n");
if (rdest != 0)
{
reg[rdest] = c.csr[immsrc & 0x00000FFF];
}
c.csr[immsrc & 0x00000FFF] = rsrc[0] & (~c.csr[immsrc & 0x00000FFF]);
break;
case 0:
if (immsrc < 2)
{
//std::cout << "INTERRUPT ECALL/EBREAK\n";
nextActiveThreads = 0;
c.spawned = false;
// c.interrupt(0);
}
break;
default:
break;
} }
// switch (func3)
// {
// case 1:
// // printf("Case 1\n");
// if (rdest != 0)
// {
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// c.csr[immsrc & 0x00000FFF] = temp;
// break;
// case 2:
// // printf("Case 2\n");
// if (rdest != 0)
// {
// // printf("Reading from CSR: %d = %d\n", (immsrc & 0x00000FFF), c.csr[immsrc & 0x00000FFF]);
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// // printf("Writing to CSR --> %d = %d\n", immsrc, (temp | c.csr[immsrc & 0x00000FFF]));
// c.csr[immsrc & 0x00000FFF] = temp | c.csr[immsrc & 0x00000FFF];
// break;
// case 3:
// // printf("Case 3\n");
// if (rdest != 0)
// {
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// c.csr[immsrc & 0x00000FFF] = temp & (~c.csr[immsrc & 0x00000FFF]);
// break;
// case 5:
// // printf("Case 5\n");
// if (rdest != 0)
// {
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// c.csr[immsrc & 0x00000FFF] = rsrc[0];
// break;
// case 6:
// // printf("Case 6\n");
// if (rdest != 0)
// {
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// c.csr[immsrc & 0x00000FFF] = rsrc[0] | c.csr[immsrc & 0x00000FFF];
// break;
// case 7:
// // printf("Case 7\n");
// if (rdest != 0)
// {
// reg[rdest] = c.csr[immsrc & 0x00000FFF];
// }
// c.csr[immsrc & 0x00000FFF] = rsrc[0] & (~c.csr[immsrc & 0x00000FFF]);
// break;
// case 0:
// if (immsrc < 2)
// {
// //std::cout << "INTERRUPT ECALL/EBREAK\n";
// nextActiveThreads = 0;
// c.spawned = false;
// // c.interrupt(0);
// }
// break;
// default:
// break;
// }
break; break;
case TRAP: case TRAP:
//std::cout << "INTERRUPT TRAP\n"; //std::cout << "INTERRUPT TRAP\n";
@@ -670,30 +700,44 @@ void Instruction::executeOn(Warp &c) {
//std::cout << "GPGPU\n"; //std::cout << "GPGPU\n";
switch(func3) switch(func3)
{ {
case 0: case 1:
// WSPAWN // WSPAWN
//std::cout << "WSPAWN\n"; std::cout << "WSPAWN\n";
if (sjOnce) if (sjOnce)
{ {
sjOnce = false; sjOnce = false;
D(0, "Spawning a new warp.");
// //std::cout << "SIZE: " << c.core->w.size() << "\n"; // //std::cout << "SIZE: " << c.core->w.size() << "\n";
for (unsigned i = 0; i < c.core->w.size(); ++i) num_to_wspawn = reg[rsrc[0]];
D(0, "Spawning " << num_to_wspawn << " new warps at PC: " << hex << reg[rsrc[1]]);
for (unsigned i = 1; i < num_to_wspawn; ++i)
{ {
// std::cout << "SPAWNING WARP\n"; // std::cout << "SPAWNING WARP\n";
Warp &newWarp(c.core->w[i]); Warp &newWarp(c.core->w[i]);
// //std::cout << "STARTING\n"; // //std::cout << "STARTING\n";
if (newWarp.spawned == false) { // if (newWarp.spawned == false)
{
// //std::cout << "ABOUT TO START\n"; // //std::cout << "ABOUT TO START\n";
newWarp.pc = reg[rsrc[0]]; newWarp.pc = reg[rsrc[1]];
newWarp.reg[0] = reg; // newWarp.reg[0] = reg;
newWarp.csr = c.csr; // newWarp.csr = c.csr;
for (int kk = 0; kk < newWarp.tmask.size(); kk++)
{
if (kk == 0)
{
newWarp.tmask[kk] = true;
}
else
{
newWarp.tmask[kk] = false;
}
}
newWarp.activeThreads = 1; newWarp.activeThreads = 1;
newWarp.supervisorMode = false; newWarp.supervisorMode = false;
newWarp.spawned = true; newWarp.spawned = true;
break;
} }
} }
break;
} }
break; break;
case 2: case 2:
@@ -704,12 +748,16 @@ void Instruction::executeOn(Warp &c) {
{ {
sjOnce = false; sjOnce = false;
if (checkUnanimous(pred, c.reg, c.tmask)) { if (checkUnanimous(pred, c.reg, c.tmask)) {
//std::cout << "Unanimous pred: " << pred << " val: " << reg[pred] << "\n"; std::cout << "Unanimous pred: " << pred << " val: " << reg[pred] << "\n";
DomStackEntry e(c.tmask); DomStackEntry e(c.tmask);
e.uni = true; e.uni = true;
c.domStack.push(e); c.domStack.push(e);
break; break;
} }
cout << "Split: Original TM: ";
for (auto y : c.tmask) cout << y << " ";
cout << "\n";
DomStackEntry e(pred, c.reg, c.tmask, c.pc); DomStackEntry e(pred, c.reg, c.tmask, c.pc);
c.domStack.push(c.tmask); c.domStack.push(c.tmask);
c.domStack.push(e); c.domStack.push(e);
@@ -717,49 +765,79 @@ void Instruction::executeOn(Warp &c) {
{ {
c.tmask[i] = !e.tmask[i] && c.tmask[i]; c.tmask[i] = !e.tmask[i] && c.tmask[i];
} }
cout << "Split: New TM\n";
for (auto y : c.tmask) cout << y << " ";
cout << "\n";
cout << "Split: Pushed TM PC: " << hex << e.pc << dec << "\n";
for (auto y : e.tmask) cout << y << " ";
cout << "\n";
} }
}
break; break;
}
case 3: case 3:
// JOIN // JOIN
//std::cout << "JOIN\n"; //std::cout << "JOIN\n";
D(3, "JOIN INSTRUCTION");
if (sjOnce) if (sjOnce)
{ {
sjOnce = false; sjOnce = false;
if (!c.domStack.empty() && c.domStack.top().uni) { if (!c.domStack.empty() && c.domStack.top().uni) {
D(2, "Uni branch at join"); D(2, "Uni branch at join");
printf("NEW DOMESTACK: \n");
c.tmask = c.domStack.top().tmask; c.tmask = c.domStack.top().tmask;
c.domStack.pop(); c.domStack.pop();
break; break;
} }
if (!c.domStack.top().fallThrough) { if (!c.domStack.top().fallThrough) {
if (!pcSet) nextPc = c.domStack.top().pc; if (!pcSet) {
nextPc = c.domStack.top().pc;
cout << "join: NOT FALLTHROUGH PC: " << hex << nextPc << dec << '\n';
}
pcSet = true; pcSet = true;
} }
cout << "Join: Old TM: ";
for (auto y : c.tmask) cout << y << " ";
cout << "\n";
c.tmask = c.domStack.top().tmask; c.tmask = c.domStack.top().tmask;
cout << "Join: New TM: " << '\n';
for (auto y : c.tmask) cout << y << " ";
cout << "\n";
c.domStack.pop(); c.domStack.pop();
} }
break; break;
case 4: case 4:
// JMPRT // is_barrier
//std::cout << "JMPRT\n";
nextActiveThreads = 1;
if (!pcSet) nextPc = reg[rsrc[0]];
pcSet = true;
break; break;
case 5: case 0:
// CLONE // TMC
//std::cout << "CLONE\n";
// //std::cout << "CLONING REG: " << rsrc[0] << " lane: " << reg[rsrc[0]] << "\n";
c.reg[reg[rsrc[0]]] = reg;
break;
case 6:
// JALRS
//std::cout << "JALRS\n"; //std::cout << "JALRS\n";
nextActiveThreads = reg[rsrc[1]];
reg[rdest] = c.pc; nextActiveThreads = reg[rsrc[0]];
if (!pcSet) nextPc = reg[rsrc[0]]; {
pcSet = true; for (int ff = 0; ff < c.tmask.size(); ff++)
{
if (ff < nextActiveThreads)
{
c.tmask[ff] = true;
}
else
{
c.tmask[ff] = false;
}
}
}
if (nextActiveThreads == 0)
{
c.spawned = false;
}
// reg[rdest] = c.pc;
// if (!pcSet) nextPc = reg[rsrc[0]];
// pcSet = true;
// //std::cout << "ACTIVE_THREDS: " << rsrc[1] << " val: " << reg[rsrc[1]] << "\n"; // //std::cout << "ACTIVE_THREDS: " << rsrc[1] << " val: " << reg[rsrc[1]] << "\n";
// //std::cout << "nextPC: " << rsrc[0] << " val: " << std::hex << reg[rsrc[0]] << "\n"; // //std::cout << "nextPC: " << rsrc[0] << " val: " << std::hex << reg[rsrc[0]] << "\n";
break; break;
@@ -794,7 +872,11 @@ void Instruction::executeOn(Warp &c) {
// This way, if pc was set by a side effect (such as interrupt), it will // This way, if pc was set by a side effect (such as interrupt), it will
// retain its new value. // retain its new value.
if (pcSet) c.pc = nextPc; if (pcSet)
{
c.pc = nextPc;
cout << "Next PC: " << hex << nextPc << dec << "\n";
}
if (nextActiveThreads > c.reg.size()) { if (nextActiveThreads > c.reg.size()) {
cerr << "Error: attempt to spawn " << nextActiveThreads << " threads. " cerr << "Error: attempt to spawn " << nextActiveThreads << " threads. "

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -102,10 +102,10 @@ void MemoryUnit::ADecoder::write(Addr a, Word w, bool sup, Size wordSize) {
Word before = m.read(a); Word before = m.read(a);
Word new_word = w; Word new_word = w;
if (a == 0x8000012c) // if (a == 0x00010000)
{ // {
printf("WRITING TO 0x8000012c -> %d\n", w); // fprintf(stderr, "%c", w);
} // }
if (wordSize == 8) if (wordSize == 8)
{ {

Binary file not shown.

View File

@@ -1,4 +1,4 @@
echo start > results.txt echo start > results.txt
echo ../kernel/vortex_test.hex echo ../kernel/vortex_test.hex
./harptool -E -a rv32i --core ../kernel/vortex_test.hex -s -b ./harptool -E -a rv32i --core ../runtime/vortex_runtime.hex -s -b 1> emulator.debug

Binary file not shown.

View File

@@ -69,7 +69,7 @@ int main()
// unsigned scal = 3; // unsigned scal = 3;
// // matrix element add // // matrix element add
// vx_e_mat_add(z, &scal, z, NUM_ROWS, NUM_COLS); vx_e_mat_add(z, &scal, z, NUM_ROWS, NUM_COLS);
// vx_print_str("\n\nMatrix Element Addition\n"); // vx_print_str("\n\nMatrix Element Addition\n");
// print_matrix(z); // print_matrix(z);

View File

@@ -19,7 +19,7 @@ vx_set_sp:
.word 0x0005006b # tmc 4 .word 0x0005006b # tmc 4
csrr a3, 0x21 # get wid csrr a3, 0x21 # get wid
slli a3, a3, 15 # shift by wid slli a3, a3, 0x1a # shift by wid
csrr a2, 0x20 # get tid csrr a2, 0x20 # get tid
slli a1, a2, 10 # multiply tid by 1024 slli a1, a2, 10 # multiply tid by 1024
slli a2, a2, 2 # multiply tid by 4 slli a2, a2, 2 # multiply tid by 4

View File

@@ -106,14 +106,14 @@ void test_wsapwn()
void intrinsics_tests() void intrinsics_tests()
{ {
// // TMC test // TMC test
// test_tmc(); test_tmc();
// // Control Divergence Test // Control Divergence Test
// vx_print_str("test_divergence\n"); vx_print_str("test_divergence\n");
// vx_tmc(4); vx_tmc(4);
// test_divergence(); test_divergence();
// vx_tmc(1); vx_tmc(1);
// Test wspawn // Test wspawn

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -1,7 +1,7 @@
:0200000480007A :0200000480007A
:100000009705000093850502130540006B10B500AD :100000009705000093850502130540006B10B500AD
:10001000EF000001EF000072130500006B00050007 :10001000EF000001EF008074130500006B00050085
:10002000130540006B000500F32610029396F600BE :10002000130540006B000500F32610029396A6010D
:10003000732600029315A6001316260037F1FF6FF2 :10003000732600029315A6001316260037F1FF6FF2
:100040003301B1403301D1403301C100F326100226 :100040003301B1403301D1403301C100F326100226
:1000500063860600130500006B0005006780000042 :1000500063860600130500006B0005006780000042
@@ -18,7 +18,7 @@
:100100001301C10067800000B702010023A0B20004 :100100001301C10067800000B702010023A0B20004
:1001100067800000130101FE232E1100232C8100B3 :1001100067800000130101FE232E1100232C8100B3
:10012000130401022326A4FEB70700810327C4FE9F :10012000130401022326A4FEB70700810327C4FE9F
:10013000131727009387470FB307F70083A707001C :10013000131727009387C70FB307F70083A707009C
:1001400013850700EFF05FF9130000008320C10161 :1001400013850700EFF05FF9130000008320C10161
:10015000032481011301010267800000130101FEE5 :10015000032481011301010267800000130101FEE5
:10016000232E1100232C8100130401022326A4FE58 :10016000232E1100232C8100130401022326A4FE58
@@ -26,100 +26,115 @@
:10018000EFF05FF9B707008113850704EFF0DFF4A4 :10018000EFF05FF9B707008113850704EFF0DFF4A4
:10019000130000008320C101032481011301010227 :10019000130000008320C101032481011301010227
:1001A00067800000130101FE232E1100232C810023 :1001A00067800000130101FE232E1100232C810023
:1001B00013040102B707008183A7C7231385070033 :1001B00013040102B707008183A7472413850700B2
:1001C000EFF09FEAB707008103A78723B7070081F5 :1001C000EFF09FEAB707008103A70724B707008174
:1001D00083A7472313850700E7000700EFF0DFEA56 :1001D00083A7C72313850700E7000700EFF0DFEAD6
:1001E0002326A4FE8327C4FE6388070013050000AE :1001E0002326A4FE8327C4FE6388070013050000AE
:1001F000EFF09FE76F00C00013051000EFF0DFE69F :1001F000EFF09FE76F00C00013051000EFF0DFE69F
:10020000130000008320C1010324810113010102B6 :10020000130000008320C1010324810113010102B6
:1002100067800000130101FE232E1100232C8100B2 :1002100067800000130101FE232E1100232C8100B2
:10022000130401022326A4FE2324B4FE2322C4FEC9 :10022000130401022326A4FE2324B4FE2322C4FEC9
:100230002320D4FEB7070081032744FE23ACE72226 :100230002320D4FEB7070081032744FE23A0E72430
:10024000B7070081032704FE23AAE722B70700812E :10024000B7070081032704FE23AEE722B70700812A
:10025000032784FE23AEE722832744FE938507000D :10025000032784FE23A2E724B70700809387471A69
:100260000325C4FEEFF0DFDFEFF0DFF31300000043 :10026000938507000325C4FEEFF09FDFEFF09FF3B7
:100270008320C10103248101130101026780000072 :10027000130000008320C101032481011301010246
:10028000130101FE232E1100232C8100130401020F :1002800067800000130101FE232E1100232C810042
:10029000B707008113854708EFF01FE413054000FE :1002900013040102B707008113854708EFF0DFE37D
:1002A000EFF09FDCEFF0DFDE2326A4FE0327C4FE81 :1002A00013054000EFF05FDCEFF09FDE2326A4FE95
:1002B000B70700818326C4FE939626009387072400 :1002B0000327C4FEB70700818326C4FE9396260059
:1002C000B387F60023A0E70013051000EFF0DFD995 :1002C00093878724B387F60023A0E7001305100067
:1002D000B707008183A7072413850700EFF09FE38A :1002D000EFF09FD9B707008183A787241385070014
:1002E000B707008113850709EFF01FDFB70700810B :1002E000EFF05FE3B707008113850709EFF0DFDE6A
:1002F0009387072483A7470013850700EFF09FE14A :1002F000B70700819387872483A7470013850700EA
:10030000B707008113850709EFF01FDDB7070081EC :10030000EFF05FE1B707008113850709EFF0DFDC4D
:100310009387072483A7870013850700EFF09FDFEB :10031000B70700819387872483A787001385070089
:10032000B707008113850709EFF01FDBB7070081CE :10032000EFF05FDFB707008113850709EFF0DFDA31
:100330009387072483A7C70013850700EFF09FDD8D :10033000B70700819387872483A7C7001385070029
:10034000B707008113850709EFF01FD913000000DC :10034000EFF05FDDB707008113850709EFF0DFD815
:100350008320C10103248101130101026780000091 :10035000130000008320C101032481011301010265
:10036000130101FE232E1100232C8100130401022E :1003600067800000130101FE232E1100232C810061
:10037000EFF01FD22326A4FE8327C4FE93B72700E5 :1003700013040102EFF0DFD12326A4FE8327C4FE7D
:10038000A305F4FE8347B4FE13850700EFF0DFCE2C :1003800093B72700A305F4FE8347B4FE1385070047
:100390008347B4FE638407068327C4FE93B7170020 :10039000EFF09FCE8347B4FE638407068327C4FE35
:1003A0002305F4FE8347A4FE13850700EFF0DFCC9E :1003A00093B717002305F4FE8347A4FE13850700C7
:1003B0008347A4FE63820702B70700810327C4FEB8 :1003B000EFF09FCC8347A4FE63820702B70700815A
:1003C0001317270093870724B307F7001307A0002C :1003C0000327C4FE1317270093878724B307F7007A
:1003D00023A0E7006F000002B70700810327C4FED7 :1003D0001307A00023A0E7006F000002B707008109
:1003E0001317270093870724B307F7001307B000FC :1003E0000327C4FE1317270093878724B307F7005A
:1003F00023A0E700EFF0DFC86F0040068327C4FEAC :1003F0001307B00023A0E700EFF09FC86F0040068E
:1004000093B73700A304F4FE834794FE13850700D7 :100400008327C4FE93B73700A304F4FE834794FE0A
:10041000EFF09FC6834794FE63820702B70700810F :1004100013850700EFF05FC6834794FE63820702EF
:100420000327C4FE1317270093870724B307F70099 :10042000B70700810327C4FE13172700938787248B
:100430001307C00023A0E7006F000002B707008188 :10043000B307F7001307C00023A0E7006F00000216
:100440000327C4FE1317270093870724B307F70079 :10044000B70700810327C4FE13172700938787246B
:100450001307D00023A0E700EFF09FC2EFF05FC2C8 :10045000B307F7001307D00023A0E700EFF05FC257
:10046000B707008183A7072413850700EFF09FCA11 :10046000EFF01FC2B707008183A787241385070019
:10047000B707008113850709EFF01FC6B707008192 :10047000EFF05FCAB707008113850709EFF0DFC50A
:100480009387072483A7470013850700EFF09FC8D1 :10048000B70700819387872483A747001385070058
:10049000B707008113850709EFF01FC4B707008174 :10049000EFF05FC8B707008113850709EFF0DFC3EE
:1004A0009387072483A7870013850700EFF09FC673 :1004A000B70700819387872483A7870013850700F8
:1004B000B707008113850709EFF01FC2B707008156 :1004B000EFF05FC6B707008113850709EFF0DFC1D2
:1004C0009387072483A7C70013850700EFF09FC415 :1004C000B70700819387872483A7C7001385070098
:1004D000B707008113850709EFF01FC01300000064 :1004D000EFF05FC4B707008113850709EFF0DFBFB6
:1004E0008320C10103248101130101026780000000 :1004E000130000008320C1010324810113010102D4
:1004F000130101FE232E1100232C8100130401029D :1004F00067800000130101FE232E1100232C8100D0
:10050000EFF09FB82326A4FEB70700810327C4FE9F :1005000013040102EFF05FB82326A4FEB7070081B1
:100510001317270093870725B307F7000327C4FEA7 :100510000327C4FE1317270093878725B307F70027
:1005200023A0E7008327C4FE6386070013050000AD :100520000327C4FE23A0E7008327C4FE63860700D9
:10053000EFF09FB3130000008320C1010324810169 :1005300013050000EFF05FB3130000008320C1013A
:100540001301010267800000130101FE232E110038 :10054000032481011301010267800000130101FEF1
:10055000232C810013040102B70700809387074F03 :10055000232E1100232C810013040102B707008011
:100560002326F4FE8325C4FE13054000EFF05FAFA1 :100560009387474F2326F4FE8325C4FE13054000DE
:10057000EFF01FF8B707008183A707251385070051 :10057000EFF01FAFEFF01FF8B707008183A78725C3
:10058000EFF05FB9B707008113850709EFF0DFB41B :1005800013850700EFF01FB9B7070081138507092E
:10059000B70700819387072583A7470013850700C6 :10059000EFF09FB4B70700819387872583A74700B3
:1005A000EFF05FB7B707008113850709EFF0DFB2FF :1005A00013850700EFF01FB7B70700811385070910
:1005B000B70700819387072583A787001385070066 :1005B000EFF09FB2B70700819387872583A7870055
:1005C000EFF05FB5B707008113850709EFF0DFB0E3 :1005C00013850700EFF01FB5B707008113850709F2
:1005D000B70700819387072583A7C7001385070006 :1005D000EFF09FB0B70700819387872583A7C700F7
:1005E000EFF05FB3B707008113850709EFF0DFAEC7 :1005E00013850700EFF01FB3B707008113850709D4
:1005F000130000008320C1010324810113010102C3 :1005F000EFF09FAE130000008320C10103248101AE
:1006000067800000130101FF2326110023248100CD :100600001301010267800000130101FF232611007E
:1006100013040101B707008113854709EFF0DFAB31 :100610002324810013040101EFF0DFC6B707008136
:10062000EFF09FF2130000008320C100032481003B :1006200013854709EFF05FAB13054000EFF0DFA340
:100630001301010167800000130101FC232E110248 :10063000EFF05FD313051000EFF01FA3B7070081A1
:10064000232C8102130401042326A4FC8327C4FC69 :100640001385870AEFF05FA9EFF05FF01300000059
:100650002326F4FEEFF05FA32324A4FEEFF05FA3B4 :100650008320C10003248100130101016780000091
:100660002322A4FE8327C4FE83A70701032784FE59 :10066000130101FC232E1102232C81021304010427
:10067000637EF7008327C4FE83A7C700032744FED9 :100670002326A4FC8327C4FC2326F4FEEFF0DFA08E
:100680006376F700930710006F0080009307000067 :100680002324A4FEEFF0DFA02322A4FE8327C4FED0
:10069000A301F4FE834734FE93F71700A301F4FE91 :1006900083A70701032784FE637EF7008327C4FE38
:1006A000834734FE13850700EFF01F9D834734FE18 :1006A00083A7C700032744FE6376F7009307100073
:1006B000638607068327C4FE03A7C700832784FE3B :1006B0006F00800093070000A301F4FE834734FE1F
:1006C000B307F702032744FEB307F700232EF4FC19 :1006C00093F71700A301F4FE834734FE1385070058
:1006D0008327C4FE03A707008327C4FD9397270041 :1006D000EFF09F9A834734FE638607068327C4FEA4
:1006E000B307F70083A607008327C4FE03A74700CC :1006E00003A7C700832784FEB307F702032744FE4E
:1006F0008327C4FD93972700B307F70003A70700DC :1006F000B307F700232EF4FC8327C4FE03A70700EB
:100700008327C4FE03A687008327C4FD9397270091 :100700008327C4FD93972700B307F70083A607004C
:10071000B307F6003387E60023A0E700EFF05F960B :100710008327C4FE03A747008327C4FD93972700C0
:10072000130000008320C10303248103130101048B :10072000B307F70003A707008327C4FE03A68700CB
:1007300067800000130101FF23261100232481009C :100730008327C4FD93972700B307F6003387E600AD
:100740001304010113051000EFF01F92EFF05FB3E7 :1007400023A0E700EFF0DF93130000008320C10334
:10075000B70700811385070EEFF01F9813054000BF :10075000032481031301010467800000130101FCDD
:10076000EFF09F90EFF0DFBF13051000EFF0DF8F89 :10076000232E1102232C810213040104130510000F
:1007700093070000138507008320C1000324810034 :10077000EFF09F8FB70700819387C71B2324F4FCFA
:08078000130101016780000074 :10078000B70700819387C71F2326F4FCB7070081B2
:10079000938787262328F4FC93074000232AF4FC40
:1007A00093074000232CF4FC930740002322F4FE1F
:1007B000930740002320F4FE032744FE832504FE14
:1007C000930784FC93860700B707008013860766AB
:1007D00013050700EFF01FA4232604FE6F00000896
:1007E000232404FE6F004005032744FD8327C4FE35
:1007F0003307F702832784FEB307F700232EF4FCA8
:10080000B70700810327C4FD1317270093878726A6
:10081000B307F70083A7070013850700EFF09F8F4A
:10082000B70700811385470FEFF01F8B832784FEE6
:10083000938717002324F4FE032744FD832784FEB7
:10084000E3E4E7FAB70700811385870FEFF0DF884D
:100850008327C4FE938717002326F4FE032784FD15
:100860008327C4FEE3EEE7F6930700001385070035
:100870008320C10303248103130101046780000066
:02000004810079 :02000004810079
:10000000300000003100000032000000330000002A :10000000300000003100000032000000330000002A
:10001000340000003500000036000000370000000A :10001000340000003500000036000000370000000A
@@ -130,32 +145,32 @@
:100060003700000038000000390000006100000087 :100060003700000038000000390000006100000087
:1000700062000000630000006400000065000000F2 :1000700062000000630000006400000065000000F2
:1000800066000000746573745F746D630A0000009D :1000800066000000746573745F746D630A0000009D
:100090000A000000746573745F737061776E0A0004 :100090000A000000746573745F6469766572676551
:1000A000300000003100000032000000330000008A :1000A0006E63650A00000000746573745F737061AD
:1000B000340000003500000036000000370000006A :1000B000776E0A00300000003100000032000000BE
:1000C00038000000390000006100000062000000FC :1000C000330000003400000035000000360000005E
:1000D000630000006400000065000000660000008E :1000D0003700000038000000390000006100000017
:1000E000746573745F646976657267656E63650ACB :1000E0006200000063000000640000006500000082
:0100F000000F :0A00F00066000000200000000A0076
:1000F4000000008104000081080000810C000081E0 :1000FC000000008104000081080000810C000081D8
:100104001000008114000081180000811C0000818F :10010C001000008114000081180000811C00008187
:100114002000008124000081280000812C0000813F :10011C002000008124000081280000812C00008137
:100124003000008134000081380000813C000081EF :10012C003000008134000081380000813C000081E7
:1001340044000081480000814C000081500000818F :10013C0044000081480000814C0000815000008187
:1001440054000081580000815C000081600000813F :10014C0054000081580000815C0000816000008137
:1001540064000081680000816C00008170000081EF :10015C0064000081680000816C00008170000081E7
:1001640074000081780000817C000081800000819F :10016C0074000081780000817C0000818000008197
:10017400A0000081A4000081A8000081AC000081DF :10017C00B4000081B8000081BC000081C000008187
:10018400B0000081B4000081B8000081BC0000818F :10018C00C4000081C8000081CC000081D000008137
:10019400C0000081C4000081C8000081CC0000813F :10019C00D4000081D8000081DC000081E0000081E7
:1001A400D0000081D4000081D8000081DC000081EF :1001AC00E4000081E8000081EC000081F000008197
:1001B4000100000001000000010000000100000037 :1001BC00050000000500000005000000050000001F
:1001C4000100000001000000010000000100000027 :1001CC00060000000600000006000000060000000B
:1001D4000100000001000000010000000100000017 :1001DC0007000000070000000700000007000000F7
:1001E4000100000001000000010000000100000007 :1001EC0008000000080000000800000008000000E3
:1001F40006000000060000000600000006000000E3 :1001FC0001000000010000000100000001000000EF
:1002040006000000060000000600000006000000D2 :10020C0001000000010000000100000001000000DE
:1002140006000000060000000600000006000000C2 :10021C0001000000010000000100000001000000CE
:1002240006000000060000000600000006000000B2 :10022C0001000000010000000100000001000000BE
:040000058000000077 :040000058000000077
:00000001FF :00000001FF

View File

@@ -31,7 +31,7 @@ void vx_spawnWarps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void
global_function_pointer = func_ptr; global_function_pointer = func_ptr;
global_argument_struct = args; global_argument_struct = args;
global_num_threads = numThreads; global_num_threads = numThreads;
vx_wspawn(numWarps, (unsigned) func_ptr); vx_wspawn(numWarps, (unsigned) setup_call);
setup_call(); setup_call();
} }

View File

@@ -14,16 +14,16 @@ typedef struct
} mat_add_args_t; } mat_add_args_t;
unsigned x[] = {1, 1, 1, 1, unsigned x[] = {5, 5, 5, 5,
6, 6, 6, 6,
7, 7, 7, 7,
8, 8, 8, 8};
unsigned y[] = {1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1}; 1, 1, 1, 1};
unsigned y[] = {6, 6, 6, 6,
6, 6, 6, 6,
6, 6, 6, 6,
6, 6, 6, 6};
unsigned z[] = {0, 0, 0, 0, unsigned z[] = {0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -51,48 +51,38 @@ int main()
{ {
// Main is called with all threads active of warp 0 // Main is called with all threads active of warp 0
vx_tmc(1); vx_tmc(1);
///////////////////////////////////////////////////////////////////////
// mat_add_args_t arguments;
// arguments.x = x;
// arguments.y = y;
// arguments.z = z;
// arguments.numColums = 4;
// arguments.numRows = 4;
// int numWarps = 4;
// int numThreads = 4;
// vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
/* mat_add_args_t arguments;
NOTE: * when test_wspawn is called from instrinsic_tests, RA 80000458 is stored at address 6fffefbc, arguments.x = x;
but when read back again it reads zeros even though no other write request is made to that arguments.y = y;
address (when only test_wsapwn is called by itself). arguments.z = z;
arguments.numColums = 4;
arguments.numRows = 4;
* When test_wsapwn is called by itself from main new lines are not printed....
* when test_wspawn is called with other tests from main it works fine... int numWarps = 4;
*/ int numThreads = 4;
vx_spawnWarps(numWarps, numThreads, mat_add_kernel, &arguments);
for (int i = 0; i < arguments.numRows; i++)
{
for (int j = 0; j < arguments.numColums; j++)
{
unsigned index = (i * arguments.numColums) + j;
vx_print_hex(z[index]);
vx_print_str(" ");
}
vx_print_str("\n");
}
///////////////////////////////////////////////////////////////////////
// intrinsics_tests(); // intrinsics_tests();
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
test_tmc();
// Control Divergence Test
vx_print_str("test_divergence\n");
vx_tmc(4);
test_divergence();
vx_tmc(1);
// // Test wspawn
// vx_print_str("test_wspawn\n");
// test_wsapwn();
return 0; return 0;
} }