diff --git a/src/core.cpp b/src/core.cpp index 23cc87aa..a4fc57aa 100644 --- a/src/core.cpp +++ b/src/core.cpp @@ -117,7 +117,7 @@ void Core::step() { if (USE_DEBUG >= 3) { D(3, "Register state:"); for (unsigned i = 0; i < reg[0].size(); ++i) { - D_RAW(dec << " %r" << i << ':'); + D_RAW(" %r" << dec << i << ':'); for (unsigned j = 0; j < reg.size(); ++j) D_RAW(' ' << hex << reg[j][i] << ' '); D_RAW('(' << shadowReg[i] << ')' << endl); diff --git a/src/instruction.cpp b/src/instruction.cpp index aaddb5d4..6b551c1a 100644 --- a/src/instruction.cpp +++ b/src/instruction.cpp @@ -87,17 +87,17 @@ ostream &Harp::operator<<(ostream& os, Instruction &inst) { os << dec; if (inst.predicated) { - os << "@p" << inst.pred << " ? "; + os << "@p" << dec << inst.pred << " ? "; } os << Instruction::instTable[inst.op].opString << ' '; - if (inst.rdestPresent) os << "%r" << inst.rdest << ' '; + if (inst.rdestPresent) os << "%r" << dec << inst.rdest << ' '; if (inst.pdestPresent) os << "@p" << inst.pdest << ' '; for (int i = 0; i < inst.nRsrc; i++) { - os << "%r" << inst.rsrc[i] << ' '; + os << "%r" << dec << inst.rsrc[i] << ' '; } for (int i = 0; i < inst.nPsrc; i++) { - os << "@p" << inst.psrc[i] << ' '; + os << "@p" << dec << inst.psrc[i] << ' '; } if (inst.immsrcPresent) { if (inst.refLiteral) os << inst.refLiteral->name; @@ -290,4 +290,9 @@ void Instruction::executeOn(Core &c) { D(3, "End instruction execute."); c.activeThreads = nextActiveThreads; + if (nextActiveThreads > c.reg.size()) { + cerr << "Error: attempt to spawn " << nextActiveThreads << " threads. " + << c.reg.size() << " available.\n"; + abort(); + } } diff --git a/src/test/matmul-mt.s b/src/test/matmul-mt.s index c8c76bd3..83f550ff 100644 --- a/src/test/matmul-mt.s +++ b/src/test/matmul-mt.s @@ -12,7 +12,8 @@ .perm x .entry .global -entry: ldi %r0, matrix_a; +entry: + ldi %r0, matrix_a; ldi %r1, #3; jali %r5, matgen; @@ -98,14 +99,15 @@ sloop: add %r0, %r0, %r17; */ matmulthd: ldi %r9, #0; /* result row: %r9 */ rloop: ldi %r6, #0; /* result col: %r6 */ + cloop: shli %r16, %r6, (`__WORD); shl %r15, %r9, %r10; add %r11, %r15, %r0; add %r12, %r16, %r1; - ldi %r13, #0; ldi %r8, #0 /* dot prod position: %r8 */ + ldi %r13, #0; iloop: ld %r7, %r11, #0; ld %r23, %r12, #0; fmul %r7, %r7, %r23 @@ -129,6 +131,7 @@ iloop: ld %r7, %r11, #0; addi %r9, %r9, #1; sub %r7, %r9, %r24; + rtop @p0, %r7; @p0 ? jmpi rloop;