From cfbbc665fd85aaa5ad6b66f332be114b087968df Mon Sep 17 00:00:00 2001 From: chad Date: Sun, 13 Oct 2013 21:14:03 -0400 Subject: [PATCH 1/3] Register numbers are decimal. Don't spawn more threads than there are lanes. --- src/instruction.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/instruction.cpp b/src/instruction.cpp index a2c70892..7f35f2f7 100644 --- a/src/instruction.cpp +++ b/src/instruction.cpp @@ -85,17 +85,17 @@ Instruction::InstTableEntry Instruction::instTable[] = { ostream &Harp::operator<<(ostream& os, Instruction &inst) { if (inst.predicated) { - os << "@p" << inst.pred << " ? "; + os << "@p" << dec << inst.pred << " ? "; } os << Instruction::instTable[inst.op].opString << ' '; - if (inst.rdestPresent) os << "%r" << inst.rdest << ' '; + if (inst.rdestPresent) os << "%r" << dec << inst.rdest << ' '; if (inst.pdestPresent) os << "@p" << inst.pdest << ' '; for (int i = 0; i < inst.nRsrc; i++) { - os << "%r" << inst.rsrc[i] << ' '; + os << "%r" << dec << inst.rsrc[i] << ' '; } for (int i = 0; i < inst.nPsrc; i++) { - os << "@p" << inst.psrc[i] << ' '; + os << "@p" << dec << inst.psrc[i] << ' '; } if (inst.immsrcPresent) { if (inst.refLiteral) os << inst.refLiteral->name; @@ -288,4 +288,9 @@ void Instruction::executeOn(Core &c) { D(3, "End instruction execute."); c.activeThreads = nextActiveThreads; + if (nextActiveThreads > c.reg.size()) { + cerr << "Error: attempt to spawn " << nextActiveThreads << " threads. " + << c.reg.size() << " available.\n"; + abort(); + } } From 23e3b9a32b247332d669c4a355d4553b5c9d4aa1 Mon Sep 17 00:00:00 2001 From: chad Date: Sun, 13 Oct 2013 21:14:35 -0400 Subject: [PATCH 2/3] Register names are decimal. 
--- src/core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.cpp b/src/core.cpp index 3835ef50..a4fc57aa 100644 --- a/src/core.cpp +++ b/src/core.cpp @@ -117,7 +117,7 @@ void Core::step() { if (USE_DEBUG >= 3) { D(3, "Register state:"); for (unsigned i = 0; i < reg[0].size(); ++i) { - D_RAW(" %r" << i << ':'); + D_RAW(" %r" << dec << i << ':'); for (unsigned j = 0; j < reg.size(); ++j) D_RAW(' ' << hex << reg[j][i] << ' '); D_RAW('(' << shadowReg[i] << ')' << endl); From c107a49ff0a6d540274a973be905ab32fa097884 Mon Sep 17 00:00:00 2001 From: chad Date: Sun, 13 Oct 2013 21:15:39 -0400 Subject: [PATCH 3/3] A multithreaded version of the matrix multiply. --- src/test/matmul-mt.s | 57 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/src/test/matmul-mt.s b/src/test/matmul-mt.s index 643942ce..0575b63c 100644 --- a/src/test/matmul-mt.s +++ b/src/test/matmul-mt.s @@ -12,7 +12,8 @@ .perm x .entry .global -entry: ldi %r0, matrix_a; +entry: + ldi %r0, matrix_a; ldi %r1, #3; jali %r5, matgen; @@ -59,34 +60,54 @@ mgloop: jali %r5, randf; /* Write the matrix product of square matrix at (%r0) and (%r1) to (%r2). The size of these matrices is 2^Nx2^N, where N = %r3 */ -matmul: ldi %r4, #1; +matmul: ori %r22, %r5, #0 + ldi %r4, #1; ldi %r10, (`__WORD); /* ` is the log base 2 operator */ shl %r4, %r4, %r3; add %r10, %r10, %r3; ldi %r14, #1; shl %r14, %r14, %r10; - divi %r17, %r14, THREADS; /* Spawn threads */ -sloop: + divi %r23, %r4, THREADS; /* Spawn threads */ + ldi %r18, THREADS + ldi %r19, #0 + mul %r20, %r14, %r23; - jmpr %r5; + ori %r30, %r2, #0; /* Save r0 and r2 for thread 0 */ + ori %r31, %r0, #0; + +sloop: add %r0, %r0, %r20 + add %r2, %r2, %r20 + addi %r19, %r19, #1; + subi %r18, %r18, #1; + iszero @p0, %r18; + @p0 ? 
jmpi sfin; + clone %r19; + jmpi sloop; + +sfin: ori %r2, %r30, #0; /* restore r0 and r2 */ + ori %r0, %r31, #0; + jalis %r5, %r19, matmulthd; + + jmpr %r22; /* One thread of matrix multiplication. Expected register values at start: * %r0 - matrix a pointer (plus offset) * %r1 - matrix b pointer * %r2 - destination matrix pointer (plus offset) - * %r17 - row count + * %r23 - row count */ matmulthd: ldi %r9, #0; /* result row: %r9 */ rloop: ldi %r6, #0; /* result col: %r6 */ + cloop: shli %r16, %r6, (`__WORD); shl %r15, %r9, %r10; add %r11, %r15, %r0; add %r12, %r16, %r1; - ldi %r13, #0; ldi %r8, #0 /* dot prod position: %r8 */ + ldi %r13, #0; iloop: ld %r7, %r11, #0; ld %r17, %r12, #0; fmul %r7, %r7, %r17 @@ -109,7 +130,7 @@ iloop: ld %r7, %r11, #0; @p0 ? jmpi cloop; addi %r9, %r9, #1; - sub %r7, %r9, %r17; + sub %r7, %r9, %r23; rtop @p0, %r7; @p0 ? jmpi rloop; @@ -117,8 +138,24 @@ iloop: ld %r7, %r11, #0; .align 4096 .perm rw -matrix_a: .space 64; -matrix_b: .space 64; matrix_r: .space 64 +matrix_a: .word 1f 2f 3f 4f 5f 6f 7f 8f + .word 2f 3f 4f 5f 6f 7f 8f 9f + .word 3f 4f 5f 6f 7f 8f 9f 10f + .word 4f 5f 6f 7f 8f 9f 10f 11f + .word 5f 6f 7f 8f 9f 10f 11f 12f + .word 6f 7f 8f 9f 10f 11f 12f 13f + .word 7f 8f 9f 10f 11f 12f 13f 14f + .word 8f 9f 10f 11f 12f 13f 14f 15f + +matrix_b: .word 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 + .word 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 + .word 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 + .word 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 + .word 4.0 4.1 4.2 4.3 4.4 4.5 4.6 4.7 + .word 5.0 5.1 5.2 5.3 5.4 5.5 5.6 5.7 + .word 6.0 6.1 6.2 6.3 6.4 6.5 6.6 6.7 + .word 7.0 7.1 7.2 7.3 7.4 7.5 7.6 7.7 + retaddr: .word 0