A multithreaded version of the matrix multiply.

This commit is contained in:
chad
2013-10-13 21:15:39 -04:00
parent 23e3b9a32b
commit c107a49ff0

View File

@@ -12,7 +12,8 @@
.perm x
.entry
.global
entry: ldi %r0, matrix_a;
entry:
ldi %r0, matrix_a;
ldi %r1, #3;
jali %r5, matgen;
@@ -59,34 +60,54 @@ mgloop: jali %r5, randf;
/* matmul: write the matrix product of the square matrices at (%r0) and (%r1)
 * to (%r2). Each matrix is 2^N x 2^N, where N = %r3.
 *
 * Registers read on entry:
 *   %r0 - matrix a pointer
 *   %r1 - matrix b pointer
 *   %r2 - destination matrix pointer
 *   %r3 - N
 *   %r5 - link value supplied by the caller (copied to %r22 below because
 *         %r5 is reused as the link register of the jalis at the end)
 *
 * Registers written here and left for matmulthd: %r10 (row-index shift
 * amount) and %r23 (row count per thread).
 *
 * NOTE(review): this listing defines `matmul:` and `sloop:` twice, repeats
 * `ldi %r4, #1;`, and has a `jmpr %r5;` ahead of the spawn loop. These look
 * like leftover lines from the single-threaded version -- confirm and remove
 * them before assembling.
 */
matmul: ldi %r4, #1;
/* Keep the caller's link value in %r22; the final jmpr below uses %r22. */
matmul: ori %r22, %r5, #0
ldi %r4, #1;
ldi %r10, (`__WORD); /* ` is the log base 2 operator */
/* %r4 = 1 << N */
shl %r4, %r4, %r3;
/* %r10 = log2(__WORD) + N; matmulthd shifts a row index left by %r10 */
add %r10, %r10, %r3;
ldi %r14, #1;
/* %r14 = 1 << %r10; presumably the size of one matrix row in bytes, if
 * __WORD is the word size in bytes -- TODO confirm */
shl %r14, %r14, %r10;
divi %r17, %r14, THREADS; /* Spawn threads */
sloop:
/* %r23 = %r4 / THREADS: the per-thread row count documented for matmulthd */
divi %r23, %r4, THREADS; /* Spawn threads */
/* %r18 counts down from THREADS; %r19 counts up from 0 */
ldi %r18, THREADS
ldi %r19, #0
/* %r20 = %r14 * %r23: pointer step between consecutive threads' slices */
mul %r20, %r14, %r23;
jmpr %r5;
ori %r30, %r2, #0; /* Save r0 and r2 for thread 0 */
ori %r31, %r0, #0;
/* Each pass advances the matrix a and destination pointers by %r20 and
 * increments %r19; the pass that brings %r18 to zero exits to sfin without
 * executing clone. */
sloop: add %r0, %r0, %r20
add %r2, %r2, %r20
addi %r19, %r19, #1;
subi %r18, %r18, #1;
iszero @p0, %r18;
@p0 ? jmpi sfin;
/* presumably copies the current register state (including the advanced
 * %r0/%r2) to lane %r19 -- TODO confirm against the HARP ISA */
clone %r19;
jmpi sloop;
sfin: ori %r2, %r30, #0; /* restore r0 and r2 */
ori %r0, %r31, #0;
/* presumably jump-and-link to matmulthd with %r19 threads active -- TODO
 * confirm against the HARP ISA */
jalis %r5, %r19, matmulthd;
/* Return through the link value saved in %r22 at entry. */
jmpr %r22;
/* One thread of matrix multiplication. Expected register values at start:
* %r0 - matrix a pointer (plus offset)
* %r1 - matrix b pointer
* %r2 - destination matrix pointer (plus offset)
* %r17 - row count
* %r23 - row count
*/
matmulthd: ldi %r9, #0; /* result row: %r9 */
rloop: ldi %r6, #0; /* result col: %r6 */
cloop: shli %r16, %r6, (`__WORD);
shl %r15, %r9, %r10;
add %r11, %r15, %r0;
add %r12, %r16, %r1;
ldi %r13, #0;
ldi %r8, #0 /* dot prod position: %r8 */
ldi %r13, #0;
iloop: ld %r7, %r11, #0;
ld %r17, %r12, #0;
fmul %r7, %r7, %r17
@@ -109,7 +130,7 @@ iloop: ld %r7, %r11, #0;
@p0 ? jmpi cloop;
addi %r9, %r9, #1;
sub %r7, %r9, %r17;
sub %r7, %r9, %r23;
rtop @p0, %r7;
@p0 ? jmpi rloop;
@@ -117,8 +138,24 @@ iloop: ld %r7, %r11, #0;
.align 4096
.perm rw
matrix_a: .space 64;
matrix_b: .space 64;
matrix_r: .space 64
matrix_a: .word 1f 2f 3f 4f 5f 6f 7f 8f
.word 2f 3f 4f 5f 6f 7f 8f 9f
.word 3f 4f 5f 6f 7f 8f 9f 10f
.word 4f 5f 6f 7f 8f 9f 10f 11f
.word 5f 6f 7f 8f 9f 10f 11f 12f
.word 6f 7f 8f 9f 10f 11f 12f 13f
.word 7f 8f 9f 10f 11f 12f 13f 14f
.word 8f 9f 10f 11f 12f 13f 14f 15f
matrix_b: .word 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7
.word 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7
.word 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7
.word 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7
.word 4.0 4.1 4.2 4.3 4.4 4.5 4.6 4.7
.word 5.0 5.1 5.2 5.3 5.4 5.5 5.6 5.7
.word 6.0 6.1 6.2 6.3 6.4 6.5 6.6 6.7
.word 7.0 7.1 7.2 7.3 7.4 7.5 7.6 7.7
retaddr: .word 0