A multithreaded version of the matrix multiply.
This commit is contained in:
@@ -12,7 +12,8 @@
|
|||||||
.perm x
|
.perm x
|
||||||
.entry
|
.entry
|
||||||
.global
|
.global
|
||||||
entry: ldi %r0, matrix_a;
|
entry:
|
||||||
|
ldi %r0, matrix_a;
|
||||||
ldi %r1, #3;
|
ldi %r1, #3;
|
||||||
jali %r5, matgen;
|
jali %r5, matgen;
|
||||||
|
|
||||||
@@ -59,34 +60,54 @@ mgloop: jali %r5, randf;
|
|||||||
/* Write the matrix product of square matrix at (%r0) and (%r1) to (%r2). The
|
/* Write the matrix product of square matrix at (%r0) and (%r1) to (%r2). The
|
||||||
size of these matrices is 2^Nx2^N, where N = %r3 */
|
size of these matrices is 2^Nx2^N, where N = %r3 */
|
||||||
|
|
||||||
matmul: ldi %r4, #1;
|
matmul: ori %r22, %r5, #0
|
||||||
|
ldi %r4, #1;
|
||||||
ldi %r10, (`__WORD); /* ` is the log base 2 operator */
|
ldi %r10, (`__WORD); /* ` is the log base 2 operator */
|
||||||
shl %r4, %r4, %r3;
|
shl %r4, %r4, %r3;
|
||||||
add %r10, %r10, %r3;
|
add %r10, %r10, %r3;
|
||||||
ldi %r14, #1;
|
ldi %r14, #1;
|
||||||
shl %r14, %r14, %r10;
|
shl %r14, %r14, %r10;
|
||||||
|
|
||||||
divi %r17, %r14, THREADS; /* Spawn threads */
|
divi %r23, %r4, THREADS; /* Spawn threads */
|
||||||
sloop:
|
ldi %r18, THREADS
|
||||||
|
ldi %r19, #0
|
||||||
|
mul %r20, %r14, %r23;
|
||||||
|
|
||||||
jmpr %r5;
|
ori %r30, %r2, #0; /* Save r0 and r2 for thread 0 */
|
||||||
|
ori %r31, %r0, #0;
|
||||||
|
|
||||||
|
sloop: add %r0, %r0, %r20
|
||||||
|
add %r2, %r2, %r20
|
||||||
|
addi %r19, %r19, #1;
|
||||||
|
subi %r18, %r18, #1;
|
||||||
|
iszero @p0, %r18;
|
||||||
|
@p0 ? jmpi sfin;
|
||||||
|
clone %r19;
|
||||||
|
jmpi sloop;
|
||||||
|
|
||||||
|
sfin: ori %r2, %r30, #0; /* restore r1 and r2 */
|
||||||
|
ori %r0, %r31, #0;
|
||||||
|
jalis %r5, %r19, matmulthd;
|
||||||
|
|
||||||
|
jmpr %r22;
|
||||||
|
|
||||||
/* One thread of matrix multiplication. Expected register values at start:
|
/* One thread of matrix multiplication. Expected register values at start:
|
||||||
* %r0 - matrix a pointer (plus offset)
|
* %r0 - matrix a pointer (plus offset)
|
||||||
* %r1 - matrix b pointer
|
* %r1 - matrix b pointer
|
||||||
* %r2 - destination matrix pointer (plus offset)
|
* %r2 - destination matrix pointer (plus offset)
|
||||||
* %r17 - row count
|
* %r23 - row count
|
||||||
*/
|
*/
|
||||||
matmulthd: ldi %r9, #0; /* result row: %r9 */
|
matmulthd: ldi %r9, #0; /* result row: %r9 */
|
||||||
rloop: ldi %r6, #0; /* result col: %r6 */
|
rloop: ldi %r6, #0; /* result col: %r6 */
|
||||||
|
|
||||||
cloop: shli %r16, %r6, (`__WORD);
|
cloop: shli %r16, %r6, (`__WORD);
|
||||||
shl %r15, %r9, %r10;
|
shl %r15, %r9, %r10;
|
||||||
|
|
||||||
add %r11, %r15, %r0;
|
add %r11, %r15, %r0;
|
||||||
add %r12, %r16, %r1;
|
add %r12, %r16, %r1;
|
||||||
ldi %r13, #0;
|
|
||||||
|
|
||||||
ldi %r8, #0 /* dot prod position: %r8 */
|
ldi %r8, #0 /* dot prod position: %r8 */
|
||||||
|
ldi %r13, #0;
|
||||||
iloop: ld %r7, %r11, #0;
|
iloop: ld %r7, %r11, #0;
|
||||||
ld %r17, %r12, #0;
|
ld %r17, %r12, #0;
|
||||||
fmul %r7, %r7, %r17
|
fmul %r7, %r7, %r17
|
||||||
@@ -109,7 +130,7 @@ iloop: ld %r7, %r11, #0;
|
|||||||
@p0 ? jmpi cloop;
|
@p0 ? jmpi cloop;
|
||||||
|
|
||||||
addi %r9, %r9, #1;
|
addi %r9, %r9, #1;
|
||||||
sub %r7, %r9, %r17;
|
sub %r7, %r9, %r23;
|
||||||
rtop @p0, %r7;
|
rtop @p0, %r7;
|
||||||
@p0 ? jmpi rloop;
|
@p0 ? jmpi rloop;
|
||||||
|
|
||||||
@@ -117,8 +138,24 @@ iloop: ld %r7, %r11, #0;
|
|||||||
|
|
||||||
.align 4096
|
.align 4096
|
||||||
.perm rw
|
.perm rw
|
||||||
matrix_a: .space 64;
|
|
||||||
matrix_b: .space 64;
|
|
||||||
matrix_r: .space 64
|
matrix_r: .space 64
|
||||||
|
|
||||||
|
matrix_a: .word 1f 2f 3f 4f 5f 6f 7f 8f
|
||||||
|
.word 2f 3f 4f 5f 6f 7f 8f 9f
|
||||||
|
.word 3f 4f 5f 6f 7f 8f 9f 10f
|
||||||
|
.word 4f 5f 6f 7f 8f 9f 10f 11f
|
||||||
|
.word 5f 6f 7f 8f 9f 10f 11f 12f
|
||||||
|
.word 6f 7f 8f 9f 10f 11f 12f 13f
|
||||||
|
.word 7f 8f 9f 10f 11f 12f 13f 14f
|
||||||
|
.word 8f 9f 10f 11f 12f 13f 14f 15f
|
||||||
|
|
||||||
|
matrix_b: .word 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7
|
||||||
|
.word 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7
|
||||||
|
.word 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7
|
||||||
|
.word 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7
|
||||||
|
.word 4.0 4.1 4.2 4.3 4.4 4.5 4.6 4.7
|
||||||
|
.word 5.0 5.1 5.2 5.3 5.4 5.5 5.6 5.7
|
||||||
|
.word 6.0 6.1 6.2 6.3 6.4 6.5 6.6 6.7
|
||||||
|
.word 7.0 7.1 7.2 7.3 7.4 7.5 7.6 7.7
|
||||||
|
|
||||||
retaddr: .word 0
|
retaddr: .word 0
|
||||||
|
|||||||
Reference in New Issue
Block a user