From 6935d52c39c99c9c1638c3747f9cdfd9b96ef2f4 Mon Sep 17 00:00:00 2001 From: felsabbagh3 Date: Thu, 14 Feb 2019 13:41:59 -0500 Subject: [PATCH] MWMT tested + minor opt --- src/harptool.cpp | 1 + src/include/archdef.h | 2 +- src/results.txt | 48 +++- src/riscv_gpgpu/gpgpu_test.c | 11 +- src/riscv_gpgpu/gpgpu_test.dump | 427 ++++++++++++++++---------------- src/riscv_gpgpu/gpgpu_test.elf | Bin 9232 -> 9232 bytes src/riscv_gpgpu/gpgpu_test.hex | 107 ++++---- src/riscv_gpgpu/hi.sh | 2 +- src/riscv_gpgpu/lib/lib.c | 10 +- src/riscv_gpgpu/lib/lib.h | 2 +- 10 files changed, 315 insertions(+), 295 deletions(-) diff --git a/src/harptool.cpp b/src/harptool.cpp index 80e53981..2e450471 100644 --- a/src/harptool.cpp +++ b/src/harptool.cpp @@ -256,6 +256,7 @@ int emu_main(int argc, char **argv) { mu.attach(console, 1ll<<(arch.getWordSize()*8 - 1)); // mu.attach(console, 0xf0000000); + // core.w[0].pc = 0x8000007c; // If I want to start at a specific location std::cout << "ABOUT TO START\n"; while (core.running()) { console.poll(); core.step(); } diff --git a/src/include/archdef.h b/src/include/archdef.h index 9b25e1bb..b8599592 100644 --- a/src/include/archdef.h +++ b/src/include/archdef.h @@ -24,7 +24,7 @@ namespace Harp { nRegs = 32; nPRegs = 0; nThds = 8; - nWarps = 3; + nWarps = 8; extent = EXT_WARPS; diff --git a/src/results.txt b/src/results.txt index 2648f79e..ac49e2f8 100644 --- a/src/results.txt +++ b/src/results.txt @@ -3,21 +3,53 @@ start ABOUT TO START INTERRUPT ECALL/EBREAK INTERRUPT ECALL/EBREAK -Total steps: 274 -Total insts: 1561 +INTERRUPT ECALL/EBREAK +INTERRUPT ECALL/EBREAK +Total steps: 300 +Total insts: 1503 === Warp 0 === -Steps : 274 -Insts : 827 +Steps : 300 +Insts : 504 Loads : 0 -Stores: 177 +Stores: 112 GRADE: FAILED 0 === Warp 1 === -Steps : 181 -Insts : 734 +Steps : 129 +Insts : 333 Loads : 0 -Stores: 157 +Stores: 76 GRADE: FAILED 0 === Warp 2 === +Steps : 129 +Insts : 333 +Loads : 0 +Stores: 76 +GRADE: FAILED 0 +=== Warp 3 === +Steps : 129 +Insts : 333 +Loads : 0 +Stores: 76 +GRADE: FAILED 0 +=== Warp 4 === +Steps : 0 +Insts : 0 +Loads : 0 +Stores: 0 +GRADE: FAILED 0 +=== Warp 5 === +Steps : 0 +Insts : 0 +Loads : 0 +Stores: 0 +GRADE: FAILED 0 +=== Warp 6 === +Steps : 0 +Insts : 0 +Loads : 0 +Stores: 0 +GRADE: FAILED 0 +=== Warp 7 === Steps : 0 Insts : 0 Loads : 0 diff --git a/src/riscv_gpgpu/gpgpu_test.c b/src/riscv_gpgpu/gpgpu_test.c index 21859ab4..5afa8694 100644 --- a/src/riscv_gpgpu/gpgpu_test.c +++ b/src/riscv_gpgpu/gpgpu_test.c @@ -4,7 +4,7 @@ int main(void); -void matAddition (); +void matAddition (unsigned, unsigned); #include "./lib/lib.h" @@ -13,8 +13,8 @@ unsigned x[] = {1, 1, 6, 0, 3, 1, 1, 2, 0, 3, 6, 7, 5, 7, 7, 9}; unsigned y[] = {0, 2, 2, 0, 5, 0, 1, 1, 4, 2, 0, 0, 3, 2, 3, 2}; unsigned z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -#define NUM_WARPS 2 -#define NUM_THREADS 8 +#define NUM_WARPS 4 +#define NUM_THREADS 4 int main() { @@ -27,14 +27,13 @@ int main() } -void matAddition(unsigned tid) +void matAddition(unsigned tid, unsigned wid) { - unsigned wid = get_wid(); unsigned * x_ptr = get_1st_arg(); unsigned * y_ptr = get_2nd_arg(); unsigned * z_ptr = get_3rd_arg(); - unsigned i = (wid * 8) + tid; + unsigned i = (wid * NUM_THREADS) + tid; z_ptr[i] = x_ptr[i] + y_ptr[i]; } \ No newline at end of file diff --git a/src/riscv_gpgpu/gpgpu_test.dump b/src/riscv_gpgpu/gpgpu_test.dump index b9271e8d..d7e01d79 100644 --- a/src/riscv_gpgpu/gpgpu_test.dump +++ b/src/riscv_gpgpu/gpgpu_test.dump @@ -9,7 +9,7 @@ Disassembly of section .text: 80000004: 00112623 sw ra,12(sp) 80000008: 00812423 sw s0,8(sp) 8000000c: 01010413 addi s0,sp,16 -80000010: 354000ef jal ra,80000364 +80000010: 348000ef jal ra,80000358 80000014: 810007b7 lui a5,0x81000 80000018: 00078793 mv a5,a5 8000001c: 81000737 lui a4,0x81000 @@ -18,9 +18,9 @@ Disassembly of section .text: 80000028: 04068693 addi a3,a3,64 # 81000040 8000002c: 80000637 lui a2,0x80000 80000030: 05860613 addi a2,a2,88 # 80000058 -80000034: 00800593 li a1,8 -80000038: 00200513 li a0,2 -8000003c: 1e0000ef jal ra,8000021c +80000034: 00400593 li a1,4 +80000038: 00400513 li a0,4 +8000003c: 1d4000ef jal ra,80000210 80000040: 00000793 li a5,0 80000044: 00078513 mv a0,a5 80000048: 00c12083 lw ra,12(sp) @@ -29,225 +29,222 @@ Disassembly of section .text: 80000054: 00008067 ret 80000058 : -80000058: fc010113 addi sp,sp,-64 -8000005c: 02112e23 sw ra,60(sp) -80000060: 02812c23 sw s0,56(sp) -80000064: 04010413 addi s0,sp,64 -80000068: fca42623 sw a0,-52(s0) -8000006c: 258000ef jal ra,800002c4 -80000070: fea42623 sw a0,-20(s0) -80000074: 278000ef jal ra,800002ec -80000078: fea42423 sw a0,-24(s0) -8000007c: 298000ef jal ra,80000314 -80000080: fea42223 sw a0,-28(s0) -80000084: 2b8000ef jal ra,8000033c -80000088: fea42023 sw a0,-32(s0) -8000008c: fec42783 lw a5,-20(s0) -80000090: 00379793 slli a5,a5,0x3 -80000094: fcc42703 lw a4,-52(s0) -80000098: 00f707b3 add a5,a4,a5 -8000009c: fcf42e23 sw a5,-36(s0) -800000a0: fdc42783 lw a5,-36(s0) -800000a4: 00279793 slli a5,a5,0x2 -800000a8: fe842703 lw a4,-24(s0) -800000ac: 00f707b3 add a5,a4,a5 -800000b0: 0007a683 lw a3,0(a5) # 81000000 -800000b4: fdc42783 lw a5,-36(s0) -800000b8: 00279793 slli a5,a5,0x2 -800000bc: fe442703 lw a4,-28(s0) -800000c0: 00f707b3 add a5,a4,a5 -800000c4: 0007a703 lw a4,0(a5) -800000c8: fdc42783 lw a5,-36(s0) -800000cc: 00279793 slli a5,a5,0x2 -800000d0: fe042603 lw a2,-32(s0) -800000d4: 00f607b3 add a5,a2,a5 -800000d8: 00e68733 add a4,a3,a4 -800000dc: 00e7a023 sw a4,0(a5) -800000e0: 00000013 nop -800000e4: 03c12083 lw ra,60(sp) -800000e8: 03812403 lw s0,56(sp) -800000ec: 04010113 addi sp,sp,64 -800000f0: 00008067 ret +80000058: fd010113 addi sp,sp,-48 +8000005c: 02112623 sw ra,44(sp) +80000060: 02812423 sw s0,40(sp) +80000064: 03010413 addi s0,sp,48 +80000068: fca42e23 sw a0,-36(s0) +8000006c: fcb42c23 sw a1,-40(s0) +80000070: 270000ef jal ra,800002e0 +80000074: fea42623 sw a0,-20(s0) +80000078: 290000ef jal ra,80000308 +8000007c: fea42423 sw a0,-24(s0) +80000080: 2b0000ef jal ra,80000330 +80000084: fea42223 sw a0,-28(s0) +80000088: fd842783 lw a5,-40(s0) +8000008c: 00279793 slli a5,a5,0x2 +80000090: fdc42703 lw a4,-36(s0) +80000094: 00f707b3 add a5,a4,a5 +80000098: fef42023 sw a5,-32(s0) +8000009c: fe042783 lw a5,-32(s0) +800000a0: 00279793 slli a5,a5,0x2 +800000a4: fec42703 lw a4,-20(s0) +800000a8: 00f707b3 add a5,a4,a5 +800000ac: 0007a683 lw a3,0(a5) # 81000000 +800000b0: fe042783 lw a5,-32(s0) +800000b4: 00279793 slli a5,a5,0x2 +800000b8: fe842703 lw a4,-24(s0) +800000bc: 00f707b3 add a5,a4,a5 +800000c0: 0007a703 lw a4,0(a5) +800000c4: fe042783 lw a5,-32(s0) +800000c8: 00279793 slli a5,a5,0x2 +800000cc: fe442603 lw a2,-28(s0) +800000d0: 00f607b3 add a5,a2,a5 +800000d4: 00e68733 add a4,a3,a4 +800000d8: 00e7a023 sw a4,0(a5) +800000dc: 00000013 nop +800000e0: 02c12083 lw ra,44(sp) +800000e4: 02812403 lw s0,40(sp) +800000e8: 03010113 addi sp,sp,48 +800000ec: 00008067 ret -800000f4 : -800000f4: fb010113 addi sp,sp,-80 -800000f8: 04812623 sw s0,76(sp) -800000fc: 04912423 sw s1,72(sp) -80000100: 05212223 sw s2,68(sp) -80000104: 05312023 sw s3,64(sp) -80000108: 03412e23 sw s4,60(sp) -8000010c: 03a12c23 sw s10,56(sp) -80000110: 03b12a23 sw s11,52(sp) -80000114: 05010413 addi s0,sp,80 -80000118: fca42623 sw a0,-52(s0) -8000011c: fcb42423 sw a1,-56(s0) -80000120: fcc42223 sw a2,-60(s0) -80000124: fcd42023 sw a3,-64(s0) -80000128: fae42e23 sw a4,-68(s0) -8000012c: faf42c23 sw a5,-72(s0) -80000130: fc042903 lw s2,-64(s0) -80000134: fbc42983 lw s3,-68(s0) -80000138: fb842a03 lw s4,-72(s0) -8000013c: fc842483 lw s1,-56(s0) -80000140: 00010f13 mv t5,sp -80000144: 00100793 li a5,1 -80000148: fcf42e23 sw a5,-36(s0) -8000014c: 0200006f j 8000016c -80000150: fdc42503 lw a0,-36(s0) -80000154: fdc42303 lw t1,-36(s0) -80000158: f0010113 addi sp,sp,-256 -8000015c: 0003506b 0x3506b -80000160: fdc42783 lw a5,-36(s0) -80000164: 00178793 addi a5,a5,1 -80000168: fcf42e23 sw a5,-36(s0) -8000016c: fdc42703 lw a4,-36(s0) -80000170: fcc42783 lw a5,-52(s0) -80000174: fcf76ee3 bltu a4,a5,80000150 -80000178: 000f0113 mv sp,t5 -8000017c: 00000513 li a0,0 -80000180: fc442f83 lw t6,-60(s0) -80000184: fcc42d83 lw s11,-52(s0) -80000188: 01bfe0eb 0x1bfe0eb -8000018c: 00000073 ecall -80000190: 00000013 nop -80000194: 04c12403 lw s0,76(sp) -80000198: 04812483 lw s1,72(sp) -8000019c: 04412903 lw s2,68(sp) -800001a0: 04012983 lw s3,64(sp) -800001a4: 03c12a03 lw s4,60(sp) -800001a8: 03812d03 lw s10,56(sp) -800001ac: 03412d83 lw s11,52(sp) -800001b0: 05010113 addi sp,sp,80 -800001b4: 00008067 ret +800000f0 : +800000f0: fb010113 addi sp,sp,-80 +800000f4: 04812623 sw s0,76(sp) +800000f8: 05212423 sw s2,72(sp) +800000fc: 05312223 sw s3,68(sp) +80000100: 05412023 sw s4,64(sp) +80000104: 03a12e23 sw s10,60(sp) +80000108: 03b12c23 sw s11,56(sp) +8000010c: 05010413 addi s0,sp,80 +80000110: fca42623 sw a0,-52(s0) +80000114: fcb42423 sw a1,-56(s0) +80000118: fcc42223 sw a2,-60(s0) +8000011c: fcd42023 sw a3,-64(s0) +80000120: fae42e23 sw a4,-68(s0) +80000124: faf42c23 sw a5,-72(s0) +80000128: fc042903 lw s2,-64(s0) +8000012c: fbc42983 lw s3,-68(s0) +80000130: fb842a03 lw s4,-72(s0) +80000134: fc842583 lw a1,-56(s0) +80000138: 00010f13 mv t5,sp +8000013c: 00100793 li a5,1 +80000140: fcf42e23 sw a5,-36(s0) +80000144: 0200006f j 80000164 +80000148: fdc42503 lw a0,-36(s0) +8000014c: fdc42303 lw t1,-36(s0) +80000150: f0010113 addi sp,sp,-256 +80000154: 0003506b 0x3506b +80000158: fdc42783 lw a5,-36(s0) +8000015c: 00178793 addi a5,a5,1 +80000160: fcf42e23 sw a5,-36(s0) +80000164: fdc42703 lw a4,-36(s0) +80000168: fcc42783 lw a5,-52(s0) +8000016c: fcf76ee3 bltu a4,a5,80000148 +80000170: 000f0113 mv sp,t5 +80000174: 00000513 li a0,0 +80000178: fc442f83 lw t6,-60(s0) +8000017c: fcc42d83 lw s11,-52(s0) +80000180: 01bfe0eb 0x1bfe0eb +80000184: 00000073 ecall +80000188: 00000013 nop +8000018c: 04c12403 lw s0,76(sp) +80000190: 04812903 lw s2,72(sp) +80000194: 04412983 lw s3,68(sp) +80000198: 04012a03 lw s4,64(sp) +8000019c: 03c12d03 lw s10,60(sp) +800001a0: 03812d83 lw s11,56(sp) +800001a4: 05010113 addi sp,sp,80 +800001a8: 00008067 ret -800001b8 : -800001b8: fd010113 addi sp,sp,-48 -800001bc: 02812623 sw s0,44(sp) -800001c0: 03010413 addi s0,sp,48 -800001c4: fea42623 sw a0,-20(s0) -800001c8: feb42423 sw a1,-24(s0) -800001cc: fec42223 sw a2,-28(s0) -800001d0: fed42023 sw a3,-32(s0) -800001d4: fce42e23 sw a4,-36(s0) -800001d8: fcf42c23 sw a5,-40(s0) -800001dc: fd842383 lw t2,-40(s0) -800001e0: 800007b7 lui a5,0x80000 -800001e4: 0f478793 addi a5,a5,244 # 800000f4 -800001e8: 00078313 mv t1,a5 -800001ec: fec42503 lw a0,-20(s0) -800001f0: fe842583 lw a1,-24(s0) -800001f4: fe442783 lw a5,-28(s0) -800001f8: 00078613 mv a2,a5 -800001fc: fe042683 lw a3,-32(s0) -80000200: fdc42703 lw a4,-36(s0) -80000204: 00038793 mv a5,t2 -80000208: 0003006b 0x3006b -8000020c: 00000013 nop -80000210: 02c12403 lw s0,44(sp) -80000214: 03010113 addi sp,sp,48 -80000218: 00008067 ret +800001ac : +800001ac: fd010113 addi sp,sp,-48 +800001b0: 02812623 sw s0,44(sp) +800001b4: 03010413 addi s0,sp,48 +800001b8: fea42623 sw a0,-20(s0) +800001bc: feb42423 sw a1,-24(s0) +800001c0: fec42223 sw a2,-28(s0) +800001c4: fed42023 sw a3,-32(s0) +800001c8: fce42e23 sw a4,-36(s0) +800001cc: fcf42c23 sw a5,-40(s0) +800001d0: fd842383 lw t2,-40(s0) +800001d4: 800007b7 lui a5,0x80000 +800001d8: 0f078793 addi a5,a5,240 # 800000f0 +800001dc: 00078313 mv t1,a5 +800001e0: fec42503 lw a0,-20(s0) +800001e4: fe842583 lw a1,-24(s0) +800001e8: fe442783 lw a5,-28(s0) +800001ec: 00078613 mv a2,a5 +800001f0: fe042683 lw a3,-32(s0) +800001f4: fdc42703 lw a4,-36(s0) +800001f8: 00038793 mv a5,t2 +800001fc: 0003006b 0x3006b +80000200: 00000013 nop +80000204: 02c12403 lw s0,44(sp) +80000208: 03010113 addi sp,sp,48 +8000020c: 00008067 ret -8000021c : -8000021c: fc010113 addi sp,sp,-64 -80000220: 02112e23 sw ra,60(sp) -80000224: 02812c23 sw s0,56(sp) -80000228: 04010413 addi s0,sp,64 -8000022c: fca42e23 sw a0,-36(s0) -80000230: fcb42c23 sw a1,-40(s0) -80000234: fcc42a23 sw a2,-44(s0) -80000238: fcd42823 sw a3,-48(s0) -8000023c: fce42623 sw a4,-52(s0) -80000240: fcf42423 sw a5,-56(s0) -80000244: 00010f13 mv t5,sp -80000248: 00100793 li a5,1 -8000024c: fef42623 sw a5,-20(s0) -80000250: 0300006f j 80000280 -80000254: 80010113 addi sp,sp,-2048 -80000258: fc842783 lw a5,-56(s0) -8000025c: fcc42703 lw a4,-52(s0) -80000260: fd042683 lw a3,-48(s0) -80000264: fd442603 lw a2,-44(s0) -80000268: fec42583 lw a1,-20(s0) -8000026c: fd842503 lw a0,-40(s0) -80000270: f49ff0ef jal ra,800001b8 -80000274: fec42783 lw a5,-20(s0) -80000278: 00178793 addi a5,a5,1 -8000027c: fef42623 sw a5,-20(s0) -80000280: fec42703 lw a4,-20(s0) -80000284: fdc42783 lw a5,-36(s0) -80000288: fcf766e3 bltu a4,a5,80000254 -8000028c: 000f0113 mv sp,t5 -80000290: fd442603 lw a2,-44(s0) -80000294: fc842783 lw a5,-56(s0) -80000298: fcc42703 lw a4,-52(s0) -8000029c: fd042683 lw a3,-48(s0) -800002a0: 00000593 li a1,0 -800002a4: fd842503 lw a0,-40(s0) -800002a8: e4dff0ef jal ra,800000f4 -800002ac: 00000073 ecall -800002b0: 00000013 nop -800002b4: 03c12083 lw ra,60(sp) -800002b8: 03812403 lw s0,56(sp) -800002bc: 04010113 addi sp,sp,64 -800002c0: 00008067 ret +80000210 : +80000210: fc010113 addi sp,sp,-64 +80000214: 02112e23 sw ra,60(sp) +80000218: 02812c23 sw s0,56(sp) +8000021c: 04010413 addi s0,sp,64 +80000220: fca42e23 sw a0,-36(s0) +80000224: fcb42c23 sw a1,-40(s0) +80000228: fcc42a23 sw a2,-44(s0) +8000022c: fcd42823 sw a3,-48(s0) +80000230: fce42623 sw a4,-52(s0) +80000234: fcf42423 sw a5,-56(s0) +80000238: 00010f13 mv t5,sp +8000023c: 00100793 li a5,1 +80000240: fef42623 sw a5,-20(s0) +80000244: 0300006f j 80000274 +80000248: 80010113 addi sp,sp,-2048 +8000024c: fc842783 lw a5,-56(s0) +80000250: fcc42703 lw a4,-52(s0) +80000254: fd042683 lw a3,-48(s0) +80000258: fd442603 lw a2,-44(s0) +8000025c: fec42583 lw a1,-20(s0) +80000260: fd842503 lw a0,-40(s0) +80000264: f49ff0ef jal ra,800001ac +80000268: fec42783 lw a5,-20(s0) +8000026c: 00178793 addi a5,a5,1 +80000270: fef42623 sw a5,-20(s0) +80000274: fec42703 lw a4,-20(s0) +80000278: fdc42783 lw a5,-36(s0) +8000027c: fcf766e3 bltu a4,a5,80000248 +80000280: 000f0113 mv sp,t5 +80000284: fd442603 lw a2,-44(s0) +80000288: fc842783 lw a5,-56(s0) +8000028c: fcc42703 lw a4,-52(s0) +80000290: fd042683 lw a3,-48(s0) +80000294: 00000593 li a1,0 +80000298: fd842503 lw a0,-40(s0) +8000029c: e55ff0ef jal ra,800000f0 +800002a0: 00000073 ecall +800002a4: 00000013 nop +800002a8: 03c12083 lw ra,60(sp) +800002ac: 03812403 lw s0,56(sp) +800002b0: 04010113 addi sp,sp,64 +800002b4: 00008067 ret -800002c4 : -800002c4: ff010113 addi sp,sp,-16 -800002c8: 00812623 sw s0,12(sp) -800002cc: 00912423 sw s1,8(sp) -800002d0: 01010413 addi s0,sp,16 -800002d4: 00048793 mv a5,s1 -800002d8: 00078513 mv a0,a5 -800002dc: 00c12403 lw s0,12(sp) -800002e0: 00812483 lw s1,8(sp) -800002e4: 01010113 addi sp,sp,16 -800002e8: 00008067 ret +800002b8 : +800002b8: ff010113 addi sp,sp,-16 +800002bc: 00812623 sw s0,12(sp) +800002c0: 00912423 sw s1,8(sp) +800002c4: 01010413 addi s0,sp,16 +800002c8: 00048793 mv a5,s1 +800002cc: 00078513 mv a0,a5 +800002d0: 00c12403 lw s0,12(sp) +800002d4: 00812483 lw s1,8(sp) +800002d8: 01010113 addi sp,sp,16 +800002dc: 00008067 ret -800002ec : -800002ec: ff010113 addi sp,sp,-16 -800002f0: 00812623 sw s0,12(sp) -800002f4: 01212423 sw s2,8(sp) -800002f8: 01010413 addi s0,sp,16 -800002fc: 00090793 mv a5,s2 -80000300: 00078513 mv a0,a5 -80000304: 00c12403 lw s0,12(sp) -80000308: 00812903 lw s2,8(sp) -8000030c: 01010113 addi sp,sp,16 -80000310: 00008067 ret +800002e0 : +800002e0: ff010113 addi sp,sp,-16 +800002e4: 00812623 sw s0,12(sp) +800002e8: 01212423 sw s2,8(sp) +800002ec: 01010413 addi s0,sp,16 +800002f0: 00090793 mv a5,s2 +800002f4: 00078513 mv a0,a5 +800002f8: 00c12403 lw s0,12(sp) +800002fc: 00812903 lw s2,8(sp) +80000300: 01010113 addi sp,sp,16 +80000304: 00008067 ret -80000314 : -80000314: ff010113 addi sp,sp,-16 -80000318: 00812623 sw s0,12(sp) -8000031c: 01312423 sw s3,8(sp) -80000320: 01010413 addi s0,sp,16 -80000324: 00098793 mv a5,s3 -80000328: 00078513 mv a0,a5 -8000032c: 00c12403 lw s0,12(sp) -80000330: 00812983 lw s3,8(sp) -80000334: 01010113 addi sp,sp,16 -80000338: 00008067 ret +80000308 : +80000308: ff010113 addi sp,sp,-16 +8000030c: 00812623 sw s0,12(sp) +80000310: 01312423 sw s3,8(sp) +80000314: 01010413 addi s0,sp,16 +80000318: 00098793 mv a5,s3 +8000031c: 00078513 mv a0,a5 +80000320: 00c12403 lw s0,12(sp) +80000324: 00812983 lw s3,8(sp) +80000328: 01010113 addi sp,sp,16 +8000032c: 00008067 ret -8000033c : -8000033c: ff010113 addi sp,sp,-16 -80000340: 00812623 sw s0,12(sp) -80000344: 01412423 sw s4,8(sp) -80000348: 01010413 addi s0,sp,16 -8000034c: 000a0793 mv a5,s4 -80000350: 00078513 mv a0,a5 -80000354: 00c12403 lw s0,12(sp) -80000358: 00812a03 lw s4,8(sp) -8000035c: 01010113 addi sp,sp,16 -80000360: 00008067 ret +80000330 : +80000330: ff010113 addi sp,sp,-16 +80000334: 00812623 sw s0,12(sp) +80000338: 01412423 sw s4,8(sp) +8000033c: 01010413 addi s0,sp,16 +80000340: 000a0793 mv a5,s4 +80000344: 00078513 mv a0,a5 +80000348: 00c12403 lw s0,12(sp) +8000034c: 00812a03 lw s4,8(sp) +80000350: 01010113 addi sp,sp,16 +80000354: 00008067 ret -80000364 : -80000364: ff010113 addi sp,sp,-16 -80000368: 00812623 sw s0,12(sp) -8000036c: 01010413 addi s0,sp,16 -80000370: 7ffff137 lui sp,0x7ffff -80000374: 00000013 nop -80000378: 00c12403 lw s0,12(sp) # 7ffff00c -8000037c: 01010113 addi sp,sp,16 -80000380: 00008067 ret +80000358 : +80000358: ff010113 addi sp,sp,-16 +8000035c: 00812623 sw s0,12(sp) +80000360: 01010413 addi s0,sp,16 +80000364: 7ffff137 lui sp,0x7ffff +80000368: 00000013 nop +8000036c: 00c12403 lw s0,12(sp) # 7ffff00c +80000370: 01010113 addi sp,sp,16 +80000374: 00008067 ret Disassembly of section .bss: diff --git a/src/riscv_gpgpu/gpgpu_test.elf b/src/riscv_gpgpu/gpgpu_test.elf index 9b99135a3682eb7b78ec7b2d7e3ae0e422c49a3f..2448e0b532a9fc960fe91b56d58fcf83f208d30d 100755 GIT binary patch delta 377 zcmYk0Jxjw-6o${qO=uO1P2*OiNls0y{h*@aB9cOf_8*9gi{M|#WL6t;xYjDrujT8d8>RbZB9F&DGm(4 zndeSAyFVgJl_>8^nv9GpogtNOhXVkctWG3Yp!|eIGCNUVAB^~k4a|NTg^6z<{ZlwO zid{5z!9@c8+`+`L%YV+Q3-<*phwnh`Y9BNJ2=t!B#!n!!Jr6+ehb&*FyrGb}tdL$) zsB+Gz#4~CU7x<>St+9eH2Zfr? z8o9N!1m2Z6m;r=};0J*B5a$iSSJ5xVxgoeE`cr;pWUxHXk#s@?R@3(LP}FX!{j#^ad&bc-kGMrb|^q*uDcG{KA6E&^%^n=rU>> zGTJSQl{1PZo>a=jPx# diff --git a/src/riscv_gpgpu/gpgpu_test.hex b/src/riscv_gpgpu/gpgpu_test.hex index de2a0ba7..87df92ef 100644 --- a/src/riscv_gpgpu/gpgpu_test.hex +++ b/src/riscv_gpgpu/gpgpu_test.hex @@ -1,61 +1,60 @@ :0200000480007A :10000000130101FF232611002324810013040101A1 -:10001000EF004035B707008193870700370700815D +:10001000EF008034B707008193870700370700811E :1000200013070708B7060081938606043706008089 -:10003000130686059305800013052000EF00001EBF +:10003000130686059305400013054000EF00401DA0 :1000400093070000138507008320C100032481006B -:100050001301010167800000130101FC232E11022E -:10006000232C8102130401042326A4FCEF00802525 -:100070002326A4FEEF0080272324A4FEEF0080297E -:100080002322A4FEEF00802B2320A4FE8327C4FE9E -:10009000939737000327C4FCB307F700232EF4FC23 -:1000A0008327C4FD93972700032784FEB307F70037 -:1000B00083A607008327C4FD93972700032744FEE8 -:1000C000B307F70003A707008327C4FD9397270012 -:1000D000032604FEB307F6003387E60023A0E700FB -:1000E000130000008320C1030324810313010104D2 -:1000F00067800000130101FB23268104232491045F -:100100002322210523203105232E4103232CA10383 -:10011000232AB103130401052326A4FC2324B4FCE1 -:100120002322C4FC2320D4FC232EE4FA232CF4FA4B -:10013000032904FC8329C4FB032A84FB832484FC55 -:10014000130F010093071000232EF4FC6F00000230 -:100150000325C4FD0323C4FD130101F06B5003000C -:100160008327C4FD93871700232EF4FC0327C4FDC7 -:100170008327C4FCE36EF7FC13010F001305000096 -:10018000832F44FC832DC4FCEBE0BF01730000000F -:10019000130000000324C1048324810403294104C3 -:1001A00083290104032AC103032D8103832D410305 -:1001B0001301010567800000130101FD2326810260 -:1001C000130401032326A4FE2324B4FE2322C4FE29 -:1001D0002320D4FE232EE4FC232CF4FC832384FD73 -:1001E000B70700809387470F138307000325C4FEDA -:1001F000832584FE832744FE13860700832604FE9E -:100200000327C4FD938703006B0003001300000065 -:100210000324C1021301010367800000130101FCE4 -:10022000232E1102232C810213040104232EA4FC8B -:10023000232CB4FC232AC4FC2328D4FC2326E4FC6E -:100240002324F4FC130F0100930710002326F4FE6F -:100250006F00000313010180832784FC0327C4FC83 -:10026000832604FD032644FD8325C4FE032584FD67 -:10027000EFF09FF48327C4FE938717002326F4FE34 -:100280000327C4FE8327C4FDE366F7FC13010F00B8 -:10029000032644FD832784FC0327C4FC832604FD36 -:1002A00093050000032584FDEFF0DFE473000000F8 -:1002B000130000008320C103032481031301010400 -:1002C00067800000130101FF232681002324910091 -:1002D0001304010193870400138507000324C10060 -:1002E000832481001301010167800000130101FFD5 -:1002F000232681002324210113040101930709000F -:10030000138507000324C1000329810013010101A3 -:1003100067800000130101FF23268100232431019F -:100320001304010193870900138507000324C1000A -:10033000832981001301010167800000130101FF7F -:1003400023268100232441011304010193070A009D -:10035000138507000324C100032A81001301010152 -:1003600067800000130101FF2326810013040101AF -:1003700037F1FF7F130000000324C10013010101C6 -:040380006780000092 +:100050001301010167800000130101FD2326110235 +:100060002324810213040103232EA4FC232CB4FCBB +:10007000EF0000272326A4FEEF0000292324A4FE7E +:10008000EF00002B2322A4FE832784FD93972700F3 +:100090000327C4FDB307F7002320F4FE832704FEE3 +:1000A000939727000327C4FEB307F70083A6070032 +:1000B000832704FE93972700032784FEB307F700E6 +:1000C00003A70700832704FE93972700032644FE17 +:1000D000B307F6003387E60023A0E7001300000013 +:1000E0008320C10203248102130101036780000001 +:1000F000130101FB2326810423242105232231053A +:1001000023204105232EA103232CB1031304010551 +:100110002326A4FC2324B4FC2322C4FC2320D4FCE7 +:10012000232EE4FA232CF4FA032904FC8329C4FBCC +:10013000032A84FB832584FC130F0100930710001E +:10014000232EF4FC6F0000020325C4FD0323C4FD2D +:10015000130101F06B5003008327C4FD9387170040 +:10016000232EF4FC0327C4FD8327C4FCE36EF7FCB5 +:1001700013010F0013050000832F44FC832DC4FCE2 +:10018000EBE0BF0173000000130000000324C10472 +:100190000329810483294104032A0104032DC10397 +:1001A000832D81031301010567800000130101FD08 +:1001B00023268102130401032326A4FE2324B4FE74 +:1001C0002322C4FE2320D4FE232EE4FC232CF4FCA3 +:1001D000832384FDB70700809387070F13830700ED +:1001E0000325C4FE832584FE832744FE138607006F +:1001F000832604FE0327C4FD938703006B000300DE +:10020000130000000324C1021301010367800000F2 +:10021000130101FC232E1102232C8102130401047B +:10022000232EA4FC232CB4FC232AC4FC2328D4FCB6 +:100230002326E4FC2324F4FC130F01009307100091 +:100240002326F4FE6F00000313010180832784FC42 +:100250000327C4FC832604FD032644FD8325C4FE36 +:10026000032584FDEFF09FF48327C4FE93871700D6 +:100270002326F4FE0327C4FE8327C4FDE366F7FCB0 +:1002800013010F00032644FD832784FC0327C4FCCD +:10029000832604FD93050000032584FDEFF05FE550 +:1002A00073000000130000008320C10303248103B6 +:1002B0001301010467800000130101FF2326810060 +:1002C0002324910013040101938704001385070080 +:1002D0000324C10083248100130101016780000011 +:1002E000130101FF232681002324210113040101AE +:1002F00093070900138507000324C1000329810027 +:100300001301010167800000130101FF2326810012 +:100310002324310113040101938709001385070089 +:100320000324C100832981001301010167800000BB +:10033000130101FF2326810023244101130401013D +:1003400093070A00138507000324C100032A8100D4 +:100350001301010167800000130101FF23268100C2 +:100360001304010137F1FF7F130000000324C100D3 +:08037000130101016780000088 :02000004810079 :1000400001000000010000000600000000000000A8 :100050000300000001000000010000000200000099 diff --git a/src/riscv_gpgpu/hi.sh b/src/riscv_gpgpu/hi.sh index df2a2ab8..355ad269 100755 --- a/src/riscv_gpgpu/hi.sh +++ b/src/riscv_gpgpu/hi.sh @@ -1,3 +1,3 @@ - /opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-gcc -march=rv32i -mabi=ilp32 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib gpgpu_test.c ./lib/lib.c -o gpgpu_test.elf + /opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-gcc -march=rv32i -mabi=ilp32 -O0 -Wl,-Bstatic,-T,linker.ld -ffreestanding -nostdlib gpgpu_test.c ./lib/lib.c -o gpgpu_test.elf /opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-objdump -D gpgpu_test.elf > gpgpu_test.dump /opt/riscv-nommu/bin/riscv32-unknown-linux-gnu-objcopy -O ihex gpgpu_test.elf gpgpu_test.hex \ No newline at end of file diff --git a/src/riscv_gpgpu/lib/lib.c b/src/riscv_gpgpu/lib/lib.c index ad932348..0660dc9e 100644 --- a/src/riscv_gpgpu/lib/lib.c +++ b/src/riscv_gpgpu/lib/lib.c @@ -10,7 +10,7 @@ void createThreads(unsigned num_threads, unsigned wid, unsigned func_addr, unsig register unsigned *xx asm("s2") = x_ptr; register unsigned *yy asm("s3") = y_ptr; register unsigned *zz asm("s4") = z_ptr; - register unsigned wid_ asm("s1") = wid; + register unsigned wid_ asm("a1") = wid; asm __volatile__("addi t5, sp, 0"); for (unsigned i = 1; i < num_threads; i++) @@ -79,14 +79,6 @@ void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned * x_pt } - - - - - - - - unsigned get_wid() { register unsigned ret asm("s1"); diff --git a/src/riscv_gpgpu/lib/lib.h b/src/riscv_gpgpu/lib/lib.h index bb2c6c65..95c17fc5 100644 --- a/src/riscv_gpgpu/lib/lib.h +++ b/src/riscv_gpgpu/lib/lib.h @@ -10,7 +10,7 @@ #define ECALL asm __volatile__(".word 0x00000073") -#define FUNC void (func)(unsigned) +#define FUNC void (func)(unsigned, unsigned) void createWarps(unsigned num_Warps, unsigned num_threads, FUNC, unsigned *, unsigned *, unsigned *); unsigned get_wid();