diff --git a/benchmarks/opencl/bfs/Makefile b/benchmarks/opencl/bfs/Makefile index d46eb9bb..a8250c8b 100644 --- a/benchmarks/opencl/bfs/Makefile +++ b/benchmarks/opencl/bfs/Makefile @@ -52,7 +52,7 @@ clean: rm -rf $(PROJECT) *.o .depend clean-all: clean - rm *.pocl *.dump + rm -rf *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/convolution/Makefile b/benchmarks/opencl/convolution/Makefile index c73544aa..79e30d90 100644 --- a/benchmarks/opencl/convolution/Makefile +++ b/benchmarks/opencl/convolution/Makefile @@ -52,7 +52,7 @@ clean: rm -rf $(PROJECT) *.o .depend clean-all: clean - rm *.pocl *.dump + rm -rf *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/guassian/Fan1.dump b/benchmarks/opencl/guassian/Fan1.dump index b7a24155..08e27451 100644 --- a/benchmarks/opencl/guassian/Fan1.dump +++ b/benchmarks/opencl/guassian/Fan1.dump @@ -1,28 +1,28 @@ -/tmp/pocl_vortex_kernel-4d-57-ef-63-3a.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-ad-b4-ee-03-0c.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 05 00 00 auipc a1, 0 -80000004: 93 85 45 6b addi a1, a1, 1716 +80000004: 93 85 c5 66 addi a1, a1, 1644 80000008: 73 25 60 02 csrr a0, 38 8000000c: 6b 10 b5 00 -80000010: ef 00 40 6a jal 1700 +80000010: ef 00 c0 65 jal 1628 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 13 85 c1 c2 addi a0, gp, -980 -80000020: 13 86 01 c3 addi a2, gp, -976 +80000020: 13 86 c1 c4 addi a2, gp, -948 80000024: 33 06 a6 40 sub a2, a2, a0 80000028: 93 05 00 00 mv a1, zero -8000002c: ef 00 d0 07 jal 2172 +8000002c: ef 00 50 03 jal 2100 80000030: 17 05 00 00 auipc a0, 0 -80000034: 13 05 05 78 addi a0, a0, 1920 -80000038: ef 00 40 73 jal 1844 -8000003c: ef 00 00 7d jal 2000 -80000040: ef 00 80 3c jal 968 -80000044: 6f 00 c0 73 j 1852 +80000034: 13 05 85 73 addi a0, a0, 1848 +80000038: ef 00 c0 6e jal 1772 +8000003c: ef 00 80 78 jal 1928 +80000040: ef 00 00 38 jal 896 +80000044: 6f 00 40 6f j 1780 Disassembly of section .text: @@ -30,8 +30,8 @@ Disassembly of section .text: 80000048: 93 07 00 00 mv a5, zero 8000004c: 63 88 07 00 beqz a5, 16 80000050: 37 05 00 80 lui a0, 524288 -80000054: 13 05 05 7b addi a0, a0, 1968 -80000058: 6f 00 40 71 j 1812 +80000054: 13 05 85 76 addi a0, a0, 1896 +80000058: 6f 00 c0 6c j 1740 8000005c: 67 80 00 00 ret 80000060 kernel_spawn_run_warp: @@ -46,116 +46,116 @@ Disassembly of section .text: 80000080: 23 28 61 01 sw s6, 16(sp) 80000084: 23 26 71 01 sw s7, 12(sp) 80000088: 23 24 81 01 sw s8, 8(sp) -8000008c: ef 00 80 6b jal 1720 -80000090: ef 00 c0 66 jal 1644 -80000094: ef 00 00 6b jal 1712 -80000098: 83 a5 c1 c2 lw a1, -980(gp) -8000009c: 83 a5 05 00 lw a1, 0(a1) -800000a0: 83 aa 05 00 lw s5, 0(a1) -800000a4: 03 ab 45 00 lw s6, 4(a1) +8000008c: ef 00 00 67 jal 1648 +80000090: ef 00 40 62 jal 1572 +80000094: ef 00 00 66 jal 1632 +80000098: 93 04 05 00 mv s1, a0 +8000009c: ef 00 80 63 jal 1592 +800000a0: 93 09 05 00 mv s3, a0 +800000a4: ef 00 00 64 jal 1600 800000a8: 13 09 05 00 mv s2, a0 -800000ac: ef 00 00 67 jal 1648 -800000b0: 93 09 05 00 mv s3, a0 -800000b4: ef 00 80 67 jal 1656 -800000b8: 03 a8 c1 c2 lw a6, -980(gp) -800000bc: 03 27 48 01 lw a4, 20(a6) -800000c0: 83 25 08 01 lw a1, 16(a6) -800000c4: 93 06 07 00 mv a3, a4 -800000c8: 63 44 37 01 blt a4, s3, 8 -800000cc: 93 86 09 00 mv a3, s3 -800000d0: 33 a7 e9 00 slt a4, s3, a4 -800000d4: 33 87 e5 00 add a4, a1, a4 -800000d8: 93 07 10 00 addi a5, zero, 1 -800000dc: 63 46 f7 08 blt a4, a5, 140 -800000e0: 33 0a 5b 03 mul s4, s6, s5 -800000e4: 83 27 c8 00 lw a5, 12(a6) -800000e8: 13 0c f7 ff addi s8, a4, -1 -800000ec: b3 85 b9 02 mul a1, s3, a1 -800000f0: b3 85 b6 00 add a1, a3, a1 -800000f4: b3 05 b9 02 mul a1, s2, a1 -800000f8: b3 85 b7 00 add a1, a5, a1 -800000fc: 33 05 e5 02 mul a0, a0, a4 -80000100: b3 84 a5 00 add s1, a1, a0 -80000104: 33 09 60 41 neg s2, s6 -80000108: 33 0b 40 41 neg s6, s4 -8000010c: 33 c7 44 03 div a4, s1, s4 -80000110: 83 25 08 00 lw a1, 0(a6) -80000114: 33 05 eb 02 mul a0, s6, a4 -80000118: 33 85 a4 00 add a0, s1, a0 -8000011c: b3 46 55 03 div a3, a0, s5 -80000120: 03 a5 c5 00 lw a0, 12(a1) -80000124: b3 07 e9 02 mul a5, s2, a4 -80000128: b3 87 d7 40 sub a5, a5, a3 -8000012c: b3 87 fa 02 mul a5, s5, a5 -80000130: b3 08 f5 00 add a7, a0, a5 -80000134: 03 a4 05 01 lw s0, 16(a1) -80000138: 03 a6 45 01 lw a2, 20(a1) -8000013c: 83 27 48 00 lw a5, 4(a6) -80000140: 03 25 88 00 lw a0, 8(a6) -80000144: b3 06 d4 00 add a3, s0, a3 -80000148: 33 07 e6 00 add a4, a2, a4 -8000014c: 33 86 14 01 add a2, s1, a7 -80000150: e7 80 07 00 jalr a5 -80000154: 63 0a 0c 00 beqz s8, 20 -80000158: 03 a8 c1 c2 lw a6, -980(gp) -8000015c: 13 0c fc ff addi s8, s8, -1 -80000160: 93 84 14 00 addi s1, s1, 1 -80000164: 6f f0 9f fa j -88 -80000168: 13 b5 19 00 seqz a0, s3 -8000016c: 03 2c 81 00 lw s8, 8(sp) -80000170: 83 2b c1 00 lw s7, 12(sp) -80000174: 03 2b 01 01 lw s6, 16(sp) -80000178: 83 2a 41 01 lw s5, 20(sp) -8000017c: 03 2a 81 01 lw s4, 24(sp) -80000180: 83 29 c1 01 lw s3, 28(sp) -80000184: 03 29 01 02 lw s2, 32(sp) -80000188: 83 24 41 02 lw s1, 36(sp) -8000018c: 03 24 81 02 lw s0, 40(sp) -80000190: 83 20 c1 02 lw ra, 44(sp) -80000194: 13 01 01 03 addi sp, sp, 48 -80000198: 6f 00 40 56 j 1380 +800000ac: ef 00 00 65 jal 1616 +800000b0: 93 85 c1 c2 addi a1, gp, -980 +800000b4: 13 96 24 00 slli a2, s1, 2 +800000b8: b3 05 b6 00 add a1, a2, a1 +800000bc: 03 ab 05 00 lw s6, 0(a1) +800000c0: 83 25 4b 01 lw a1, 20(s6) +800000c4: 03 26 0b 01 lw a2, 16(s6) +800000c8: 93 86 05 00 mv a3, a1 +800000cc: 63 c4 35 01 blt a1, s3, 8 +800000d0: 93 86 09 00 mv a3, s3 +800000d4: b3 a5 b9 00 slt a1, s3, a1 +800000d8: 33 07 b6 00 add a4, a2, a1 +800000dc: 93 05 10 00 addi a1, zero, 1 +800000e0: 63 4a b7 08 blt a4, a1, 148 +800000e4: 83 25 0b 00 lw a1, 0(s6) +800000e8: 83 aa 05 00 lw s5, 0(a1) +800000ec: 83 a7 45 00 lw a5, 4(a1) +800000f0: 83 24 cb 00 lw s1, 12(s6) +800000f4: 33 8a 57 03 mul s4, a5, s5 +800000f8: 13 0c f7 ff addi s8, a4, -1 +800000fc: 33 86 c9 02 mul a2, s3, a2 +80000100: 33 86 c6 00 add a2, a3, a2 +80000104: 33 05 c5 02 mul a0, a0, a2 +80000108: 33 85 a4 00 add a0, s1, a0 +8000010c: 33 06 e9 02 mul a2, s2, a4 +80000110: b3 04 c5 00 add s1, a0, a2 +80000114: 33 09 f0 40 neg s2, a5 +80000118: b3 0b 40 41 neg s7, s4 +8000011c: 33 c6 44 03 div a2, s1, s4 +80000120: 33 85 cb 02 mul a0, s7, a2 +80000124: 33 85 a4 00 add a0, s1, a0 +80000128: b3 46 55 03 div a3, a0, s5 +8000012c: 03 a5 c5 00 lw a0, 12(a1) +80000130: 33 07 c9 02 mul a4, s2, a2 +80000134: 33 07 d7 40 sub a4, a4, a3 +80000138: 33 87 ea 02 mul a4, s5, a4 +8000013c: 33 08 e5 00 add a6, a0, a4 +80000140: 03 a7 05 01 lw a4, 16(a1) +80000144: 03 a4 45 01 lw s0, 20(a1) +80000148: 83 27 4b 00 lw a5, 4(s6) +8000014c: 03 25 8b 00 lw a0, 8(s6) +80000150: b3 06 d7 00 add a3, a4, a3 +80000154: 33 07 c4 00 add a4, s0, a2 +80000158: 33 86 04 01 add a2, s1, a6 +8000015c: e7 80 07 00 jalr a5 +80000160: 63 0a 0c 00 beqz s8, 20 +80000164: 83 25 0b 00 lw a1, 0(s6) +80000168: 13 0c fc ff addi s8, s8, -1 +8000016c: 93 84 14 00 addi s1, s1, 1 +80000170: 6f f0 df fa j -84 +80000174: 13 b5 19 00 seqz a0, s3 +80000178: 03 2c 81 00 lw s8, 8(sp) +8000017c: 83 2b c1 00 lw s7, 12(sp) +80000180: 03 2b 01 01 lw s6, 16(sp) +80000184: 83 2a 41 01 lw s5, 20(sp) +80000188: 03 2a 81 01 lw s4, 24(sp) +8000018c: 83 29 c1 01 lw s3, 28(sp) +80000190: 03 29 01 02 lw s2, 32(sp) +80000194: 83 24 41 02 lw s1, 36(sp) +80000198: 03 24 81 02 lw s0, 40(sp) +8000019c: 83 20 c1 02 lw ra, 44(sp) +800001a0: 13 01 01 03 addi sp, sp, 48 +800001a4: 6f 00 00 51 j 1296 -8000019c kernel_spawn_run_threads: -8000019c: 13 01 01 fe addi sp, sp, -32 -800001a0: 23 2e 11 00 sw ra, 28(sp) -800001a4: 23 2c 81 00 sw s0, 24(sp) -800001a8: 23 2a 91 00 sw s1, 20(sp) -800001ac: 23 28 21 01 sw s2, 16(sp) -800001b0: 23 26 31 01 sw s3, 12(sp) -800001b4: 93 89 05 00 mv s3, a1 -800001b8: ef 00 40 54 jal 1348 -800001bc: 03 a5 c1 c2 lw a0, -980(gp) -800001c0: 03 25 05 00 lw a0, 0(a0) -800001c4: 83 24 05 00 lw s1, 0(a0) -800001c8: 03 25 45 00 lw a0, 4(a0) -800001cc: 33 04 95 02 mul s0, a0, s1 -800001d0: ef 00 40 56 jal 1380 -800001d4: 33 05 35 01 add a0, a0, s3 -800001d8: 03 a6 c1 c2 lw a2, -980(gp) -800001dc: 33 47 85 02 div a4, a0, s0 -800001e0: b3 05 87 02 mul a1, a4, s0 -800001e4: 33 05 b5 40 sub a0, a0, a1 -800001e8: 83 25 06 00 lw a1, 0(a2) -800001ec: b3 46 95 02 div a3, a0, s1 -800001f0: b3 87 96 02 mul a5, a3, s1 -800001f4: 33 08 f5 40 sub a6, a0, a5 -800001f8: 83 a4 c5 00 lw s1, 12(a1) -800001fc: 03 a4 05 01 lw s0, 16(a1) -80000200: 83 a7 45 01 lw a5, 20(a1) -80000204: 83 28 46 00 lw a7, 4(a2) -80000208: 03 25 86 00 lw a0, 8(a2) -8000020c: 33 06 98 00 add a2, a6, s1 -80000210: b3 06 d4 00 add a3, s0, a3 -80000214: 33 87 e7 00 add a4, a5, a4 -80000218: e7 80 08 00 jalr a7 -8000021c: 13 05 10 00 addi a0, zero, 1 -80000220: 83 29 c1 00 lw s3, 12(sp) -80000224: 03 29 01 01 lw s2, 16(sp) -80000228: 83 24 41 01 lw s1, 20(sp) -8000022c: 03 24 81 01 lw s0, 24(sp) -80000230: 83 20 c1 01 lw ra, 28(sp) -80000234: 13 01 01 02 addi sp, sp, 32 -80000238: 6f 00 40 4c j 1220 +800001a8 kernel_spawn_run_threads: +800001a8: 13 01 01 ff addi sp, sp, -16 +800001ac: 23 26 11 00 sw ra, 12(sp) +800001b0: 23 24 81 00 sw s0, 8(sp) +800001b4: ef 00 00 50 jal 1280 +800001b8: ef 00 c0 53 jal 1340 +800001bc: 13 04 05 00 mv s0, a0 +800001c0: ef 00 c0 52 jal 1324 +800001c4: 93 85 c1 c2 addi a1, gp, -980 +800001c8: 13 16 24 00 slli a2, s0, 2 +800001cc: b3 05 b6 00 add a1, a2, a1 +800001d0: 03 a6 05 00 lw a2, 0(a1) +800001d4: 83 25 06 00 lw a1, 0(a2) +800001d8: 83 26 c6 00 lw a3, 12(a2) +800001dc: 03 a7 05 00 lw a4, 0(a1) +800001e0: 83 a7 45 00 lw a5, 4(a1) +800001e4: 33 85 a6 00 add a0, a3, a0 +800001e8: b3 86 e7 02 mul a3, a5, a4 +800001ec: b3 47 d5 02 div a5, a0, a3 +800001f0: b3 86 d7 02 mul a3, a5, a3 +800001f4: 03 a4 c5 00 lw s0, 12(a1) +800001f8: 33 05 d5 40 sub a0, a0, a3 +800001fc: b3 46 e5 02 div a3, a0, a4 +80000200: 33 88 e6 02 mul a6, a3, a4 +80000204: b3 08 a4 00 add a7, s0, a0 +80000208: 03 a7 05 01 lw a4, 16(a1) +8000020c: 03 a4 45 01 lw s0, 20(a1) +80000210: 83 22 46 00 lw t0, 4(a2) +80000214: 03 25 86 00 lw a0, 8(a2) +80000218: 33 86 08 41 sub a2, a7, a6 +8000021c: b3 06 d7 00 add a3, a4, a3 +80000220: 33 07 f4 00 add a4, s0, a5 +80000224: e7 80 02 00 jalr t0 +80000228: 13 05 10 00 addi a0, zero, 1 +8000022c: 03 24 81 00 lw s0, 8(sp) +80000230: 83 20 c1 00 lw ra, 12(sp) +80000234: 13 01 01 01 addi sp, sp, 16 +80000238: 6f 00 c0 47 j 1148 8000023c kernel_spawn: 8000023c: 13 01 01 fc addi sp, sp, -64 @@ -167,672 +167,654 @@ Disassembly of section .text: 80000254: 23 24 41 03 sw s4, 40(sp) 80000258: 23 22 51 03 sw s5, 36(sp) 8000025c: 23 20 61 03 sw s6, 32(sp) -80000260: 13 0b 05 00 mv s6, a0 -80000264: 03 25 05 00 lw a0, 0(a0) -80000268: 83 26 4b 00 lw a3, 4(s6) -8000026c: 03 27 8b 00 lw a4, 8(s6) -80000270: 13 09 06 00 mv s2, a2 -80000274: 93 89 05 00 mv s3, a1 -80000278: 33 85 a6 02 mul a0, a3, a0 -8000027c: b3 04 e5 02 mul s1, a0, a4 -80000280: ef 00 40 4d jal 1236 -80000284: 13 04 05 00 mv s0, a0 -80000288: ef 00 40 4c jal 1220 +80000260: 23 2e 71 01 sw s7, 28(sp) +80000264: 23 2c 81 01 sw s8, 24(sp) +80000268: 93 04 05 00 mv s1, a0 +8000026c: 83 2b 05 00 lw s7, 0(a0) +80000270: 03 24 45 00 lw s0, 4(a0) +80000274: 03 2c 85 00 lw s8, 8(a0) +80000278: 13 09 06 00 mv s2, a2 +8000027c: 93 89 05 00 mv s3, a1 +80000280: ef 00 c0 48 jal 1164 +80000284: 13 0b 05 00 mv s6, a0 +80000288: ef 00 c0 47 jal 1148 8000028c: 13 0a 05 00 mv s4, a0 -80000290: ef 00 40 4b jal 1204 +80000290: ef 00 c0 46 jal 1132 80000294: 93 0a 05 00 mv s5, a0 -80000298: ef 00 40 4a jal 1188 -8000029c: b3 85 4a 03 mul a1, s5, s4 -800002a0: 13 06 10 00 addi a2, zero, 1 -800002a4: 63 c8 95 00 blt a1, s1, 16 -800002a8: 63 5a 86 00 bge a2, s0, 20 -800002ac: 63 4c c5 00 blt a0, a2, 24 -800002b0: 6f 00 00 13 j 304 -800002b4: 33 c6 b4 02 div a2, s1, a1 -800002b8: e3 4a 86 fe blt a2, s0, -12 -800002bc: 13 06 04 00 mv a2, s0 -800002c0: 63 50 c5 12 bge a0, a2, 288 -800002c4: 93 06 f4 ff addi a3, s0, -1 -800002c8: b3 c5 c4 02 div a1, s1, a2 -800002cc: 63 0e d5 00 beq a0, a3, 28 -800002d0: 13 06 00 00 mv a2, zero -800002d4: b3 06 b6 00 add a3, a2, a1 -800002d8: 33 c6 56 03 div a2, a3, s5 -800002dc: 13 07 00 00 mv a4, zero -800002e0: 63 50 46 03 bge a2, s4, 32 -800002e4: 6f 00 00 02 j 32 -800002e8: 33 86 c5 02 mul a2, a1, a2 -800002ec: 33 86 c4 40 sub a2, s1, a2 -800002f0: b3 06 b6 00 add a3, a2, a1 -800002f4: 33 c6 56 03 div a2, a3, s5 -800002f8: 13 07 00 00 mv a4, zero -800002fc: 63 44 46 01 blt a2, s4, 8 -80000300: 33 47 46 03 div a4, a2, s4 -80000304: 93 07 00 00 mv a5, zero -80000308: b3 0a 56 03 mul s5, a2, s5 -8000030c: 93 04 10 00 addi s1, zero, 1 -80000310: 63 08 07 00 beqz a4, 16 -80000314: b3 07 47 03 mul a5, a4, s4 -80000318: b3 07 f6 40 sub a5, a2, a5 -8000031c: 93 04 07 00 mv s1, a4 -80000320: 33 84 56 41 sub s0, a3, s5 -80000324: 23 24 61 01 sw s6, 8(sp) -80000328: 23 26 31 01 sw s3, 12(sp) -8000032c: 23 28 21 01 sw s2, 16(sp) -80000330: 33 85 a5 02 mul a0, a1, a0 -80000334: 23 2a a1 00 sw a0, 20(sp) -80000338: 23 2c 91 00 sw s1, 24(sp) -8000033c: 23 2e f1 00 sw a5, 28(sp) -80000340: 93 05 81 00 addi a1, sp, 8 -80000344: 93 06 20 00 addi a3, zero, 2 -80000348: 23 a6 b1 c2 sw a1, -980(gp) -8000034c: 63 40 d6 02 blt a2, a3, 32 -80000350: 63 44 46 01 blt a2, s4, 8 -80000354: 13 06 0a 00 mv a2, s4 -80000358: 37 05 00 80 lui a0, 524288 -8000035c: 93 05 05 06 addi a1, a0, 96 -80000360: 13 05 06 00 mv a0, a2 -80000364: ef 00 00 39 jal 912 -80000368: ef f0 9f cf jal -776 -8000036c: 63 0a 04 06 beqz s0, 116 -80000370: 13 05 04 00 mv a0, s0 -80000374: ef 00 80 38 jal 904 -80000378: 03 a5 c1 c2 lw a0, -980(gp) -8000037c: 03 25 05 00 lw a0, 0(a0) -80000380: 83 24 05 00 lw s1, 0(a0) -80000384: 03 25 45 00 lw a0, 4(a0) -80000388: 33 04 95 02 mul s0, a0, s1 -8000038c: ef 00 80 3a jal 936 -80000390: 33 05 55 01 add a0, a0, s5 -80000394: 03 a6 c1 c2 lw a2, -980(gp) -80000398: 33 47 85 02 div a4, a0, s0 -8000039c: b3 05 87 02 mul a1, a4, s0 -800003a0: 33 05 b5 40 sub a0, a0, a1 -800003a4: 83 25 06 00 lw a1, 0(a2) -800003a8: b3 46 95 02 div a3, a0, s1 -800003ac: b3 87 96 02 mul a5, a3, s1 -800003b0: 33 08 f5 40 sub a6, a0, a5 -800003b4: 83 a4 c5 00 lw s1, 12(a1) -800003b8: 03 a4 05 01 lw s0, 16(a1) -800003bc: 83 a7 45 01 lw a5, 20(a1) -800003c0: 83 28 46 00 lw a7, 4(a2) -800003c4: 03 25 86 00 lw a0, 8(a2) -800003c8: 33 06 98 00 add a2, a6, s1 -800003cc: b3 06 d4 00 add a3, s0, a3 -800003d0: 33 87 e7 00 add a4, a5, a4 -800003d4: e7 80 08 00 jalr a7 -800003d8: 13 05 10 00 addi a0, zero, 1 -800003dc: ef 00 00 32 jal 800 -800003e0: 03 2b 01 02 lw s6, 32(sp) -800003e4: 83 2a 41 02 lw s5, 36(sp) -800003e8: 03 2a 81 02 lw s4, 40(sp) -800003ec: 83 29 c1 02 lw s3, 44(sp) -800003f0: 03 29 01 03 lw s2, 48(sp) -800003f4: 83 24 41 03 lw s1, 52(sp) -800003f8: 03 24 81 03 lw s0, 56(sp) -800003fc: 83 20 c1 03 lw ra, 60(sp) -80000400: 13 01 01 04 addi sp, sp, 64 -80000404: 67 80 00 00 ret +80000298: ef 00 c0 45 jal 1116 +8000029c: 93 05 70 00 addi a1, zero, 7 +800002a0: 63 c8 a5 0e blt a1, a0, 240 +800002a4: b3 05 74 03 mul a1, s0, s7 +800002a8: 33 86 85 03 mul a2, a1, s8 +800002ac: b3 85 4a 03 mul a1, s5, s4 +800002b0: 93 06 10 00 addi a3, zero, 1 +800002b4: 63 c8 c5 00 blt a1, a2, 16 +800002b8: 63 da 66 01 bge a3, s6, 20 +800002bc: 63 4c d5 00 blt a0, a3, 24 +800002c0: 6f 00 00 0d j 208 +800002c4: b3 46 b6 02 div a3, a2, a1 +800002c8: e3 ca 66 ff blt a3, s6, -12 +800002cc: 93 06 0b 00 mv a3, s6 +800002d0: 63 50 d5 0c bge a0, a3, 192 +800002d4: 13 07 fb ff addi a4, s6, -1 +800002d8: b3 45 d6 02 div a1, a2, a3 +800002dc: 63 0e e5 00 beq a0, a4, 28 +800002e0: 13 06 00 00 mv a2, zero +800002e4: 33 0b b6 00 add s6, a2, a1 +800002e8: 33 46 5b 03 div a2, s6, s5 +800002ec: 93 06 00 00 mv a3, zero +800002f0: 63 50 46 03 bge a2, s4, 32 +800002f4: 6f 00 00 02 j 32 +800002f8: b3 86 d5 02 mul a3, a1, a3 +800002fc: 33 06 d6 40 sub a2, a2, a3 +80000300: 33 0b b6 00 add s6, a2, a1 +80000304: 33 46 5b 03 div a2, s6, s5 +80000308: 93 06 00 00 mv a3, zero +8000030c: 63 44 46 01 blt a2, s4, 8 +80000310: b3 46 46 03 div a3, a2, s4 +80000314: 13 07 00 00 mv a4, zero +80000318: 93 07 10 00 addi a5, zero, 1 +8000031c: 63 88 06 00 beqz a3, 16 +80000320: 33 87 46 03 mul a4, a3, s4 +80000324: 33 07 e6 40 sub a4, a2, a4 +80000328: 93 87 06 00 mv a5, a3 +8000032c: 33 04 56 03 mul s0, a2, s5 +80000330: 23 20 91 00 sw s1, 0(sp) +80000334: 23 22 31 01 sw s3, 4(sp) +80000338: 23 24 21 01 sw s2, 8(sp) +8000033c: b3 85 a5 02 mul a1, a1, a0 +80000340: 23 26 b1 00 sw a1, 12(sp) +80000344: 23 28 f1 00 sw a5, 16(sp) +80000348: 23 2a e1 00 sw a4, 20(sp) +8000034c: 93 85 c1 c2 addi a1, gp, -980 +80000350: 13 15 25 00 slli a0, a0, 2 +80000354: 33 05 b5 00 add a0, a0, a1 +80000358: 93 05 01 00 mv a1, sp +8000035c: 93 06 20 00 addi a3, zero, 2 +80000360: 23 20 b5 00 sw a1, 0(a0) +80000364: 63 40 d6 02 blt a2, a3, 32 +80000368: 63 44 46 01 blt a2, s4, 8 +8000036c: 13 06 0a 00 mv a2, s4 +80000370: 37 05 00 80 lui a0, 524288 +80000374: 93 05 05 06 addi a1, a0, 96 +80000378: 13 05 06 00 mv a0, a2 +8000037c: ef 00 00 33 jal 816 +80000380: ef f0 1f ce jal -800 +80000384: 63 06 8b 00 beq s6, s0, 12 +80000388: 23 26 81 00 sw s0, 12(sp) +8000038c: ef f0 5f cd jal -812 +80000390: 03 2c 81 01 lw s8, 24(sp) +80000394: 83 2b c1 01 lw s7, 28(sp) +80000398: 03 2b 01 02 lw s6, 32(sp) +8000039c: 83 2a 41 02 lw s5, 36(sp) +800003a0: 03 2a 81 02 lw s4, 40(sp) +800003a4: 83 29 c1 02 lw s3, 44(sp) +800003a8: 03 29 01 03 lw s2, 48(sp) +800003ac: 83 24 41 03 lw s1, 52(sp) +800003b0: 03 24 81 03 lw s0, 56(sp) +800003b4: 83 20 c1 03 lw ra, 60(sp) +800003b8: 13 01 01 04 addi sp, sp, 64 +800003bc: 67 80 00 00 ret -80000408 main: -80000408: 13 01 01 ff addi sp, sp, -16 -8000040c: 23 26 11 00 sw ra, 12(sp) -80000410: 37 05 00 80 lui a0, 524288 -80000414: 93 05 45 51 addi a1, a0, 1300 -80000418: 37 05 ff 7f lui a0, 524272 -8000041c: 13 06 45 03 addi a2, a0, 52 -80000420: 37 05 ff 7f lui a0, 524272 -80000424: ef f0 9f e1 jal -488 -80000428: 13 05 00 00 mv a0, zero -8000042c: 83 20 c1 00 lw ra, 12(sp) -80000430: 13 01 01 01 addi sp, sp, 16 -80000434: 67 80 00 00 ret +800003c0 main: +800003c0: 13 01 01 ff addi sp, sp, -16 +800003c4: 23 26 11 00 sw ra, 12(sp) +800003c8: 37 05 00 80 lui a0, 524288 +800003cc: 93 05 c5 4c addi a1, a0, 1228 +800003d0: 37 05 ff 7f lui a0, 524272 +800003d4: 13 06 45 03 addi a2, a0, 52 +800003d8: 37 05 ff 7f lui a0, 524272 +800003dc: ef f0 1f e6 jal -416 +800003e0: 13 05 00 00 mv a0, zero +800003e4: 83 20 c1 00 lw ra, 12(sp) +800003e8: 13 01 01 01 addi sp, sp, 16 +800003ec: 67 80 00 00 ret -80000438 _pocl_kernel_Fan1: -80000438: 13 01 01 ff addi sp, sp, -16 -8000043c: 23 26 11 00 sw ra, 12(sp) -80000440: 23 24 81 00 sw s0, 8(sp) -80000444: 23 22 91 00 sw s1, 4(sp) -80000448: 13 04 01 01 addi s0, sp, 16 -8000044c: 13 71 c1 ff andi sp, sp, -4 -80000450: 93 08 00 00 mv a7, zero -80000454: 83 ae 87 01 lw t4, 24(a5) -80000458: 83 a2 c7 01 lw t0, 28(a5) -8000045c: 03 a3 07 02 lw t1, 32(a5) -80000460: 03 ae c7 00 lw t3, 12(a5) -80000464: b3 8f 0e 03 mul t6, t4, a6 -80000468: 13 16 27 00 slli a2, a4, 2 -8000046c: 33 88 c5 00 add a6, a1, a2 -80000470: b3 07 d7 02 mul a5, a4, a3 -80000474: 93 97 27 00 slli a5, a5, 2 -80000478: 33 08 f8 00 add a6, a6, a5 -8000047c: 93 47 f7 ff not a5, a4 -80000480: 33 8f d7 00 add t5, a5, a3 -80000484: 33 07 ee 00 add a4, t3, a4 -80000488: 33 07 f7 01 add a4, a4, t6 -8000048c: 13 07 17 00 addi a4, a4, 1 -80000490: 33 87 e6 02 mul a4, a3, a4 -80000494: 13 17 27 00 slli a4, a4, 2 -80000498: b3 03 c7 00 add t2, a4, a2 -8000049c: 93 94 26 00 slli s1, a3, 2 -800004a0: 33 07 fe 01 add a4, t3, t6 -800004a4: 6f 00 c0 00 j 12 -800004a8: 93 88 18 00 addi a7, a7, 1 -800004ac: 63 f8 68 04 bgeu a7, t1, 80 -800004b0: 13 0e 00 00 mv t3, zero -800004b4: 6f 00 c0 00 j 12 -800004b8: 13 0e 1e 00 addi t3, t3, 1 -800004bc: e3 76 5e fe bgeu t3, t0, -20 -800004c0: 13 06 00 00 mv a2, zero -800004c4: 93 87 03 00 mv a5, t2 -800004c8: 6f 00 00 01 j 16 -800004cc: 13 06 16 00 addi a2, a2, 1 -800004d0: b3 87 97 00 add a5, a5, s1 -800004d4: e3 72 d6 ff bgeu a2, t4, -28 -800004d8: b3 06 c7 00 add a3, a4, a2 -800004dc: e3 d8 e6 ff bge a3, t5, -16 -800004e0: b3 86 f5 00 add a3, a1, a5 -800004e4: 07 a0 06 00 flw ft0, 0(a3) -800004e8: 87 20 08 00 flw ft1, 0(a6) -800004ec: 53 70 10 18 fdiv.s ft0, ft0, ft1 -800004f0: b3 06 f5 00 add a3, a0, a5 -800004f4: 27 a0 06 00 fsw ft0, 0(a3) -800004f8: 6f f0 5f fd j -44 -800004fc: 13 01 04 ff addi sp, s0, -16 -80000500: 83 24 41 00 lw s1, 4(sp) -80000504: 03 24 81 00 lw s0, 8(sp) -80000508: 83 20 c1 00 lw ra, 12(sp) -8000050c: 13 01 01 01 addi sp, sp, 16 -80000510: 67 80 00 00 ret +800003f0 _pocl_kernel_Fan1: +800003f0: 13 01 01 ff addi sp, sp, -16 +800003f4: 23 26 11 00 sw ra, 12(sp) +800003f8: 23 24 81 00 sw s0, 8(sp) +800003fc: 23 22 91 00 sw s1, 4(sp) +80000400: 13 04 01 01 addi s0, sp, 16 +80000404: 13 71 c1 ff andi sp, sp, -4 +80000408: 93 08 00 00 mv a7, zero +8000040c: 83 ae 87 01 lw t4, 24(a5) +80000410: 83 a2 c7 01 lw t0, 28(a5) +80000414: 03 a3 07 02 lw t1, 32(a5) +80000418: 03 ae c7 00 lw t3, 12(a5) +8000041c: b3 8f 0e 03 mul t6, t4, a6 +80000420: 13 16 27 00 slli a2, a4, 2 +80000424: 33 88 c5 00 add a6, a1, a2 +80000428: b3 07 d7 02 mul a5, a4, a3 +8000042c: 93 97 27 00 slli a5, a5, 2 +80000430: 33 08 f8 00 add a6, a6, a5 +80000434: 93 47 f7 ff not a5, a4 +80000438: 33 8f d7 00 add t5, a5, a3 +8000043c: 33 07 ee 00 add a4, t3, a4 +80000440: 33 07 f7 01 add a4, a4, t6 +80000444: 13 07 17 00 addi a4, a4, 1 +80000448: 33 87 e6 02 mul a4, a3, a4 +8000044c: 13 17 27 00 slli a4, a4, 2 +80000450: b3 03 c7 00 add t2, a4, a2 +80000454: 93 94 26 00 slli s1, a3, 2 +80000458: 33 07 fe 01 add a4, t3, t6 +8000045c: 6f 00 c0 00 j 12 +80000460: 93 88 18 00 addi a7, a7, 1 +80000464: 63 f8 68 04 bgeu a7, t1, 80 +80000468: 13 0e 00 00 mv t3, zero +8000046c: 6f 00 c0 00 j 12 +80000470: 13 0e 1e 00 addi t3, t3, 1 +80000474: e3 76 5e fe bgeu t3, t0, -20 +80000478: 13 06 00 00 mv a2, zero +8000047c: 93 87 03 00 mv a5, t2 +80000480: 6f 00 00 01 j 16 +80000484: 13 06 16 00 addi a2, a2, 1 +80000488: b3 87 97 00 add a5, a5, s1 +8000048c: e3 72 d6 ff bgeu a2, t4, -28 +80000490: b3 06 c7 00 add a3, a4, a2 +80000494: e3 d8 e6 ff bge a3, t5, -16 +80000498: b3 86 f5 00 add a3, a1, a5 +8000049c: 07 a0 06 00 flw ft0, 0(a3) +800004a0: 87 20 08 00 flw ft1, 0(a6) +800004a4: 53 70 10 18 fdiv.s ft0, ft0, ft1 +800004a8: b3 06 f5 00 add a3, a0, a5 +800004ac: 27 a0 06 00 fsw ft0, 0(a3) +800004b0: 6f f0 5f fd j -44 +800004b4: 13 01 04 ff addi sp, s0, -16 +800004b8: 83 24 41 00 lw s1, 4(sp) +800004bc: 03 24 81 00 lw s0, 8(sp) +800004c0: 83 20 c1 00 lw ra, 12(sp) +800004c4: 13 01 01 01 addi sp, sp, 16 +800004c8: 67 80 00 00 ret -80000514 _pocl_kernel_Fan1_workgroup: -80000514: 13 08 00 00 mv a6, zero -80000518: 83 26 05 00 lw a3, 0(a0) -8000051c: 03 27 45 00 lw a4, 4(a0) -80000520: 83 27 c5 00 lw a5, 12(a0) -80000524: 03 25 05 01 lw a0, 16(a0) -80000528: 83 a3 06 00 lw t2, 0(a3) -8000052c: 83 2e 07 00 lw t4, 0(a4) -80000530: 83 a6 07 00 lw a3, 0(a5) -80000534: 03 25 05 00 lw a0, 0(a0) -80000538: 83 af 85 01 lw t6, 24(a1) -8000053c: 83 a2 c5 01 lw t0, 28(a1) -80000540: 83 a8 05 02 lw a7, 32(a1) -80000544: 03 ae c5 00 lw t3, 12(a1) -80000548: 33 87 cf 02 mul a4, t6, a2 -8000054c: 93 15 25 00 slli a1, a0, 2 -80000550: 33 83 be 00 add t1, t4, a1 -80000554: 33 06 d5 02 mul a2, a0, a3 -80000558: 13 16 26 00 slli a2, a2, 2 -8000055c: 33 0f c3 00 add t5, t1, a2 -80000560: 13 46 f5 ff not a2, a0 -80000564: 33 86 c6 00 add a2, a3, a2 -80000568: 33 05 ae 00 add a0, t3, a0 -8000056c: 33 05 e5 00 add a0, a0, a4 -80000570: 13 05 15 00 addi a0, a0, 1 -80000574: 33 85 a6 02 mul a0, a3, a0 -80000578: 13 15 25 00 slli a0, a0, 2 -8000057c: 33 03 b5 00 add t1, a0, a1 -80000580: 93 96 26 00 slli a3, a3, 2 -80000584: 33 05 ee 00 add a0, t3, a4 -80000588: 6f 00 c0 00 j 12 -8000058c: 13 08 18 00 addi a6, a6, 1 -80000590: 63 78 18 05 bgeu a6, a7, 80 -80000594: 13 0e 00 00 mv t3, zero -80000598: 6f 00 c0 00 j 12 -8000059c: 13 0e 1e 00 addi t3, t3, 1 -800005a0: e3 76 5e fe bgeu t3, t0, -20 -800005a4: 13 07 00 00 mv a4, zero -800005a8: 93 05 03 00 mv a1, t1 -800005ac: 6f 00 00 01 j 16 -800005b0: 13 07 17 00 addi a4, a4, 1 -800005b4: b3 85 d5 00 add a1, a1, a3 -800005b8: e3 72 f7 ff bgeu a4, t6, -28 -800005bc: b3 07 e5 00 add a5, a0, a4 -800005c0: e3 d8 c7 fe bge a5, a2, -16 -800005c4: b3 87 be 00 add a5, t4, a1 -800005c8: 07 a0 07 00 flw ft0, 0(a5) -800005cc: 87 20 0f 00 flw ft1, 0(t5) -800005d0: 53 70 10 18 fdiv.s ft0, ft0, ft1 -800005d4: b3 87 b3 00 add a5, t2, a1 -800005d8: 27 a0 07 00 fsw ft0, 0(a5) -800005dc: 6f f0 5f fd j -44 -800005e0: 67 80 00 00 ret +800004cc _pocl_kernel_Fan1_workgroup: +800004cc: 13 08 00 00 mv a6, zero +800004d0: 83 26 05 00 lw a3, 0(a0) +800004d4: 03 27 45 00 lw a4, 4(a0) +800004d8: 83 27 c5 00 lw a5, 12(a0) +800004dc: 03 25 05 01 lw a0, 16(a0) +800004e0: 83 a3 06 00 lw t2, 0(a3) +800004e4: 83 2e 07 00 lw t4, 0(a4) +800004e8: 83 a6 07 00 lw a3, 0(a5) +800004ec: 03 25 05 00 lw a0, 0(a0) +800004f0: 83 af 85 01 lw t6, 24(a1) +800004f4: 83 a2 c5 01 lw t0, 28(a1) +800004f8: 83 a8 05 02 lw a7, 32(a1) +800004fc: 03 ae c5 00 lw t3, 12(a1) +80000500: 33 87 cf 02 mul a4, t6, a2 +80000504: 93 15 25 00 slli a1, a0, 2 +80000508: 33 83 be 00 add t1, t4, a1 +8000050c: 33 06 d5 02 mul a2, a0, a3 +80000510: 13 16 26 00 slli a2, a2, 2 +80000514: 33 0f c3 00 add t5, t1, a2 +80000518: 13 46 f5 ff not a2, a0 +8000051c: 33 86 c6 00 add a2, a3, a2 +80000520: 33 05 ae 00 add a0, t3, a0 +80000524: 33 05 e5 00 add a0, a0, a4 +80000528: 13 05 15 00 addi a0, a0, 1 +8000052c: 33 85 a6 02 mul a0, a3, a0 +80000530: 13 15 25 00 slli a0, a0, 2 +80000534: 33 03 b5 00 add t1, a0, a1 +80000538: 93 96 26 00 slli a3, a3, 2 +8000053c: 33 05 ee 00 add a0, t3, a4 +80000540: 6f 00 c0 00 j 12 +80000544: 13 08 18 00 addi a6, a6, 1 +80000548: 63 78 18 05 bgeu a6, a7, 80 +8000054c: 13 0e 00 00 mv t3, zero +80000550: 6f 00 c0 00 j 12 +80000554: 13 0e 1e 00 addi t3, t3, 1 +80000558: e3 76 5e fe bgeu t3, t0, -20 +8000055c: 13 07 00 00 mv a4, zero +80000560: 93 05 03 00 mv a1, t1 +80000564: 6f 00 00 01 j 16 +80000568: 13 07 17 00 addi a4, a4, 1 +8000056c: b3 85 d5 00 add a1, a1, a3 +80000570: e3 72 f7 ff bgeu a4, t6, -28 +80000574: b3 07 e5 00 add a5, a0, a4 +80000578: e3 d8 c7 fe bge a5, a2, -16 +8000057c: b3 87 be 00 add a5, t4, a1 +80000580: 07 a0 07 00 flw ft0, 0(a5) +80000584: 87 20 0f 00 flw ft1, 0(t5) +80000588: 53 70 10 18 fdiv.s ft0, ft0, ft1 +8000058c: b3 87 b3 00 add a5, t2, a1 +80000590: 27 a0 07 00 fsw ft0, 0(a5) +80000594: 6f f0 5f fd j -44 +80000598: 67 80 00 00 ret -800005e4 _pocl_kernel_Fan1_workgroup_fast: -800005e4: 13 08 00 00 mv a6, zero -800005e8: 83 26 c5 00 lw a3, 12(a0) -800005ec: 03 27 05 01 lw a4, 16(a0) -800005f0: 83 23 05 00 lw t2, 0(a0) -800005f4: 83 2e 45 00 lw t4, 4(a0) -800005f8: 03 a5 06 00 lw a0, 0(a3) -800005fc: 83 26 07 00 lw a3, 0(a4) -80000600: 83 af 85 01 lw t6, 24(a1) -80000604: 83 a2 c5 01 lw t0, 28(a1) -80000608: 83 a8 05 02 lw a7, 32(a1) -8000060c: 03 ae c5 00 lw t3, 12(a1) -80000610: 33 87 cf 02 mul a4, t6, a2 -80000614: 93 95 26 00 slli a1, a3, 2 -80000618: 33 83 be 00 add t1, t4, a1 -8000061c: 33 86 a6 02 mul a2, a3, a0 -80000620: 13 16 26 00 slli a2, a2, 2 -80000624: 33 0f c3 00 add t5, t1, a2 -80000628: 13 c6 f6 ff not a2, a3 -8000062c: 33 06 c5 00 add a2, a0, a2 -80000630: b3 06 de 00 add a3, t3, a3 -80000634: b3 86 e6 00 add a3, a3, a4 -80000638: 93 86 16 00 addi a3, a3, 1 -8000063c: b3 06 d5 02 mul a3, a0, a3 -80000640: 93 96 26 00 slli a3, a3, 2 -80000644: 33 83 b6 00 add t1, a3, a1 -80000648: 93 16 25 00 slli a3, a0, 2 -8000064c: 33 07 ee 00 add a4, t3, a4 -80000650: 6f 00 c0 00 j 12 -80000654: 13 08 18 00 addi a6, a6, 1 -80000658: 63 78 18 05 bgeu a6, a7, 80 -8000065c: 13 0e 00 00 mv t3, zero -80000660: 6f 00 c0 00 j 12 -80000664: 13 0e 1e 00 addi t3, t3, 1 -80000668: e3 76 5e fe bgeu t3, t0, -20 -8000066c: 13 05 00 00 mv a0, zero -80000670: 93 05 03 00 mv a1, t1 -80000674: 6f 00 00 01 j 16 -80000678: 13 05 15 00 addi a0, a0, 1 -8000067c: b3 85 d5 00 add a1, a1, a3 -80000680: e3 72 f5 ff bgeu a0, t6, -28 -80000684: b3 07 a7 00 add a5, a4, a0 -80000688: e3 d8 c7 fe bge a5, a2, -16 -8000068c: b3 87 be 00 add a5, t4, a1 -80000690: 07 a0 07 00 flw ft0, 0(a5) -80000694: 87 20 0f 00 flw ft1, 0(t5) -80000698: 53 70 10 18 fdiv.s ft0, ft0, ft1 -8000069c: b3 87 b3 00 add a5, t2, a1 -800006a0: 27 a0 07 00 fsw ft0, 0(a5) -800006a4: 6f f0 5f fd j -44 +8000059c _pocl_kernel_Fan1_workgroup_fast: +8000059c: 13 08 00 00 mv a6, zero +800005a0: 83 26 c5 00 lw a3, 12(a0) +800005a4: 03 27 05 01 lw a4, 16(a0) +800005a8: 83 23 05 00 lw t2, 0(a0) +800005ac: 83 2e 45 00 lw t4, 4(a0) +800005b0: 03 a5 06 00 lw a0, 0(a3) +800005b4: 83 26 07 00 lw a3, 0(a4) +800005b8: 83 af 85 01 lw t6, 24(a1) +800005bc: 83 a2 c5 01 lw t0, 28(a1) +800005c0: 83 a8 05 02 lw a7, 32(a1) +800005c4: 03 ae c5 00 lw t3, 12(a1) +800005c8: 33 87 cf 02 mul a4, t6, a2 +800005cc: 93 95 26 00 slli a1, a3, 2 +800005d0: 33 83 be 00 add t1, t4, a1 +800005d4: 33 86 a6 02 mul a2, a3, a0 +800005d8: 13 16 26 00 slli a2, a2, 2 +800005dc: 33 0f c3 00 add t5, t1, a2 +800005e0: 13 c6 f6 ff not a2, a3 +800005e4: 33 06 c5 00 add a2, a0, a2 +800005e8: b3 06 de 00 add a3, t3, a3 +800005ec: b3 86 e6 00 add a3, a3, a4 +800005f0: 93 86 16 00 addi a3, a3, 1 +800005f4: b3 06 d5 02 mul a3, a0, a3 +800005f8: 93 96 26 00 slli a3, a3, 2 +800005fc: 33 83 b6 00 add t1, a3, a1 +80000600: 93 16 25 00 slli a3, a0, 2 +80000604: 33 07 ee 00 add a4, t3, a4 +80000608: 6f 00 c0 00 j 12 +8000060c: 13 08 18 00 addi a6, a6, 1 +80000610: 63 78 18 05 bgeu a6, a7, 80 +80000614: 13 0e 00 00 mv t3, zero +80000618: 6f 00 c0 00 j 12 +8000061c: 13 0e 1e 00 addi t3, t3, 1 +80000620: e3 76 5e fe bgeu t3, t0, -20 +80000624: 13 05 00 00 mv a0, zero +80000628: 93 05 03 00 mv a1, t1 +8000062c: 6f 00 00 01 j 16 +80000630: 13 05 15 00 addi a0, a0, 1 +80000634: b3 85 d5 00 add a1, a1, a3 +80000638: e3 72 f5 ff bgeu a0, t6, -28 +8000063c: b3 07 a7 00 add a5, a4, a0 +80000640: e3 d8 c7 fe bge a5, a2, -16 +80000644: b3 87 be 00 add a5, t4, a1 +80000648: 07 a0 07 00 flw ft0, 0(a5) +8000064c: 87 20 0f 00 flw ft1, 0(t5) +80000650: 53 70 10 18 fdiv.s ft0, ft0, ft1 +80000654: b3 87 b3 00 add a5, t2, a1 +80000658: 27 a0 07 00 fsw ft0, 0(a5) +8000065c: 6f f0 5f fd j -44 +80000660: 67 80 00 00 ret + +80000664 _exit: +80000664: 13 05 00 00 mv a0, zero +80000668: 6b 00 05 00 + +8000066c vx_set_sp: +8000066c: 73 25 50 02 csrr a0, 37 +80000670: 6b 00 05 00 +80000674: 97 21 00 00 auipc gp, 2 +80000678: 93 81 41 c8 addi gp, gp, -892 +8000067c: f3 25 20 02 csrr a1, 34 +80000680: 93 95 a5 00 slli a1, a1, 10 +80000684: 73 26 00 02 csrr a2, 32 +80000688: 13 16 26 00 slli a2, a2, 2 +8000068c: 37 f1 ff 6f lui sp, 458751 +80000690: 33 01 b1 40 sub sp, sp, a1 +80000694: 33 01 c1 00 add sp, sp, a2 +80000698: f3 26 10 02 csrr a3, 33 +8000069c: 63 86 06 00 beqz a3, 12 +800006a0: 13 05 00 00 mv a0, zero +800006a4: 6b 00 05 00 + +800006a8 RETURN: 800006a8: 67 80 00 00 ret -800006ac _exit: -800006ac: 13 05 00 00 mv a0, zero -800006b0: 6b 00 05 00 +800006ac vx_wspawn: +800006ac: 6b 10 b5 00 +800006b0: 67 80 00 00 ret -800006b4 vx_set_sp: -800006b4: 73 25 50 02 csrr a0, 37 -800006b8: 6b 00 05 00 -800006bc: 97 21 00 00 auipc gp, 2 -800006c0: 93 81 41 c8 addi gp, gp, -892 -800006c4: f3 25 20 02 csrr a1, 34 -800006c8: 93 95 a5 00 slli a1, a1, 10 -800006cc: 73 26 00 02 csrr a2, 32 -800006d0: 13 16 26 00 slli a2, a2, 2 -800006d4: 37 f1 ff 6f lui sp, 458751 -800006d8: 33 01 b1 40 sub sp, sp, a1 -800006dc: 33 01 c1 00 add sp, sp, a2 -800006e0: f3 26 10 02 csrr a3, 33 -800006e4: 63 86 06 00 beqz a3, 12 -800006e8: 13 05 00 00 mv a0, zero -800006ec: 6b 00 05 00 +800006b4 vx_tmc: +800006b4: 6b 00 05 00 +800006b8: 67 80 00 00 ret -800006f0 RETURN: +800006bc vx_barrier: +800006bc: 6b 40 b5 00 +800006c0: 67 80 00 00 ret + +800006c4 vx_split: +800006c4: 6b 20 05 00 +800006c8: 67 80 00 00 ret + +800006cc vx_join: +800006cc: 6b 30 00 00 +800006d0: 67 80 00 00 ret + +800006d4 vx_warp_id: +800006d4: 73 25 10 02 csrr a0, 33 +800006d8: 67 80 00 00 ret + +800006dc vx_warp_gid: +800006dc: 73 25 30 02 csrr a0, 35 +800006e0: 67 80 00 00 ret + +800006e4 vx_thread_id: +800006e4: 73 25 00 02 csrr a0, 32 +800006e8: 67 80 00 00 ret + +800006ec vx_thread_gid: +800006ec: 73 25 20 02 csrr a0, 34 800006f0: 67 80 00 00 ret -800006f4 vx_wspawn: -800006f4: 6b 10 b5 00 +800006f4 vx_core_id: +800006f4: 73 25 40 02 csrr a0, 36 800006f8: 67 80 00 00 ret -800006fc vx_tmc: -800006fc: 6b 00 05 00 +800006fc vx_num_threads: +800006fc: 73 25 50 02 csrr a0, 37 80000700: 67 80 00 00 ret -80000704 vx_barrier: -80000704: 6b 40 b5 00 +80000704 vx_num_warps: +80000704: 73 25 60 02 csrr a0, 38 80000708: 67 80 00 00 ret -8000070c vx_split: -8000070c: 6b 20 05 00 +8000070c vx_num_cores: +8000070c: 73 25 70 02 csrr a0, 39 80000710: 67 80 00 00 ret -80000714 vx_join: -80000714: 6b 30 00 00 +80000714 vx_num_cycles: +80000714: 73 25 00 b0 csrr a0, mcycle 80000718: 67 80 00 00 ret -8000071c vx_warp_id: -8000071c: 73 25 10 02 csrr a0, 33 +8000071c vx_num_instrs: +8000071c: 73 25 20 b0 csrr a0, minstret 80000720: 67 80 00 00 ret -80000724 vx_warp_gid: -80000724: 73 25 30 02 csrr a0, 35 -80000728: 67 80 00 00 ret +80000724 atexit: +80000724: 93 05 05 00 mv a1, a0 +80000728: 93 06 00 00 mv a3, zero +8000072c: 13 06 00 00 mv a2, zero +80000730: 13 05 00 00 mv a0, zero +80000734: 6f 00 80 20 j 520 -8000072c vx_thread_id: -8000072c: 73 25 00 02 csrr a0, 32 -80000730: 67 80 00 00 ret +80000738 exit: +80000738: 13 01 01 ff addi sp, sp, -16 +8000073c: 93 05 00 00 mv a1, zero +80000740: 23 24 81 00 sw s0, 8(sp) +80000744: 23 26 11 00 sw ra, 12(sp) +80000748: 13 04 05 00 mv s0, a0 +8000074c: ef 00 80 28 jal 648 +80000750: 03 a5 81 c2 lw a0, -984(gp) +80000754: 83 27 c5 03 lw a5, 60(a0) +80000758: 63 84 07 00 beqz a5, 8 +8000075c: e7 80 07 00 jalr a5 +80000760: 13 05 04 00 mv a0, s0 +80000764: ef f0 1f f0 jal -256 -80000734 vx_thread_gid: -80000734: 73 25 20 02 csrr a0, 34 -80000738: 67 80 00 00 ret +80000768 __libc_fini_array: +80000768: 13 01 01 ff addi sp, sp, -16 +8000076c: 23 24 81 00 sw s0, 8(sp) +80000770: b7 27 00 80 lui a5, 524290 +80000774: 37 24 00 80 lui s0, 524290 +80000778: 13 04 84 af addi s0, s0, -1288 +8000077c: 93 87 87 af addi a5, a5, -1288 +80000780: b3 87 87 40 sub a5, a5, s0 +80000784: 23 22 91 00 sw s1, 4(sp) +80000788: 23 26 11 00 sw ra, 12(sp) +8000078c: 93 d4 27 40 srai s1, a5, 2 +80000790: 63 80 04 02 beqz s1, 32 +80000794: 93 87 c7 ff addi a5, a5, -4 +80000798: 33 84 87 00 add s0, a5, s0 +8000079c: 83 27 04 00 lw a5, 0(s0) +800007a0: 93 84 f4 ff addi s1, s1, -1 +800007a4: 13 04 c4 ff addi s0, s0, -4 +800007a8: e7 80 07 00 jalr a5 +800007ac: e3 98 04 fe bnez s1, -16 +800007b0: 83 20 c1 00 lw ra, 12(sp) +800007b4: 03 24 81 00 lw s0, 8(sp) +800007b8: 83 24 41 00 lw s1, 4(sp) +800007bc: 13 01 01 01 addi sp, sp, 16 +800007c0: 67 80 00 00 ret -8000073c vx_core_id: -8000073c: 73 25 40 02 csrr a0, 36 -80000740: 67 80 00 00 ret +800007c4 __libc_init_array: +800007c4: 13 01 01 ff addi sp, sp, -16 +800007c8: 23 24 81 00 sw s0, 8(sp) +800007cc: 23 20 21 01 sw s2, 0(sp) +800007d0: 37 24 00 80 lui s0, 524290 +800007d4: 37 29 00 80 lui s2, 524290 +800007d8: 93 07 44 af addi a5, s0, -1292 +800007dc: 13 09 49 af addi s2, s2, -1292 +800007e0: 33 09 f9 40 sub s2, s2, a5 +800007e4: 23 26 11 00 sw ra, 12(sp) +800007e8: 23 22 91 00 sw s1, 4(sp) +800007ec: 13 59 29 40 srai s2, s2, 2 +800007f0: 63 00 09 02 beqz s2, 32 +800007f4: 13 04 44 af addi s0, s0, -1292 +800007f8: 93 04 00 00 mv s1, zero +800007fc: 83 27 04 00 lw a5, 0(s0) +80000800: 93 84 14 00 addi s1, s1, 1 +80000804: 13 04 44 00 addi s0, s0, 4 +80000808: e7 80 07 00 jalr a5 +8000080c: e3 18 99 fe bne s2, s1, -16 +80000810: 37 24 00 80 lui s0, 524290 +80000814: 37 29 00 80 lui s2, 524290 +80000818: 93 07 44 af addi a5, s0, -1292 +8000081c: 13 09 89 af addi s2, s2, -1288 +80000820: 33 09 f9 40 sub s2, s2, a5 +80000824: 13 59 29 40 srai s2, s2, 2 +80000828: 63 00 09 02 beqz s2, 32 +8000082c: 13 04 44 af addi s0, s0, -1292 +80000830: 93 04 00 00 mv s1, zero +80000834: 83 27 04 00 lw a5, 0(s0) +80000838: 93 84 14 00 addi s1, s1, 1 +8000083c: 13 04 44 00 addi s0, s0, 4 +80000840: e7 80 07 00 jalr a5 +80000844: e3 18 99 fe bne s2, s1, -16 +80000848: 83 20 c1 00 lw ra, 12(sp) +8000084c: 03 24 81 00 lw s0, 8(sp) +80000850: 83 24 41 00 lw s1, 4(sp) +80000854: 03 29 01 00 lw s2, 0(sp) +80000858: 13 01 01 01 addi sp, sp, 16 +8000085c: 67 80 00 00 ret -80000744 vx_num_threads: -80000744: 73 25 50 02 csrr a0, 37 -80000748: 67 80 00 00 ret +80000860 memset: +80000860: 13 03 f0 00 addi t1, zero, 15 +80000864: 13 07 05 00 mv a4, a0 +80000868: 63 7e c3 02 bgeu t1, a2, 60 +8000086c: 93 77 f7 00 andi a5, a4, 15 +80000870: 63 90 07 0a bnez a5, 160 +80000874: 63 92 05 08 bnez a1, 132 +80000878: 93 76 06 ff andi a3, a2, -16 +8000087c: 13 76 f6 00 andi a2, a2, 15 +80000880: b3 86 e6 00 add a3, a3, a4 +80000884: 23 20 b7 00 sw a1, 0(a4) +80000888: 23 22 b7 00 sw a1, 4(a4) +8000088c: 23 24 b7 00 sw a1, 8(a4) +80000890: 23 26 b7 00 sw a1, 12(a4) +80000894: 13 07 07 01 addi a4, a4, 16 +80000898: e3 66 d7 fe bltu a4, a3, -20 +8000089c: 63 14 06 00 bnez a2, 8 +800008a0: 67 80 00 00 ret +800008a4: b3 06 c3 40 sub a3, t1, a2 +800008a8: 93 96 26 00 slli a3, a3, 2 +800008ac: 97 02 00 00 auipc t0, 0 +800008b0: b3 86 56 00 add a3, a3, t0 +800008b4: 67 80 c6 00 jr 12(a3) +800008b8: 23 07 b7 00 sb a1, 14(a4) +800008bc: a3 06 b7 00 sb a1, 13(a4) +800008c0: 23 06 b7 00 sb a1, 12(a4) +800008c4: a3 05 b7 00 sb a1, 11(a4) +800008c8: 23 05 b7 00 sb a1, 10(a4) +800008cc: a3 04 b7 00 sb a1, 9(a4) +800008d0: 23 04 b7 00 sb a1, 8(a4) +800008d4: a3 03 b7 00 sb a1, 7(a4) +800008d8: 23 03 b7 00 sb a1, 6(a4) +800008dc: a3 02 b7 00 sb a1, 5(a4) +800008e0: 23 02 b7 00 sb a1, 4(a4) +800008e4: a3 01 b7 00 sb a1, 3(a4) +800008e8: 23 01 b7 00 sb a1, 2(a4) +800008ec: a3 00 b7 00 sb a1, 1(a4) +800008f0: 23 00 b7 00 sb a1, 0(a4) +800008f4: 67 80 00 00 ret +800008f8: 93 f5 f5 0f andi a1, a1, 255 +800008fc: 93 96 85 00 slli a3, a1, 8 +80000900: b3 e5 d5 00 or a1, a1, a3 +80000904: 93 96 05 01 slli a3, a1, 16 +80000908: b3 e5 d5 00 or a1, a1, a3 +8000090c: 6f f0 df f6 j -148 +80000910: 93 96 27 00 slli a3, a5, 2 +80000914: 97 02 00 00 auipc t0, 0 +80000918: b3 86 56 00 add a3, a3, t0 +8000091c: 93 82 00 00 mv t0, ra +80000920: e7 80 06 fa jalr -96(a3) +80000924: 93 80 02 00 mv ra, t0 +80000928: 93 87 07 ff addi a5, a5, -16 +8000092c: 33 07 f7 40 sub a4, a4, a5 +80000930: 33 06 f6 00 add a2, a2, a5 +80000934: e3 78 c3 f6 bgeu t1, a2, -144 +80000938: 6f f0 df f3 j -196 -8000074c vx_num_warps: -8000074c: 73 25 60 02 csrr a0, 38 -80000750: 67 80 00 00 ret +8000093c __register_exitproc: +8000093c: 03 a7 81 c2 lw a4, -984(gp) +80000940: 83 27 87 14 lw a5, 328(a4) +80000944: 63 8c 07 04 beqz a5, 88 +80000948: 03 a7 47 00 lw a4, 4(a5) +8000094c: 13 08 f0 01 addi a6, zero, 31 +80000950: 63 4e e8 06 blt a6, a4, 124 +80000954: 13 18 27 00 slli a6, a4, 2 +80000958: 63 06 05 02 beqz a0, 44 +8000095c: 33 83 07 01 add t1, a5, a6 +80000960: 23 24 c3 08 sw a2, 136(t1) +80000964: 83 a8 87 18 lw a7, 392(a5) +80000968: 13 06 10 00 addi a2, zero, 1 +8000096c: 33 16 e6 00 sll a2, a2, a4 +80000970: b3 e8 c8 00 or a7, a7, a2 +80000974: 23 a4 17 19 sw a7, 392(a5) +80000978: 23 24 d3 10 sw a3, 264(t1) +8000097c: 93 06 20 00 addi a3, zero, 2 +80000980: 63 04 d5 02 beq a0, a3, 40 +80000984: 13 07 17 00 addi a4, a4, 1 +80000988: 23 a2 e7 00 sw a4, 4(a5) +8000098c: b3 87 07 01 add a5, a5, a6 +80000990: 23 a4 b7 00 sw a1, 8(a5) +80000994: 13 05 00 00 mv a0, zero +80000998: 67 80 00 00 ret +8000099c: 93 07 c7 14 addi a5, a4, 332 +800009a0: 23 24 f7 14 sw a5, 328(a4) +800009a4: 6f f0 5f fa j -92 +800009a8: 83 a6 c7 18 lw a3, 396(a5) +800009ac: 13 07 17 00 addi a4, a4, 1 +800009b0: 23 a2 e7 00 sw a4, 4(a5) +800009b4: 33 e6 c6 00 or a2, a3, a2 +800009b8: 23 a6 c7 18 sw a2, 396(a5) +800009bc: b3 87 07 01 add a5, a5, a6 +800009c0: 23 a4 b7 00 sw a1, 8(a5) +800009c4: 13 05 00 00 mv a0, zero +800009c8: 67 80 00 00 ret +800009cc: 13 05 f0 ff addi a0, zero, -1 +800009d0: 67 80 00 00 ret -80000754 vx_num_cores: -80000754: 73 25 70 02 csrr a0, 39 -80000758: 67 80 00 00 ret - -8000075c vx_num_cycles: -8000075c: 73 25 00 c0 rdcycle a0 -80000760: 67 80 00 00 ret - -80000764 vx_num_instrs: -80000764: 73 25 20 c0 rdinstret a0 -80000768: 67 80 00 00 ret - -8000076c atexit: -8000076c: 93 05 05 00 mv a1, a0 -80000770: 93 06 00 00 mv a3, zero -80000774: 13 06 00 00 mv a2, zero -80000778: 13 05 00 00 mv a0, zero -8000077c: 6f 00 80 20 j 520 - -80000780 exit: -80000780: 13 01 01 ff addi sp, sp, -16 -80000784: 93 05 00 00 mv a1, zero -80000788: 23 24 81 00 sw s0, 8(sp) -8000078c: 23 26 11 00 sw ra, 12(sp) -80000790: 13 04 05 00 mv s0, a0 -80000794: ef 00 80 28 jal 648 -80000798: 03 a5 81 c2 lw a0, -984(gp) -8000079c: 83 27 c5 03 lw a5, 60(a0) -800007a0: 63 84 07 00 beqz a5, 8 -800007a4: e7 80 07 00 jalr a5 -800007a8: 13 05 04 00 mv a0, s0 -800007ac: ef f0 1f f0 jal -256 - -800007b0 __libc_fini_array: -800007b0: 13 01 01 ff addi sp, sp, -16 -800007b4: 23 24 81 00 sw s0, 8(sp) -800007b8: b7 27 00 80 lui a5, 524290 -800007bc: 37 24 00 80 lui s0, 524290 -800007c0: 13 04 04 b4 addi s0, s0, -1216 -800007c4: 93 87 07 b4 addi a5, a5, -1216 -800007c8: b3 87 87 40 sub a5, a5, s0 -800007cc: 23 22 91 00 sw s1, 4(sp) -800007d0: 23 26 11 00 sw ra, 12(sp) -800007d4: 93 d4 27 40 srai s1, a5, 2 -800007d8: 63 80 04 02 beqz s1, 32 -800007dc: 93 87 c7 ff addi a5, a5, -4 -800007e0: 33 84 87 00 add s0, a5, s0 -800007e4: 83 27 04 00 lw a5, 0(s0) -800007e8: 93 84 f4 ff addi s1, s1, -1 -800007ec: 13 04 c4 ff addi s0, s0, -4 -800007f0: e7 80 07 00 jalr a5 -800007f4: e3 98 04 fe bnez s1, -16 -800007f8: 83 20 c1 00 lw ra, 12(sp) -800007fc: 03 24 81 00 lw s0, 8(sp) -80000800: 83 24 41 00 lw s1, 4(sp) -80000804: 13 01 01 01 addi sp, sp, 16 -80000808: 67 80 00 00 ret - -8000080c __libc_init_array: -8000080c: 13 01 01 ff addi sp, sp, -16 -80000810: 23 24 81 00 sw s0, 8(sp) -80000814: 23 20 21 01 sw s2, 0(sp) -80000818: 37 24 00 80 lui s0, 524290 -8000081c: 37 29 00 80 lui s2, 524290 -80000820: 93 07 c4 b3 addi a5, s0, -1220 -80000824: 13 09 c9 b3 addi s2, s2, -1220 -80000828: 33 09 f9 40 sub s2, s2, a5 -8000082c: 23 26 11 00 sw ra, 12(sp) -80000830: 23 22 91 00 sw s1, 4(sp) -80000834: 13 59 29 40 srai s2, s2, 2 -80000838: 63 00 09 02 beqz s2, 32 -8000083c: 13 04 c4 b3 addi s0, s0, -1220 -80000840: 93 04 00 00 mv s1, zero -80000844: 83 27 04 00 lw a5, 0(s0) -80000848: 93 84 14 00 addi s1, s1, 1 -8000084c: 13 04 44 00 addi s0, s0, 4 -80000850: e7 80 07 00 jalr a5 -80000854: e3 18 99 fe bne s2, s1, -16 -80000858: 37 24 00 80 lui s0, 524290 -8000085c: 37 29 00 80 lui s2, 524290 -80000860: 93 07 c4 b3 addi a5, s0, -1220 -80000864: 13 09 09 b4 addi s2, s2, -1216 -80000868: 33 09 f9 40 sub s2, s2, a5 -8000086c: 13 59 29 40 srai s2, s2, 2 -80000870: 63 00 09 02 beqz s2, 32 -80000874: 13 04 c4 b3 addi s0, s0, -1220 -80000878: 93 04 00 00 mv s1, zero -8000087c: 83 27 04 00 lw a5, 0(s0) -80000880: 93 84 14 00 addi s1, s1, 1 -80000884: 13 04 44 00 addi s0, s0, 4 -80000888: e7 80 07 00 jalr a5 -8000088c: e3 18 99 fe bne s2, s1, -16 -80000890: 83 20 c1 00 lw ra, 12(sp) -80000894: 03 24 81 00 lw s0, 8(sp) -80000898: 83 24 41 00 lw s1, 4(sp) -8000089c: 03 29 01 00 lw s2, 0(sp) -800008a0: 13 01 01 01 addi sp, sp, 16 -800008a4: 67 80 00 00 ret - -800008a8 memset: -800008a8: 13 03 f0 00 addi t1, zero, 15 -800008ac: 13 07 05 00 mv a4, a0 -800008b0: 63 7e c3 02 bgeu t1, a2, 60 -800008b4: 93 77 f7 00 andi a5, a4, 15 -800008b8: 63 90 07 0a bnez a5, 160 -800008bc: 63 92 05 08 bnez a1, 132 -800008c0: 93 76 06 ff andi a3, a2, -16 -800008c4: 13 76 f6 00 andi a2, a2, 15 -800008c8: b3 86 e6 00 add a3, a3, a4 -800008cc: 23 20 b7 00 sw a1, 0(a4) -800008d0: 23 22 b7 00 sw a1, 4(a4) -800008d4: 23 24 b7 00 sw a1, 8(a4) -800008d8: 23 26 b7 00 sw a1, 12(a4) -800008dc: 13 07 07 01 addi a4, a4, 16 -800008e0: e3 66 d7 fe bltu a4, a3, -20 -800008e4: 63 14 06 00 bnez a2, 8 -800008e8: 67 80 00 00 ret -800008ec: b3 06 c3 40 sub a3, t1, a2 -800008f0: 93 96 26 00 slli a3, a3, 2 -800008f4: 97 02 00 00 auipc t0, 0 -800008f8: b3 86 56 00 add a3, a3, t0 -800008fc: 67 80 c6 00 jr 12(a3) -80000900: 23 07 b7 00 sb a1, 14(a4) -80000904: a3 06 b7 00 sb a1, 13(a4) -80000908: 23 06 b7 00 sb a1, 12(a4) -8000090c: a3 05 b7 00 sb a1, 11(a4) -80000910: 23 05 b7 00 sb a1, 10(a4) -80000914: a3 04 b7 00 sb a1, 9(a4) -80000918: 23 04 b7 00 sb a1, 8(a4) -8000091c: a3 03 b7 00 sb a1, 7(a4) -80000920: 23 03 b7 00 sb a1, 6(a4) -80000924: a3 02 b7 00 sb a1, 5(a4) -80000928: 23 02 b7 00 sb a1, 4(a4) -8000092c: a3 01 b7 00 sb a1, 3(a4) -80000930: 23 01 b7 00 sb a1, 2(a4) -80000934: a3 00 b7 00 sb a1, 1(a4) -80000938: 23 00 b7 00 sb a1, 0(a4) -8000093c: 67 80 00 00 ret -80000940: 93 f5 f5 0f andi a1, a1, 255 -80000944: 93 96 85 00 slli a3, a1, 8 -80000948: b3 e5 d5 00 or a1, a1, a3 -8000094c: 93 96 05 01 slli a3, a1, 16 -80000950: b3 e5 d5 00 or a1, a1, a3 -80000954: 6f f0 df f6 j -148 -80000958: 93 96 27 00 slli a3, a5, 2 -8000095c: 97 02 00 00 auipc t0, 0 -80000960: b3 86 56 00 add a3, a3, t0 -80000964: 93 82 00 00 mv t0, ra -80000968: e7 80 06 fa jalr -96(a3) -8000096c: 93 80 02 00 mv ra, t0 -80000970: 93 87 07 ff addi a5, a5, -16 -80000974: 33 07 f7 40 sub a4, a4, a5 -80000978: 33 06 f6 00 add a2, a2, a5 -8000097c: e3 78 c3 f6 bgeu t1, a2, -144 -80000980: 6f f0 df f3 j -196 - -80000984 __register_exitproc: -80000984: 03 a7 81 c2 lw a4, -984(gp) -80000988: 83 27 87 14 lw a5, 328(a4) -8000098c: 63 8c 07 04 beqz a5, 88 -80000990: 03 a7 47 00 lw a4, 4(a5) -80000994: 13 08 f0 01 addi a6, zero, 31 -80000998: 63 4e e8 06 blt a6, a4, 124 -8000099c: 13 18 27 00 slli a6, a4, 2 -800009a0: 63 06 05 02 beqz a0, 44 -800009a4: 33 83 07 01 add t1, a5, a6 -800009a8: 23 24 c3 08 sw a2, 136(t1) -800009ac: 83 a8 87 18 lw a7, 392(a5) -800009b0: 13 06 10 00 addi a2, zero, 1 -800009b4: 33 16 e6 00 sll a2, a2, a4 -800009b8: b3 e8 c8 00 or a7, a7, a2 -800009bc: 23 a4 17 19 sw a7, 392(a5) -800009c0: 23 24 d3 10 sw a3, 264(t1) -800009c4: 93 06 20 00 addi a3, zero, 2 -800009c8: 63 04 d5 02 beq a0, a3, 40 -800009cc: 13 07 17 00 addi a4, a4, 1 -800009d0: 23 a2 e7 00 sw a4, 4(a5) -800009d4: b3 87 07 01 add a5, a5, a6 -800009d8: 23 a4 b7 00 sw a1, 8(a5) -800009dc: 13 05 00 00 mv a0, zero -800009e0: 67 80 00 00 ret -800009e4: 93 07 c7 14 addi a5, a4, 332 -800009e8: 23 24 f7 14 sw a5, 328(a4) -800009ec: 6f f0 5f fa j -92 -800009f0: 83 a6 c7 18 lw a3, 396(a5) -800009f4: 13 07 17 00 addi a4, a4, 1 -800009f8: 23 a2 e7 00 sw a4, 4(a5) -800009fc: 33 e6 c6 00 or a2, a3, a2 -80000a00: 23 a6 c7 18 sw a2, 396(a5) -80000a04: b3 87 07 01 add a5, a5, a6 -80000a08: 23 a4 b7 00 sw a1, 8(a5) -80000a0c: 13 05 00 00 mv a0, zero -80000a10: 67 80 00 00 ret -80000a14: 13 05 f0 ff addi a0, zero, -1 -80000a18: 67 80 00 00 ret - -80000a1c __call_exitprocs: -80000a1c: 13 01 01 fd addi sp, sp, -48 -80000a20: 23 2c 41 01 sw s4, 24(sp) -80000a24: 03 aa 81 c2 lw s4, -984(gp) -80000a28: 23 20 21 03 sw s2, 32(sp) -80000a2c: 23 26 11 02 sw ra, 44(sp) -80000a30: 03 29 8a 14 lw s2, 328(s4) -80000a34: 23 24 81 02 sw s0, 40(sp) -80000a38: 23 22 91 02 sw s1, 36(sp) -80000a3c: 23 2e 31 01 sw s3, 28(sp) -80000a40: 23 2a 51 01 sw s5, 20(sp) -80000a44: 23 28 61 01 sw s6, 16(sp) -80000a48: 23 26 71 01 sw s7, 12(sp) -80000a4c: 23 24 81 01 sw s8, 8(sp) -80000a50: 63 00 09 04 beqz s2, 64 -80000a54: 13 0b 05 00 mv s6, a0 -80000a58: 93 8b 05 00 mv s7, a1 -80000a5c: 93 0a 10 00 addi s5, zero, 1 -80000a60: 93 09 f0 ff addi s3, zero, -1 -80000a64: 83 24 49 00 lw s1, 4(s2) -80000a68: 13 84 f4 ff addi s0, s1, -1 -80000a6c: 63 42 04 02 bltz s0, 36 -80000a70: 93 94 24 00 slli s1, s1, 2 -80000a74: b3 04 99 00 add s1, s2, s1 -80000a78: 63 84 0b 04 beqz s7, 72 -80000a7c: 83 a7 44 10 lw a5, 260(s1) -80000a80: 63 80 77 05 beq a5, s7, 64 -80000a84: 13 04 f4 ff addi s0, s0, -1 -80000a88: 93 84 c4 ff addi s1, s1, -4 -80000a8c: e3 16 34 ff bne s0, s3, -20 -80000a90: 83 20 c1 02 lw ra, 44(sp) -80000a94: 03 24 81 02 lw s0, 40(sp) -80000a98: 83 24 41 02 lw s1, 36(sp) -80000a9c: 03 29 01 02 lw s2, 32(sp) -80000aa0: 83 29 c1 01 lw s3, 28(sp) -80000aa4: 03 2a 81 01 lw s4, 24(sp) -80000aa8: 83 2a 41 01 lw s5, 20(sp) -80000aac: 03 2b 01 01 lw s6, 16(sp) -80000ab0: 83 2b c1 00 lw s7, 12(sp) -80000ab4: 03 2c 81 00 lw s8, 8(sp) -80000ab8: 13 01 01 03 addi sp, sp, 48 -80000abc: 67 80 00 00 ret -80000ac0: 83 27 49 00 lw a5, 4(s2) -80000ac4: 83 a6 44 00 lw a3, 4(s1) -80000ac8: 93 87 f7 ff addi a5, a5, -1 -80000acc: 63 8e 87 04 beq a5, s0, 92 -80000ad0: 23 a2 04 00 sw zero, 4(s1) -80000ad4: e3 88 06 fa beqz a3, -80 -80000ad8: 83 27 89 18 lw a5, 392(s2) -80000adc: 33 97 8a 00 sll a4, s5, s0 -80000ae0: 03 2c 49 00 lw s8, 4(s2) -80000ae4: b3 77 f7 00 and a5, a4, a5 -80000ae8: 63 92 07 02 bnez a5, 36 +800009d4 __call_exitprocs: +800009d4: 13 01 01 fd addi sp, sp, -48 +800009d8: 23 2c 41 01 sw s4, 24(sp) +800009dc: 03 aa 81 c2 lw s4, -984(gp) +800009e0: 23 20 21 03 sw s2, 32(sp) +800009e4: 23 26 11 02 sw ra, 44(sp) +800009e8: 03 29 8a 14 lw s2, 328(s4) +800009ec: 23 24 81 02 sw s0, 40(sp) +800009f0: 23 22 91 02 sw s1, 36(sp) +800009f4: 23 2e 31 01 sw s3, 28(sp) +800009f8: 23 2a 51 01 sw s5, 20(sp) +800009fc: 23 28 61 01 sw s6, 16(sp) +80000a00: 23 26 71 01 sw s7, 12(sp) +80000a04: 23 24 81 01 sw s8, 8(sp) +80000a08: 63 00 09 04 beqz s2, 64 +80000a0c: 13 0b 05 00 mv s6, a0 +80000a10: 93 8b 05 00 mv s7, a1 +80000a14: 93 0a 10 00 addi s5, zero, 1 +80000a18: 93 09 f0 ff addi s3, zero, -1 +80000a1c: 83 24 49 00 lw s1, 4(s2) +80000a20: 13 84 f4 ff addi s0, s1, -1 +80000a24: 63 42 04 02 bltz s0, 36 +80000a28: 93 94 24 00 slli s1, s1, 2 +80000a2c: b3 04 99 00 add s1, s2, s1 +80000a30: 63 84 0b 04 beqz s7, 72 +80000a34: 83 a7 44 10 lw a5, 260(s1) +80000a38: 63 80 77 05 beq a5, s7, 64 +80000a3c: 13 04 f4 ff addi s0, s0, -1 +80000a40: 93 84 c4 ff addi s1, s1, -4 +80000a44: e3 16 34 ff bne s0, s3, -20 +80000a48: 83 20 c1 02 lw ra, 44(sp) +80000a4c: 03 24 81 02 lw s0, 40(sp) +80000a50: 83 24 41 02 lw s1, 36(sp) +80000a54: 03 29 01 02 lw s2, 32(sp) +80000a58: 83 29 c1 01 lw s3, 28(sp) +80000a5c: 03 2a 81 01 lw s4, 24(sp) +80000a60: 83 2a 41 01 lw s5, 20(sp) +80000a64: 03 2b 01 01 lw s6, 16(sp) +80000a68: 83 2b c1 00 lw s7, 12(sp) +80000a6c: 03 2c 81 00 lw s8, 8(sp) +80000a70: 13 01 01 03 addi sp, sp, 48 +80000a74: 67 80 00 00 ret +80000a78: 83 27 49 00 lw a5, 4(s2) +80000a7c: 83 a6 44 00 lw a3, 4(s1) +80000a80: 93 87 f7 ff addi a5, a5, -1 +80000a84: 63 8e 87 04 beq a5, s0, 92 +80000a88: 23 a2 04 00 sw zero, 4(s1) +80000a8c: e3 88 06 fa beqz a3, -80 +80000a90: 83 27 89 18 lw a5, 392(s2) +80000a94: 33 97 8a 00 sll a4, s5, s0 +80000a98: 03 2c 49 00 lw s8, 4(s2) +80000a9c: b3 77 f7 00 and a5, a4, a5 +80000aa0: 63 92 07 02 bnez a5, 36 +80000aa4: e7 80 06 00 jalr a3 +80000aa8: 03 27 49 00 lw a4, 4(s2) +80000aac: 83 27 8a 14 lw a5, 328(s4) +80000ab0: 63 14 87 01 bne a4, s8, 8 +80000ab4: e3 04 f9 f8 beq s2, a5, -120 +80000ab8: e3 88 07 f8 beqz a5, -112 +80000abc: 13 89 07 00 mv s2, a5 +80000ac0: 6f f0 df f5 j -164 +80000ac4: 83 27 c9 18 lw a5, 396(s2) +80000ac8: 83 a5 44 08 lw a1, 132(s1) +80000acc: 33 77 f7 00 and a4, a4, a5 +80000ad0: 63 1c 07 00 bnez a4, 24 +80000ad4: 13 05 0b 00 mv a0, s6 +80000ad8: e7 80 06 00 jalr a3 +80000adc: 6f f0 df fc j -52 +80000ae0: 23 22 89 00 sw s0, 4(s2) +80000ae4: 6f f0 9f fa j -88 +80000ae8: 13 85 05 00 mv a0, a1 80000aec: e7 80 06 00 jalr a3 -80000af0: 03 27 49 00 lw a4, 4(s2) -80000af4: 83 27 8a 14 lw a5, 328(s4) -80000af8: 63 14 87 01 bne a4, s8, 8 -80000afc: e3 04 f9 f8 beq s2, a5, -120 -80000b00: e3 88 07 f8 beqz a5, -112 -80000b04: 13 89 07 00 mv s2, a5 -80000b08: 6f f0 df f5 j -164 -80000b0c: 83 27 c9 18 lw a5, 396(s2) -80000b10: 83 a5 44 08 lw a1, 132(s1) -80000b14: 33 77 f7 00 and a4, a4, a5 -80000b18: 63 1c 07 00 bnez a4, 24 -80000b1c: 13 05 0b 00 mv a0, s6 -80000b20: e7 80 06 00 jalr a3 -80000b24: 6f f0 df fc j -52 -80000b28: 23 22 89 00 sw s0, 4(s2) -80000b2c: 6f f0 9f fa j -88 -80000b30: 13 85 05 00 mv a0, a1 -80000b34: e7 80 06 00 jalr a3 -80000b38: 6f f0 9f fb j -72 +80000af0: 6f f0 9f fb j -72 Disassembly of section .init_array: -80001b3c __preinit_array_start: -80001b3c: 48 00 -80001b3e: 00 80 +80001af4 __preinit_array_start: +80001af4: 48 00 +80001af6: 00 80 Disassembly of section .data: -80001b40 impure_data: -80001b40: 00 00 -80001b42: 00 00 -80001b44: 2c 1e -80001b46: 00 80 -80001b48: 94 1e -80001b4a: 00 80 -80001b4c: fc 1e -80001b4e: 00 80 +80001af8 impure_data: +80001af8: 00 00 +80001afa: 00 00 +80001afc: e4 1d +80001afe: 00 80 +80001b00: 4c 1e +80001b02: 00 80 +80001b04: b4 1e +80001b06: 00 80 ... -80001be8: 01 00 -80001bea: 00 00 -80001bec: 00 00 -80001bee: 00 00 -80001bf0: 0e 33 -80001bf2: cd ab -80001bf4: 34 12 -80001bf6: 6d e6 -80001bf8: ec de -80001bfa: 05 00 -80001bfc: 0b 00 00 00 +80001ba0: 01 00 +80001ba2: 00 00 +80001ba4: 00 00 +80001ba6: 00 00 +80001ba8: 0e 33 +80001baa: cd ab +80001bac: 34 12 +80001bae: 6d e6 +80001bb0: ec de +80001bb2: 05 00 +80001bb4: 0b 00 00 00 ... Disassembly of section .sdata: -80001f68 _global_impure_ptr: -80001f68: 40 1b -80001f6a: 00 80 +80001f20 _global_impure_ptr: +80001f20: f8 1a +80001f22: 00 80 Disassembly of section .bss: -80001f6c g_wspawn_args: +80001f24 g_wspawn_args: ... Disassembly of section .comment: @@ -928,28 +910,28 @@ Disassembly of section .symtab: 2c: 03 00 02 00 lb zero, 0(tp) 30: 00 00 32: 00 00 - 34: 3c 1b + 34: f4 1a 36: 00 80 38: 00 00 3a: 00 00 3c: 03 00 03 00 lb zero, 0(t1) 40: 00 00 42: 00 00 - 44: 40 1b + 44: f8 1a 46: 00 80 48: 00 00 4a: 00 00 4c: 03 00 04 00 lb zero, 0(s0) 50: 00 00 52: 00 00 - 54: 68 1f + 54: 20 1f 56: 00 80 58: 00 00 5a: 00 00 5c: 03 00 05 00 lb zero, 0(a0) 60: 00 00 62: 00 00 - 64: 6c 1f + 64: 24 1f 66: 00 80 68: 00 00 6a: 00 00 @@ -965,7 +947,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: f0 06 + a4: a8 06 a6: 00 80 a8: 00 00 aa: 00 00 @@ -1022,7 +1004,7 @@ Disassembly of section .symtab: 14e: f1 ff 150: 85 00 152: 00 00 - 154: 40 1b + 154: f8 1a 156: 00 80 158: 28 04 15a: 00 00 @@ -1033,7 +1015,7 @@ Disassembly of section .symtab: 16e: f1 ff 170: 91 00 172: 00 00 - 174: 40 1b + 174: f8 1a 176: 00 80 178: 00 00 17a: 00 00 @@ -1041,7 +1023,7 @@ Disassembly of section .symtab: 17e: 04 00 180: a2 00 182: 00 00 - 184: 40 1b + 184: f8 1a 186: 00 80 188: 00 00 18a: 00 00 @@ -1049,35 +1031,35 @@ Disassembly of section .symtab: 18e: 04 00 190: b5 00 192: 00 00 - 194: 40 1b + 194: f8 1a 196: 00 80 198: 00 00 19a: 00 00 19c: 00 00 19e: 03 00 c6 00 lb zero, 12(a2) 1a2: 00 00 - 1a4: 3c 1b + 1a4: f4 1a 1a6: 00 80 1a8: 00 00 1aa: 00 00 1ac: 00 00 1ae: 03 00 da 00 lb zero, 13(s4) 1b2: 00 00 - 1b4: 3c 1b + 1b4: f4 1a 1b6: 00 80 1b8: 00 00 1ba: 00 00 1bc: 00 00 1be: 03 00 ed 00 lb zero, 14(s10) 1c2: 00 00 - 1c4: 3c 1b + 1c4: f4 1a 1c6: 00 80 1c8: 00 00 1ca: 00 00 1cc: 00 00 1ce: 03 00 03 01 lb zero, 16(t1) 1d2: 00 00 - 1d4: 40 23 + 1d4: f8 22 1d6: 00 80 1d8: 00 00 1da: 00 00 @@ -1085,7 +1067,7 @@ Disassembly of section .symtab: 1de: f1 ff 1e0: 15 01 1e2: 00 00 - 1e4: 3c 07 + 1e4: f4 06 1e6: 00 80 1e8: 00 00 1ea: 00 00 @@ -1093,7 +1075,7 @@ Disassembly of section .symtab: 1ee: 02 00 1f0: 20 01 1f2: 00 00 - 1f4: f4 06 + 1f4: ac 06 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1103,21 +1085,21 @@ Disassembly of section .symtab: 202: 00 00 204: 60 00 206: 00 80 - 208: 3c 01 + 208: 48 01 20a: 00 00 20c: 12 00 20e: 02 00 210: 40 01 212: 00 00 - 214: 6c 1f + 214: 24 1f 216: 00 80 - 218: 04 00 + 218: 20 00 21a: 00 00 21c: 11 00 21e: 06 00 220: 4e 01 222: 00 00 - 224: fc 06 + 224: b4 06 226: 00 80 228: 00 00 22a: 00 00 @@ -1125,7 +1107,7 @@ Disassembly of section .symtab: 22e: 02 00 230: 55 01 232: 00 00 - 234: 68 1f + 234: 20 1f 236: 00 80 238: 00 00 23a: 00 00 @@ -1133,7 +1115,7 @@ Disassembly of section .symtab: 23e: 05 00 240: 65 01 242: 00 00 - 244: 14 05 + 244: cc 04 246: 00 80 248: d0 00 24a: 00 00 @@ -1141,7 +1123,7 @@ Disassembly of section .symtab: 24e: 02 00 250: 81 01 252: 00 00 - 254: 14 07 + 254: cc 06 256: 00 80 258: 00 00 25a: 00 00 @@ -1149,7 +1131,7 @@ Disassembly of section .symtab: 25e: 02 00 260: 89 01 262: 00 00 - 264: 4c 07 + 264: 04 07 266: 00 80 268: 00 00 26a: 00 00 @@ -1159,12 +1141,12 @@ Disassembly of section .symtab: 272: 00 00 274: 3c 02 276: 00 80 - 278: cc 01 + 278: 84 01 27a: 00 00 27c: 12 00 27e: 02 00 280: a3 01 00 00 sb zero, 3(zero) - 284: 0c 07 + 284: c4 06 286: 00 80 288: 00 00 28a: 00 00 @@ -1172,14 +1154,14 @@ Disassembly of section .symtab: 28e: 02 00 290: ac 01 292: 00 00 - 294: 68 1f + 294: 20 1f 296: 00 80 298: 04 00 29a: 00 00 29c: 11 00 29e: 05 00 2a0: bf 01 00 00 - 2a4: 0c 08 + 2a4: c4 07 2a6: 00 80 2a8: 9c 00 2aa: 00 00 @@ -1187,7 +1169,7 @@ Disassembly of section .symtab: 2ae: 02 00 2b0: d1 01 2b2: 00 00 - 2b4: 44 07 + 2b4: fc 06 2b6: 00 80 2b8: 00 00 2ba: 00 00 @@ -1195,14 +1177,14 @@ Disassembly of section .symtab: 2be: 02 00 2c0: e0 01 2c2: 00 00 - 2c4: 1c 07 + 2c4: d4 06 2c6: 00 80 2c8: 00 00 2ca: 00 00 2cc: 12 00 2ce: 02 00 2d0: eb 01 00 00 - 2d4: 2c 07 + 2d4: e4 06 2d6: 00 80 2d8: 00 00 2da: 00 00 @@ -1210,7 +1192,7 @@ Disassembly of section .symtab: 2de: 02 00 2e0: f8 01 2e2: 00 00 - 2e4: b0 07 + 2e4: 68 07 2e6: 00 80 2e8: 5c 00 2ea: 00 00 @@ -1218,7 +1200,7 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: 0a 02 2f2: 00 00 - 2f4: 38 04 + 2f4: f0 03 2f6: 00 80 2f8: dc 00 2fa: 00 00 @@ -1226,7 +1208,7 @@ Disassembly of section .symtab: 2fe: 02 00 300: 1c 02 302: 00 00 - 304: b4 06 + 304: 6c 06 306: 00 80 308: 00 00 30a: 00 00 @@ -1234,7 +1216,7 @@ Disassembly of section .symtab: 30e: 02 00 310: 26 02 312: 00 00 - 314: 04 07 + 314: bc 06 316: 00 80 318: 00 00 31a: 00 00 @@ -1242,7 +1224,7 @@ Disassembly of section .symtab: 31e: 02 00 320: 31 02 322: 00 00 - 324: 1c 0a + 324: d4 09 326: 00 80 328: 20 01 32a: 00 00 @@ -1257,7 +1239,7 @@ Disassembly of section .symtab: 33e: 01 00 340: 42 02 342: 00 00 - 344: 84 09 + 344: 3c 09 346: 00 80 348: 98 00 34a: 00 00 @@ -1265,7 +1247,7 @@ Disassembly of section .symtab: 34e: 02 00 350: 56 02 352: 00 00 - 354: 70 1f + 354: 44 1f 356: 00 80 358: 00 00 35a: 00 00 @@ -1273,7 +1255,7 @@ Disassembly of section .symtab: 35e: 06 00 360: 62 02 362: 00 00 - 364: 6c 1f + 364: 24 1f 366: 00 80 368: 00 00 36a: 00 00 @@ -1281,7 +1263,7 @@ Disassembly of section .symtab: 36e: 06 00 370: 6e 02 372: 00 00 - 374: a8 08 + 374: 60 08 376: 00 80 378: dc 00 37a: 00 00 @@ -1289,7 +1271,7 @@ Disassembly of section .symtab: 37e: 02 00 380: 75 02 382: 00 00 - 384: 08 04 + 384: c0 03 386: 00 80 388: 30 00 38a: 00 00 @@ -1297,7 +1279,7 @@ Disassembly of section .symtab: 38e: 02 00 390: 7a 02 392: 00 00 - 394: 5c 07 + 394: 14 07 396: 00 80 398: 00 00 39a: 00 00 @@ -1305,14 +1287,14 @@ Disassembly of section .symtab: 39e: 02 00 3a0: 88 02 3a2: 00 00 - 3a4: 6c 07 + 3a4: 24 07 3a6: 00 80 3a8: 14 00 3aa: 00 00 3ac: 12 00 3ae: 02 00 3b0: 8f 02 00 00 - 3b4: 34 07 + 3b4: ec 06 3b6: 00 80 3b8: 00 00 3ba: 00 00 @@ -1320,7 +1302,7 @@ Disassembly of section .symtab: 3be: 02 00 3c0: 9d 02 3c2: 00 00 - 3c4: 54 07 + 3c4: 0c 07 3c6: 00 80 3c8: 00 00 3ca: 00 00 @@ -1328,7 +1310,7 @@ Disassembly of section .symtab: 3ce: 02 00 3d0: aa 02 3d2: 00 00 - 3d4: 24 07 + 3d4: dc 06 3d6: 00 80 3d8: 00 00 3da: 00 00 @@ -1336,14 +1318,14 @@ Disassembly of section .symtab: 3de: 02 00 3e0: b6 02 3e2: 00 00 - 3e4: 9c 01 + 3e4: a8 01 3e6: 00 80 - 3e8: a0 00 + 3e8: 94 00 3ea: 00 00 3ec: 12 00 3ee: 02 00 3f0: cf 02 00 00 fnmadd.s ft5, ft0, ft0, ft0, rne - 3f4: 40 1b + 3f4: f8 1a 3f6: 00 80 3f8: 00 00 3fa: 00 00 @@ -1351,14 +1333,14 @@ Disassembly of section .symtab: 3fe: 04 00 400: de 02 402: 00 00 - 404: e4 05 + 404: 9c 05 406: 00 80 408: c8 00 40a: 00 00 40c: 12 00 40e: 02 00 410: ff 02 00 00 - 414: 6c 1f + 414: 24 1f 416: 00 80 418: 00 00 41a: 00 00 @@ -1366,14 +1348,14 @@ Disassembly of section .symtab: 41e: 05 00 420: 9d 00 422: 00 00 - 424: 70 1f + 424: 44 1f 426: 00 80 428: 00 00 42a: 00 00 42c: 10 00 42e: 06 00 430: 07 03 00 00 - 434: 80 07 + 434: 38 07 436: 00 80 438: 30 00 43a: 00 00 @@ -1381,7 +1363,7 @@ Disassembly of section .symtab: 43e: 02 00 440: 06 03 442: 00 00 - 444: ac 06 + 444: 64 06 446: 00 80 448: 00 00 44a: 00 00 @@ -1389,7 +1371,7 @@ Disassembly of section .symtab: 44e: 02 00 450: 0c 03 452: 00 00 - 454: 64 07 + 454: 1c 07 456: 00 80 458: 00 00 45a: 00 00 @@ -1429,12 +1411,12 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 39 32 - 48: 2d 38 - 4a: 33 2d 38 62 - 4e: 2d 33 - 50: 30 2d - 52: 34 30 + 46: 39 30 + 48: 2d 39 + 4a: 66 2d + 4c: 30 37 + 4e: 2d 65 + 50: 37 2d 35 35 lui s10, 217938 54: 2e 63 56: 00 70 58: 61 72 diff --git a/benchmarks/opencl/guassian/Fan2.dump b/benchmarks/opencl/guassian/Fan2.dump index 61dc6950..ad9d64c3 100644 --- a/benchmarks/opencl/guassian/Fan2.dump +++ b/benchmarks/opencl/guassian/Fan2.dump @@ -1,28 +1,28 @@ -/tmp/pocl_vortex_kernel-b4-ee-53-1b-d0.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-10-5b-82-29-dc.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 15 00 00 auipc a1, 1 -80000004: 93 85 05 a1 addi a1, a1, -1520 +80000004: 93 85 85 9c addi a1, a1, -1592 80000008: 73 25 60 02 csrr a0, 38 8000000c: 6b 10 b5 00 -80000010: ef 00 10 20 jal 2560 +80000010: ef 00 90 1b jal 2488 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 13 85 c1 c2 addi a0, gp, -980 -80000020: 13 86 01 c3 addi a2, gp, -976 +80000020: 13 86 c1 c4 addi a2, gp, -948 80000024: 33 06 a6 40 sub a2, a2, a0 80000028: 93 05 00 00 mv a1, zero -8000002c: ef 00 90 3d jal 3032 +8000002c: ef 00 10 39 jal 2960 80000030: 17 15 00 00 auipc a0, 1 -80000034: 13 05 c5 ad addi a0, a0, -1316 -80000038: ef 00 10 29 jal 2704 -8000003c: ef 00 d0 32 jal 2860 -80000040: ef 00 80 3c jal 968 -80000044: 6f 00 90 29 j 2712 +80000034: 13 05 45 a9 addi a0, a0, -1388 +80000038: ef 00 90 24 jal 2632 +8000003c: ef 00 50 2e jal 2788 +80000040: ef 00 00 38 jal 896 +80000044: 6f 00 10 25 j 2640 Disassembly of section .text: @@ -30,8 +30,8 @@ Disassembly of section .text: 80000048: 93 07 00 00 mv a5, zero 8000004c: 63 88 07 00 beqz a5, 16 80000050: 37 15 00 80 lui a0, 524289 -80000054: 13 05 c5 b0 addi a0, a0, -1268 -80000058: 6f 00 10 27 j 2672 +80000054: 13 05 45 ac addi a0, a0, -1340 +80000058: 6f 00 90 22 j 2600 8000005c: 67 80 00 00 ret 80000060 kernel_spawn_run_warp: @@ -46,116 +46,116 @@ Disassembly of section .text: 80000080: 23 28 61 01 sw s6, 16(sp) 80000084: 23 26 71 01 sw s7, 12(sp) 80000088: 23 24 81 01 sw s8, 8(sp) -8000008c: ef 00 50 21 jal 2580 -80000090: ef 00 90 1c jal 2504 -80000094: ef 00 d0 20 jal 2572 -80000098: 83 a5 c1 c2 lw a1, -980(gp) -8000009c: 83 a5 05 00 lw a1, 0(a1) -800000a0: 83 aa 05 00 lw s5, 0(a1) -800000a4: 03 ab 45 00 lw s6, 4(a1) +8000008c: ef 00 d0 1c jal 2508 +80000090: ef 00 10 18 jal 2432 +80000094: ef 00 d0 1b jal 2492 +80000098: 93 04 05 00 mv s1, a0 +8000009c: ef 00 50 19 jal 2452 +800000a0: 93 09 05 00 mv s3, a0 +800000a4: ef 00 d0 19 jal 2460 800000a8: 13 09 05 00 mv s2, a0 -800000ac: ef 00 d0 1c jal 2508 -800000b0: 93 09 05 00 mv s3, a0 -800000b4: ef 00 50 1d jal 2516 -800000b8: 03 a8 c1 c2 lw a6, -980(gp) -800000bc: 03 27 48 01 lw a4, 20(a6) -800000c0: 83 25 08 01 lw a1, 16(a6) -800000c4: 93 06 07 00 mv a3, a4 -800000c8: 63 44 37 01 blt a4, s3, 8 -800000cc: 93 86 09 00 mv a3, s3 -800000d0: 33 a7 e9 00 slt a4, s3, a4 -800000d4: 33 87 e5 00 add a4, a1, a4 -800000d8: 93 07 10 00 addi a5, zero, 1 -800000dc: 63 46 f7 08 blt a4, a5, 140 -800000e0: 33 0a 5b 03 mul s4, s6, s5 -800000e4: 83 27 c8 00 lw a5, 12(a6) -800000e8: 13 0c f7 ff addi s8, a4, -1 -800000ec: b3 85 b9 02 mul a1, s3, a1 -800000f0: b3 85 b6 00 add a1, a3, a1 -800000f4: b3 05 b9 02 mul a1, s2, a1 -800000f8: b3 85 b7 00 add a1, a5, a1 -800000fc: 33 05 e5 02 mul a0, a0, a4 -80000100: b3 84 a5 00 add s1, a1, a0 -80000104: 33 09 60 41 neg s2, s6 -80000108: 33 0b 40 41 neg s6, s4 -8000010c: 33 c7 44 03 div a4, s1, s4 -80000110: 83 25 08 00 lw a1, 0(a6) -80000114: 33 05 eb 02 mul a0, s6, a4 -80000118: 33 85 a4 00 add a0, s1, a0 -8000011c: b3 46 55 03 div a3, a0, s5 -80000120: 03 a5 c5 00 lw a0, 12(a1) -80000124: b3 07 e9 02 mul a5, s2, a4 -80000128: b3 87 d7 40 sub a5, a5, a3 -8000012c: b3 87 fa 02 mul a5, s5, a5 -80000130: b3 08 f5 00 add a7, a0, a5 -80000134: 03 a4 05 01 lw s0, 16(a1) -80000138: 03 a6 45 01 lw a2, 20(a1) -8000013c: 83 27 48 00 lw a5, 4(a6) -80000140: 03 25 88 00 lw a0, 8(a6) -80000144: b3 06 d4 00 add a3, s0, a3 -80000148: 33 07 e6 00 add a4, a2, a4 -8000014c: 33 86 14 01 add a2, s1, a7 -80000150: e7 80 07 00 jalr a5 -80000154: 63 0a 0c 00 beqz s8, 20 -80000158: 03 a8 c1 c2 lw a6, -980(gp) -8000015c: 13 0c fc ff addi s8, s8, -1 -80000160: 93 84 14 00 addi s1, s1, 1 -80000164: 6f f0 9f fa j -88 -80000168: 13 b5 19 00 seqz a0, s3 -8000016c: 03 2c 81 00 lw s8, 8(sp) -80000170: 83 2b c1 00 lw s7, 12(sp) -80000174: 03 2b 01 01 lw s6, 16(sp) -80000178: 83 2a 41 01 lw s5, 20(sp) -8000017c: 03 2a 81 01 lw s4, 24(sp) -80000180: 83 29 c1 01 lw s3, 28(sp) -80000184: 03 29 01 02 lw s2, 32(sp) -80000188: 83 24 41 02 lw s1, 36(sp) -8000018c: 03 24 81 02 lw s0, 40(sp) -80000190: 83 20 c1 02 lw ra, 44(sp) -80000194: 13 01 01 03 addi sp, sp, 48 -80000198: 6f 00 10 0c j 2240 +800000ac: ef 00 d0 1a jal 2476 +800000b0: 93 85 c1 c2 addi a1, gp, -980 +800000b4: 13 96 24 00 slli a2, s1, 2 +800000b8: b3 05 b6 00 add a1, a2, a1 +800000bc: 03 ab 05 00 lw s6, 0(a1) +800000c0: 83 25 4b 01 lw a1, 20(s6) +800000c4: 03 26 0b 01 lw a2, 16(s6) +800000c8: 93 86 05 00 mv a3, a1 +800000cc: 63 c4 35 01 blt a1, s3, 8 +800000d0: 93 86 09 00 mv a3, s3 +800000d4: b3 a5 b9 00 slt a1, s3, a1 +800000d8: 33 07 b6 00 add a4, a2, a1 +800000dc: 93 05 10 00 addi a1, zero, 1 +800000e0: 63 4a b7 08 blt a4, a1, 148 +800000e4: 83 25 0b 00 lw a1, 0(s6) +800000e8: 83 aa 05 00 lw s5, 0(a1) +800000ec: 83 a7 45 00 lw a5, 4(a1) +800000f0: 83 24 cb 00 lw s1, 12(s6) +800000f4: 33 8a 57 03 mul s4, a5, s5 +800000f8: 13 0c f7 ff addi s8, a4, -1 +800000fc: 33 86 c9 02 mul a2, s3, a2 +80000100: 33 86 c6 00 add a2, a3, a2 +80000104: 33 05 c5 02 mul a0, a0, a2 +80000108: 33 85 a4 00 add a0, s1, a0 +8000010c: 33 06 e9 02 mul a2, s2, a4 +80000110: b3 04 c5 00 add s1, a0, a2 +80000114: 33 09 f0 40 neg s2, a5 +80000118: b3 0b 40 41 neg s7, s4 +8000011c: 33 c6 44 03 div a2, s1, s4 +80000120: 33 85 cb 02 mul a0, s7, a2 +80000124: 33 85 a4 00 add a0, s1, a0 +80000128: b3 46 55 03 div a3, a0, s5 +8000012c: 03 a5 c5 00 lw a0, 12(a1) +80000130: 33 07 c9 02 mul a4, s2, a2 +80000134: 33 07 d7 40 sub a4, a4, a3 +80000138: 33 87 ea 02 mul a4, s5, a4 +8000013c: 33 08 e5 00 add a6, a0, a4 +80000140: 03 a7 05 01 lw a4, 16(a1) +80000144: 03 a4 45 01 lw s0, 20(a1) +80000148: 83 27 4b 00 lw a5, 4(s6) +8000014c: 03 25 8b 00 lw a0, 8(s6) +80000150: b3 06 d7 00 add a3, a4, a3 +80000154: 33 07 c4 00 add a4, s0, a2 +80000158: 33 86 04 01 add a2, s1, a6 +8000015c: e7 80 07 00 jalr a5 +80000160: 63 0a 0c 00 beqz s8, 20 +80000164: 83 25 0b 00 lw a1, 0(s6) +80000168: 13 0c fc ff addi s8, s8, -1 +8000016c: 93 84 14 00 addi s1, s1, 1 +80000170: 6f f0 df fa j -84 +80000174: 13 b5 19 00 seqz a0, s3 +80000178: 03 2c 81 00 lw s8, 8(sp) +8000017c: 83 2b c1 00 lw s7, 12(sp) +80000180: 03 2b 01 01 lw s6, 16(sp) +80000184: 83 2a 41 01 lw s5, 20(sp) +80000188: 03 2a 81 01 lw s4, 24(sp) +8000018c: 83 29 c1 01 lw s3, 28(sp) +80000190: 03 29 01 02 lw s2, 32(sp) +80000194: 83 24 41 02 lw s1, 36(sp) +80000198: 03 24 81 02 lw s0, 40(sp) +8000019c: 83 20 c1 02 lw ra, 44(sp) +800001a0: 13 01 01 03 addi sp, sp, 48 +800001a4: 6f 00 d0 06 j 2156 -8000019c kernel_spawn_run_threads: -8000019c: 13 01 01 fe addi sp, sp, -32 -800001a0: 23 2e 11 00 sw ra, 28(sp) -800001a4: 23 2c 81 00 sw s0, 24(sp) -800001a8: 23 2a 91 00 sw s1, 20(sp) -800001ac: 23 28 21 01 sw s2, 16(sp) -800001b0: 23 26 31 01 sw s3, 12(sp) -800001b4: 93 89 05 00 mv s3, a1 -800001b8: ef 00 10 0a jal 2208 -800001bc: 03 a5 c1 c2 lw a0, -980(gp) -800001c0: 03 25 05 00 lw a0, 0(a0) -800001c4: 83 24 05 00 lw s1, 0(a0) -800001c8: 03 25 45 00 lw a0, 4(a0) -800001cc: 33 04 95 02 mul s0, a0, s1 -800001d0: ef 00 10 0c jal 2240 -800001d4: 33 05 35 01 add a0, a0, s3 -800001d8: 03 a6 c1 c2 lw a2, -980(gp) -800001dc: 33 47 85 02 div a4, a0, s0 -800001e0: b3 05 87 02 mul a1, a4, s0 -800001e4: 33 05 b5 40 sub a0, a0, a1 -800001e8: 83 25 06 00 lw a1, 0(a2) -800001ec: b3 46 95 02 div a3, a0, s1 -800001f0: b3 87 96 02 mul a5, a3, s1 -800001f4: 33 08 f5 40 sub a6, a0, a5 -800001f8: 83 a4 c5 00 lw s1, 12(a1) -800001fc: 03 a4 05 01 lw s0, 16(a1) -80000200: 83 a7 45 01 lw a5, 20(a1) -80000204: 83 28 46 00 lw a7, 4(a2) -80000208: 03 25 86 00 lw a0, 8(a2) -8000020c: 33 06 98 00 add a2, a6, s1 -80000210: b3 06 d4 00 add a3, s0, a3 -80000214: 33 87 e7 00 add a4, a5, a4 -80000218: e7 80 08 00 jalr a7 -8000021c: 13 05 10 00 addi a0, zero, 1 -80000220: 83 29 c1 00 lw s3, 12(sp) -80000224: 03 29 01 01 lw s2, 16(sp) -80000228: 83 24 41 01 lw s1, 20(sp) -8000022c: 03 24 81 01 lw s0, 24(sp) -80000230: 83 20 c1 01 lw ra, 28(sp) -80000234: 13 01 01 02 addi sp, sp, 32 -80000238: 6f 00 10 02 j 2080 +800001a8 kernel_spawn_run_threads: +800001a8: 13 01 01 ff addi sp, sp, -16 +800001ac: 23 26 11 00 sw ra, 12(sp) +800001b0: 23 24 81 00 sw s0, 8(sp) +800001b4: ef 00 d0 05 jal 2140 +800001b8: ef 00 90 09 jal 2200 +800001bc: 13 04 05 00 mv s0, a0 +800001c0: ef 00 90 08 jal 2184 +800001c4: 93 85 c1 c2 addi a1, gp, -980 +800001c8: 13 16 24 00 slli a2, s0, 2 +800001cc: b3 05 b6 00 add a1, a2, a1 +800001d0: 03 a6 05 00 lw a2, 0(a1) +800001d4: 83 25 06 00 lw a1, 0(a2) +800001d8: 83 26 c6 00 lw a3, 12(a2) +800001dc: 03 a7 05 00 lw a4, 0(a1) +800001e0: 83 a7 45 00 lw a5, 4(a1) +800001e4: 33 85 a6 00 add a0, a3, a0 +800001e8: b3 86 e7 02 mul a3, a5, a4 +800001ec: b3 47 d5 02 div a5, a0, a3 +800001f0: b3 86 d7 02 mul a3, a5, a3 +800001f4: 03 a4 c5 00 lw s0, 12(a1) +800001f8: 33 05 d5 40 sub a0, a0, a3 +800001fc: b3 46 e5 02 div a3, a0, a4 +80000200: 33 88 e6 02 mul a6, a3, a4 +80000204: b3 08 a4 00 add a7, s0, a0 +80000208: 03 a7 05 01 lw a4, 16(a1) +8000020c: 03 a4 45 01 lw s0, 20(a1) +80000210: 83 22 46 00 lw t0, 4(a2) +80000214: 03 25 86 00 lw a0, 8(a2) +80000218: 33 86 08 41 sub a2, a7, a6 +8000021c: b3 06 d7 00 add a3, a4, a3 +80000220: 33 07 f4 00 add a4, s0, a5 +80000224: e7 80 02 00 jalr t0 +80000228: 13 05 10 00 addi a0, zero, 1 +8000022c: 03 24 81 00 lw s0, 8(sp) +80000230: 83 20 c1 00 lw ra, 12(sp) +80000234: 13 01 01 01 addi sp, sp, 16 +80000238: 6f 00 80 7d j 2008 8000023c kernel_spawn: 8000023c: 13 01 01 fc addi sp, sp, -64 @@ -167,846 +167,828 @@ Disassembly of section .text: 80000254: 23 24 41 03 sw s4, 40(sp) 80000258: 23 22 51 03 sw s5, 36(sp) 8000025c: 23 20 61 03 sw s6, 32(sp) -80000260: 13 0b 05 00 mv s6, a0 -80000264: 03 25 05 00 lw a0, 0(a0) -80000268: 83 26 4b 00 lw a3, 4(s6) -8000026c: 03 27 8b 00 lw a4, 8(s6) -80000270: 13 09 06 00 mv s2, a2 -80000274: 93 89 05 00 mv s3, a1 -80000278: 33 85 a6 02 mul a0, a3, a0 -8000027c: b3 04 e5 02 mul s1, a0, a4 -80000280: ef 00 10 03 jal 2096 -80000284: 13 04 05 00 mv s0, a0 -80000288: ef 00 10 02 jal 2080 +80000260: 23 2e 71 01 sw s7, 28(sp) +80000264: 23 2c 81 01 sw s8, 24(sp) +80000268: 93 04 05 00 mv s1, a0 +8000026c: 83 2b 05 00 lw s7, 0(a0) +80000270: 03 24 45 00 lw s0, 4(a0) +80000274: 03 2c 85 00 lw s8, 8(a0) +80000278: 13 09 06 00 mv s2, a2 +8000027c: 93 89 05 00 mv s3, a1 +80000280: ef 00 80 7e jal 2024 +80000284: 13 0b 05 00 mv s6, a0 +80000288: ef 00 80 7d jal 2008 8000028c: 13 0a 05 00 mv s4, a0 -80000290: ef 00 10 01 jal 2064 +80000290: ef 00 80 7c jal 1992 80000294: 93 0a 05 00 mv s5, a0 -80000298: ef 00 10 00 jal 2048 -8000029c: b3 85 4a 03 mul a1, s5, s4 -800002a0: 13 06 10 00 addi a2, zero, 1 -800002a4: 63 c8 95 00 blt a1, s1, 16 -800002a8: 63 5a 86 00 bge a2, s0, 20 -800002ac: 63 4c c5 00 blt a0, a2, 24 -800002b0: 6f 00 00 13 j 304 -800002b4: 33 c6 b4 02 div a2, s1, a1 -800002b8: e3 4a 86 fe blt a2, s0, -12 -800002bc: 13 06 04 00 mv a2, s0 -800002c0: 63 50 c5 12 bge a0, a2, 288 -800002c4: 93 06 f4 ff addi a3, s0, -1 -800002c8: b3 c5 c4 02 div a1, s1, a2 -800002cc: 63 0e d5 00 beq a0, a3, 28 -800002d0: 13 06 00 00 mv a2, zero -800002d4: b3 06 b6 00 add a3, a2, a1 -800002d8: 33 c6 56 03 div a2, a3, s5 -800002dc: 13 07 00 00 mv a4, zero -800002e0: 63 50 46 03 bge a2, s4, 32 -800002e4: 6f 00 00 02 j 32 -800002e8: 33 86 c5 02 mul a2, a1, a2 -800002ec: 33 86 c4 40 sub a2, s1, a2 -800002f0: b3 06 b6 00 add a3, a2, a1 -800002f4: 33 c6 56 03 div a2, a3, s5 -800002f8: 13 07 00 00 mv a4, zero -800002fc: 63 44 46 01 blt a2, s4, 8 -80000300: 33 47 46 03 div a4, a2, s4 -80000304: 93 07 00 00 mv a5, zero -80000308: b3 0a 56 03 mul s5, a2, s5 -8000030c: 93 04 10 00 addi s1, zero, 1 -80000310: 63 08 07 00 beqz a4, 16 -80000314: b3 07 47 03 mul a5, a4, s4 -80000318: b3 07 f6 40 sub a5, a2, a5 -8000031c: 93 04 07 00 mv s1, a4 -80000320: 33 84 56 41 sub s0, a3, s5 -80000324: 23 24 61 01 sw s6, 8(sp) -80000328: 23 26 31 01 sw s3, 12(sp) -8000032c: 23 28 21 01 sw s2, 16(sp) -80000330: 33 85 a5 02 mul a0, a1, a0 -80000334: 23 2a a1 00 sw a0, 20(sp) -80000338: 23 2c 91 00 sw s1, 24(sp) -8000033c: 23 2e f1 00 sw a5, 28(sp) -80000340: 93 05 81 00 addi a1, sp, 8 -80000344: 93 06 20 00 addi a3, zero, 2 -80000348: 23 a6 b1 c2 sw a1, -980(gp) -8000034c: 63 40 d6 02 blt a2, a3, 32 -80000350: 63 44 46 01 blt a2, s4, 8 -80000354: 13 06 0a 00 mv a2, s4 -80000358: 37 05 00 80 lui a0, 524288 -8000035c: 93 05 05 06 addi a1, a0, 96 -80000360: 13 05 06 00 mv a0, a2 -80000364: ef 00 c0 6e jal 1772 -80000368: ef f0 9f cf jal -776 -8000036c: 63 0a 04 06 beqz s0, 116 -80000370: 13 05 04 00 mv a0, s0 -80000374: ef 00 40 6e jal 1764 -80000378: 03 a5 c1 c2 lw a0, -980(gp) -8000037c: 03 25 05 00 lw a0, 0(a0) -80000380: 83 24 05 00 lw s1, 0(a0) -80000384: 03 25 45 00 lw a0, 4(a0) -80000388: 33 04 95 02 mul s0, a0, s1 -8000038c: ef 00 40 70 jal 1796 -80000390: 33 05 55 01 add a0, a0, s5 -80000394: 03 a6 c1 c2 lw a2, -980(gp) -80000398: 33 47 85 02 div a4, a0, s0 -8000039c: b3 05 87 02 mul a1, a4, s0 -800003a0: 33 05 b5 40 sub a0, a0, a1 -800003a4: 83 25 06 00 lw a1, 0(a2) -800003a8: b3 46 95 02 div a3, a0, s1 -800003ac: b3 87 96 02 mul a5, a3, s1 -800003b0: 33 08 f5 40 sub a6, a0, a5 -800003b4: 83 a4 c5 00 lw s1, 12(a1) -800003b8: 03 a4 05 01 lw s0, 16(a1) -800003bc: 83 a7 45 01 lw a5, 20(a1) -800003c0: 83 28 46 00 lw a7, 4(a2) -800003c4: 03 25 86 00 lw a0, 8(a2) -800003c8: 33 06 98 00 add a2, a6, s1 -800003cc: b3 06 d4 00 add a3, s0, a3 -800003d0: 33 87 e7 00 add a4, a5, a4 -800003d4: e7 80 08 00 jalr a7 -800003d8: 13 05 10 00 addi a0, zero, 1 -800003dc: ef 00 c0 67 jal 1660 -800003e0: 03 2b 01 02 lw s6, 32(sp) -800003e4: 83 2a 41 02 lw s5, 36(sp) -800003e8: 03 2a 81 02 lw s4, 40(sp) -800003ec: 83 29 c1 02 lw s3, 44(sp) -800003f0: 03 29 01 03 lw s2, 48(sp) -800003f4: 83 24 41 03 lw s1, 52(sp) -800003f8: 03 24 81 03 lw s0, 56(sp) -800003fc: 83 20 c1 03 lw ra, 60(sp) -80000400: 13 01 01 04 addi sp, sp, 64 -80000404: 67 80 00 00 ret +80000298: ef 00 80 7b jal 1976 +8000029c: 93 05 70 00 addi a1, zero, 7 +800002a0: 63 c8 a5 0e blt a1, a0, 240 +800002a4: b3 05 74 03 mul a1, s0, s7 +800002a8: 33 86 85 03 mul a2, a1, s8 +800002ac: b3 85 4a 03 mul a1, s5, s4 +800002b0: 93 06 10 00 addi a3, zero, 1 +800002b4: 63 c8 c5 00 blt a1, a2, 16 +800002b8: 63 da 66 01 bge a3, s6, 20 +800002bc: 63 4c d5 00 blt a0, a3, 24 +800002c0: 6f 00 00 0d j 208 +800002c4: b3 46 b6 02 div a3, a2, a1 +800002c8: e3 ca 66 ff blt a3, s6, -12 +800002cc: 93 06 0b 00 mv a3, s6 +800002d0: 63 50 d5 0c bge a0, a3, 192 +800002d4: 13 07 fb ff addi a4, s6, -1 +800002d8: b3 45 d6 02 div a1, a2, a3 +800002dc: 63 0e e5 00 beq a0, a4, 28 +800002e0: 13 06 00 00 mv a2, zero +800002e4: 33 0b b6 00 add s6, a2, a1 +800002e8: 33 46 5b 03 div a2, s6, s5 +800002ec: 93 06 00 00 mv a3, zero +800002f0: 63 50 46 03 bge a2, s4, 32 +800002f4: 6f 00 00 02 j 32 +800002f8: b3 86 d5 02 mul a3, a1, a3 +800002fc: 33 06 d6 40 sub a2, a2, a3 +80000300: 33 0b b6 00 add s6, a2, a1 +80000304: 33 46 5b 03 div a2, s6, s5 +80000308: 93 06 00 00 mv a3, zero +8000030c: 63 44 46 01 blt a2, s4, 8 +80000310: b3 46 46 03 div a3, a2, s4 +80000314: 13 07 00 00 mv a4, zero +80000318: 93 07 10 00 addi a5, zero, 1 +8000031c: 63 88 06 00 beqz a3, 16 +80000320: 33 87 46 03 mul a4, a3, s4 +80000324: 33 07 e6 40 sub a4, a2, a4 +80000328: 93 87 06 00 mv a5, a3 +8000032c: 33 04 56 03 mul s0, a2, s5 +80000330: 23 20 91 00 sw s1, 0(sp) +80000334: 23 22 31 01 sw s3, 4(sp) +80000338: 23 24 21 01 sw s2, 8(sp) +8000033c: b3 85 a5 02 mul a1, a1, a0 +80000340: 23 26 b1 00 sw a1, 12(sp) +80000344: 23 28 f1 00 sw a5, 16(sp) +80000348: 23 2a e1 00 sw a4, 20(sp) +8000034c: 93 85 c1 c2 addi a1, gp, -980 +80000350: 13 15 25 00 slli a0, a0, 2 +80000354: 33 05 b5 00 add a0, a0, a1 +80000358: 93 05 01 00 mv a1, sp +8000035c: 93 06 20 00 addi a3, zero, 2 +80000360: 23 20 b5 00 sw a1, 0(a0) +80000364: 63 40 d6 02 blt a2, a3, 32 +80000368: 63 44 46 01 blt a2, s4, 8 +8000036c: 13 06 0a 00 mv a2, s4 +80000370: 37 05 00 80 lui a0, 524288 +80000374: 93 05 05 06 addi a1, a0, 96 +80000378: 13 05 06 00 mv a0, a2 +8000037c: ef 00 c0 68 jal 1676 +80000380: ef f0 1f ce jal -800 +80000384: 63 06 8b 00 beq s6, s0, 12 +80000388: 23 26 81 00 sw s0, 12(sp) +8000038c: ef f0 5f cd jal -812 +80000390: 03 2c 81 01 lw s8, 24(sp) +80000394: 83 2b c1 01 lw s7, 28(sp) +80000398: 03 2b 01 02 lw s6, 32(sp) +8000039c: 83 2a 41 02 lw s5, 36(sp) +800003a0: 03 2a 81 02 lw s4, 40(sp) +800003a4: 83 29 c1 02 lw s3, 44(sp) +800003a8: 03 29 01 03 lw s2, 48(sp) +800003ac: 83 24 41 03 lw s1, 52(sp) +800003b0: 03 24 81 03 lw s0, 56(sp) +800003b4: 83 20 c1 03 lw ra, 60(sp) +800003b8: 13 01 01 04 addi sp, sp, 64 +800003bc: 67 80 00 00 ret -80000408 main: -80000408: 13 01 01 ff addi sp, sp, -16 -8000040c: 23 26 11 00 sw ra, 12(sp) -80000410: 37 05 00 80 lui a0, 524288 -80000414: 93 05 45 62 addi a1, a0, 1572 -80000418: 37 05 ff 7f lui a0, 524272 -8000041c: 13 06 45 03 addi a2, a0, 52 -80000420: 37 05 ff 7f lui a0, 524272 -80000424: ef f0 9f e1 jal -488 -80000428: 13 05 00 00 mv a0, zero -8000042c: 83 20 c1 00 lw ra, 12(sp) -80000430: 13 01 01 01 addi sp, sp, 16 -80000434: 67 80 00 00 ret +800003c0 main: +800003c0: 13 01 01 ff addi sp, sp, -16 +800003c4: 23 26 11 00 sw ra, 12(sp) +800003c8: 37 05 00 80 lui a0, 524288 +800003cc: 93 05 c5 5d addi a1, a0, 1500 +800003d0: 37 05 ff 7f lui a0, 524272 +800003d4: 13 06 45 03 addi a2, a0, 52 +800003d8: 37 05 ff 7f lui a0, 524272 +800003dc: ef f0 1f e6 jal -416 +800003e0: 13 05 00 00 mv a0, zero +800003e4: 83 20 c1 00 lw ra, 12(sp) +800003e8: 13 01 01 01 addi sp, sp, 16 +800003ec: 67 80 00 00 ret -80000438 _pocl_kernel_Fan2: -80000438: 13 01 01 fd addi sp, sp, -48 -8000043c: 23 26 11 02 sw ra, 44(sp) -80000440: 23 24 81 02 sw s0, 40(sp) -80000444: 23 22 91 02 sw s1, 36(sp) -80000448: 23 20 21 03 sw s2, 32(sp) -8000044c: 23 2e 31 01 sw s3, 28(sp) -80000450: 23 2c 41 01 sw s4, 24(sp) -80000454: 23 2a 51 01 sw s5, 20(sp) -80000458: 23 28 61 01 sw s6, 16(sp) -8000045c: 23 26 71 01 sw s7, 12(sp) -80000460: 23 24 81 01 sw s8, 8(sp) -80000464: 23 22 91 01 sw s9, 4(sp) -80000468: 23 20 a1 01 sw s10, 0(sp) -8000046c: 13 04 01 03 addi s0, sp, 48 -80000470: 13 71 c1 ff andi sp, sp, -4 -80000474: 93 02 00 00 mv t0, zero -80000478: 03 a3 87 01 lw t1, 24(a5) -8000047c: 83 a3 c7 01 lw t2, 28(a5) -80000480: 03 ae 07 02 lw t3, 32(a5) -80000484: 03 aa 07 01 lw s4, 16(a5) -80000488: 03 a9 c7 00 lw s2, 12(a5) -8000048c: b3 09 03 03 mul s3, t1, a6 -80000490: b3 87 13 03 mul a5, t2, a7 -80000494: 33 08 fa 00 add a6, s4, a5 -80000498: 93 44 f7 ff not s1, a4 -8000049c: b3 8f d4 00 add t6, s1, a3 -800004a0: b3 88 e6 40 sub a7, a3, a4 -800004a4: b3 0e d7 02 mul t4, a4, a3 -800004a8: 93 14 27 00 slli s1, a4, 2 -800004ac: 33 0f 96 00 add t5, a2, s1 -800004b0: b3 04 ea 00 add s1, s4, a4 -800004b4: 33 8a f4 00 add s4, s1, a5 -800004b8: b3 04 e9 00 add s1, s2, a4 -800004bc: b3 84 34 01 add s1, s1, s3 -800004c0: 13 8b 14 00 addi s6, s1, 1 -800004c4: b3 87 66 03 mul a5, a3, s6 -800004c8: b3 04 fa 00 add s1, s4, a5 -800004cc: 93 94 24 00 slli s1, s1, 2 -800004d0: b3 0c 39 01 add s9, s2, s3 -800004d4: 33 89 95 00 add s2, a1, s1 -800004d8: 93 96 26 00 slli a3, a3, 2 -800004dc: b3 07 f7 00 add a5, a4, a5 -800004e0: 93 97 27 00 slli a5, a5, 2 -800004e4: b3 0a f5 00 add s5, a0, a5 -800004e8: 93 17 2b 00 slli a5, s6, 2 -800004ec: 33 0a f6 00 add s4, a2, a5 -800004f0: b3 09 95 00 add s3, a0, s1 -800004f4: 6f 00 c0 00 j 12 -800004f8: 93 82 12 00 addi t0, t0, 1 -800004fc: 63 f6 c2 0f bgeu t0, t3, 236 -80000500: 13 0b 00 00 mv s6, zero -80000504: 93 8b 09 00 mv s7, s3 -80000508: 13 0d 09 00 mv s10, s2 -8000050c: 6f 00 40 01 j 20 -80000510: 13 0b 1b 00 addi s6, s6, 1 -80000514: 13 0d 4d 00 addi s10, s10, 4 -80000518: 93 8b 4b 00 addi s7, s7, 4 -8000051c: e3 7e 7b fc bgeu s6, t2, -36 -80000520: 33 05 68 01 add a0, a6, s6 -80000524: e3 56 15 ff bge a0, a7, -20 -80000528: 33 06 e5 00 add a2, a0, a4 -8000052c: 33 06 d6 01 add a2, a2, t4 -80000530: 13 16 26 00 slli a2, a2, 2 -80000534: 33 8c c5 00 add s8, a1, a2 -80000538: 63 04 05 04 beqz a0, 72 +800003f0 _pocl_kernel_Fan2: +800003f0: 13 01 01 fd addi sp, sp, -48 +800003f4: 23 26 11 02 sw ra, 44(sp) +800003f8: 23 24 81 02 sw s0, 40(sp) +800003fc: 23 22 91 02 sw s1, 36(sp) +80000400: 23 20 21 03 sw s2, 32(sp) +80000404: 23 2e 31 01 sw s3, 28(sp) +80000408: 23 2c 41 01 sw s4, 24(sp) +8000040c: 23 2a 51 01 sw s5, 20(sp) +80000410: 23 28 61 01 sw s6, 16(sp) +80000414: 23 26 71 01 sw s7, 12(sp) +80000418: 23 24 81 01 sw s8, 8(sp) +8000041c: 23 22 91 01 sw s9, 4(sp) +80000420: 23 20 a1 01 sw s10, 0(sp) +80000424: 13 04 01 03 addi s0, sp, 48 +80000428: 13 71 c1 ff andi sp, sp, -4 +8000042c: 93 02 00 00 mv t0, zero +80000430: 03 a3 87 01 lw t1, 24(a5) +80000434: 83 a3 c7 01 lw t2, 28(a5) +80000438: 03 ae 07 02 lw t3, 32(a5) +8000043c: 03 aa 07 01 lw s4, 16(a5) +80000440: 03 a9 c7 00 lw s2, 12(a5) +80000444: b3 09 03 03 mul s3, t1, a6 +80000448: b3 87 13 03 mul a5, t2, a7 +8000044c: 33 08 fa 00 add a6, s4, a5 +80000450: 93 44 f7 ff not s1, a4 +80000454: b3 8f d4 00 add t6, s1, a3 +80000458: b3 88 e6 40 sub a7, a3, a4 +8000045c: b3 0e d7 02 mul t4, a4, a3 +80000460: 93 14 27 00 slli s1, a4, 2 +80000464: 33 0f 96 00 add t5, a2, s1 +80000468: b3 04 ea 00 add s1, s4, a4 +8000046c: 33 8a f4 00 add s4, s1, a5 +80000470: b3 04 e9 00 add s1, s2, a4 +80000474: b3 84 34 01 add s1, s1, s3 +80000478: 13 8b 14 00 addi s6, s1, 1 +8000047c: b3 87 66 03 mul a5, a3, s6 +80000480: b3 04 fa 00 add s1, s4, a5 +80000484: 93 94 24 00 slli s1, s1, 2 +80000488: b3 0c 39 01 add s9, s2, s3 +8000048c: 33 89 95 00 add s2, a1, s1 +80000490: 93 96 26 00 slli a3, a3, 2 +80000494: b3 07 f7 00 add a5, a4, a5 +80000498: 93 97 27 00 slli a5, a5, 2 +8000049c: b3 0a f5 00 add s5, a0, a5 +800004a0: 93 17 2b 00 slli a5, s6, 2 +800004a4: 33 0a f6 00 add s4, a2, a5 +800004a8: b3 09 95 00 add s3, a0, s1 +800004ac: 6f 00 c0 00 j 12 +800004b0: 93 82 12 00 addi t0, t0, 1 +800004b4: 63 f6 c2 0f bgeu t0, t3, 236 +800004b8: 13 0b 00 00 mv s6, zero +800004bc: 93 8b 09 00 mv s7, s3 +800004c0: 13 0d 09 00 mv s10, s2 +800004c4: 6f 00 40 01 j 20 +800004c8: 13 0b 1b 00 addi s6, s6, 1 +800004cc: 13 0d 4d 00 addi s10, s10, 4 +800004d0: 93 8b 4b 00 addi s7, s7, 4 +800004d4: e3 7e 7b fc bgeu s6, t2, -36 +800004d8: 33 05 68 01 add a0, a6, s6 +800004dc: e3 56 15 ff bge a0, a7, -20 +800004e0: 33 06 e5 00 add a2, a0, a4 +800004e4: 33 06 d6 01 add a2, a2, t4 +800004e8: 13 16 26 00 slli a2, a2, 2 +800004ec: 33 8c c5 00 add s8, a1, a2 +800004f0: 63 04 05 04 beqz a0, 72 +800004f4: 13 05 00 00 mv a0, zero +800004f8: 13 06 00 00 mv a2, zero +800004fc: 6f 00 00 01 j 16 +80000500: 13 06 16 00 addi a2, a2, 1 +80000504: 33 05 d5 00 add a0, a0, a3 +80000508: e3 70 66 fc bgeu a2, t1, -64 +8000050c: b3 87 cc 00 add a5, s9, a2 +80000510: e3 d8 f7 ff bge a5, t6, -16 +80000514: b3 87 aa 00 add a5, s5, a0 +80000518: 07 a0 07 00 flw ft0, 0(a5) +8000051c: 87 20 0c 00 flw ft1, 0(s8) +80000520: b3 07 ad 00 add a5, s10, a0 +80000524: 07 a1 07 00 flw ft2, 0(a5) +80000528: 53 70 10 10 fmul.s ft0, ft0, ft1 +8000052c: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000530: 27 a0 07 00 fsw ft0, 0(a5) +80000534: 6f f0 df fc j -52 +80000538: 13 06 00 00 mv a2, zero 8000053c: 13 05 00 00 mv a0, zero -80000540: 13 06 00 00 mv a2, zero -80000544: 6f 00 00 01 j 16 -80000548: 13 06 16 00 addi a2, a2, 1 -8000054c: 33 05 d5 00 add a0, a0, a3 -80000550: e3 70 66 fc bgeu a2, t1, -64 -80000554: b3 87 cc 00 add a5, s9, a2 -80000558: e3 d8 f7 ff bge a5, t6, -16 -8000055c: b3 87 aa 00 add a5, s5, a0 -80000560: 07 a0 07 00 flw ft0, 0(a5) -80000564: 87 20 0c 00 flw ft1, 0(s8) -80000568: b3 07 ad 00 add a5, s10, a0 -8000056c: 07 a1 07 00 flw ft2, 0(a5) -80000570: 53 70 10 10 fmul.s ft0, ft0, ft1 -80000574: 53 70 01 08 fsub.s ft0, ft2, ft0 -80000578: 27 a0 07 00 fsw ft0, 0(a5) -8000057c: 6f f0 df fc j -52 -80000580: 13 06 00 00 mv a2, zero -80000584: 13 05 00 00 mv a0, zero -80000588: 93 07 0a 00 mv a5, s4 -8000058c: 6f 00 40 01 j 20 -80000590: 13 05 15 00 addi a0, a0, 1 -80000594: 93 87 47 00 addi a5, a5, 4 -80000598: 33 06 d6 00 add a2, a2, a3 -8000059c: e3 7a 65 f6 bgeu a0, t1, -140 -800005a0: b3 84 ac 00 add s1, s9, a0 -800005a4: e3 d6 f4 ff bge s1, t6, -20 -800005a8: b3 84 ca 00 add s1, s5, a2 -800005ac: 07 a0 04 00 flw ft0, 0(s1) -800005b0: 87 20 0c 00 flw ft1, 0(s8) -800005b4: b3 04 cd 00 add s1, s10, a2 -800005b8: 07 a1 04 00 flw ft2, 0(s1) -800005bc: 53 70 10 10 fmul.s ft0, ft0, ft1 -800005c0: 53 70 01 08 fsub.s ft0, ft2, ft0 -800005c4: 27 a0 04 00 fsw ft0, 0(s1) -800005c8: b3 84 cb 00 add s1, s7, a2 -800005cc: 07 a0 04 00 flw ft0, 0(s1) -800005d0: 87 20 0f 00 flw ft1, 0(t5) -800005d4: 07 a1 07 00 flw ft2, 0(a5) -800005d8: 53 70 10 10 fmul.s ft0, ft0, ft1 -800005dc: 53 70 01 08 fsub.s ft0, ft2, ft0 -800005e0: 27 a0 07 00 fsw ft0, 0(a5) -800005e4: 6f f0 df fa j -84 -800005e8: 13 01 04 fd addi sp, s0, -48 -800005ec: 03 2d 01 00 lw s10, 0(sp) -800005f0: 83 2c 41 00 lw s9, 4(sp) -800005f4: 03 2c 81 00 lw s8, 8(sp) -800005f8: 83 2b c1 00 lw s7, 12(sp) -800005fc: 03 2b 01 01 lw s6, 16(sp) -80000600: 83 2a 41 01 lw s5, 20(sp) -80000604: 03 2a 81 01 lw s4, 24(sp) -80000608: 83 29 c1 01 lw s3, 28(sp) -8000060c: 03 29 01 02 lw s2, 32(sp) -80000610: 83 24 41 02 lw s1, 36(sp) -80000614: 03 24 81 02 lw s0, 40(sp) -80000618: 83 20 c1 02 lw ra, 44(sp) -8000061c: 13 01 01 03 addi sp, sp, 48 -80000620: 67 80 00 00 ret +80000540: 93 07 0a 00 mv a5, s4 +80000544: 6f 00 40 01 j 20 +80000548: 13 05 15 00 addi a0, a0, 1 +8000054c: 93 87 47 00 addi a5, a5, 4 +80000550: 33 06 d6 00 add a2, a2, a3 +80000554: e3 7a 65 f6 bgeu a0, t1, -140 +80000558: b3 84 ac 00 add s1, s9, a0 +8000055c: e3 d6 f4 ff bge s1, t6, -20 +80000560: b3 84 ca 00 add s1, s5, a2 +80000564: 07 a0 04 00 flw ft0, 0(s1) +80000568: 87 20 0c 00 flw ft1, 0(s8) +8000056c: b3 04 cd 00 add s1, s10, a2 +80000570: 07 a1 04 00 flw ft2, 0(s1) +80000574: 53 70 10 10 fmul.s ft0, ft0, ft1 +80000578: 53 70 01 08 fsub.s ft0, ft2, ft0 +8000057c: 27 a0 04 00 fsw ft0, 0(s1) +80000580: b3 84 cb 00 add s1, s7, a2 +80000584: 07 a0 04 00 flw ft0, 0(s1) +80000588: 87 20 0f 00 flw ft1, 0(t5) +8000058c: 07 a1 07 00 flw ft2, 0(a5) +80000590: 53 70 10 10 fmul.s ft0, ft0, ft1 +80000594: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000598: 27 a0 07 00 fsw ft0, 0(a5) +8000059c: 6f f0 df fa j -84 +800005a0: 13 01 04 fd addi sp, s0, -48 +800005a4: 03 2d 01 00 lw s10, 0(sp) +800005a8: 83 2c 41 00 lw s9, 4(sp) +800005ac: 03 2c 81 00 lw s8, 8(sp) +800005b0: 83 2b c1 00 lw s7, 12(sp) +800005b4: 03 2b 01 01 lw s6, 16(sp) +800005b8: 83 2a 41 01 lw s5, 20(sp) +800005bc: 03 2a 81 01 lw s4, 24(sp) +800005c0: 83 29 c1 01 lw s3, 28(sp) +800005c4: 03 29 01 02 lw s2, 32(sp) +800005c8: 83 24 41 02 lw s1, 36(sp) +800005cc: 03 24 81 02 lw s0, 40(sp) +800005d0: 83 20 c1 02 lw ra, 44(sp) +800005d4: 13 01 01 03 addi sp, sp, 48 +800005d8: 67 80 00 00 ret -80000624 _pocl_kernel_Fan2_workgroup: -80000624: 13 01 01 fd addi sp, sp, -48 -80000628: 23 26 81 02 sw s0, 44(sp) -8000062c: 23 24 91 02 sw s1, 40(sp) -80000630: 23 22 21 03 sw s2, 36(sp) -80000634: 23 20 31 03 sw s3, 32(sp) -80000638: 23 2e 41 01 sw s4, 28(sp) -8000063c: 23 2c 51 01 sw s5, 24(sp) -80000640: 23 2a 61 01 sw s6, 20(sp) -80000644: 23 28 71 01 sw s7, 16(sp) -80000648: 23 26 81 01 sw s8, 12(sp) -8000064c: 23 24 91 01 sw s9, 8(sp) -80000650: 03 27 05 00 lw a4, 0(a0) -80000654: 13 08 00 00 mv a6, zero -80000658: 03 29 07 00 lw s2, 0(a4) -8000065c: 03 27 45 00 lw a4, 4(a0) -80000660: 83 24 85 00 lw s1, 8(a0) -80000664: 03 24 c5 00 lw s0, 12(a0) -80000668: 03 25 05 01 lw a0, 16(a0) -8000066c: 83 28 07 00 lw a7, 0(a4) -80000670: 83 a9 04 00 lw s3, 0(s1) -80000674: 03 24 04 00 lw s0, 0(s0) -80000678: 03 23 05 00 lw t1, 0(a0) -8000067c: 83 ac 85 01 lw s9, 24(a1) -80000680: 03 ae c5 01 lw t3, 28(a1) -80000684: 83 a2 05 02 lw t0, 32(a1) -80000688: 03 a5 05 01 lw a0, 16(a1) -8000068c: 83 ae c5 00 lw t4, 12(a1) -80000690: b3 85 cc 02 mul a1, s9, a2 -80000694: b3 06 de 02 mul a3, t3, a3 -80000698: 33 0f d5 00 add t5, a0, a3 -8000069c: 13 46 f3 ff not a2, t1 -800006a0: 33 06 c4 00 add a2, s0, a2 -800006a4: b3 0f 64 40 sub t6, s0, t1 -800006a8: b3 03 83 02 mul t2, t1, s0 -800006ac: 93 14 23 00 slli s1, t1, 2 -800006b0: 33 8a 99 00 add s4, s3, s1 -800006b4: 33 05 65 00 add a0, a0, t1 -800006b8: 33 05 d5 00 add a0, a0, a3 -800006bc: b3 86 6e 00 add a3, t4, t1 -800006c0: b3 86 b6 00 add a3, a3, a1 -800006c4: 93 86 16 00 addi a3, a3, 1 -800006c8: b3 04 d4 02 mul s1, s0, a3 -800006cc: 33 05 95 00 add a0, a0, s1 -800006d0: 93 17 25 00 slli a5, a0, 2 -800006d4: 33 85 be 00 add a0, t4, a1 -800006d8: b3 8e f8 00 add t4, a7, a5 -800006dc: 93 15 24 00 slli a1, s0, 2 -800006e0: 33 04 93 00 add s0, t1, s1 -800006e4: 13 14 24 00 slli s0, s0, 2 -800006e8: b3 0a 89 00 add s5, s2, s0 -800006ec: 93 96 26 00 slli a3, a3, 2 -800006f0: b3 89 d9 00 add s3, s3, a3 -800006f4: 33 09 f9 00 add s2, s2, a5 -800006f8: 6f 00 c0 00 j 12 -800006fc: 13 08 18 00 addi a6, a6, 1 -80000700: 63 76 58 0e bgeu a6, t0, 236 -80000704: 13 0b 00 00 mv s6, zero -80000708: 93 0b 09 00 mv s7, s2 -8000070c: 93 87 0e 00 mv a5, t4 -80000710: 6f 00 40 01 j 20 -80000714: 13 0b 1b 00 addi s6, s6, 1 -80000718: 93 87 47 00 addi a5, a5, 4 -8000071c: 93 8b 4b 00 addi s7, s7, 4 -80000720: e3 7e cb fd bgeu s6, t3, -36 -80000724: b3 06 6f 01 add a3, t5, s6 -80000728: e3 d6 f6 ff bge a3, t6, -20 -8000072c: 33 84 66 00 add s0, a3, t1 -80000730: 33 04 74 00 add s0, s0, t2 -80000734: 13 14 24 00 slli s0, s0, 2 -80000738: 33 8c 88 00 add s8, a7, s0 -8000073c: 63 84 06 04 beqz a3, 72 -80000740: 93 06 00 00 mv a3, zero -80000744: 13 04 00 00 mv s0, zero -80000748: 6f 00 00 01 j 16 +800005dc _pocl_kernel_Fan2_workgroup: +800005dc: 13 01 01 fd addi sp, sp, -48 +800005e0: 23 26 81 02 sw s0, 44(sp) +800005e4: 23 24 91 02 sw s1, 40(sp) +800005e8: 23 22 21 03 sw s2, 36(sp) +800005ec: 23 20 31 03 sw s3, 32(sp) +800005f0: 23 2e 41 01 sw s4, 28(sp) +800005f4: 23 2c 51 01 sw s5, 24(sp) +800005f8: 23 2a 61 01 sw s6, 20(sp) +800005fc: 23 28 71 01 sw s7, 16(sp) +80000600: 23 26 81 01 sw s8, 12(sp) +80000604: 23 24 91 01 sw s9, 8(sp) +80000608: 03 27 05 00 lw a4, 0(a0) +8000060c: 13 08 00 00 mv a6, zero +80000610: 03 29 07 00 lw s2, 0(a4) +80000614: 03 27 45 00 lw a4, 4(a0) +80000618: 83 24 85 00 lw s1, 8(a0) +8000061c: 03 24 c5 00 lw s0, 12(a0) +80000620: 03 25 05 01 lw a0, 16(a0) +80000624: 83 28 07 00 lw a7, 0(a4) +80000628: 83 a9 04 00 lw s3, 0(s1) +8000062c: 03 24 04 00 lw s0, 0(s0) +80000630: 03 23 05 00 lw t1, 0(a0) +80000634: 83 ac 85 01 lw s9, 24(a1) +80000638: 03 ae c5 01 lw t3, 28(a1) +8000063c: 83 a2 05 02 lw t0, 32(a1) +80000640: 03 a5 05 01 lw a0, 16(a1) +80000644: 83 ae c5 00 lw t4, 12(a1) +80000648: b3 85 cc 02 mul a1, s9, a2 +8000064c: b3 06 de 02 mul a3, t3, a3 +80000650: 33 0f d5 00 add t5, a0, a3 +80000654: 13 46 f3 ff not a2, t1 +80000658: 33 06 c4 00 add a2, s0, a2 +8000065c: b3 0f 64 40 sub t6, s0, t1 +80000660: b3 03 83 02 mul t2, t1, s0 +80000664: 93 14 23 00 slli s1, t1, 2 +80000668: 33 8a 99 00 add s4, s3, s1 +8000066c: 33 05 65 00 add a0, a0, t1 +80000670: 33 05 d5 00 add a0, a0, a3 +80000674: b3 86 6e 00 add a3, t4, t1 +80000678: b3 86 b6 00 add a3, a3, a1 +8000067c: 93 86 16 00 addi a3, a3, 1 +80000680: b3 04 d4 02 mul s1, s0, a3 +80000684: 33 05 95 00 add a0, a0, s1 +80000688: 93 17 25 00 slli a5, a0, 2 +8000068c: 33 85 be 00 add a0, t4, a1 +80000690: b3 8e f8 00 add t4, a7, a5 +80000694: 93 15 24 00 slli a1, s0, 2 +80000698: 33 04 93 00 add s0, t1, s1 +8000069c: 13 14 24 00 slli s0, s0, 2 +800006a0: b3 0a 89 00 add s5, s2, s0 +800006a4: 93 96 26 00 slli a3, a3, 2 +800006a8: b3 89 d9 00 add s3, s3, a3 +800006ac: 33 09 f9 00 add s2, s2, a5 +800006b0: 6f 00 c0 00 j 12 +800006b4: 13 08 18 00 addi a6, a6, 1 +800006b8: 63 76 58 0e bgeu a6, t0, 236 +800006bc: 13 0b 00 00 mv s6, zero +800006c0: 93 0b 09 00 mv s7, s2 +800006c4: 93 87 0e 00 mv a5, t4 +800006c8: 6f 00 40 01 j 20 +800006cc: 13 0b 1b 00 addi s6, s6, 1 +800006d0: 93 87 47 00 addi a5, a5, 4 +800006d4: 93 8b 4b 00 addi s7, s7, 4 +800006d8: e3 7e cb fd bgeu s6, t3, -36 +800006dc: b3 06 6f 01 add a3, t5, s6 +800006e0: e3 d6 f6 ff bge a3, t6, -20 +800006e4: 33 84 66 00 add s0, a3, t1 +800006e8: 33 04 74 00 add s0, s0, t2 +800006ec: 13 14 24 00 slli s0, s0, 2 +800006f0: 33 8c 88 00 add s8, a7, s0 +800006f4: 63 84 06 04 beqz a3, 72 +800006f8: 93 06 00 00 mv a3, zero +800006fc: 13 04 00 00 mv s0, zero +80000700: 6f 00 00 01 j 16 +80000704: 13 04 14 00 addi s0, s0, 1 +80000708: b3 86 b6 00 add a3, a3, a1 +8000070c: e3 70 94 fd bgeu s0, s9, -64 +80000710: 33 07 85 00 add a4, a0, s0 +80000714: e3 58 c7 fe bge a4, a2, -16 +80000718: 33 87 da 00 add a4, s5, a3 +8000071c: 07 20 07 00 flw ft0, 0(a4) +80000720: 87 20 0c 00 flw ft1, 0(s8) +80000724: 33 87 d7 00 add a4, a5, a3 +80000728: 07 21 07 00 flw ft2, 0(a4) +8000072c: 53 70 10 10 fmul.s ft0, ft0, ft1 +80000730: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000734: 27 20 07 00 fsw ft0, 0(a4) +80000738: 6f f0 df fc j -52 +8000073c: 93 04 00 00 mv s1, zero +80000740: 13 04 00 00 mv s0, zero +80000744: 93 86 09 00 mv a3, s3 +80000748: 6f 00 40 01 j 20 8000074c: 13 04 14 00 addi s0, s0, 1 -80000750: b3 86 b6 00 add a3, a3, a1 -80000754: e3 70 94 fd bgeu s0, s9, -64 -80000758: 33 07 85 00 add a4, a0, s0 -8000075c: e3 58 c7 fe bge a4, a2, -16 -80000760: 33 87 da 00 add a4, s5, a3 -80000764: 07 20 07 00 flw ft0, 0(a4) -80000768: 87 20 0c 00 flw ft1, 0(s8) -8000076c: 33 87 d7 00 add a4, a5, a3 -80000770: 07 21 07 00 flw ft2, 0(a4) -80000774: 53 70 10 10 fmul.s ft0, ft0, ft1 -80000778: 53 70 01 08 fsub.s ft0, ft2, ft0 -8000077c: 27 20 07 00 fsw ft0, 0(a4) -80000780: 6f f0 df fc j -52 -80000784: 93 04 00 00 mv s1, zero -80000788: 13 04 00 00 mv s0, zero -8000078c: 93 86 09 00 mv a3, s3 -80000790: 6f 00 40 01 j 20 -80000794: 13 04 14 00 addi s0, s0, 1 -80000798: 93 86 46 00 addi a3, a3, 4 -8000079c: b3 84 b4 00 add s1, s1, a1 -800007a0: e3 7a 94 f7 bgeu s0, s9, -140 -800007a4: 33 07 85 00 add a4, a0, s0 -800007a8: e3 56 c7 fe bge a4, a2, -20 -800007ac: 33 87 9a 00 add a4, s5, s1 -800007b0: 07 20 07 00 flw ft0, 0(a4) -800007b4: 87 20 0c 00 flw ft1, 0(s8) -800007b8: 33 87 97 00 add a4, a5, s1 -800007bc: 07 21 07 00 flw ft2, 0(a4) -800007c0: 53 70 10 10 fmul.s ft0, ft0, ft1 -800007c4: 53 70 01 08 fsub.s ft0, ft2, ft0 -800007c8: 27 20 07 00 fsw ft0, 0(a4) -800007cc: 33 87 9b 00 add a4, s7, s1 -800007d0: 07 20 07 00 flw ft0, 0(a4) -800007d4: 87 20 0a 00 flw ft1, 0(s4) -800007d8: 07 a1 06 00 flw ft2, 0(a3) -800007dc: 53 70 10 10 fmul.s ft0, ft0, ft1 -800007e0: 53 70 01 08 fsub.s ft0, ft2, ft0 -800007e4: 27 a0 06 00 fsw ft0, 0(a3) -800007e8: 6f f0 df fa j -84 -800007ec: 83 2c 81 00 lw s9, 8(sp) -800007f0: 03 2c c1 00 lw s8, 12(sp) -800007f4: 83 2b 01 01 lw s7, 16(sp) -800007f8: 03 2b 41 01 lw s6, 20(sp) -800007fc: 83 2a 81 01 lw s5, 24(sp) -80000800: 03 2a c1 01 lw s4, 28(sp) -80000804: 83 29 01 02 lw s3, 32(sp) -80000808: 03 29 41 02 lw s2, 36(sp) -8000080c: 83 24 81 02 lw s1, 40(sp) -80000810: 03 24 c1 02 lw s0, 44(sp) -80000814: 13 01 01 03 addi sp, sp, 48 -80000818: 67 80 00 00 ret +80000750: 93 86 46 00 addi a3, a3, 4 +80000754: b3 84 b4 00 add s1, s1, a1 +80000758: e3 7a 94 f7 bgeu s0, s9, -140 +8000075c: 33 07 85 00 add a4, a0, s0 +80000760: e3 56 c7 fe bge a4, a2, -20 +80000764: 33 87 9a 00 add a4, s5, s1 +80000768: 07 20 07 00 flw ft0, 0(a4) +8000076c: 87 20 0c 00 flw ft1, 0(s8) +80000770: 33 87 97 00 add a4, a5, s1 +80000774: 07 21 07 00 flw ft2, 0(a4) +80000778: 53 70 10 10 fmul.s ft0, ft0, ft1 +8000077c: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000780: 27 20 07 00 fsw ft0, 0(a4) +80000784: 33 87 9b 00 add a4, s7, s1 +80000788: 07 20 07 00 flw ft0, 0(a4) +8000078c: 87 20 0a 00 flw ft1, 0(s4) +80000790: 07 a1 06 00 flw ft2, 0(a3) +80000794: 53 70 10 10 fmul.s ft0, ft0, ft1 +80000798: 53 70 01 08 fsub.s ft0, ft2, ft0 +8000079c: 27 a0 06 00 fsw ft0, 0(a3) +800007a0: 6f f0 df fa j -84 +800007a4: 83 2c 81 00 lw s9, 8(sp) +800007a8: 03 2c c1 00 lw s8, 12(sp) +800007ac: 83 2b 01 01 lw s7, 16(sp) +800007b0: 03 2b 41 01 lw s6, 20(sp) +800007b4: 83 2a 81 01 lw s5, 24(sp) +800007b8: 03 2a c1 01 lw s4, 28(sp) +800007bc: 83 29 01 02 lw s3, 32(sp) +800007c0: 03 29 41 02 lw s2, 36(sp) +800007c4: 83 24 81 02 lw s1, 40(sp) +800007c8: 03 24 c1 02 lw s0, 44(sp) +800007cc: 13 01 01 03 addi sp, sp, 48 +800007d0: 67 80 00 00 ret -8000081c _pocl_kernel_Fan2_workgroup_fast: -8000081c: 13 01 01 fd addi sp, sp, -48 -80000820: 23 26 81 02 sw s0, 44(sp) -80000824: 23 24 91 02 sw s1, 40(sp) -80000828: 23 22 21 03 sw s2, 36(sp) -8000082c: 23 20 31 03 sw s3, 32(sp) -80000830: 23 2e 41 01 sw s4, 28(sp) -80000834: 23 2c 51 01 sw s5, 24(sp) -80000838: 23 2a 61 01 sw s6, 20(sp) -8000083c: 23 28 71 01 sw s7, 16(sp) -80000840: 23 26 81 01 sw s8, 12(sp) -80000844: 23 24 91 01 sw s9, 8(sp) -80000848: 13 08 00 00 mv a6, zero -8000084c: 03 29 05 00 lw s2, 0(a0) -80000850: 03 27 c5 00 lw a4, 12(a0) -80000854: 83 27 05 01 lw a5, 16(a0) -80000858: 83 28 45 00 lw a7, 4(a0) -8000085c: 83 29 85 00 lw s3, 8(a0) -80000860: 83 24 07 00 lw s1, 0(a4) -80000864: 03 a3 07 00 lw t1, 0(a5) -80000868: 83 ac 85 01 lw s9, 24(a1) -8000086c: 03 ae c5 01 lw t3, 28(a1) -80000870: 83 a2 05 02 lw t0, 32(a1) -80000874: 83 a7 05 01 lw a5, 16(a1) -80000878: 83 ae c5 00 lw t4, 12(a1) -8000087c: b3 85 cc 02 mul a1, s9, a2 -80000880: b3 06 de 02 mul a3, t3, a3 -80000884: 33 8f d7 00 add t5, a5, a3 -80000888: 13 46 f3 ff not a2, t1 -8000088c: 33 86 c4 00 add a2, s1, a2 -80000890: b3 8f 64 40 sub t6, s1, t1 -80000894: b3 03 93 02 mul t2, t1, s1 -80000898: 13 15 23 00 slli a0, t1, 2 -8000089c: 33 8a a9 00 add s4, s3, a0 -800008a0: 33 85 67 00 add a0, a5, t1 -800008a4: b3 07 d5 00 add a5, a0, a3 -800008a8: b3 86 6e 00 add a3, t4, t1 -800008ac: b3 86 b6 00 add a3, a3, a1 -800008b0: 93 86 16 00 addi a3, a3, 1 -800008b4: 33 85 d4 02 mul a0, s1, a3 -800008b8: b3 87 a7 00 add a5, a5, a0 -800008bc: 13 94 27 00 slli s0, a5, 2 -800008c0: b3 87 be 00 add a5, t4, a1 -800008c4: b3 8e 88 00 add t4, a7, s0 -800008c8: 93 95 24 00 slli a1, s1, 2 -800008cc: 33 05 a3 00 add a0, t1, a0 -800008d0: 13 15 25 00 slli a0, a0, 2 -800008d4: b3 0a a9 00 add s5, s2, a0 -800008d8: 13 95 26 00 slli a0, a3, 2 -800008dc: b3 89 a9 00 add s3, s3, a0 -800008e0: 33 09 89 00 add s2, s2, s0 -800008e4: 6f 00 c0 00 j 12 -800008e8: 13 08 18 00 addi a6, a6, 1 -800008ec: 63 76 58 0e bgeu a6, t0, 236 -800008f0: 13 0b 00 00 mv s6, zero -800008f4: 93 0b 09 00 mv s7, s2 -800008f8: 13 85 0e 00 mv a0, t4 -800008fc: 6f 00 40 01 j 20 -80000900: 13 0b 1b 00 addi s6, s6, 1 -80000904: 13 05 45 00 addi a0, a0, 4 -80000908: 93 8b 4b 00 addi s7, s7, 4 -8000090c: e3 7e cb fd bgeu s6, t3, -36 -80000910: b3 06 6f 01 add a3, t5, s6 -80000914: e3 d6 f6 ff bge a3, t6, -20 -80000918: 33 84 66 00 add s0, a3, t1 -8000091c: 33 04 74 00 add s0, s0, t2 -80000920: 13 14 24 00 slli s0, s0, 2 -80000924: 33 8c 88 00 add s8, a7, s0 -80000928: 63 84 06 04 beqz a3, 72 -8000092c: 93 06 00 00 mv a3, zero -80000930: 13 04 00 00 mv s0, zero -80000934: 6f 00 00 01 j 16 +800007d4 _pocl_kernel_Fan2_workgroup_fast: +800007d4: 13 01 01 fd addi sp, sp, -48 +800007d8: 23 26 81 02 sw s0, 44(sp) +800007dc: 23 24 91 02 sw s1, 40(sp) +800007e0: 23 22 21 03 sw s2, 36(sp) +800007e4: 23 20 31 03 sw s3, 32(sp) +800007e8: 23 2e 41 01 sw s4, 28(sp) +800007ec: 23 2c 51 01 sw s5, 24(sp) +800007f0: 23 2a 61 01 sw s6, 20(sp) +800007f4: 23 28 71 01 sw s7, 16(sp) +800007f8: 23 26 81 01 sw s8, 12(sp) +800007fc: 23 24 91 01 sw s9, 8(sp) +80000800: 13 08 00 00 mv a6, zero +80000804: 03 29 05 00 lw s2, 0(a0) +80000808: 03 27 c5 00 lw a4, 12(a0) +8000080c: 83 27 05 01 lw a5, 16(a0) +80000810: 83 28 45 00 lw a7, 4(a0) +80000814: 83 29 85 00 lw s3, 8(a0) +80000818: 83 24 07 00 lw s1, 0(a4) +8000081c: 03 a3 07 00 lw t1, 0(a5) +80000820: 83 ac 85 01 lw s9, 24(a1) +80000824: 03 ae c5 01 lw t3, 28(a1) +80000828: 83 a2 05 02 lw t0, 32(a1) +8000082c: 83 a7 05 01 lw a5, 16(a1) +80000830: 83 ae c5 00 lw t4, 12(a1) +80000834: b3 85 cc 02 mul a1, s9, a2 +80000838: b3 06 de 02 mul a3, t3, a3 +8000083c: 33 8f d7 00 add t5, a5, a3 +80000840: 13 46 f3 ff not a2, t1 +80000844: 33 86 c4 00 add a2, s1, a2 +80000848: b3 8f 64 40 sub t6, s1, t1 +8000084c: b3 03 93 02 mul t2, t1, s1 +80000850: 13 15 23 00 slli a0, t1, 2 +80000854: 33 8a a9 00 add s4, s3, a0 +80000858: 33 85 67 00 add a0, a5, t1 +8000085c: b3 07 d5 00 add a5, a0, a3 +80000860: b3 86 6e 00 add a3, t4, t1 +80000864: b3 86 b6 00 add a3, a3, a1 +80000868: 93 86 16 00 addi a3, a3, 1 +8000086c: 33 85 d4 02 mul a0, s1, a3 +80000870: b3 87 a7 00 add a5, a5, a0 +80000874: 13 94 27 00 slli s0, a5, 2 +80000878: b3 87 be 00 add a5, t4, a1 +8000087c: b3 8e 88 00 add t4, a7, s0 +80000880: 93 95 24 00 slli a1, s1, 2 +80000884: 33 05 a3 00 add a0, t1, a0 +80000888: 13 15 25 00 slli a0, a0, 2 +8000088c: b3 0a a9 00 add s5, s2, a0 +80000890: 13 95 26 00 slli a0, a3, 2 +80000894: b3 89 a9 00 add s3, s3, a0 +80000898: 33 09 89 00 add s2, s2, s0 +8000089c: 6f 00 c0 00 j 12 +800008a0: 13 08 18 00 addi a6, a6, 1 +800008a4: 63 76 58 0e bgeu a6, t0, 236 +800008a8: 13 0b 00 00 mv s6, zero +800008ac: 93 0b 09 00 mv s7, s2 +800008b0: 13 85 0e 00 mv a0, t4 +800008b4: 6f 00 40 01 j 20 +800008b8: 13 0b 1b 00 addi s6, s6, 1 +800008bc: 13 05 45 00 addi a0, a0, 4 +800008c0: 93 8b 4b 00 addi s7, s7, 4 +800008c4: e3 7e cb fd bgeu s6, t3, -36 +800008c8: b3 06 6f 01 add a3, t5, s6 +800008cc: e3 d6 f6 ff bge a3, t6, -20 +800008d0: 33 84 66 00 add s0, a3, t1 +800008d4: 33 04 74 00 add s0, s0, t2 +800008d8: 13 14 24 00 slli s0, s0, 2 +800008dc: 33 8c 88 00 add s8, a7, s0 +800008e0: 63 84 06 04 beqz a3, 72 +800008e4: 93 06 00 00 mv a3, zero +800008e8: 13 04 00 00 mv s0, zero +800008ec: 6f 00 00 01 j 16 +800008f0: 13 04 14 00 addi s0, s0, 1 +800008f4: b3 86 b6 00 add a3, a3, a1 +800008f8: e3 70 94 fd bgeu s0, s9, -64 +800008fc: 33 87 87 00 add a4, a5, s0 +80000900: e3 58 c7 fe bge a4, a2, -16 +80000904: 33 87 da 00 add a4, s5, a3 +80000908: 07 20 07 00 flw ft0, 0(a4) +8000090c: 87 20 0c 00 flw ft1, 0(s8) +80000910: 33 07 d5 00 add a4, a0, a3 +80000914: 07 21 07 00 flw ft2, 0(a4) +80000918: 53 70 10 10 fmul.s ft0, ft0, ft1 +8000091c: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000920: 27 20 07 00 fsw ft0, 0(a4) +80000924: 6f f0 df fc j -52 +80000928: 93 04 00 00 mv s1, zero +8000092c: 13 04 00 00 mv s0, zero +80000930: 93 86 09 00 mv a3, s3 +80000934: 6f 00 40 01 j 20 80000938: 13 04 14 00 addi s0, s0, 1 -8000093c: b3 86 b6 00 add a3, a3, a1 -80000940: e3 70 94 fd bgeu s0, s9, -64 -80000944: 33 87 87 00 add a4, a5, s0 -80000948: e3 58 c7 fe bge a4, a2, -16 -8000094c: 33 87 da 00 add a4, s5, a3 -80000950: 07 20 07 00 flw ft0, 0(a4) -80000954: 87 20 0c 00 flw ft1, 0(s8) -80000958: 33 07 d5 00 add a4, a0, a3 -8000095c: 07 21 07 00 flw ft2, 0(a4) -80000960: 53 70 10 10 fmul.s ft0, ft0, ft1 -80000964: 53 70 01 08 fsub.s ft0, ft2, ft0 -80000968: 27 20 07 00 fsw ft0, 0(a4) -8000096c: 6f f0 df fc j -52 -80000970: 93 04 00 00 mv s1, zero -80000974: 13 04 00 00 mv s0, zero -80000978: 93 86 09 00 mv a3, s3 -8000097c: 6f 00 40 01 j 20 -80000980: 13 04 14 00 addi s0, s0, 1 -80000984: 93 86 46 00 addi a3, a3, 4 -80000988: b3 84 b4 00 add s1, s1, a1 -8000098c: e3 7a 94 f7 bgeu s0, s9, -140 -80000990: 33 87 87 00 add a4, a5, s0 -80000994: e3 56 c7 fe bge a4, a2, -20 -80000998: 33 87 9a 00 add a4, s5, s1 -8000099c: 07 20 07 00 flw ft0, 0(a4) -800009a0: 87 20 0c 00 flw ft1, 0(s8) -800009a4: 33 07 95 00 add a4, a0, s1 -800009a8: 07 21 07 00 flw ft2, 0(a4) -800009ac: 53 70 10 10 fmul.s ft0, ft0, ft1 -800009b0: 53 70 01 08 fsub.s ft0, ft2, ft0 -800009b4: 27 20 07 00 fsw ft0, 0(a4) -800009b8: 33 87 9b 00 add a4, s7, s1 -800009bc: 07 20 07 00 flw ft0, 0(a4) -800009c0: 87 20 0a 00 flw ft1, 0(s4) -800009c4: 07 a1 06 00 flw ft2, 0(a3) -800009c8: 53 70 10 10 fmul.s ft0, ft0, ft1 -800009cc: 53 70 01 08 fsub.s ft0, ft2, ft0 -800009d0: 27 a0 06 00 fsw ft0, 0(a3) -800009d4: 6f f0 df fa j -84 -800009d8: 83 2c 81 00 lw s9, 8(sp) -800009dc: 03 2c c1 00 lw s8, 12(sp) -800009e0: 83 2b 01 01 lw s7, 16(sp) -800009e4: 03 2b 41 01 lw s6, 20(sp) -800009e8: 83 2a 81 01 lw s5, 24(sp) -800009ec: 03 2a c1 01 lw s4, 28(sp) -800009f0: 83 29 01 02 lw s3, 32(sp) -800009f4: 03 29 41 02 lw s2, 36(sp) -800009f8: 83 24 81 02 lw s1, 40(sp) -800009fc: 03 24 c1 02 lw s0, 44(sp) -80000a00: 13 01 01 03 addi sp, sp, 48 +8000093c: 93 86 46 00 addi a3, a3, 4 +80000940: b3 84 b4 00 add s1, s1, a1 +80000944: e3 7a 94 f7 bgeu s0, s9, -140 +80000948: 33 87 87 00 add a4, a5, s0 +8000094c: e3 56 c7 fe bge a4, a2, -20 +80000950: 33 87 9a 00 add a4, s5, s1 +80000954: 07 20 07 00 flw ft0, 0(a4) +80000958: 87 20 0c 00 flw ft1, 0(s8) +8000095c: 33 07 95 00 add a4, a0, s1 +80000960: 07 21 07 00 flw ft2, 0(a4) +80000964: 53 70 10 10 fmul.s ft0, ft0, ft1 +80000968: 53 70 01 08 fsub.s ft0, ft2, ft0 +8000096c: 27 20 07 00 fsw ft0, 0(a4) +80000970: 33 87 9b 00 add a4, s7, s1 +80000974: 07 20 07 00 flw ft0, 0(a4) +80000978: 87 20 0a 00 flw ft1, 0(s4) +8000097c: 07 a1 06 00 flw ft2, 0(a3) +80000980: 53 70 10 10 fmul.s ft0, ft0, ft1 +80000984: 53 70 01 08 fsub.s ft0, ft2, ft0 +80000988: 27 a0 06 00 fsw ft0, 0(a3) +8000098c: 6f f0 df fa j -84 +80000990: 83 2c 81 00 lw s9, 8(sp) +80000994: 03 2c c1 00 lw s8, 12(sp) +80000998: 83 2b 01 01 lw s7, 16(sp) +8000099c: 03 2b 41 01 lw s6, 20(sp) +800009a0: 83 2a 81 01 lw s5, 24(sp) +800009a4: 03 2a c1 01 lw s4, 28(sp) +800009a8: 83 29 01 02 lw s3, 32(sp) +800009ac: 03 29 41 02 lw s2, 36(sp) +800009b0: 83 24 81 02 lw s1, 40(sp) +800009b4: 03 24 c1 02 lw s0, 44(sp) +800009b8: 13 01 01 03 addi sp, sp, 48 +800009bc: 67 80 00 00 ret + +800009c0 _exit: +800009c0: 13 05 00 00 mv a0, zero +800009c4: 6b 00 05 00 + +800009c8 vx_set_sp: +800009c8: 73 25 50 02 csrr a0, 37 +800009cc: 6b 00 05 00 +800009d0: 97 11 00 00 auipc gp, 1 +800009d4: 93 81 81 e3 addi gp, gp, -456 +800009d8: f3 25 20 02 csrr a1, 34 +800009dc: 93 95 a5 00 slli a1, a1, 10 +800009e0: 73 26 00 02 csrr a2, 32 +800009e4: 13 16 26 00 slli a2, a2, 2 +800009e8: 37 f1 ff 6f lui sp, 458751 +800009ec: 33 01 b1 40 sub sp, sp, a1 +800009f0: 33 01 c1 00 add sp, sp, a2 +800009f4: f3 26 10 02 csrr a3, 33 +800009f8: 63 86 06 00 beqz a3, 12 +800009fc: 13 05 00 00 mv a0, zero +80000a00: 6b 00 05 00 + +80000a04 RETURN: 80000a04: 67 80 00 00 ret -80000a08 _exit: -80000a08: 13 05 00 00 mv a0, zero -80000a0c: 6b 00 05 00 +80000a08 vx_wspawn: +80000a08: 6b 10 b5 00 +80000a0c: 67 80 00 00 ret -80000a10 vx_set_sp: -80000a10: 73 25 50 02 csrr a0, 37 -80000a14: 6b 00 05 00 -80000a18: 97 11 00 00 auipc gp, 1 -80000a1c: 93 81 01 df addi gp, gp, -528 -80000a20: f3 25 20 02 csrr a1, 34 -80000a24: 93 95 a5 00 slli a1, a1, 10 -80000a28: 73 26 00 02 csrr a2, 32 -80000a2c: 13 16 26 00 slli a2, a2, 2 -80000a30: 37 f1 ff 6f lui sp, 458751 -80000a34: 33 01 b1 40 sub sp, sp, a1 -80000a38: 33 01 c1 00 add sp, sp, a2 -80000a3c: f3 26 10 02 csrr a3, 33 -80000a40: 63 86 06 00 beqz a3, 12 -80000a44: 13 05 00 00 mv a0, zero -80000a48: 6b 00 05 00 +80000a10 vx_tmc: +80000a10: 6b 00 05 00 +80000a14: 67 80 00 00 ret -80000a4c RETURN: +80000a18 vx_barrier: +80000a18: 6b 40 b5 00 +80000a1c: 67 80 00 00 ret + +80000a20 vx_split: +80000a20: 6b 20 05 00 +80000a24: 67 80 00 00 ret + +80000a28 vx_join: +80000a28: 6b 30 00 00 +80000a2c: 67 80 00 00 ret + +80000a30 vx_warp_id: +80000a30: 73 25 10 02 csrr a0, 33 +80000a34: 67 80 00 00 ret + +80000a38 vx_warp_gid: +80000a38: 73 25 30 02 csrr a0, 35 +80000a3c: 67 80 00 00 ret + +80000a40 vx_thread_id: +80000a40: 73 25 00 02 csrr a0, 32 +80000a44: 67 80 00 00 ret + +80000a48 vx_thread_gid: +80000a48: 73 25 20 02 csrr a0, 34 80000a4c: 67 80 00 00 ret -80000a50 vx_wspawn: -80000a50: 6b 10 b5 00 +80000a50 vx_core_id: +80000a50: 73 25 40 02 csrr a0, 36 80000a54: 67 80 00 00 ret -80000a58 vx_tmc: -80000a58: 6b 00 05 00 +80000a58 vx_num_threads: +80000a58: 73 25 50 02 csrr a0, 37 80000a5c: 67 80 00 00 ret -80000a60 vx_barrier: -80000a60: 6b 40 b5 00 +80000a60 vx_num_warps: +80000a60: 73 25 60 02 csrr a0, 38 80000a64: 67 80 00 00 ret -80000a68 vx_split: -80000a68: 6b 20 05 00 +80000a68 vx_num_cores: +80000a68: 73 25 70 02 csrr a0, 39 80000a6c: 67 80 00 00 ret -80000a70 vx_join: -80000a70: 6b 30 00 00 +80000a70 vx_num_cycles: +80000a70: 73 25 00 b0 csrr a0, mcycle 80000a74: 67 80 00 00 ret -80000a78 vx_warp_id: -80000a78: 73 25 10 02 csrr a0, 33 +80000a78 vx_num_instrs: +80000a78: 73 25 20 b0 csrr a0, minstret 80000a7c: 67 80 00 00 ret -80000a80 vx_warp_gid: -80000a80: 73 25 30 02 csrr a0, 35 -80000a84: 67 80 00 00 ret +80000a80 atexit: +80000a80: 93 05 05 00 mv a1, a0 +80000a84: 93 06 00 00 mv a3, zero +80000a88: 13 06 00 00 mv a2, zero +80000a8c: 13 05 00 00 mv a0, zero +80000a90: 6f 00 80 20 j 520 -80000a88 vx_thread_id: -80000a88: 73 25 00 02 csrr a0, 32 -80000a8c: 67 80 00 00 ret +80000a94 exit: +80000a94: 13 01 01 ff addi sp, sp, -16 +80000a98: 93 05 00 00 mv a1, zero +80000a9c: 23 24 81 00 sw s0, 8(sp) +80000aa0: 23 26 11 00 sw ra, 12(sp) +80000aa4: 13 04 05 00 mv s0, a0 +80000aa8: ef 00 80 28 jal 648 +80000aac: 03 a5 81 c2 lw a0, -984(gp) +80000ab0: 83 27 c5 03 lw a5, 60(a0) +80000ab4: 63 84 07 00 beqz a5, 8 +80000ab8: e7 80 07 00 jalr a5 +80000abc: 13 05 04 00 mv a0, s0 +80000ac0: ef f0 1f f0 jal -256 -80000a90 vx_thread_gid: -80000a90: 73 25 20 02 csrr a0, 34 -80000a94: 67 80 00 00 ret +80000ac4 __libc_fini_array: +80000ac4: 13 01 01 ff addi sp, sp, -16 +80000ac8: 23 24 81 00 sw s0, 8(sp) +80000acc: b7 17 00 80 lui a5, 524289 +80000ad0: 37 14 00 80 lui s0, 524289 +80000ad4: 13 04 44 00 addi s0, s0, 4 +80000ad8: 93 87 47 00 addi a5, a5, 4 +80000adc: b3 87 87 40 sub a5, a5, s0 +80000ae0: 23 22 91 00 sw s1, 4(sp) +80000ae4: 23 26 11 00 sw ra, 12(sp) +80000ae8: 93 d4 27 40 srai s1, a5, 2 +80000aec: 63 80 04 02 beqz s1, 32 +80000af0: 93 87 c7 ff addi a5, a5, -4 +80000af4: 33 84 87 00 add s0, a5, s0 +80000af8: 83 27 04 00 lw a5, 0(s0) +80000afc: 93 84 f4 ff addi s1, s1, -1 +80000b00: 13 04 c4 ff addi s0, s0, -4 +80000b04: e7 80 07 00 jalr a5 +80000b08: e3 98 04 fe bnez s1, -16 +80000b0c: 83 20 c1 00 lw ra, 12(sp) +80000b10: 03 24 81 00 lw s0, 8(sp) +80000b14: 83 24 41 00 lw s1, 4(sp) +80000b18: 13 01 01 01 addi sp, sp, 16 +80000b1c: 67 80 00 00 ret -80000a98 vx_core_id: -80000a98: 73 25 40 02 csrr a0, 36 -80000a9c: 67 80 00 00 ret +80000b20 __libc_init_array: +80000b20: 13 01 01 ff addi sp, sp, -16 +80000b24: 23 24 81 00 sw s0, 8(sp) +80000b28: 23 20 21 01 sw s2, 0(sp) +80000b2c: 37 14 00 80 lui s0, 524289 +80000b30: 37 19 00 80 lui s2, 524289 +80000b34: 93 07 04 00 mv a5, s0 +80000b38: 13 09 09 00 mv s2, s2 +80000b3c: 33 09 f9 40 sub s2, s2, a5 +80000b40: 23 26 11 00 sw ra, 12(sp) +80000b44: 23 22 91 00 sw s1, 4(sp) +80000b48: 13 59 29 40 srai s2, s2, 2 +80000b4c: 63 00 09 02 beqz s2, 32 +80000b50: 13 04 04 00 mv s0, s0 +80000b54: 93 04 00 00 mv s1, zero +80000b58: 83 27 04 00 lw a5, 0(s0) +80000b5c: 93 84 14 00 addi s1, s1, 1 +80000b60: 13 04 44 00 addi s0, s0, 4 +80000b64: e7 80 07 00 jalr a5 +80000b68: e3 18 99 fe bne s2, s1, -16 +80000b6c: 37 14 00 80 lui s0, 524289 +80000b70: 37 19 00 80 lui s2, 524289 +80000b74: 93 07 04 00 mv a5, s0 +80000b78: 13 09 49 00 addi s2, s2, 4 +80000b7c: 33 09 f9 40 sub s2, s2, a5 +80000b80: 13 59 29 40 srai s2, s2, 2 +80000b84: 63 00 09 02 beqz s2, 32 +80000b88: 13 04 04 00 mv s0, s0 +80000b8c: 93 04 00 00 mv s1, zero +80000b90: 83 27 04 00 lw a5, 0(s0) +80000b94: 93 84 14 00 addi s1, s1, 1 +80000b98: 13 04 44 00 addi s0, s0, 4 +80000b9c: e7 80 07 00 jalr a5 +80000ba0: e3 18 99 fe bne s2, s1, -16 +80000ba4: 83 20 c1 00 lw ra, 12(sp) +80000ba8: 03 24 81 00 lw s0, 8(sp) +80000bac: 83 24 41 00 lw s1, 4(sp) +80000bb0: 03 29 01 00 lw s2, 0(sp) +80000bb4: 13 01 01 01 addi sp, sp, 16 +80000bb8: 67 80 00 00 ret -80000aa0 vx_num_threads: -80000aa0: 73 25 50 02 csrr a0, 37 -80000aa4: 67 80 00 00 ret +80000bbc memset: +80000bbc: 13 03 f0 00 addi t1, zero, 15 +80000bc0: 13 07 05 00 mv a4, a0 +80000bc4: 63 7e c3 02 bgeu t1, a2, 60 +80000bc8: 93 77 f7 00 andi a5, a4, 15 +80000bcc: 63 90 07 0a bnez a5, 160 +80000bd0: 63 92 05 08 bnez a1, 132 +80000bd4: 93 76 06 ff andi a3, a2, -16 +80000bd8: 13 76 f6 00 andi a2, a2, 15 +80000bdc: b3 86 e6 00 add a3, a3, a4 +80000be0: 23 20 b7 00 sw a1, 0(a4) +80000be4: 23 22 b7 00 sw a1, 4(a4) +80000be8: 23 24 b7 00 sw a1, 8(a4) +80000bec: 23 26 b7 00 sw a1, 12(a4) +80000bf0: 13 07 07 01 addi a4, a4, 16 +80000bf4: e3 66 d7 fe bltu a4, a3, -20 +80000bf8: 63 14 06 00 bnez a2, 8 +80000bfc: 67 80 00 00 ret +80000c00: b3 06 c3 40 sub a3, t1, a2 +80000c04: 93 96 26 00 slli a3, a3, 2 +80000c08: 97 02 00 00 auipc t0, 0 +80000c0c: b3 86 56 00 add a3, a3, t0 +80000c10: 67 80 c6 00 jr 12(a3) +80000c14: 23 07 b7 00 sb a1, 14(a4) +80000c18: a3 06 b7 00 sb a1, 13(a4) +80000c1c: 23 06 b7 00 sb a1, 12(a4) +80000c20: a3 05 b7 00 sb a1, 11(a4) +80000c24: 23 05 b7 00 sb a1, 10(a4) +80000c28: a3 04 b7 00 sb a1, 9(a4) +80000c2c: 23 04 b7 00 sb a1, 8(a4) +80000c30: a3 03 b7 00 sb a1, 7(a4) +80000c34: 23 03 b7 00 sb a1, 6(a4) +80000c38: a3 02 b7 00 sb a1, 5(a4) +80000c3c: 23 02 b7 00 sb a1, 4(a4) +80000c40: a3 01 b7 00 sb a1, 3(a4) +80000c44: 23 01 b7 00 sb a1, 2(a4) +80000c48: a3 00 b7 00 sb a1, 1(a4) +80000c4c: 23 00 b7 00 sb a1, 0(a4) +80000c50: 67 80 00 00 ret +80000c54: 93 f5 f5 0f andi a1, a1, 255 +80000c58: 93 96 85 00 slli a3, a1, 8 +80000c5c: b3 e5 d5 00 or a1, a1, a3 +80000c60: 93 96 05 01 slli a3, a1, 16 +80000c64: b3 e5 d5 00 or a1, a1, a3 +80000c68: 6f f0 df f6 j -148 +80000c6c: 93 96 27 00 slli a3, a5, 2 +80000c70: 97 02 00 00 auipc t0, 0 +80000c74: b3 86 56 00 add a3, a3, t0 +80000c78: 93 82 00 00 mv t0, ra +80000c7c: e7 80 06 fa jalr -96(a3) +80000c80: 93 80 02 00 mv ra, t0 +80000c84: 93 87 07 ff addi a5, a5, -16 +80000c88: 33 07 f7 40 sub a4, a4, a5 +80000c8c: 33 06 f6 00 add a2, a2, a5 +80000c90: e3 78 c3 f6 bgeu t1, a2, -144 +80000c94: 6f f0 df f3 j -196 -80000aa8 vx_num_warps: -80000aa8: 73 25 60 02 csrr a0, 38 -80000aac: 67 80 00 00 ret +80000c98 __register_exitproc: +80000c98: 03 a7 81 c2 lw a4, -984(gp) +80000c9c: 83 27 87 14 lw a5, 328(a4) +80000ca0: 63 8c 07 04 beqz a5, 88 +80000ca4: 03 a7 47 00 lw a4, 4(a5) +80000ca8: 13 08 f0 01 addi a6, zero, 31 +80000cac: 63 4e e8 06 blt a6, a4, 124 +80000cb0: 13 18 27 00 slli a6, a4, 2 +80000cb4: 63 06 05 02 beqz a0, 44 +80000cb8: 33 83 07 01 add t1, a5, a6 +80000cbc: 23 24 c3 08 sw a2, 136(t1) +80000cc0: 83 a8 87 18 lw a7, 392(a5) +80000cc4: 13 06 10 00 addi a2, zero, 1 +80000cc8: 33 16 e6 00 sll a2, a2, a4 +80000ccc: b3 e8 c8 00 or a7, a7, a2 +80000cd0: 23 a4 17 19 sw a7, 392(a5) +80000cd4: 23 24 d3 10 sw a3, 264(t1) +80000cd8: 93 06 20 00 addi a3, zero, 2 +80000cdc: 63 04 d5 02 beq a0, a3, 40 +80000ce0: 13 07 17 00 addi a4, a4, 1 +80000ce4: 23 a2 e7 00 sw a4, 4(a5) +80000ce8: b3 87 07 01 add a5, a5, a6 +80000cec: 23 a4 b7 00 sw a1, 8(a5) +80000cf0: 13 05 00 00 mv a0, zero +80000cf4: 67 80 00 00 ret +80000cf8: 93 07 c7 14 addi a5, a4, 332 +80000cfc: 23 24 f7 14 sw a5, 328(a4) +80000d00: 6f f0 5f fa j -92 +80000d04: 83 a6 c7 18 lw a3, 396(a5) +80000d08: 13 07 17 00 addi a4, a4, 1 +80000d0c: 23 a2 e7 00 sw a4, 4(a5) +80000d10: 33 e6 c6 00 or a2, a3, a2 +80000d14: 23 a6 c7 18 sw a2, 396(a5) +80000d18: b3 87 07 01 add a5, a5, a6 +80000d1c: 23 a4 b7 00 sw a1, 8(a5) +80000d20: 13 05 00 00 mv a0, zero +80000d24: 67 80 00 00 ret +80000d28: 13 05 f0 ff addi a0, zero, -1 +80000d2c: 67 80 00 00 ret -80000ab0 vx_num_cores: -80000ab0: 73 25 70 02 csrr a0, 39 -80000ab4: 67 80 00 00 ret - -80000ab8 vx_num_cycles: -80000ab8: 73 25 00 c0 rdcycle a0 -80000abc: 67 80 00 00 ret - -80000ac0 vx_num_instrs: -80000ac0: 73 25 20 c0 rdinstret a0 -80000ac4: 67 80 00 00 ret - -80000ac8 atexit: -80000ac8: 93 05 05 00 mv a1, a0 -80000acc: 93 06 00 00 mv a3, zero -80000ad0: 13 06 00 00 mv a2, zero -80000ad4: 13 05 00 00 mv a0, zero -80000ad8: 6f 00 80 20 j 520 - -80000adc exit: -80000adc: 13 01 01 ff addi sp, sp, -16 -80000ae0: 93 05 00 00 mv a1, zero -80000ae4: 23 24 81 00 sw s0, 8(sp) -80000ae8: 23 26 11 00 sw ra, 12(sp) -80000aec: 13 04 05 00 mv s0, a0 -80000af0: ef 00 80 28 jal 648 -80000af4: 03 a5 81 c2 lw a0, -984(gp) -80000af8: 83 27 c5 03 lw a5, 60(a0) -80000afc: 63 84 07 00 beqz a5, 8 -80000b00: e7 80 07 00 jalr a5 -80000b04: 13 05 04 00 mv a0, s0 -80000b08: ef f0 1f f0 jal -256 - -80000b0c __libc_fini_array: -80000b0c: 13 01 01 ff addi sp, sp, -16 -80000b10: 23 24 81 00 sw s0, 8(sp) -80000b14: b7 17 00 80 lui a5, 524289 -80000b18: 37 14 00 80 lui s0, 524289 -80000b1c: 13 04 44 00 addi s0, s0, 4 -80000b20: 93 87 47 00 addi a5, a5, 4 -80000b24: b3 87 87 40 sub a5, a5, s0 -80000b28: 23 22 91 00 sw s1, 4(sp) -80000b2c: 23 26 11 00 sw ra, 12(sp) -80000b30: 93 d4 27 40 srai s1, a5, 2 -80000b34: 63 80 04 02 beqz s1, 32 -80000b38: 93 87 c7 ff addi a5, a5, -4 -80000b3c: 33 84 87 00 add s0, a5, s0 -80000b40: 83 27 04 00 lw a5, 0(s0) -80000b44: 93 84 f4 ff addi s1, s1, -1 -80000b48: 13 04 c4 ff addi s0, s0, -4 -80000b4c: e7 80 07 00 jalr a5 -80000b50: e3 98 04 fe bnez s1, -16 -80000b54: 83 20 c1 00 lw ra, 12(sp) -80000b58: 03 24 81 00 lw s0, 8(sp) -80000b5c: 83 24 41 00 lw s1, 4(sp) -80000b60: 13 01 01 01 addi sp, sp, 16 -80000b64: 67 80 00 00 ret - -80000b68 __libc_init_array: -80000b68: 13 01 01 ff addi sp, sp, -16 -80000b6c: 23 24 81 00 sw s0, 8(sp) -80000b70: 23 20 21 01 sw s2, 0(sp) -80000b74: 37 14 00 80 lui s0, 524289 -80000b78: 37 19 00 80 lui s2, 524289 -80000b7c: 93 07 04 00 mv a5, s0 -80000b80: 13 09 09 00 mv s2, s2 -80000b84: 33 09 f9 40 sub s2, s2, a5 -80000b88: 23 26 11 00 sw ra, 12(sp) -80000b8c: 23 22 91 00 sw s1, 4(sp) -80000b90: 13 59 29 40 srai s2, s2, 2 -80000b94: 63 00 09 02 beqz s2, 32 -80000b98: 13 04 04 00 mv s0, s0 -80000b9c: 93 04 00 00 mv s1, zero -80000ba0: 83 27 04 00 lw a5, 0(s0) -80000ba4: 93 84 14 00 addi s1, s1, 1 -80000ba8: 13 04 44 00 addi s0, s0, 4 -80000bac: e7 80 07 00 jalr a5 -80000bb0: e3 18 99 fe bne s2, s1, -16 -80000bb4: 37 14 00 80 lui s0, 524289 -80000bb8: 37 19 00 80 lui s2, 524289 -80000bbc: 93 07 04 00 mv a5, s0 -80000bc0: 13 09 49 00 addi s2, s2, 4 -80000bc4: 33 09 f9 40 sub s2, s2, a5 -80000bc8: 13 59 29 40 srai s2, s2, 2 -80000bcc: 63 00 09 02 beqz s2, 32 -80000bd0: 13 04 04 00 mv s0, s0 -80000bd4: 93 04 00 00 mv s1, zero -80000bd8: 83 27 04 00 lw a5, 0(s0) -80000bdc: 93 84 14 00 addi s1, s1, 1 -80000be0: 13 04 44 00 addi s0, s0, 4 -80000be4: e7 80 07 00 jalr a5 -80000be8: e3 18 99 fe bne s2, s1, -16 -80000bec: 83 20 c1 00 lw ra, 12(sp) -80000bf0: 03 24 81 00 lw s0, 8(sp) -80000bf4: 83 24 41 00 lw s1, 4(sp) -80000bf8: 03 29 01 00 lw s2, 0(sp) -80000bfc: 13 01 01 01 addi sp, sp, 16 -80000c00: 67 80 00 00 ret - -80000c04 memset: -80000c04: 13 03 f0 00 addi t1, zero, 15 -80000c08: 13 07 05 00 mv a4, a0 -80000c0c: 63 7e c3 02 bgeu t1, a2, 60 -80000c10: 93 77 f7 00 andi a5, a4, 15 -80000c14: 63 90 07 0a bnez a5, 160 -80000c18: 63 92 05 08 bnez a1, 132 -80000c1c: 93 76 06 ff andi a3, a2, -16 -80000c20: 13 76 f6 00 andi a2, a2, 15 -80000c24: b3 86 e6 00 add a3, a3, a4 -80000c28: 23 20 b7 00 sw a1, 0(a4) -80000c2c: 23 22 b7 00 sw a1, 4(a4) -80000c30: 23 24 b7 00 sw a1, 8(a4) -80000c34: 23 26 b7 00 sw a1, 12(a4) -80000c38: 13 07 07 01 addi a4, a4, 16 -80000c3c: e3 66 d7 fe bltu a4, a3, -20 -80000c40: 63 14 06 00 bnez a2, 8 -80000c44: 67 80 00 00 ret -80000c48: b3 06 c3 40 sub a3, t1, a2 -80000c4c: 93 96 26 00 slli a3, a3, 2 -80000c50: 97 02 00 00 auipc t0, 0 -80000c54: b3 86 56 00 add a3, a3, t0 -80000c58: 67 80 c6 00 jr 12(a3) -80000c5c: 23 07 b7 00 sb a1, 14(a4) -80000c60: a3 06 b7 00 sb a1, 13(a4) -80000c64: 23 06 b7 00 sb a1, 12(a4) -80000c68: a3 05 b7 00 sb a1, 11(a4) -80000c6c: 23 05 b7 00 sb a1, 10(a4) -80000c70: a3 04 b7 00 sb a1, 9(a4) -80000c74: 23 04 b7 00 sb a1, 8(a4) -80000c78: a3 03 b7 00 sb a1, 7(a4) -80000c7c: 23 03 b7 00 sb a1, 6(a4) -80000c80: a3 02 b7 00 sb a1, 5(a4) -80000c84: 23 02 b7 00 sb a1, 4(a4) -80000c88: a3 01 b7 00 sb a1, 3(a4) -80000c8c: 23 01 b7 00 sb a1, 2(a4) -80000c90: a3 00 b7 00 sb a1, 1(a4) -80000c94: 23 00 b7 00 sb a1, 0(a4) -80000c98: 67 80 00 00 ret -80000c9c: 93 f5 f5 0f andi a1, a1, 255 -80000ca0: 93 96 85 00 slli a3, a1, 8 -80000ca4: b3 e5 d5 00 or a1, a1, a3 -80000ca8: 93 96 05 01 slli a3, a1, 16 -80000cac: b3 e5 d5 00 or a1, a1, a3 -80000cb0: 6f f0 df f6 j -148 -80000cb4: 93 96 27 00 slli a3, a5, 2 -80000cb8: 97 02 00 00 auipc t0, 0 -80000cbc: b3 86 56 00 add a3, a3, t0 -80000cc0: 93 82 00 00 mv t0, ra -80000cc4: e7 80 06 fa jalr -96(a3) -80000cc8: 93 80 02 00 mv ra, t0 -80000ccc: 93 87 07 ff addi a5, a5, -16 -80000cd0: 33 07 f7 40 sub a4, a4, a5 -80000cd4: 33 06 f6 00 add a2, a2, a5 -80000cd8: e3 78 c3 f6 bgeu t1, a2, -144 -80000cdc: 6f f0 df f3 j -196 - -80000ce0 __register_exitproc: -80000ce0: 03 a7 81 c2 lw a4, -984(gp) -80000ce4: 83 27 87 14 lw a5, 328(a4) -80000ce8: 63 8c 07 04 beqz a5, 88 -80000cec: 03 a7 47 00 lw a4, 4(a5) -80000cf0: 13 08 f0 01 addi a6, zero, 31 -80000cf4: 63 4e e8 06 blt a6, a4, 124 -80000cf8: 13 18 27 00 slli a6, a4, 2 -80000cfc: 63 06 05 02 beqz a0, 44 -80000d00: 33 83 07 01 add t1, a5, a6 -80000d04: 23 24 c3 08 sw a2, 136(t1) -80000d08: 83 a8 87 18 lw a7, 392(a5) -80000d0c: 13 06 10 00 addi a2, zero, 1 -80000d10: 33 16 e6 00 sll a2, a2, a4 -80000d14: b3 e8 c8 00 or a7, a7, a2 -80000d18: 23 a4 17 19 sw a7, 392(a5) -80000d1c: 23 24 d3 10 sw a3, 264(t1) -80000d20: 93 06 20 00 addi a3, zero, 2 -80000d24: 63 04 d5 02 beq a0, a3, 40 -80000d28: 13 07 17 00 addi a4, a4, 1 -80000d2c: 23 a2 e7 00 sw a4, 4(a5) -80000d30: b3 87 07 01 add a5, a5, a6 -80000d34: 23 a4 b7 00 sw a1, 8(a5) -80000d38: 13 05 00 00 mv a0, zero -80000d3c: 67 80 00 00 ret -80000d40: 93 07 c7 14 addi a5, a4, 332 -80000d44: 23 24 f7 14 sw a5, 328(a4) -80000d48: 6f f0 5f fa j -92 -80000d4c: 83 a6 c7 18 lw a3, 396(a5) -80000d50: 13 07 17 00 addi a4, a4, 1 -80000d54: 23 a2 e7 00 sw a4, 4(a5) -80000d58: 33 e6 c6 00 or a2, a3, a2 -80000d5c: 23 a6 c7 18 sw a2, 396(a5) -80000d60: b3 87 07 01 add a5, a5, a6 -80000d64: 23 a4 b7 00 sw a1, 8(a5) -80000d68: 13 05 00 00 mv a0, zero -80000d6c: 67 80 00 00 ret -80000d70: 13 05 f0 ff addi a0, zero, -1 -80000d74: 67 80 00 00 ret - -80000d78 __call_exitprocs: -80000d78: 13 01 01 fd addi sp, sp, -48 -80000d7c: 23 2c 41 01 sw s4, 24(sp) -80000d80: 03 aa 81 c2 lw s4, -984(gp) -80000d84: 23 20 21 03 sw s2, 32(sp) -80000d88: 23 26 11 02 sw ra, 44(sp) -80000d8c: 03 29 8a 14 lw s2, 328(s4) -80000d90: 23 24 81 02 sw s0, 40(sp) -80000d94: 23 22 91 02 sw s1, 36(sp) -80000d98: 23 2e 31 01 sw s3, 28(sp) -80000d9c: 23 2a 51 01 sw s5, 20(sp) -80000da0: 23 28 61 01 sw s6, 16(sp) -80000da4: 23 26 71 01 sw s7, 12(sp) -80000da8: 23 24 81 01 sw s8, 8(sp) -80000dac: 63 00 09 04 beqz s2, 64 -80000db0: 13 0b 05 00 mv s6, a0 -80000db4: 93 8b 05 00 mv s7, a1 -80000db8: 93 0a 10 00 addi s5, zero, 1 -80000dbc: 93 09 f0 ff addi s3, zero, -1 -80000dc0: 83 24 49 00 lw s1, 4(s2) -80000dc4: 13 84 f4 ff addi s0, s1, -1 -80000dc8: 63 42 04 02 bltz s0, 36 -80000dcc: 93 94 24 00 slli s1, s1, 2 -80000dd0: b3 04 99 00 add s1, s2, s1 -80000dd4: 63 84 0b 04 beqz s7, 72 -80000dd8: 83 a7 44 10 lw a5, 260(s1) -80000ddc: 63 80 77 05 beq a5, s7, 64 -80000de0: 13 04 f4 ff addi s0, s0, -1 -80000de4: 93 84 c4 ff addi s1, s1, -4 -80000de8: e3 16 34 ff bne s0, s3, -20 -80000dec: 83 20 c1 02 lw ra, 44(sp) -80000df0: 03 24 81 02 lw s0, 40(sp) -80000df4: 83 24 41 02 lw s1, 36(sp) -80000df8: 03 29 01 02 lw s2, 32(sp) -80000dfc: 83 29 c1 01 lw s3, 28(sp) -80000e00: 03 2a 81 01 lw s4, 24(sp) -80000e04: 83 2a 41 01 lw s5, 20(sp) -80000e08: 03 2b 01 01 lw s6, 16(sp) -80000e0c: 83 2b c1 00 lw s7, 12(sp) -80000e10: 03 2c 81 00 lw s8, 8(sp) -80000e14: 13 01 01 03 addi sp, sp, 48 -80000e18: 67 80 00 00 ret -80000e1c: 83 27 49 00 lw a5, 4(s2) -80000e20: 83 a6 44 00 lw a3, 4(s1) -80000e24: 93 87 f7 ff addi a5, a5, -1 -80000e28: 63 8e 87 04 beq a5, s0, 92 -80000e2c: 23 a2 04 00 sw zero, 4(s1) -80000e30: e3 88 06 fa beqz a3, -80 -80000e34: 83 27 89 18 lw a5, 392(s2) -80000e38: 33 97 8a 00 sll a4, s5, s0 -80000e3c: 03 2c 49 00 lw s8, 4(s2) -80000e40: b3 77 f7 00 and a5, a4, a5 -80000e44: 63 92 07 02 bnez a5, 36 +80000d30 __call_exitprocs: +80000d30: 13 01 01 fd addi sp, sp, -48 +80000d34: 23 2c 41 01 sw s4, 24(sp) +80000d38: 03 aa 81 c2 lw s4, -984(gp) +80000d3c: 23 20 21 03 sw s2, 32(sp) +80000d40: 23 26 11 02 sw ra, 44(sp) +80000d44: 03 29 8a 14 lw s2, 328(s4) +80000d48: 23 24 81 02 sw s0, 40(sp) +80000d4c: 23 22 91 02 sw s1, 36(sp) +80000d50: 23 2e 31 01 sw s3, 28(sp) +80000d54: 23 2a 51 01 sw s5, 20(sp) +80000d58: 23 28 61 01 sw s6, 16(sp) +80000d5c: 23 26 71 01 sw s7, 12(sp) +80000d60: 23 24 81 01 sw s8, 8(sp) +80000d64: 63 00 09 04 beqz s2, 64 +80000d68: 13 0b 05 00 mv s6, a0 +80000d6c: 93 8b 05 00 mv s7, a1 +80000d70: 93 0a 10 00 addi s5, zero, 1 +80000d74: 93 09 f0 ff addi s3, zero, -1 +80000d78: 83 24 49 00 lw s1, 4(s2) +80000d7c: 13 84 f4 ff addi s0, s1, -1 +80000d80: 63 42 04 02 bltz s0, 36 +80000d84: 93 94 24 00 slli s1, s1, 2 +80000d88: b3 04 99 00 add s1, s2, s1 +80000d8c: 63 84 0b 04 beqz s7, 72 +80000d90: 83 a7 44 10 lw a5, 260(s1) +80000d94: 63 80 77 05 beq a5, s7, 64 +80000d98: 13 04 f4 ff addi s0, s0, -1 +80000d9c: 93 84 c4 ff addi s1, s1, -4 +80000da0: e3 16 34 ff bne s0, s3, -20 +80000da4: 83 20 c1 02 lw ra, 44(sp) +80000da8: 03 24 81 02 lw s0, 40(sp) +80000dac: 83 24 41 02 lw s1, 36(sp) +80000db0: 03 29 01 02 lw s2, 32(sp) +80000db4: 83 29 c1 01 lw s3, 28(sp) +80000db8: 03 2a 81 01 lw s4, 24(sp) +80000dbc: 83 2a 41 01 lw s5, 20(sp) +80000dc0: 03 2b 01 01 lw s6, 16(sp) +80000dc4: 83 2b c1 00 lw s7, 12(sp) +80000dc8: 03 2c 81 00 lw s8, 8(sp) +80000dcc: 13 01 01 03 addi sp, sp, 48 +80000dd0: 67 80 00 00 ret +80000dd4: 83 27 49 00 lw a5, 4(s2) +80000dd8: 83 a6 44 00 lw a3, 4(s1) +80000ddc: 93 87 f7 ff addi a5, a5, -1 +80000de0: 63 8e 87 04 beq a5, s0, 92 +80000de4: 23 a2 04 00 sw zero, 4(s1) +80000de8: e3 88 06 fa beqz a3, -80 +80000dec: 83 27 89 18 lw a5, 392(s2) +80000df0: 33 97 8a 00 sll a4, s5, s0 +80000df4: 03 2c 49 00 lw s8, 4(s2) +80000df8: b3 77 f7 00 and a5, a4, a5 +80000dfc: 63 92 07 02 bnez a5, 36 +80000e00: e7 80 06 00 jalr a3 +80000e04: 03 27 49 00 lw a4, 4(s2) +80000e08: 83 27 8a 14 lw a5, 328(s4) +80000e0c: 63 14 87 01 bne a4, s8, 8 +80000e10: e3 04 f9 f8 beq s2, a5, -120 +80000e14: e3 88 07 f8 beqz a5, -112 +80000e18: 13 89 07 00 mv s2, a5 +80000e1c: 6f f0 df f5 j -164 +80000e20: 83 27 c9 18 lw a5, 396(s2) +80000e24: 83 a5 44 08 lw a1, 132(s1) +80000e28: 33 77 f7 00 and a4, a4, a5 +80000e2c: 63 1c 07 00 bnez a4, 24 +80000e30: 13 05 0b 00 mv a0, s6 +80000e34: e7 80 06 00 jalr a3 +80000e38: 6f f0 df fc j -52 +80000e3c: 23 22 89 00 sw s0, 4(s2) +80000e40: 6f f0 9f fa j -88 +80000e44: 13 85 05 00 mv a0, a1 80000e48: e7 80 06 00 jalr a3 -80000e4c: 03 27 49 00 lw a4, 4(s2) -80000e50: 83 27 8a 14 lw a5, 328(s4) -80000e54: 63 14 87 01 bne a4, s8, 8 -80000e58: e3 04 f9 f8 beq s2, a5, -120 -80000e5c: e3 88 07 f8 beqz a5, -112 -80000e60: 13 89 07 00 mv s2, a5 -80000e64: 6f f0 df f5 j -164 -80000e68: 83 27 c9 18 lw a5, 396(s2) -80000e6c: 83 a5 44 08 lw a1, 132(s1) -80000e70: 33 77 f7 00 and a4, a4, a5 -80000e74: 63 1c 07 00 bnez a4, 24 -80000e78: 13 05 0b 00 mv a0, s6 -80000e7c: e7 80 06 00 jalr a3 -80000e80: 6f f0 df fc j -52 -80000e84: 23 22 89 00 sw s0, 4(s2) -80000e88: 6f f0 9f fa j -88 -80000e8c: 13 85 05 00 mv a0, a1 -80000e90: e7 80 06 00 jalr a3 -80000e94: 6f f0 9f fb j -72 +80000e4c: 6f f0 9f fb j -72 Disassembly of section .init_array: @@ -1180,7 +1162,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: 4c 0a + a4: 04 0a a6: 00 80 a8: 00 00 aa: 00 00 @@ -1298,7 +1280,7 @@ Disassembly of section .symtab: 1de: f1 ff 1e0: 15 01 1e2: 00 00 - 1e4: 98 0a + 1e4: 50 0a 1e6: 00 80 1e8: 00 00 1ea: 00 00 @@ -1306,7 +1288,7 @@ Disassembly of section .symtab: 1ee: 02 00 1f0: 20 01 1f2: 00 00 - 1f4: 50 0a + 1f4: 08 0a 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1316,7 +1298,7 @@ Disassembly of section .symtab: 202: 00 00 204: 60 00 206: 00 80 - 208: 3c 01 + 208: 48 01 20a: 00 00 20c: 12 00 20e: 02 00 @@ -1324,13 +1306,13 @@ Disassembly of section .symtab: 212: 00 00 214: 34 14 216: 00 80 - 218: 04 00 + 218: 20 00 21a: 00 00 21c: 11 00 21e: 06 00 220: 4e 01 222: 00 00 - 224: 58 0a + 224: 10 0a 226: 00 80 228: 00 00 22a: 00 00 @@ -1346,7 +1328,7 @@ Disassembly of section .symtab: 23e: 05 00 240: 65 01 242: 00 00 - 244: 70 0a + 244: 28 0a 246: 00 80 248: 00 00 24a: 00 00 @@ -1354,7 +1336,7 @@ Disassembly of section .symtab: 24e: 02 00 250: 6d 01 252: 00 00 - 254: 1c 08 + 254: d4 07 256: 00 80 258: ec 01 25a: 00 00 @@ -1362,7 +1344,7 @@ Disassembly of section .symtab: 25e: 02 00 260: 8e 01 262: 00 00 - 264: a8 0a + 264: 60 0a 266: 00 80 268: 00 00 26a: 00 00 @@ -1371,13 +1353,13 @@ Disassembly of section .symtab: 270: 9b 01 00 00 274: 3c 02 276: 00 80 - 278: cc 01 + 278: 84 01 27a: 00 00 27c: 12 00 27e: 02 00 280: a8 01 282: 00 00 - 284: 68 0a + 284: 20 0a 286: 00 80 288: 00 00 28a: 00 00 @@ -1393,7 +1375,7 @@ Disassembly of section .symtab: 29e: 05 00 2a0: c4 01 2a2: 00 00 - 2a4: 68 0b + 2a4: 20 0b 2a6: 00 80 2a8: 9c 00 2aa: 00 00 @@ -1401,7 +1383,7 @@ Disassembly of section .symtab: 2ae: 02 00 2b0: d6 01 2b2: 00 00 - 2b4: a0 0a + 2b4: 58 0a 2b6: 00 80 2b8: 00 00 2ba: 00 00 @@ -1409,7 +1391,7 @@ Disassembly of section .symtab: 2be: 02 00 2c0: e5 01 2c2: 00 00 - 2c4: 78 0a + 2c4: 30 0a 2c6: 00 80 2c8: 00 00 2ca: 00 00 @@ -1417,7 +1399,7 @@ Disassembly of section .symtab: 2ce: 02 00 2d0: f0 01 2d2: 00 00 - 2d4: 88 0a + 2d4: 40 0a 2d6: 00 80 2d8: 00 00 2da: 00 00 @@ -1425,14 +1407,14 @@ Disassembly of section .symtab: 2de: 02 00 2e0: fd 01 2e2: 00 00 - 2e4: 0c 0b + 2e4: c4 0a 2e6: 00 80 2e8: 5c 00 2ea: 00 00 2ec: 12 00 2ee: 02 00 2f0: 0f 02 00 00 - 2f4: 10 0a + 2f4: c8 09 2f6: 00 80 2f8: 00 00 2fa: 00 00 @@ -1440,7 +1422,7 @@ Disassembly of section .symtab: 2fe: 02 00 300: 19 02 302: 00 00 - 304: 24 06 + 304: dc 05 306: 00 80 308: f8 01 30a: 00 00 @@ -1448,7 +1430,7 @@ Disassembly of section .symtab: 30e: 02 00 310: 35 02 312: 00 00 - 314: 60 0a + 314: 18 0a 316: 00 80 318: 00 00 31a: 00 00 @@ -1456,7 +1438,7 @@ Disassembly of section .symtab: 31e: 02 00 320: 40 02 322: 00 00 - 324: 78 0d + 324: 30 0d 326: 00 80 328: 20 01 32a: 00 00 @@ -1472,7 +1454,7 @@ Disassembly of section .symtab: 33e: 01 00 340: 51 02 342: 00 00 - 344: e0 0c + 344: 98 0c 346: 00 80 348: 98 00 34a: 00 00 @@ -1480,7 +1462,7 @@ Disassembly of section .symtab: 34e: 02 00 350: 65 02 352: 00 00 - 354: 38 14 + 354: 54 14 356: 00 80 358: 00 00 35a: 00 00 @@ -1496,7 +1478,7 @@ Disassembly of section .symtab: 36e: 06 00 370: 7d 02 372: 00 00 - 374: 04 0c + 374: bc 0b 376: 00 80 378: dc 00 37a: 00 00 @@ -1504,7 +1486,7 @@ Disassembly of section .symtab: 37e: 02 00 380: 84 02 382: 00 00 - 384: 08 04 + 384: c0 03 386: 00 80 388: 30 00 38a: 00 00 @@ -1512,14 +1494,14 @@ Disassembly of section .symtab: 38e: 02 00 390: 89 02 392: 00 00 - 394: b8 0a + 394: 70 0a 396: 00 80 398: 00 00 39a: 00 00 39c: 12 00 39e: 02 00 3a0: 97 02 00 00 auipc t0, 0 - 3a4: c8 0a + 3a4: 80 0a 3a6: 00 80 3a8: 14 00 3aa: 00 00 @@ -1527,7 +1509,7 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: 9e 02 3b2: 00 00 - 3b4: 90 0a + 3b4: 48 0a 3b6: 00 80 3b8: 00 00 3ba: 00 00 @@ -1535,7 +1517,7 @@ Disassembly of section .symtab: 3be: 02 00 3c0: ac 02 3c2: 00 00 - 3c4: b0 0a + 3c4: 68 0a 3c6: 00 80 3c8: 00 00 3ca: 00 00 @@ -1543,7 +1525,7 @@ Disassembly of section .symtab: 3ce: 02 00 3d0: b9 02 3d2: 00 00 - 3d4: 80 0a + 3d4: 38 0a 3d6: 00 80 3d8: 00 00 3da: 00 00 @@ -1551,9 +1533,9 @@ Disassembly of section .symtab: 3de: 02 00 3e0: c5 02 3e2: 00 00 - 3e4: 9c 01 + 3e4: a8 01 3e6: 00 80 - 3e8: a0 00 + 3e8: 94 00 3ea: 00 00 3ec: 12 00 3ee: 02 00 @@ -1575,7 +1557,7 @@ Disassembly of section .symtab: 40e: 05 00 410: 9d 00 412: 00 00 - 414: 38 14 + 414: 54 14 416: 00 80 418: 00 00 41a: 00 00 @@ -1583,7 +1565,7 @@ Disassembly of section .symtab: 41e: 06 00 420: f5 02 422: 00 00 - 424: dc 0a + 424: 94 0a 426: 00 80 428: 30 00 42a: 00 00 @@ -1591,7 +1573,7 @@ Disassembly of section .symtab: 42e: 02 00 430: f4 02 432: 00 00 - 434: 08 0a + 434: c0 09 436: 00 80 438: 00 00 43a: 00 00 @@ -1599,7 +1581,7 @@ Disassembly of section .symtab: 43e: 02 00 440: fa 02 442: 00 00 - 444: 38 04 + 444: f0 03 446: 00 80 448: ec 01 44a: 00 00 @@ -1607,7 +1589,7 @@ Disassembly of section .symtab: 44e: 02 00 450: 0c 03 452: 00 00 - 454: c0 0a + 454: 78 0a 456: 00 80 458: 00 00 45a: 00 00 @@ -1647,13 +1629,13 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 30 33 - 48: 2d 38 - 4a: 35 2d - 4c: 61 36 - 4e: 2d 34 - 50: 31 2d - 52: 39 37 + 46: 35 30 + 48: 2d 30 + 4a: 31 2d + 4c: 35 65 + 4e: 2d 66 + 50: 35 2d + 52: 31 66 54: 2e 63 56: 00 70 58: 61 72 diff --git a/benchmarks/opencl/guassian/Makefile b/benchmarks/opencl/guassian/Makefile index e16e5ca7..29e88502 100644 --- a/benchmarks/opencl/guassian/Makefile +++ b/benchmarks/opencl/guassian/Makefile @@ -55,7 +55,7 @@ clean: rm -rf $(PROJECT) *.o .depend clean-all: clean - rm *.pocl *.dump + rm -rf *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/guassian/kernel.pocl b/benchmarks/opencl/guassian/kernel.pocl index c1ff7e83..7713239b 100644 Binary files a/benchmarks/opencl/guassian/kernel.pocl and b/benchmarks/opencl/guassian/kernel.pocl differ diff --git a/benchmarks/opencl/kmeans/Makefile b/benchmarks/opencl/kmeans/Makefile index 8ade050f..2f7020e9 100644 --- a/benchmarks/opencl/kmeans/Makefile +++ b/benchmarks/opencl/kmeans/Makefile @@ -52,7 +52,7 @@ clean: rm -rf $(PROJECT) *.o .depend clean-all: clean - rm *.pocl *.dump + rm -rf *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/nearn/Makefile b/benchmarks/opencl/nearn/Makefile index f1f3754d..3a9bfdde 100644 --- a/benchmarks/opencl/nearn/Makefile +++ b/benchmarks/opencl/nearn/Makefile @@ -55,7 +55,7 @@ clean: rm -rf $(PROJECT) *.o .depend clean-all: clean - rm *.pocl *.dump + rm -rf *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/nearn/NearestNeighbor.dump b/benchmarks/opencl/nearn/NearestNeighbor.dump index 91c68498..92d1ba0e 100644 --- a/benchmarks/opencl/nearn/NearestNeighbor.dump +++ b/benchmarks/opencl/nearn/NearestNeighbor.dump @@ -1,28 +1,28 @@ -/tmp/pocl_vortex_kernel-35-d0-fd-eb-49.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-29-3b-c3-e2-18.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: -80000000: 97 15 00 00 auipc a1, 1 -80000004: 93 85 85 83 addi a1, a1, -1992 +80000000: 97 05 00 00 auipc a1, 0 +80000004: 93 85 05 7f addi a1, a1, 2032 80000008: 73 25 60 02 csrr a0, 38 8000000c: 6b 10 b5 00 -80000010: ef 00 90 02 jal 2088 +80000010: ef 00 00 7e jal 2016 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 13 85 01 c3 addi a0, gp, -976 -80000020: 13 86 81 c3 addi a2, gp, -968 +80000020: 13 86 41 c5 addi a2, gp, -940 80000024: 33 06 a6 40 sub a2, a2, a0 80000028: 93 05 00 00 mv a1, zero -8000002c: ef 00 50 38 jal 2948 +8000002c: ef 00 d0 33 jal 2876 80000030: 17 15 00 00 auipc a0, 1 -80000034: 13 05 85 a8 addi a0, a0, -1400 -80000038: ef 00 50 23 jal 2612 -8000003c: ef 00 90 2d jal 2776 -80000040: ef 00 80 3c jal 968 -80000044: 6f 00 50 24 j 2628 +80000034: 13 05 05 a4 addi a0, a0, -1472 +80000038: ef 00 d0 1e jal 2540 +8000003c: ef 00 10 29 jal 2704 +80000040: ef 00 00 38 jal 896 +80000044: 6f 00 d0 1f j 2556 Disassembly of section .text: @@ -30,8 +30,8 @@ Disassembly of section .text: 80000048: 93 07 00 00 mv a5, zero 8000004c: 63 88 07 00 beqz a5, 16 80000050: 37 15 00 80 lui a0, 524289 -80000054: 13 05 85 ab addi a0, a0, -1352 -80000058: 6f 00 50 21 j 2580 +80000054: 13 05 05 a7 addi a0, a0, -1424 +80000058: 6f 00 d0 1c j 2508 8000005c: 67 80 00 00 ret 80000060 kernel_spawn_run_warp: @@ -46,116 +46,116 @@ Disassembly of section .text: 80000080: 23 28 61 01 sw s6, 16(sp) 80000084: 23 26 71 01 sw s7, 12(sp) 80000088: 23 24 81 01 sw s8, 8(sp) -8000008c: ef 00 d0 03 jal 2108 -80000090: ef 00 00 7f jal 2032 -80000094: ef 00 50 03 jal 2100 -80000098: 83 a5 41 c3 lw a1, -972(gp) -8000009c: 83 a5 05 00 lw a1, 0(a1) -800000a0: 83 aa 05 00 lw s5, 0(a1) -800000a4: 03 ab 45 00 lw s6, 4(a1) +8000008c: ef 00 40 7f jal 2036 +80000090: ef 00 80 7a jal 1960 +80000094: ef 00 40 7e jal 2020 +80000098: 93 04 05 00 mv s1, a0 +8000009c: ef 00 c0 7b jal 1980 +800000a0: 93 09 05 00 mv s3, a0 +800000a4: ef 00 40 7c jal 1988 800000a8: 13 09 05 00 mv s2, a0 -800000ac: ef 00 40 7f jal 2036 -800000b0: 93 09 05 00 mv s3, a0 -800000b4: ef 00 c0 7f jal 2044 -800000b8: 03 a8 41 c3 lw a6, -972(gp) -800000bc: 03 27 48 01 lw a4, 20(a6) -800000c0: 83 25 08 01 lw a1, 16(a6) -800000c4: 93 06 07 00 mv a3, a4 -800000c8: 63 44 37 01 blt a4, s3, 8 -800000cc: 93 86 09 00 mv a3, s3 -800000d0: 33 a7 e9 00 slt a4, s3, a4 -800000d4: 33 87 e5 00 add a4, a1, a4 -800000d8: 93 07 10 00 addi a5, zero, 1 -800000dc: 63 46 f7 08 blt a4, a5, 140 -800000e0: 33 0a 5b 03 mul s4, s6, s5 -800000e4: 83 27 c8 00 lw a5, 12(a6) -800000e8: 13 0c f7 ff addi s8, a4, -1 -800000ec: b3 85 b9 02 mul a1, s3, a1 -800000f0: b3 85 b6 00 add a1, a3, a1 -800000f4: b3 05 b9 02 mul a1, s2, a1 -800000f8: b3 85 b7 00 add a1, a5, a1 -800000fc: 33 05 e5 02 mul a0, a0, a4 -80000100: b3 84 a5 00 add s1, a1, a0 -80000104: 33 09 60 41 neg s2, s6 -80000108: 33 0b 40 41 neg s6, s4 -8000010c: 33 c7 44 03 div a4, s1, s4 -80000110: 83 25 08 00 lw a1, 0(a6) -80000114: 33 05 eb 02 mul a0, s6, a4 -80000118: 33 85 a4 00 add a0, s1, a0 -8000011c: b3 46 55 03 div a3, a0, s5 -80000120: 03 a5 c5 00 lw a0, 12(a1) -80000124: b3 07 e9 02 mul a5, s2, a4 -80000128: b3 87 d7 40 sub a5, a5, a3 -8000012c: b3 87 fa 02 mul a5, s5, a5 -80000130: b3 08 f5 00 add a7, a0, a5 -80000134: 03 a4 05 01 lw s0, 16(a1) -80000138: 03 a6 45 01 lw a2, 20(a1) -8000013c: 83 27 48 00 lw a5, 4(a6) -80000140: 03 25 88 00 lw a0, 8(a6) -80000144: b3 06 d4 00 add a3, s0, a3 -80000148: 33 07 e6 00 add a4, a2, a4 -8000014c: 33 86 14 01 add a2, s1, a7 -80000150: e7 80 07 00 jalr a5 -80000154: 63 0a 0c 00 beqz s8, 20 -80000158: 03 a8 41 c3 lw a6, -972(gp) -8000015c: 13 0c fc ff addi s8, s8, -1 -80000160: 93 84 14 00 addi s1, s1, 1 -80000164: 6f f0 9f fa j -88 -80000168: 13 b5 19 00 seqz a0, s3 -8000016c: 03 2c 81 00 lw s8, 8(sp) -80000170: 83 2b c1 00 lw s7, 12(sp) -80000174: 03 2b 01 01 lw s6, 16(sp) -80000178: 83 2a 41 01 lw s5, 20(sp) -8000017c: 03 2a 81 01 lw s4, 24(sp) -80000180: 83 29 c1 01 lw s3, 28(sp) -80000184: 03 29 01 02 lw s2, 32(sp) -80000188: 83 24 41 02 lw s1, 36(sp) -8000018c: 03 24 81 02 lw s0, 40(sp) -80000190: 83 20 c1 02 lw ra, 44(sp) -80000194: 13 01 01 03 addi sp, sp, 48 -80000198: 6f 00 80 6e j 1768 +800000ac: ef 00 40 7d jal 2004 +800000b0: 93 85 41 c3 addi a1, gp, -972 +800000b4: 13 96 24 00 slli a2, s1, 2 +800000b8: b3 05 b6 00 add a1, a2, a1 +800000bc: 03 ab 05 00 lw s6, 0(a1) +800000c0: 83 25 4b 01 lw a1, 20(s6) +800000c4: 03 26 0b 01 lw a2, 16(s6) +800000c8: 93 86 05 00 mv a3, a1 +800000cc: 63 c4 35 01 blt a1, s3, 8 +800000d0: 93 86 09 00 mv a3, s3 +800000d4: b3 a5 b9 00 slt a1, s3, a1 +800000d8: 33 07 b6 00 add a4, a2, a1 +800000dc: 93 05 10 00 addi a1, zero, 1 +800000e0: 63 4a b7 08 blt a4, a1, 148 +800000e4: 83 25 0b 00 lw a1, 0(s6) +800000e8: 83 aa 05 00 lw s5, 0(a1) +800000ec: 83 a7 45 00 lw a5, 4(a1) +800000f0: 83 24 cb 00 lw s1, 12(s6) +800000f4: 33 8a 57 03 mul s4, a5, s5 +800000f8: 13 0c f7 ff addi s8, a4, -1 +800000fc: 33 86 c9 02 mul a2, s3, a2 +80000100: 33 86 c6 00 add a2, a3, a2 +80000104: 33 05 c5 02 mul a0, a0, a2 +80000108: 33 85 a4 00 add a0, s1, a0 +8000010c: 33 06 e9 02 mul a2, s2, a4 +80000110: b3 04 c5 00 add s1, a0, a2 +80000114: 33 09 f0 40 neg s2, a5 +80000118: b3 0b 40 41 neg s7, s4 +8000011c: 33 c6 44 03 div a2, s1, s4 +80000120: 33 85 cb 02 mul a0, s7, a2 +80000124: 33 85 a4 00 add a0, s1, a0 +80000128: b3 46 55 03 div a3, a0, s5 +8000012c: 03 a5 c5 00 lw a0, 12(a1) +80000130: 33 07 c9 02 mul a4, s2, a2 +80000134: 33 07 d7 40 sub a4, a4, a3 +80000138: 33 87 ea 02 mul a4, s5, a4 +8000013c: 33 08 e5 00 add a6, a0, a4 +80000140: 03 a7 05 01 lw a4, 16(a1) +80000144: 03 a4 45 01 lw s0, 20(a1) +80000148: 83 27 4b 00 lw a5, 4(s6) +8000014c: 03 25 8b 00 lw a0, 8(s6) +80000150: b3 06 d7 00 add a3, a4, a3 +80000154: 33 07 c4 00 add a4, s0, a2 +80000158: 33 86 04 01 add a2, s1, a6 +8000015c: e7 80 07 00 jalr a5 +80000160: 63 0a 0c 00 beqz s8, 20 +80000164: 83 25 0b 00 lw a1, 0(s6) +80000168: 13 0c fc ff addi s8, s8, -1 +8000016c: 93 84 14 00 addi s1, s1, 1 +80000170: 6f f0 df fa j -84 +80000174: 13 b5 19 00 seqz a0, s3 +80000178: 03 2c 81 00 lw s8, 8(sp) +8000017c: 83 2b c1 00 lw s7, 12(sp) +80000180: 03 2b 01 01 lw s6, 16(sp) +80000184: 83 2a 41 01 lw s5, 20(sp) +80000188: 03 2a 81 01 lw s4, 24(sp) +8000018c: 83 29 c1 01 lw s3, 28(sp) +80000190: 03 29 01 02 lw s2, 32(sp) +80000194: 83 24 41 02 lw s1, 36(sp) +80000198: 03 24 81 02 lw s0, 40(sp) +8000019c: 83 20 c1 02 lw ra, 44(sp) +800001a0: 13 01 01 03 addi sp, sp, 48 +800001a4: 6f 00 40 69 j 1684 -8000019c kernel_spawn_run_threads: -8000019c: 13 01 01 fe addi sp, sp, -32 -800001a0: 23 2e 11 00 sw ra, 28(sp) -800001a4: 23 2c 81 00 sw s0, 24(sp) -800001a8: 23 2a 91 00 sw s1, 20(sp) -800001ac: 23 28 21 01 sw s2, 16(sp) -800001b0: 23 26 31 01 sw s3, 12(sp) -800001b4: 93 89 05 00 mv s3, a1 -800001b8: ef 00 80 6c jal 1736 -800001bc: 03 a5 41 c3 lw a0, -972(gp) -800001c0: 03 25 05 00 lw a0, 0(a0) -800001c4: 83 24 05 00 lw s1, 0(a0) -800001c8: 03 25 45 00 lw a0, 4(a0) -800001cc: 33 04 95 02 mul s0, a0, s1 -800001d0: ef 00 80 6e jal 1768 -800001d4: 33 05 35 01 add a0, a0, s3 -800001d8: 03 a6 41 c3 lw a2, -972(gp) -800001dc: 33 47 85 02 div a4, a0, s0 -800001e0: b3 05 87 02 mul a1, a4, s0 -800001e4: 33 05 b5 40 sub a0, a0, a1 -800001e8: 83 25 06 00 lw a1, 0(a2) -800001ec: b3 46 95 02 div a3, a0, s1 -800001f0: b3 87 96 02 mul a5, a3, s1 -800001f4: 33 08 f5 40 sub a6, a0, a5 -800001f8: 83 a4 c5 00 lw s1, 12(a1) -800001fc: 03 a4 05 01 lw s0, 16(a1) -80000200: 83 a7 45 01 lw a5, 20(a1) -80000204: 83 28 46 00 lw a7, 4(a2) -80000208: 03 25 86 00 lw a0, 8(a2) -8000020c: 33 06 98 00 add a2, a6, s1 -80000210: b3 06 d4 00 add a3, s0, a3 -80000214: 33 87 e7 00 add a4, a5, a4 -80000218: e7 80 08 00 jalr a7 -8000021c: 13 05 10 00 addi a0, zero, 1 -80000220: 83 29 c1 00 lw s3, 12(sp) -80000224: 03 29 01 01 lw s2, 16(sp) -80000228: 83 24 41 01 lw s1, 20(sp) -8000022c: 03 24 81 01 lw s0, 24(sp) -80000230: 83 20 c1 01 lw ra, 28(sp) -80000234: 13 01 01 02 addi sp, sp, 32 -80000238: 6f 00 80 64 j 1608 +800001a8 kernel_spawn_run_threads: +800001a8: 13 01 01 ff addi sp, sp, -16 +800001ac: 23 26 11 00 sw ra, 12(sp) +800001b0: 23 24 81 00 sw s0, 8(sp) +800001b4: ef 00 40 68 jal 1668 +800001b8: ef 00 00 6c jal 1728 +800001bc: 13 04 05 00 mv s0, a0 +800001c0: ef 00 00 6b jal 1712 +800001c4: 93 85 41 c3 addi a1, gp, -972 +800001c8: 13 16 24 00 slli a2, s0, 2 +800001cc: b3 05 b6 00 add a1, a2, a1 +800001d0: 03 a6 05 00 lw a2, 0(a1) +800001d4: 83 25 06 00 lw a1, 0(a2) +800001d8: 83 26 c6 00 lw a3, 12(a2) +800001dc: 03 a7 05 00 lw a4, 0(a1) +800001e0: 83 a7 45 00 lw a5, 4(a1) +800001e4: 33 85 a6 00 add a0, a3, a0 +800001e8: b3 86 e7 02 mul a3, a5, a4 +800001ec: b3 47 d5 02 div a5, a0, a3 +800001f0: b3 86 d7 02 mul a3, a5, a3 +800001f4: 03 a4 c5 00 lw s0, 12(a1) +800001f8: 33 05 d5 40 sub a0, a0, a3 +800001fc: b3 46 e5 02 div a3, a0, a4 +80000200: 33 88 e6 02 mul a6, a3, a4 +80000204: b3 08 a4 00 add a7, s0, a0 +80000208: 03 a7 05 01 lw a4, 16(a1) +8000020c: 03 a4 45 01 lw s0, 20(a1) +80000210: 83 22 46 00 lw t0, 4(a2) +80000214: 03 25 86 00 lw a0, 8(a2) +80000218: 33 86 08 41 sub a2, a7, a6 +8000021c: b3 06 d7 00 add a3, a4, a3 +80000220: 33 07 f4 00 add a4, s0, a5 +80000224: e7 80 02 00 jalr t0 +80000228: 13 05 10 00 addi a0, zero, 1 +8000022c: 03 24 81 00 lw s0, 8(sp) +80000230: 83 20 c1 00 lw ra, 12(sp) +80000234: 13 01 01 01 addi sp, sp, 16 +80000238: 6f 00 00 60 j 1536 8000023c kernel_spawn: 8000023c: 13 01 01 fc addi sp, sp, -64 @@ -167,833 +167,815 @@ Disassembly of section .text: 80000254: 23 24 41 03 sw s4, 40(sp) 80000258: 23 22 51 03 sw s5, 36(sp) 8000025c: 23 20 61 03 sw s6, 32(sp) -80000260: 13 0b 05 00 mv s6, a0 -80000264: 03 25 05 00 lw a0, 0(a0) -80000268: 83 26 4b 00 lw a3, 4(s6) -8000026c: 03 27 8b 00 lw a4, 8(s6) -80000270: 13 09 06 00 mv s2, a2 -80000274: 93 89 05 00 mv s3, a1 -80000278: 33 85 a6 02 mul a0, a3, a0 -8000027c: b3 04 e5 02 mul s1, a0, a4 -80000280: ef 00 80 65 jal 1624 -80000284: 13 04 05 00 mv s0, a0 -80000288: ef 00 80 64 jal 1608 +80000260: 23 2e 71 01 sw s7, 28(sp) +80000264: 23 2c 81 01 sw s8, 24(sp) +80000268: 93 04 05 00 mv s1, a0 +8000026c: 83 2b 05 00 lw s7, 0(a0) +80000270: 03 24 45 00 lw s0, 4(a0) +80000274: 03 2c 85 00 lw s8, 8(a0) +80000278: 13 09 06 00 mv s2, a2 +8000027c: 93 89 05 00 mv s3, a1 +80000280: ef 00 00 61 jal 1552 +80000284: 13 0b 05 00 mv s6, a0 +80000288: ef 00 00 60 jal 1536 8000028c: 13 0a 05 00 mv s4, a0 -80000290: ef 00 80 63 jal 1592 +80000290: ef 00 00 5f jal 1520 80000294: 93 0a 05 00 mv s5, a0 -80000298: ef 00 80 62 jal 1576 -8000029c: b3 85 4a 03 mul a1, s5, s4 -800002a0: 13 06 10 00 addi a2, zero, 1 -800002a4: 63 c8 95 00 blt a1, s1, 16 -800002a8: 63 5a 86 00 bge a2, s0, 20 -800002ac: 63 4c c5 00 blt a0, a2, 24 -800002b0: 6f 00 00 13 j 304 -800002b4: 33 c6 b4 02 div a2, s1, a1 -800002b8: e3 4a 86 fe blt a2, s0, -12 -800002bc: 13 06 04 00 mv a2, s0 -800002c0: 63 50 c5 12 bge a0, a2, 288 -800002c4: 93 06 f4 ff addi a3, s0, -1 -800002c8: b3 c5 c4 02 div a1, s1, a2 -800002cc: 63 0e d5 00 beq a0, a3, 28 -800002d0: 13 06 00 00 mv a2, zero -800002d4: b3 06 b6 00 add a3, a2, a1 -800002d8: 33 c6 56 03 div a2, a3, s5 -800002dc: 13 07 00 00 mv a4, zero -800002e0: 63 50 46 03 bge a2, s4, 32 -800002e4: 6f 00 00 02 j 32 -800002e8: 33 86 c5 02 mul a2, a1, a2 -800002ec: 33 86 c4 40 sub a2, s1, a2 -800002f0: b3 06 b6 00 add a3, a2, a1 -800002f4: 33 c6 56 03 div a2, a3, s5 -800002f8: 13 07 00 00 mv a4, zero -800002fc: 63 44 46 01 blt a2, s4, 8 -80000300: 33 47 46 03 div a4, a2, s4 -80000304: 93 07 00 00 mv a5, zero -80000308: b3 0a 56 03 mul s5, a2, s5 -8000030c: 93 04 10 00 addi s1, zero, 1 -80000310: 63 08 07 00 beqz a4, 16 -80000314: b3 07 47 03 mul a5, a4, s4 -80000318: b3 07 f6 40 sub a5, a2, a5 -8000031c: 93 04 07 00 mv s1, a4 -80000320: 33 84 56 41 sub s0, a3, s5 -80000324: 23 24 61 01 sw s6, 8(sp) -80000328: 23 26 31 01 sw s3, 12(sp) -8000032c: 23 28 21 01 sw s2, 16(sp) -80000330: 33 85 a5 02 mul a0, a1, a0 -80000334: 23 2a a1 00 sw a0, 20(sp) -80000338: 23 2c 91 00 sw s1, 24(sp) -8000033c: 23 2e f1 00 sw a5, 28(sp) -80000340: 93 05 81 00 addi a1, sp, 8 -80000344: 93 06 20 00 addi a3, zero, 2 -80000348: 23 aa b1 c2 sw a1, -972(gp) -8000034c: 63 40 d6 02 blt a2, a3, 32 -80000350: 63 44 46 01 blt a2, s4, 8 -80000354: 13 06 0a 00 mv a2, s4 -80000358: 37 05 00 80 lui a0, 524288 -8000035c: 93 05 05 06 addi a1, a0, 96 -80000360: 13 05 06 00 mv a0, a2 -80000364: ef 00 40 51 jal 1300 -80000368: ef f0 9f cf jal -776 -8000036c: 63 0a 04 06 beqz s0, 116 -80000370: 13 05 04 00 mv a0, s0 -80000374: ef 00 c0 50 jal 1292 -80000378: 03 a5 41 c3 lw a0, -972(gp) -8000037c: 03 25 05 00 lw a0, 0(a0) -80000380: 83 24 05 00 lw s1, 0(a0) -80000384: 03 25 45 00 lw a0, 4(a0) -80000388: 33 04 95 02 mul s0, a0, s1 -8000038c: ef 00 c0 52 jal 1324 -80000390: 33 05 55 01 add a0, a0, s5 -80000394: 03 a6 41 c3 lw a2, -972(gp) -80000398: 33 47 85 02 div a4, a0, s0 -8000039c: b3 05 87 02 mul a1, a4, s0 -800003a0: 33 05 b5 40 sub a0, a0, a1 -800003a4: 83 25 06 00 lw a1, 0(a2) -800003a8: b3 46 95 02 div a3, a0, s1 -800003ac: b3 87 96 02 mul a5, a3, s1 -800003b0: 33 08 f5 40 sub a6, a0, a5 -800003b4: 83 a4 c5 00 lw s1, 12(a1) -800003b8: 03 a4 05 01 lw s0, 16(a1) -800003bc: 83 a7 45 01 lw a5, 20(a1) -800003c0: 83 28 46 00 lw a7, 4(a2) -800003c4: 03 25 86 00 lw a0, 8(a2) -800003c8: 33 06 98 00 add a2, a6, s1 -800003cc: b3 06 d4 00 add a3, s0, a3 -800003d0: 33 87 e7 00 add a4, a5, a4 -800003d4: e7 80 08 00 jalr a7 -800003d8: 13 05 10 00 addi a0, zero, 1 -800003dc: ef 00 40 4a jal 1188 -800003e0: 03 2b 01 02 lw s6, 32(sp) -800003e4: 83 2a 41 02 lw s5, 36(sp) -800003e8: 03 2a 81 02 lw s4, 40(sp) -800003ec: 83 29 c1 02 lw s3, 44(sp) -800003f0: 03 29 01 03 lw s2, 48(sp) -800003f4: 83 24 41 03 lw s1, 52(sp) -800003f8: 03 24 81 03 lw s0, 56(sp) -800003fc: 83 20 c1 03 lw ra, 60(sp) -80000400: 13 01 01 04 addi sp, sp, 64 -80000404: 67 80 00 00 ret +80000298: ef 00 00 5e jal 1504 +8000029c: 93 05 70 00 addi a1, zero, 7 +800002a0: 63 c8 a5 0e blt a1, a0, 240 +800002a4: b3 05 74 03 mul a1, s0, s7 +800002a8: 33 86 85 03 mul a2, a1, s8 +800002ac: b3 85 4a 03 mul a1, s5, s4 +800002b0: 93 06 10 00 addi a3, zero, 1 +800002b4: 63 c8 c5 00 blt a1, a2, 16 +800002b8: 63 da 66 01 bge a3, s6, 20 +800002bc: 63 4c d5 00 blt a0, a3, 24 +800002c0: 6f 00 00 0d j 208 +800002c4: b3 46 b6 02 div a3, a2, a1 +800002c8: e3 ca 66 ff blt a3, s6, -12 +800002cc: 93 06 0b 00 mv a3, s6 +800002d0: 63 50 d5 0c bge a0, a3, 192 +800002d4: 13 07 fb ff addi a4, s6, -1 +800002d8: b3 45 d6 02 div a1, a2, a3 +800002dc: 63 0e e5 00 beq a0, a4, 28 +800002e0: 13 06 00 00 mv a2, zero +800002e4: 33 0b b6 00 add s6, a2, a1 +800002e8: 33 46 5b 03 div a2, s6, s5 +800002ec: 93 06 00 00 mv a3, zero +800002f0: 63 50 46 03 bge a2, s4, 32 +800002f4: 6f 00 00 02 j 32 +800002f8: b3 86 d5 02 mul a3, a1, a3 +800002fc: 33 06 d6 40 sub a2, a2, a3 +80000300: 33 0b b6 00 add s6, a2, a1 +80000304: 33 46 5b 03 div a2, s6, s5 +80000308: 93 06 00 00 mv a3, zero +8000030c: 63 44 46 01 blt a2, s4, 8 +80000310: b3 46 46 03 div a3, a2, s4 +80000314: 13 07 00 00 mv a4, zero +80000318: 93 07 10 00 addi a5, zero, 1 +8000031c: 63 88 06 00 beqz a3, 16 +80000320: 33 87 46 03 mul a4, a3, s4 +80000324: 33 07 e6 40 sub a4, a2, a4 +80000328: 93 87 06 00 mv a5, a3 +8000032c: 33 04 56 03 mul s0, a2, s5 +80000330: 23 20 91 00 sw s1, 0(sp) +80000334: 23 22 31 01 sw s3, 4(sp) +80000338: 23 24 21 01 sw s2, 8(sp) +8000033c: b3 85 a5 02 mul a1, a1, a0 +80000340: 23 26 b1 00 sw a1, 12(sp) +80000344: 23 28 f1 00 sw a5, 16(sp) +80000348: 23 2a e1 00 sw a4, 20(sp) +8000034c: 93 85 41 c3 addi a1, gp, -972 +80000350: 13 15 25 00 slli a0, a0, 2 +80000354: 33 05 b5 00 add a0, a0, a1 +80000358: 93 05 01 00 mv a1, sp +8000035c: 93 06 20 00 addi a3, zero, 2 +80000360: 23 20 b5 00 sw a1, 0(a0) +80000364: 63 40 d6 02 blt a2, a3, 32 +80000368: 63 44 46 01 blt a2, s4, 8 +8000036c: 13 06 0a 00 mv a2, s4 +80000370: 37 05 00 80 lui a0, 524288 +80000374: 93 05 05 06 addi a1, a0, 96 +80000378: 13 05 06 00 mv a0, a2 +8000037c: ef 00 40 4b jal 1204 +80000380: ef f0 1f ce jal -800 +80000384: 63 06 8b 00 beq s6, s0, 12 +80000388: 23 26 81 00 sw s0, 12(sp) +8000038c: ef f0 5f cd jal -812 +80000390: 03 2c 81 01 lw s8, 24(sp) +80000394: 83 2b c1 01 lw s7, 28(sp) +80000398: 03 2b 01 02 lw s6, 32(sp) +8000039c: 83 2a 41 02 lw s5, 36(sp) +800003a0: 03 2a 81 02 lw s4, 40(sp) +800003a4: 83 29 c1 02 lw s3, 44(sp) +800003a8: 03 29 01 03 lw s2, 48(sp) +800003ac: 83 24 41 03 lw s1, 52(sp) +800003b0: 03 24 81 03 lw s0, 56(sp) +800003b4: 83 20 c1 03 lw ra, 60(sp) +800003b8: 13 01 01 04 addi sp, sp, 64 +800003bc: 67 80 00 00 ret -80000408 main: -80000408: 13 01 01 ff addi sp, sp, -16 -8000040c: 23 26 11 00 sw ra, 12(sp) -80000410: 37 05 00 80 lui a0, 524288 -80000414: 93 05 05 5a addi a1, a0, 1440 -80000418: 37 05 ff 7f lui a0, 524272 -8000041c: 13 06 45 03 addi a2, a0, 52 -80000420: 37 05 ff 7f lui a0, 524272 -80000424: ef f0 9f e1 jal -488 -80000428: 13 05 00 00 mv a0, zero -8000042c: 83 20 c1 00 lw ra, 12(sp) -80000430: 13 01 01 01 addi sp, sp, 16 -80000434: 67 80 00 00 ret +800003c0 main: +800003c0: 13 01 01 ff addi sp, sp, -16 +800003c4: 23 26 11 00 sw ra, 12(sp) +800003c8: 37 05 00 80 lui a0, 524288 +800003cc: 93 05 85 55 addi a1, a0, 1368 +800003d0: 37 05 ff 7f lui a0, 524272 +800003d4: 13 06 45 03 addi a2, a0, 52 +800003d8: 37 05 ff 7f lui a0, 524272 +800003dc: ef f0 1f e6 jal -416 +800003e0: 13 05 00 00 mv a0, zero +800003e4: 83 20 c1 00 lw ra, 12(sp) +800003e8: 13 01 01 01 addi sp, sp, 16 +800003ec: 67 80 00 00 ret -80000438 _Z8_cl_sqrtf: -80000438: 13 01 01 ff addi sp, sp, -16 -8000043c: 23 26 11 00 sw ra, 12(sp) -80000440: 23 24 81 00 sw s0, 8(sp) -80000444: 13 04 01 01 addi s0, sp, 16 -80000448: 03 24 81 00 lw s0, 8(sp) -8000044c: 83 20 c1 00 lw ra, 12(sp) -80000450: 13 01 01 01 addi sp, sp, 16 -80000454: 17 03 00 00 auipc t1, 0 -80000458: 67 00 c3 49 jr 1180(t1) +800003f0 _Z8_cl_sqrtf: +800003f0: 13 01 01 ff addi sp, sp, -16 +800003f4: 23 26 11 00 sw ra, 12(sp) +800003f8: 23 24 81 00 sw s0, 8(sp) +800003fc: 13 04 01 01 addi s0, sp, 16 +80000400: 03 24 81 00 lw s0, 8(sp) +80000404: 83 20 c1 00 lw ra, 12(sp) +80000408: 13 01 01 01 addi sp, sp, 16 +8000040c: 17 03 00 00 auipc t1, 0 +80000410: 67 00 c3 49 jr 1180(t1) -8000045c _pocl_kernel_NearestNeighbor: -8000045c: 13 01 01 fc addi sp, sp, -64 -80000460: 23 2e 11 02 sw ra, 60(sp) -80000464: 23 2c 81 02 sw s0, 56(sp) -80000468: 23 2a 91 02 sw s1, 52(sp) -8000046c: 23 28 21 03 sw s2, 48(sp) -80000470: 23 26 31 03 sw s3, 44(sp) -80000474: 23 24 41 03 sw s4, 40(sp) -80000478: 23 22 51 03 sw s5, 36(sp) -8000047c: 23 20 61 03 sw s6, 32(sp) -80000480: 23 2e 71 01 sw s7, 28(sp) -80000484: 23 2c 81 01 sw s8, 24(sp) -80000488: 23 2a 91 01 sw s9, 20(sp) -8000048c: 23 28 a1 01 sw s10, 16(sp) -80000490: 23 26 b1 01 sw s11, 12(sp) -80000494: 27 24 81 00 fsw fs0, 8(sp) -80000498: 27 22 91 00 fsw fs1, 4(sp) -8000049c: 13 04 01 04 addi s0, sp, 64 -800004a0: 13 71 c1 ff andi sp, sp, -4 -800004a4: 53 84 b5 20 fmv.s fs0, fa1 -800004a8: d3 04 a5 20 fmv.s fs1, fa0 -800004ac: 93 09 06 00 mv s3, a2 -800004b0: 13 09 00 00 mv s2, zero -800004b4: 03 ab 86 01 lw s6, 24(a3) -800004b8: 03 a6 c6 00 lw a2, 12(a3) -800004bc: 03 aa c6 01 lw s4, 28(a3) -800004c0: 83 a6 06 02 lw a3, 32(a3) -800004c4: 23 20 d1 00 sw a3, 0(sp) -800004c8: b3 06 eb 02 mul a3, s6, a4 -800004cc: 33 0d d6 00 add s10, a2, a3 -800004d0: 13 16 3d 00 slli a2, s10, 3 -800004d4: 33 05 c5 00 add a0, a0, a2 -800004d8: 13 16 2d 00 slli a2, s10, 2 -800004dc: b3 8b c5 00 add s7, a1, a2 -800004e0: 93 0a 45 00 addi s5, a0, 4 -800004e4: 6f 00 00 01 j 16 -800004e8: 13 09 19 00 addi s2, s2, 1 -800004ec: 03 25 01 00 lw a0, 0(sp) -800004f0: 63 74 a9 06 bgeu s2, a0, 104 -800004f4: 93 0c 00 00 mv s9, zero -800004f8: 6f 00 c0 00 j 12 -800004fc: 93 8c 1c 00 addi s9, s9, 1 -80000500: e3 f4 4c ff bgeu s9, s4, -24 -80000504: 93 04 00 00 mv s1, zero -80000508: 93 8d 0b 00 mv s11, s7 -8000050c: 13 8c 0a 00 mv s8, s5 -80000510: 6f 00 40 01 j 20 -80000514: 93 84 14 00 addi s1, s1, 1 -80000518: 13 0c 8c 00 addi s8, s8, 8 -8000051c: 93 8d 4d 00 addi s11, s11, 4 -80000520: e3 fe 64 fd bgeu s1, s6, -36 -80000524: 33 05 9d 00 add a0, s10, s1 -80000528: e3 56 35 ff bge a0, s3, -20 -8000052c: 07 20 cc ff flw ft0, -4(s8) -80000530: 87 20 0c 00 flw ft1, 0(s8) -80000534: 53 f0 04 08 fsub.s ft0, fs1, ft0 -80000538: d3 70 14 08 fsub.s ft1, fs0, ft1 -8000053c: d3 f0 10 10 fmul.s ft1, ft1, ft1 -80000540: 53 70 00 10 fmul.s ft0, ft0, ft0 -80000544: 53 75 10 00 fadd.s fa0, ft0, ft1 -80000548: 97 00 00 00 auipc ra, 0 -8000054c: e7 80 00 ef jalr -272(ra) -80000550: 27 a0 ad 00 fsw fa0, 0(s11) -80000554: 6f f0 1f fc j -64 -80000558: 13 01 04 fc addi sp, s0, -64 -8000055c: 87 24 41 00 flw fs1, 4(sp) -80000560: 07 24 81 00 flw fs0, 8(sp) -80000564: 83 2d c1 00 lw s11, 12(sp) -80000568: 03 2d 01 01 lw s10, 16(sp) -8000056c: 83 2c 41 01 lw s9, 20(sp) -80000570: 03 2c 81 01 lw s8, 24(sp) -80000574: 83 2b c1 01 lw s7, 28(sp) -80000578: 03 2b 01 02 lw s6, 32(sp) -8000057c: 83 2a 41 02 lw s5, 36(sp) -80000580: 03 2a 81 02 lw s4, 40(sp) -80000584: 83 29 c1 02 lw s3, 44(sp) -80000588: 03 29 01 03 lw s2, 48(sp) -8000058c: 83 24 41 03 lw s1, 52(sp) -80000590: 03 24 81 03 lw s0, 56(sp) -80000594: 83 20 c1 03 lw ra, 60(sp) -80000598: 13 01 01 04 addi sp, sp, 64 -8000059c: 67 80 00 00 ret +80000414 _pocl_kernel_NearestNeighbor: +80000414: 13 01 01 fc addi sp, sp, -64 +80000418: 23 2e 11 02 sw ra, 60(sp) +8000041c: 23 2c 81 02 sw s0, 56(sp) +80000420: 23 2a 91 02 sw s1, 52(sp) +80000424: 23 28 21 03 sw s2, 48(sp) +80000428: 23 26 31 03 sw s3, 44(sp) +8000042c: 23 24 41 03 sw s4, 40(sp) +80000430: 23 22 51 03 sw s5, 36(sp) +80000434: 23 20 61 03 sw s6, 32(sp) +80000438: 23 2e 71 01 sw s7, 28(sp) +8000043c: 23 2c 81 01 sw s8, 24(sp) +80000440: 23 2a 91 01 sw s9, 20(sp) +80000444: 23 28 a1 01 sw s10, 16(sp) +80000448: 23 26 b1 01 sw s11, 12(sp) +8000044c: 27 24 81 00 fsw fs0, 8(sp) +80000450: 27 22 91 00 fsw fs1, 4(sp) +80000454: 13 04 01 04 addi s0, sp, 64 +80000458: 13 71 c1 ff andi sp, sp, -4 +8000045c: 53 84 b5 20 fmv.s fs0, fa1 +80000460: d3 04 a5 20 fmv.s fs1, fa0 +80000464: 93 09 06 00 mv s3, a2 +80000468: 13 09 00 00 mv s2, zero +8000046c: 03 ab 86 01 lw s6, 24(a3) +80000470: 03 a6 c6 00 lw a2, 12(a3) +80000474: 03 aa c6 01 lw s4, 28(a3) +80000478: 83 a6 06 02 lw a3, 32(a3) +8000047c: 23 20 d1 00 sw a3, 0(sp) +80000480: b3 06 eb 02 mul a3, s6, a4 +80000484: 33 0d d6 00 add s10, a2, a3 +80000488: 13 16 3d 00 slli a2, s10, 3 +8000048c: 33 05 c5 00 add a0, a0, a2 +80000490: 13 16 2d 00 slli a2, s10, 2 +80000494: b3 8b c5 00 add s7, a1, a2 +80000498: 93 0a 45 00 addi s5, a0, 4 +8000049c: 6f 00 00 01 j 16 +800004a0: 13 09 19 00 addi s2, s2, 1 +800004a4: 03 25 01 00 lw a0, 0(sp) +800004a8: 63 74 a9 06 bgeu s2, a0, 104 +800004ac: 93 0c 00 00 mv s9, zero +800004b0: 6f 00 c0 00 j 12 +800004b4: 93 8c 1c 00 addi s9, s9, 1 +800004b8: e3 f4 4c ff bgeu s9, s4, -24 +800004bc: 93 04 00 00 mv s1, zero +800004c0: 93 8d 0b 00 mv s11, s7 +800004c4: 13 8c 0a 00 mv s8, s5 +800004c8: 6f 00 40 01 j 20 +800004cc: 93 84 14 00 addi s1, s1, 1 +800004d0: 13 0c 8c 00 addi s8, s8, 8 +800004d4: 93 8d 4d 00 addi s11, s11, 4 +800004d8: e3 fe 64 fd bgeu s1, s6, -36 +800004dc: 33 05 9d 00 add a0, s10, s1 +800004e0: e3 56 35 ff bge a0, s3, -20 +800004e4: 07 20 cc ff flw ft0, -4(s8) +800004e8: 87 20 0c 00 flw ft1, 0(s8) +800004ec: 53 f0 04 08 fsub.s ft0, fs1, ft0 +800004f0: d3 70 14 08 fsub.s ft1, fs0, ft1 +800004f4: d3 f0 10 10 fmul.s ft1, ft1, ft1 +800004f8: 53 70 00 10 fmul.s ft0, ft0, ft0 +800004fc: 53 75 10 00 fadd.s fa0, ft0, ft1 +80000500: 97 00 00 00 auipc ra, 0 +80000504: e7 80 00 ef jalr -272(ra) +80000508: 27 a0 ad 00 fsw fa0, 0(s11) +8000050c: 6f f0 1f fc j -64 +80000510: 13 01 04 fc addi sp, s0, -64 +80000514: 87 24 41 00 flw fs1, 4(sp) +80000518: 07 24 81 00 flw fs0, 8(sp) +8000051c: 83 2d c1 00 lw s11, 12(sp) +80000520: 03 2d 01 01 lw s10, 16(sp) +80000524: 83 2c 41 01 lw s9, 20(sp) +80000528: 03 2c 81 01 lw s8, 24(sp) +8000052c: 83 2b c1 01 lw s7, 28(sp) +80000530: 03 2b 01 02 lw s6, 32(sp) +80000534: 83 2a 41 02 lw s5, 36(sp) +80000538: 03 2a 81 02 lw s4, 40(sp) +8000053c: 83 29 c1 02 lw s3, 44(sp) +80000540: 03 29 01 03 lw s2, 48(sp) +80000544: 83 24 41 03 lw s1, 52(sp) +80000548: 03 24 81 03 lw s0, 56(sp) +8000054c: 83 20 c1 03 lw ra, 60(sp) +80000550: 13 01 01 04 addi sp, sp, 64 +80000554: 67 80 00 00 ret -800005a0 _pocl_kernel_NearestNeighbor_workgroup: -800005a0: 13 01 01 fc addi sp, sp, -64 -800005a4: 23 2e 11 02 sw ra, 60(sp) -800005a8: 23 2c 81 02 sw s0, 56(sp) -800005ac: 23 2a 91 02 sw s1, 52(sp) -800005b0: 23 28 21 03 sw s2, 48(sp) -800005b4: 23 26 31 03 sw s3, 44(sp) -800005b8: 23 24 41 03 sw s4, 40(sp) -800005bc: 23 22 51 03 sw s5, 36(sp) -800005c0: 23 20 61 03 sw s6, 32(sp) -800005c4: 23 2e 71 01 sw s7, 28(sp) -800005c8: 23 2c 81 01 sw s8, 24(sp) -800005cc: 23 2a 91 01 sw s9, 20(sp) -800005d0: 23 28 a1 01 sw s10, 16(sp) -800005d4: 23 26 b1 01 sw s11, 12(sp) -800005d8: 27 24 81 00 fsw fs0, 8(sp) -800005dc: 27 22 91 00 fsw fs1, 4(sp) -800005e0: 83 26 05 00 lw a3, 0(a0) -800005e4: 13 09 00 00 mv s2, zero -800005e8: 83 a6 06 00 lw a3, 0(a3) -800005ec: 03 27 45 00 lw a4, 4(a0) -800005f0: 83 27 85 00 lw a5, 8(a0) -800005f4: 83 24 c5 00 lw s1, 12(a0) -800005f8: 03 25 05 01 lw a0, 16(a0) -800005fc: 03 27 07 00 lw a4, 0(a4) -80000600: 83 aa 07 00 lw s5, 0(a5) -80000604: 07 a4 04 00 flw fs0, 0(s1) -80000608: 87 24 05 00 flw fs1, 0(a0) -8000060c: 83 ac 85 01 lw s9, 24(a1) -80000610: 03 a5 c5 00 lw a0, 12(a1) -80000614: 83 a9 c5 01 lw s3, 28(a1) -80000618: 03 aa 05 02 lw s4, 32(a1) -8000061c: b3 85 cc 02 mul a1, s9, a2 -80000620: 33 0d b5 00 add s10, a0, a1 -80000624: 13 15 3d 00 slli a0, s10, 3 -80000628: 33 85 a6 00 add a0, a3, a0 -8000062c: 93 15 2d 00 slli a1, s10, 2 -80000630: 33 0b b7 00 add s6, a4, a1 -80000634: 93 0b 45 00 addi s7, a0, 4 -80000638: 6f 00 c0 00 j 12 -8000063c: 13 09 19 00 addi s2, s2, 1 -80000640: 63 74 49 07 bgeu s2, s4, 104 -80000644: 13 0c 00 00 mv s8, zero -80000648: 6f 00 c0 00 j 12 -8000064c: 13 0c 1c 00 addi s8, s8, 1 -80000650: e3 76 3c ff bgeu s8, s3, -20 -80000654: 13 04 00 00 mv s0, zero -80000658: 93 0d 0b 00 mv s11, s6 -8000065c: 93 84 0b 00 mv s1, s7 -80000660: 6f 00 40 01 j 20 -80000664: 13 04 14 00 addi s0, s0, 1 -80000668: 93 84 84 00 addi s1, s1, 8 -8000066c: 93 8d 4d 00 addi s11, s11, 4 -80000670: e3 7e 94 fd bgeu s0, s9, -36 -80000674: 33 05 8d 00 add a0, s10, s0 -80000678: e3 56 55 ff bge a0, s5, -20 -8000067c: 07 a0 c4 ff flw ft0, -4(s1) -80000680: 87 a0 04 00 flw ft1, 0(s1) -80000684: 53 70 04 08 fsub.s ft0, fs0, ft0 -80000688: d3 f0 14 08 fsub.s ft1, fs1, ft1 -8000068c: d3 f0 10 10 fmul.s ft1, ft1, ft1 -80000690: 53 70 00 10 fmul.s ft0, ft0, ft0 -80000694: 53 75 10 00 fadd.s fa0, ft0, ft1 -80000698: 97 00 00 00 auipc ra, 0 -8000069c: e7 80 00 da jalr -608(ra) -800006a0: 27 a0 ad 00 fsw fa0, 0(s11) -800006a4: 6f f0 1f fc j -64 -800006a8: 87 24 41 00 flw fs1, 4(sp) -800006ac: 07 24 81 00 flw fs0, 8(sp) -800006b0: 83 2d c1 00 lw s11, 12(sp) -800006b4: 03 2d 01 01 lw s10, 16(sp) -800006b8: 83 2c 41 01 lw s9, 20(sp) -800006bc: 03 2c 81 01 lw s8, 24(sp) -800006c0: 83 2b c1 01 lw s7, 28(sp) -800006c4: 03 2b 01 02 lw s6, 32(sp) -800006c8: 83 2a 41 02 lw s5, 36(sp) -800006cc: 03 2a 81 02 lw s4, 40(sp) -800006d0: 83 29 c1 02 lw s3, 44(sp) -800006d4: 03 29 01 03 lw s2, 48(sp) -800006d8: 83 24 41 03 lw s1, 52(sp) -800006dc: 03 24 81 03 lw s0, 56(sp) -800006e0: 83 20 c1 03 lw ra, 60(sp) -800006e4: 13 01 01 04 addi sp, sp, 64 -800006e8: 67 80 00 00 ret +80000558 _pocl_kernel_NearestNeighbor_workgroup: +80000558: 13 01 01 fc addi sp, sp, -64 +8000055c: 23 2e 11 02 sw ra, 60(sp) +80000560: 23 2c 81 02 sw s0, 56(sp) +80000564: 23 2a 91 02 sw s1, 52(sp) +80000568: 23 28 21 03 sw s2, 48(sp) +8000056c: 23 26 31 03 sw s3, 44(sp) +80000570: 23 24 41 03 sw s4, 40(sp) +80000574: 23 22 51 03 sw s5, 36(sp) +80000578: 23 20 61 03 sw s6, 32(sp) +8000057c: 23 2e 71 01 sw s7, 28(sp) +80000580: 23 2c 81 01 sw s8, 24(sp) +80000584: 23 2a 91 01 sw s9, 20(sp) +80000588: 23 28 a1 01 sw s10, 16(sp) +8000058c: 23 26 b1 01 sw s11, 12(sp) +80000590: 27 24 81 00 fsw fs0, 8(sp) +80000594: 27 22 91 00 fsw fs1, 4(sp) +80000598: 83 26 05 00 lw a3, 0(a0) +8000059c: 13 09 00 00 mv s2, zero +800005a0: 83 a6 06 00 lw a3, 0(a3) +800005a4: 03 27 45 00 lw a4, 4(a0) +800005a8: 83 27 85 00 lw a5, 8(a0) +800005ac: 83 24 c5 00 lw s1, 12(a0) +800005b0: 03 25 05 01 lw a0, 16(a0) +800005b4: 03 27 07 00 lw a4, 0(a4) +800005b8: 83 aa 07 00 lw s5, 0(a5) +800005bc: 07 a4 04 00 flw fs0, 0(s1) +800005c0: 87 24 05 00 flw fs1, 0(a0) +800005c4: 83 ac 85 01 lw s9, 24(a1) +800005c8: 03 a5 c5 00 lw a0, 12(a1) +800005cc: 83 a9 c5 01 lw s3, 28(a1) +800005d0: 03 aa 05 02 lw s4, 32(a1) +800005d4: b3 85 cc 02 mul a1, s9, a2 +800005d8: 33 0d b5 00 add s10, a0, a1 +800005dc: 13 15 3d 00 slli a0, s10, 3 +800005e0: 33 85 a6 00 add a0, a3, a0 +800005e4: 93 15 2d 00 slli a1, s10, 2 +800005e8: 33 0b b7 00 add s6, a4, a1 +800005ec: 93 0b 45 00 addi s7, a0, 4 +800005f0: 6f 00 c0 00 j 12 +800005f4: 13 09 19 00 addi s2, s2, 1 +800005f8: 63 74 49 07 bgeu s2, s4, 104 +800005fc: 13 0c 00 00 mv s8, zero +80000600: 6f 00 c0 00 j 12 +80000604: 13 0c 1c 00 addi s8, s8, 1 +80000608: e3 76 3c ff bgeu s8, s3, -20 +8000060c: 13 04 00 00 mv s0, zero +80000610: 93 0d 0b 00 mv s11, s6 +80000614: 93 84 0b 00 mv s1, s7 +80000618: 6f 00 40 01 j 20 +8000061c: 13 04 14 00 addi s0, s0, 1 +80000620: 93 84 84 00 addi s1, s1, 8 +80000624: 93 8d 4d 00 addi s11, s11, 4 +80000628: e3 7e 94 fd bgeu s0, s9, -36 +8000062c: 33 05 8d 00 add a0, s10, s0 +80000630: e3 56 55 ff bge a0, s5, -20 +80000634: 07 a0 c4 ff flw ft0, -4(s1) +80000638: 87 a0 04 00 flw ft1, 0(s1) +8000063c: 53 70 04 08 fsub.s ft0, fs0, ft0 +80000640: d3 f0 14 08 fsub.s ft1, fs1, ft1 +80000644: d3 f0 10 10 fmul.s ft1, ft1, ft1 +80000648: 53 70 00 10 fmul.s ft0, ft0, ft0 +8000064c: 53 75 10 00 fadd.s fa0, ft0, ft1 +80000650: 97 00 00 00 auipc ra, 0 +80000654: e7 80 00 da jalr -608(ra) +80000658: 27 a0 ad 00 fsw fa0, 0(s11) +8000065c: 6f f0 1f fc j -64 +80000660: 87 24 41 00 flw fs1, 4(sp) +80000664: 07 24 81 00 flw fs0, 8(sp) +80000668: 83 2d c1 00 lw s11, 12(sp) +8000066c: 03 2d 01 01 lw s10, 16(sp) +80000670: 83 2c 41 01 lw s9, 20(sp) +80000674: 03 2c 81 01 lw s8, 24(sp) +80000678: 83 2b c1 01 lw s7, 28(sp) +8000067c: 03 2b 01 02 lw s6, 32(sp) +80000680: 83 2a 41 02 lw s5, 36(sp) +80000684: 03 2a 81 02 lw s4, 40(sp) +80000688: 83 29 c1 02 lw s3, 44(sp) +8000068c: 03 29 01 03 lw s2, 48(sp) +80000690: 83 24 41 03 lw s1, 52(sp) +80000694: 03 24 81 03 lw s0, 56(sp) +80000698: 83 20 c1 03 lw ra, 60(sp) +8000069c: 13 01 01 04 addi sp, sp, 64 +800006a0: 67 80 00 00 ret -800006ec _pocl_kernel_NearestNeighbor_workgroup_fast: -800006ec: 13 01 01 fc addi sp, sp, -64 -800006f0: 23 2e 11 02 sw ra, 60(sp) -800006f4: 23 2c 81 02 sw s0, 56(sp) -800006f8: 23 2a 91 02 sw s1, 52(sp) -800006fc: 23 28 21 03 sw s2, 48(sp) -80000700: 23 26 31 03 sw s3, 44(sp) -80000704: 23 24 41 03 sw s4, 40(sp) -80000708: 23 22 51 03 sw s5, 36(sp) -8000070c: 23 20 61 03 sw s6, 32(sp) -80000710: 23 2e 71 01 sw s7, 28(sp) -80000714: 23 2c 81 01 sw s8, 24(sp) -80000718: 23 2a 91 01 sw s9, 20(sp) -8000071c: 23 28 a1 01 sw s10, 16(sp) -80000720: 23 26 b1 01 sw s11, 12(sp) -80000724: 27 24 81 00 fsw fs0, 8(sp) -80000728: 27 22 91 00 fsw fs1, 4(sp) -8000072c: 13 09 00 00 mv s2, zero -80000730: 83 26 05 00 lw a3, 0(a0) -80000734: 03 27 85 00 lw a4, 8(a0) -80000738: 83 27 c5 00 lw a5, 12(a0) -8000073c: 83 24 05 01 lw s1, 16(a0) -80000740: 03 25 45 00 lw a0, 4(a0) -80000744: 83 2a 07 00 lw s5, 0(a4) -80000748: 07 a4 07 00 flw fs0, 0(a5) -8000074c: 87 a4 04 00 flw fs1, 0(s1) -80000750: 83 ac 85 01 lw s9, 24(a1) -80000754: 03 a7 c5 00 lw a4, 12(a1) -80000758: 83 a9 c5 01 lw s3, 28(a1) -8000075c: 03 aa 05 02 lw s4, 32(a1) -80000760: b3 85 cc 02 mul a1, s9, a2 -80000764: 33 0d b7 00 add s10, a4, a1 -80000768: 93 15 3d 00 slli a1, s10, 3 -8000076c: b3 85 b6 00 add a1, a3, a1 -80000770: 13 16 2d 00 slli a2, s10, 2 -80000774: 33 0b c5 00 add s6, a0, a2 -80000778: 93 8b 45 00 addi s7, a1, 4 -8000077c: 6f 00 c0 00 j 12 -80000780: 13 09 19 00 addi s2, s2, 1 -80000784: 63 74 49 07 bgeu s2, s4, 104 -80000788: 13 0c 00 00 mv s8, zero -8000078c: 6f 00 c0 00 j 12 -80000790: 13 0c 1c 00 addi s8, s8, 1 -80000794: e3 76 3c ff bgeu s8, s3, -20 -80000798: 13 04 00 00 mv s0, zero -8000079c: 93 0d 0b 00 mv s11, s6 -800007a0: 93 84 0b 00 mv s1, s7 -800007a4: 6f 00 40 01 j 20 -800007a8: 13 04 14 00 addi s0, s0, 1 -800007ac: 93 84 84 00 addi s1, s1, 8 -800007b0: 93 8d 4d 00 addi s11, s11, 4 -800007b4: e3 7e 94 fd bgeu s0, s9, -36 -800007b8: 33 05 8d 00 add a0, s10, s0 -800007bc: e3 56 55 ff bge a0, s5, -20 -800007c0: 07 a0 c4 ff flw ft0, -4(s1) -800007c4: 87 a0 04 00 flw ft1, 0(s1) -800007c8: 53 70 04 08 fsub.s ft0, fs0, ft0 -800007cc: d3 f0 14 08 fsub.s ft1, fs1, ft1 -800007d0: d3 f0 10 10 fmul.s ft1, ft1, ft1 -800007d4: 53 70 00 10 fmul.s ft0, ft0, ft0 -800007d8: 53 75 10 00 fadd.s fa0, ft0, ft1 -800007dc: 97 00 00 00 auipc ra, 0 -800007e0: e7 80 c0 c5 jalr -932(ra) -800007e4: 27 a0 ad 00 fsw fa0, 0(s11) -800007e8: 6f f0 1f fc j -64 -800007ec: 87 24 41 00 flw fs1, 4(sp) -800007f0: 07 24 81 00 flw fs0, 8(sp) -800007f4: 83 2d c1 00 lw s11, 12(sp) -800007f8: 03 2d 01 01 lw s10, 16(sp) -800007fc: 83 2c 41 01 lw s9, 20(sp) -80000800: 03 2c 81 01 lw s8, 24(sp) -80000804: 83 2b c1 01 lw s7, 28(sp) -80000808: 03 2b 01 02 lw s6, 32(sp) -8000080c: 83 2a 41 02 lw s5, 36(sp) -80000810: 03 2a 81 02 lw s4, 40(sp) -80000814: 83 29 c1 02 lw s3, 44(sp) -80000818: 03 29 01 03 lw s2, 48(sp) -8000081c: 83 24 41 03 lw s1, 52(sp) -80000820: 03 24 81 03 lw s0, 56(sp) -80000824: 83 20 c1 03 lw ra, 60(sp) -80000828: 13 01 01 04 addi sp, sp, 64 +800006a4 _pocl_kernel_NearestNeighbor_workgroup_fast: +800006a4: 13 01 01 fc addi sp, sp, -64 +800006a8: 23 2e 11 02 sw ra, 60(sp) +800006ac: 23 2c 81 02 sw s0, 56(sp) +800006b0: 23 2a 91 02 sw s1, 52(sp) +800006b4: 23 28 21 03 sw s2, 48(sp) +800006b8: 23 26 31 03 sw s3, 44(sp) +800006bc: 23 24 41 03 sw s4, 40(sp) +800006c0: 23 22 51 03 sw s5, 36(sp) +800006c4: 23 20 61 03 sw s6, 32(sp) +800006c8: 23 2e 71 01 sw s7, 28(sp) +800006cc: 23 2c 81 01 sw s8, 24(sp) +800006d0: 23 2a 91 01 sw s9, 20(sp) +800006d4: 23 28 a1 01 sw s10, 16(sp) +800006d8: 23 26 b1 01 sw s11, 12(sp) +800006dc: 27 24 81 00 fsw fs0, 8(sp) +800006e0: 27 22 91 00 fsw fs1, 4(sp) +800006e4: 13 09 00 00 mv s2, zero +800006e8: 83 26 05 00 lw a3, 0(a0) +800006ec: 03 27 85 00 lw a4, 8(a0) +800006f0: 83 27 c5 00 lw a5, 12(a0) +800006f4: 83 24 05 01 lw s1, 16(a0) +800006f8: 03 25 45 00 lw a0, 4(a0) +800006fc: 83 2a 07 00 lw s5, 0(a4) +80000700: 07 a4 07 00 flw fs0, 0(a5) +80000704: 87 a4 04 00 flw fs1, 0(s1) +80000708: 83 ac 85 01 lw s9, 24(a1) +8000070c: 03 a7 c5 00 lw a4, 12(a1) +80000710: 83 a9 c5 01 lw s3, 28(a1) +80000714: 03 aa 05 02 lw s4, 32(a1) +80000718: b3 85 cc 02 mul a1, s9, a2 +8000071c: 33 0d b7 00 add s10, a4, a1 +80000720: 93 15 3d 00 slli a1, s10, 3 +80000724: b3 85 b6 00 add a1, a3, a1 +80000728: 13 16 2d 00 slli a2, s10, 2 +8000072c: 33 0b c5 00 add s6, a0, a2 +80000730: 93 8b 45 00 addi s7, a1, 4 +80000734: 6f 00 c0 00 j 12 +80000738: 13 09 19 00 addi s2, s2, 1 +8000073c: 63 74 49 07 bgeu s2, s4, 104 +80000740: 13 0c 00 00 mv s8, zero +80000744: 6f 00 c0 00 j 12 +80000748: 13 0c 1c 00 addi s8, s8, 1 +8000074c: e3 76 3c ff bgeu s8, s3, -20 +80000750: 13 04 00 00 mv s0, zero +80000754: 93 0d 0b 00 mv s11, s6 +80000758: 93 84 0b 00 mv s1, s7 +8000075c: 6f 00 40 01 j 20 +80000760: 13 04 14 00 addi s0, s0, 1 +80000764: 93 84 84 00 addi s1, s1, 8 +80000768: 93 8d 4d 00 addi s11, s11, 4 +8000076c: e3 7e 94 fd bgeu s0, s9, -36 +80000770: 33 05 8d 00 add a0, s10, s0 +80000774: e3 56 55 ff bge a0, s5, -20 +80000778: 07 a0 c4 ff flw ft0, -4(s1) +8000077c: 87 a0 04 00 flw ft1, 0(s1) +80000780: 53 70 04 08 fsub.s ft0, fs0, ft0 +80000784: d3 f0 14 08 fsub.s ft1, fs1, ft1 +80000788: d3 f0 10 10 fmul.s ft1, ft1, ft1 +8000078c: 53 70 00 10 fmul.s ft0, ft0, ft0 +80000790: 53 75 10 00 fadd.s fa0, ft0, ft1 +80000794: 97 00 00 00 auipc ra, 0 +80000798: e7 80 c0 c5 jalr -932(ra) +8000079c: 27 a0 ad 00 fsw fa0, 0(s11) +800007a0: 6f f0 1f fc j -64 +800007a4: 87 24 41 00 flw fs1, 4(sp) +800007a8: 07 24 81 00 flw fs0, 8(sp) +800007ac: 83 2d c1 00 lw s11, 12(sp) +800007b0: 03 2d 01 01 lw s10, 16(sp) +800007b4: 83 2c 41 01 lw s9, 20(sp) +800007b8: 03 2c 81 01 lw s8, 24(sp) +800007bc: 83 2b c1 01 lw s7, 28(sp) +800007c0: 03 2b 01 02 lw s6, 32(sp) +800007c4: 83 2a 41 02 lw s5, 36(sp) +800007c8: 03 2a 81 02 lw s4, 40(sp) +800007cc: 83 29 c1 02 lw s3, 44(sp) +800007d0: 03 29 01 03 lw s2, 48(sp) +800007d4: 83 24 41 03 lw s1, 52(sp) +800007d8: 03 24 81 03 lw s0, 56(sp) +800007dc: 83 20 c1 03 lw ra, 60(sp) +800007e0: 13 01 01 04 addi sp, sp, 64 +800007e4: 67 80 00 00 ret + +800007e8 _exit: +800007e8: 13 05 00 00 mv a0, zero +800007ec: 6b 00 05 00 + +800007f0 vx_set_sp: +800007f0: 73 25 50 02 csrr a0, 37 +800007f4: 6b 00 05 00 +800007f8: 97 11 00 00 auipc gp, 1 +800007fc: 93 81 01 01 addi gp, gp, 16 +80000800: f3 25 20 02 csrr a1, 34 +80000804: 93 95 a5 00 slli a1, a1, 10 +80000808: 73 26 00 02 csrr a2, 32 +8000080c: 13 16 26 00 slli a2, a2, 2 +80000810: 37 f1 ff 6f lui sp, 458751 +80000814: 33 01 b1 40 sub sp, sp, a1 +80000818: 33 01 c1 00 add sp, sp, a2 +8000081c: f3 26 10 02 csrr a3, 33 +80000820: 63 86 06 00 beqz a3, 12 +80000824: 13 05 00 00 mv a0, zero +80000828: 6b 00 05 00 + +8000082c RETURN: 8000082c: 67 80 00 00 ret -80000830 _exit: -80000830: 13 05 00 00 mv a0, zero -80000834: 6b 00 05 00 +80000830 vx_wspawn: +80000830: 6b 10 b5 00 +80000834: 67 80 00 00 ret -80000838 vx_set_sp: -80000838: 73 25 50 02 csrr a0, 37 -8000083c: 6b 00 05 00 -80000840: 97 11 00 00 auipc gp, 1 -80000844: 93 81 81 fc addi gp, gp, -56 -80000848: f3 25 20 02 csrr a1, 34 -8000084c: 93 95 a5 00 slli a1, a1, 10 -80000850: 73 26 00 02 csrr a2, 32 -80000854: 13 16 26 00 slli a2, a2, 2 -80000858: 37 f1 ff 6f lui sp, 458751 -8000085c: 33 01 b1 40 sub sp, sp, a1 -80000860: 33 01 c1 00 add sp, sp, a2 -80000864: f3 26 10 02 csrr a3, 33 -80000868: 63 86 06 00 beqz a3, 12 -8000086c: 13 05 00 00 mv a0, zero -80000870: 6b 00 05 00 +80000838 vx_tmc: +80000838: 6b 00 05 00 +8000083c: 67 80 00 00 ret -80000874 RETURN: +80000840 vx_barrier: +80000840: 6b 40 b5 00 +80000844: 67 80 00 00 ret + +80000848 vx_split: +80000848: 6b 20 05 00 +8000084c: 67 80 00 00 ret + +80000850 vx_join: +80000850: 6b 30 00 00 +80000854: 67 80 00 00 ret + +80000858 vx_warp_id: +80000858: 73 25 10 02 csrr a0, 33 +8000085c: 67 80 00 00 ret + +80000860 vx_warp_gid: +80000860: 73 25 30 02 csrr a0, 35 +80000864: 67 80 00 00 ret + +80000868 vx_thread_id: +80000868: 73 25 00 02 csrr a0, 32 +8000086c: 67 80 00 00 ret + +80000870 vx_thread_gid: +80000870: 73 25 20 02 csrr a0, 34 80000874: 67 80 00 00 ret -80000878 vx_wspawn: -80000878: 6b 10 b5 00 +80000878 vx_core_id: +80000878: 73 25 40 02 csrr a0, 36 8000087c: 67 80 00 00 ret -80000880 vx_tmc: -80000880: 6b 00 05 00 +80000880 vx_num_threads: +80000880: 73 25 50 02 csrr a0, 37 80000884: 67 80 00 00 ret -80000888 vx_barrier: -80000888: 6b 40 b5 00 +80000888 vx_num_warps: +80000888: 73 25 60 02 csrr a0, 38 8000088c: 67 80 00 00 ret -80000890 vx_split: -80000890: 6b 20 05 00 +80000890 vx_num_cores: +80000890: 73 25 70 02 csrr a0, 39 80000894: 67 80 00 00 ret -80000898 vx_join: -80000898: 6b 30 00 00 +80000898 vx_num_cycles: +80000898: 73 25 00 b0 csrr a0, mcycle 8000089c: 67 80 00 00 ret -800008a0 vx_warp_id: -800008a0: 73 25 10 02 csrr a0, 33 +800008a0 vx_num_instrs: +800008a0: 73 25 20 b0 csrr a0, minstret 800008a4: 67 80 00 00 ret -800008a8 vx_warp_gid: -800008a8: 73 25 30 02 csrr a0, 35 -800008ac: 67 80 00 00 ret +800008a8 sqrtf: +800008a8: 13 01 01 fe addi sp, sp, -32 +800008ac: 27 26 81 00 fsw fs0, 12(sp) +800008b0: 23 2e 11 00 sw ra, 28(sp) +800008b4: 53 04 a5 20 fmv.s fs0, fa0 +800008b8: 27 24 91 00 fsw fs1, 8(sp) +800008bc: ef 00 c0 05 jal 92 +800008c0: 03 a7 01 c3 lw a4, -976(gp) +800008c4: 93 07 f0 ff addi a5, zero, -1 +800008c8: 63 0c f7 00 beq a4, a5, 24 +800008cc: d3 27 84 a0 feq.s a5, fs0, fs0 +800008d0: 63 88 07 00 beqz a5, 16 +800008d4: d3 04 00 f0 fmv.w.x fs1, zero +800008d8: d3 17 94 a0 flt.s a5, fs0, fs1 +800008dc: 63 9c 07 00 bnez a5, 24 +800008e0: 83 20 c1 01 lw ra, 28(sp) +800008e4: 07 24 c1 00 flw fs0, 12(sp) +800008e8: 87 24 81 00 flw fs1, 8(sp) +800008ec: 13 01 01 02 addi sp, sp, 32 +800008f0: 67 80 00 00 ret +800008f4: ef 00 40 14 jal 324 +800008f8: 83 20 c1 01 lw ra, 28(sp) +800008fc: 93 07 10 02 addi a5, zero, 33 +80000900: 23 20 f5 00 sw a5, 0(a0) +80000904: 53 f5 94 18 fdiv.s fa0, fs1, fs1 +80000908: 07 24 c1 00 flw fs0, 12(sp) +8000090c: 87 24 81 00 flw fs1, 8(sp) +80000910: 13 01 01 02 addi sp, sp, 32 +80000914: 67 80 00 00 ret -800008b0 vx_thread_id: -800008b0: 73 25 00 02 csrr a0, 32 -800008b4: 67 80 00 00 ret +80000918 __ieee754_sqrtf: +80000918: d3 06 05 e0 fmv.x.w a3, fa0 +8000091c: 37 07 80 7f lui a4, 522240 +80000920: 93 97 16 00 slli a5, a3, 1 +80000924: 93 d7 17 00 srli a5, a5, 1 +80000928: 63 f2 e7 0c bgeu a5, a4, 196 +8000092c: 53 05 05 e0 fmv.x.w a0, fa0 +80000930: 63 8a 07 0a beqz a5, 180 +80000934: 93 87 06 00 mv a5, a3 +80000938: 63 c6 06 0c bltz a3, 204 +8000093c: 33 76 d7 00 and a2, a4, a3 +80000940: 13 d7 76 41 srai a4, a3, 23 +80000944: 63 14 06 02 bnez a2, 40 +80000948: 37 06 80 00 lui a2, 2048 +8000094c: b3 76 d6 00 and a3, a2, a3 +80000950: 63 94 06 0c bnez a3, 200 +80000954: 93 97 17 00 slli a5, a5, 1 +80000958: 93 95 87 00 slli a1, a5, 8 +8000095c: 13 86 06 00 mv a2, a3 +80000960: 93 86 16 00 addi a3, a3, 1 +80000964: e3 d8 05 fe bgez a1, -16 +80000968: 33 07 c7 40 sub a4, a4, a2 +8000096c: b7 06 80 00 lui a3, 2048 +80000970: 13 86 f6 ff addi a2, a3, -1 +80000974: b3 f7 c7 00 and a5, a5, a2 +80000978: 13 07 17 f8 addi a4, a4, -127 +8000097c: b3 e6 d7 00 or a3, a5, a3 +80000980: 13 76 17 00 andi a2, a4, 1 +80000984: 93 97 16 00 slli a5, a3, 1 +80000988: 63 1a 06 06 bnez a2, 116 +8000098c: 13 58 17 40 srai a6, a4, 1 +80000990: 93 06 90 01 addi a3, zero, 25 +80000994: 13 05 00 00 mv a0, zero +80000998: 93 05 00 00 mv a1, zero +8000099c: 37 07 00 01 lui a4, 4096 +800009a0: 33 86 e5 00 add a2, a1, a4 +800009a4: 93 86 f6 ff addi a3, a3, -1 +800009a8: 63 c8 c7 00 blt a5, a2, 16 +800009ac: b3 05 e6 00 add a1, a2, a4 +800009b0: b3 87 c7 40 sub a5, a5, a2 +800009b4: 33 05 e5 00 add a0, a0, a4 +800009b8: 93 97 17 00 slli a5, a5, 1 +800009bc: 13 57 17 00 srli a4, a4, 1 +800009c0: e3 90 06 fe bnez a3, -32 +800009c4: 63 86 07 00 beqz a5, 12 +800009c8: 13 05 15 00 addi a0, a0, 1 +800009cc: 13 75 e5 ff andi a0, a0, -2 +800009d0: 13 55 15 40 srai a0, a0, 1 +800009d4: b7 07 00 3f lui a5, 258048 +800009d8: 33 05 f5 00 add a0, a0, a5 +800009dc: 13 17 78 01 slli a4, a6, 23 +800009e0: 33 05 a7 00 add a0, a4, a0 +800009e4: 53 05 05 f0 fmv.w.x fa0, a0 +800009e8: 67 80 00 00 ret +800009ec: c3 77 a5 50 fmadd.s fa5, fa0, fa0, fa0 +800009f0: 53 85 07 e0 fmv.x.w a0, fa5 +800009f4: 53 05 05 f0 fmv.w.x fa0, a0 +800009f8: 67 80 00 00 ret +800009fc: 93 97 26 00 slli a5, a3, 2 +80000a00: 6f f0 df f8 j -116 +80000a04: d3 77 a5 08 fsub.s fa5, fa0, fa0 +80000a08: d3 f7 f7 18 fdiv.s fa5, fa5, fa5 +80000a0c: 53 85 07 e0 fmv.x.w a0, fa5 +80000a10: 53 05 05 f0 fmv.w.x fa0, a0 +80000a14: 67 80 00 00 ret +80000a18: 13 06 f0 ff addi a2, zero, -1 +80000a1c: 33 07 c7 40 sub a4, a4, a2 +80000a20: 6f f0 df f4 j -180 -800008b8 vx_thread_gid: -800008b8: 73 25 20 02 csrr a0, 34 -800008bc: 67 80 00 00 ret +80000a24 atexit: +80000a24: 93 05 05 00 mv a1, a0 +80000a28: 93 06 00 00 mv a3, zero +80000a2c: 13 06 00 00 mv a2, zero +80000a30: 13 05 00 00 mv a0, zero +80000a34: 6f 00 00 21 j 528 -800008c0 vx_core_id: -800008c0: 73 25 40 02 csrr a0, 36 -800008c4: 67 80 00 00 ret +80000a38 __errno: +80000a38: 03 a5 c1 c2 lw a0, -980(gp) +80000a3c: 67 80 00 00 ret -800008c8 vx_num_threads: -800008c8: 73 25 50 02 csrr a0, 37 -800008cc: 67 80 00 00 ret +80000a40 exit: +80000a40: 13 01 01 ff addi sp, sp, -16 +80000a44: 93 05 00 00 mv a1, zero +80000a48: 23 24 81 00 sw s0, 8(sp) +80000a4c: 23 26 11 00 sw ra, 12(sp) +80000a50: 13 04 05 00 mv s0, a0 +80000a54: ef 00 80 28 jal 648 +80000a58: 03 a5 81 c2 lw a0, -984(gp) +80000a5c: 83 27 c5 03 lw a5, 60(a0) +80000a60: 63 84 07 00 beqz a5, 8 +80000a64: e7 80 07 00 jalr a5 +80000a68: 13 05 04 00 mv a0, s0 +80000a6c: ef f0 df d7 jal -644 -800008d0 vx_num_warps: -800008d0: 73 25 60 02 csrr a0, 38 -800008d4: 67 80 00 00 ret +80000a70 __libc_fini_array: +80000a70: 13 01 01 ff addi sp, sp, -16 +80000a74: 23 24 81 00 sw s0, 8(sp) +80000a78: b7 17 00 80 lui a5, 524289 +80000a7c: 37 14 00 80 lui s0, 524289 +80000a80: 13 04 44 00 addi s0, s0, 4 +80000a84: 93 87 47 00 addi a5, a5, 4 +80000a88: b3 87 87 40 sub a5, a5, s0 +80000a8c: 23 22 91 00 sw s1, 4(sp) +80000a90: 23 26 11 00 sw ra, 12(sp) +80000a94: 93 d4 27 40 srai s1, a5, 2 +80000a98: 63 80 04 02 beqz s1, 32 +80000a9c: 93 87 c7 ff addi a5, a5, -4 +80000aa0: 33 84 87 00 add s0, a5, s0 +80000aa4: 83 27 04 00 lw a5, 0(s0) +80000aa8: 93 84 f4 ff addi s1, s1, -1 +80000aac: 13 04 c4 ff addi s0, s0, -4 +80000ab0: e7 80 07 00 jalr a5 +80000ab4: e3 98 04 fe bnez s1, -16 +80000ab8: 83 20 c1 00 lw ra, 12(sp) +80000abc: 03 24 81 00 lw s0, 8(sp) +80000ac0: 83 24 41 00 lw s1, 4(sp) +80000ac4: 13 01 01 01 addi sp, sp, 16 +80000ac8: 67 80 00 00 ret -800008d8 vx_num_cores: -800008d8: 73 25 70 02 csrr a0, 39 -800008dc: 67 80 00 00 ret +80000acc __libc_init_array: +80000acc: 13 01 01 ff addi sp, sp, -16 +80000ad0: 23 24 81 00 sw s0, 8(sp) +80000ad4: 23 20 21 01 sw s2, 0(sp) +80000ad8: 37 14 00 80 lui s0, 524289 +80000adc: 37 19 00 80 lui s2, 524289 +80000ae0: 93 07 04 00 mv a5, s0 +80000ae4: 13 09 09 00 mv s2, s2 +80000ae8: 33 09 f9 40 sub s2, s2, a5 +80000aec: 23 26 11 00 sw ra, 12(sp) +80000af0: 23 22 91 00 sw s1, 4(sp) +80000af4: 13 59 29 40 srai s2, s2, 2 +80000af8: 63 00 09 02 beqz s2, 32 +80000afc: 13 04 04 00 mv s0, s0 +80000b00: 93 04 00 00 mv s1, zero +80000b04: 83 27 04 00 lw a5, 0(s0) +80000b08: 93 84 14 00 addi s1, s1, 1 +80000b0c: 13 04 44 00 addi s0, s0, 4 +80000b10: e7 80 07 00 jalr a5 +80000b14: e3 18 99 fe bne s2, s1, -16 +80000b18: 37 14 00 80 lui s0, 524289 +80000b1c: 37 19 00 80 lui s2, 524289 +80000b20: 93 07 04 00 mv a5, s0 +80000b24: 13 09 49 00 addi s2, s2, 4 +80000b28: 33 09 f9 40 sub s2, s2, a5 +80000b2c: 13 59 29 40 srai s2, s2, 2 +80000b30: 63 00 09 02 beqz s2, 32 +80000b34: 13 04 04 00 mv s0, s0 +80000b38: 93 04 00 00 mv s1, zero +80000b3c: 83 27 04 00 lw a5, 0(s0) +80000b40: 93 84 14 00 addi s1, s1, 1 +80000b44: 13 04 44 00 addi s0, s0, 4 +80000b48: e7 80 07 00 jalr a5 +80000b4c: e3 18 99 fe bne s2, s1, -16 +80000b50: 83 20 c1 00 lw ra, 12(sp) +80000b54: 03 24 81 00 lw s0, 8(sp) +80000b58: 83 24 41 00 lw s1, 4(sp) +80000b5c: 03 29 01 00 lw s2, 0(sp) +80000b60: 13 01 01 01 addi sp, sp, 16 +80000b64: 67 80 00 00 ret -800008e0 vx_num_cycles: -800008e0: 73 25 00 c0 rdcycle a0 -800008e4: 67 80 00 00 ret +80000b68 memset: +80000b68: 13 03 f0 00 addi t1, zero, 15 +80000b6c: 13 07 05 00 mv a4, a0 +80000b70: 63 7e c3 02 bgeu t1, a2, 60 +80000b74: 93 77 f7 00 andi a5, a4, 15 +80000b78: 63 90 07 0a bnez a5, 160 +80000b7c: 63 92 05 08 bnez a1, 132 +80000b80: 93 76 06 ff andi a3, a2, -16 +80000b84: 13 76 f6 00 andi a2, a2, 15 +80000b88: b3 86 e6 00 add a3, a3, a4 +80000b8c: 23 20 b7 00 sw a1, 0(a4) +80000b90: 23 22 b7 00 sw a1, 4(a4) +80000b94: 23 24 b7 00 sw a1, 8(a4) +80000b98: 23 26 b7 00 sw a1, 12(a4) +80000b9c: 13 07 07 01 addi a4, a4, 16 +80000ba0: e3 66 d7 fe bltu a4, a3, -20 +80000ba4: 63 14 06 00 bnez a2, 8 +80000ba8: 67 80 00 00 ret +80000bac: b3 06 c3 40 sub a3, t1, a2 +80000bb0: 93 96 26 00 slli a3, a3, 2 +80000bb4: 97 02 00 00 auipc t0, 0 +80000bb8: b3 86 56 00 add a3, a3, t0 +80000bbc: 67 80 c6 00 jr 12(a3) +80000bc0: 23 07 b7 00 sb a1, 14(a4) +80000bc4: a3 06 b7 00 sb a1, 13(a4) +80000bc8: 23 06 b7 00 sb a1, 12(a4) +80000bcc: a3 05 b7 00 sb a1, 11(a4) +80000bd0: 23 05 b7 00 sb a1, 10(a4) +80000bd4: a3 04 b7 00 sb a1, 9(a4) +80000bd8: 23 04 b7 00 sb a1, 8(a4) +80000bdc: a3 03 b7 00 sb a1, 7(a4) +80000be0: 23 03 b7 00 sb a1, 6(a4) +80000be4: a3 02 b7 00 sb a1, 5(a4) +80000be8: 23 02 b7 00 sb a1, 4(a4) +80000bec: a3 01 b7 00 sb a1, 3(a4) +80000bf0: 23 01 b7 00 sb a1, 2(a4) +80000bf4: a3 00 b7 00 sb a1, 1(a4) +80000bf8: 23 00 b7 00 sb a1, 0(a4) +80000bfc: 67 80 00 00 ret +80000c00: 93 f5 f5 0f andi a1, a1, 255 +80000c04: 93 96 85 00 slli a3, a1, 8 +80000c08: b3 e5 d5 00 or a1, a1, a3 +80000c0c: 93 96 05 01 slli a3, a1, 16 +80000c10: b3 e5 d5 00 or a1, a1, a3 +80000c14: 6f f0 df f6 j -148 +80000c18: 93 96 27 00 slli a3, a5, 2 +80000c1c: 97 02 00 00 auipc t0, 0 +80000c20: b3 86 56 00 add a3, a3, t0 +80000c24: 93 82 00 00 mv t0, ra +80000c28: e7 80 06 fa jalr -96(a3) +80000c2c: 93 80 02 00 mv ra, t0 +80000c30: 93 87 07 ff addi a5, a5, -16 +80000c34: 33 07 f7 40 sub a4, a4, a5 +80000c38: 33 06 f6 00 add a2, a2, a5 +80000c3c: e3 78 c3 f6 bgeu t1, a2, -144 +80000c40: 6f f0 df f3 j -196 -800008e8 vx_num_instrs: -800008e8: 73 25 20 c0 rdinstret a0 -800008ec: 67 80 00 00 ret +80000c44 __register_exitproc: +80000c44: 03 a7 81 c2 lw a4, -984(gp) +80000c48: 83 27 87 14 lw a5, 328(a4) +80000c4c: 63 8c 07 04 beqz a5, 88 +80000c50: 03 a7 47 00 lw a4, 4(a5) +80000c54: 13 08 f0 01 addi a6, zero, 31 +80000c58: 63 4e e8 06 blt a6, a4, 124 +80000c5c: 13 18 27 00 slli a6, a4, 2 +80000c60: 63 06 05 02 beqz a0, 44 +80000c64: 33 83 07 01 add t1, a5, a6 +80000c68: 23 24 c3 08 sw a2, 136(t1) +80000c6c: 83 a8 87 18 lw a7, 392(a5) +80000c70: 13 06 10 00 addi a2, zero, 1 +80000c74: 33 16 e6 00 sll a2, a2, a4 +80000c78: b3 e8 c8 00 or a7, a7, a2 +80000c7c: 23 a4 17 19 sw a7, 392(a5) +80000c80: 23 24 d3 10 sw a3, 264(t1) +80000c84: 93 06 20 00 addi a3, zero, 2 +80000c88: 63 04 d5 02 beq a0, a3, 40 +80000c8c: 13 07 17 00 addi a4, a4, 1 +80000c90: 23 a2 e7 00 sw a4, 4(a5) +80000c94: b3 87 07 01 add a5, a5, a6 +80000c98: 23 a4 b7 00 sw a1, 8(a5) +80000c9c: 13 05 00 00 mv a0, zero +80000ca0: 67 80 00 00 ret +80000ca4: 93 07 c7 14 addi a5, a4, 332 +80000ca8: 23 24 f7 14 sw a5, 328(a4) +80000cac: 6f f0 5f fa j -92 +80000cb0: 83 a6 c7 18 lw a3, 396(a5) +80000cb4: 13 07 17 00 addi a4, a4, 1 +80000cb8: 23 a2 e7 00 sw a4, 4(a5) +80000cbc: 33 e6 c6 00 or a2, a3, a2 +80000cc0: 23 a6 c7 18 sw a2, 396(a5) +80000cc4: b3 87 07 01 add a5, a5, a6 +80000cc8: 23 a4 b7 00 sw a1, 8(a5) +80000ccc: 13 05 00 00 mv a0, zero +80000cd0: 67 80 00 00 ret +80000cd4: 13 05 f0 ff addi a0, zero, -1 +80000cd8: 67 80 00 00 ret -800008f0 sqrtf: -800008f0: 13 01 01 fe addi sp, sp, -32 -800008f4: 27 26 81 00 fsw fs0, 12(sp) -800008f8: 23 2e 11 00 sw ra, 28(sp) -800008fc: 53 04 a5 20 fmv.s fs0, fa0 -80000900: 27 24 91 00 fsw fs1, 8(sp) -80000904: ef 00 c0 05 jal 92 -80000908: 03 a7 01 c3 lw a4, -976(gp) -8000090c: 93 07 f0 ff addi a5, zero, -1 -80000910: 63 0c f7 00 beq a4, a5, 24 -80000914: d3 27 84 a0 feq.s a5, fs0, fs0 -80000918: 63 88 07 00 beqz a5, 16 -8000091c: d3 04 00 f0 fmv.w.x fs1, zero -80000920: d3 17 94 a0 flt.s a5, fs0, fs1 -80000924: 63 9c 07 00 bnez a5, 24 -80000928: 83 20 c1 01 lw ra, 28(sp) -8000092c: 07 24 c1 00 flw fs0, 12(sp) -80000930: 87 24 81 00 flw fs1, 8(sp) -80000934: 13 01 01 02 addi sp, sp, 32 -80000938: 67 80 00 00 ret -8000093c: ef 00 40 14 jal 324 -80000940: 83 20 c1 01 lw ra, 28(sp) -80000944: 93 07 10 02 addi a5, zero, 33 -80000948: 23 20 f5 00 sw a5, 0(a0) -8000094c: 53 f5 94 18 fdiv.s fa0, fs1, fs1 -80000950: 07 24 c1 00 flw fs0, 12(sp) -80000954: 87 24 81 00 flw fs1, 8(sp) -80000958: 13 01 01 02 addi sp, sp, 32 -8000095c: 67 80 00 00 ret - -80000960 __ieee754_sqrtf: -80000960: d3 06 05 e0 fmv.x.w a3, fa0 -80000964: 37 07 80 7f lui a4, 522240 -80000968: 93 97 16 00 slli a5, a3, 1 -8000096c: 93 d7 17 00 srli a5, a5, 1 -80000970: 63 f2 e7 0c bgeu a5, a4, 196 -80000974: 53 05 05 e0 fmv.x.w a0, fa0 -80000978: 63 8a 07 0a beqz a5, 180 -8000097c: 93 87 06 00 mv a5, a3 -80000980: 63 c6 06 0c bltz a3, 204 -80000984: 33 76 d7 00 and a2, a4, a3 -80000988: 13 d7 76 41 srai a4, a3, 23 -8000098c: 63 14 06 02 bnez a2, 40 -80000990: 37 06 80 00 lui a2, 2048 -80000994: b3 76 d6 00 and a3, a2, a3 -80000998: 63 94 06 0c bnez a3, 200 -8000099c: 93 97 17 00 slli a5, a5, 1 -800009a0: 93 95 87 00 slli a1, a5, 8 -800009a4: 13 86 06 00 mv a2, a3 -800009a8: 93 86 16 00 addi a3, a3, 1 -800009ac: e3 d8 05 fe bgez a1, -16 -800009b0: 33 07 c7 40 sub a4, a4, a2 -800009b4: b7 06 80 00 lui a3, 2048 -800009b8: 13 86 f6 ff addi a2, a3, -1 -800009bc: b3 f7 c7 00 and a5, a5, a2 -800009c0: 13 07 17 f8 addi a4, a4, -127 -800009c4: b3 e6 d7 00 or a3, a5, a3 -800009c8: 13 76 17 00 andi a2, a4, 1 -800009cc: 93 97 16 00 slli a5, a3, 1 -800009d0: 63 1a 06 06 bnez a2, 116 -800009d4: 13 58 17 40 srai a6, a4, 1 -800009d8: 93 06 90 01 addi a3, zero, 25 -800009dc: 13 05 00 00 mv a0, zero -800009e0: 93 05 00 00 mv a1, zero -800009e4: 37 07 00 01 lui a4, 4096 -800009e8: 33 86 e5 00 add a2, a1, a4 -800009ec: 93 86 f6 ff addi a3, a3, -1 -800009f0: 63 c8 c7 00 blt a5, a2, 16 -800009f4: b3 05 e6 00 add a1, a2, a4 -800009f8: b3 87 c7 40 sub a5, a5, a2 -800009fc: 33 05 e5 00 add a0, a0, a4 -80000a00: 93 97 17 00 slli a5, a5, 1 -80000a04: 13 57 17 00 srli a4, a4, 1 -80000a08: e3 90 06 fe bnez a3, -32 -80000a0c: 63 86 07 00 beqz a5, 12 -80000a10: 13 05 15 00 addi a0, a0, 1 -80000a14: 13 75 e5 ff andi a0, a0, -2 -80000a18: 13 55 15 40 srai a0, a0, 1 -80000a1c: b7 07 00 3f lui a5, 258048 -80000a20: 33 05 f5 00 add a0, a0, a5 -80000a24: 13 17 78 01 slli a4, a6, 23 -80000a28: 33 05 a7 00 add a0, a4, a0 -80000a2c: 53 05 05 f0 fmv.w.x fa0, a0 -80000a30: 67 80 00 00 ret -80000a34: c3 77 a5 50 fmadd.s fa5, fa0, fa0, fa0 -80000a38: 53 85 07 e0 fmv.x.w a0, fa5 -80000a3c: 53 05 05 f0 fmv.w.x fa0, a0 -80000a40: 67 80 00 00 ret -80000a44: 93 97 26 00 slli a5, a3, 2 -80000a48: 6f f0 df f8 j -116 -80000a4c: d3 77 a5 08 fsub.s fa5, fa0, fa0 -80000a50: d3 f7 f7 18 fdiv.s fa5, fa5, fa5 -80000a54: 53 85 07 e0 fmv.x.w a0, fa5 -80000a58: 53 05 05 f0 fmv.w.x fa0, a0 -80000a5c: 67 80 00 00 ret -80000a60: 13 06 f0 ff addi a2, zero, -1 -80000a64: 33 07 c7 40 sub a4, a4, a2 -80000a68: 6f f0 df f4 j -180 - -80000a6c atexit: -80000a6c: 93 05 05 00 mv a1, a0 -80000a70: 93 06 00 00 mv a3, zero -80000a74: 13 06 00 00 mv a2, zero -80000a78: 13 05 00 00 mv a0, zero -80000a7c: 6f 00 00 21 j 528 - -80000a80 __errno: -80000a80: 03 a5 c1 c2 lw a0, -980(gp) -80000a84: 67 80 00 00 ret - -80000a88 exit: -80000a88: 13 01 01 ff addi sp, sp, -16 -80000a8c: 93 05 00 00 mv a1, zero -80000a90: 23 24 81 00 sw s0, 8(sp) -80000a94: 23 26 11 00 sw ra, 12(sp) -80000a98: 13 04 05 00 mv s0, a0 -80000a9c: ef 00 80 28 jal 648 -80000aa0: 03 a5 81 c2 lw a0, -984(gp) -80000aa4: 83 27 c5 03 lw a5, 60(a0) -80000aa8: 63 84 07 00 beqz a5, 8 -80000aac: e7 80 07 00 jalr a5 -80000ab0: 13 05 04 00 mv a0, s0 -80000ab4: ef f0 df d7 jal -644 - -80000ab8 __libc_fini_array: -80000ab8: 13 01 01 ff addi sp, sp, -16 -80000abc: 23 24 81 00 sw s0, 8(sp) -80000ac0: b7 17 00 80 lui a5, 524289 -80000ac4: 37 14 00 80 lui s0, 524289 -80000ac8: 13 04 44 00 addi s0, s0, 4 -80000acc: 93 87 47 00 addi a5, a5, 4 -80000ad0: b3 87 87 40 sub a5, a5, s0 -80000ad4: 23 22 91 00 sw s1, 4(sp) -80000ad8: 23 26 11 00 sw ra, 12(sp) -80000adc: 93 d4 27 40 srai s1, a5, 2 -80000ae0: 63 80 04 02 beqz s1, 32 -80000ae4: 93 87 c7 ff addi a5, a5, -4 -80000ae8: 33 84 87 00 add s0, a5, s0 -80000aec: 83 27 04 00 lw a5, 0(s0) -80000af0: 93 84 f4 ff addi s1, s1, -1 -80000af4: 13 04 c4 ff addi s0, s0, -4 -80000af8: e7 80 07 00 jalr a5 -80000afc: e3 98 04 fe bnez s1, -16 -80000b00: 83 20 c1 00 lw ra, 12(sp) -80000b04: 03 24 81 00 lw s0, 8(sp) -80000b08: 83 24 41 00 lw s1, 4(sp) -80000b0c: 13 01 01 01 addi sp, sp, 16 -80000b10: 67 80 00 00 ret - -80000b14 __libc_init_array: -80000b14: 13 01 01 ff addi sp, sp, -16 -80000b18: 23 24 81 00 sw s0, 8(sp) -80000b1c: 23 20 21 01 sw s2, 0(sp) -80000b20: 37 14 00 80 lui s0, 524289 -80000b24: 37 19 00 80 lui s2, 524289 -80000b28: 93 07 04 00 mv a5, s0 -80000b2c: 13 09 09 00 mv s2, s2 -80000b30: 33 09 f9 40 sub s2, s2, a5 -80000b34: 23 26 11 00 sw ra, 12(sp) -80000b38: 23 22 91 00 sw s1, 4(sp) -80000b3c: 13 59 29 40 srai s2, s2, 2 -80000b40: 63 00 09 02 beqz s2, 32 -80000b44: 13 04 04 00 mv s0, s0 -80000b48: 93 04 00 00 mv s1, zero -80000b4c: 83 27 04 00 lw a5, 0(s0) -80000b50: 93 84 14 00 addi s1, s1, 1 -80000b54: 13 04 44 00 addi s0, s0, 4 -80000b58: e7 80 07 00 jalr a5 -80000b5c: e3 18 99 fe bne s2, s1, -16 -80000b60: 37 14 00 80 lui s0, 524289 -80000b64: 37 19 00 80 lui s2, 524289 -80000b68: 93 07 04 00 mv a5, s0 -80000b6c: 13 09 49 00 addi s2, s2, 4 -80000b70: 33 09 f9 40 sub s2, s2, a5 -80000b74: 13 59 29 40 srai s2, s2, 2 -80000b78: 63 00 09 02 beqz s2, 32 -80000b7c: 13 04 04 00 mv s0, s0 -80000b80: 93 04 00 00 mv s1, zero -80000b84: 83 27 04 00 lw a5, 0(s0) -80000b88: 93 84 14 00 addi s1, s1, 1 -80000b8c: 13 04 44 00 addi s0, s0, 4 -80000b90: e7 80 07 00 jalr a5 -80000b94: e3 18 99 fe bne s2, s1, -16 -80000b98: 83 20 c1 00 lw ra, 12(sp) -80000b9c: 03 24 81 00 lw s0, 8(sp) -80000ba0: 83 24 41 00 lw s1, 4(sp) -80000ba4: 03 29 01 00 lw s2, 0(sp) -80000ba8: 13 01 01 01 addi sp, sp, 16 -80000bac: 67 80 00 00 ret - -80000bb0 memset: -80000bb0: 13 03 f0 00 addi t1, zero, 15 -80000bb4: 13 07 05 00 mv a4, a0 -80000bb8: 63 7e c3 02 bgeu t1, a2, 60 -80000bbc: 93 77 f7 00 andi a5, a4, 15 -80000bc0: 63 90 07 0a bnez a5, 160 -80000bc4: 63 92 05 08 bnez a1, 132 -80000bc8: 93 76 06 ff andi a3, a2, -16 -80000bcc: 13 76 f6 00 andi a2, a2, 15 -80000bd0: b3 86 e6 00 add a3, a3, a4 -80000bd4: 23 20 b7 00 sw a1, 0(a4) -80000bd8: 23 22 b7 00 sw a1, 4(a4) -80000bdc: 23 24 b7 00 sw a1, 8(a4) -80000be0: 23 26 b7 00 sw a1, 12(a4) -80000be4: 13 07 07 01 addi a4, a4, 16 -80000be8: e3 66 d7 fe bltu a4, a3, -20 -80000bec: 63 14 06 00 bnez a2, 8 -80000bf0: 67 80 00 00 ret -80000bf4: b3 06 c3 40 sub a3, t1, a2 -80000bf8: 93 96 26 00 slli a3, a3, 2 -80000bfc: 97 02 00 00 auipc t0, 0 -80000c00: b3 86 56 00 add a3, a3, t0 -80000c04: 67 80 c6 00 jr 12(a3) -80000c08: 23 07 b7 00 sb a1, 14(a4) -80000c0c: a3 06 b7 00 sb a1, 13(a4) -80000c10: 23 06 b7 00 sb a1, 12(a4) -80000c14: a3 05 b7 00 sb a1, 11(a4) -80000c18: 23 05 b7 00 sb a1, 10(a4) -80000c1c: a3 04 b7 00 sb a1, 9(a4) -80000c20: 23 04 b7 00 sb a1, 8(a4) -80000c24: a3 03 b7 00 sb a1, 7(a4) -80000c28: 23 03 b7 00 sb a1, 6(a4) -80000c2c: a3 02 b7 00 sb a1, 5(a4) -80000c30: 23 02 b7 00 sb a1, 4(a4) -80000c34: a3 01 b7 00 sb a1, 3(a4) -80000c38: 23 01 b7 00 sb a1, 2(a4) -80000c3c: a3 00 b7 00 sb a1, 1(a4) -80000c40: 23 00 b7 00 sb a1, 0(a4) -80000c44: 67 80 00 00 ret -80000c48: 93 f5 f5 0f andi a1, a1, 255 -80000c4c: 93 96 85 00 slli a3, a1, 8 -80000c50: b3 e5 d5 00 or a1, a1, a3 -80000c54: 93 96 05 01 slli a3, a1, 16 -80000c58: b3 e5 d5 00 or a1, a1, a3 -80000c5c: 6f f0 df f6 j -148 -80000c60: 93 96 27 00 slli a3, a5, 2 -80000c64: 97 02 00 00 auipc t0, 0 -80000c68: b3 86 56 00 add a3, a3, t0 -80000c6c: 93 82 00 00 mv t0, ra -80000c70: e7 80 06 fa jalr -96(a3) -80000c74: 93 80 02 00 mv ra, t0 -80000c78: 93 87 07 ff addi a5, a5, -16 -80000c7c: 33 07 f7 40 sub a4, a4, a5 -80000c80: 33 06 f6 00 add a2, a2, a5 -80000c84: e3 78 c3 f6 bgeu t1, a2, -144 -80000c88: 6f f0 df f3 j -196 - -80000c8c __register_exitproc: -80000c8c: 03 a7 81 c2 lw a4, -984(gp) -80000c90: 83 27 87 14 lw a5, 328(a4) -80000c94: 63 8c 07 04 beqz a5, 88 -80000c98: 03 a7 47 00 lw a4, 4(a5) -80000c9c: 13 08 f0 01 addi a6, zero, 31 -80000ca0: 63 4e e8 06 blt a6, a4, 124 -80000ca4: 13 18 27 00 slli a6, a4, 2 -80000ca8: 63 06 05 02 beqz a0, 44 -80000cac: 33 83 07 01 add t1, a5, a6 -80000cb0: 23 24 c3 08 sw a2, 136(t1) -80000cb4: 83 a8 87 18 lw a7, 392(a5) -80000cb8: 13 06 10 00 addi a2, zero, 1 -80000cbc: 33 16 e6 00 sll a2, a2, a4 -80000cc0: b3 e8 c8 00 or a7, a7, a2 -80000cc4: 23 a4 17 19 sw a7, 392(a5) -80000cc8: 23 24 d3 10 sw a3, 264(t1) -80000ccc: 93 06 20 00 addi a3, zero, 2 -80000cd0: 63 04 d5 02 beq a0, a3, 40 -80000cd4: 13 07 17 00 addi a4, a4, 1 -80000cd8: 23 a2 e7 00 sw a4, 4(a5) -80000cdc: b3 87 07 01 add a5, a5, a6 -80000ce0: 23 a4 b7 00 sw a1, 8(a5) -80000ce4: 13 05 00 00 mv a0, zero -80000ce8: 67 80 00 00 ret -80000cec: 93 07 c7 14 addi a5, a4, 332 -80000cf0: 23 24 f7 14 sw a5, 328(a4) -80000cf4: 6f f0 5f fa j -92 -80000cf8: 83 a6 c7 18 lw a3, 396(a5) -80000cfc: 13 07 17 00 addi a4, a4, 1 -80000d00: 23 a2 e7 00 sw a4, 4(a5) -80000d04: 33 e6 c6 00 or a2, a3, a2 -80000d08: 23 a6 c7 18 sw a2, 396(a5) -80000d0c: b3 87 07 01 add a5, a5, a6 -80000d10: 23 a4 b7 00 sw a1, 8(a5) -80000d14: 13 05 00 00 mv a0, zero -80000d18: 67 80 00 00 ret -80000d1c: 13 05 f0 ff addi a0, zero, -1 -80000d20: 67 80 00 00 ret - -80000d24 __call_exitprocs: -80000d24: 13 01 01 fd addi sp, sp, -48 -80000d28: 23 2c 41 01 sw s4, 24(sp) -80000d2c: 03 aa 81 c2 lw s4, -984(gp) -80000d30: 23 20 21 03 sw s2, 32(sp) -80000d34: 23 26 11 02 sw ra, 44(sp) -80000d38: 03 29 8a 14 lw s2, 328(s4) -80000d3c: 23 24 81 02 sw s0, 40(sp) -80000d40: 23 22 91 02 sw s1, 36(sp) -80000d44: 23 2e 31 01 sw s3, 28(sp) -80000d48: 23 2a 51 01 sw s5, 20(sp) -80000d4c: 23 28 61 01 sw s6, 16(sp) -80000d50: 23 26 71 01 sw s7, 12(sp) -80000d54: 23 24 81 01 sw s8, 8(sp) -80000d58: 63 00 09 04 beqz s2, 64 -80000d5c: 13 0b 05 00 mv s6, a0 -80000d60: 93 8b 05 00 mv s7, a1 -80000d64: 93 0a 10 00 addi s5, zero, 1 -80000d68: 93 09 f0 ff addi s3, zero, -1 -80000d6c: 83 24 49 00 lw s1, 4(s2) -80000d70: 13 84 f4 ff addi s0, s1, -1 -80000d74: 63 42 04 02 bltz s0, 36 -80000d78: 93 94 24 00 slli s1, s1, 2 -80000d7c: b3 04 99 00 add s1, s2, s1 -80000d80: 63 84 0b 04 beqz s7, 72 -80000d84: 83 a7 44 10 lw a5, 260(s1) -80000d88: 63 80 77 05 beq a5, s7, 64 -80000d8c: 13 04 f4 ff addi s0, s0, -1 -80000d90: 93 84 c4 ff addi s1, s1, -4 -80000d94: e3 16 34 ff bne s0, s3, -20 -80000d98: 83 20 c1 02 lw ra, 44(sp) -80000d9c: 03 24 81 02 lw s0, 40(sp) -80000da0: 83 24 41 02 lw s1, 36(sp) -80000da4: 03 29 01 02 lw s2, 32(sp) -80000da8: 83 29 c1 01 lw s3, 28(sp) -80000dac: 03 2a 81 01 lw s4, 24(sp) -80000db0: 83 2a 41 01 lw s5, 20(sp) -80000db4: 03 2b 01 01 lw s6, 16(sp) -80000db8: 83 2b c1 00 lw s7, 12(sp) -80000dbc: 03 2c 81 00 lw s8, 8(sp) -80000dc0: 13 01 01 03 addi sp, sp, 48 -80000dc4: 67 80 00 00 ret -80000dc8: 83 27 49 00 lw a5, 4(s2) -80000dcc: 83 a6 44 00 lw a3, 4(s1) -80000dd0: 93 87 f7 ff addi a5, a5, -1 -80000dd4: 63 8e 87 04 beq a5, s0, 92 -80000dd8: 23 a2 04 00 sw zero, 4(s1) -80000ddc: e3 88 06 fa beqz a3, -80 -80000de0: 83 27 89 18 lw a5, 392(s2) -80000de4: 33 97 8a 00 sll a4, s5, s0 -80000de8: 03 2c 49 00 lw s8, 4(s2) -80000dec: b3 77 f7 00 and a5, a4, a5 -80000df0: 63 92 07 02 bnez a5, 36 +80000cdc __call_exitprocs: +80000cdc: 13 01 01 fd addi sp, sp, -48 +80000ce0: 23 2c 41 01 sw s4, 24(sp) +80000ce4: 03 aa 81 c2 lw s4, -984(gp) +80000ce8: 23 20 21 03 sw s2, 32(sp) +80000cec: 23 26 11 02 sw ra, 44(sp) +80000cf0: 03 29 8a 14 lw s2, 328(s4) +80000cf4: 23 24 81 02 sw s0, 40(sp) +80000cf8: 23 22 91 02 sw s1, 36(sp) +80000cfc: 23 2e 31 01 sw s3, 28(sp) +80000d00: 23 2a 51 01 sw s5, 20(sp) +80000d04: 23 28 61 01 sw s6, 16(sp) +80000d08: 23 26 71 01 sw s7, 12(sp) +80000d0c: 23 24 81 01 sw s8, 8(sp) +80000d10: 63 00 09 04 beqz s2, 64 +80000d14: 13 0b 05 00 mv s6, a0 +80000d18: 93 8b 05 00 mv s7, a1 +80000d1c: 93 0a 10 00 addi s5, zero, 1 +80000d20: 93 09 f0 ff addi s3, zero, -1 +80000d24: 83 24 49 00 lw s1, 4(s2) +80000d28: 13 84 f4 ff addi s0, s1, -1 +80000d2c: 63 42 04 02 bltz s0, 36 +80000d30: 93 94 24 00 slli s1, s1, 2 +80000d34: b3 04 99 00 add s1, s2, s1 +80000d38: 63 84 0b 04 beqz s7, 72 +80000d3c: 83 a7 44 10 lw a5, 260(s1) +80000d40: 63 80 77 05 beq a5, s7, 64 +80000d44: 13 04 f4 ff addi s0, s0, -1 +80000d48: 93 84 c4 ff addi s1, s1, -4 +80000d4c: e3 16 34 ff bne s0, s3, -20 +80000d50: 83 20 c1 02 lw ra, 44(sp) +80000d54: 03 24 81 02 lw s0, 40(sp) +80000d58: 83 24 41 02 lw s1, 36(sp) +80000d5c: 03 29 01 02 lw s2, 32(sp) +80000d60: 83 29 c1 01 lw s3, 28(sp) +80000d64: 03 2a 81 01 lw s4, 24(sp) +80000d68: 83 2a 41 01 lw s5, 20(sp) +80000d6c: 03 2b 01 01 lw s6, 16(sp) +80000d70: 83 2b c1 00 lw s7, 12(sp) +80000d74: 03 2c 81 00 lw s8, 8(sp) +80000d78: 13 01 01 03 addi sp, sp, 48 +80000d7c: 67 80 00 00 ret +80000d80: 83 27 49 00 lw a5, 4(s2) +80000d84: 83 a6 44 00 lw a3, 4(s1) +80000d88: 93 87 f7 ff addi a5, a5, -1 +80000d8c: 63 8e 87 04 beq a5, s0, 92 +80000d90: 23 a2 04 00 sw zero, 4(s1) +80000d94: e3 88 06 fa beqz a3, -80 +80000d98: 83 27 89 18 lw a5, 392(s2) +80000d9c: 33 97 8a 00 sll a4, s5, s0 +80000da0: 03 2c 49 00 lw s8, 4(s2) +80000da4: b3 77 f7 00 and a5, a4, a5 +80000da8: 63 92 07 02 bnez a5, 36 +80000dac: e7 80 06 00 jalr a3 +80000db0: 03 27 49 00 lw a4, 4(s2) +80000db4: 83 27 8a 14 lw a5, 328(s4) +80000db8: 63 14 87 01 bne a4, s8, 8 +80000dbc: e3 04 f9 f8 beq s2, a5, -120 +80000dc0: e3 88 07 f8 beqz a5, -112 +80000dc4: 13 89 07 00 mv s2, a5 +80000dc8: 6f f0 df f5 j -164 +80000dcc: 83 27 c9 18 lw a5, 396(s2) +80000dd0: 83 a5 44 08 lw a1, 132(s1) +80000dd4: 33 77 f7 00 and a4, a4, a5 +80000dd8: 63 1c 07 00 bnez a4, 24 +80000ddc: 13 05 0b 00 mv a0, s6 +80000de0: e7 80 06 00 jalr a3 +80000de4: 6f f0 df fc j -52 +80000de8: 23 22 89 00 sw s0, 4(s2) +80000dec: 6f f0 9f fa j -88 +80000df0: 13 85 05 00 mv a0, a1 80000df4: e7 80 06 00 jalr a3 -80000df8: 03 27 49 00 lw a4, 4(s2) -80000dfc: 83 27 8a 14 lw a5, 328(s4) -80000e00: 63 14 87 01 bne a4, s8, 8 -80000e04: e3 04 f9 f8 beq s2, a5, -120 -80000e08: e3 88 07 f8 beqz a5, -112 -80000e0c: 13 89 07 00 mv s2, a5 -80000e10: 6f f0 df f5 j -164 -80000e14: 83 27 c9 18 lw a5, 396(s2) -80000e18: 83 a5 44 08 lw a1, 132(s1) -80000e1c: 33 77 f7 00 and a4, a4, a5 -80000e20: 63 1c 07 00 bnez a4, 24 -80000e24: 13 05 0b 00 mv a0, s6 -80000e28: e7 80 06 00 jalr a3 -80000e2c: 6f f0 df fc j -52 -80000e30: 23 22 89 00 sw s0, 4(s2) -80000e34: 6f f0 9f fa j -88 -80000e38: 13 85 05 00 mv a0, a1 -80000e3c: e7 80 06 00 jalr a3 -80000e40: 6f f0 9f fb j -72 +80000df8: 6f f0 9f fb j -72 Disassembly of section .init_array: @@ -1183,7 +1165,7 @@ Disassembly of section .symtab: ae: f1 ff b0: 0e 00 b2: 00 00 - b4: 74 08 + b4: 2c 08 b6: 00 80 b8: 00 00 ba: 00 00 @@ -1211,7 +1193,7 @@ Disassembly of section .symtab: fc: 04 00 fe: f1 ff 100: 63 00 00 00 beqz zero, 0 - 104: 38 04 + 104: f0 03 106: 00 80 108: 24 00 10a: 00 00 @@ -1324,7 +1306,7 @@ Disassembly of section .symtab: 22e: f1 ff 230: 3e 01 232: 00 00 - 234: c0 08 + 234: 78 08 236: 00 80 238: 00 00 23a: 00 00 @@ -1332,14 +1314,14 @@ Disassembly of section .symtab: 23e: 02 00 240: 49 01 242: 00 00 - 244: 78 08 + 244: 30 08 246: 00 80 248: 00 00 24a: 00 00 24c: 12 00 24e: 02 00 250: 53 01 00 00 fadd.s ft2, ft0, ft0, rne - 254: 80 0a + 254: 38 0a 256: 00 80 258: 08 00 25a: 00 00 @@ -1348,7 +1330,7 @@ Disassembly of section .symtab: 260: 5b 01 00 00 264: 60 00 266: 00 80 - 268: 3c 01 + 268: 48 01 26a: 00 00 26c: 12 00 26e: 02 00 @@ -1356,12 +1338,12 @@ Disassembly of section .symtab: 272: 00 00 274: 3c 14 276: 00 80 - 278: 04 00 + 278: 20 00 27a: 00 00 27c: 11 00 27e: 07 00 7f 01 282: 00 00 - 284: 80 08 + 284: 38 08 286: 00 80 288: 00 00 28a: 00 00 @@ -1377,7 +1359,7 @@ Disassembly of section .symtab: 29e: 05 00 2a0: 92 02 2a2: 00 00 - 2a4: f0 08 + 2a4: a8 08 2a6: 00 80 2a8: 70 00 2aa: 00 00 @@ -1385,7 +1367,7 @@ Disassembly of section .symtab: 2ae: 02 00 2b0: 96 01 2b2: 00 00 - 2b4: 98 08 + 2b4: 50 08 2b6: 00 80 2b8: 00 00 2ba: 00 00 @@ -1393,7 +1375,7 @@ Disassembly of section .symtab: 2be: 02 00 2c0: 9e 01 2c2: 00 00 - 2c4: d0 08 + 2c4: 88 08 2c6: 00 80 2c8: 00 00 2ca: 00 00 @@ -1402,13 +1384,13 @@ Disassembly of section .symtab: 2d0: ab 01 00 00 2d4: 3c 02 2d6: 00 80 - 2d8: cc 01 + 2d8: 84 01 2da: 00 00 2dc: 12 00 2de: 02 00 2e0: b8 01 2e2: 00 00 - 2e4: 90 08 + 2e4: 48 08 2e6: 00 80 2e8: 00 00 2ea: 00 00 @@ -1432,7 +1414,7 @@ Disassembly of section .symtab: 30e: 05 00 310: e4 01 312: 00 00 - 314: 14 0b + 314: cc 0a 316: 00 80 318: 9c 00 31a: 00 00 @@ -1440,7 +1422,7 @@ Disassembly of section .symtab: 31e: 02 00 320: f6 01 322: 00 00 - 324: c8 08 + 324: 80 08 326: 00 80 328: 00 00 32a: 00 00 @@ -1448,7 +1430,7 @@ Disassembly of section .symtab: 32e: 02 00 330: 05 02 332: 00 00 - 334: a0 08 + 334: 58 08 336: 00 80 338: 00 00 33a: 00 00 @@ -1456,7 +1438,7 @@ Disassembly of section .symtab: 33e: 02 00 340: 10 02 342: 00 00 - 344: b0 08 + 344: 68 08 346: 00 80 348: 00 00 34a: 00 00 @@ -1464,14 +1446,14 @@ Disassembly of section .symtab: 34e: 02 00 350: 1d 02 352: 00 00 - 354: b8 0a + 354: 70 0a 356: 00 80 358: 5c 00 35a: 00 00 35c: 12 00 35e: 02 00 360: 2f 02 00 00 - 364: 38 08 + 364: f0 07 366: 00 80 368: 00 00 36a: 00 00 @@ -1479,7 +1461,7 @@ Disassembly of section .symtab: 36e: 02 00 370: 39 02 372: 00 00 - 374: 88 08 + 374: 40 08 376: 00 80 378: 00 00 37a: 00 00 @@ -1487,7 +1469,7 @@ Disassembly of section .symtab: 37e: 02 00 380: 44 02 382: 00 00 - 384: 24 0d + 384: dc 0c 386: 00 80 388: 20 01 38a: 00 00 @@ -1503,7 +1485,7 @@ Disassembly of section .symtab: 39e: 01 00 3a0: 55 02 3a2: 00 00 - 3a4: 8c 0c + 3a4: 44 0c 3a6: 00 80 3a8: 98 00 3aa: 00 00 @@ -1511,7 +1493,7 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: 69 02 3b2: 00 00 - 3b4: 40 14 + 3b4: 5c 14 3b6: 00 80 3b8: 00 00 3ba: 00 00 @@ -1526,7 +1508,7 @@ Disassembly of section .symtab: 3ce: 06 00 3d0: 81 02 3d2: 00 00 - 3d4: b0 0b + 3d4: 68 0b 3d6: 00 80 3d8: dc 00 3da: 00 00 @@ -1534,7 +1516,7 @@ Disassembly of section .symtab: 3de: 02 00 3e0: 88 02 3e2: 00 00 - 3e4: 60 09 + 3e4: 18 09 3e6: 00 80 3e8: 0c 01 3ea: 00 00 @@ -1542,7 +1524,7 @@ Disassembly of section .symtab: 3ee: 02 00 3f0: 98 02 3f2: 00 00 - 3f4: 08 04 + 3f4: c0 03 3f6: 00 80 3f8: 30 00 3fa: 00 00 @@ -1550,7 +1532,7 @@ Disassembly of section .symtab: 3fe: 02 00 400: 9d 02 402: 00 00 - 404: ec 06 + 404: a4 06 406: 00 80 408: 44 01 40a: 00 00 @@ -1558,14 +1540,14 @@ Disassembly of section .symtab: 40e: 02 00 410: c9 02 412: 00 00 - 414: e0 08 + 414: 98 08 416: 00 80 418: 00 00 41a: 00 00 41c: 12 00 41e: 02 00 420: d7 02 00 00 - 424: 6c 0a + 424: 24 0a 426: 00 80 428: 14 00 42a: 00 00 @@ -1581,7 +1563,7 @@ Disassembly of section .symtab: 43e: 05 00 440: de 02 442: 00 00 - 444: b8 08 + 444: 70 08 446: 00 80 448: 00 00 44a: 00 00 @@ -1589,7 +1571,7 @@ Disassembly of section .symtab: 44e: 02 00 450: ec 02 452: 00 00 - 454: d8 08 + 454: 90 08 456: 00 80 458: 00 00 45a: 00 00 @@ -1597,7 +1579,7 @@ Disassembly of section .symtab: 45e: 02 00 460: f9 02 462: 00 00 - 464: a8 08 + 464: 60 08 466: 00 80 468: 00 00 46a: 00 00 @@ -1605,15 +1587,15 @@ Disassembly of section .symtab: 46e: 02 00 470: 05 03 472: 00 00 - 474: 9c 01 + 474: a8 01 476: 00 80 - 478: a0 00 + 478: 94 00 47a: 00 00 47c: 12 00 47e: 02 00 480: 1e 03 482: 00 00 - 484: a0 05 + 484: 58 05 486: 00 80 488: 4c 01 48a: 00 00 @@ -1637,14 +1619,14 @@ Disassembly of section .symtab: 4ae: 05 00 4b0: c6 00 4b2: 00 00 - 4b4: 40 14 + 4b4: 5c 14 4b6: 00 80 4b8: 00 00 4ba: 00 00 4bc: 10 00 4be: 07 00 5b 03 4c2: 00 00 - 4c4: 5c 04 + 4c4: 14 04 4c6: 00 80 4c8: 44 01 4ca: 00 00 @@ -1652,7 +1634,7 @@ Disassembly of section .symtab: 4ce: 02 00 4d0: 79 03 4d2: 00 00 - 4d4: 88 0a + 4d4: 40 0a 4d6: 00 80 4d8: 30 00 4da: 00 00 @@ -1660,7 +1642,7 @@ Disassembly of section .symtab: 4de: 02 00 4e0: 78 03 4e2: 00 00 - 4e4: 30 08 + 4e4: e8 07 4e6: 00 80 4e8: 00 00 4ea: 00 00 @@ -1668,7 +1650,7 @@ Disassembly of section .symtab: 4ee: 02 00 4f0: 7e 03 4f2: 00 00 - 4f4: e8 08 + 4f4: a0 08 4f6: 00 80 4f8: 00 00 4fa: 00 00 @@ -1708,13 +1690,14 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 30 36 - 48: 2d 37 - 4a: 35 2d - 4c: 35 32 - 4e: 2d 37 - 50: 39 2d - 52: 33 64 2e 63 + 46: 62 31 + 48: 2d 33 + 4a: 36 2d + 4c: 61 38 + 4e: 2d 62 + 50: 30 2d + 52: 38 61 + 54: 2e 63 56: 00 70 58: 61 72 5a: 61 6c diff --git a/benchmarks/opencl/nearn/kernel.pocl b/benchmarks/opencl/nearn/kernel.pocl index 5f5c324e..535972b0 100644 Binary files a/benchmarks/opencl/nearn/kernel.pocl and b/benchmarks/opencl/nearn/kernel.pocl differ diff --git a/benchmarks/opencl/saxpy/Makefile b/benchmarks/opencl/saxpy/Makefile index 8c9de60b..8d896110 100644 --- a/benchmarks/opencl/saxpy/Makefile +++ b/benchmarks/opencl/saxpy/Makefile @@ -54,7 +54,7 @@ clean: rm -rf $(PROJECT) *.o .depend clean-all: clean - rm *.pocl *.dump + rm -rf *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/saxpy/kernel.pocl b/benchmarks/opencl/saxpy/kernel.pocl index bd50a2ae..52908e39 100644 Binary files a/benchmarks/opencl/saxpy/kernel.pocl and b/benchmarks/opencl/saxpy/kernel.pocl differ diff --git a/benchmarks/opencl/saxpy/saxpy.dump b/benchmarks/opencl/saxpy/saxpy.dump index 389172fa..fd8e9aba 100644 --- a/benchmarks/opencl/saxpy/saxpy.dump +++ b/benchmarks/opencl/saxpy/saxpy.dump @@ -1,28 +1,28 @@ -/tmp/pocl_vortex_kernel-ee-e5-c6-d2-34.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-1e-78-7a-30-94.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 05 00 00 auipc a1, 0 -80000004: 93 85 c5 5d addi a1, a1, 1500 +80000004: 93 85 45 59 addi a1, a1, 1428 80000008: 73 25 60 02 csrr a0, 38 8000000c: 6b 10 b5 00 -80000010: ef 00 c0 5c jal 1484 +80000010: ef 00 40 58 jal 1412 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 13 85 c1 c2 addi a0, gp, -980 -80000020: 13 86 01 c3 addi a2, gp, -976 +80000020: 13 86 c1 c4 addi a2, gp, -948 80000024: 33 06 a6 40 sub a2, a2, a0 80000028: 93 05 00 00 mv a1, zero -8000002c: ef 00 40 7a jal 1956 +8000002c: ef 00 c0 75 jal 1884 80000030: 17 05 00 00 auipc a0, 0 -80000034: 13 05 85 6a addi a0, a0, 1704 -80000038: ef 00 c0 65 jal 1628 -8000003c: ef 00 80 6f jal 1784 -80000040: ef 00 80 3c jal 968 -80000044: 6f 00 40 66 j 1636 +80000034: 13 05 05 66 addi a0, a0, 1632 +80000038: ef 00 40 61 jal 1556 +8000003c: ef 00 00 6b jal 1712 +80000040: ef 00 00 38 jal 896 +80000044: 6f 00 c0 61 j 1564 Disassembly of section .text: @@ -30,8 +30,8 @@ Disassembly of section .text: 80000048: 93 07 00 00 mv a5, zero 8000004c: 63 88 07 00 beqz a5, 16 80000050: 37 05 00 80 lui a0, 524288 -80000054: 13 05 85 6d addi a0, a0, 1752 -80000058: 6f 00 c0 63 j 1596 +80000054: 13 05 05 69 addi a0, a0, 1680 +80000058: 6f 00 40 5f j 1524 8000005c: 67 80 00 00 ret 80000060 kernel_spawn_run_warp: @@ -46,116 +46,116 @@ Disassembly of section .text: 80000080: 23 28 61 01 sw s6, 16(sp) 80000084: 23 26 71 01 sw s7, 12(sp) 80000088: 23 24 81 01 sw s8, 8(sp) -8000008c: ef 00 00 5e jal 1504 -80000090: ef 00 40 59 jal 1428 -80000094: ef 00 80 5d jal 1496 -80000098: 83 a5 c1 c2 lw a1, -980(gp) -8000009c: 83 a5 05 00 lw a1, 0(a1) -800000a0: 83 aa 05 00 lw s5, 0(a1) -800000a4: 03 ab 45 00 lw s6, 4(a1) +8000008c: ef 00 80 59 jal 1432 +80000090: ef 00 c0 54 jal 1356 +80000094: ef 00 80 58 jal 1416 +80000098: 93 04 05 00 mv s1, a0 +8000009c: ef 00 00 56 jal 1376 +800000a0: 93 09 05 00 mv s3, a0 +800000a4: ef 00 80 56 jal 1384 800000a8: 13 09 05 00 mv s2, a0 -800000ac: ef 00 80 59 jal 1432 -800000b0: 93 09 05 00 mv s3, a0 -800000b4: ef 00 00 5a jal 1440 -800000b8: 03 a8 c1 c2 lw a6, -980(gp) -800000bc: 03 27 48 01 lw a4, 20(a6) -800000c0: 83 25 08 01 lw a1, 16(a6) -800000c4: 93 06 07 00 mv a3, a4 -800000c8: 63 44 37 01 blt a4, s3, 8 -800000cc: 93 86 09 00 mv a3, s3 -800000d0: 33 a7 e9 00 slt a4, s3, a4 -800000d4: 33 87 e5 00 add a4, a1, a4 -800000d8: 93 07 10 00 addi a5, zero, 1 -800000dc: 63 46 f7 08 blt a4, a5, 140 -800000e0: 33 0a 5b 03 mul s4, s6, s5 -800000e4: 83 27 c8 00 lw a5, 12(a6) -800000e8: 13 0c f7 ff addi s8, a4, -1 -800000ec: b3 85 b9 02 mul a1, s3, a1 -800000f0: b3 85 b6 00 add a1, a3, a1 -800000f4: b3 05 b9 02 mul a1, s2, a1 -800000f8: b3 85 b7 00 add a1, a5, a1 -800000fc: 33 05 e5 02 mul a0, a0, a4 -80000100: b3 84 a5 00 add s1, a1, a0 -80000104: 33 09 60 41 neg s2, s6 -80000108: 33 0b 40 41 neg s6, s4 -8000010c: 33 c7 44 03 div a4, s1, s4 -80000110: 83 25 08 00 lw a1, 0(a6) -80000114: 33 05 eb 02 mul a0, s6, a4 -80000118: 33 85 a4 00 add a0, s1, a0 -8000011c: b3 46 55 03 div a3, a0, s5 -80000120: 03 a5 c5 00 lw a0, 12(a1) -80000124: b3 07 e9 02 mul a5, s2, a4 -80000128: b3 87 d7 40 sub a5, a5, a3 -8000012c: b3 87 fa 02 mul a5, s5, a5 -80000130: b3 08 f5 00 add a7, a0, a5 -80000134: 03 a4 05 01 lw s0, 16(a1) -80000138: 03 a6 45 01 lw a2, 20(a1) -8000013c: 83 27 48 00 lw a5, 4(a6) -80000140: 03 25 88 00 lw a0, 8(a6) -80000144: b3 06 d4 00 add a3, s0, a3 -80000148: 33 07 e6 00 add a4, a2, a4 -8000014c: 33 86 14 01 add a2, s1, a7 -80000150: e7 80 07 00 jalr a5 -80000154: 63 0a 0c 00 beqz s8, 20 -80000158: 03 a8 c1 c2 lw a6, -980(gp) -8000015c: 13 0c fc ff addi s8, s8, -1 -80000160: 93 84 14 00 addi s1, s1, 1 -80000164: 6f f0 9f fa j -88 -80000168: 13 b5 19 00 seqz a0, s3 -8000016c: 03 2c 81 00 lw s8, 8(sp) -80000170: 83 2b c1 00 lw s7, 12(sp) -80000174: 03 2b 01 01 lw s6, 16(sp) -80000178: 83 2a 41 01 lw s5, 20(sp) -8000017c: 03 2a 81 01 lw s4, 24(sp) -80000180: 83 29 c1 01 lw s3, 28(sp) -80000184: 03 29 01 02 lw s2, 32(sp) -80000188: 83 24 41 02 lw s1, 36(sp) -8000018c: 03 24 81 02 lw s0, 40(sp) -80000190: 83 20 c1 02 lw ra, 44(sp) -80000194: 13 01 01 03 addi sp, sp, 48 -80000198: 6f 00 c0 48 j 1164 +800000ac: ef 00 80 57 jal 1400 +800000b0: 93 85 c1 c2 addi a1, gp, -980 +800000b4: 13 96 24 00 slli a2, s1, 2 +800000b8: b3 05 b6 00 add a1, a2, a1 +800000bc: 03 ab 05 00 lw s6, 0(a1) +800000c0: 83 25 4b 01 lw a1, 20(s6) +800000c4: 03 26 0b 01 lw a2, 16(s6) +800000c8: 93 86 05 00 mv a3, a1 +800000cc: 63 c4 35 01 blt a1, s3, 8 +800000d0: 93 86 09 00 mv a3, s3 +800000d4: b3 a5 b9 00 slt a1, s3, a1 +800000d8: 33 07 b6 00 add a4, a2, a1 +800000dc: 93 05 10 00 addi a1, zero, 1 +800000e0: 63 4a b7 08 blt a4, a1, 148 +800000e4: 83 25 0b 00 lw a1, 0(s6) +800000e8: 83 aa 05 00 lw s5, 0(a1) +800000ec: 83 a7 45 00 lw a5, 4(a1) +800000f0: 83 24 cb 00 lw s1, 12(s6) +800000f4: 33 8a 57 03 mul s4, a5, s5 +800000f8: 13 0c f7 ff addi s8, a4, -1 +800000fc: 33 86 c9 02 mul a2, s3, a2 +80000100: 33 86 c6 00 add a2, a3, a2 +80000104: 33 05 c5 02 mul a0, a0, a2 +80000108: 33 85 a4 00 add a0, s1, a0 +8000010c: 33 06 e9 02 mul a2, s2, a4 +80000110: b3 04 c5 00 add s1, a0, a2 +80000114: 33 09 f0 40 neg s2, a5 +80000118: b3 0b 40 41 neg s7, s4 +8000011c: 33 c6 44 03 div a2, s1, s4 +80000120: 33 85 cb 02 mul a0, s7, a2 +80000124: 33 85 a4 00 add a0, s1, a0 +80000128: b3 46 55 03 div a3, a0, s5 +8000012c: 03 a5 c5 00 lw a0, 12(a1) +80000130: 33 07 c9 02 mul a4, s2, a2 +80000134: 33 07 d7 40 sub a4, a4, a3 +80000138: 33 87 ea 02 mul a4, s5, a4 +8000013c: 33 08 e5 00 add a6, a0, a4 +80000140: 03 a7 05 01 lw a4, 16(a1) +80000144: 03 a4 45 01 lw s0, 20(a1) +80000148: 83 27 4b 00 lw a5, 4(s6) +8000014c: 03 25 8b 00 lw a0, 8(s6) +80000150: b3 06 d7 00 add a3, a4, a3 +80000154: 33 07 c4 00 add a4, s0, a2 +80000158: 33 86 04 01 add a2, s1, a6 +8000015c: e7 80 07 00 jalr a5 +80000160: 63 0a 0c 00 beqz s8, 20 +80000164: 83 25 0b 00 lw a1, 0(s6) +80000168: 13 0c fc ff addi s8, s8, -1 +8000016c: 93 84 14 00 addi s1, s1, 1 +80000170: 6f f0 df fa j -84 +80000174: 13 b5 19 00 seqz a0, s3 +80000178: 03 2c 81 00 lw s8, 8(sp) +8000017c: 83 2b c1 00 lw s7, 12(sp) +80000180: 03 2b 01 01 lw s6, 16(sp) +80000184: 83 2a 41 01 lw s5, 20(sp) +80000188: 03 2a 81 01 lw s4, 24(sp) +8000018c: 83 29 c1 01 lw s3, 28(sp) +80000190: 03 29 01 02 lw s2, 32(sp) +80000194: 83 24 41 02 lw s1, 36(sp) +80000198: 03 24 81 02 lw s0, 40(sp) +8000019c: 83 20 c1 02 lw ra, 44(sp) +800001a0: 13 01 01 03 addi sp, sp, 48 +800001a4: 6f 00 80 43 j 1080 -8000019c kernel_spawn_run_threads: -8000019c: 13 01 01 fe addi sp, sp, -32 -800001a0: 23 2e 11 00 sw ra, 28(sp) -800001a4: 23 2c 81 00 sw s0, 24(sp) -800001a8: 23 2a 91 00 sw s1, 20(sp) -800001ac: 23 28 21 01 sw s2, 16(sp) -800001b0: 23 26 31 01 sw s3, 12(sp) -800001b4: 93 89 05 00 mv s3, a1 -800001b8: ef 00 c0 46 jal 1132 -800001bc: 03 a5 c1 c2 lw a0, -980(gp) -800001c0: 03 25 05 00 lw a0, 0(a0) -800001c4: 83 24 05 00 lw s1, 0(a0) -800001c8: 03 25 45 00 lw a0, 4(a0) -800001cc: 33 04 95 02 mul s0, a0, s1 -800001d0: ef 00 c0 48 jal 1164 -800001d4: 33 05 35 01 add a0, a0, s3 -800001d8: 03 a6 c1 c2 lw a2, -980(gp) -800001dc: 33 47 85 02 div a4, a0, s0 -800001e0: b3 05 87 02 mul a1, a4, s0 -800001e4: 33 05 b5 40 sub a0, a0, a1 -800001e8: 83 25 06 00 lw a1, 0(a2) -800001ec: b3 46 95 02 div a3, a0, s1 -800001f0: b3 87 96 02 mul a5, a3, s1 -800001f4: 33 08 f5 40 sub a6, a0, a5 -800001f8: 83 a4 c5 00 lw s1, 12(a1) -800001fc: 03 a4 05 01 lw s0, 16(a1) -80000200: 83 a7 45 01 lw a5, 20(a1) -80000204: 83 28 46 00 lw a7, 4(a2) -80000208: 03 25 86 00 lw a0, 8(a2) -8000020c: 33 06 98 00 add a2, a6, s1 -80000210: b3 06 d4 00 add a3, s0, a3 -80000214: 33 87 e7 00 add a4, a5, a4 -80000218: e7 80 08 00 jalr a7 -8000021c: 13 05 10 00 addi a0, zero, 1 -80000220: 83 29 c1 00 lw s3, 12(sp) -80000224: 03 29 01 01 lw s2, 16(sp) -80000228: 83 24 41 01 lw s1, 20(sp) -8000022c: 03 24 81 01 lw s0, 24(sp) -80000230: 83 20 c1 01 lw ra, 28(sp) -80000234: 13 01 01 02 addi sp, sp, 32 -80000238: 6f 00 c0 3e j 1004 +800001a8 kernel_spawn_run_threads: +800001a8: 13 01 01 ff addi sp, sp, -16 +800001ac: 23 26 11 00 sw ra, 12(sp) +800001b0: 23 24 81 00 sw s0, 8(sp) +800001b4: ef 00 80 42 jal 1064 +800001b8: ef 00 40 46 jal 1124 +800001bc: 13 04 05 00 mv s0, a0 +800001c0: ef 00 40 45 jal 1108 +800001c4: 93 85 c1 c2 addi a1, gp, -980 +800001c8: 13 16 24 00 slli a2, s0, 2 +800001cc: b3 05 b6 00 add a1, a2, a1 +800001d0: 03 a6 05 00 lw a2, 0(a1) +800001d4: 83 25 06 00 lw a1, 0(a2) +800001d8: 83 26 c6 00 lw a3, 12(a2) +800001dc: 03 a7 05 00 lw a4, 0(a1) +800001e0: 83 a7 45 00 lw a5, 4(a1) +800001e4: 33 85 a6 00 add a0, a3, a0 +800001e8: b3 86 e7 02 mul a3, a5, a4 +800001ec: b3 47 d5 02 div a5, a0, a3 +800001f0: b3 86 d7 02 mul a3, a5, a3 +800001f4: 03 a4 c5 00 lw s0, 12(a1) +800001f8: 33 05 d5 40 sub a0, a0, a3 +800001fc: b3 46 e5 02 div a3, a0, a4 +80000200: 33 88 e6 02 mul a6, a3, a4 +80000204: b3 08 a4 00 add a7, s0, a0 +80000208: 03 a7 05 01 lw a4, 16(a1) +8000020c: 03 a4 45 01 lw s0, 20(a1) +80000210: 83 22 46 00 lw t0, 4(a2) +80000214: 03 25 86 00 lw a0, 8(a2) +80000218: 33 86 08 41 sub a2, a7, a6 +8000021c: b3 06 d7 00 add a3, a4, a3 +80000220: 33 07 f4 00 add a4, s0, a5 +80000224: e7 80 02 00 jalr t0 +80000228: 13 05 10 00 addi a0, zero, 1 +8000022c: 03 24 81 00 lw s0, 8(sp) +80000230: 83 20 c1 00 lw ra, 12(sp) +80000234: 13 01 01 01 addi sp, sp, 16 +80000238: 6f 00 40 3a j 932 8000023c kernel_spawn: 8000023c: 13 01 01 fc addi sp, sp, -64 @@ -167,618 +167,600 @@ Disassembly of section .text: 80000254: 23 24 41 03 sw s4, 40(sp) 80000258: 23 22 51 03 sw s5, 36(sp) 8000025c: 23 20 61 03 sw s6, 32(sp) -80000260: 13 0b 05 00 mv s6, a0 -80000264: 03 25 05 00 lw a0, 0(a0) -80000268: 83 26 4b 00 lw a3, 4(s6) -8000026c: 03 27 8b 00 lw a4, 8(s6) -80000270: 13 09 06 00 mv s2, a2 -80000274: 93 89 05 00 mv s3, a1 -80000278: 33 85 a6 02 mul a0, a3, a0 -8000027c: b3 04 e5 02 mul s1, a0, a4 -80000280: ef 00 c0 3f jal 1020 -80000284: 13 04 05 00 mv s0, a0 -80000288: ef 00 c0 3e jal 1004 +80000260: 23 2e 71 01 sw s7, 28(sp) +80000264: 23 2c 81 01 sw s8, 24(sp) +80000268: 93 04 05 00 mv s1, a0 +8000026c: 83 2b 05 00 lw s7, 0(a0) +80000270: 03 24 45 00 lw s0, 4(a0) +80000274: 03 2c 85 00 lw s8, 8(a0) +80000278: 13 09 06 00 mv s2, a2 +8000027c: 93 89 05 00 mv s3, a1 +80000280: ef 00 40 3b jal 948 +80000284: 13 0b 05 00 mv s6, a0 +80000288: ef 00 40 3a jal 932 8000028c: 13 0a 05 00 mv s4, a0 -80000290: ef 00 c0 3d jal 988 +80000290: ef 00 40 39 jal 916 80000294: 93 0a 05 00 mv s5, a0 -80000298: ef 00 c0 3c jal 972 -8000029c: b3 85 4a 03 mul a1, s5, s4 -800002a0: 13 06 10 00 addi a2, zero, 1 -800002a4: 63 c8 95 00 blt a1, s1, 16 -800002a8: 63 5a 86 00 bge a2, s0, 20 -800002ac: 63 4c c5 00 blt a0, a2, 24 -800002b0: 6f 00 00 13 j 304 -800002b4: 33 c6 b4 02 div a2, s1, a1 -800002b8: e3 4a 86 fe blt a2, s0, -12 -800002bc: 13 06 04 00 mv a2, s0 -800002c0: 63 50 c5 12 bge a0, a2, 288 -800002c4: 93 06 f4 ff addi a3, s0, -1 -800002c8: b3 c5 c4 02 div a1, s1, a2 -800002cc: 63 0e d5 00 beq a0, a3, 28 -800002d0: 13 06 00 00 mv a2, zero -800002d4: b3 06 b6 00 add a3, a2, a1 -800002d8: 33 c6 56 03 div a2, a3, s5 -800002dc: 13 07 00 00 mv a4, zero -800002e0: 63 50 46 03 bge a2, s4, 32 -800002e4: 6f 00 00 02 j 32 -800002e8: 33 86 c5 02 mul a2, a1, a2 -800002ec: 33 86 c4 40 sub a2, s1, a2 -800002f0: b3 06 b6 00 add a3, a2, a1 -800002f4: 33 c6 56 03 div a2, a3, s5 -800002f8: 13 07 00 00 mv a4, zero -800002fc: 63 44 46 01 blt a2, s4, 8 -80000300: 33 47 46 03 div a4, a2, s4 -80000304: 93 07 00 00 mv a5, zero -80000308: b3 0a 56 03 mul s5, a2, s5 -8000030c: 93 04 10 00 addi s1, zero, 1 -80000310: 63 08 07 00 beqz a4, 16 -80000314: b3 07 47 03 mul a5, a4, s4 -80000318: b3 07 f6 40 sub a5, a2, a5 -8000031c: 93 04 07 00 mv s1, a4 -80000320: 33 84 56 41 sub s0, a3, s5 -80000324: 23 24 61 01 sw s6, 8(sp) -80000328: 23 26 31 01 sw s3, 12(sp) -8000032c: 23 28 21 01 sw s2, 16(sp) -80000330: 33 85 a5 02 mul a0, a1, a0 -80000334: 23 2a a1 00 sw a0, 20(sp) -80000338: 23 2c 91 00 sw s1, 24(sp) -8000033c: 23 2e f1 00 sw a5, 28(sp) -80000340: 93 05 81 00 addi a1, sp, 8 -80000344: 93 06 20 00 addi a3, zero, 2 -80000348: 23 a6 b1 c2 sw a1, -980(gp) -8000034c: 63 40 d6 02 blt a2, a3, 32 -80000350: 63 44 46 01 blt a2, s4, 8 -80000354: 13 06 0a 00 mv a2, s4 -80000358: 37 05 00 80 lui a0, 524288 -8000035c: 93 05 05 06 addi a1, a0, 96 -80000360: 13 05 06 00 mv a0, a2 -80000364: ef 00 80 2b jal 696 -80000368: ef f0 9f cf jal -776 -8000036c: 63 0a 04 06 beqz s0, 116 -80000370: 13 05 04 00 mv a0, s0 -80000374: ef 00 00 2b jal 688 -80000378: 03 a5 c1 c2 lw a0, -980(gp) -8000037c: 03 25 05 00 lw a0, 0(a0) -80000380: 83 24 05 00 lw s1, 0(a0) -80000384: 03 25 45 00 lw a0, 4(a0) -80000388: 33 04 95 02 mul s0, a0, s1 -8000038c: ef 00 00 2d jal 720 -80000390: 33 05 55 01 add a0, a0, s5 -80000394: 03 a6 c1 c2 lw a2, -980(gp) -80000398: 33 47 85 02 div a4, a0, s0 -8000039c: b3 05 87 02 mul a1, a4, s0 -800003a0: 33 05 b5 40 sub a0, a0, a1 -800003a4: 83 25 06 00 lw a1, 0(a2) -800003a8: b3 46 95 02 div a3, a0, s1 -800003ac: b3 87 96 02 mul a5, a3, s1 -800003b0: 33 08 f5 40 sub a6, a0, a5 -800003b4: 83 a4 c5 00 lw s1, 12(a1) -800003b8: 03 a4 05 01 lw s0, 16(a1) -800003bc: 83 a7 45 01 lw a5, 20(a1) -800003c0: 83 28 46 00 lw a7, 4(a2) -800003c4: 03 25 86 00 lw a0, 8(a2) -800003c8: 33 06 98 00 add a2, a6, s1 -800003cc: b3 06 d4 00 add a3, s0, a3 -800003d0: 33 87 e7 00 add a4, a5, a4 -800003d4: e7 80 08 00 jalr a7 -800003d8: 13 05 10 00 addi a0, zero, 1 -800003dc: ef 00 80 24 jal 584 -800003e0: 03 2b 01 02 lw s6, 32(sp) -800003e4: 83 2a 41 02 lw s5, 36(sp) -800003e8: 03 2a 81 02 lw s4, 40(sp) -800003ec: 83 29 c1 02 lw s3, 44(sp) -800003f0: 03 29 01 03 lw s2, 48(sp) -800003f4: 83 24 41 03 lw s1, 52(sp) -800003f8: 03 24 81 03 lw s0, 56(sp) -800003fc: 83 20 c1 03 lw ra, 60(sp) -80000400: 13 01 01 04 addi sp, sp, 64 -80000404: 67 80 00 00 ret +80000298: ef 00 40 38 jal 900 +8000029c: 93 05 70 00 addi a1, zero, 7 +800002a0: 63 c8 a5 0e blt a1, a0, 240 +800002a4: b3 05 74 03 mul a1, s0, s7 +800002a8: 33 86 85 03 mul a2, a1, s8 +800002ac: b3 85 4a 03 mul a1, s5, s4 +800002b0: 93 06 10 00 addi a3, zero, 1 +800002b4: 63 c8 c5 00 blt a1, a2, 16 +800002b8: 63 da 66 01 bge a3, s6, 20 +800002bc: 63 4c d5 00 blt a0, a3, 24 +800002c0: 6f 00 00 0d j 208 +800002c4: b3 46 b6 02 div a3, a2, a1 +800002c8: e3 ca 66 ff blt a3, s6, -12 +800002cc: 93 06 0b 00 mv a3, s6 +800002d0: 63 50 d5 0c bge a0, a3, 192 +800002d4: 13 07 fb ff addi a4, s6, -1 +800002d8: b3 45 d6 02 div a1, a2, a3 +800002dc: 63 0e e5 00 beq a0, a4, 28 +800002e0: 13 06 00 00 mv a2, zero +800002e4: 33 0b b6 00 add s6, a2, a1 +800002e8: 33 46 5b 03 div a2, s6, s5 +800002ec: 93 06 00 00 mv a3, zero +800002f0: 63 50 46 03 bge a2, s4, 32 +800002f4: 6f 00 00 02 j 32 +800002f8: b3 86 d5 02 mul a3, a1, a3 +800002fc: 33 06 d6 40 sub a2, a2, a3 +80000300: 33 0b b6 00 add s6, a2, a1 +80000304: 33 46 5b 03 div a2, s6, s5 +80000308: 93 06 00 00 mv a3, zero +8000030c: 63 44 46 01 blt a2, s4, 8 +80000310: b3 46 46 03 div a3, a2, s4 +80000314: 13 07 00 00 mv a4, zero +80000318: 93 07 10 00 addi a5, zero, 1 +8000031c: 63 88 06 00 beqz a3, 16 +80000320: 33 87 46 03 mul a4, a3, s4 +80000324: 33 07 e6 40 sub a4, a2, a4 +80000328: 93 87 06 00 mv a5, a3 +8000032c: 33 04 56 03 mul s0, a2, s5 +80000330: 23 20 91 00 sw s1, 0(sp) +80000334: 23 22 31 01 sw s3, 4(sp) +80000338: 23 24 21 01 sw s2, 8(sp) +8000033c: b3 85 a5 02 mul a1, a1, a0 +80000340: 23 26 b1 00 sw a1, 12(sp) +80000344: 23 28 f1 00 sw a5, 16(sp) +80000348: 23 2a e1 00 sw a4, 20(sp) +8000034c: 93 85 c1 c2 addi a1, gp, -980 +80000350: 13 15 25 00 slli a0, a0, 2 +80000354: 33 05 b5 00 add a0, a0, a1 +80000358: 93 05 01 00 mv a1, sp +8000035c: 93 06 20 00 addi a3, zero, 2 +80000360: 23 20 b5 00 sw a1, 0(a0) +80000364: 63 40 d6 02 blt a2, a3, 32 +80000368: 63 44 46 01 blt a2, s4, 8 +8000036c: 13 06 0a 00 mv a2, s4 +80000370: 37 05 00 80 lui a0, 524288 +80000374: 93 05 05 06 addi a1, a0, 96 +80000378: 13 05 06 00 mv a0, a2 +8000037c: ef 00 80 25 jal 600 +80000380: ef f0 1f ce jal -800 +80000384: 63 06 8b 00 beq s6, s0, 12 +80000388: 23 26 81 00 sw s0, 12(sp) +8000038c: ef f0 5f cd jal -812 +80000390: 03 2c 81 01 lw s8, 24(sp) +80000394: 83 2b c1 01 lw s7, 28(sp) +80000398: 03 2b 01 02 lw s6, 32(sp) +8000039c: 83 2a 41 02 lw s5, 36(sp) +800003a0: 03 2a 81 02 lw s4, 40(sp) +800003a4: 83 29 c1 02 lw s3, 44(sp) +800003a8: 03 29 01 03 lw s2, 48(sp) +800003ac: 83 24 41 03 lw s1, 52(sp) +800003b0: 03 24 81 03 lw s0, 56(sp) +800003b4: 83 20 c1 03 lw ra, 60(sp) +800003b8: 13 01 01 04 addi sp, sp, 64 +800003bc: 67 80 00 00 ret -80000408 main: -80000408: 13 01 01 ff addi sp, sp, -16 -8000040c: 23 26 11 00 sw ra, 12(sp) -80000410: 37 05 00 80 lui a0, 524288 -80000414: 93 05 c5 4c addi a1, a0, 1228 -80000418: 37 05 ff 7f lui a0, 524272 -8000041c: 13 06 45 03 addi a2, a0, 52 -80000420: 37 05 ff 7f lui a0, 524272 -80000424: ef f0 9f e1 jal -488 -80000428: 13 05 00 00 mv a0, zero -8000042c: 83 20 c1 00 lw ra, 12(sp) -80000430: 13 01 01 01 addi sp, sp, 16 -80000434: 67 80 00 00 ret +800003c0 main: +800003c0: 13 01 01 ff addi sp, sp, -16 +800003c4: 23 26 11 00 sw ra, 12(sp) +800003c8: 37 05 00 80 lui a0, 524288 +800003cc: 93 05 45 48 addi a1, a0, 1156 +800003d0: 37 05 ff 7f lui a0, 524272 +800003d4: 13 06 45 03 addi a2, a0, 52 +800003d8: 37 05 ff 7f lui a0, 524272 +800003dc: ef f0 1f e6 jal -416 +800003e0: 13 05 00 00 mv a0, zero +800003e4: 83 20 c1 00 lw ra, 12(sp) +800003e8: 13 01 01 01 addi sp, sp, 16 +800003ec: 67 80 00 00 ret -80000438 _pocl_kernel_saxpy: -80000438: 13 01 01 ff addi sp, sp, -16 -8000043c: 23 26 11 00 sw ra, 12(sp) -80000440: 23 24 81 00 sw s0, 8(sp) -80000444: 13 04 01 01 addi s0, sp, 16 -80000448: 13 71 c1 ff andi sp, sp, -4 -8000044c: 03 27 86 01 lw a4, 24(a2) -80000450: 83 27 c6 00 lw a5, 12(a2) -80000454: 93 08 00 00 mv a7, zero -80000458: b3 06 d7 02 mul a3, a4, a3 -8000045c: b3 86 d7 00 add a3, a5, a3 -80000460: 83 22 c6 01 lw t0, 28(a2) -80000464: 03 28 06 02 lw a6, 32(a2) -80000468: 13 96 26 00 slli a2, a3, 2 -8000046c: 33 83 c5 00 add t1, a1, a2 -80000470: b3 06 c5 00 add a3, a0, a2 -80000474: 13 06 00 00 mv a2, zero -80000478: 93 07 00 00 mv a5, zero -8000047c: 13 85 06 00 mv a0, a3 -80000480: 93 05 03 00 mv a1, t1 -80000484: 07 20 05 00 flw ft0, 0(a0) -80000488: 87 a0 05 00 flw ft1, 0(a1) -8000048c: 53 70 a0 10 fmul.s ft0, ft0, fa0 -80000490: 53 70 10 00 fadd.s ft0, ft0, ft1 -80000494: 27 a0 05 00 fsw ft0, 0(a1) -80000498: 93 87 17 00 addi a5, a5, 1 -8000049c: 93 85 45 00 addi a1, a1, 4 -800004a0: 13 05 45 00 addi a0, a0, 4 -800004a4: e3 e0 e7 fe bltu a5, a4, -32 -800004a8: 13 06 16 00 addi a2, a2, 1 -800004ac: e3 66 56 fc bltu a2, t0, -52 -800004b0: 93 88 18 00 addi a7, a7, 1 -800004b4: e3 e0 08 fd bltu a7, a6, -64 -800004b8: 13 01 04 ff addi sp, s0, -16 -800004bc: 03 24 81 00 lw s0, 8(sp) -800004c0: 83 20 c1 00 lw ra, 12(sp) -800004c4: 13 01 01 01 addi sp, sp, 16 -800004c8: 67 80 00 00 ret +800003f0 _pocl_kernel_saxpy: +800003f0: 13 01 01 ff addi sp, sp, -16 +800003f4: 23 26 11 00 sw ra, 12(sp) +800003f8: 23 24 81 00 sw s0, 8(sp) +800003fc: 13 04 01 01 addi s0, sp, 16 +80000400: 13 71 c1 ff andi sp, sp, -4 +80000404: 03 27 86 01 lw a4, 24(a2) +80000408: 83 27 c6 00 lw a5, 12(a2) +8000040c: 93 08 00 00 mv a7, zero +80000410: b3 06 d7 02 mul a3, a4, a3 +80000414: b3 86 d7 00 add a3, a5, a3 +80000418: 83 22 c6 01 lw t0, 28(a2) +8000041c: 03 28 06 02 lw a6, 32(a2) +80000420: 13 96 26 00 slli a2, a3, 2 +80000424: 33 83 c5 00 add t1, a1, a2 +80000428: b3 06 c5 00 add a3, a0, a2 +8000042c: 13 06 00 00 mv a2, zero +80000430: 93 07 00 00 mv a5, zero +80000434: 13 85 06 00 mv a0, a3 +80000438: 93 05 03 00 mv a1, t1 +8000043c: 07 20 05 00 flw ft0, 0(a0) +80000440: 87 a0 05 00 flw ft1, 0(a1) +80000444: 53 70 a0 10 fmul.s ft0, ft0, fa0 +80000448: 53 70 10 00 fadd.s ft0, ft0, ft1 +8000044c: 27 a0 05 00 fsw ft0, 0(a1) +80000450: 93 87 17 00 addi a5, a5, 1 +80000454: 93 85 45 00 addi a1, a1, 4 +80000458: 13 05 45 00 addi a0, a0, 4 +8000045c: e3 e0 e7 fe bltu a5, a4, -32 +80000460: 13 06 16 00 addi a2, a2, 1 +80000464: e3 66 56 fc bltu a2, t0, -52 +80000468: 93 88 18 00 addi a7, a7, 1 +8000046c: e3 e0 08 fd bltu a7, a6, -64 +80000470: 13 01 04 ff addi sp, s0, -16 +80000474: 03 24 81 00 lw s0, 8(sp) +80000478: 83 20 c1 00 lw ra, 12(sp) +8000047c: 13 01 01 01 addi sp, sp, 16 +80000480: 67 80 00 00 ret -800004cc _pocl_kernel_saxpy_workgroup: -800004cc: 83 26 85 00 lw a3, 8(a0) -800004d0: 93 08 00 00 mv a7, zero -800004d4: 03 27 05 00 lw a4, 0(a0) -800004d8: 83 27 45 00 lw a5, 4(a0) -800004dc: 07 a0 06 00 flw ft0, 0(a3) -800004e0: 03 a5 85 01 lw a0, 24(a1) -800004e4: 83 a6 c5 00 lw a3, 12(a1) -800004e8: 03 27 07 00 lw a4, 0(a4) -800004ec: 83 a7 07 00 lw a5, 0(a5) -800004f0: 33 06 c5 02 mul a2, a0, a2 -800004f4: 33 86 c6 00 add a2, a3, a2 -800004f8: 83 a2 c5 01 lw t0, 28(a1) -800004fc: 03 a8 05 02 lw a6, 32(a1) -80000500: 93 15 26 00 slli a1, a2, 2 -80000504: 33 83 b7 00 add t1, a5, a1 -80000508: 33 06 b7 00 add a2, a4, a1 -8000050c: 93 05 00 00 mv a1, zero -80000510: 93 06 00 00 mv a3, zero -80000514: 93 07 06 00 mv a5, a2 -80000518: 13 07 03 00 mv a4, t1 -8000051c: 87 a0 07 00 flw ft1, 0(a5) -80000520: 07 21 07 00 flw ft2, 0(a4) -80000524: d3 f0 00 10 fmul.s ft1, ft1, ft0 -80000528: d3 f0 20 00 fadd.s ft1, ft1, ft2 -8000052c: 27 20 17 00 fsw ft1, 0(a4) -80000530: 93 86 16 00 addi a3, a3, 1 -80000534: 13 07 47 00 addi a4, a4, 4 -80000538: 93 87 47 00 addi a5, a5, 4 -8000053c: e3 e0 a6 fe bltu a3, a0, -32 -80000540: 93 85 15 00 addi a1, a1, 1 -80000544: e3 e6 55 fc bltu a1, t0, -52 -80000548: 93 88 18 00 addi a7, a7, 1 -8000054c: e3 e0 08 fd bltu a7, a6, -64 -80000550: 67 80 00 00 ret +80000484 _pocl_kernel_saxpy_workgroup: +80000484: 83 26 85 00 lw a3, 8(a0) +80000488: 93 08 00 00 mv a7, zero +8000048c: 03 27 05 00 lw a4, 0(a0) +80000490: 83 27 45 00 lw a5, 4(a0) +80000494: 07 a0 06 00 flw ft0, 0(a3) +80000498: 03 a5 85 01 lw a0, 24(a1) +8000049c: 83 a6 c5 00 lw a3, 12(a1) +800004a0: 03 27 07 00 lw a4, 0(a4) +800004a4: 83 a7 07 00 lw a5, 0(a5) +800004a8: 33 06 c5 02 mul a2, a0, a2 +800004ac: 33 86 c6 00 add a2, a3, a2 +800004b0: 83 a2 c5 01 lw t0, 28(a1) +800004b4: 03 a8 05 02 lw a6, 32(a1) +800004b8: 93 15 26 00 slli a1, a2, 2 +800004bc: 33 83 b7 00 add t1, a5, a1 +800004c0: 33 06 b7 00 add a2, a4, a1 +800004c4: 93 05 00 00 mv a1, zero +800004c8: 93 06 00 00 mv a3, zero +800004cc: 93 07 06 00 mv a5, a2 +800004d0: 13 07 03 00 mv a4, t1 +800004d4: 87 a0 07 00 flw ft1, 0(a5) +800004d8: 07 21 07 00 flw ft2, 0(a4) +800004dc: d3 f0 00 10 fmul.s ft1, ft1, ft0 +800004e0: d3 f0 20 00 fadd.s ft1, ft1, ft2 +800004e4: 27 20 17 00 fsw ft1, 0(a4) +800004e8: 93 86 16 00 addi a3, a3, 1 +800004ec: 13 07 47 00 addi a4, a4, 4 +800004f0: 93 87 47 00 addi a5, a5, 4 +800004f4: e3 e0 a6 fe bltu a3, a0, -32 +800004f8: 93 85 15 00 addi a1, a1, 1 +800004fc: e3 e6 55 fc bltu a1, t0, -52 +80000500: 93 88 18 00 addi a7, a7, 1 +80000504: e3 e0 08 fd bltu a7, a6, -64 +80000508: 67 80 00 00 ret -80000554 _pocl_kernel_saxpy_workgroup_fast: -80000554: 83 26 85 00 lw a3, 8(a0) -80000558: 93 08 00 00 mv a7, zero -8000055c: 07 a0 06 00 flw ft0, 0(a3) -80000560: 03 a7 85 01 lw a4, 24(a1) -80000564: 83 a6 c5 00 lw a3, 12(a1) -80000568: 83 27 05 00 lw a5, 0(a0) -8000056c: 03 25 45 00 lw a0, 4(a0) -80000570: 33 06 c7 02 mul a2, a4, a2 -80000574: 33 86 c6 00 add a2, a3, a2 -80000578: 83 a2 c5 01 lw t0, 28(a1) -8000057c: 03 a8 05 02 lw a6, 32(a1) -80000580: 93 15 26 00 slli a1, a2, 2 -80000584: 33 03 b5 00 add t1, a0, a1 -80000588: 33 85 b7 00 add a0, a5, a1 -8000058c: 93 05 00 00 mv a1, zero -80000590: 93 06 00 00 mv a3, zero -80000594: 93 07 05 00 mv a5, a0 -80000598: 13 06 03 00 mv a2, t1 -8000059c: 87 a0 07 00 flw ft1, 0(a5) -800005a0: 07 21 06 00 flw ft2, 0(a2) -800005a4: d3 f0 00 10 fmul.s ft1, ft1, ft0 -800005a8: d3 f0 20 00 fadd.s ft1, ft1, ft2 -800005ac: 27 20 16 00 fsw ft1, 0(a2) -800005b0: 93 86 16 00 addi a3, a3, 1 -800005b4: 13 06 46 00 addi a2, a2, 4 -800005b8: 93 87 47 00 addi a5, a5, 4 -800005bc: e3 e0 e6 fe bltu a3, a4, -32 -800005c0: 93 85 15 00 addi a1, a1, 1 -800005c4: e3 e6 55 fc bltu a1, t0, -52 -800005c8: 93 88 18 00 addi a7, a7, 1 -800005cc: e3 e0 08 fd bltu a7, a6, -64 +8000050c _pocl_kernel_saxpy_workgroup_fast: +8000050c: 83 26 85 00 lw a3, 8(a0) +80000510: 93 08 00 00 mv a7, zero +80000514: 07 a0 06 00 flw ft0, 0(a3) +80000518: 03 a7 85 01 lw a4, 24(a1) +8000051c: 83 a6 c5 00 lw a3, 12(a1) +80000520: 83 27 05 00 lw a5, 0(a0) +80000524: 03 25 45 00 lw a0, 4(a0) +80000528: 33 06 c7 02 mul a2, a4, a2 +8000052c: 33 86 c6 00 add a2, a3, a2 +80000530: 83 a2 c5 01 lw t0, 28(a1) +80000534: 03 a8 05 02 lw a6, 32(a1) +80000538: 93 15 26 00 slli a1, a2, 2 +8000053c: 33 03 b5 00 add t1, a0, a1 +80000540: 33 85 b7 00 add a0, a5, a1 +80000544: 93 05 00 00 mv a1, zero +80000548: 93 06 00 00 mv a3, zero +8000054c: 93 07 05 00 mv a5, a0 +80000550: 13 06 03 00 mv a2, t1 +80000554: 87 a0 07 00 flw ft1, 0(a5) +80000558: 07 21 06 00 flw ft2, 0(a2) +8000055c: d3 f0 00 10 fmul.s ft1, ft1, ft0 +80000560: d3 f0 20 00 fadd.s ft1, ft1, ft2 +80000564: 27 20 16 00 fsw ft1, 0(a2) +80000568: 93 86 16 00 addi a3, a3, 1 +8000056c: 13 06 46 00 addi a2, a2, 4 +80000570: 93 87 47 00 addi a5, a5, 4 +80000574: e3 e0 e6 fe bltu a3, a4, -32 +80000578: 93 85 15 00 addi a1, a1, 1 +8000057c: e3 e6 55 fc bltu a1, t0, -52 +80000580: 93 88 18 00 addi a7, a7, 1 +80000584: e3 e0 08 fd bltu a7, a6, -64 +80000588: 67 80 00 00 ret + +8000058c _exit: +8000058c: 13 05 00 00 mv a0, zero +80000590: 6b 00 05 00 + +80000594 vx_set_sp: +80000594: 73 25 50 02 csrr a0, 37 +80000598: 6b 00 05 00 +8000059c: 97 21 00 00 auipc gp, 2 +800005a0: 93 81 41 c8 addi gp, gp, -892 +800005a4: f3 25 20 02 csrr a1, 34 +800005a8: 93 95 a5 00 slli a1, a1, 10 +800005ac: 73 26 00 02 csrr a2, 32 +800005b0: 13 16 26 00 slli a2, a2, 2 +800005b4: 37 f1 ff 6f lui sp, 458751 +800005b8: 33 01 b1 40 sub sp, sp, a1 +800005bc: 33 01 c1 00 add sp, sp, a2 +800005c0: f3 26 10 02 csrr a3, 33 +800005c4: 63 86 06 00 beqz a3, 12 +800005c8: 13 05 00 00 mv a0, zero +800005cc: 6b 00 05 00 + +800005d0 RETURN: 800005d0: 67 80 00 00 ret -800005d4 _exit: -800005d4: 13 05 00 00 mv a0, zero -800005d8: 6b 00 05 00 +800005d4 vx_wspawn: +800005d4: 6b 10 b5 00 +800005d8: 67 80 00 00 ret -800005dc vx_set_sp: -800005dc: 73 25 50 02 csrr a0, 37 -800005e0: 6b 00 05 00 -800005e4: 97 21 00 00 auipc gp, 2 -800005e8: 93 81 41 c8 addi gp, gp, -892 -800005ec: f3 25 20 02 csrr a1, 34 -800005f0: 93 95 a5 00 slli a1, a1, 10 -800005f4: 73 26 00 02 csrr a2, 32 -800005f8: 13 16 26 00 slli a2, a2, 2 -800005fc: 37 f1 ff 6f lui sp, 458751 -80000600: 33 01 b1 40 sub sp, sp, a1 -80000604: 33 01 c1 00 add sp, sp, a2 -80000608: f3 26 10 02 csrr a3, 33 -8000060c: 63 86 06 00 beqz a3, 12 -80000610: 13 05 00 00 mv a0, zero -80000614: 6b 00 05 00 +800005dc vx_tmc: +800005dc: 6b 00 05 00 +800005e0: 67 80 00 00 ret -80000618 RETURN: +800005e4 vx_barrier: +800005e4: 6b 40 b5 00 +800005e8: 67 80 00 00 ret + +800005ec vx_split: +800005ec: 6b 20 05 00 +800005f0: 67 80 00 00 ret + +800005f4 vx_join: +800005f4: 6b 30 00 00 +800005f8: 67 80 00 00 ret + +800005fc vx_warp_id: +800005fc: 73 25 10 02 csrr a0, 33 +80000600: 67 80 00 00 ret + +80000604 vx_warp_gid: +80000604: 73 25 30 02 csrr a0, 35 +80000608: 67 80 00 00 ret + +8000060c vx_thread_id: +8000060c: 73 25 00 02 csrr a0, 32 +80000610: 67 80 00 00 ret + +80000614 vx_thread_gid: +80000614: 73 25 20 02 csrr a0, 34 80000618: 67 80 00 00 ret -8000061c vx_wspawn: -8000061c: 6b 10 b5 00 +8000061c vx_core_id: +8000061c: 73 25 40 02 csrr a0, 36 80000620: 67 80 00 00 ret -80000624 vx_tmc: -80000624: 6b 00 05 00 +80000624 vx_num_threads: +80000624: 73 25 50 02 csrr a0, 37 80000628: 67 80 00 00 ret -8000062c vx_barrier: -8000062c: 6b 40 b5 00 +8000062c vx_num_warps: +8000062c: 73 25 60 02 csrr a0, 38 80000630: 67 80 00 00 ret -80000634 vx_split: -80000634: 6b 20 05 00 +80000634 vx_num_cores: +80000634: 73 25 70 02 csrr a0, 39 80000638: 67 80 00 00 ret -8000063c vx_join: -8000063c: 6b 30 00 00 +8000063c vx_num_cycles: +8000063c: 73 25 00 b0 csrr a0, mcycle 80000640: 67 80 00 00 ret -80000644 vx_warp_id: -80000644: 73 25 10 02 csrr a0, 33 +80000644 vx_num_instrs: +80000644: 73 25 20 b0 csrr a0, minstret 80000648: 67 80 00 00 ret -8000064c vx_warp_gid: -8000064c: 73 25 30 02 csrr a0, 35 -80000650: 67 80 00 00 ret +8000064c atexit: +8000064c: 93 05 05 00 mv a1, a0 +80000650: 93 06 00 00 mv a3, zero +80000654: 13 06 00 00 mv a2, zero +80000658: 13 05 00 00 mv a0, zero +8000065c: 6f 00 80 20 j 520 -80000654 vx_thread_id: -80000654: 73 25 00 02 csrr a0, 32 -80000658: 67 80 00 00 ret +80000660 exit: +80000660: 13 01 01 ff addi sp, sp, -16 +80000664: 93 05 00 00 mv a1, zero +80000668: 23 24 81 00 sw s0, 8(sp) +8000066c: 23 26 11 00 sw ra, 12(sp) +80000670: 13 04 05 00 mv s0, a0 +80000674: ef 00 80 28 jal 648 +80000678: 03 a5 81 c2 lw a0, -984(gp) +8000067c: 83 27 c5 03 lw a5, 60(a0) +80000680: 63 84 07 00 beqz a5, 8 +80000684: e7 80 07 00 jalr a5 +80000688: 13 05 04 00 mv a0, s0 +8000068c: ef f0 1f f0 jal -256 -8000065c vx_thread_gid: -8000065c: 73 25 20 02 csrr a0, 34 -80000660: 67 80 00 00 ret +80000690 __libc_fini_array: +80000690: 13 01 01 ff addi sp, sp, -16 +80000694: 23 24 81 00 sw s0, 8(sp) +80000698: b7 27 00 80 lui a5, 524290 +8000069c: 37 24 00 80 lui s0, 524290 +800006a0: 13 04 04 a2 addi s0, s0, -1504 +800006a4: 93 87 07 a2 addi a5, a5, -1504 +800006a8: b3 87 87 40 sub a5, a5, s0 +800006ac: 23 22 91 00 sw s1, 4(sp) +800006b0: 23 26 11 00 sw ra, 12(sp) +800006b4: 93 d4 27 40 srai s1, a5, 2 +800006b8: 63 80 04 02 beqz s1, 32 +800006bc: 93 87 c7 ff addi a5, a5, -4 +800006c0: 33 84 87 00 add s0, a5, s0 +800006c4: 83 27 04 00 lw a5, 0(s0) +800006c8: 93 84 f4 ff addi s1, s1, -1 +800006cc: 13 04 c4 ff addi s0, s0, -4 +800006d0: e7 80 07 00 jalr a5 +800006d4: e3 98 04 fe bnez s1, -16 +800006d8: 83 20 c1 00 lw ra, 12(sp) +800006dc: 03 24 81 00 lw s0, 8(sp) +800006e0: 83 24 41 00 lw s1, 4(sp) +800006e4: 13 01 01 01 addi sp, sp, 16 +800006e8: 67 80 00 00 ret -80000664 vx_core_id: -80000664: 73 25 40 02 csrr a0, 36 -80000668: 67 80 00 00 ret +800006ec __libc_init_array: +800006ec: 13 01 01 ff addi sp, sp, -16 +800006f0: 23 24 81 00 sw s0, 8(sp) +800006f4: 23 20 21 01 sw s2, 0(sp) +800006f8: 37 24 00 80 lui s0, 524290 +800006fc: 37 29 00 80 lui s2, 524290 +80000700: 93 07 c4 a1 addi a5, s0, -1508 +80000704: 13 09 c9 a1 addi s2, s2, -1508 +80000708: 33 09 f9 40 sub s2, s2, a5 +8000070c: 23 26 11 00 sw ra, 12(sp) +80000710: 23 22 91 00 sw s1, 4(sp) +80000714: 13 59 29 40 srai s2, s2, 2 +80000718: 63 00 09 02 beqz s2, 32 +8000071c: 13 04 c4 a1 addi s0, s0, -1508 +80000720: 93 04 00 00 mv s1, zero +80000724: 83 27 04 00 lw a5, 0(s0) +80000728: 93 84 14 00 addi s1, s1, 1 +8000072c: 13 04 44 00 addi s0, s0, 4 +80000730: e7 80 07 00 jalr a5 +80000734: e3 18 99 fe bne s2, s1, -16 +80000738: 37 24 00 80 lui s0, 524290 +8000073c: 37 29 00 80 lui s2, 524290 +80000740: 93 07 c4 a1 addi a5, s0, -1508 +80000744: 13 09 09 a2 addi s2, s2, -1504 +80000748: 33 09 f9 40 sub s2, s2, a5 +8000074c: 13 59 29 40 srai s2, s2, 2 +80000750: 63 00 09 02 beqz s2, 32 +80000754: 13 04 c4 a1 addi s0, s0, -1508 +80000758: 93 04 00 00 mv s1, zero +8000075c: 83 27 04 00 lw a5, 0(s0) +80000760: 93 84 14 00 addi s1, s1, 1 +80000764: 13 04 44 00 addi s0, s0, 4 +80000768: e7 80 07 00 jalr a5 +8000076c: e3 18 99 fe bne s2, s1, -16 +80000770: 83 20 c1 00 lw ra, 12(sp) +80000774: 03 24 81 00 lw s0, 8(sp) +80000778: 83 24 41 00 lw s1, 4(sp) +8000077c: 03 29 01 00 lw s2, 0(sp) +80000780: 13 01 01 01 addi sp, sp, 16 +80000784: 67 80 00 00 ret -8000066c vx_num_threads: -8000066c: 73 25 50 02 csrr a0, 37 -80000670: 67 80 00 00 ret +80000788 memset: +80000788: 13 03 f0 00 addi t1, zero, 15 +8000078c: 13 07 05 00 mv a4, a0 +80000790: 63 7e c3 02 bgeu t1, a2, 60 +80000794: 93 77 f7 00 andi a5, a4, 15 +80000798: 63 90 07 0a bnez a5, 160 +8000079c: 63 92 05 08 bnez a1, 132 +800007a0: 93 76 06 ff andi a3, a2, -16 +800007a4: 13 76 f6 00 andi a2, a2, 15 +800007a8: b3 86 e6 00 add a3, a3, a4 +800007ac: 23 20 b7 00 sw a1, 0(a4) +800007b0: 23 22 b7 00 sw a1, 4(a4) +800007b4: 23 24 b7 00 sw a1, 8(a4) +800007b8: 23 26 b7 00 sw a1, 12(a4) +800007bc: 13 07 07 01 addi a4, a4, 16 +800007c0: e3 66 d7 fe bltu a4, a3, -20 +800007c4: 63 14 06 00 bnez a2, 8 +800007c8: 67 80 00 00 ret +800007cc: b3 06 c3 40 sub a3, t1, a2 +800007d0: 93 96 26 00 slli a3, a3, 2 +800007d4: 97 02 00 00 auipc t0, 0 +800007d8: b3 86 56 00 add a3, a3, t0 +800007dc: 67 80 c6 00 jr 12(a3) +800007e0: 23 07 b7 00 sb a1, 14(a4) +800007e4: a3 06 b7 00 sb a1, 13(a4) +800007e8: 23 06 b7 00 sb a1, 12(a4) +800007ec: a3 05 b7 00 sb a1, 11(a4) +800007f0: 23 05 b7 00 sb a1, 10(a4) +800007f4: a3 04 b7 00 sb a1, 9(a4) +800007f8: 23 04 b7 00 sb a1, 8(a4) +800007fc: a3 03 b7 00 sb a1, 7(a4) +80000800: 23 03 b7 00 sb a1, 6(a4) +80000804: a3 02 b7 00 sb a1, 5(a4) +80000808: 23 02 b7 00 sb a1, 4(a4) +8000080c: a3 01 b7 00 sb a1, 3(a4) +80000810: 23 01 b7 00 sb a1, 2(a4) +80000814: a3 00 b7 00 sb a1, 1(a4) +80000818: 23 00 b7 00 sb a1, 0(a4) +8000081c: 67 80 00 00 ret +80000820: 93 f5 f5 0f andi a1, a1, 255 +80000824: 93 96 85 00 slli a3, a1, 8 +80000828: b3 e5 d5 00 or a1, a1, a3 +8000082c: 93 96 05 01 slli a3, a1, 16 +80000830: b3 e5 d5 00 or a1, a1, a3 +80000834: 6f f0 df f6 j -148 +80000838: 93 96 27 00 slli a3, a5, 2 +8000083c: 97 02 00 00 auipc t0, 0 +80000840: b3 86 56 00 add a3, a3, t0 +80000844: 93 82 00 00 mv t0, ra +80000848: e7 80 06 fa jalr -96(a3) +8000084c: 93 80 02 00 mv ra, t0 +80000850: 93 87 07 ff addi a5, a5, -16 +80000854: 33 07 f7 40 sub a4, a4, a5 +80000858: 33 06 f6 00 add a2, a2, a5 +8000085c: e3 78 c3 f6 bgeu t1, a2, -144 +80000860: 6f f0 df f3 j -196 -80000674 vx_num_warps: -80000674: 73 25 60 02 csrr a0, 38 -80000678: 67 80 00 00 ret +80000864 __register_exitproc: +80000864: 03 a7 81 c2 lw a4, -984(gp) +80000868: 83 27 87 14 lw a5, 328(a4) +8000086c: 63 8c 07 04 beqz a5, 88 +80000870: 03 a7 47 00 lw a4, 4(a5) +80000874: 13 08 f0 01 addi a6, zero, 31 +80000878: 63 4e e8 06 blt a6, a4, 124 +8000087c: 13 18 27 00 slli a6, a4, 2 +80000880: 63 06 05 02 beqz a0, 44 +80000884: 33 83 07 01 add t1, a5, a6 +80000888: 23 24 c3 08 sw a2, 136(t1) +8000088c: 83 a8 87 18 lw a7, 392(a5) +80000890: 13 06 10 00 addi a2, zero, 1 +80000894: 33 16 e6 00 sll a2, a2, a4 +80000898: b3 e8 c8 00 or a7, a7, a2 +8000089c: 23 a4 17 19 sw a7, 392(a5) +800008a0: 23 24 d3 10 sw a3, 264(t1) +800008a4: 93 06 20 00 addi a3, zero, 2 +800008a8: 63 04 d5 02 beq a0, a3, 40 +800008ac: 13 07 17 00 addi a4, a4, 1 +800008b0: 23 a2 e7 00 sw a4, 4(a5) +800008b4: b3 87 07 01 add a5, a5, a6 +800008b8: 23 a4 b7 00 sw a1, 8(a5) +800008bc: 13 05 00 00 mv a0, zero +800008c0: 67 80 00 00 ret +800008c4: 93 07 c7 14 addi a5, a4, 332 +800008c8: 23 24 f7 14 sw a5, 328(a4) +800008cc: 6f f0 5f fa j -92 +800008d0: 83 a6 c7 18 lw a3, 396(a5) +800008d4: 13 07 17 00 addi a4, a4, 1 +800008d8: 23 a2 e7 00 sw a4, 4(a5) +800008dc: 33 e6 c6 00 or a2, a3, a2 +800008e0: 23 a6 c7 18 sw a2, 396(a5) +800008e4: b3 87 07 01 add a5, a5, a6 +800008e8: 23 a4 b7 00 sw a1, 8(a5) +800008ec: 13 05 00 00 mv a0, zero +800008f0: 67 80 00 00 ret +800008f4: 13 05 f0 ff addi a0, zero, -1 +800008f8: 67 80 00 00 ret -8000067c vx_num_cores: -8000067c: 73 25 70 02 csrr a0, 39 -80000680: 67 80 00 00 ret - -80000684 vx_num_cycles: -80000684: 73 25 00 c0 rdcycle a0 -80000688: 67 80 00 00 ret - -8000068c vx_num_instrs: -8000068c: 73 25 20 c0 rdinstret a0 -80000690: 67 80 00 00 ret - -80000694 atexit: -80000694: 93 05 05 00 mv a1, a0 -80000698: 93 06 00 00 mv a3, zero -8000069c: 13 06 00 00 mv a2, zero -800006a0: 13 05 00 00 mv a0, zero -800006a4: 6f 00 80 20 j 520 - -800006a8 exit: -800006a8: 13 01 01 ff addi sp, sp, -16 -800006ac: 93 05 00 00 mv a1, zero -800006b0: 23 24 81 00 sw s0, 8(sp) -800006b4: 23 26 11 00 sw ra, 12(sp) -800006b8: 13 04 05 00 mv s0, a0 -800006bc: ef 00 80 28 jal 648 -800006c0: 03 a5 81 c2 lw a0, -984(gp) -800006c4: 83 27 c5 03 lw a5, 60(a0) -800006c8: 63 84 07 00 beqz a5, 8 -800006cc: e7 80 07 00 jalr a5 -800006d0: 13 05 04 00 mv a0, s0 -800006d4: ef f0 1f f0 jal -256 - -800006d8 __libc_fini_array: -800006d8: 13 01 01 ff addi sp, sp, -16 -800006dc: 23 24 81 00 sw s0, 8(sp) -800006e0: b7 27 00 80 lui a5, 524290 -800006e4: 37 24 00 80 lui s0, 524290 -800006e8: 13 04 84 a6 addi s0, s0, -1432 -800006ec: 93 87 87 a6 addi a5, a5, -1432 -800006f0: b3 87 87 40 sub a5, a5, s0 -800006f4: 23 22 91 00 sw s1, 4(sp) -800006f8: 23 26 11 00 sw ra, 12(sp) -800006fc: 93 d4 27 40 srai s1, a5, 2 -80000700: 63 80 04 02 beqz s1, 32 -80000704: 93 87 c7 ff addi a5, a5, -4 -80000708: 33 84 87 00 add s0, a5, s0 -8000070c: 83 27 04 00 lw a5, 0(s0) -80000710: 93 84 f4 ff addi s1, s1, -1 -80000714: 13 04 c4 ff addi s0, s0, -4 -80000718: e7 80 07 00 jalr a5 -8000071c: e3 98 04 fe bnez s1, -16 -80000720: 83 20 c1 00 lw ra, 12(sp) -80000724: 03 24 81 00 lw s0, 8(sp) -80000728: 83 24 41 00 lw s1, 4(sp) -8000072c: 13 01 01 01 addi sp, sp, 16 -80000730: 67 80 00 00 ret - -80000734 __libc_init_array: -80000734: 13 01 01 ff addi sp, sp, -16 -80000738: 23 24 81 00 sw s0, 8(sp) -8000073c: 23 20 21 01 sw s2, 0(sp) -80000740: 37 24 00 80 lui s0, 524290 -80000744: 37 29 00 80 lui s2, 524290 -80000748: 93 07 44 a6 addi a5, s0, -1436 -8000074c: 13 09 49 a6 addi s2, s2, -1436 -80000750: 33 09 f9 40 sub s2, s2, a5 -80000754: 23 26 11 00 sw ra, 12(sp) -80000758: 23 22 91 00 sw s1, 4(sp) -8000075c: 13 59 29 40 srai s2, s2, 2 -80000760: 63 00 09 02 beqz s2, 32 -80000764: 13 04 44 a6 addi s0, s0, -1436 -80000768: 93 04 00 00 mv s1, zero -8000076c: 83 27 04 00 lw a5, 0(s0) -80000770: 93 84 14 00 addi s1, s1, 1 -80000774: 13 04 44 00 addi s0, s0, 4 -80000778: e7 80 07 00 jalr a5 -8000077c: e3 18 99 fe bne s2, s1, -16 -80000780: 37 24 00 80 lui s0, 524290 -80000784: 37 29 00 80 lui s2, 524290 -80000788: 93 07 44 a6 addi a5, s0, -1436 -8000078c: 13 09 89 a6 addi s2, s2, -1432 -80000790: 33 09 f9 40 sub s2, s2, a5 -80000794: 13 59 29 40 srai s2, s2, 2 -80000798: 63 00 09 02 beqz s2, 32 -8000079c: 13 04 44 a6 addi s0, s0, -1436 -800007a0: 93 04 00 00 mv s1, zero -800007a4: 83 27 04 00 lw a5, 0(s0) -800007a8: 93 84 14 00 addi s1, s1, 1 -800007ac: 13 04 44 00 addi s0, s0, 4 -800007b0: e7 80 07 00 jalr a5 -800007b4: e3 18 99 fe bne s2, s1, -16 -800007b8: 83 20 c1 00 lw ra, 12(sp) -800007bc: 03 24 81 00 lw s0, 8(sp) -800007c0: 83 24 41 00 lw s1, 4(sp) -800007c4: 03 29 01 00 lw s2, 0(sp) -800007c8: 13 01 01 01 addi sp, sp, 16 -800007cc: 67 80 00 00 ret - -800007d0 memset: -800007d0: 13 03 f0 00 addi t1, zero, 15 -800007d4: 13 07 05 00 mv a4, a0 -800007d8: 63 7e c3 02 bgeu t1, a2, 60 -800007dc: 93 77 f7 00 andi a5, a4, 15 -800007e0: 63 90 07 0a bnez a5, 160 -800007e4: 63 92 05 08 bnez a1, 132 -800007e8: 93 76 06 ff andi a3, a2, -16 -800007ec: 13 76 f6 00 andi a2, a2, 15 -800007f0: b3 86 e6 00 add a3, a3, a4 -800007f4: 23 20 b7 00 sw a1, 0(a4) -800007f8: 23 22 b7 00 sw a1, 4(a4) -800007fc: 23 24 b7 00 sw a1, 8(a4) -80000800: 23 26 b7 00 sw a1, 12(a4) -80000804: 13 07 07 01 addi a4, a4, 16 -80000808: e3 66 d7 fe bltu a4, a3, -20 -8000080c: 63 14 06 00 bnez a2, 8 -80000810: 67 80 00 00 ret -80000814: b3 06 c3 40 sub a3, t1, a2 -80000818: 93 96 26 00 slli a3, a3, 2 -8000081c: 97 02 00 00 auipc t0, 0 -80000820: b3 86 56 00 add a3, a3, t0 -80000824: 67 80 c6 00 jr 12(a3) -80000828: 23 07 b7 00 sb a1, 14(a4) -8000082c: a3 06 b7 00 sb a1, 13(a4) -80000830: 23 06 b7 00 sb a1, 12(a4) -80000834: a3 05 b7 00 sb a1, 11(a4) -80000838: 23 05 b7 00 sb a1, 10(a4) -8000083c: a3 04 b7 00 sb a1, 9(a4) -80000840: 23 04 b7 00 sb a1, 8(a4) -80000844: a3 03 b7 00 sb a1, 7(a4) -80000848: 23 03 b7 00 sb a1, 6(a4) -8000084c: a3 02 b7 00 sb a1, 5(a4) -80000850: 23 02 b7 00 sb a1, 4(a4) -80000854: a3 01 b7 00 sb a1, 3(a4) -80000858: 23 01 b7 00 sb a1, 2(a4) -8000085c: a3 00 b7 00 sb a1, 1(a4) -80000860: 23 00 b7 00 sb a1, 0(a4) -80000864: 67 80 00 00 ret -80000868: 93 f5 f5 0f andi a1, a1, 255 -8000086c: 93 96 85 00 slli a3, a1, 8 -80000870: b3 e5 d5 00 or a1, a1, a3 -80000874: 93 96 05 01 slli a3, a1, 16 -80000878: b3 e5 d5 00 or a1, a1, a3 -8000087c: 6f f0 df f6 j -148 -80000880: 93 96 27 00 slli a3, a5, 2 -80000884: 97 02 00 00 auipc t0, 0 -80000888: b3 86 56 00 add a3, a3, t0 -8000088c: 93 82 00 00 mv t0, ra -80000890: e7 80 06 fa jalr -96(a3) -80000894: 93 80 02 00 mv ra, t0 -80000898: 93 87 07 ff addi a5, a5, -16 -8000089c: 33 07 f7 40 sub a4, a4, a5 -800008a0: 33 06 f6 00 add a2, a2, a5 -800008a4: e3 78 c3 f6 bgeu t1, a2, -144 -800008a8: 6f f0 df f3 j -196 - -800008ac __register_exitproc: -800008ac: 03 a7 81 c2 lw a4, -984(gp) -800008b0: 83 27 87 14 lw a5, 328(a4) -800008b4: 63 8c 07 04 beqz a5, 88 -800008b8: 03 a7 47 00 lw a4, 4(a5) -800008bc: 13 08 f0 01 addi a6, zero, 31 -800008c0: 63 4e e8 06 blt a6, a4, 124 -800008c4: 13 18 27 00 slli a6, a4, 2 -800008c8: 63 06 05 02 beqz a0, 44 -800008cc: 33 83 07 01 add t1, a5, a6 -800008d0: 23 24 c3 08 sw a2, 136(t1) -800008d4: 83 a8 87 18 lw a7, 392(a5) -800008d8: 13 06 10 00 addi a2, zero, 1 -800008dc: 33 16 e6 00 sll a2, a2, a4 -800008e0: b3 e8 c8 00 or a7, a7, a2 -800008e4: 23 a4 17 19 sw a7, 392(a5) -800008e8: 23 24 d3 10 sw a3, 264(t1) -800008ec: 93 06 20 00 addi a3, zero, 2 -800008f0: 63 04 d5 02 beq a0, a3, 40 -800008f4: 13 07 17 00 addi a4, a4, 1 -800008f8: 23 a2 e7 00 sw a4, 4(a5) -800008fc: b3 87 07 01 add a5, a5, a6 -80000900: 23 a4 b7 00 sw a1, 8(a5) -80000904: 13 05 00 00 mv a0, zero -80000908: 67 80 00 00 ret -8000090c: 93 07 c7 14 addi a5, a4, 332 -80000910: 23 24 f7 14 sw a5, 328(a4) -80000914: 6f f0 5f fa j -92 -80000918: 83 a6 c7 18 lw a3, 396(a5) -8000091c: 13 07 17 00 addi a4, a4, 1 -80000920: 23 a2 e7 00 sw a4, 4(a5) -80000924: 33 e6 c6 00 or a2, a3, a2 -80000928: 23 a6 c7 18 sw a2, 396(a5) -8000092c: b3 87 07 01 add a5, a5, a6 -80000930: 23 a4 b7 00 sw a1, 8(a5) -80000934: 13 05 00 00 mv a0, zero -80000938: 67 80 00 00 ret -8000093c: 13 05 f0 ff addi a0, zero, -1 -80000940: 67 80 00 00 ret - -80000944 __call_exitprocs: -80000944: 13 01 01 fd addi sp, sp, -48 -80000948: 23 2c 41 01 sw s4, 24(sp) -8000094c: 03 aa 81 c2 lw s4, -984(gp) -80000950: 23 20 21 03 sw s2, 32(sp) -80000954: 23 26 11 02 sw ra, 44(sp) -80000958: 03 29 8a 14 lw s2, 328(s4) -8000095c: 23 24 81 02 sw s0, 40(sp) -80000960: 23 22 91 02 sw s1, 36(sp) -80000964: 23 2e 31 01 sw s3, 28(sp) -80000968: 23 2a 51 01 sw s5, 20(sp) -8000096c: 23 28 61 01 sw s6, 16(sp) -80000970: 23 26 71 01 sw s7, 12(sp) -80000974: 23 24 81 01 sw s8, 8(sp) -80000978: 63 00 09 04 beqz s2, 64 -8000097c: 13 0b 05 00 mv s6, a0 -80000980: 93 8b 05 00 mv s7, a1 -80000984: 93 0a 10 00 addi s5, zero, 1 -80000988: 93 09 f0 ff addi s3, zero, -1 -8000098c: 83 24 49 00 lw s1, 4(s2) -80000990: 13 84 f4 ff addi s0, s1, -1 -80000994: 63 42 04 02 bltz s0, 36 -80000998: 93 94 24 00 slli s1, s1, 2 -8000099c: b3 04 99 00 add s1, s2, s1 -800009a0: 63 84 0b 04 beqz s7, 72 -800009a4: 83 a7 44 10 lw a5, 260(s1) -800009a8: 63 80 77 05 beq a5, s7, 64 -800009ac: 13 04 f4 ff addi s0, s0, -1 -800009b0: 93 84 c4 ff addi s1, s1, -4 -800009b4: e3 16 34 ff bne s0, s3, -20 -800009b8: 83 20 c1 02 lw ra, 44(sp) -800009bc: 03 24 81 02 lw s0, 40(sp) -800009c0: 83 24 41 02 lw s1, 36(sp) -800009c4: 03 29 01 02 lw s2, 32(sp) -800009c8: 83 29 c1 01 lw s3, 28(sp) -800009cc: 03 2a 81 01 lw s4, 24(sp) -800009d0: 83 2a 41 01 lw s5, 20(sp) -800009d4: 03 2b 01 01 lw s6, 16(sp) -800009d8: 83 2b c1 00 lw s7, 12(sp) -800009dc: 03 2c 81 00 lw s8, 8(sp) -800009e0: 13 01 01 03 addi sp, sp, 48 -800009e4: 67 80 00 00 ret -800009e8: 83 27 49 00 lw a5, 4(s2) -800009ec: 83 a6 44 00 lw a3, 4(s1) -800009f0: 93 87 f7 ff addi a5, a5, -1 -800009f4: 63 8e 87 04 beq a5, s0, 92 -800009f8: 23 a2 04 00 sw zero, 4(s1) -800009fc: e3 88 06 fa beqz a3, -80 -80000a00: 83 27 89 18 lw a5, 392(s2) -80000a04: 33 97 8a 00 sll a4, s5, s0 -80000a08: 03 2c 49 00 lw s8, 4(s2) -80000a0c: b3 77 f7 00 and a5, a4, a5 -80000a10: 63 92 07 02 bnez a5, 36 +800008fc __call_exitprocs: +800008fc: 13 01 01 fd addi sp, sp, -48 +80000900: 23 2c 41 01 sw s4, 24(sp) +80000904: 03 aa 81 c2 lw s4, -984(gp) +80000908: 23 20 21 03 sw s2, 32(sp) +8000090c: 23 26 11 02 sw ra, 44(sp) +80000910: 03 29 8a 14 lw s2, 328(s4) +80000914: 23 24 81 02 sw s0, 40(sp) +80000918: 23 22 91 02 sw s1, 36(sp) +8000091c: 23 2e 31 01 sw s3, 28(sp) +80000920: 23 2a 51 01 sw s5, 20(sp) +80000924: 23 28 61 01 sw s6, 16(sp) +80000928: 23 26 71 01 sw s7, 12(sp) +8000092c: 23 24 81 01 sw s8, 8(sp) +80000930: 63 00 09 04 beqz s2, 64 +80000934: 13 0b 05 00 mv s6, a0 +80000938: 93 8b 05 00 mv s7, a1 +8000093c: 93 0a 10 00 addi s5, zero, 1 +80000940: 93 09 f0 ff addi s3, zero, -1 +80000944: 83 24 49 00 lw s1, 4(s2) +80000948: 13 84 f4 ff addi s0, s1, -1 +8000094c: 63 42 04 02 bltz s0, 36 +80000950: 93 94 24 00 slli s1, s1, 2 +80000954: b3 04 99 00 add s1, s2, s1 +80000958: 63 84 0b 04 beqz s7, 72 +8000095c: 83 a7 44 10 lw a5, 260(s1) +80000960: 63 80 77 05 beq a5, s7, 64 +80000964: 13 04 f4 ff addi s0, s0, -1 +80000968: 93 84 c4 ff addi s1, s1, -4 +8000096c: e3 16 34 ff bne s0, s3, -20 +80000970: 83 20 c1 02 lw ra, 44(sp) +80000974: 03 24 81 02 lw s0, 40(sp) +80000978: 83 24 41 02 lw s1, 36(sp) +8000097c: 03 29 01 02 lw s2, 32(sp) +80000980: 83 29 c1 01 lw s3, 28(sp) +80000984: 03 2a 81 01 lw s4, 24(sp) +80000988: 83 2a 41 01 lw s5, 20(sp) +8000098c: 03 2b 01 01 lw s6, 16(sp) +80000990: 83 2b c1 00 lw s7, 12(sp) +80000994: 03 2c 81 00 lw s8, 8(sp) +80000998: 13 01 01 03 addi sp, sp, 48 +8000099c: 67 80 00 00 ret +800009a0: 83 27 49 00 lw a5, 4(s2) +800009a4: 83 a6 44 00 lw a3, 4(s1) +800009a8: 93 87 f7 ff addi a5, a5, -1 +800009ac: 63 8e 87 04 beq a5, s0, 92 +800009b0: 23 a2 04 00 sw zero, 4(s1) +800009b4: e3 88 06 fa beqz a3, -80 +800009b8: 83 27 89 18 lw a5, 392(s2) +800009bc: 33 97 8a 00 sll a4, s5, s0 +800009c0: 03 2c 49 00 lw s8, 4(s2) +800009c4: b3 77 f7 00 and a5, a4, a5 +800009c8: 63 92 07 02 bnez a5, 36 +800009cc: e7 80 06 00 jalr a3 +800009d0: 03 27 49 00 lw a4, 4(s2) +800009d4: 83 27 8a 14 lw a5, 328(s4) +800009d8: 63 14 87 01 bne a4, s8, 8 +800009dc: e3 04 f9 f8 beq s2, a5, -120 +800009e0: e3 88 07 f8 beqz a5, -112 +800009e4: 13 89 07 00 mv s2, a5 +800009e8: 6f f0 df f5 j -164 +800009ec: 83 27 c9 18 lw a5, 396(s2) +800009f0: 83 a5 44 08 lw a1, 132(s1) +800009f4: 33 77 f7 00 and a4, a4, a5 +800009f8: 63 1c 07 00 bnez a4, 24 +800009fc: 13 05 0b 00 mv a0, s6 +80000a00: e7 80 06 00 jalr a3 +80000a04: 6f f0 df fc j -52 +80000a08: 23 22 89 00 sw s0, 4(s2) +80000a0c: 6f f0 9f fa j -88 +80000a10: 13 85 05 00 mv a0, a1 80000a14: e7 80 06 00 jalr a3 -80000a18: 03 27 49 00 lw a4, 4(s2) -80000a1c: 83 27 8a 14 lw a5, 328(s4) -80000a20: 63 14 87 01 bne a4, s8, 8 -80000a24: e3 04 f9 f8 beq s2, a5, -120 -80000a28: e3 88 07 f8 beqz a5, -112 -80000a2c: 13 89 07 00 mv s2, a5 -80000a30: 6f f0 df f5 j -164 -80000a34: 83 27 c9 18 lw a5, 396(s2) -80000a38: 83 a5 44 08 lw a1, 132(s1) -80000a3c: 33 77 f7 00 and a4, a4, a5 -80000a40: 63 1c 07 00 bnez a4, 24 -80000a44: 13 05 0b 00 mv a0, s6 -80000a48: e7 80 06 00 jalr a3 -80000a4c: 6f f0 df fc j -52 -80000a50: 23 22 89 00 sw s0, 4(s2) -80000a54: 6f f0 9f fa j -88 -80000a58: 13 85 05 00 mv a0, a1 -80000a5c: e7 80 06 00 jalr a3 -80000a60: 6f f0 9f fb j -72 +80000a18: 6f f0 9f fb j -72 Disassembly of section .init_array: -80001a64 __preinit_array_start: -80001a64: 48 00 -80001a66: 00 80 +80001a1c __preinit_array_start: +80001a1c: 48 00 +80001a1e: 00 80 Disassembly of section .data: -80001a68 impure_data: -80001a68: 00 00 -80001a6a: 00 00 -80001a6c: 54 1d -80001a6e: 00 80 -80001a70: bc 1d -80001a72: 00 80 -80001a74: 24 1e -80001a76: 00 80 +80001a20 impure_data: +80001a20: 00 00 +80001a22: 00 00 +80001a24: 0c 1d +80001a26: 00 80 +80001a28: 74 1d +80001a2a: 00 80 +80001a2c: dc 1d +80001a2e: 00 80 ... -80001b10: 01 00 -80001b12: 00 00 -80001b14: 00 00 -80001b16: 00 00 -80001b18: 0e 33 -80001b1a: cd ab -80001b1c: 34 12 -80001b1e: 6d e6 -80001b20: ec de -80001b22: 05 00 -80001b24: 0b 00 00 00 +80001ac8: 01 00 +80001aca: 00 00 +80001acc: 00 00 +80001ace: 00 00 +80001ad0: 0e 33 +80001ad2: cd ab +80001ad4: 34 12 +80001ad6: 6d e6 +80001ad8: ec de +80001ada: 05 00 +80001adc: 0b 00 00 00 ... Disassembly of section .sdata: -80001e90 _global_impure_ptr: -80001e90: 68 1a -80001e92: 00 80 +80001e48 _global_impure_ptr: +80001e48: 20 1a +80001e4a: 00 80 Disassembly of section .bss: -80001e94 g_wspawn_args: +80001e4c g_wspawn_args: ... Disassembly of section .comment: @@ -874,28 +856,28 @@ Disassembly of section .symtab: 2c: 03 00 02 00 lb zero, 0(tp) 30: 00 00 32: 00 00 - 34: 64 1a + 34: 1c 1a 36: 00 80 38: 00 00 3a: 00 00 3c: 03 00 03 00 lb zero, 0(t1) 40: 00 00 42: 00 00 - 44: 68 1a + 44: 20 1a 46: 00 80 48: 00 00 4a: 00 00 4c: 03 00 04 00 lb zero, 0(s0) 50: 00 00 52: 00 00 - 54: 90 1e + 54: 48 1e 56: 00 80 58: 00 00 5a: 00 00 5c: 03 00 05 00 lb zero, 0(a0) 60: 00 00 62: 00 00 - 64: 94 1e + 64: 4c 1e 66: 00 80 68: 00 00 6a: 00 00 @@ -911,7 +893,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: 18 06 + a4: d0 05 a6: 00 80 a8: 00 00 aa: 00 00 @@ -968,7 +950,7 @@ Disassembly of section .symtab: 14e: f1 ff 150: 85 00 152: 00 00 - 154: 68 1a + 154: 20 1a 156: 00 80 158: 28 04 15a: 00 00 @@ -979,7 +961,7 @@ Disassembly of section .symtab: 16e: f1 ff 170: 91 00 172: 00 00 - 174: 68 1a + 174: 20 1a 176: 00 80 178: 00 00 17a: 00 00 @@ -987,7 +969,7 @@ Disassembly of section .symtab: 17e: 04 00 180: a2 00 182: 00 00 - 184: 68 1a + 184: 20 1a 186: 00 80 188: 00 00 18a: 00 00 @@ -995,35 +977,35 @@ Disassembly of section .symtab: 18e: 04 00 190: b5 00 192: 00 00 - 194: 68 1a + 194: 20 1a 196: 00 80 198: 00 00 19a: 00 00 19c: 00 00 19e: 03 00 c6 00 lb zero, 12(a2) 1a2: 00 00 - 1a4: 64 1a + 1a4: 1c 1a 1a6: 00 80 1a8: 00 00 1aa: 00 00 1ac: 00 00 1ae: 03 00 da 00 lb zero, 13(s4) 1b2: 00 00 - 1b4: 64 1a + 1b4: 1c 1a 1b6: 00 80 1b8: 00 00 1ba: 00 00 1bc: 00 00 1be: 03 00 ed 00 lb zero, 14(s10) 1c2: 00 00 - 1c4: 64 1a + 1c4: 1c 1a 1c6: 00 80 1c8: 00 00 1ca: 00 00 1cc: 00 00 1ce: 03 00 03 01 lb zero, 16(t1) 1d2: 00 00 - 1d4: 68 22 + 1d4: 20 22 1d6: 00 80 1d8: 00 00 1da: 00 00 @@ -1031,7 +1013,7 @@ Disassembly of section .symtab: 1de: f1 ff 1e0: 15 01 1e2: 00 00 - 1e4: 64 06 + 1e4: 1c 06 1e6: 00 80 1e8: 00 00 1ea: 00 00 @@ -1039,7 +1021,7 @@ Disassembly of section .symtab: 1ee: 02 00 1f0: 20 01 1f2: 00 00 - 1f4: 1c 06 + 1f4: d4 05 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1049,21 +1031,21 @@ Disassembly of section .symtab: 202: 00 00 204: 60 00 206: 00 80 - 208: 3c 01 + 208: 48 01 20a: 00 00 20c: 12 00 20e: 02 00 210: 40 01 212: 00 00 - 214: 94 1e + 214: 4c 1e 216: 00 80 - 218: 04 00 + 218: 20 00 21a: 00 00 21c: 11 00 21e: 06 00 220: 4e 01 222: 00 00 - 224: 24 06 + 224: dc 05 226: 00 80 228: 00 00 22a: 00 00 @@ -1071,7 +1053,7 @@ Disassembly of section .symtab: 22e: 02 00 230: 55 01 232: 00 00 - 234: 90 1e + 234: 48 1e 236: 00 80 238: 00 00 23a: 00 00 @@ -1079,7 +1061,7 @@ Disassembly of section .symtab: 23e: 05 00 240: 65 01 242: 00 00 - 244: 3c 06 + 244: f4 05 246: 00 80 248: 00 00 24a: 00 00 @@ -1087,7 +1069,7 @@ Disassembly of section .symtab: 24e: 02 00 250: 6d 01 252: 00 00 - 254: 74 06 + 254: 2c 06 256: 00 80 258: 00 00 25a: 00 00 @@ -1097,12 +1079,12 @@ Disassembly of section .symtab: 262: 00 00 264: 3c 02 266: 00 80 - 268: cc 01 + 268: 84 01 26a: 00 00 26c: 12 00 26e: 02 00 270: 87 01 00 00 - 274: 34 06 + 274: ec 05 276: 00 80 278: 00 00 27a: 00 00 @@ -1110,14 +1092,14 @@ Disassembly of section .symtab: 27e: 02 00 280: 90 01 282: 00 00 - 284: 90 1e + 284: 48 1e 286: 00 80 288: 04 00 28a: 00 00 28c: 11 00 28e: 05 00 290: a3 01 00 00 sb zero, 3(zero) - 294: 34 07 + 294: ec 06 296: 00 80 298: 9c 00 29a: 00 00 @@ -1125,7 +1107,7 @@ Disassembly of section .symtab: 29e: 02 00 2a0: b5 01 2a2: 00 00 - 2a4: 6c 06 + 2a4: 24 06 2a6: 00 80 2a8: 00 00 2aa: 00 00 @@ -1133,14 +1115,14 @@ Disassembly of section .symtab: 2ae: 02 00 2b0: c4 01 2b2: 00 00 - 2b4: 44 06 + 2b4: fc 05 2b6: 00 80 2b8: 00 00 2ba: 00 00 2bc: 12 00 2be: 02 00 2c0: cf 01 00 00 fnmadd.s ft3, ft0, ft0, ft0, rne - 2c4: 54 06 + 2c4: 0c 06 2c6: 00 80 2c8: 00 00 2ca: 00 00 @@ -1148,7 +1130,7 @@ Disassembly of section .symtab: 2ce: 02 00 2d0: dc 01 2d2: 00 00 - 2d4: d8 06 + 2d4: 90 06 2d6: 00 80 2d8: 5c 00 2da: 00 00 @@ -1156,7 +1138,7 @@ Disassembly of section .symtab: 2de: 02 00 2e0: ee 01 2e2: 00 00 - 2e4: 38 04 + 2e4: f0 03 2e6: 00 80 2e8: 94 00 2ea: 00 00 @@ -1164,14 +1146,14 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: 01 02 2f2: 00 00 - 2f4: dc 05 + 2f4: 94 05 2f6: 00 80 2f8: 00 00 2fa: 00 00 2fc: 12 00 2fe: 02 00 300: 0b 02 00 00 - 304: 2c 06 + 304: e4 05 306: 00 80 308: 00 00 30a: 00 00 @@ -1179,7 +1161,7 @@ Disassembly of section .symtab: 30e: 02 00 310: 16 02 312: 00 00 - 314: 44 09 + 314: fc 08 316: 00 80 318: 20 01 31a: 00 00 @@ -1194,28 +1176,28 @@ Disassembly of section .symtab: 32c: 12 00 32e: 01 00 330: 27 02 00 00 - 334: ac 08 + 334: 64 08 336: 00 80 338: 98 00 33a: 00 00 33c: 12 00 33e: 02 00 340: 3b 02 00 00 - 344: 98 1e + 344: 6c 1e 346: 00 80 348: 00 00 34a: 00 00 34c: 10 00 34e: 06 00 350: 47 02 00 00 fmsub.s ft4, ft0, ft0, ft0, rne - 354: 94 1e + 354: 4c 1e 356: 00 80 358: 00 00 35a: 00 00 35c: 10 00 35e: 06 00 360: 53 02 00 00 fadd.s ft4, ft0, ft0, rne - 364: d0 07 + 364: 88 07 366: 00 80 368: dc 00 36a: 00 00 @@ -1223,14 +1205,14 @@ Disassembly of section .symtab: 36e: 02 00 370: 5a 02 372: 00 00 - 374: 08 04 + 374: c0 03 376: 00 80 378: 30 00 37a: 00 00 37c: 12 00 37e: 02 00 380: 5f 02 00 00 - 384: 54 05 + 384: 0c 05 386: 00 80 388: 80 00 38a: 00 00 @@ -1238,14 +1220,14 @@ Disassembly of section .symtab: 38e: 02 00 390: 81 02 392: 00 00 - 394: 84 06 + 394: 3c 06 396: 00 80 398: 00 00 39a: 00 00 39c: 12 00 39e: 02 00 3a0: 8f 02 00 00 - 3a4: 94 06 + 3a4: 4c 06 3a6: 00 80 3a8: 14 00 3aa: 00 00 @@ -1253,7 +1235,7 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: 96 02 3b2: 00 00 - 3b4: 5c 06 + 3b4: 14 06 3b6: 00 80 3b8: 00 00 3ba: 00 00 @@ -1261,7 +1243,7 @@ Disassembly of section .symtab: 3be: 02 00 3c0: a4 02 3c2: 00 00 - 3c4: 7c 06 + 3c4: 34 06 3c6: 00 80 3c8: 00 00 3ca: 00 00 @@ -1269,7 +1251,7 @@ Disassembly of section .symtab: 3ce: 02 00 3d0: b1 02 3d2: 00 00 - 3d4: 4c 06 + 3d4: 04 06 3d6: 00 80 3d8: 00 00 3da: 00 00 @@ -1277,15 +1259,15 @@ Disassembly of section .symtab: 3de: 02 00 3e0: bd 02 3e2: 00 00 - 3e4: 9c 01 + 3e4: a8 01 3e6: 00 80 - 3e8: a0 00 + 3e8: 94 00 3ea: 00 00 3ec: 12 00 3ee: 02 00 3f0: d6 02 3f2: 00 00 - 3f4: 68 1a + 3f4: 20 1a 3f6: 00 80 3f8: 00 00 3fa: 00 00 @@ -1293,7 +1275,7 @@ Disassembly of section .symtab: 3fe: 04 00 400: e5 02 402: 00 00 - 404: 94 1e + 404: 4c 1e 406: 00 80 408: 00 00 40a: 00 00 @@ -1301,7 +1283,7 @@ Disassembly of section .symtab: 40e: 05 00 410: 9d 00 412: 00 00 - 414: 98 1e + 414: 6c 1e 416: 00 80 418: 00 00 41a: 00 00 @@ -1309,7 +1291,7 @@ Disassembly of section .symtab: 41e: 06 00 420: ed 02 422: 00 00 - 424: a8 06 + 424: 60 06 426: 00 80 428: 30 00 42a: 00 00 @@ -1317,7 +1299,7 @@ Disassembly of section .symtab: 42e: 02 00 430: ec 02 432: 00 00 - 434: d4 05 + 434: 8c 05 436: 00 80 438: 00 00 43a: 00 00 @@ -1325,14 +1307,14 @@ Disassembly of section .symtab: 43e: 02 00 440: f2 02 442: 00 00 - 444: cc 04 + 444: 84 04 446: 00 80 448: 88 00 44a: 00 00 44c: 12 00 44e: 02 00 450: 0f 03 00 00 - 454: 8c 06 + 454: 44 06 456: 00 80 458: 00 00 45a: 00 00 @@ -1372,14 +1354,13 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 31 39 - 48: 2d 63 - 4a: 65 2d - 4c: 31 65 - 4e: 2d 37 - 50: 64 2d - 52: 62 39 - 54: 2e 63 + 46: 36 64 + 48: 2d 37 + 4a: 64 2d + 4c: 61 30 + 4e: 2d 65 + 50: 38 2d + 52: 37 63 2e 63 lui t1, 406246 56: 00 70 58: 61 72 5a: 61 6c diff --git a/benchmarks/opencl/sfilter/Makefile b/benchmarks/opencl/sfilter/Makefile index 714190b8..daa0331b 100644 --- a/benchmarks/opencl/sfilter/Makefile +++ b/benchmarks/opencl/sfilter/Makefile @@ -54,7 +54,7 @@ clean: rm -rf $(PROJECT) *.o .depend clean-all: clean - rm *.pocl *.dump + rm -rf *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/sfilter/kernel.pocl b/benchmarks/opencl/sfilter/kernel.pocl index 5e4a322e..4653aa3f 100644 Binary files a/benchmarks/opencl/sfilter/kernel.pocl and b/benchmarks/opencl/sfilter/kernel.pocl differ diff --git a/benchmarks/opencl/sfilter/sfilter.dump b/benchmarks/opencl/sfilter/sfilter.dump index 9cd90cb4..dbed07a7 100644 --- a/benchmarks/opencl/sfilter/sfilter.dump +++ b/benchmarks/opencl/sfilter/sfilter.dump @@ -1,28 +1,28 @@ -/tmp/pocl_vortex_kernel-3d-4f-8a-16-33.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-ff-da-7c-29-27.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 15 00 00 auipc a1, 1 -80000004: 93 85 c5 b6 addi a1, a1, -1172 +80000004: 93 85 45 b2 addi a1, a1, -1244 80000008: 73 25 60 02 csrr a0, 38 8000000c: 6b 10 b5 00 -80000010: ef 00 d0 35 jal 2908 +80000010: ef 00 50 31 jal 2836 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 13 85 c1 c2 addi a0, gp, -980 -80000020: 13 86 01 c3 addi a2, gp, -976 +80000020: 13 86 c1 c4 addi a2, gp, -948 80000024: 33 06 a6 40 sub a2, a2, a0 80000028: 93 05 00 00 mv a1, zero -8000002c: ef 00 50 53 jal 3380 +8000002c: ef 00 d0 4e jal 3308 80000030: 17 15 00 00 auipc a0, 1 -80000034: 13 05 85 c3 addi a0, a0, -968 -80000038: ef 00 d0 3e jal 3052 -8000003c: ef 00 90 48 jal 3208 -80000040: ef 00 80 3c jal 968 -80000044: 6f 00 50 3f j 3060 +80000034: 13 05 05 bf addi a0, a0, -1040 +80000038: ef 00 50 3a jal 2980 +8000003c: ef 00 10 44 jal 3136 +80000040: ef 00 00 38 jal 896 +80000044: 6f 00 d0 3a j 2988 Disassembly of section .text: @@ -30,8 +30,8 @@ Disassembly of section .text: 80000048: 93 07 00 00 mv a5, zero 8000004c: 63 88 07 00 beqz a5, 16 80000050: 37 15 00 80 lui a0, 524289 -80000054: 13 05 85 c6 addi a0, a0, -920 -80000058: 6f 00 d0 3c j 3020 +80000054: 13 05 05 c2 addi a0, a0, -992 +80000058: 6f 00 50 38 j 2948 8000005c: 67 80 00 00 ret 80000060 kernel_spawn_run_warp: @@ -46,116 +46,116 @@ Disassembly of section .text: 80000080: 23 28 61 01 sw s6, 16(sp) 80000084: 23 26 71 01 sw s7, 12(sp) 80000088: 23 24 81 01 sw s8, 8(sp) -8000008c: ef 00 10 37 jal 2928 -80000090: ef 00 50 32 jal 2852 -80000094: ef 00 90 36 jal 2920 -80000098: 83 a5 c1 c2 lw a1, -980(gp) -8000009c: 83 a5 05 00 lw a1, 0(a1) -800000a0: 83 aa 05 00 lw s5, 0(a1) -800000a4: 03 ab 45 00 lw s6, 4(a1) +8000008c: ef 00 90 32 jal 2856 +80000090: ef 00 d0 2d jal 2780 +80000094: ef 00 90 31 jal 2840 +80000098: 93 04 05 00 mv s1, a0 +8000009c: ef 00 10 2f jal 2800 +800000a0: 93 09 05 00 mv s3, a0 +800000a4: ef 00 90 2f jal 2808 800000a8: 13 09 05 00 mv s2, a0 -800000ac: ef 00 90 32 jal 2856 -800000b0: 93 09 05 00 mv s3, a0 -800000b4: ef 00 10 33 jal 2864 -800000b8: 03 a8 c1 c2 lw a6, -980(gp) -800000bc: 03 27 48 01 lw a4, 20(a6) -800000c0: 83 25 08 01 lw a1, 16(a6) -800000c4: 93 06 07 00 mv a3, a4 -800000c8: 63 44 37 01 blt a4, s3, 8 -800000cc: 93 86 09 00 mv a3, s3 -800000d0: 33 a7 e9 00 slt a4, s3, a4 -800000d4: 33 87 e5 00 add a4, a1, a4 -800000d8: 93 07 10 00 addi a5, zero, 1 -800000dc: 63 46 f7 08 blt a4, a5, 140 -800000e0: 33 0a 5b 03 mul s4, s6, s5 -800000e4: 83 27 c8 00 lw a5, 12(a6) -800000e8: 13 0c f7 ff addi s8, a4, -1 -800000ec: b3 85 b9 02 mul a1, s3, a1 -800000f0: b3 85 b6 00 add a1, a3, a1 -800000f4: b3 05 b9 02 mul a1, s2, a1 -800000f8: b3 85 b7 00 add a1, a5, a1 -800000fc: 33 05 e5 02 mul a0, a0, a4 -80000100: b3 84 a5 00 add s1, a1, a0 -80000104: 33 09 60 41 neg s2, s6 -80000108: 33 0b 40 41 neg s6, s4 -8000010c: 33 c7 44 03 div a4, s1, s4 -80000110: 83 25 08 00 lw a1, 0(a6) -80000114: 33 05 eb 02 mul a0, s6, a4 -80000118: 33 85 a4 00 add a0, s1, a0 -8000011c: b3 46 55 03 div a3, a0, s5 -80000120: 03 a5 c5 00 lw a0, 12(a1) -80000124: b3 07 e9 02 mul a5, s2, a4 -80000128: b3 87 d7 40 sub a5, a5, a3 -8000012c: b3 87 fa 02 mul a5, s5, a5 -80000130: b3 08 f5 00 add a7, a0, a5 -80000134: 03 a4 05 01 lw s0, 16(a1) -80000138: 03 a6 45 01 lw a2, 20(a1) -8000013c: 83 27 48 00 lw a5, 4(a6) -80000140: 03 25 88 00 lw a0, 8(a6) -80000144: b3 06 d4 00 add a3, s0, a3 -80000148: 33 07 e6 00 add a4, a2, a4 -8000014c: 33 86 14 01 add a2, s1, a7 -80000150: e7 80 07 00 jalr a5 -80000154: 63 0a 0c 00 beqz s8, 20 -80000158: 03 a8 c1 c2 lw a6, -980(gp) -8000015c: 13 0c fc ff addi s8, s8, -1 -80000160: 93 84 14 00 addi s1, s1, 1 -80000164: 6f f0 9f fa j -88 -80000168: 13 b5 19 00 seqz a0, s3 -8000016c: 03 2c 81 00 lw s8, 8(sp) -80000170: 83 2b c1 00 lw s7, 12(sp) -80000174: 03 2b 01 01 lw s6, 16(sp) -80000178: 83 2a 41 01 lw s5, 20(sp) -8000017c: 03 2a 81 01 lw s4, 24(sp) -80000180: 83 29 c1 01 lw s3, 28(sp) -80000184: 03 29 01 02 lw s2, 32(sp) -80000188: 83 24 41 02 lw s1, 36(sp) -8000018c: 03 24 81 02 lw s0, 40(sp) -80000190: 83 20 c1 02 lw ra, 44(sp) -80000194: 13 01 01 03 addi sp, sp, 48 -80000198: 6f 00 d0 21 j 2588 +800000ac: ef 00 90 30 jal 2824 +800000b0: 93 85 c1 c2 addi a1, gp, -980 +800000b4: 13 96 24 00 slli a2, s1, 2 +800000b8: b3 05 b6 00 add a1, a2, a1 +800000bc: 03 ab 05 00 lw s6, 0(a1) +800000c0: 83 25 4b 01 lw a1, 20(s6) +800000c4: 03 26 0b 01 lw a2, 16(s6) +800000c8: 93 86 05 00 mv a3, a1 +800000cc: 63 c4 35 01 blt a1, s3, 8 +800000d0: 93 86 09 00 mv a3, s3 +800000d4: b3 a5 b9 00 slt a1, s3, a1 +800000d8: 33 07 b6 00 add a4, a2, a1 +800000dc: 93 05 10 00 addi a1, zero, 1 +800000e0: 63 4a b7 08 blt a4, a1, 148 +800000e4: 83 25 0b 00 lw a1, 0(s6) +800000e8: 83 aa 05 00 lw s5, 0(a1) +800000ec: 83 a7 45 00 lw a5, 4(a1) +800000f0: 83 24 cb 00 lw s1, 12(s6) +800000f4: 33 8a 57 03 mul s4, a5, s5 +800000f8: 13 0c f7 ff addi s8, a4, -1 +800000fc: 33 86 c9 02 mul a2, s3, a2 +80000100: 33 86 c6 00 add a2, a3, a2 +80000104: 33 05 c5 02 mul a0, a0, a2 +80000108: 33 85 a4 00 add a0, s1, a0 +8000010c: 33 06 e9 02 mul a2, s2, a4 +80000110: b3 04 c5 00 add s1, a0, a2 +80000114: 33 09 f0 40 neg s2, a5 +80000118: b3 0b 40 41 neg s7, s4 +8000011c: 33 c6 44 03 div a2, s1, s4 +80000120: 33 85 cb 02 mul a0, s7, a2 +80000124: 33 85 a4 00 add a0, s1, a0 +80000128: b3 46 55 03 div a3, a0, s5 +8000012c: 03 a5 c5 00 lw a0, 12(a1) +80000130: 33 07 c9 02 mul a4, s2, a2 +80000134: 33 07 d7 40 sub a4, a4, a3 +80000138: 33 87 ea 02 mul a4, s5, a4 +8000013c: 33 08 e5 00 add a6, a0, a4 +80000140: 03 a7 05 01 lw a4, 16(a1) +80000144: 03 a4 45 01 lw s0, 20(a1) +80000148: 83 27 4b 00 lw a5, 4(s6) +8000014c: 03 25 8b 00 lw a0, 8(s6) +80000150: b3 06 d7 00 add a3, a4, a3 +80000154: 33 07 c4 00 add a4, s0, a2 +80000158: 33 86 04 01 add a2, s1, a6 +8000015c: e7 80 07 00 jalr a5 +80000160: 63 0a 0c 00 beqz s8, 20 +80000164: 83 25 0b 00 lw a1, 0(s6) +80000168: 13 0c fc ff addi s8, s8, -1 +8000016c: 93 84 14 00 addi s1, s1, 1 +80000170: 6f f0 df fa j -84 +80000174: 13 b5 19 00 seqz a0, s3 +80000178: 03 2c 81 00 lw s8, 8(sp) +8000017c: 83 2b c1 00 lw s7, 12(sp) +80000180: 03 2b 01 01 lw s6, 16(sp) +80000184: 83 2a 41 01 lw s5, 20(sp) +80000188: 03 2a 81 01 lw s4, 24(sp) +8000018c: 83 29 c1 01 lw s3, 28(sp) +80000190: 03 29 01 02 lw s2, 32(sp) +80000194: 83 24 41 02 lw s1, 36(sp) +80000198: 03 24 81 02 lw s0, 40(sp) +8000019c: 83 20 c1 02 lw ra, 44(sp) +800001a0: 13 01 01 03 addi sp, sp, 48 +800001a4: 6f 00 90 1c j 2504 -8000019c kernel_spawn_run_threads: -8000019c: 13 01 01 fe addi sp, sp, -32 -800001a0: 23 2e 11 00 sw ra, 28(sp) -800001a4: 23 2c 81 00 sw s0, 24(sp) -800001a8: 23 2a 91 00 sw s1, 20(sp) -800001ac: 23 28 21 01 sw s2, 16(sp) -800001b0: 23 26 31 01 sw s3, 12(sp) -800001b4: 93 89 05 00 mv s3, a1 -800001b8: ef 00 d0 1f jal 2556 -800001bc: 03 a5 c1 c2 lw a0, -980(gp) -800001c0: 03 25 05 00 lw a0, 0(a0) -800001c4: 83 24 05 00 lw s1, 0(a0) -800001c8: 03 25 45 00 lw a0, 4(a0) -800001cc: 33 04 95 02 mul s0, a0, s1 -800001d0: ef 00 d0 21 jal 2588 -800001d4: 33 05 35 01 add a0, a0, s3 -800001d8: 03 a6 c1 c2 lw a2, -980(gp) -800001dc: 33 47 85 02 div a4, a0, s0 -800001e0: b3 05 87 02 mul a1, a4, s0 -800001e4: 33 05 b5 40 sub a0, a0, a1 -800001e8: 83 25 06 00 lw a1, 0(a2) -800001ec: b3 46 95 02 div a3, a0, s1 -800001f0: b3 87 96 02 mul a5, a3, s1 -800001f4: 33 08 f5 40 sub a6, a0, a5 -800001f8: 83 a4 c5 00 lw s1, 12(a1) -800001fc: 03 a4 05 01 lw s0, 16(a1) -80000200: 83 a7 45 01 lw a5, 20(a1) -80000204: 83 28 46 00 lw a7, 4(a2) -80000208: 03 25 86 00 lw a0, 8(a2) -8000020c: 33 06 98 00 add a2, a6, s1 -80000210: b3 06 d4 00 add a3, s0, a3 -80000214: 33 87 e7 00 add a4, a5, a4 -80000218: e7 80 08 00 jalr a7 -8000021c: 13 05 10 00 addi a0, zero, 1 -80000220: 83 29 c1 00 lw s3, 12(sp) -80000224: 03 29 01 01 lw s2, 16(sp) -80000228: 83 24 41 01 lw s1, 20(sp) -8000022c: 03 24 81 01 lw s0, 24(sp) -80000230: 83 20 c1 01 lw ra, 28(sp) -80000234: 13 01 01 02 addi sp, sp, 32 -80000238: 6f 00 d0 17 j 2428 +800001a8 kernel_spawn_run_threads: +800001a8: 13 01 01 ff addi sp, sp, -16 +800001ac: 23 26 11 00 sw ra, 12(sp) +800001b0: 23 24 81 00 sw s0, 8(sp) +800001b4: ef 00 90 1b jal 2488 +800001b8: ef 00 50 1f jal 2548 +800001bc: 13 04 05 00 mv s0, a0 +800001c0: ef 00 50 1e jal 2532 +800001c4: 93 85 c1 c2 addi a1, gp, -980 +800001c8: 13 16 24 00 slli a2, s0, 2 +800001cc: b3 05 b6 00 add a1, a2, a1 +800001d0: 03 a6 05 00 lw a2, 0(a1) +800001d4: 83 25 06 00 lw a1, 0(a2) +800001d8: 83 26 c6 00 lw a3, 12(a2) +800001dc: 03 a7 05 00 lw a4, 0(a1) +800001e0: 83 a7 45 00 lw a5, 4(a1) +800001e4: 33 85 a6 00 add a0, a3, a0 +800001e8: b3 86 e7 02 mul a3, a5, a4 +800001ec: b3 47 d5 02 div a5, a0, a3 +800001f0: b3 86 d7 02 mul a3, a5, a3 +800001f4: 03 a4 c5 00 lw s0, 12(a1) +800001f8: 33 05 d5 40 sub a0, a0, a3 +800001fc: b3 46 e5 02 div a3, a0, a4 +80000200: 33 88 e6 02 mul a6, a3, a4 +80000204: b3 08 a4 00 add a7, s0, a0 +80000208: 03 a7 05 01 lw a4, 16(a1) +8000020c: 03 a4 45 01 lw s0, 20(a1) +80000210: 83 22 46 00 lw t0, 4(a2) +80000214: 03 25 86 00 lw a0, 8(a2) +80000218: 33 86 08 41 sub a2, a7, a6 +8000021c: b3 06 d7 00 add a3, a4, a3 +80000220: 33 07 f4 00 add a4, s0, a5 +80000224: e7 80 02 00 jalr t0 +80000228: 13 05 10 00 addi a0, zero, 1 +8000022c: 03 24 81 00 lw s0, 8(sp) +80000230: 83 20 c1 00 lw ra, 12(sp) +80000234: 13 01 01 01 addi sp, sp, 16 +80000238: 6f 00 50 13 j 2356 8000023c kernel_spawn: 8000023c: 13 01 01 fc addi sp, sp, -64 @@ -167,933 +167,915 @@ Disassembly of section .text: 80000254: 23 24 41 03 sw s4, 40(sp) 80000258: 23 22 51 03 sw s5, 36(sp) 8000025c: 23 20 61 03 sw s6, 32(sp) -80000260: 13 0b 05 00 mv s6, a0 -80000264: 03 25 05 00 lw a0, 0(a0) -80000268: 83 26 4b 00 lw a3, 4(s6) -8000026c: 03 27 8b 00 lw a4, 8(s6) -80000270: 13 09 06 00 mv s2, a2 -80000274: 93 89 05 00 mv s3, a1 -80000278: 33 85 a6 02 mul a0, a3, a0 -8000027c: b3 04 e5 02 mul s1, a0, a4 -80000280: ef 00 d0 18 jal 2444 -80000284: 13 04 05 00 mv s0, a0 -80000288: ef 00 d0 17 jal 2428 +80000260: 23 2e 71 01 sw s7, 28(sp) +80000264: 23 2c 81 01 sw s8, 24(sp) +80000268: 93 04 05 00 mv s1, a0 +8000026c: 83 2b 05 00 lw s7, 0(a0) +80000270: 03 24 45 00 lw s0, 4(a0) +80000274: 03 2c 85 00 lw s8, 8(a0) +80000278: 13 09 06 00 mv s2, a2 +8000027c: 93 89 05 00 mv s3, a1 +80000280: ef 00 50 14 jal 2372 +80000284: 13 0b 05 00 mv s6, a0 +80000288: ef 00 50 13 jal 2356 8000028c: 13 0a 05 00 mv s4, a0 -80000290: ef 00 d0 16 jal 2412 +80000290: ef 00 50 12 jal 2340 80000294: 93 0a 05 00 mv s5, a0 -80000298: ef 00 d0 15 jal 2396 -8000029c: b3 85 4a 03 mul a1, s5, s4 -800002a0: 13 06 10 00 addi a2, zero, 1 -800002a4: 63 c8 95 00 blt a1, s1, 16 -800002a8: 63 5a 86 00 bge a2, s0, 20 -800002ac: 63 4c c5 00 blt a0, a2, 24 -800002b0: 6f 00 00 13 j 304 -800002b4: 33 c6 b4 02 div a2, s1, a1 -800002b8: e3 4a 86 fe blt a2, s0, -12 -800002bc: 13 06 04 00 mv a2, s0 -800002c0: 63 50 c5 12 bge a0, a2, 288 -800002c4: 93 06 f4 ff addi a3, s0, -1 -800002c8: b3 c5 c4 02 div a1, s1, a2 -800002cc: 63 0e d5 00 beq a0, a3, 28 -800002d0: 13 06 00 00 mv a2, zero -800002d4: b3 06 b6 00 add a3, a2, a1 -800002d8: 33 c6 56 03 div a2, a3, s5 -800002dc: 13 07 00 00 mv a4, zero -800002e0: 63 50 46 03 bge a2, s4, 32 -800002e4: 6f 00 00 02 j 32 -800002e8: 33 86 c5 02 mul a2, a1, a2 -800002ec: 33 86 c4 40 sub a2, s1, a2 -800002f0: b3 06 b6 00 add a3, a2, a1 -800002f4: 33 c6 56 03 div a2, a3, s5 -800002f8: 13 07 00 00 mv a4, zero -800002fc: 63 44 46 01 blt a2, s4, 8 -80000300: 33 47 46 03 div a4, a2, s4 -80000304: 93 07 00 00 mv a5, zero -80000308: b3 0a 56 03 mul s5, a2, s5 -8000030c: 93 04 10 00 addi s1, zero, 1 -80000310: 63 08 07 00 beqz a4, 16 -80000314: b3 07 47 03 mul a5, a4, s4 -80000318: b3 07 f6 40 sub a5, a2, a5 -8000031c: 93 04 07 00 mv s1, a4 -80000320: 33 84 56 41 sub s0, a3, s5 -80000324: 23 24 61 01 sw s6, 8(sp) -80000328: 23 26 31 01 sw s3, 12(sp) -8000032c: 23 28 21 01 sw s2, 16(sp) -80000330: 33 85 a5 02 mul a0, a1, a0 -80000334: 23 2a a1 00 sw a0, 20(sp) -80000338: 23 2c 91 00 sw s1, 24(sp) -8000033c: 23 2e f1 00 sw a5, 28(sp) -80000340: 93 05 81 00 addi a1, sp, 8 -80000344: 93 06 20 00 addi a3, zero, 2 -80000348: 23 a6 b1 c2 sw a1, -980(gp) -8000034c: 63 40 d6 02 blt a2, a3, 32 -80000350: 63 44 46 01 blt a2, s4, 8 -80000354: 13 06 0a 00 mv a2, s4 -80000358: 37 05 00 80 lui a0, 524288 -8000035c: 93 05 05 06 addi a1, a0, 96 -80000360: 13 05 06 00 mv a0, a2 -80000364: ef 00 90 04 jal 2120 -80000368: ef f0 9f cf jal -776 -8000036c: 63 0a 04 06 beqz s0, 116 -80000370: 13 05 04 00 mv a0, s0 -80000374: ef 00 10 04 jal 2112 -80000378: 03 a5 c1 c2 lw a0, -980(gp) -8000037c: 03 25 05 00 lw a0, 0(a0) -80000380: 83 24 05 00 lw s1, 0(a0) -80000384: 03 25 45 00 lw a0, 4(a0) -80000388: 33 04 95 02 mul s0, a0, s1 -8000038c: ef 00 10 06 jal 2144 -80000390: 33 05 55 01 add a0, a0, s5 -80000394: 03 a6 c1 c2 lw a2, -980(gp) -80000398: 33 47 85 02 div a4, a0, s0 -8000039c: b3 05 87 02 mul a1, a4, s0 -800003a0: 33 05 b5 40 sub a0, a0, a1 -800003a4: 83 25 06 00 lw a1, 0(a2) -800003a8: b3 46 95 02 div a3, a0, s1 -800003ac: b3 87 96 02 mul a5, a3, s1 -800003b0: 33 08 f5 40 sub a6, a0, a5 -800003b4: 83 a4 c5 00 lw s1, 12(a1) -800003b8: 03 a4 05 01 lw s0, 16(a1) -800003bc: 83 a7 45 01 lw a5, 20(a1) -800003c0: 83 28 46 00 lw a7, 4(a2) -800003c4: 03 25 86 00 lw a0, 8(a2) -800003c8: 33 06 98 00 add a2, a6, s1 -800003cc: b3 06 d4 00 add a3, s0, a3 -800003d0: 33 87 e7 00 add a4, a5, a4 -800003d4: e7 80 08 00 jalr a7 -800003d8: 13 05 10 00 addi a0, zero, 1 -800003dc: ef 00 80 7d jal 2008 -800003e0: 03 2b 01 02 lw s6, 32(sp) -800003e4: 83 2a 41 02 lw s5, 36(sp) -800003e8: 03 2a 81 02 lw s4, 40(sp) -800003ec: 83 29 c1 02 lw s3, 44(sp) -800003f0: 03 29 01 03 lw s2, 48(sp) -800003f4: 83 24 41 03 lw s1, 52(sp) -800003f8: 03 24 81 03 lw s0, 56(sp) -800003fc: 83 20 c1 03 lw ra, 60(sp) -80000400: 13 01 01 04 addi sp, sp, 64 -80000404: 67 80 00 00 ret +80000298: ef 00 50 11 jal 2324 +8000029c: 93 05 70 00 addi a1, zero, 7 +800002a0: 63 c8 a5 0e blt a1, a0, 240 +800002a4: b3 05 74 03 mul a1, s0, s7 +800002a8: 33 86 85 03 mul a2, a1, s8 +800002ac: b3 85 4a 03 mul a1, s5, s4 +800002b0: 93 06 10 00 addi a3, zero, 1 +800002b4: 63 c8 c5 00 blt a1, a2, 16 +800002b8: 63 da 66 01 bge a3, s6, 20 +800002bc: 63 4c d5 00 blt a0, a3, 24 +800002c0: 6f 00 00 0d j 208 +800002c4: b3 46 b6 02 div a3, a2, a1 +800002c8: e3 ca 66 ff blt a3, s6, -12 +800002cc: 93 06 0b 00 mv a3, s6 +800002d0: 63 50 d5 0c bge a0, a3, 192 +800002d4: 13 07 fb ff addi a4, s6, -1 +800002d8: b3 45 d6 02 div a1, a2, a3 +800002dc: 63 0e e5 00 beq a0, a4, 28 +800002e0: 13 06 00 00 mv a2, zero +800002e4: 33 0b b6 00 add s6, a2, a1 +800002e8: 33 46 5b 03 div a2, s6, s5 +800002ec: 93 06 00 00 mv a3, zero +800002f0: 63 50 46 03 bge a2, s4, 32 +800002f4: 6f 00 00 02 j 32 +800002f8: b3 86 d5 02 mul a3, a1, a3 +800002fc: 33 06 d6 40 sub a2, a2, a3 +80000300: 33 0b b6 00 add s6, a2, a1 +80000304: 33 46 5b 03 div a2, s6, s5 +80000308: 93 06 00 00 mv a3, zero +8000030c: 63 44 46 01 blt a2, s4, 8 +80000310: b3 46 46 03 div a3, a2, s4 +80000314: 13 07 00 00 mv a4, zero +80000318: 93 07 10 00 addi a5, zero, 1 +8000031c: 63 88 06 00 beqz a3, 16 +80000320: 33 87 46 03 mul a4, a3, s4 +80000324: 33 07 e6 40 sub a4, a2, a4 +80000328: 93 87 06 00 mv a5, a3 +8000032c: 33 04 56 03 mul s0, a2, s5 +80000330: 23 20 91 00 sw s1, 0(sp) +80000334: 23 22 31 01 sw s3, 4(sp) +80000338: 23 24 21 01 sw s2, 8(sp) +8000033c: b3 85 a5 02 mul a1, a1, a0 +80000340: 23 26 b1 00 sw a1, 12(sp) +80000344: 23 28 f1 00 sw a5, 16(sp) +80000348: 23 2a e1 00 sw a4, 20(sp) +8000034c: 93 85 c1 c2 addi a1, gp, -980 +80000350: 13 15 25 00 slli a0, a0, 2 +80000354: 33 05 b5 00 add a0, a0, a1 +80000358: 93 05 01 00 mv a1, sp +8000035c: 93 06 20 00 addi a3, zero, 2 +80000360: 23 20 b5 00 sw a1, 0(a0) +80000364: 63 40 d6 02 blt a2, a3, 32 +80000368: 63 44 46 01 blt a2, s4, 8 +8000036c: 13 06 0a 00 mv a2, s4 +80000370: 37 05 00 80 lui a0, 524288 +80000374: 93 05 05 06 addi a1, a0, 96 +80000378: 13 05 06 00 mv a0, a2 +8000037c: ef 00 80 7e jal 2024 +80000380: ef f0 1f ce jal -800 +80000384: 63 06 8b 00 beq s6, s0, 12 +80000388: 23 26 81 00 sw s0, 12(sp) +8000038c: ef f0 5f cd jal -812 +80000390: 03 2c 81 01 lw s8, 24(sp) +80000394: 83 2b c1 01 lw s7, 28(sp) +80000398: 03 2b 01 02 lw s6, 32(sp) +8000039c: 83 2a 41 02 lw s5, 36(sp) +800003a0: 03 2a 81 02 lw s4, 40(sp) +800003a4: 83 29 c1 02 lw s3, 44(sp) +800003a8: 03 29 01 03 lw s2, 48(sp) +800003ac: 83 24 41 03 lw s1, 52(sp) +800003b0: 03 24 81 03 lw s0, 56(sp) +800003b4: 83 20 c1 03 lw ra, 60(sp) +800003b8: 13 01 01 04 addi sp, sp, 64 +800003bc: 67 80 00 00 ret -80000408 main: -80000408: 13 01 01 ff addi sp, sp, -16 -8000040c: 23 26 11 00 sw ra, 12(sp) -80000410: 37 05 00 80 lui a0, 524288 -80000414: 93 05 c5 66 addi a1, a0, 1644 -80000418: 37 05 ff 7f lui a0, 524272 -8000041c: 13 06 45 03 addi a2, a0, 52 -80000420: 37 05 ff 7f lui a0, 524272 -80000424: ef f0 9f e1 jal -488 -80000428: 13 05 00 00 mv a0, zero -8000042c: 83 20 c1 00 lw ra, 12(sp) -80000430: 13 01 01 01 addi sp, sp, 16 -80000434: 67 80 00 00 ret +800003c0 main: +800003c0: 13 01 01 ff addi sp, sp, -16 +800003c4: 23 26 11 00 sw ra, 12(sp) +800003c8: 37 05 00 80 lui a0, 524288 +800003cc: 93 05 45 62 addi a1, a0, 1572 +800003d0: 37 05 ff 7f lui a0, 524272 +800003d4: 13 06 45 03 addi a2, a0, 52 +800003d8: 37 05 ff 7f lui a0, 524272 +800003dc: ef f0 1f e6 jal -416 +800003e0: 13 05 00 00 mv a0, zero +800003e4: 83 20 c1 00 lw ra, 12(sp) +800003e8: 13 01 01 01 addi sp, sp, 16 +800003ec: 67 80 00 00 ret -80000438 _pocl_kernel_sfilter: -80000438: 13 01 01 fc addi sp, sp, -64 -8000043c: 23 2e 11 02 sw ra, 60(sp) -80000440: 23 2c 81 02 sw s0, 56(sp) -80000444: 23 2a 91 02 sw s1, 52(sp) -80000448: 23 28 21 03 sw s2, 48(sp) -8000044c: 23 26 31 03 sw s3, 44(sp) -80000450: 23 24 41 03 sw s4, 40(sp) -80000454: 23 22 51 03 sw s5, 36(sp) -80000458: 23 20 61 03 sw s6, 32(sp) -8000045c: 23 2e 71 01 sw s7, 28(sp) -80000460: 23 2c 81 01 sw s8, 24(sp) -80000464: 23 2a 91 01 sw s9, 20(sp) -80000468: 23 28 a1 01 sw s10, 16(sp) -8000046c: 23 26 b1 01 sw s11, 12(sp) -80000470: 13 04 01 04 addi s0, sp, 64 -80000474: 13 71 c1 ff andi sp, sp, -4 -80000478: 93 02 00 00 mv t0, zero -8000047c: 53 00 07 f0 fmv.w.x ft0, a4 -80000480: 83 ab 87 01 lw s7, 24(a5) -80000484: 83 a3 c7 01 lw t2, 28(a5) -80000488: 83 a6 c7 00 lw a3, 12(a5) -8000048c: 03 a7 07 02 lw a4, 32(a5) -80000490: 23 24 e1 00 sw a4, 8(sp) -80000494: 03 a7 07 01 lw a4, 16(a5) -80000498: b3 87 0b 03 mul a5, s7, a6 -8000049c: 33 8c f6 00 add s8, a3, a5 -800004a0: b3 86 13 03 mul a3, t2, a7 -800004a4: b3 08 d7 00 add a7, a4, a3 -800004a8: 93 86 18 00 addi a3, a7, 1 -800004ac: b3 86 c6 02 mul a3, a3, a2 -800004b0: b3 06 dc 00 add a3, s8, a3 -800004b4: 93 96 26 00 slli a3, a3, 2 -800004b8: b3 06 d5 00 add a3, a0, a3 -800004bc: 23 22 d1 00 sw a3, 4(sp) -800004c0: 93 1f 26 00 slli t6, a2, 2 -800004c4: b3 86 c8 02 mul a3, a7, a2 -800004c8: b3 06 dc 00 add a3, s8, a3 -800004cc: 93 96 26 00 slli a3, a3, 2 -800004d0: b3 85 d5 00 add a1, a1, a3 -800004d4: 23 20 b1 00 sw a1, 0(sp) -800004d8: 33 0e d5 00 add t3, a0, a3 -800004dc: 93 85 f8 ff addi a1, a7, -1 -800004e0: b3 85 c5 02 mul a1, a1, a2 -800004e4: b3 05 bc 00 add a1, s8, a1 -800004e8: 93 95 25 00 slli a1, a1, 2 -800004ec: 33 0f b5 00 add t5, a0, a1 -800004f0: 13 0b 00 00 mv s6, zero -800004f4: 13 09 0f 00 mv s2, t5 -800004f8: 93 0e 0e 00 mv t4, t3 -800004fc: 03 2a 01 00 lw s4, 0(sp) -80000500: 83 2a 41 00 lw s5, 4(sp) -80000504: 93 06 00 00 mv a3, zero -80000508: b3 85 68 01 add a1, a7, s6 -8000050c: 13 87 f5 ff addi a4, a1, -1 -80000510: b3 0c c7 02 mul s9, a4, a2 -80000514: 33 8d c5 02 mul s10, a1, a2 -80000518: 93 85 15 00 addi a1, a1, 1 -8000051c: b3 8d c5 02 mul s11, a1, a2 -80000520: 93 00 09 00 mv ra, s2 -80000524: 93 89 0e 00 mv s3, t4 -80000528: 93 05 0a 00 mv a1, s4 -8000052c: 13 87 0a 00 mv a4, s5 -80000530: b3 07 dc 00 add a5, s8, a3 -80000534: 13 83 f7 ff addi t1, a5, -1 -80000538: 33 08 93 01 add a6, t1, s9 -8000053c: 93 14 28 00 slli s1, a6, 2 -80000540: b3 04 95 00 add s1, a0, s1 -80000544: 87 a0 04 00 flw ft1, 0(s1) -80000548: d3 f0 a0 10 fmul.s ft1, ft1, fa0 -8000054c: 07 a1 00 00 flw ft2, 0(ra) -80000550: 93 87 17 00 addi a5, a5, 1 -80000554: b3 84 97 01 add s1, a5, s9 -80000558: 93 94 24 00 slli s1, s1, 2 -8000055c: b3 04 95 00 add s1, a0, s1 -80000560: 87 a1 04 00 flw ft3, 0(s1) -80000564: b3 04 a3 01 add s1, t1, s10 -80000568: 93 94 24 00 slli s1, s1, 2 -8000056c: b3 04 95 00 add s1, a0, s1 -80000570: 07 a2 04 00 flw ft4, 0(s1) -80000574: 87 a2 09 00 flw ft5, 0(s3) -80000578: 53 71 b1 10 fmul.s ft2, ft2, fa1 -8000057c: d3 f1 c1 10 fmul.s ft3, ft3, fa2 -80000580: 53 72 d2 10 fmul.s ft4, ft4, fa3 -80000584: d3 f2 e2 10 fmul.s ft5, ft5, fa4 -80000588: b3 84 a7 01 add s1, a5, s10 -8000058c: 93 94 24 00 slli s1, s1, 2 -80000590: b3 04 95 00 add s1, a0, s1 -80000594: 07 a3 04 00 flw ft6, 0(s1) -80000598: b3 04 b3 01 add s1, t1, s11 -8000059c: 93 94 24 00 slli s1, s1, 2 -800005a0: b3 04 95 00 add s1, a0, s1 -800005a4: 87 a3 04 00 flw ft7, 0(s1) -800005a8: 07 2e 07 00 flw ft8, 0(a4) -800005ac: b3 87 b7 01 add a5, a5, s11 -800005b0: 93 97 27 00 slli a5, a5, 2 -800005b4: b3 07 f5 00 add a5, a0, a5 -800005b8: 87 ae 07 00 flw ft9, 0(a5) -800005bc: 53 73 f3 10 fmul.s ft6, ft6, fa5 -800005c0: d3 f3 03 11 fmul.s ft7, ft7, fa6 -800005c4: 53 7e 1e 11 fmul.s ft8, ft8, fa7 -800005c8: d3 fe 0e 10 fmul.s ft9, ft9, ft0 -800005cc: d3 f0 20 00 fadd.s ft1, ft1, ft2 -800005d0: d3 f0 30 00 fadd.s ft1, ft1, ft3 -800005d4: d3 f0 40 00 fadd.s ft1, ft1, ft4 -800005d8: d3 f0 50 00 fadd.s ft1, ft1, ft5 -800005dc: d3 f0 60 00 fadd.s ft1, ft1, ft6 -800005e0: d3 f0 70 00 fadd.s ft1, ft1, ft7 -800005e4: d3 f0 c0 01 fadd.s ft1, ft1, ft8 -800005e8: d3 f0 d0 01 fadd.s ft1, ft1, ft9 -800005ec: 27 a0 15 00 fsw ft1, 0(a1) -800005f0: 93 86 16 00 addi a3, a3, 1 -800005f4: 13 07 47 00 addi a4, a4, 4 -800005f8: 93 85 45 00 addi a1, a1, 4 -800005fc: 93 89 49 00 addi s3, s3, 4 -80000600: 93 80 40 00 addi ra, ra, 4 -80000604: e3 e6 76 f3 bltu a3, s7, -212 -80000608: 13 0b 1b 00 addi s6, s6, 1 -8000060c: b3 8a fa 01 add s5, s5, t6 -80000610: 33 0a fa 01 add s4, s4, t6 -80000614: b3 8e fe 01 add t4, t4, t6 -80000618: 33 09 f9 01 add s2, s2, t6 -8000061c: e3 64 7b ee bltu s6, t2, -280 -80000620: 93 82 12 00 addi t0, t0, 1 -80000624: 83 25 81 00 lw a1, 8(sp) -80000628: e3 e4 b2 ec bltu t0, a1, -312 -8000062c: 13 01 04 fc addi sp, s0, -64 -80000630: 83 2d c1 00 lw s11, 12(sp) -80000634: 03 2d 01 01 lw s10, 16(sp) -80000638: 83 2c 41 01 lw s9, 20(sp) -8000063c: 03 2c 81 01 lw s8, 24(sp) -80000640: 83 2b c1 01 lw s7, 28(sp) -80000644: 03 2b 01 02 lw s6, 32(sp) -80000648: 83 2a 41 02 lw s5, 36(sp) -8000064c: 03 2a 81 02 lw s4, 40(sp) -80000650: 83 29 c1 02 lw s3, 44(sp) -80000654: 03 29 01 03 lw s2, 48(sp) -80000658: 83 24 41 03 lw s1, 52(sp) -8000065c: 03 24 81 03 lw s0, 56(sp) -80000660: 83 20 c1 03 lw ra, 60(sp) -80000664: 13 01 01 04 addi sp, sp, 64 -80000668: 67 80 00 00 ret +800003f0 _pocl_kernel_sfilter: +800003f0: 13 01 01 fc addi sp, sp, -64 +800003f4: 23 2e 11 02 sw ra, 60(sp) +800003f8: 23 2c 81 02 sw s0, 56(sp) +800003fc: 23 2a 91 02 sw s1, 52(sp) +80000400: 23 28 21 03 sw s2, 48(sp) +80000404: 23 26 31 03 sw s3, 44(sp) +80000408: 23 24 41 03 sw s4, 40(sp) +8000040c: 23 22 51 03 sw s5, 36(sp) +80000410: 23 20 61 03 sw s6, 32(sp) +80000414: 23 2e 71 01 sw s7, 28(sp) +80000418: 23 2c 81 01 sw s8, 24(sp) +8000041c: 23 2a 91 01 sw s9, 20(sp) +80000420: 23 28 a1 01 sw s10, 16(sp) +80000424: 23 26 b1 01 sw s11, 12(sp) +80000428: 13 04 01 04 addi s0, sp, 64 +8000042c: 13 71 c1 ff andi sp, sp, -4 +80000430: 93 02 00 00 mv t0, zero +80000434: 53 00 07 f0 fmv.w.x ft0, a4 +80000438: 83 ab 87 01 lw s7, 24(a5) +8000043c: 83 a3 c7 01 lw t2, 28(a5) +80000440: 83 a6 c7 00 lw a3, 12(a5) +80000444: 03 a7 07 02 lw a4, 32(a5) +80000448: 23 24 e1 00 sw a4, 8(sp) +8000044c: 03 a7 07 01 lw a4, 16(a5) +80000450: b3 87 0b 03 mul a5, s7, a6 +80000454: 33 8c f6 00 add s8, a3, a5 +80000458: b3 86 13 03 mul a3, t2, a7 +8000045c: b3 08 d7 00 add a7, a4, a3 +80000460: 93 86 18 00 addi a3, a7, 1 +80000464: b3 86 c6 02 mul a3, a3, a2 +80000468: b3 06 dc 00 add a3, s8, a3 +8000046c: 93 96 26 00 slli a3, a3, 2 +80000470: b3 06 d5 00 add a3, a0, a3 +80000474: 23 22 d1 00 sw a3, 4(sp) +80000478: 93 1f 26 00 slli t6, a2, 2 +8000047c: b3 86 c8 02 mul a3, a7, a2 +80000480: b3 06 dc 00 add a3, s8, a3 +80000484: 93 96 26 00 slli a3, a3, 2 +80000488: b3 85 d5 00 add a1, a1, a3 +8000048c: 23 20 b1 00 sw a1, 0(sp) +80000490: 33 0e d5 00 add t3, a0, a3 +80000494: 93 85 f8 ff addi a1, a7, -1 +80000498: b3 85 c5 02 mul a1, a1, a2 +8000049c: b3 05 bc 00 add a1, s8, a1 +800004a0: 93 95 25 00 slli a1, a1, 2 +800004a4: 33 0f b5 00 add t5, a0, a1 +800004a8: 13 0b 00 00 mv s6, zero +800004ac: 13 09 0f 00 mv s2, t5 +800004b0: 93 0e 0e 00 mv t4, t3 +800004b4: 03 2a 01 00 lw s4, 0(sp) +800004b8: 83 2a 41 00 lw s5, 4(sp) +800004bc: 93 06 00 00 mv a3, zero +800004c0: b3 85 68 01 add a1, a7, s6 +800004c4: 13 87 f5 ff addi a4, a1, -1 +800004c8: b3 0c c7 02 mul s9, a4, a2 +800004cc: 33 8d c5 02 mul s10, a1, a2 +800004d0: 93 85 15 00 addi a1, a1, 1 +800004d4: b3 8d c5 02 mul s11, a1, a2 +800004d8: 93 00 09 00 mv ra, s2 +800004dc: 93 89 0e 00 mv s3, t4 +800004e0: 93 05 0a 00 mv a1, s4 +800004e4: 13 87 0a 00 mv a4, s5 +800004e8: b3 07 dc 00 add a5, s8, a3 +800004ec: 13 83 f7 ff addi t1, a5, -1 +800004f0: 33 08 93 01 add a6, t1, s9 +800004f4: 93 14 28 00 slli s1, a6, 2 +800004f8: b3 04 95 00 add s1, a0, s1 +800004fc: 87 a0 04 00 flw ft1, 0(s1) +80000500: d3 f0 a0 10 fmul.s ft1, ft1, fa0 +80000504: 07 a1 00 00 flw ft2, 0(ra) +80000508: 93 87 17 00 addi a5, a5, 1 +8000050c: b3 84 97 01 add s1, a5, s9 +80000510: 93 94 24 00 slli s1, s1, 2 +80000514: b3 04 95 00 add s1, a0, s1 +80000518: 87 a1 04 00 flw ft3, 0(s1) +8000051c: b3 04 a3 01 add s1, t1, s10 +80000520: 93 94 24 00 slli s1, s1, 2 +80000524: b3 04 95 00 add s1, a0, s1 +80000528: 07 a2 04 00 flw ft4, 0(s1) +8000052c: 87 a2 09 00 flw ft5, 0(s3) +80000530: 53 71 b1 10 fmul.s ft2, ft2, fa1 +80000534: d3 f1 c1 10 fmul.s ft3, ft3, fa2 +80000538: 53 72 d2 10 fmul.s ft4, ft4, fa3 +8000053c: d3 f2 e2 10 fmul.s ft5, ft5, fa4 +80000540: b3 84 a7 01 add s1, a5, s10 +80000544: 93 94 24 00 slli s1, s1, 2 +80000548: b3 04 95 00 add s1, a0, s1 +8000054c: 07 a3 04 00 flw ft6, 0(s1) +80000550: b3 04 b3 01 add s1, t1, s11 +80000554: 93 94 24 00 slli s1, s1, 2 +80000558: b3 04 95 00 add s1, a0, s1 +8000055c: 87 a3 04 00 flw ft7, 0(s1) +80000560: 07 2e 07 00 flw ft8, 0(a4) +80000564: b3 87 b7 01 add a5, a5, s11 +80000568: 93 97 27 00 slli a5, a5, 2 +8000056c: b3 07 f5 00 add a5, a0, a5 +80000570: 87 ae 07 00 flw ft9, 0(a5) +80000574: 53 73 f3 10 fmul.s ft6, ft6, fa5 +80000578: d3 f3 03 11 fmul.s ft7, ft7, fa6 +8000057c: 53 7e 1e 11 fmul.s ft8, ft8, fa7 +80000580: d3 fe 0e 10 fmul.s ft9, ft9, ft0 +80000584: d3 f0 20 00 fadd.s ft1, ft1, ft2 +80000588: d3 f0 30 00 fadd.s ft1, ft1, ft3 +8000058c: d3 f0 40 00 fadd.s ft1, ft1, ft4 +80000590: d3 f0 50 00 fadd.s ft1, ft1, ft5 +80000594: d3 f0 60 00 fadd.s ft1, ft1, ft6 +80000598: d3 f0 70 00 fadd.s ft1, ft1, ft7 +8000059c: d3 f0 c0 01 fadd.s ft1, ft1, ft8 +800005a0: d3 f0 d0 01 fadd.s ft1, ft1, ft9 +800005a4: 27 a0 15 00 fsw ft1, 0(a1) +800005a8: 93 86 16 00 addi a3, a3, 1 +800005ac: 13 07 47 00 addi a4, a4, 4 +800005b0: 93 85 45 00 addi a1, a1, 4 +800005b4: 93 89 49 00 addi s3, s3, 4 +800005b8: 93 80 40 00 addi ra, ra, 4 +800005bc: e3 e6 76 f3 bltu a3, s7, -212 +800005c0: 13 0b 1b 00 addi s6, s6, 1 +800005c4: b3 8a fa 01 add s5, s5, t6 +800005c8: 33 0a fa 01 add s4, s4, t6 +800005cc: b3 8e fe 01 add t4, t4, t6 +800005d0: 33 09 f9 01 add s2, s2, t6 +800005d4: e3 64 7b ee bltu s6, t2, -280 +800005d8: 93 82 12 00 addi t0, t0, 1 +800005dc: 83 25 81 00 lw a1, 8(sp) +800005e0: e3 e4 b2 ec bltu t0, a1, -312 +800005e4: 13 01 04 fc addi sp, s0, -64 +800005e8: 83 2d c1 00 lw s11, 12(sp) +800005ec: 03 2d 01 01 lw s10, 16(sp) +800005f0: 83 2c 41 01 lw s9, 20(sp) +800005f4: 03 2c 81 01 lw s8, 24(sp) +800005f8: 83 2b c1 01 lw s7, 28(sp) +800005fc: 03 2b 01 02 lw s6, 32(sp) +80000600: 83 2a 41 02 lw s5, 36(sp) +80000604: 03 2a 81 02 lw s4, 40(sp) +80000608: 83 29 c1 02 lw s3, 44(sp) +8000060c: 03 29 01 03 lw s2, 48(sp) +80000610: 83 24 41 03 lw s1, 52(sp) +80000614: 03 24 81 03 lw s0, 56(sp) +80000618: 83 20 c1 03 lw ra, 60(sp) +8000061c: 13 01 01 04 addi sp, sp, 64 +80000620: 67 80 00 00 ret -8000066c _pocl_kernel_sfilter_workgroup: -8000066c: 13 01 01 fc addi sp, sp, -64 -80000670: 23 2e 11 02 sw ra, 60(sp) -80000674: 23 2c 81 02 sw s0, 56(sp) -80000678: 23 2a 91 02 sw s1, 52(sp) -8000067c: 23 28 21 03 sw s2, 48(sp) -80000680: 23 26 31 03 sw s3, 44(sp) -80000684: 23 24 41 03 sw s4, 40(sp) -80000688: 23 22 51 03 sw s5, 36(sp) -8000068c: 23 20 61 03 sw s6, 32(sp) -80000690: 23 2e 71 01 sw s7, 28(sp) -80000694: 23 2c 81 01 sw s8, 24(sp) -80000698: 23 2a 91 01 sw s9, 20(sp) -8000069c: 23 28 a1 01 sw s10, 16(sp) -800006a0: 23 26 b1 01 sw s11, 12(sp) -800006a4: 13 08 00 00 mv a6, zero -800006a8: 03 27 05 00 lw a4, 0(a0) -800006ac: 83 24 45 00 lw s1, 4(a0) -800006b0: 03 24 85 00 lw s0, 8(a0) -800006b4: 83 28 c5 00 lw a7, 12(a0) -800006b8: 03 23 07 00 lw t1, 0(a4) -800006bc: 83 a3 04 00 lw t2, 0(s1) -800006c0: 83 2f 04 00 lw t6, 0(s0) -800006c4: 07 a0 08 00 flw ft0, 0(a7) -800006c8: 83 24 05 01 lw s1, 16(a0) -800006cc: 03 24 45 01 lw s0, 20(a0) -800006d0: 03 27 85 01 lw a4, 24(a0) -800006d4: 83 28 c5 01 lw a7, 28(a0) -800006d8: 87 a0 04 00 flw ft1, 0(s1) -800006dc: 07 21 04 00 flw ft2, 0(s0) -800006e0: 87 21 07 00 flw ft3, 0(a4) -800006e4: 07 a2 08 00 flw ft4, 0(a7) -800006e8: 03 27 05 02 lw a4, 32(a0) -800006ec: 83 24 45 02 lw s1, 36(a0) -800006f0: 03 24 85 02 lw s0, 40(a0) -800006f4: 03 25 c5 02 lw a0, 44(a0) -800006f8: 87 22 07 00 flw ft5, 0(a4) -800006fc: 07 a3 04 00 flw ft6, 0(s1) -80000700: 87 23 04 00 flw ft7, 0(s0) -80000704: 07 25 05 00 flw fa0, 0(a0) -80000708: 03 ac 85 01 lw s8, 24(a1) -8000070c: 83 a8 c5 01 lw a7, 28(a1) -80000710: 03 a5 c5 00 lw a0, 12(a1) -80000714: 03 a7 05 02 lw a4, 32(a1) -80000718: 23 24 e1 00 sw a4, 8(sp) -8000071c: 83 a5 05 01 lw a1, 16(a1) -80000720: 33 06 cc 02 mul a2, s8, a2 -80000724: b3 0c c5 00 add s9, a0, a2 -80000728: 33 85 d8 02 mul a0, a7, a3 -8000072c: 33 8f a5 00 add t5, a1, a0 -80000730: 13 05 1f 00 addi a0, t5, 1 -80000734: 33 05 f5 03 mul a0, a0, t6 -80000738: 33 85 ac 00 add a0, s9, a0 -8000073c: 13 15 25 00 slli a0, a0, 2 -80000740: 33 05 a3 00 add a0, t1, a0 -80000744: 23 22 a1 00 sw a0, 4(sp) -80000748: 13 99 2f 00 slli s2, t6, 2 -8000074c: 33 05 ff 03 mul a0, t5, t6 -80000750: 33 85 ac 00 add a0, s9, a0 -80000754: 13 15 25 00 slli a0, a0, 2 -80000758: b3 83 a3 00 add t2, t2, a0 -8000075c: 33 0e a3 00 add t3, t1, a0 -80000760: 13 05 ff ff addi a0, t5, -1 -80000764: 33 05 f5 03 mul a0, a0, t6 -80000768: 33 85 ac 00 add a0, s9, a0 -8000076c: 13 15 25 00 slli a0, a0, 2 -80000770: b3 0e a3 00 add t4, t1, a0 -80000774: 93 0b 00 00 mv s7, zero -80000778: 93 89 0e 00 mv s3, t4 -8000077c: 13 0a 0e 00 mv s4, t3 -80000780: 93 8a 03 00 mv s5, t2 -80000784: 03 2b 41 00 lw s6, 4(sp) -80000788: 13 07 00 00 mv a4, zero -8000078c: 33 05 7f 01 add a0, t5, s7 -80000790: 93 05 f5 ff addi a1, a0, -1 -80000794: 33 8d f5 03 mul s10, a1, t6 -80000798: b3 8d af 02 mul s11, t6, a0 -8000079c: 13 05 15 00 addi a0, a0, 1 -800007a0: b3 00 f5 03 mul ra, a0, t6 -800007a4: 13 86 09 00 mv a2, s3 -800007a8: 13 05 0a 00 mv a0, s4 -800007ac: 93 85 0a 00 mv a1, s5 -800007b0: 13 04 0b 00 mv s0, s6 -800007b4: b3 84 ec 00 add s1, s9, a4 -800007b8: 93 86 f4 ff addi a3, s1, -1 -800007bc: b3 82 a6 01 add t0, a3, s10 -800007c0: 93 97 22 00 slli a5, t0, 2 -800007c4: b3 07 f3 00 add a5, t1, a5 -800007c8: 87 a5 07 00 flw fa1, 0(a5) -800007cc: d3 75 b0 10 fmul.s fa1, ft0, fa1 -800007d0: 07 26 06 00 flw fa2, 0(a2) -800007d4: 93 87 14 00 addi a5, s1, 1 -800007d8: b3 84 a7 01 add s1, a5, s10 -800007dc: 93 94 24 00 slli s1, s1, 2 -800007e0: b3 04 93 00 add s1, t1, s1 -800007e4: 87 a6 04 00 flw fa3, 0(s1) -800007e8: b3 84 b6 01 add s1, a3, s11 -800007ec: 93 94 24 00 slli s1, s1, 2 -800007f0: b3 04 93 00 add s1, t1, s1 -800007f4: 07 a7 04 00 flw fa4, 0(s1) -800007f8: 87 27 05 00 flw fa5, 0(a0) -800007fc: 53 f6 c0 10 fmul.s fa2, ft1, fa2 -80000800: d3 76 d1 10 fmul.s fa3, ft2, fa3 -80000804: 53 f7 e1 10 fmul.s fa4, ft3, fa4 -80000808: d3 77 f2 10 fmul.s fa5, ft4, fa5 -8000080c: b3 84 b7 01 add s1, a5, s11 -80000810: 93 94 24 00 slli s1, s1, 2 -80000814: b3 04 93 00 add s1, t1, s1 -80000818: 07 a8 04 00 flw fa6, 0(s1) -8000081c: b3 86 16 00 add a3, a3, ra -80000820: 93 96 26 00 slli a3, a3, 2 -80000824: b3 06 d3 00 add a3, t1, a3 -80000828: 87 a8 06 00 flw fa7, 0(a3) -8000082c: 07 2e 04 00 flw ft8, 0(s0) -80000830: b3 86 17 00 add a3, a5, ra -80000834: 93 96 26 00 slli a3, a3, 2 -80000838: b3 06 d3 00 add a3, t1, a3 -8000083c: 87 ae 06 00 flw ft9, 0(a3) -80000840: 53 f8 02 11 fmul.s fa6, ft5, fa6 -80000844: d3 78 13 11 fmul.s fa7, ft6, fa7 -80000848: 53 fe c3 11 fmul.s ft8, ft7, ft8 -8000084c: d3 7e d5 11 fmul.s ft9, fa0, ft9 -80000850: d3 f5 c5 00 fadd.s fa1, fa1, fa2 -80000854: d3 f5 d5 00 fadd.s fa1, fa1, fa3 -80000858: d3 f5 e5 00 fadd.s fa1, fa1, fa4 -8000085c: d3 f5 f5 00 fadd.s fa1, fa1, fa5 -80000860: d3 f5 05 01 fadd.s fa1, fa1, fa6 -80000864: d3 f5 15 01 fadd.s fa1, fa1, fa7 -80000868: d3 f5 c5 01 fadd.s fa1, fa1, ft8 -8000086c: d3 f5 d5 01 fadd.s fa1, fa1, ft9 -80000870: 27 a0 b5 00 fsw fa1, 0(a1) -80000874: 13 07 17 00 addi a4, a4, 1 -80000878: 13 04 44 00 addi s0, s0, 4 -8000087c: 93 85 45 00 addi a1, a1, 4 -80000880: 13 05 45 00 addi a0, a0, 4 -80000884: 13 06 46 00 addi a2, a2, 4 -80000888: e3 66 87 f3 bltu a4, s8, -212 -8000088c: 93 8b 1b 00 addi s7, s7, 1 -80000890: 33 0b 2b 01 add s6, s6, s2 -80000894: b3 8a 2a 01 add s5, s5, s2 -80000898: 33 0a 2a 01 add s4, s4, s2 -8000089c: b3 89 29 01 add s3, s3, s2 -800008a0: e3 e4 1b ef bltu s7, a7, -280 -800008a4: 13 08 18 00 addi a6, a6, 1 -800008a8: 03 25 81 00 lw a0, 8(sp) -800008ac: e3 64 a8 ec bltu a6, a0, -312 -800008b0: 83 2d c1 00 lw s11, 12(sp) -800008b4: 03 2d 01 01 lw s10, 16(sp) -800008b8: 83 2c 41 01 lw s9, 20(sp) -800008bc: 03 2c 81 01 lw s8, 24(sp) -800008c0: 83 2b c1 01 lw s7, 28(sp) -800008c4: 03 2b 01 02 lw s6, 32(sp) -800008c8: 83 2a 41 02 lw s5, 36(sp) -800008cc: 03 2a 81 02 lw s4, 40(sp) -800008d0: 83 29 c1 02 lw s3, 44(sp) -800008d4: 03 29 01 03 lw s2, 48(sp) -800008d8: 83 24 41 03 lw s1, 52(sp) -800008dc: 03 24 81 03 lw s0, 56(sp) -800008e0: 83 20 c1 03 lw ra, 60(sp) -800008e4: 13 01 01 04 addi sp, sp, 64 -800008e8: 67 80 00 00 ret +80000624 _pocl_kernel_sfilter_workgroup: +80000624: 13 01 01 fc addi sp, sp, -64 +80000628: 23 2e 11 02 sw ra, 60(sp) +8000062c: 23 2c 81 02 sw s0, 56(sp) +80000630: 23 2a 91 02 sw s1, 52(sp) +80000634: 23 28 21 03 sw s2, 48(sp) +80000638: 23 26 31 03 sw s3, 44(sp) +8000063c: 23 24 41 03 sw s4, 40(sp) +80000640: 23 22 51 03 sw s5, 36(sp) +80000644: 23 20 61 03 sw s6, 32(sp) +80000648: 23 2e 71 01 sw s7, 28(sp) +8000064c: 23 2c 81 01 sw s8, 24(sp) +80000650: 23 2a 91 01 sw s9, 20(sp) +80000654: 23 28 a1 01 sw s10, 16(sp) +80000658: 23 26 b1 01 sw s11, 12(sp) +8000065c: 13 08 00 00 mv a6, zero +80000660: 03 27 05 00 lw a4, 0(a0) +80000664: 83 24 45 00 lw s1, 4(a0) +80000668: 03 24 85 00 lw s0, 8(a0) +8000066c: 83 28 c5 00 lw a7, 12(a0) +80000670: 03 23 07 00 lw t1, 0(a4) +80000674: 83 a3 04 00 lw t2, 0(s1) +80000678: 83 2f 04 00 lw t6, 0(s0) +8000067c: 07 a0 08 00 flw ft0, 0(a7) +80000680: 83 24 05 01 lw s1, 16(a0) +80000684: 03 24 45 01 lw s0, 20(a0) +80000688: 03 27 85 01 lw a4, 24(a0) +8000068c: 83 28 c5 01 lw a7, 28(a0) +80000690: 87 a0 04 00 flw ft1, 0(s1) +80000694: 07 21 04 00 flw ft2, 0(s0) +80000698: 87 21 07 00 flw ft3, 0(a4) +8000069c: 07 a2 08 00 flw ft4, 0(a7) +800006a0: 03 27 05 02 lw a4, 32(a0) +800006a4: 83 24 45 02 lw s1, 36(a0) +800006a8: 03 24 85 02 lw s0, 40(a0) +800006ac: 03 25 c5 02 lw a0, 44(a0) +800006b0: 87 22 07 00 flw ft5, 0(a4) +800006b4: 07 a3 04 00 flw ft6, 0(s1) +800006b8: 87 23 04 00 flw ft7, 0(s0) +800006bc: 07 25 05 00 flw fa0, 0(a0) +800006c0: 03 ac 85 01 lw s8, 24(a1) +800006c4: 83 a8 c5 01 lw a7, 28(a1) +800006c8: 03 a5 c5 00 lw a0, 12(a1) +800006cc: 03 a7 05 02 lw a4, 32(a1) +800006d0: 23 24 e1 00 sw a4, 8(sp) +800006d4: 83 a5 05 01 lw a1, 16(a1) +800006d8: 33 06 cc 02 mul a2, s8, a2 +800006dc: b3 0c c5 00 add s9, a0, a2 +800006e0: 33 85 d8 02 mul a0, a7, a3 +800006e4: 33 8f a5 00 add t5, a1, a0 +800006e8: 13 05 1f 00 addi a0, t5, 1 +800006ec: 33 05 f5 03 mul a0, a0, t6 +800006f0: 33 85 ac 00 add a0, s9, a0 +800006f4: 13 15 25 00 slli a0, a0, 2 +800006f8: 33 05 a3 00 add a0, t1, a0 +800006fc: 23 22 a1 00 sw a0, 4(sp) +80000700: 13 99 2f 00 slli s2, t6, 2 +80000704: 33 05 ff 03 mul a0, t5, t6 +80000708: 33 85 ac 00 add a0, s9, a0 +8000070c: 13 15 25 00 slli a0, a0, 2 +80000710: b3 83 a3 00 add t2, t2, a0 +80000714: 33 0e a3 00 add t3, t1, a0 +80000718: 13 05 ff ff addi a0, t5, -1 +8000071c: 33 05 f5 03 mul a0, a0, t6 +80000720: 33 85 ac 00 add a0, s9, a0 +80000724: 13 15 25 00 slli a0, a0, 2 +80000728: b3 0e a3 00 add t4, t1, a0 +8000072c: 93 0b 00 00 mv s7, zero +80000730: 93 89 0e 00 mv s3, t4 +80000734: 13 0a 0e 00 mv s4, t3 +80000738: 93 8a 03 00 mv s5, t2 +8000073c: 03 2b 41 00 lw s6, 4(sp) +80000740: 13 07 00 00 mv a4, zero +80000744: 33 05 7f 01 add a0, t5, s7 +80000748: 93 05 f5 ff addi a1, a0, -1 +8000074c: 33 8d f5 03 mul s10, a1, t6 +80000750: b3 8d af 02 mul s11, t6, a0 +80000754: 13 05 15 00 addi a0, a0, 1 +80000758: b3 00 f5 03 mul ra, a0, t6 +8000075c: 13 86 09 00 mv a2, s3 +80000760: 13 05 0a 00 mv a0, s4 +80000764: 93 85 0a 00 mv a1, s5 +80000768: 13 04 0b 00 mv s0, s6 +8000076c: b3 84 ec 00 add s1, s9, a4 +80000770: 93 86 f4 ff addi a3, s1, -1 +80000774: b3 82 a6 01 add t0, a3, s10 +80000778: 93 97 22 00 slli a5, t0, 2 +8000077c: b3 07 f3 00 add a5, t1, a5 +80000780: 87 a5 07 00 flw fa1, 0(a5) +80000784: d3 75 b0 10 fmul.s fa1, ft0, fa1 +80000788: 07 26 06 00 flw fa2, 0(a2) +8000078c: 93 87 14 00 addi a5, s1, 1 +80000790: b3 84 a7 01 add s1, a5, s10 +80000794: 93 94 24 00 slli s1, s1, 2 +80000798: b3 04 93 00 add s1, t1, s1 +8000079c: 87 a6 04 00 flw fa3, 0(s1) +800007a0: b3 84 b6 01 add s1, a3, s11 +800007a4: 93 94 24 00 slli s1, s1, 2 +800007a8: b3 04 93 00 add s1, t1, s1 +800007ac: 07 a7 04 00 flw fa4, 0(s1) +800007b0: 87 27 05 00 flw fa5, 0(a0) +800007b4: 53 f6 c0 10 fmul.s fa2, ft1, fa2 +800007b8: d3 76 d1 10 fmul.s fa3, ft2, fa3 +800007bc: 53 f7 e1 10 fmul.s fa4, ft3, fa4 +800007c0: d3 77 f2 10 fmul.s fa5, ft4, fa5 +800007c4: b3 84 b7 01 add s1, a5, s11 +800007c8: 93 94 24 00 slli s1, s1, 2 +800007cc: b3 04 93 00 add s1, t1, s1 +800007d0: 07 a8 04 00 flw fa6, 0(s1) +800007d4: b3 86 16 00 add a3, a3, ra +800007d8: 93 96 26 00 slli a3, a3, 2 +800007dc: b3 06 d3 00 add a3, t1, a3 +800007e0: 87 a8 06 00 flw fa7, 0(a3) +800007e4: 07 2e 04 00 flw ft8, 0(s0) +800007e8: b3 86 17 00 add a3, a5, ra +800007ec: 93 96 26 00 slli a3, a3, 2 +800007f0: b3 06 d3 00 add a3, t1, a3 +800007f4: 87 ae 06 00 flw ft9, 0(a3) +800007f8: 53 f8 02 11 fmul.s fa6, ft5, fa6 +800007fc: d3 78 13 11 fmul.s fa7, ft6, fa7 +80000800: 53 fe c3 11 fmul.s ft8, ft7, ft8 +80000804: d3 7e d5 11 fmul.s ft9, fa0, ft9 +80000808: d3 f5 c5 00 fadd.s fa1, fa1, fa2 +8000080c: d3 f5 d5 00 fadd.s fa1, fa1, fa3 +80000810: d3 f5 e5 00 fadd.s fa1, fa1, fa4 +80000814: d3 f5 f5 00 fadd.s fa1, fa1, fa5 +80000818: d3 f5 05 01 fadd.s fa1, fa1, fa6 +8000081c: d3 f5 15 01 fadd.s fa1, fa1, fa7 +80000820: d3 f5 c5 01 fadd.s fa1, fa1, ft8 +80000824: d3 f5 d5 01 fadd.s fa1, fa1, ft9 +80000828: 27 a0 b5 00 fsw fa1, 0(a1) +8000082c: 13 07 17 00 addi a4, a4, 1 +80000830: 13 04 44 00 addi s0, s0, 4 +80000834: 93 85 45 00 addi a1, a1, 4 +80000838: 13 05 45 00 addi a0, a0, 4 +8000083c: 13 06 46 00 addi a2, a2, 4 +80000840: e3 66 87 f3 bltu a4, s8, -212 +80000844: 93 8b 1b 00 addi s7, s7, 1 +80000848: 33 0b 2b 01 add s6, s6, s2 +8000084c: b3 8a 2a 01 add s5, s5, s2 +80000850: 33 0a 2a 01 add s4, s4, s2 +80000854: b3 89 29 01 add s3, s3, s2 +80000858: e3 e4 1b ef bltu s7, a7, -280 +8000085c: 13 08 18 00 addi a6, a6, 1 +80000860: 03 25 81 00 lw a0, 8(sp) +80000864: e3 64 a8 ec bltu a6, a0, -312 +80000868: 83 2d c1 00 lw s11, 12(sp) +8000086c: 03 2d 01 01 lw s10, 16(sp) +80000870: 83 2c 41 01 lw s9, 20(sp) +80000874: 03 2c 81 01 lw s8, 24(sp) +80000878: 83 2b c1 01 lw s7, 28(sp) +8000087c: 03 2b 01 02 lw s6, 32(sp) +80000880: 83 2a 41 02 lw s5, 36(sp) +80000884: 03 2a 81 02 lw s4, 40(sp) +80000888: 83 29 c1 02 lw s3, 44(sp) +8000088c: 03 29 01 03 lw s2, 48(sp) +80000890: 83 24 41 03 lw s1, 52(sp) +80000894: 03 24 81 03 lw s0, 56(sp) +80000898: 83 20 c1 03 lw ra, 60(sp) +8000089c: 13 01 01 04 addi sp, sp, 64 +800008a0: 67 80 00 00 ret -800008ec _pocl_kernel_sfilter_workgroup_fast: -800008ec: 13 01 01 fc addi sp, sp, -64 -800008f0: 23 2e 11 02 sw ra, 60(sp) -800008f4: 23 2c 81 02 sw s0, 56(sp) -800008f8: 23 2a 91 02 sw s1, 52(sp) -800008fc: 23 28 21 03 sw s2, 48(sp) -80000900: 23 26 31 03 sw s3, 44(sp) -80000904: 23 24 41 03 sw s4, 40(sp) -80000908: 23 22 51 03 sw s5, 36(sp) -8000090c: 23 20 61 03 sw s6, 32(sp) -80000910: 23 2e 71 01 sw s7, 28(sp) -80000914: 23 2c 81 01 sw s8, 24(sp) -80000918: 23 2a 91 01 sw s9, 20(sp) -8000091c: 23 28 a1 01 sw s10, 16(sp) -80000920: 23 26 b1 01 sw s11, 12(sp) -80000924: 13 08 00 00 mv a6, zero -80000928: 03 27 85 00 lw a4, 8(a0) -8000092c: 83 24 c5 00 lw s1, 12(a0) -80000930: 03 23 05 00 lw t1, 0(a0) -80000934: 83 23 45 00 lw t2, 4(a0) -80000938: 83 2f 07 00 lw t6, 0(a4) -8000093c: 07 a0 04 00 flw ft0, 0(s1) -80000940: 03 27 05 01 lw a4, 16(a0) -80000944: 83 24 45 01 lw s1, 20(a0) -80000948: 03 24 85 01 lw s0, 24(a0) -8000094c: 83 28 c5 01 lw a7, 28(a0) -80000950: 87 20 07 00 flw ft1, 0(a4) -80000954: 07 a1 04 00 flw ft2, 0(s1) -80000958: 87 21 04 00 flw ft3, 0(s0) -8000095c: 07 a2 08 00 flw ft4, 0(a7) -80000960: 03 27 05 02 lw a4, 32(a0) -80000964: 83 24 45 02 lw s1, 36(a0) -80000968: 03 24 85 02 lw s0, 40(a0) -8000096c: 03 25 c5 02 lw a0, 44(a0) -80000970: 87 22 07 00 flw ft5, 0(a4) -80000974: 07 a3 04 00 flw ft6, 0(s1) -80000978: 87 23 04 00 flw ft7, 0(s0) -8000097c: 07 25 05 00 flw fa0, 0(a0) -80000980: 03 ac 85 01 lw s8, 24(a1) -80000984: 83 a8 c5 01 lw a7, 28(a1) -80000988: 03 a5 c5 00 lw a0, 12(a1) -8000098c: 03 a7 05 02 lw a4, 32(a1) -80000990: 23 24 e1 00 sw a4, 8(sp) -80000994: 83 a5 05 01 lw a1, 16(a1) -80000998: 33 06 cc 02 mul a2, s8, a2 -8000099c: b3 0c c5 00 add s9, a0, a2 -800009a0: 33 85 d8 02 mul a0, a7, a3 -800009a4: 33 8f a5 00 add t5, a1, a0 -800009a8: 13 05 1f 00 addi a0, t5, 1 -800009ac: 33 05 f5 03 mul a0, a0, t6 -800009b0: 33 85 ac 00 add a0, s9, a0 -800009b4: 13 15 25 00 slli a0, a0, 2 -800009b8: 33 05 a3 00 add a0, t1, a0 -800009bc: 23 22 a1 00 sw a0, 4(sp) -800009c0: 13 99 2f 00 slli s2, t6, 2 -800009c4: 33 05 ff 03 mul a0, t5, t6 -800009c8: 33 85 ac 00 add a0, s9, a0 -800009cc: 13 15 25 00 slli a0, a0, 2 -800009d0: b3 83 a3 00 add t2, t2, a0 -800009d4: 33 0e a3 00 add t3, t1, a0 -800009d8: 13 05 ff ff addi a0, t5, -1 -800009dc: 33 05 f5 03 mul a0, a0, t6 -800009e0: 33 85 ac 00 add a0, s9, a0 -800009e4: 13 15 25 00 slli a0, a0, 2 -800009e8: b3 0e a3 00 add t4, t1, a0 -800009ec: 93 0b 00 00 mv s7, zero -800009f0: 93 89 0e 00 mv s3, t4 -800009f4: 13 0a 0e 00 mv s4, t3 -800009f8: 93 8a 03 00 mv s5, t2 -800009fc: 03 2b 41 00 lw s6, 4(sp) -80000a00: 13 07 00 00 mv a4, zero -80000a04: 33 05 7f 01 add a0, t5, s7 -80000a08: 93 05 f5 ff addi a1, a0, -1 -80000a0c: 33 8d f5 03 mul s10, a1, t6 -80000a10: b3 8d af 02 mul s11, t6, a0 -80000a14: 13 05 15 00 addi a0, a0, 1 -80000a18: b3 00 f5 03 mul ra, a0, t6 -80000a1c: 13 86 09 00 mv a2, s3 -80000a20: 13 05 0a 00 mv a0, s4 -80000a24: 93 85 0a 00 mv a1, s5 -80000a28: 13 04 0b 00 mv s0, s6 -80000a2c: b3 84 ec 00 add s1, s9, a4 -80000a30: 93 86 f4 ff addi a3, s1, -1 -80000a34: b3 82 a6 01 add t0, a3, s10 -80000a38: 93 97 22 00 slli a5, t0, 2 -80000a3c: b3 07 f3 00 add a5, t1, a5 -80000a40: 87 a5 07 00 flw fa1, 0(a5) -80000a44: d3 75 b0 10 fmul.s fa1, ft0, fa1 -80000a48: 07 26 06 00 flw fa2, 0(a2) -80000a4c: 93 87 14 00 addi a5, s1, 1 -80000a50: b3 84 a7 01 add s1, a5, s10 -80000a54: 93 94 24 00 slli s1, s1, 2 -80000a58: b3 04 93 00 add s1, t1, s1 -80000a5c: 87 a6 04 00 flw fa3, 0(s1) -80000a60: b3 84 b6 01 add s1, a3, s11 -80000a64: 93 94 24 00 slli s1, s1, 2 -80000a68: b3 04 93 00 add s1, t1, s1 -80000a6c: 07 a7 04 00 flw fa4, 0(s1) -80000a70: 87 27 05 00 flw fa5, 0(a0) -80000a74: 53 f6 c0 10 fmul.s fa2, ft1, fa2 -80000a78: d3 76 d1 10 fmul.s fa3, ft2, fa3 -80000a7c: 53 f7 e1 10 fmul.s fa4, ft3, fa4 -80000a80: d3 77 f2 10 fmul.s fa5, ft4, fa5 -80000a84: b3 84 b7 01 add s1, a5, s11 -80000a88: 93 94 24 00 slli s1, s1, 2 -80000a8c: b3 04 93 00 add s1, t1, s1 -80000a90: 07 a8 04 00 flw fa6, 0(s1) -80000a94: b3 86 16 00 add a3, a3, ra -80000a98: 93 96 26 00 slli a3, a3, 2 -80000a9c: b3 06 d3 00 add a3, t1, a3 -80000aa0: 87 a8 06 00 flw fa7, 0(a3) -80000aa4: 07 2e 04 00 flw ft8, 0(s0) -80000aa8: b3 86 17 00 add a3, a5, ra -80000aac: 93 96 26 00 slli a3, a3, 2 -80000ab0: b3 06 d3 00 add a3, t1, a3 -80000ab4: 87 ae 06 00 flw ft9, 0(a3) -80000ab8: 53 f8 02 11 fmul.s fa6, ft5, fa6 -80000abc: d3 78 13 11 fmul.s fa7, ft6, fa7 -80000ac0: 53 fe c3 11 fmul.s ft8, ft7, ft8 -80000ac4: d3 7e d5 11 fmul.s ft9, fa0, ft9 -80000ac8: d3 f5 c5 00 fadd.s fa1, fa1, fa2 -80000acc: d3 f5 d5 00 fadd.s fa1, fa1, fa3 -80000ad0: d3 f5 e5 00 fadd.s fa1, fa1, fa4 -80000ad4: d3 f5 f5 00 fadd.s fa1, fa1, fa5 -80000ad8: d3 f5 05 01 fadd.s fa1, fa1, fa6 -80000adc: d3 f5 15 01 fadd.s fa1, fa1, fa7 -80000ae0: d3 f5 c5 01 fadd.s fa1, fa1, ft8 -80000ae4: d3 f5 d5 01 fadd.s fa1, fa1, ft9 -80000ae8: 27 a0 b5 00 fsw fa1, 0(a1) -80000aec: 13 07 17 00 addi a4, a4, 1 -80000af0: 13 04 44 00 addi s0, s0, 4 -80000af4: 93 85 45 00 addi a1, a1, 4 -80000af8: 13 05 45 00 addi a0, a0, 4 -80000afc: 13 06 46 00 addi a2, a2, 4 -80000b00: e3 66 87 f3 bltu a4, s8, -212 -80000b04: 93 8b 1b 00 addi s7, s7, 1 -80000b08: 33 0b 2b 01 add s6, s6, s2 -80000b0c: b3 8a 2a 01 add s5, s5, s2 -80000b10: 33 0a 2a 01 add s4, s4, s2 -80000b14: b3 89 29 01 add s3, s3, s2 -80000b18: e3 e4 1b ef bltu s7, a7, -280 -80000b1c: 13 08 18 00 addi a6, a6, 1 -80000b20: 03 25 81 00 lw a0, 8(sp) -80000b24: e3 64 a8 ec bltu a6, a0, -312 -80000b28: 83 2d c1 00 lw s11, 12(sp) -80000b2c: 03 2d 01 01 lw s10, 16(sp) -80000b30: 83 2c 41 01 lw s9, 20(sp) -80000b34: 03 2c 81 01 lw s8, 24(sp) -80000b38: 83 2b c1 01 lw s7, 28(sp) -80000b3c: 03 2b 01 02 lw s6, 32(sp) -80000b40: 83 2a 41 02 lw s5, 36(sp) -80000b44: 03 2a 81 02 lw s4, 40(sp) -80000b48: 83 29 c1 02 lw s3, 44(sp) -80000b4c: 03 29 01 03 lw s2, 48(sp) -80000b50: 83 24 41 03 lw s1, 52(sp) -80000b54: 03 24 81 03 lw s0, 56(sp) -80000b58: 83 20 c1 03 lw ra, 60(sp) -80000b5c: 13 01 01 04 addi sp, sp, 64 +800008a4 _pocl_kernel_sfilter_workgroup_fast: +800008a4: 13 01 01 fc addi sp, sp, -64 +800008a8: 23 2e 11 02 sw ra, 60(sp) +800008ac: 23 2c 81 02 sw s0, 56(sp) +800008b0: 23 2a 91 02 sw s1, 52(sp) +800008b4: 23 28 21 03 sw s2, 48(sp) +800008b8: 23 26 31 03 sw s3, 44(sp) +800008bc: 23 24 41 03 sw s4, 40(sp) +800008c0: 23 22 51 03 sw s5, 36(sp) +800008c4: 23 20 61 03 sw s6, 32(sp) +800008c8: 23 2e 71 01 sw s7, 28(sp) +800008cc: 23 2c 81 01 sw s8, 24(sp) +800008d0: 23 2a 91 01 sw s9, 20(sp) +800008d4: 23 28 a1 01 sw s10, 16(sp) +800008d8: 23 26 b1 01 sw s11, 12(sp) +800008dc: 13 08 00 00 mv a6, zero +800008e0: 03 27 85 00 lw a4, 8(a0) +800008e4: 83 24 c5 00 lw s1, 12(a0) +800008e8: 03 23 05 00 lw t1, 0(a0) +800008ec: 83 23 45 00 lw t2, 4(a0) +800008f0: 83 2f 07 00 lw t6, 0(a4) +800008f4: 07 a0 04 00 flw ft0, 0(s1) +800008f8: 03 27 05 01 lw a4, 16(a0) +800008fc: 83 24 45 01 lw s1, 20(a0) +80000900: 03 24 85 01 lw s0, 24(a0) +80000904: 83 28 c5 01 lw a7, 28(a0) +80000908: 87 20 07 00 flw ft1, 0(a4) +8000090c: 07 a1 04 00 flw ft2, 0(s1) +80000910: 87 21 04 00 flw ft3, 0(s0) +80000914: 07 a2 08 00 flw ft4, 0(a7) +80000918: 03 27 05 02 lw a4, 32(a0) +8000091c: 83 24 45 02 lw s1, 36(a0) +80000920: 03 24 85 02 lw s0, 40(a0) +80000924: 03 25 c5 02 lw a0, 44(a0) +80000928: 87 22 07 00 flw ft5, 0(a4) +8000092c: 07 a3 04 00 flw ft6, 0(s1) +80000930: 87 23 04 00 flw ft7, 0(s0) +80000934: 07 25 05 00 flw fa0, 0(a0) +80000938: 03 ac 85 01 lw s8, 24(a1) +8000093c: 83 a8 c5 01 lw a7, 28(a1) +80000940: 03 a5 c5 00 lw a0, 12(a1) +80000944: 03 a7 05 02 lw a4, 32(a1) +80000948: 23 24 e1 00 sw a4, 8(sp) +8000094c: 83 a5 05 01 lw a1, 16(a1) +80000950: 33 06 cc 02 mul a2, s8, a2 +80000954: b3 0c c5 00 add s9, a0, a2 +80000958: 33 85 d8 02 mul a0, a7, a3 +8000095c: 33 8f a5 00 add t5, a1, a0 +80000960: 13 05 1f 00 addi a0, t5, 1 +80000964: 33 05 f5 03 mul a0, a0, t6 +80000968: 33 85 ac 00 add a0, s9, a0 +8000096c: 13 15 25 00 slli a0, a0, 2 +80000970: 33 05 a3 00 add a0, t1, a0 +80000974: 23 22 a1 00 sw a0, 4(sp) +80000978: 13 99 2f 00 slli s2, t6, 2 +8000097c: 33 05 ff 03 mul a0, t5, t6 +80000980: 33 85 ac 00 add a0, s9, a0 +80000984: 13 15 25 00 slli a0, a0, 2 +80000988: b3 83 a3 00 add t2, t2, a0 +8000098c: 33 0e a3 00 add t3, t1, a0 +80000990: 13 05 ff ff addi a0, t5, -1 +80000994: 33 05 f5 03 mul a0, a0, t6 +80000998: 33 85 ac 00 add a0, s9, a0 +8000099c: 13 15 25 00 slli a0, a0, 2 +800009a0: b3 0e a3 00 add t4, t1, a0 +800009a4: 93 0b 00 00 mv s7, zero +800009a8: 93 89 0e 00 mv s3, t4 +800009ac: 13 0a 0e 00 mv s4, t3 +800009b0: 93 8a 03 00 mv s5, t2 +800009b4: 03 2b 41 00 lw s6, 4(sp) +800009b8: 13 07 00 00 mv a4, zero +800009bc: 33 05 7f 01 add a0, t5, s7 +800009c0: 93 05 f5 ff addi a1, a0, -1 +800009c4: 33 8d f5 03 mul s10, a1, t6 +800009c8: b3 8d af 02 mul s11, t6, a0 +800009cc: 13 05 15 00 addi a0, a0, 1 +800009d0: b3 00 f5 03 mul ra, a0, t6 +800009d4: 13 86 09 00 mv a2, s3 +800009d8: 13 05 0a 00 mv a0, s4 +800009dc: 93 85 0a 00 mv a1, s5 +800009e0: 13 04 0b 00 mv s0, s6 +800009e4: b3 84 ec 00 add s1, s9, a4 +800009e8: 93 86 f4 ff addi a3, s1, -1 +800009ec: b3 82 a6 01 add t0, a3, s10 +800009f0: 93 97 22 00 slli a5, t0, 2 +800009f4: b3 07 f3 00 add a5, t1, a5 +800009f8: 87 a5 07 00 flw fa1, 0(a5) +800009fc: d3 75 b0 10 fmul.s fa1, ft0, fa1 +80000a00: 07 26 06 00 flw fa2, 0(a2) +80000a04: 93 87 14 00 addi a5, s1, 1 +80000a08: b3 84 a7 01 add s1, a5, s10 +80000a0c: 93 94 24 00 slli s1, s1, 2 +80000a10: b3 04 93 00 add s1, t1, s1 +80000a14: 87 a6 04 00 flw fa3, 0(s1) +80000a18: b3 84 b6 01 add s1, a3, s11 +80000a1c: 93 94 24 00 slli s1, s1, 2 +80000a20: b3 04 93 00 add s1, t1, s1 +80000a24: 07 a7 04 00 flw fa4, 0(s1) +80000a28: 87 27 05 00 flw fa5, 0(a0) +80000a2c: 53 f6 c0 10 fmul.s fa2, ft1, fa2 +80000a30: d3 76 d1 10 fmul.s fa3, ft2, fa3 +80000a34: 53 f7 e1 10 fmul.s fa4, ft3, fa4 +80000a38: d3 77 f2 10 fmul.s fa5, ft4, fa5 +80000a3c: b3 84 b7 01 add s1, a5, s11 +80000a40: 93 94 24 00 slli s1, s1, 2 +80000a44: b3 04 93 00 add s1, t1, s1 +80000a48: 07 a8 04 00 flw fa6, 0(s1) +80000a4c: b3 86 16 00 add a3, a3, ra +80000a50: 93 96 26 00 slli a3, a3, 2 +80000a54: b3 06 d3 00 add a3, t1, a3 +80000a58: 87 a8 06 00 flw fa7, 0(a3) +80000a5c: 07 2e 04 00 flw ft8, 0(s0) +80000a60: b3 86 17 00 add a3, a5, ra +80000a64: 93 96 26 00 slli a3, a3, 2 +80000a68: b3 06 d3 00 add a3, t1, a3 +80000a6c: 87 ae 06 00 flw ft9, 0(a3) +80000a70: 53 f8 02 11 fmul.s fa6, ft5, fa6 +80000a74: d3 78 13 11 fmul.s fa7, ft6, fa7 +80000a78: 53 fe c3 11 fmul.s ft8, ft7, ft8 +80000a7c: d3 7e d5 11 fmul.s ft9, fa0, ft9 +80000a80: d3 f5 c5 00 fadd.s fa1, fa1, fa2 +80000a84: d3 f5 d5 00 fadd.s fa1, fa1, fa3 +80000a88: d3 f5 e5 00 fadd.s fa1, fa1, fa4 +80000a8c: d3 f5 f5 00 fadd.s fa1, fa1, fa5 +80000a90: d3 f5 05 01 fadd.s fa1, fa1, fa6 +80000a94: d3 f5 15 01 fadd.s fa1, fa1, fa7 +80000a98: d3 f5 c5 01 fadd.s fa1, fa1, ft8 +80000a9c: d3 f5 d5 01 fadd.s fa1, fa1, ft9 +80000aa0: 27 a0 b5 00 fsw fa1, 0(a1) +80000aa4: 13 07 17 00 addi a4, a4, 1 +80000aa8: 13 04 44 00 addi s0, s0, 4 +80000aac: 93 85 45 00 addi a1, a1, 4 +80000ab0: 13 05 45 00 addi a0, a0, 4 +80000ab4: 13 06 46 00 addi a2, a2, 4 +80000ab8: e3 66 87 f3 bltu a4, s8, -212 +80000abc: 93 8b 1b 00 addi s7, s7, 1 +80000ac0: 33 0b 2b 01 add s6, s6, s2 +80000ac4: b3 8a 2a 01 add s5, s5, s2 +80000ac8: 33 0a 2a 01 add s4, s4, s2 +80000acc: b3 89 29 01 add s3, s3, s2 +80000ad0: e3 e4 1b ef bltu s7, a7, -280 +80000ad4: 13 08 18 00 addi a6, a6, 1 +80000ad8: 03 25 81 00 lw a0, 8(sp) +80000adc: e3 64 a8 ec bltu a6, a0, -312 +80000ae0: 83 2d c1 00 lw s11, 12(sp) +80000ae4: 03 2d 01 01 lw s10, 16(sp) +80000ae8: 83 2c 41 01 lw s9, 20(sp) +80000aec: 03 2c 81 01 lw s8, 24(sp) +80000af0: 83 2b c1 01 lw s7, 28(sp) +80000af4: 03 2b 01 02 lw s6, 32(sp) +80000af8: 83 2a 41 02 lw s5, 36(sp) +80000afc: 03 2a 81 02 lw s4, 40(sp) +80000b00: 83 29 c1 02 lw s3, 44(sp) +80000b04: 03 29 01 03 lw s2, 48(sp) +80000b08: 83 24 41 03 lw s1, 52(sp) +80000b0c: 03 24 81 03 lw s0, 56(sp) +80000b10: 83 20 c1 03 lw ra, 60(sp) +80000b14: 13 01 01 04 addi sp, sp, 64 +80000b18: 67 80 00 00 ret + +80000b1c _exit: +80000b1c: 13 05 00 00 mv a0, zero +80000b20: 6b 00 05 00 + +80000b24 vx_set_sp: +80000b24: 73 25 50 02 csrr a0, 37 +80000b28: 6b 00 05 00 +80000b2c: 97 11 00 00 auipc gp, 1 +80000b30: 93 81 c1 cd addi gp, gp, -804 +80000b34: f3 25 20 02 csrr a1, 34 +80000b38: 93 95 a5 00 slli a1, a1, 10 +80000b3c: 73 26 00 02 csrr a2, 32 +80000b40: 13 16 26 00 slli a2, a2, 2 +80000b44: 37 f1 ff 6f lui sp, 458751 +80000b48: 33 01 b1 40 sub sp, sp, a1 +80000b4c: 33 01 c1 00 add sp, sp, a2 +80000b50: f3 26 10 02 csrr a3, 33 +80000b54: 63 86 06 00 beqz a3, 12 +80000b58: 13 05 00 00 mv a0, zero +80000b5c: 6b 00 05 00 + +80000b60 RETURN: 80000b60: 67 80 00 00 ret -80000b64 _exit: -80000b64: 13 05 00 00 mv a0, zero -80000b68: 6b 00 05 00 +80000b64 vx_wspawn: +80000b64: 6b 10 b5 00 +80000b68: 67 80 00 00 ret -80000b6c vx_set_sp: -80000b6c: 73 25 50 02 csrr a0, 37 -80000b70: 6b 00 05 00 -80000b74: 97 11 00 00 auipc gp, 1 -80000b78: 93 81 41 c9 addi gp, gp, -876 -80000b7c: f3 25 20 02 csrr a1, 34 -80000b80: 93 95 a5 00 slli a1, a1, 10 -80000b84: 73 26 00 02 csrr a2, 32 -80000b88: 13 16 26 00 slli a2, a2, 2 -80000b8c: 37 f1 ff 6f lui sp, 458751 -80000b90: 33 01 b1 40 sub sp, sp, a1 -80000b94: 33 01 c1 00 add sp, sp, a2 -80000b98: f3 26 10 02 csrr a3, 33 -80000b9c: 63 86 06 00 beqz a3, 12 -80000ba0: 13 05 00 00 mv a0, zero -80000ba4: 6b 00 05 00 +80000b6c vx_tmc: +80000b6c: 6b 00 05 00 +80000b70: 67 80 00 00 ret -80000ba8 RETURN: +80000b74 vx_barrier: +80000b74: 6b 40 b5 00 +80000b78: 67 80 00 00 ret + +80000b7c vx_split: +80000b7c: 6b 20 05 00 +80000b80: 67 80 00 00 ret + +80000b84 vx_join: +80000b84: 6b 30 00 00 +80000b88: 67 80 00 00 ret + +80000b8c vx_warp_id: +80000b8c: 73 25 10 02 csrr a0, 33 +80000b90: 67 80 00 00 ret + +80000b94 vx_warp_gid: +80000b94: 73 25 30 02 csrr a0, 35 +80000b98: 67 80 00 00 ret + +80000b9c vx_thread_id: +80000b9c: 73 25 00 02 csrr a0, 32 +80000ba0: 67 80 00 00 ret + +80000ba4 vx_thread_gid: +80000ba4: 73 25 20 02 csrr a0, 34 80000ba8: 67 80 00 00 ret -80000bac vx_wspawn: -80000bac: 6b 10 b5 00 +80000bac vx_core_id: +80000bac: 73 25 40 02 csrr a0, 36 80000bb0: 67 80 00 00 ret -80000bb4 vx_tmc: -80000bb4: 6b 00 05 00 +80000bb4 vx_num_threads: +80000bb4: 73 25 50 02 csrr a0, 37 80000bb8: 67 80 00 00 ret -80000bbc vx_barrier: -80000bbc: 6b 40 b5 00 +80000bbc vx_num_warps: +80000bbc: 73 25 60 02 csrr a0, 38 80000bc0: 67 80 00 00 ret -80000bc4 vx_split: -80000bc4: 6b 20 05 00 +80000bc4 vx_num_cores: +80000bc4: 73 25 70 02 csrr a0, 39 80000bc8: 67 80 00 00 ret -80000bcc vx_join: -80000bcc: 6b 30 00 00 +80000bcc vx_num_cycles: +80000bcc: 73 25 00 b0 csrr a0, mcycle 80000bd0: 67 80 00 00 ret -80000bd4 vx_warp_id: -80000bd4: 73 25 10 02 csrr a0, 33 +80000bd4 vx_num_instrs: +80000bd4: 73 25 20 b0 csrr a0, minstret 80000bd8: 67 80 00 00 ret -80000bdc vx_warp_gid: -80000bdc: 73 25 30 02 csrr a0, 35 -80000be0: 67 80 00 00 ret +80000bdc atexit: +80000bdc: 93 05 05 00 mv a1, a0 +80000be0: 93 06 00 00 mv a3, zero +80000be4: 13 06 00 00 mv a2, zero +80000be8: 13 05 00 00 mv a0, zero +80000bec: 6f 00 80 20 j 520 -80000be4 vx_thread_id: -80000be4: 73 25 00 02 csrr a0, 32 -80000be8: 67 80 00 00 ret +80000bf0 exit: +80000bf0: 13 01 01 ff addi sp, sp, -16 +80000bf4: 93 05 00 00 mv a1, zero +80000bf8: 23 24 81 00 sw s0, 8(sp) +80000bfc: 23 26 11 00 sw ra, 12(sp) +80000c00: 13 04 05 00 mv s0, a0 +80000c04: ef 00 80 28 jal 648 +80000c08: 03 a5 81 c2 lw a0, -984(gp) +80000c0c: 83 27 c5 03 lw a5, 60(a0) +80000c10: 63 84 07 00 beqz a5, 8 +80000c14: e7 80 07 00 jalr a5 +80000c18: 13 05 04 00 mv a0, s0 +80000c1c: ef f0 1f f0 jal -256 -80000bec vx_thread_gid: -80000bec: 73 25 20 02 csrr a0, 34 -80000bf0: 67 80 00 00 ret +80000c20 __libc_fini_array: +80000c20: 13 01 01 ff addi sp, sp, -16 +80000c24: 23 24 81 00 sw s0, 8(sp) +80000c28: b7 17 00 80 lui a5, 524289 +80000c2c: 37 14 00 80 lui s0, 524289 +80000c30: 13 04 44 00 addi s0, s0, 4 +80000c34: 93 87 47 00 addi a5, a5, 4 +80000c38: b3 87 87 40 sub a5, a5, s0 +80000c3c: 23 22 91 00 sw s1, 4(sp) +80000c40: 23 26 11 00 sw ra, 12(sp) +80000c44: 93 d4 27 40 srai s1, a5, 2 +80000c48: 63 80 04 02 beqz s1, 32 +80000c4c: 93 87 c7 ff addi a5, a5, -4 +80000c50: 33 84 87 00 add s0, a5, s0 +80000c54: 83 27 04 00 lw a5, 0(s0) +80000c58: 93 84 f4 ff addi s1, s1, -1 +80000c5c: 13 04 c4 ff addi s0, s0, -4 +80000c60: e7 80 07 00 jalr a5 +80000c64: e3 98 04 fe bnez s1, -16 +80000c68: 83 20 c1 00 lw ra, 12(sp) +80000c6c: 03 24 81 00 lw s0, 8(sp) +80000c70: 83 24 41 00 lw s1, 4(sp) +80000c74: 13 01 01 01 addi sp, sp, 16 +80000c78: 67 80 00 00 ret -80000bf4 vx_core_id: -80000bf4: 73 25 40 02 csrr a0, 36 -80000bf8: 67 80 00 00 ret +80000c7c __libc_init_array: +80000c7c: 13 01 01 ff addi sp, sp, -16 +80000c80: 23 24 81 00 sw s0, 8(sp) +80000c84: 23 20 21 01 sw s2, 0(sp) +80000c88: 37 14 00 80 lui s0, 524289 +80000c8c: 37 19 00 80 lui s2, 524289 +80000c90: 93 07 04 00 mv a5, s0 +80000c94: 13 09 09 00 mv s2, s2 +80000c98: 33 09 f9 40 sub s2, s2, a5 +80000c9c: 23 26 11 00 sw ra, 12(sp) +80000ca0: 23 22 91 00 sw s1, 4(sp) +80000ca4: 13 59 29 40 srai s2, s2, 2 +80000ca8: 63 00 09 02 beqz s2, 32 +80000cac: 13 04 04 00 mv s0, s0 +80000cb0: 93 04 00 00 mv s1, zero +80000cb4: 83 27 04 00 lw a5, 0(s0) +80000cb8: 93 84 14 00 addi s1, s1, 1 +80000cbc: 13 04 44 00 addi s0, s0, 4 +80000cc0: e7 80 07 00 jalr a5 +80000cc4: e3 18 99 fe bne s2, s1, -16 +80000cc8: 37 14 00 80 lui s0, 524289 +80000ccc: 37 19 00 80 lui s2, 524289 +80000cd0: 93 07 04 00 mv a5, s0 +80000cd4: 13 09 49 00 addi s2, s2, 4 +80000cd8: 33 09 f9 40 sub s2, s2, a5 +80000cdc: 13 59 29 40 srai s2, s2, 2 +80000ce0: 63 00 09 02 beqz s2, 32 +80000ce4: 13 04 04 00 mv s0, s0 +80000ce8: 93 04 00 00 mv s1, zero +80000cec: 83 27 04 00 lw a5, 0(s0) +80000cf0: 93 84 14 00 addi s1, s1, 1 +80000cf4: 13 04 44 00 addi s0, s0, 4 +80000cf8: e7 80 07 00 jalr a5 +80000cfc: e3 18 99 fe bne s2, s1, -16 +80000d00: 83 20 c1 00 lw ra, 12(sp) +80000d04: 03 24 81 00 lw s0, 8(sp) +80000d08: 83 24 41 00 lw s1, 4(sp) +80000d0c: 03 29 01 00 lw s2, 0(sp) +80000d10: 13 01 01 01 addi sp, sp, 16 +80000d14: 67 80 00 00 ret -80000bfc vx_num_threads: -80000bfc: 73 25 50 02 csrr a0, 37 -80000c00: 67 80 00 00 ret +80000d18 memset: +80000d18: 13 03 f0 00 addi t1, zero, 15 +80000d1c: 13 07 05 00 mv a4, a0 +80000d20: 63 7e c3 02 bgeu t1, a2, 60 +80000d24: 93 77 f7 00 andi a5, a4, 15 +80000d28: 63 90 07 0a bnez a5, 160 +80000d2c: 63 92 05 08 bnez a1, 132 +80000d30: 93 76 06 ff andi a3, a2, -16 +80000d34: 13 76 f6 00 andi a2, a2, 15 +80000d38: b3 86 e6 00 add a3, a3, a4 +80000d3c: 23 20 b7 00 sw a1, 0(a4) +80000d40: 23 22 b7 00 sw a1, 4(a4) +80000d44: 23 24 b7 00 sw a1, 8(a4) +80000d48: 23 26 b7 00 sw a1, 12(a4) +80000d4c: 13 07 07 01 addi a4, a4, 16 +80000d50: e3 66 d7 fe bltu a4, a3, -20 +80000d54: 63 14 06 00 bnez a2, 8 +80000d58: 67 80 00 00 ret +80000d5c: b3 06 c3 40 sub a3, t1, a2 +80000d60: 93 96 26 00 slli a3, a3, 2 +80000d64: 97 02 00 00 auipc t0, 0 +80000d68: b3 86 56 00 add a3, a3, t0 +80000d6c: 67 80 c6 00 jr 12(a3) +80000d70: 23 07 b7 00 sb a1, 14(a4) +80000d74: a3 06 b7 00 sb a1, 13(a4) +80000d78: 23 06 b7 00 sb a1, 12(a4) +80000d7c: a3 05 b7 00 sb a1, 11(a4) +80000d80: 23 05 b7 00 sb a1, 10(a4) +80000d84: a3 04 b7 00 sb a1, 9(a4) +80000d88: 23 04 b7 00 sb a1, 8(a4) +80000d8c: a3 03 b7 00 sb a1, 7(a4) +80000d90: 23 03 b7 00 sb a1, 6(a4) +80000d94: a3 02 b7 00 sb a1, 5(a4) +80000d98: 23 02 b7 00 sb a1, 4(a4) +80000d9c: a3 01 b7 00 sb a1, 3(a4) +80000da0: 23 01 b7 00 sb a1, 2(a4) +80000da4: a3 00 b7 00 sb a1, 1(a4) +80000da8: 23 00 b7 00 sb a1, 0(a4) +80000dac: 67 80 00 00 ret +80000db0: 93 f5 f5 0f andi a1, a1, 255 +80000db4: 93 96 85 00 slli a3, a1, 8 +80000db8: b3 e5 d5 00 or a1, a1, a3 +80000dbc: 93 96 05 01 slli a3, a1, 16 +80000dc0: b3 e5 d5 00 or a1, a1, a3 +80000dc4: 6f f0 df f6 j -148 +80000dc8: 93 96 27 00 slli a3, a5, 2 +80000dcc: 97 02 00 00 auipc t0, 0 +80000dd0: b3 86 56 00 add a3, a3, t0 +80000dd4: 93 82 00 00 mv t0, ra +80000dd8: e7 80 06 fa jalr -96(a3) +80000ddc: 93 80 02 00 mv ra, t0 +80000de0: 93 87 07 ff addi a5, a5, -16 +80000de4: 33 07 f7 40 sub a4, a4, a5 +80000de8: 33 06 f6 00 add a2, a2, a5 +80000dec: e3 78 c3 f6 bgeu t1, a2, -144 +80000df0: 6f f0 df f3 j -196 -80000c04 vx_num_warps: -80000c04: 73 25 60 02 csrr a0, 38 -80000c08: 67 80 00 00 ret +80000df4 __register_exitproc: +80000df4: 03 a7 81 c2 lw a4, -984(gp) +80000df8: 83 27 87 14 lw a5, 328(a4) +80000dfc: 63 8c 07 04 beqz a5, 88 +80000e00: 03 a7 47 00 lw a4, 4(a5) +80000e04: 13 08 f0 01 addi a6, zero, 31 +80000e08: 63 4e e8 06 blt a6, a4, 124 +80000e0c: 13 18 27 00 slli a6, a4, 2 +80000e10: 63 06 05 02 beqz a0, 44 +80000e14: 33 83 07 01 add t1, a5, a6 +80000e18: 23 24 c3 08 sw a2, 136(t1) +80000e1c: 83 a8 87 18 lw a7, 392(a5) +80000e20: 13 06 10 00 addi a2, zero, 1 +80000e24: 33 16 e6 00 sll a2, a2, a4 +80000e28: b3 e8 c8 00 or a7, a7, a2 +80000e2c: 23 a4 17 19 sw a7, 392(a5) +80000e30: 23 24 d3 10 sw a3, 264(t1) +80000e34: 93 06 20 00 addi a3, zero, 2 +80000e38: 63 04 d5 02 beq a0, a3, 40 +80000e3c: 13 07 17 00 addi a4, a4, 1 +80000e40: 23 a2 e7 00 sw a4, 4(a5) +80000e44: b3 87 07 01 add a5, a5, a6 +80000e48: 23 a4 b7 00 sw a1, 8(a5) +80000e4c: 13 05 00 00 mv a0, zero +80000e50: 67 80 00 00 ret +80000e54: 93 07 c7 14 addi a5, a4, 332 +80000e58: 23 24 f7 14 sw a5, 328(a4) +80000e5c: 6f f0 5f fa j -92 +80000e60: 83 a6 c7 18 lw a3, 396(a5) +80000e64: 13 07 17 00 addi a4, a4, 1 +80000e68: 23 a2 e7 00 sw a4, 4(a5) +80000e6c: 33 e6 c6 00 or a2, a3, a2 +80000e70: 23 a6 c7 18 sw a2, 396(a5) +80000e74: b3 87 07 01 add a5, a5, a6 +80000e78: 23 a4 b7 00 sw a1, 8(a5) +80000e7c: 13 05 00 00 mv a0, zero +80000e80: 67 80 00 00 ret +80000e84: 13 05 f0 ff addi a0, zero, -1 +80000e88: 67 80 00 00 ret -80000c0c vx_num_cores: -80000c0c: 73 25 70 02 csrr a0, 39 -80000c10: 67 80 00 00 ret - -80000c14 vx_num_cycles: -80000c14: 73 25 00 c0 rdcycle a0 -80000c18: 67 80 00 00 ret - -80000c1c vx_num_instrs: -80000c1c: 73 25 20 c0 rdinstret a0 -80000c20: 67 80 00 00 ret - -80000c24 atexit: -80000c24: 93 05 05 00 mv a1, a0 -80000c28: 93 06 00 00 mv a3, zero -80000c2c: 13 06 00 00 mv a2, zero -80000c30: 13 05 00 00 mv a0, zero -80000c34: 6f 00 80 20 j 520 - -80000c38 exit: -80000c38: 13 01 01 ff addi sp, sp, -16 -80000c3c: 93 05 00 00 mv a1, zero -80000c40: 23 24 81 00 sw s0, 8(sp) -80000c44: 23 26 11 00 sw ra, 12(sp) -80000c48: 13 04 05 00 mv s0, a0 -80000c4c: ef 00 80 28 jal 648 -80000c50: 03 a5 81 c2 lw a0, -984(gp) -80000c54: 83 27 c5 03 lw a5, 60(a0) -80000c58: 63 84 07 00 beqz a5, 8 -80000c5c: e7 80 07 00 jalr a5 -80000c60: 13 05 04 00 mv a0, s0 -80000c64: ef f0 1f f0 jal -256 - -80000c68 __libc_fini_array: -80000c68: 13 01 01 ff addi sp, sp, -16 -80000c6c: 23 24 81 00 sw s0, 8(sp) -80000c70: b7 17 00 80 lui a5, 524289 -80000c74: 37 14 00 80 lui s0, 524289 -80000c78: 13 04 44 00 addi s0, s0, 4 -80000c7c: 93 87 47 00 addi a5, a5, 4 -80000c80: b3 87 87 40 sub a5, a5, s0 -80000c84: 23 22 91 00 sw s1, 4(sp) -80000c88: 23 26 11 00 sw ra, 12(sp) -80000c8c: 93 d4 27 40 srai s1, a5, 2 -80000c90: 63 80 04 02 beqz s1, 32 -80000c94: 93 87 c7 ff addi a5, a5, -4 -80000c98: 33 84 87 00 add s0, a5, s0 -80000c9c: 83 27 04 00 lw a5, 0(s0) -80000ca0: 93 84 f4 ff addi s1, s1, -1 -80000ca4: 13 04 c4 ff addi s0, s0, -4 -80000ca8: e7 80 07 00 jalr a5 -80000cac: e3 98 04 fe bnez s1, -16 -80000cb0: 83 20 c1 00 lw ra, 12(sp) -80000cb4: 03 24 81 00 lw s0, 8(sp) -80000cb8: 83 24 41 00 lw s1, 4(sp) -80000cbc: 13 01 01 01 addi sp, sp, 16 -80000cc0: 67 80 00 00 ret - -80000cc4 __libc_init_array: -80000cc4: 13 01 01 ff addi sp, sp, -16 -80000cc8: 23 24 81 00 sw s0, 8(sp) -80000ccc: 23 20 21 01 sw s2, 0(sp) -80000cd0: 37 14 00 80 lui s0, 524289 -80000cd4: 37 19 00 80 lui s2, 524289 -80000cd8: 93 07 04 00 mv a5, s0 -80000cdc: 13 09 09 00 mv s2, s2 -80000ce0: 33 09 f9 40 sub s2, s2, a5 -80000ce4: 23 26 11 00 sw ra, 12(sp) -80000ce8: 23 22 91 00 sw s1, 4(sp) -80000cec: 13 59 29 40 srai s2, s2, 2 -80000cf0: 63 00 09 02 beqz s2, 32 -80000cf4: 13 04 04 00 mv s0, s0 -80000cf8: 93 04 00 00 mv s1, zero -80000cfc: 83 27 04 00 lw a5, 0(s0) -80000d00: 93 84 14 00 addi s1, s1, 1 -80000d04: 13 04 44 00 addi s0, s0, 4 -80000d08: e7 80 07 00 jalr a5 -80000d0c: e3 18 99 fe bne s2, s1, -16 -80000d10: 37 14 00 80 lui s0, 524289 -80000d14: 37 19 00 80 lui s2, 524289 -80000d18: 93 07 04 00 mv a5, s0 -80000d1c: 13 09 49 00 addi s2, s2, 4 -80000d20: 33 09 f9 40 sub s2, s2, a5 -80000d24: 13 59 29 40 srai s2, s2, 2 -80000d28: 63 00 09 02 beqz s2, 32 -80000d2c: 13 04 04 00 mv s0, s0 -80000d30: 93 04 00 00 mv s1, zero -80000d34: 83 27 04 00 lw a5, 0(s0) -80000d38: 93 84 14 00 addi s1, s1, 1 -80000d3c: 13 04 44 00 addi s0, s0, 4 -80000d40: e7 80 07 00 jalr a5 -80000d44: e3 18 99 fe bne s2, s1, -16 -80000d48: 83 20 c1 00 lw ra, 12(sp) -80000d4c: 03 24 81 00 lw s0, 8(sp) -80000d50: 83 24 41 00 lw s1, 4(sp) -80000d54: 03 29 01 00 lw s2, 0(sp) -80000d58: 13 01 01 01 addi sp, sp, 16 -80000d5c: 67 80 00 00 ret - -80000d60 memset: -80000d60: 13 03 f0 00 addi t1, zero, 15 -80000d64: 13 07 05 00 mv a4, a0 -80000d68: 63 7e c3 02 bgeu t1, a2, 60 -80000d6c: 93 77 f7 00 andi a5, a4, 15 -80000d70: 63 90 07 0a bnez a5, 160 -80000d74: 63 92 05 08 bnez a1, 132 -80000d78: 93 76 06 ff andi a3, a2, -16 -80000d7c: 13 76 f6 00 andi a2, a2, 15 -80000d80: b3 86 e6 00 add a3, a3, a4 -80000d84: 23 20 b7 00 sw a1, 0(a4) -80000d88: 23 22 b7 00 sw a1, 4(a4) -80000d8c: 23 24 b7 00 sw a1, 8(a4) -80000d90: 23 26 b7 00 sw a1, 12(a4) -80000d94: 13 07 07 01 addi a4, a4, 16 -80000d98: e3 66 d7 fe bltu a4, a3, -20 -80000d9c: 63 14 06 00 bnez a2, 8 -80000da0: 67 80 00 00 ret -80000da4: b3 06 c3 40 sub a3, t1, a2 -80000da8: 93 96 26 00 slli a3, a3, 2 -80000dac: 97 02 00 00 auipc t0, 0 -80000db0: b3 86 56 00 add a3, a3, t0 -80000db4: 67 80 c6 00 jr 12(a3) -80000db8: 23 07 b7 00 sb a1, 14(a4) -80000dbc: a3 06 b7 00 sb a1, 13(a4) -80000dc0: 23 06 b7 00 sb a1, 12(a4) -80000dc4: a3 05 b7 00 sb a1, 11(a4) -80000dc8: 23 05 b7 00 sb a1, 10(a4) -80000dcc: a3 04 b7 00 sb a1, 9(a4) -80000dd0: 23 04 b7 00 sb a1, 8(a4) -80000dd4: a3 03 b7 00 sb a1, 7(a4) -80000dd8: 23 03 b7 00 sb a1, 6(a4) -80000ddc: a3 02 b7 00 sb a1, 5(a4) -80000de0: 23 02 b7 00 sb a1, 4(a4) -80000de4: a3 01 b7 00 sb a1, 3(a4) -80000de8: 23 01 b7 00 sb a1, 2(a4) -80000dec: a3 00 b7 00 sb a1, 1(a4) -80000df0: 23 00 b7 00 sb a1, 0(a4) -80000df4: 67 80 00 00 ret -80000df8: 93 f5 f5 0f andi a1, a1, 255 -80000dfc: 93 96 85 00 slli a3, a1, 8 -80000e00: b3 e5 d5 00 or a1, a1, a3 -80000e04: 93 96 05 01 slli a3, a1, 16 -80000e08: b3 e5 d5 00 or a1, a1, a3 -80000e0c: 6f f0 df f6 j -148 -80000e10: 93 96 27 00 slli a3, a5, 2 -80000e14: 97 02 00 00 auipc t0, 0 -80000e18: b3 86 56 00 add a3, a3, t0 -80000e1c: 93 82 00 00 mv t0, ra -80000e20: e7 80 06 fa jalr -96(a3) -80000e24: 93 80 02 00 mv ra, t0 -80000e28: 93 87 07 ff addi a5, a5, -16 -80000e2c: 33 07 f7 40 sub a4, a4, a5 -80000e30: 33 06 f6 00 add a2, a2, a5 -80000e34: e3 78 c3 f6 bgeu t1, a2, -144 -80000e38: 6f f0 df f3 j -196 - -80000e3c __register_exitproc: -80000e3c: 03 a7 81 c2 lw a4, -984(gp) -80000e40: 83 27 87 14 lw a5, 328(a4) -80000e44: 63 8c 07 04 beqz a5, 88 -80000e48: 03 a7 47 00 lw a4, 4(a5) -80000e4c: 13 08 f0 01 addi a6, zero, 31 -80000e50: 63 4e e8 06 blt a6, a4, 124 -80000e54: 13 18 27 00 slli a6, a4, 2 -80000e58: 63 06 05 02 beqz a0, 44 -80000e5c: 33 83 07 01 add t1, a5, a6 -80000e60: 23 24 c3 08 sw a2, 136(t1) -80000e64: 83 a8 87 18 lw a7, 392(a5) -80000e68: 13 06 10 00 addi a2, zero, 1 -80000e6c: 33 16 e6 00 sll a2, a2, a4 -80000e70: b3 e8 c8 00 or a7, a7, a2 -80000e74: 23 a4 17 19 sw a7, 392(a5) -80000e78: 23 24 d3 10 sw a3, 264(t1) -80000e7c: 93 06 20 00 addi a3, zero, 2 -80000e80: 63 04 d5 02 beq a0, a3, 40 -80000e84: 13 07 17 00 addi a4, a4, 1 -80000e88: 23 a2 e7 00 sw a4, 4(a5) -80000e8c: b3 87 07 01 add a5, a5, a6 -80000e90: 23 a4 b7 00 sw a1, 8(a5) -80000e94: 13 05 00 00 mv a0, zero -80000e98: 67 80 00 00 ret -80000e9c: 93 07 c7 14 addi a5, a4, 332 -80000ea0: 23 24 f7 14 sw a5, 328(a4) -80000ea4: 6f f0 5f fa j -92 -80000ea8: 83 a6 c7 18 lw a3, 396(a5) -80000eac: 13 07 17 00 addi a4, a4, 1 -80000eb0: 23 a2 e7 00 sw a4, 4(a5) -80000eb4: 33 e6 c6 00 or a2, a3, a2 -80000eb8: 23 a6 c7 18 sw a2, 396(a5) -80000ebc: b3 87 07 01 add a5, a5, a6 -80000ec0: 23 a4 b7 00 sw a1, 8(a5) -80000ec4: 13 05 00 00 mv a0, zero -80000ec8: 67 80 00 00 ret -80000ecc: 13 05 f0 ff addi a0, zero, -1 -80000ed0: 67 80 00 00 ret - -80000ed4 __call_exitprocs: -80000ed4: 13 01 01 fd addi sp, sp, -48 -80000ed8: 23 2c 41 01 sw s4, 24(sp) -80000edc: 03 aa 81 c2 lw s4, -984(gp) -80000ee0: 23 20 21 03 sw s2, 32(sp) -80000ee4: 23 26 11 02 sw ra, 44(sp) -80000ee8: 03 29 8a 14 lw s2, 328(s4) -80000eec: 23 24 81 02 sw s0, 40(sp) -80000ef0: 23 22 91 02 sw s1, 36(sp) -80000ef4: 23 2e 31 01 sw s3, 28(sp) -80000ef8: 23 2a 51 01 sw s5, 20(sp) -80000efc: 23 28 61 01 sw s6, 16(sp) -80000f00: 23 26 71 01 sw s7, 12(sp) -80000f04: 23 24 81 01 sw s8, 8(sp) -80000f08: 63 00 09 04 beqz s2, 64 -80000f0c: 13 0b 05 00 mv s6, a0 -80000f10: 93 8b 05 00 mv s7, a1 -80000f14: 93 0a 10 00 addi s5, zero, 1 -80000f18: 93 09 f0 ff addi s3, zero, -1 -80000f1c: 83 24 49 00 lw s1, 4(s2) -80000f20: 13 84 f4 ff addi s0, s1, -1 -80000f24: 63 42 04 02 bltz s0, 36 -80000f28: 93 94 24 00 slli s1, s1, 2 -80000f2c: b3 04 99 00 add s1, s2, s1 -80000f30: 63 84 0b 04 beqz s7, 72 -80000f34: 83 a7 44 10 lw a5, 260(s1) -80000f38: 63 80 77 05 beq a5, s7, 64 -80000f3c: 13 04 f4 ff addi s0, s0, -1 -80000f40: 93 84 c4 ff addi s1, s1, -4 -80000f44: e3 16 34 ff bne s0, s3, -20 -80000f48: 83 20 c1 02 lw ra, 44(sp) -80000f4c: 03 24 81 02 lw s0, 40(sp) -80000f50: 83 24 41 02 lw s1, 36(sp) -80000f54: 03 29 01 02 lw s2, 32(sp) -80000f58: 83 29 c1 01 lw s3, 28(sp) -80000f5c: 03 2a 81 01 lw s4, 24(sp) -80000f60: 83 2a 41 01 lw s5, 20(sp) -80000f64: 03 2b 01 01 lw s6, 16(sp) -80000f68: 83 2b c1 00 lw s7, 12(sp) -80000f6c: 03 2c 81 00 lw s8, 8(sp) -80000f70: 13 01 01 03 addi sp, sp, 48 -80000f74: 67 80 00 00 ret -80000f78: 83 27 49 00 lw a5, 4(s2) -80000f7c: 83 a6 44 00 lw a3, 4(s1) -80000f80: 93 87 f7 ff addi a5, a5, -1 -80000f84: 63 8e 87 04 beq a5, s0, 92 -80000f88: 23 a2 04 00 sw zero, 4(s1) -80000f8c: e3 88 06 fa beqz a3, -80 -80000f90: 83 27 89 18 lw a5, 392(s2) -80000f94: 33 97 8a 00 sll a4, s5, s0 -80000f98: 03 2c 49 00 lw s8, 4(s2) -80000f9c: b3 77 f7 00 and a5, a4, a5 -80000fa0: 63 92 07 02 bnez a5, 36 +80000e8c __call_exitprocs: +80000e8c: 13 01 01 fd addi sp, sp, -48 +80000e90: 23 2c 41 01 sw s4, 24(sp) +80000e94: 03 aa 81 c2 lw s4, -984(gp) +80000e98: 23 20 21 03 sw s2, 32(sp) +80000e9c: 23 26 11 02 sw ra, 44(sp) +80000ea0: 03 29 8a 14 lw s2, 328(s4) +80000ea4: 23 24 81 02 sw s0, 40(sp) +80000ea8: 23 22 91 02 sw s1, 36(sp) +80000eac: 23 2e 31 01 sw s3, 28(sp) +80000eb0: 23 2a 51 01 sw s5, 20(sp) +80000eb4: 23 28 61 01 sw s6, 16(sp) +80000eb8: 23 26 71 01 sw s7, 12(sp) +80000ebc: 23 24 81 01 sw s8, 8(sp) +80000ec0: 63 00 09 04 beqz s2, 64 +80000ec4: 13 0b 05 00 mv s6, a0 +80000ec8: 93 8b 05 00 mv s7, a1 +80000ecc: 93 0a 10 00 addi s5, zero, 1 +80000ed0: 93 09 f0 ff addi s3, zero, -1 +80000ed4: 83 24 49 00 lw s1, 4(s2) +80000ed8: 13 84 f4 ff addi s0, s1, -1 +80000edc: 63 42 04 02 bltz s0, 36 +80000ee0: 93 94 24 00 slli s1, s1, 2 +80000ee4: b3 04 99 00 add s1, s2, s1 +80000ee8: 63 84 0b 04 beqz s7, 72 +80000eec: 83 a7 44 10 lw a5, 260(s1) +80000ef0: 63 80 77 05 beq a5, s7, 64 +80000ef4: 13 04 f4 ff addi s0, s0, -1 +80000ef8: 93 84 c4 ff addi s1, s1, -4 +80000efc: e3 16 34 ff bne s0, s3, -20 +80000f00: 83 20 c1 02 lw ra, 44(sp) +80000f04: 03 24 81 02 lw s0, 40(sp) +80000f08: 83 24 41 02 lw s1, 36(sp) +80000f0c: 03 29 01 02 lw s2, 32(sp) +80000f10: 83 29 c1 01 lw s3, 28(sp) +80000f14: 03 2a 81 01 lw s4, 24(sp) +80000f18: 83 2a 41 01 lw s5, 20(sp) +80000f1c: 03 2b 01 01 lw s6, 16(sp) +80000f20: 83 2b c1 00 lw s7, 12(sp) +80000f24: 03 2c 81 00 lw s8, 8(sp) +80000f28: 13 01 01 03 addi sp, sp, 48 +80000f2c: 67 80 00 00 ret +80000f30: 83 27 49 00 lw a5, 4(s2) +80000f34: 83 a6 44 00 lw a3, 4(s1) +80000f38: 93 87 f7 ff addi a5, a5, -1 +80000f3c: 63 8e 87 04 beq a5, s0, 92 +80000f40: 23 a2 04 00 sw zero, 4(s1) +80000f44: e3 88 06 fa beqz a3, -80 +80000f48: 83 27 89 18 lw a5, 392(s2) +80000f4c: 33 97 8a 00 sll a4, s5, s0 +80000f50: 03 2c 49 00 lw s8, 4(s2) +80000f54: b3 77 f7 00 and a5, a4, a5 +80000f58: 63 92 07 02 bnez a5, 36 +80000f5c: e7 80 06 00 jalr a3 +80000f60: 03 27 49 00 lw a4, 4(s2) +80000f64: 83 27 8a 14 lw a5, 328(s4) +80000f68: 63 14 87 01 bne a4, s8, 8 +80000f6c: e3 04 f9 f8 beq s2, a5, -120 +80000f70: e3 88 07 f8 beqz a5, -112 +80000f74: 13 89 07 00 mv s2, a5 +80000f78: 6f f0 df f5 j -164 +80000f7c: 83 27 c9 18 lw a5, 396(s2) +80000f80: 83 a5 44 08 lw a1, 132(s1) +80000f84: 33 77 f7 00 and a4, a4, a5 +80000f88: 63 1c 07 00 bnez a4, 24 +80000f8c: 13 05 0b 00 mv a0, s6 +80000f90: e7 80 06 00 jalr a3 +80000f94: 6f f0 df fc j -52 +80000f98: 23 22 89 00 sw s0, 4(s2) +80000f9c: 6f f0 9f fa j -88 +80000fa0: 13 85 05 00 mv a0, a1 80000fa4: e7 80 06 00 jalr a3 -80000fa8: 03 27 49 00 lw a4, 4(s2) -80000fac: 83 27 8a 14 lw a5, 328(s4) -80000fb0: 63 14 87 01 bne a4, s8, 8 -80000fb4: e3 04 f9 f8 beq s2, a5, -120 -80000fb8: e3 88 07 f8 beqz a5, -112 -80000fbc: 13 89 07 00 mv s2, a5 -80000fc0: 6f f0 df f5 j -164 -80000fc4: 83 27 c9 18 lw a5, 396(s2) -80000fc8: 83 a5 44 08 lw a1, 132(s1) -80000fcc: 33 77 f7 00 and a4, a4, a5 -80000fd0: 63 1c 07 00 bnez a4, 24 -80000fd4: 13 05 0b 00 mv a0, s6 -80000fd8: e7 80 06 00 jalr a3 -80000fdc: 6f f0 df fc j -52 -80000fe0: 23 22 89 00 sw s0, 4(s2) -80000fe4: 6f f0 9f fa j -88 -80000fe8: 13 85 05 00 mv a0, a1 -80000fec: e7 80 06 00 jalr a3 -80000ff0: 6f f0 9f fb j -72 +80000fa8: 6f f0 9f fb j -72 Disassembly of section .init_array: @@ -1267,7 +1249,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: a8 0b + a4: 60 0b a6: 00 80 a8: 00 00 aa: 00 00 @@ -1385,7 +1367,7 @@ Disassembly of section .symtab: 1de: f1 ff 1e0: 15 01 1e2: 00 00 - 1e4: f4 0b + 1e4: ac 0b 1e6: 00 80 1e8: 00 00 1ea: 00 00 @@ -1393,7 +1375,7 @@ Disassembly of section .symtab: 1ee: 02 00 1f0: 20 01 1f2: 00 00 - 1f4: ac 0b + 1f4: 64 0b 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1403,7 +1385,7 @@ Disassembly of section .symtab: 202: 00 00 204: 60 00 206: 00 80 - 208: 3c 01 + 208: 48 01 20a: 00 00 20c: 12 00 20e: 02 00 @@ -1411,13 +1393,13 @@ Disassembly of section .symtab: 212: 00 00 214: 34 14 216: 00 80 - 218: 04 00 + 218: 20 00 21a: 00 00 21c: 11 00 21e: 06 00 220: 4e 01 222: 00 00 - 224: b4 0b + 224: 6c 0b 226: 00 80 228: 00 00 22a: 00 00 @@ -1433,7 +1415,7 @@ Disassembly of section .symtab: 23e: 05 00 240: 65 01 242: 00 00 - 244: cc 0b + 244: 84 0b 246: 00 80 248: 00 00 24a: 00 00 @@ -1441,7 +1423,7 @@ Disassembly of section .symtab: 24e: 02 00 250: 6d 01 252: 00 00 - 254: 04 0c + 254: bc 0b 256: 00 80 258: 00 00 25a: 00 00 @@ -1451,12 +1433,12 @@ Disassembly of section .symtab: 262: 00 00 264: 3c 02 266: 00 80 - 268: cc 01 + 268: 84 01 26a: 00 00 26c: 12 00 26e: 02 00 270: 87 01 00 00 - 274: c4 0b + 274: 7c 0b 276: 00 80 278: 00 00 27a: 00 00 @@ -1471,7 +1453,7 @@ Disassembly of section .symtab: 28c: 11 00 28e: 05 00 290: a3 01 00 00 sb zero, 3(zero) - 294: c4 0c + 294: 7c 0c 296: 00 80 298: 9c 00 29a: 00 00 @@ -1479,7 +1461,7 @@ Disassembly of section .symtab: 29e: 02 00 2a0: b5 01 2a2: 00 00 - 2a4: fc 0b + 2a4: b4 0b 2a6: 00 80 2a8: 00 00 2aa: 00 00 @@ -1487,14 +1469,14 @@ Disassembly of section .symtab: 2ae: 02 00 2b0: c4 01 2b2: 00 00 - 2b4: d4 0b + 2b4: 8c 0b 2b6: 00 80 2b8: 00 00 2ba: 00 00 2bc: 12 00 2be: 02 00 2c0: cf 01 00 00 fnmadd.s ft3, ft0, ft0, ft0, rne - 2c4: e4 0b + 2c4: 9c 0b 2c6: 00 80 2c8: 00 00 2ca: 00 00 @@ -1502,7 +1484,7 @@ Disassembly of section .symtab: 2ce: 02 00 2d0: dc 01 2d2: 00 00 - 2d4: 68 0c + 2d4: 20 0c 2d6: 00 80 2d8: 5c 00 2da: 00 00 @@ -1510,7 +1492,7 @@ Disassembly of section .symtab: 2de: 02 00 2e0: ee 01 2e2: 00 00 - 2e4: 6c 0b + 2e4: 24 0b 2e6: 00 80 2e8: 00 00 2ea: 00 00 @@ -1518,14 +1500,14 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: f8 01 2f2: 00 00 - 2f4: bc 0b + 2f4: 74 0b 2f6: 00 80 2f8: 00 00 2fa: 00 00 2fc: 12 00 2fe: 02 00 300: 03 02 00 00 lb tp, 0(zero) - 304: d4 0e + 304: 8c 0e 306: 00 80 308: 20 01 30a: 00 00 @@ -1541,7 +1523,7 @@ Disassembly of section .symtab: 31e: 01 00 320: 14 02 322: 00 00 - 324: 3c 0e + 324: f4 0d 326: 00 80 328: 98 00 32a: 00 00 @@ -1549,7 +1531,7 @@ Disassembly of section .symtab: 32e: 02 00 330: 28 02 332: 00 00 - 334: 38 04 + 334: f0 03 336: 00 80 338: 34 02 33a: 00 00 @@ -1557,7 +1539,7 @@ Disassembly of section .symtab: 33e: 02 00 340: 3d 02 342: 00 00 - 344: 38 14 + 344: 54 14 346: 00 80 348: 00 00 34a: 00 00 @@ -1573,7 +1555,7 @@ Disassembly of section .symtab: 35e: 06 00 360: 55 02 362: 00 00 - 364: 60 0d + 364: 18 0d 366: 00 80 368: dc 00 36a: 00 00 @@ -1581,7 +1563,7 @@ Disassembly of section .symtab: 36e: 02 00 370: 5c 02 372: 00 00 - 374: 08 04 + 374: c0 03 376: 00 80 378: 30 00 37a: 00 00 @@ -1589,7 +1571,7 @@ Disassembly of section .symtab: 37e: 02 00 380: 61 02 382: 00 00 - 384: 6c 06 + 384: 24 06 386: 00 80 388: 80 02 38a: 00 00 @@ -1597,7 +1579,7 @@ Disassembly of section .symtab: 38e: 02 00 390: 80 02 392: 00 00 - 394: 14 0c + 394: cc 0b 396: 00 80 398: 00 00 39a: 00 00 @@ -1605,7 +1587,7 @@ Disassembly of section .symtab: 39e: 02 00 3a0: 8e 02 3a2: 00 00 - 3a4: 24 0c + 3a4: dc 0b 3a6: 00 80 3a8: 14 00 3aa: 00 00 @@ -1613,14 +1595,14 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: 95 02 3b2: 00 00 - 3b4: ec 0b + 3b4: a4 0b 3b6: 00 80 3b8: 00 00 3ba: 00 00 3bc: 12 00 3be: 02 00 3c0: a3 02 00 00 sb zero, 5(zero) - 3c4: 0c 0c + 3c4: c4 0b 3c6: 00 80 3c8: 00 00 3ca: 00 00 @@ -1628,7 +1610,7 @@ Disassembly of section .symtab: 3ce: 02 00 3d0: b0 02 3d2: 00 00 - 3d4: dc 0b + 3d4: 94 0b 3d6: 00 80 3d8: 00 00 3da: 00 00 @@ -1636,9 +1618,9 @@ Disassembly of section .symtab: 3de: 02 00 3e0: bc 02 3e2: 00 00 - 3e4: 9c 01 + 3e4: a8 01 3e6: 00 80 - 3e8: a0 00 + 3e8: 94 00 3ea: 00 00 3ec: 12 00 3ee: 02 00 @@ -1660,7 +1642,7 @@ Disassembly of section .symtab: 40e: 05 00 410: 9d 00 412: 00 00 - 414: 38 14 + 414: 54 14 416: 00 80 418: 00 00 41a: 00 00 @@ -1668,14 +1650,14 @@ Disassembly of section .symtab: 41e: 06 00 420: ec 02 422: 00 00 - 424: 38 0c + 424: f0 0b 426: 00 80 428: 30 00 42a: 00 00 42c: 12 00 42e: 02 00 430: eb 02 00 00 - 434: 64 0b + 434: 1c 0b 436: 00 80 438: 00 00 43a: 00 00 @@ -1683,7 +1665,7 @@ Disassembly of section .symtab: 43e: 02 00 440: f1 02 442: 00 00 - 444: ec 08 + 444: a4 08 446: 00 80 448: 78 02 44a: 00 00 @@ -1691,7 +1673,7 @@ Disassembly of section .symtab: 44e: 02 00 450: 15 03 452: 00 00 - 454: 1c 0c + 454: d4 0b 456: 00 80 458: 00 00 45a: 00 00 @@ -1731,13 +1713,14 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 31 32 - 48: 2d 65 - 4a: 61 2d - 4c: 35 65 - 4e: 2d 39 - 50: 66 2d - 52: 63 61 2e 63 bltu t3, s2, 1570 + 46: 39 35 + 48: 2d 62 + 4a: 35 2d + 4c: 38 38 + 4e: 2d 32 + 50: 32 2d + 52: 61 34 + 54: 2e 63 56: 00 70 58: 61 72 5a: 61 6c diff --git a/benchmarks/opencl/sgemm/Makefile b/benchmarks/opencl/sgemm/Makefile index 01aa86e0..7ebef20d 100644 --- a/benchmarks/opencl/sgemm/Makefile +++ b/benchmarks/opencl/sgemm/Makefile @@ -54,7 +54,7 @@ clean: rm -rf $(PROJECT) *.o .depend clean-all: clean - rm *.pocl *.dump + rm -rf *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/sgemm/kernel.pocl b/benchmarks/opencl/sgemm/kernel.pocl index 6d7544ff..6d5e0466 100644 Binary files a/benchmarks/opencl/sgemm/kernel.pocl and b/benchmarks/opencl/sgemm/kernel.pocl differ diff --git a/benchmarks/opencl/sgemm/sgemm.dump b/benchmarks/opencl/sgemm/sgemm.dump index 24545382..a818a667 100644 --- a/benchmarks/opencl/sgemm/sgemm.dump +++ b/benchmarks/opencl/sgemm/sgemm.dump @@ -1,28 +1,28 @@ -/tmp/pocl_vortex_kernel-3e-a9-1d-05-03.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-7d-19-25-85-6c.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 15 00 00 auipc a1, 1 -80000004: 93 85 45 87 addi a1, a1, -1932 +80000004: 93 85 c5 82 addi a1, a1, -2004 80000008: 73 25 60 02 csrr a0, 38 8000000c: 6b 10 b5 00 -80000010: ef 00 50 06 jal 2148 +80000010: ef 00 d0 01 jal 2076 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 13 85 81 c3 addi a0, gp, -968 -80000020: 13 86 c1 c3 addi a2, gp, -964 +80000020: 13 86 81 c5 addi a2, gp, -936 80000024: 33 06 a6 40 sub a2, a2, a0 80000028: 93 05 00 00 mv a1, zero -8000002c: ef 00 d0 23 jal 2620 +8000002c: ef 00 50 1f jal 2548 80000030: 17 15 00 00 auipc a0, 1 -80000034: 13 05 05 94 addi a0, a0, -1728 -80000038: ef 00 50 0f jal 2292 -8000003c: ef 00 10 19 jal 2448 -80000040: ef 00 80 3c jal 968 -80000044: 6f 00 d0 0f j 2300 +80000034: 13 05 85 8f addi a0, a0, -1800 +80000038: ef 00 d0 0a jal 2220 +8000003c: ef 00 90 14 jal 2376 +80000040: ef 00 00 38 jal 896 +80000044: 6f 00 50 0b j 2228 Disassembly of section .text: @@ -30,8 +30,8 @@ Disassembly of section .text: 80000048: 93 07 00 00 mv a5, zero 8000004c: 63 88 07 00 beqz a5, 16 80000050: 37 15 00 80 lui a0, 524289 -80000054: 13 05 05 97 addi a0, a0, -1680 -80000058: 6f 00 50 0d j 2260 +80000054: 13 05 85 92 addi a0, a0, -1752 +80000058: 6f 00 d0 08 j 2188 8000005c: 67 80 00 00 ret 80000060 kernel_spawn_run_warp: @@ -46,116 +46,116 @@ Disassembly of section .text: 80000080: 23 28 61 01 sw s6, 16(sp) 80000084: 23 26 71 01 sw s7, 12(sp) 80000088: 23 24 81 01 sw s8, 8(sp) -8000008c: ef 00 90 07 jal 2168 -80000090: ef 00 d0 02 jal 2092 -80000094: ef 00 10 07 jal 2160 -80000098: 83 a5 81 c3 lw a1, -968(gp) -8000009c: 83 a5 05 00 lw a1, 0(a1) -800000a0: 83 aa 05 00 lw s5, 0(a1) -800000a4: 03 ab 45 00 lw s6, 4(a1) +8000008c: ef 00 10 03 jal 2096 +80000090: ef 00 40 7e jal 2020 +80000094: ef 00 10 02 jal 2080 +80000098: 93 04 05 00 mv s1, a0 +8000009c: ef 00 80 7f jal 2040 +800000a0: 93 09 05 00 mv s3, a0 +800000a4: ef 00 10 00 jal 2048 800000a8: 13 09 05 00 mv s2, a0 -800000ac: ef 00 10 03 jal 2096 -800000b0: 93 09 05 00 mv s3, a0 -800000b4: ef 00 90 03 jal 2104 -800000b8: 03 a8 81 c3 lw a6, -968(gp) -800000bc: 03 27 48 01 lw a4, 20(a6) -800000c0: 83 25 08 01 lw a1, 16(a6) -800000c4: 93 06 07 00 mv a3, a4 -800000c8: 63 44 37 01 blt a4, s3, 8 -800000cc: 93 86 09 00 mv a3, s3 -800000d0: 33 a7 e9 00 slt a4, s3, a4 -800000d4: 33 87 e5 00 add a4, a1, a4 -800000d8: 93 07 10 00 addi a5, zero, 1 -800000dc: 63 46 f7 08 blt a4, a5, 140 -800000e0: 33 0a 5b 03 mul s4, s6, s5 -800000e4: 83 27 c8 00 lw a5, 12(a6) -800000e8: 13 0c f7 ff addi s8, a4, -1 -800000ec: b3 85 b9 02 mul a1, s3, a1 -800000f0: b3 85 b6 00 add a1, a3, a1 -800000f4: b3 05 b9 02 mul a1, s2, a1 -800000f8: b3 85 b7 00 add a1, a5, a1 -800000fc: 33 05 e5 02 mul a0, a0, a4 -80000100: b3 84 a5 00 add s1, a1, a0 -80000104: 33 09 60 41 neg s2, s6 -80000108: 33 0b 40 41 neg s6, s4 -8000010c: 33 c7 44 03 div a4, s1, s4 -80000110: 83 25 08 00 lw a1, 0(a6) -80000114: 33 05 eb 02 mul a0, s6, a4 -80000118: 33 85 a4 00 add a0, s1, a0 -8000011c: b3 46 55 03 div a3, a0, s5 -80000120: 03 a5 c5 00 lw a0, 12(a1) -80000124: b3 07 e9 02 mul a5, s2, a4 -80000128: b3 87 d7 40 sub a5, a5, a3 -8000012c: b3 87 fa 02 mul a5, s5, a5 -80000130: b3 08 f5 00 add a7, a0, a5 -80000134: 03 a4 05 01 lw s0, 16(a1) -80000138: 03 a6 45 01 lw a2, 20(a1) -8000013c: 83 27 48 00 lw a5, 4(a6) -80000140: 03 25 88 00 lw a0, 8(a6) -80000144: b3 06 d4 00 add a3, s0, a3 -80000148: 33 07 e6 00 add a4, a2, a4 -8000014c: 33 86 14 01 add a2, s1, a7 -80000150: e7 80 07 00 jalr a5 -80000154: 63 0a 0c 00 beqz s8, 20 -80000158: 03 a8 81 c3 lw a6, -968(gp) -8000015c: 13 0c fc ff addi s8, s8, -1 -80000160: 93 84 14 00 addi s1, s1, 1 -80000164: 6f f0 9f fa j -88 -80000168: 13 b5 19 00 seqz a0, s3 -8000016c: 03 2c 81 00 lw s8, 8(sp) -80000170: 83 2b c1 00 lw s7, 12(sp) -80000174: 03 2b 01 01 lw s6, 16(sp) -80000178: 83 2a 41 01 lw s5, 20(sp) -8000017c: 03 2a 81 01 lw s4, 24(sp) -80000180: 83 29 c1 01 lw s3, 28(sp) -80000184: 03 29 01 02 lw s2, 32(sp) -80000188: 83 24 41 02 lw s1, 36(sp) -8000018c: 03 24 81 02 lw s0, 40(sp) -80000190: 83 20 c1 02 lw ra, 44(sp) -80000194: 13 01 01 03 addi sp, sp, 48 -80000198: 6f 00 40 72 j 1828 +800000ac: ef 00 10 01 jal 2064 +800000b0: 93 85 81 c3 addi a1, gp, -968 +800000b4: 13 96 24 00 slli a2, s1, 2 +800000b8: b3 05 b6 00 add a1, a2, a1 +800000bc: 03 ab 05 00 lw s6, 0(a1) +800000c0: 83 25 4b 01 lw a1, 20(s6) +800000c4: 03 26 0b 01 lw a2, 16(s6) +800000c8: 93 86 05 00 mv a3, a1 +800000cc: 63 c4 35 01 blt a1, s3, 8 +800000d0: 93 86 09 00 mv a3, s3 +800000d4: b3 a5 b9 00 slt a1, s3, a1 +800000d8: 33 07 b6 00 add a4, a2, a1 +800000dc: 93 05 10 00 addi a1, zero, 1 +800000e0: 63 4a b7 08 blt a4, a1, 148 +800000e4: 83 25 0b 00 lw a1, 0(s6) +800000e8: 83 aa 05 00 lw s5, 0(a1) +800000ec: 83 a7 45 00 lw a5, 4(a1) +800000f0: 83 24 cb 00 lw s1, 12(s6) +800000f4: 33 8a 57 03 mul s4, a5, s5 +800000f8: 13 0c f7 ff addi s8, a4, -1 +800000fc: 33 86 c9 02 mul a2, s3, a2 +80000100: 33 86 c6 00 add a2, a3, a2 +80000104: 33 05 c5 02 mul a0, a0, a2 +80000108: 33 85 a4 00 add a0, s1, a0 +8000010c: 33 06 e9 02 mul a2, s2, a4 +80000110: b3 04 c5 00 add s1, a0, a2 +80000114: 33 09 f0 40 neg s2, a5 +80000118: b3 0b 40 41 neg s7, s4 +8000011c: 33 c6 44 03 div a2, s1, s4 +80000120: 33 85 cb 02 mul a0, s7, a2 +80000124: 33 85 a4 00 add a0, s1, a0 +80000128: b3 46 55 03 div a3, a0, s5 +8000012c: 03 a5 c5 00 lw a0, 12(a1) +80000130: 33 07 c9 02 mul a4, s2, a2 +80000134: 33 07 d7 40 sub a4, a4, a3 +80000138: 33 87 ea 02 mul a4, s5, a4 +8000013c: 33 08 e5 00 add a6, a0, a4 +80000140: 03 a7 05 01 lw a4, 16(a1) +80000144: 03 a4 45 01 lw s0, 20(a1) +80000148: 83 27 4b 00 lw a5, 4(s6) +8000014c: 03 25 8b 00 lw a0, 8(s6) +80000150: b3 06 d7 00 add a3, a4, a3 +80000154: 33 07 c4 00 add a4, s0, a2 +80000158: 33 86 04 01 add a2, s1, a6 +8000015c: e7 80 07 00 jalr a5 +80000160: 63 0a 0c 00 beqz s8, 20 +80000164: 83 25 0b 00 lw a1, 0(s6) +80000168: 13 0c fc ff addi s8, s8, -1 +8000016c: 93 84 14 00 addi s1, s1, 1 +80000170: 6f f0 df fa j -84 +80000174: 13 b5 19 00 seqz a0, s3 +80000178: 03 2c 81 00 lw s8, 8(sp) +8000017c: 83 2b c1 00 lw s7, 12(sp) +80000180: 03 2b 01 01 lw s6, 16(sp) +80000184: 83 2a 41 01 lw s5, 20(sp) +80000188: 03 2a 81 01 lw s4, 24(sp) +8000018c: 83 29 c1 01 lw s3, 28(sp) +80000190: 03 29 01 02 lw s2, 32(sp) +80000194: 83 24 41 02 lw s1, 36(sp) +80000198: 03 24 81 02 lw s0, 40(sp) +8000019c: 83 20 c1 02 lw ra, 44(sp) +800001a0: 13 01 01 03 addi sp, sp, 48 +800001a4: 6f 00 00 6d j 1744 -8000019c kernel_spawn_run_threads: -8000019c: 13 01 01 fe addi sp, sp, -32 -800001a0: 23 2e 11 00 sw ra, 28(sp) -800001a4: 23 2c 81 00 sw s0, 24(sp) -800001a8: 23 2a 91 00 sw s1, 20(sp) -800001ac: 23 28 21 01 sw s2, 16(sp) -800001b0: 23 26 31 01 sw s3, 12(sp) -800001b4: 93 89 05 00 mv s3, a1 -800001b8: ef 00 40 70 jal 1796 -800001bc: 03 a5 81 c3 lw a0, -968(gp) -800001c0: 03 25 05 00 lw a0, 0(a0) -800001c4: 83 24 05 00 lw s1, 0(a0) -800001c8: 03 25 45 00 lw a0, 4(a0) -800001cc: 33 04 95 02 mul s0, a0, s1 -800001d0: ef 00 40 72 jal 1828 -800001d4: 33 05 35 01 add a0, a0, s3 -800001d8: 03 a6 81 c3 lw a2, -968(gp) -800001dc: 33 47 85 02 div a4, a0, s0 -800001e0: b3 05 87 02 mul a1, a4, s0 -800001e4: 33 05 b5 40 sub a0, a0, a1 -800001e8: 83 25 06 00 lw a1, 0(a2) -800001ec: b3 46 95 02 div a3, a0, s1 -800001f0: b3 87 96 02 mul a5, a3, s1 -800001f4: 33 08 f5 40 sub a6, a0, a5 -800001f8: 83 a4 c5 00 lw s1, 12(a1) -800001fc: 03 a4 05 01 lw s0, 16(a1) -80000200: 83 a7 45 01 lw a5, 20(a1) -80000204: 83 28 46 00 lw a7, 4(a2) -80000208: 03 25 86 00 lw a0, 8(a2) -8000020c: 33 06 98 00 add a2, a6, s1 -80000210: b3 06 d4 00 add a3, s0, a3 -80000214: 33 87 e7 00 add a4, a5, a4 -80000218: e7 80 08 00 jalr a7 -8000021c: 13 05 10 00 addi a0, zero, 1 -80000220: 83 29 c1 00 lw s3, 12(sp) -80000224: 03 29 01 01 lw s2, 16(sp) -80000228: 83 24 41 01 lw s1, 20(sp) -8000022c: 03 24 81 01 lw s0, 24(sp) -80000230: 83 20 c1 01 lw ra, 28(sp) -80000234: 13 01 01 02 addi sp, sp, 32 -80000238: 6f 00 40 68 j 1668 +800001a8 kernel_spawn_run_threads: +800001a8: 13 01 01 ff addi sp, sp, -16 +800001ac: 23 26 11 00 sw ra, 12(sp) +800001b0: 23 24 81 00 sw s0, 8(sp) +800001b4: ef 00 00 6c jal 1728 +800001b8: ef 00 c0 6f jal 1788 +800001bc: 13 04 05 00 mv s0, a0 +800001c0: ef 00 c0 6e jal 1772 +800001c4: 93 85 81 c3 addi a1, gp, -968 +800001c8: 13 16 24 00 slli a2, s0, 2 +800001cc: b3 05 b6 00 add a1, a2, a1 +800001d0: 03 a6 05 00 lw a2, 0(a1) +800001d4: 83 25 06 00 lw a1, 0(a2) +800001d8: 83 26 c6 00 lw a3, 12(a2) +800001dc: 03 a7 05 00 lw a4, 0(a1) +800001e0: 83 a7 45 00 lw a5, 4(a1) +800001e4: 33 85 a6 00 add a0, a3, a0 +800001e8: b3 86 e7 02 mul a3, a5, a4 +800001ec: b3 47 d5 02 div a5, a0, a3 +800001f0: b3 86 d7 02 mul a3, a5, a3 +800001f4: 03 a4 c5 00 lw s0, 12(a1) +800001f8: 33 05 d5 40 sub a0, a0, a3 +800001fc: b3 46 e5 02 div a3, a0, a4 +80000200: 33 88 e6 02 mul a6, a3, a4 +80000204: b3 08 a4 00 add a7, s0, a0 +80000208: 03 a7 05 01 lw a4, 16(a1) +8000020c: 03 a4 45 01 lw s0, 20(a1) +80000210: 83 22 46 00 lw t0, 4(a2) +80000214: 03 25 86 00 lw a0, 8(a2) +80000218: 33 86 08 41 sub a2, a7, a6 +8000021c: b3 06 d7 00 add a3, a4, a3 +80000220: 33 07 f4 00 add a4, s0, a5 +80000224: e7 80 02 00 jalr t0 +80000228: 13 05 10 00 addi a0, zero, 1 +8000022c: 03 24 81 00 lw s0, 8(sp) +80000230: 83 20 c1 00 lw ra, 12(sp) +80000234: 13 01 01 01 addi sp, sp, 16 +80000238: 6f 00 c0 63 j 1596 8000023c kernel_spawn: 8000023c: 13 01 01 fc addi sp, sp, -64 @@ -167,743 +167,725 @@ Disassembly of section .text: 80000254: 23 24 41 03 sw s4, 40(sp) 80000258: 23 22 51 03 sw s5, 36(sp) 8000025c: 23 20 61 03 sw s6, 32(sp) -80000260: 13 0b 05 00 mv s6, a0 -80000264: 03 25 05 00 lw a0, 0(a0) -80000268: 83 26 4b 00 lw a3, 4(s6) -8000026c: 03 27 8b 00 lw a4, 8(s6) -80000270: 13 09 06 00 mv s2, a2 -80000274: 93 89 05 00 mv s3, a1 -80000278: 33 85 a6 02 mul a0, a3, a0 -8000027c: b3 04 e5 02 mul s1, a0, a4 -80000280: ef 00 40 69 jal 1684 -80000284: 13 04 05 00 mv s0, a0 -80000288: ef 00 40 68 jal 1668 +80000260: 23 2e 71 01 sw s7, 28(sp) +80000264: 23 2c 81 01 sw s8, 24(sp) +80000268: 93 04 05 00 mv s1, a0 +8000026c: 83 2b 05 00 lw s7, 0(a0) +80000270: 03 24 45 00 lw s0, 4(a0) +80000274: 03 2c 85 00 lw s8, 8(a0) +80000278: 13 09 06 00 mv s2, a2 +8000027c: 93 89 05 00 mv s3, a1 +80000280: ef 00 c0 64 jal 1612 +80000284: 13 0b 05 00 mv s6, a0 +80000288: ef 00 c0 63 jal 1596 8000028c: 13 0a 05 00 mv s4, a0 -80000290: ef 00 40 67 jal 1652 +80000290: ef 00 c0 62 jal 1580 80000294: 93 0a 05 00 mv s5, a0 -80000298: ef 00 40 66 jal 1636 -8000029c: b3 85 4a 03 mul a1, s5, s4 -800002a0: 13 06 10 00 addi a2, zero, 1 -800002a4: 63 c8 95 00 blt a1, s1, 16 -800002a8: 63 5a 86 00 bge a2, s0, 20 -800002ac: 63 4c c5 00 blt a0, a2, 24 -800002b0: 6f 00 00 13 j 304 -800002b4: 33 c6 b4 02 div a2, s1, a1 -800002b8: e3 4a 86 fe blt a2, s0, -12 -800002bc: 13 06 04 00 mv a2, s0 -800002c0: 63 50 c5 12 bge a0, a2, 288 -800002c4: 93 06 f4 ff addi a3, s0, -1 -800002c8: b3 c5 c4 02 div a1, s1, a2 -800002cc: 63 0e d5 00 beq a0, a3, 28 -800002d0: 13 06 00 00 mv a2, zero -800002d4: b3 06 b6 00 add a3, a2, a1 -800002d8: 33 c6 56 03 div a2, a3, s5 -800002dc: 13 07 00 00 mv a4, zero -800002e0: 63 50 46 03 bge a2, s4, 32 -800002e4: 6f 00 00 02 j 32 -800002e8: 33 86 c5 02 mul a2, a1, a2 -800002ec: 33 86 c4 40 sub a2, s1, a2 -800002f0: b3 06 b6 00 add a3, a2, a1 -800002f4: 33 c6 56 03 div a2, a3, s5 -800002f8: 13 07 00 00 mv a4, zero -800002fc: 63 44 46 01 blt a2, s4, 8 -80000300: 33 47 46 03 div a4, a2, s4 -80000304: 93 07 00 00 mv a5, zero -80000308: b3 0a 56 03 mul s5, a2, s5 -8000030c: 93 04 10 00 addi s1, zero, 1 -80000310: 63 08 07 00 beqz a4, 16 -80000314: b3 07 47 03 mul a5, a4, s4 -80000318: b3 07 f6 40 sub a5, a2, a5 -8000031c: 93 04 07 00 mv s1, a4 -80000320: 33 84 56 41 sub s0, a3, s5 -80000324: 23 24 61 01 sw s6, 8(sp) -80000328: 23 26 31 01 sw s3, 12(sp) -8000032c: 23 28 21 01 sw s2, 16(sp) -80000330: 33 85 a5 02 mul a0, a1, a0 -80000334: 23 2a a1 00 sw a0, 20(sp) -80000338: 23 2c 91 00 sw s1, 24(sp) -8000033c: 23 2e f1 00 sw a5, 28(sp) -80000340: 93 05 81 00 addi a1, sp, 8 -80000344: 93 06 20 00 addi a3, zero, 2 -80000348: 23 ac b1 c2 sw a1, -968(gp) -8000034c: 63 40 d6 02 blt a2, a3, 32 -80000350: 63 44 46 01 blt a2, s4, 8 -80000354: 13 06 0a 00 mv a2, s4 -80000358: 37 05 00 80 lui a0, 524288 -8000035c: 93 05 05 06 addi a1, a0, 96 -80000360: 13 05 06 00 mv a0, a2 -80000364: ef 00 00 55 jal 1360 -80000368: ef f0 9f cf jal -776 -8000036c: 63 0a 04 06 beqz s0, 116 -80000370: 13 05 04 00 mv a0, s0 -80000374: ef 00 80 54 jal 1352 -80000378: 03 a5 81 c3 lw a0, -968(gp) -8000037c: 03 25 05 00 lw a0, 0(a0) -80000380: 83 24 05 00 lw s1, 0(a0) -80000384: 03 25 45 00 lw a0, 4(a0) -80000388: 33 04 95 02 mul s0, a0, s1 -8000038c: ef 00 80 56 jal 1384 -80000390: 33 05 55 01 add a0, a0, s5 -80000394: 03 a6 81 c3 lw a2, -968(gp) -80000398: 33 47 85 02 div a4, a0, s0 -8000039c: b3 05 87 02 mul a1, a4, s0 -800003a0: 33 05 b5 40 sub a0, a0, a1 -800003a4: 83 25 06 00 lw a1, 0(a2) -800003a8: b3 46 95 02 div a3, a0, s1 -800003ac: b3 87 96 02 mul a5, a3, s1 -800003b0: 33 08 f5 40 sub a6, a0, a5 -800003b4: 83 a4 c5 00 lw s1, 12(a1) -800003b8: 03 a4 05 01 lw s0, 16(a1) -800003bc: 83 a7 45 01 lw a5, 20(a1) -800003c0: 83 28 46 00 lw a7, 4(a2) -800003c4: 03 25 86 00 lw a0, 8(a2) -800003c8: 33 06 98 00 add a2, a6, s1 -800003cc: b3 06 d4 00 add a3, s0, a3 -800003d0: 33 87 e7 00 add a4, a5, a4 -800003d4: e7 80 08 00 jalr a7 -800003d8: 13 05 10 00 addi a0, zero, 1 -800003dc: ef 00 00 4e jal 1248 -800003e0: 03 2b 01 02 lw s6, 32(sp) -800003e4: 83 2a 41 02 lw s5, 36(sp) -800003e8: 03 2a 81 02 lw s4, 40(sp) -800003ec: 83 29 c1 02 lw s3, 44(sp) -800003f0: 03 29 01 03 lw s2, 48(sp) -800003f4: 83 24 41 03 lw s1, 52(sp) -800003f8: 03 24 81 03 lw s0, 56(sp) -800003fc: 83 20 c1 03 lw ra, 60(sp) -80000400: 13 01 01 04 addi sp, sp, 64 -80000404: 67 80 00 00 ret +80000298: ef 00 c0 61 jal 1564 +8000029c: 93 05 70 00 addi a1, zero, 7 +800002a0: 63 c8 a5 0e blt a1, a0, 240 +800002a4: b3 05 74 03 mul a1, s0, s7 +800002a8: 33 86 85 03 mul a2, a1, s8 +800002ac: b3 85 4a 03 mul a1, s5, s4 +800002b0: 93 06 10 00 addi a3, zero, 1 +800002b4: 63 c8 c5 00 blt a1, a2, 16 +800002b8: 63 da 66 01 bge a3, s6, 20 +800002bc: 63 4c d5 00 blt a0, a3, 24 +800002c0: 6f 00 00 0d j 208 +800002c4: b3 46 b6 02 div a3, a2, a1 +800002c8: e3 ca 66 ff blt a3, s6, -12 +800002cc: 93 06 0b 00 mv a3, s6 +800002d0: 63 50 d5 0c bge a0, a3, 192 +800002d4: 13 07 fb ff addi a4, s6, -1 +800002d8: b3 45 d6 02 div a1, a2, a3 +800002dc: 63 0e e5 00 beq a0, a4, 28 +800002e0: 13 06 00 00 mv a2, zero +800002e4: 33 0b b6 00 add s6, a2, a1 +800002e8: 33 46 5b 03 div a2, s6, s5 +800002ec: 93 06 00 00 mv a3, zero +800002f0: 63 50 46 03 bge a2, s4, 32 +800002f4: 6f 00 00 02 j 32 +800002f8: b3 86 d5 02 mul a3, a1, a3 +800002fc: 33 06 d6 40 sub a2, a2, a3 +80000300: 33 0b b6 00 add s6, a2, a1 +80000304: 33 46 5b 03 div a2, s6, s5 +80000308: 93 06 00 00 mv a3, zero +8000030c: 63 44 46 01 blt a2, s4, 8 +80000310: b3 46 46 03 div a3, a2, s4 +80000314: 13 07 00 00 mv a4, zero +80000318: 93 07 10 00 addi a5, zero, 1 +8000031c: 63 88 06 00 beqz a3, 16 +80000320: 33 87 46 03 mul a4, a3, s4 +80000324: 33 07 e6 40 sub a4, a2, a4 +80000328: 93 87 06 00 mv a5, a3 +8000032c: 33 04 56 03 mul s0, a2, s5 +80000330: 23 20 91 00 sw s1, 0(sp) +80000334: 23 22 31 01 sw s3, 4(sp) +80000338: 23 24 21 01 sw s2, 8(sp) +8000033c: b3 85 a5 02 mul a1, a1, a0 +80000340: 23 26 b1 00 sw a1, 12(sp) +80000344: 23 28 f1 00 sw a5, 16(sp) +80000348: 23 2a e1 00 sw a4, 20(sp) +8000034c: 93 85 81 c3 addi a1, gp, -968 +80000350: 13 15 25 00 slli a0, a0, 2 +80000354: 33 05 b5 00 add a0, a0, a1 +80000358: 93 05 01 00 mv a1, sp +8000035c: 93 06 20 00 addi a3, zero, 2 +80000360: 23 20 b5 00 sw a1, 0(a0) +80000364: 63 40 d6 02 blt a2, a3, 32 +80000368: 63 44 46 01 blt a2, s4, 8 +8000036c: 13 06 0a 00 mv a2, s4 +80000370: 37 05 00 80 lui a0, 524288 +80000374: 93 05 05 06 addi a1, a0, 96 +80000378: 13 05 06 00 mv a0, a2 +8000037c: ef 00 00 4f jal 1264 +80000380: ef f0 1f ce jal -800 +80000384: 63 06 8b 00 beq s6, s0, 12 +80000388: 23 26 81 00 sw s0, 12(sp) +8000038c: ef f0 5f cd jal -812 +80000390: 03 2c 81 01 lw s8, 24(sp) +80000394: 83 2b c1 01 lw s7, 28(sp) +80000398: 03 2b 01 02 lw s6, 32(sp) +8000039c: 83 2a 41 02 lw s5, 36(sp) +800003a0: 03 2a 81 02 lw s4, 40(sp) +800003a4: 83 29 c1 02 lw s3, 44(sp) +800003a8: 03 29 01 03 lw s2, 48(sp) +800003ac: 83 24 41 03 lw s1, 52(sp) +800003b0: 03 24 81 03 lw s0, 56(sp) +800003b4: 83 20 c1 03 lw ra, 60(sp) +800003b8: 13 01 01 04 addi sp, sp, 64 +800003bc: 67 80 00 00 ret -80000408 main: -80000408: 13 01 01 ff addi sp, sp, -16 -8000040c: 23 26 11 00 sw ra, 12(sp) -80000410: 37 05 00 80 lui a0, 524288 -80000414: 93 05 05 5a addi a1, a0, 1440 -80000418: 37 05 ff 7f lui a0, 524272 -8000041c: 13 06 45 03 addi a2, a0, 52 -80000420: 37 05 ff 7f lui a0, 524272 -80000424: ef f0 9f e1 jal -488 -80000428: 13 05 00 00 mv a0, zero -8000042c: 83 20 c1 00 lw ra, 12(sp) -80000430: 13 01 01 01 addi sp, sp, 16 -80000434: 67 80 00 00 ret +800003c0 main: +800003c0: 13 01 01 ff addi sp, sp, -16 +800003c4: 23 26 11 00 sw ra, 12(sp) +800003c8: 37 05 00 80 lui a0, 524288 +800003cc: 93 05 85 55 addi a1, a0, 1368 +800003d0: 37 05 ff 7f lui a0, 524272 +800003d4: 13 06 45 03 addi a2, a0, 52 +800003d8: 37 05 ff 7f lui a0, 524272 +800003dc: ef f0 1f e6 jal -416 +800003e0: 13 05 00 00 mv a0, zero +800003e4: 83 20 c1 00 lw ra, 12(sp) +800003e8: 13 01 01 01 addi sp, sp, 16 +800003ec: 67 80 00 00 ret -80000438 _pocl_kernel_sgemm: -80000438: 13 01 01 fe addi sp, sp, -32 -8000043c: 23 2e 11 00 sw ra, 28(sp) -80000440: 23 2c 81 00 sw s0, 24(sp) -80000444: 23 2a 91 00 sw s1, 20(sp) -80000448: 23 28 21 01 sw s2, 16(sp) -8000044c: 23 26 31 01 sw s3, 12(sp) -80000450: 23 24 41 01 sw s4, 8(sp) -80000454: 23 22 51 01 sw s5, 4(sp) -80000458: 23 20 61 01 sw s6, 0(sp) -8000045c: 13 04 01 02 addi s0, sp, 32 -80000460: 13 71 c1 ff andi sp, sp, -4 -80000464: 93 08 00 00 mv a7, zero -80000468: 83 22 87 01 lw t0, 24(a4) -8000046c: 03 23 c7 01 lw t1, 28(a4) -80000470: 83 24 c7 00 lw s1, 12(a4) -80000474: 83 23 07 02 lw t2, 32(a4) -80000478: 03 27 07 01 lw a4, 16(a4) -8000047c: b3 87 f2 02 mul a5, t0, a5 -80000480: 33 89 f4 00 add s2, s1, a5 -80000484: b3 07 03 03 mul a5, t1, a6 -80000488: b3 0e f7 00 add t4, a4, a5 -8000048c: 33 87 d6 03 mul a4, a3, t4 -80000490: b3 07 e9 00 add a5, s2, a4 -80000494: 93 97 27 00 slli a5, a5, 2 -80000498: 33 08 f6 00 add a6, a2, a5 -8000049c: 13 17 27 00 slli a4, a4, 2 -800004a0: 33 8e e5 00 add t3, a1, a4 -800004a4: b7 15 00 80 lui a1, 524289 -800004a8: 93 85 45 43 addi a1, a1, 1076 -800004ac: 07 a0 05 00 flw ft0, 0(a1) -800004b0: 93 15 29 00 slli a1, s2, 2 -800004b4: 33 0f b5 00 add t5, a0, a1 -800004b8: 93 95 26 00 slli a1, a3, 2 -800004bc: 6f 00 c0 00 j 12 -800004c0: 93 88 18 00 addi a7, a7, 1 -800004c4: 63 f8 78 0a bgeu a7, t2, 176 -800004c8: 63 5e d0 06 blez a3, 124 -800004cc: 93 0f 00 00 mv t6, zero -800004d0: 93 09 0e 00 mv s3, t3 -800004d4: 93 07 00 00 mv a5, zero -800004d8: 33 85 fe 01 add a0, t4, t6 -800004dc: 33 0a d5 02 mul s4, a0, a3 -800004e0: 13 0b 0f 00 mv s6, t5 -800004e4: b3 0a f9 00 add s5, s2, a5 -800004e8: 13 07 0b 00 mv a4, s6 -800004ec: 13 85 09 00 mv a0, s3 -800004f0: 93 84 06 00 mv s1, a3 -800004f4: d3 00 00 20 fmv.s ft1, ft0 -800004f8: 07 21 07 00 flw ft2, 0(a4) -800004fc: 87 21 05 00 flw ft3, 0(a0) -80000500: 53 71 31 10 fmul.s ft2, ft2, ft3 -80000504: d3 70 11 00 fadd.s ft1, ft2, ft1 -80000508: 93 84 f4 ff addi s1, s1, -1 -8000050c: 13 05 45 00 addi a0, a0, 4 -80000510: 33 07 b7 00 add a4, a4, a1 -80000514: e3 92 04 fe bnez s1, -28 -80000518: 33 85 4a 01 add a0, s5, s4 -8000051c: 13 15 25 00 slli a0, a0, 2 -80000520: 33 05 a6 00 add a0, a2, a0 -80000524: 27 20 15 00 fsw ft1, 0(a0) -80000528: 93 87 17 00 addi a5, a5, 1 -8000052c: 13 0b 4b 00 addi s6, s6, 4 -80000530: e3 ea 57 fa bltu a5, t0, -76 -80000534: 93 8f 1f 00 addi t6, t6, 1 -80000538: b3 89 b9 00 add s3, s3, a1 -8000053c: e3 ec 6f f8 bltu t6, t1, -104 -80000540: 6f f0 1f f8 j -128 -80000544: 13 05 00 00 mv a0, zero -80000548: 93 04 08 00 mv s1, a6 -8000054c: 93 07 00 00 mv a5, zero -80000550: 13 87 04 00 mv a4, s1 -80000554: 23 20 07 00 sw zero, 0(a4) -80000558: 93 87 17 00 addi a5, a5, 1 -8000055c: 13 07 47 00 addi a4, a4, 4 -80000560: e3 ea 57 fe bltu a5, t0, -12 -80000564: 13 05 15 00 addi a0, a0, 1 -80000568: b3 84 b4 00 add s1, s1, a1 -8000056c: e3 60 65 fe bltu a0, t1, -32 -80000570: 6f f0 1f f5 j -176 -80000574: 13 01 04 fe addi sp, s0, -32 -80000578: 03 2b 01 00 lw s6, 0(sp) -8000057c: 83 2a 41 00 lw s5, 4(sp) -80000580: 03 2a 81 00 lw s4, 8(sp) -80000584: 83 29 c1 00 lw s3, 12(sp) -80000588: 03 29 01 01 lw s2, 16(sp) -8000058c: 83 24 41 01 lw s1, 20(sp) -80000590: 03 24 81 01 lw s0, 24(sp) -80000594: 83 20 c1 01 lw ra, 28(sp) -80000598: 13 01 01 02 addi sp, sp, 32 -8000059c: 67 80 00 00 ret +800003f0 _pocl_kernel_sgemm: +800003f0: 13 01 01 fe addi sp, sp, -32 +800003f4: 23 2e 11 00 sw ra, 28(sp) +800003f8: 23 2c 81 00 sw s0, 24(sp) +800003fc: 23 2a 91 00 sw s1, 20(sp) +80000400: 23 28 21 01 sw s2, 16(sp) +80000404: 23 26 31 01 sw s3, 12(sp) +80000408: 23 24 41 01 sw s4, 8(sp) +8000040c: 23 22 51 01 sw s5, 4(sp) +80000410: 23 20 61 01 sw s6, 0(sp) +80000414: 13 04 01 02 addi s0, sp, 32 +80000418: 13 71 c1 ff andi sp, sp, -4 +8000041c: 93 08 00 00 mv a7, zero +80000420: 83 22 87 01 lw t0, 24(a4) +80000424: 03 23 c7 01 lw t1, 28(a4) +80000428: 83 24 c7 00 lw s1, 12(a4) +8000042c: 83 23 07 02 lw t2, 32(a4) +80000430: 03 27 07 01 lw a4, 16(a4) +80000434: b3 87 f2 02 mul a5, t0, a5 +80000438: 33 89 f4 00 add s2, s1, a5 +8000043c: b3 07 03 03 mul a5, t1, a6 +80000440: b3 0e f7 00 add t4, a4, a5 +80000444: 33 87 d6 03 mul a4, a3, t4 +80000448: b3 07 e9 00 add a5, s2, a4 +8000044c: 93 97 27 00 slli a5, a5, 2 +80000450: 33 08 f6 00 add a6, a2, a5 +80000454: 13 17 27 00 slli a4, a4, 2 +80000458: 33 8e e5 00 add t3, a1, a4 +8000045c: b7 15 00 80 lui a1, 524289 +80000460: 93 85 45 43 addi a1, a1, 1076 +80000464: 07 a0 05 00 flw ft0, 0(a1) +80000468: 93 15 29 00 slli a1, s2, 2 +8000046c: 33 0f b5 00 add t5, a0, a1 +80000470: 93 95 26 00 slli a1, a3, 2 +80000474: 6f 00 c0 00 j 12 +80000478: 93 88 18 00 addi a7, a7, 1 +8000047c: 63 f8 78 0a bgeu a7, t2, 176 +80000480: 63 5e d0 06 blez a3, 124 +80000484: 93 0f 00 00 mv t6, zero +80000488: 93 09 0e 00 mv s3, t3 +8000048c: 93 07 00 00 mv a5, zero +80000490: 33 85 fe 01 add a0, t4, t6 +80000494: 33 0a d5 02 mul s4, a0, a3 +80000498: 13 0b 0f 00 mv s6, t5 +8000049c: b3 0a f9 00 add s5, s2, a5 +800004a0: 13 07 0b 00 mv a4, s6 +800004a4: 13 85 09 00 mv a0, s3 +800004a8: 93 84 06 00 mv s1, a3 +800004ac: d3 00 00 20 fmv.s ft1, ft0 +800004b0: 07 21 07 00 flw ft2, 0(a4) +800004b4: 87 21 05 00 flw ft3, 0(a0) +800004b8: 53 71 31 10 fmul.s ft2, ft2, ft3 +800004bc: d3 70 11 00 fadd.s ft1, ft2, ft1 +800004c0: 93 84 f4 ff addi s1, s1, -1 +800004c4: 13 05 45 00 addi a0, a0, 4 +800004c8: 33 07 b7 00 add a4, a4, a1 +800004cc: e3 92 04 fe bnez s1, -28 +800004d0: 33 85 4a 01 add a0, s5, s4 +800004d4: 13 15 25 00 slli a0, a0, 2 +800004d8: 33 05 a6 00 add a0, a2, a0 +800004dc: 27 20 15 00 fsw ft1, 0(a0) +800004e0: 93 87 17 00 addi a5, a5, 1 +800004e4: 13 0b 4b 00 addi s6, s6, 4 +800004e8: e3 ea 57 fa bltu a5, t0, -76 +800004ec: 93 8f 1f 00 addi t6, t6, 1 +800004f0: b3 89 b9 00 add s3, s3, a1 +800004f4: e3 ec 6f f8 bltu t6, t1, -104 +800004f8: 6f f0 1f f8 j -128 +800004fc: 13 05 00 00 mv a0, zero +80000500: 93 04 08 00 mv s1, a6 +80000504: 93 07 00 00 mv a5, zero +80000508: 13 87 04 00 mv a4, s1 +8000050c: 23 20 07 00 sw zero, 0(a4) +80000510: 93 87 17 00 addi a5, a5, 1 +80000514: 13 07 47 00 addi a4, a4, 4 +80000518: e3 ea 57 fe bltu a5, t0, -12 +8000051c: 13 05 15 00 addi a0, a0, 1 +80000520: b3 84 b4 00 add s1, s1, a1 +80000524: e3 60 65 fe bltu a0, t1, -32 +80000528: 6f f0 1f f5 j -176 +8000052c: 13 01 04 fe addi sp, s0, -32 +80000530: 03 2b 01 00 lw s6, 0(sp) +80000534: 83 2a 41 00 lw s5, 4(sp) +80000538: 03 2a 81 00 lw s4, 8(sp) +8000053c: 83 29 c1 00 lw s3, 12(sp) +80000540: 03 29 01 01 lw s2, 16(sp) +80000544: 83 24 41 01 lw s1, 20(sp) +80000548: 03 24 81 01 lw s0, 24(sp) +8000054c: 83 20 c1 01 lw ra, 28(sp) +80000550: 13 01 01 02 addi sp, sp, 32 +80000554: 67 80 00 00 ret -800005a0 _pocl_kernel_sgemm_workgroup: -800005a0: 13 01 01 fe addi sp, sp, -32 -800005a4: 23 2e 81 00 sw s0, 28(sp) -800005a8: 23 2c 91 00 sw s1, 24(sp) -800005ac: 23 2a 21 01 sw s2, 20(sp) -800005b0: 23 28 31 01 sw s3, 16(sp) -800005b4: 23 26 41 01 sw s4, 12(sp) -800005b8: 23 24 51 01 sw s5, 8(sp) -800005bc: 13 08 00 00 mv a6, zero -800005c0: 03 27 05 00 lw a4, 0(a0) -800005c4: 83 27 45 00 lw a5, 4(a0) -800005c8: 83 24 85 00 lw s1, 8(a0) -800005cc: 03 25 c5 00 lw a0, 12(a0) -800005d0: 03 27 07 00 lw a4, 0(a4) -800005d4: 83 a7 07 00 lw a5, 0(a5) -800005d8: 83 af 04 00 lw t6, 0(s1) -800005dc: 83 29 05 00 lw s3, 0(a0) -800005e0: 03 aa 85 01 lw s4, 24(a1) -800005e4: 83 a8 c5 01 lw a7, 28(a1) -800005e8: 03 a5 c5 00 lw a0, 12(a1) -800005ec: 83 a2 05 02 lw t0, 32(a1) -800005f0: 83 a5 05 01 lw a1, 16(a1) -800005f4: 33 06 ca 02 mul a2, s4, a2 -800005f8: 33 09 c5 00 add s2, a0, a2 -800005fc: 33 85 d8 02 mul a0, a7, a3 -80000600: 33 8e a5 00 add t3, a1, a0 -80000604: 33 85 c9 03 mul a0, s3, t3 -80000608: b3 05 a9 00 add a1, s2, a0 -8000060c: 93 95 25 00 slli a1, a1, 2 -80000610: 33 83 bf 00 add t1, t6, a1 -80000614: 13 15 25 00 slli a0, a0, 2 -80000618: b3 83 a7 00 add t2, a5, a0 -8000061c: 37 15 00 80 lui a0, 524289 -80000620: 13 05 85 43 addi a0, a0, 1080 -80000624: 07 20 05 00 flw ft0, 0(a0) -80000628: 13 15 29 00 slli a0, s2, 2 -8000062c: b3 0e a7 00 add t4, a4, a0 -80000630: 13 96 29 00 slli a2, s3, 2 -80000634: 6f 00 c0 00 j 12 -80000638: 13 08 18 00 addi a6, a6, 1 -8000063c: 63 78 58 0a bgeu a6, t0, 176 -80000640: 63 5e 30 07 blez s3, 124 -80000644: 13 0f 00 00 mv t5, zero -80000648: 93 8a 03 00 mv s5, t2 -8000064c: 13 05 00 00 mv a0, zero -80000650: b3 05 ee 01 add a1, t3, t5 -80000654: 33 84 35 03 mul s0, a1, s3 -80000658: 93 87 0e 00 mv a5, t4 -8000065c: b3 05 a9 00 add a1, s2, a0 -80000660: 93 84 07 00 mv s1, a5 -80000664: 93 86 0a 00 mv a3, s5 -80000668: 13 87 09 00 mv a4, s3 -8000066c: d3 00 00 20 fmv.s ft1, ft0 -80000670: 07 a1 04 00 flw ft2, 0(s1) -80000674: 87 a1 06 00 flw ft3, 0(a3) -80000678: 53 71 31 10 fmul.s ft2, ft2, ft3 -8000067c: d3 70 11 00 fadd.s ft1, ft2, ft1 -80000680: 13 07 f7 ff addi a4, a4, -1 -80000684: 93 86 46 00 addi a3, a3, 4 -80000688: b3 84 c4 00 add s1, s1, a2 -8000068c: e3 12 07 fe bnez a4, -28 -80000690: b3 85 85 00 add a1, a1, s0 -80000694: 93 95 25 00 slli a1, a1, 2 -80000698: b3 85 bf 00 add a1, t6, a1 -8000069c: 27 a0 15 00 fsw ft1, 0(a1) -800006a0: 13 05 15 00 addi a0, a0, 1 -800006a4: 93 87 47 00 addi a5, a5, 4 -800006a8: e3 6a 45 fb bltu a0, s4, -76 -800006ac: 13 0f 1f 00 addi t5, t5, 1 -800006b0: b3 8a ca 00 add s5, s5, a2 -800006b4: e3 6c 1f f9 bltu t5, a7, -104 -800006b8: 6f f0 1f f8 j -128 -800006bc: 13 05 00 00 mv a0, zero -800006c0: 93 06 03 00 mv a3, t1 -800006c4: 93 05 00 00 mv a1, zero -800006c8: 13 87 06 00 mv a4, a3 -800006cc: 23 20 07 00 sw zero, 0(a4) -800006d0: 93 85 15 00 addi a1, a1, 1 -800006d4: 13 07 47 00 addi a4, a4, 4 -800006d8: e3 ea 45 ff bltu a1, s4, -12 -800006dc: 13 05 15 00 addi a0, a0, 1 -800006e0: b3 86 c6 00 add a3, a3, a2 -800006e4: e3 60 15 ff bltu a0, a7, -32 -800006e8: 6f f0 1f f5 j -176 -800006ec: 83 2a 81 00 lw s5, 8(sp) -800006f0: 03 2a c1 00 lw s4, 12(sp) -800006f4: 83 29 01 01 lw s3, 16(sp) -800006f8: 03 29 41 01 lw s2, 20(sp) -800006fc: 83 24 81 01 lw s1, 24(sp) -80000700: 03 24 c1 01 lw s0, 28(sp) -80000704: 13 01 01 02 addi sp, sp, 32 -80000708: 67 80 00 00 ret +80000558 _pocl_kernel_sgemm_workgroup: +80000558: 13 01 01 fe addi sp, sp, -32 +8000055c: 23 2e 81 00 sw s0, 28(sp) +80000560: 23 2c 91 00 sw s1, 24(sp) +80000564: 23 2a 21 01 sw s2, 20(sp) +80000568: 23 28 31 01 sw s3, 16(sp) +8000056c: 23 26 41 01 sw s4, 12(sp) +80000570: 23 24 51 01 sw s5, 8(sp) +80000574: 13 08 00 00 mv a6, zero +80000578: 03 27 05 00 lw a4, 0(a0) +8000057c: 83 27 45 00 lw a5, 4(a0) +80000580: 83 24 85 00 lw s1, 8(a0) +80000584: 03 25 c5 00 lw a0, 12(a0) +80000588: 03 27 07 00 lw a4, 0(a4) +8000058c: 83 a7 07 00 lw a5, 0(a5) +80000590: 83 af 04 00 lw t6, 0(s1) +80000594: 83 29 05 00 lw s3, 0(a0) +80000598: 03 aa 85 01 lw s4, 24(a1) +8000059c: 83 a8 c5 01 lw a7, 28(a1) +800005a0: 03 a5 c5 00 lw a0, 12(a1) +800005a4: 83 a2 05 02 lw t0, 32(a1) +800005a8: 83 a5 05 01 lw a1, 16(a1) +800005ac: 33 06 ca 02 mul a2, s4, a2 +800005b0: 33 09 c5 00 add s2, a0, a2 +800005b4: 33 85 d8 02 mul a0, a7, a3 +800005b8: 33 8e a5 00 add t3, a1, a0 +800005bc: 33 85 c9 03 mul a0, s3, t3 +800005c0: b3 05 a9 00 add a1, s2, a0 +800005c4: 93 95 25 00 slli a1, a1, 2 +800005c8: 33 83 bf 00 add t1, t6, a1 +800005cc: 13 15 25 00 slli a0, a0, 2 +800005d0: b3 83 a7 00 add t2, a5, a0 +800005d4: 37 15 00 80 lui a0, 524289 +800005d8: 13 05 85 43 addi a0, a0, 1080 +800005dc: 07 20 05 00 flw ft0, 0(a0) +800005e0: 13 15 29 00 slli a0, s2, 2 +800005e4: b3 0e a7 00 add t4, a4, a0 +800005e8: 13 96 29 00 slli a2, s3, 2 +800005ec: 6f 00 c0 00 j 12 +800005f0: 13 08 18 00 addi a6, a6, 1 +800005f4: 63 78 58 0a bgeu a6, t0, 176 +800005f8: 63 5e 30 07 blez s3, 124 +800005fc: 13 0f 00 00 mv t5, zero +80000600: 93 8a 03 00 mv s5, t2 +80000604: 13 05 00 00 mv a0, zero +80000608: b3 05 ee 01 add a1, t3, t5 +8000060c: 33 84 35 03 mul s0, a1, s3 +80000610: 93 87 0e 00 mv a5, t4 +80000614: b3 05 a9 00 add a1, s2, a0 +80000618: 93 84 07 00 mv s1, a5 +8000061c: 93 86 0a 00 mv a3, s5 +80000620: 13 87 09 00 mv a4, s3 +80000624: d3 00 00 20 fmv.s ft1, ft0 +80000628: 07 a1 04 00 flw ft2, 0(s1) +8000062c: 87 a1 06 00 flw ft3, 0(a3) +80000630: 53 71 31 10 fmul.s ft2, ft2, ft3 +80000634: d3 70 11 00 fadd.s ft1, ft2, ft1 +80000638: 13 07 f7 ff addi a4, a4, -1 +8000063c: 93 86 46 00 addi a3, a3, 4 +80000640: b3 84 c4 00 add s1, s1, a2 +80000644: e3 12 07 fe bnez a4, -28 +80000648: b3 85 85 00 add a1, a1, s0 +8000064c: 93 95 25 00 slli a1, a1, 2 +80000650: b3 85 bf 00 add a1, t6, a1 +80000654: 27 a0 15 00 fsw ft1, 0(a1) +80000658: 13 05 15 00 addi a0, a0, 1 +8000065c: 93 87 47 00 addi a5, a5, 4 +80000660: e3 6a 45 fb bltu a0, s4, -76 +80000664: 13 0f 1f 00 addi t5, t5, 1 +80000668: b3 8a ca 00 add s5, s5, a2 +8000066c: e3 6c 1f f9 bltu t5, a7, -104 +80000670: 6f f0 1f f8 j -128 +80000674: 13 05 00 00 mv a0, zero +80000678: 93 06 03 00 mv a3, t1 +8000067c: 93 05 00 00 mv a1, zero +80000680: 13 87 06 00 mv a4, a3 +80000684: 23 20 07 00 sw zero, 0(a4) +80000688: 93 85 15 00 addi a1, a1, 1 +8000068c: 13 07 47 00 addi a4, a4, 4 +80000690: e3 ea 45 ff bltu a1, s4, -12 +80000694: 13 05 15 00 addi a0, a0, 1 +80000698: b3 86 c6 00 add a3, a3, a2 +8000069c: e3 60 15 ff bltu a0, a7, -32 +800006a0: 6f f0 1f f5 j -176 +800006a4: 83 2a 81 00 lw s5, 8(sp) +800006a8: 03 2a c1 00 lw s4, 12(sp) +800006ac: 83 29 01 01 lw s3, 16(sp) +800006b0: 03 29 41 01 lw s2, 20(sp) +800006b4: 83 24 81 01 lw s1, 24(sp) +800006b8: 03 24 c1 01 lw s0, 28(sp) +800006bc: 13 01 01 02 addi sp, sp, 32 +800006c0: 67 80 00 00 ret -8000070c _pocl_kernel_sgemm_workgroup_fast: -8000070c: 13 01 01 fe addi sp, sp, -32 -80000710: 23 2e 81 00 sw s0, 28(sp) -80000714: 23 2c 91 00 sw s1, 24(sp) -80000718: 23 2a 21 01 sw s2, 20(sp) -8000071c: 23 28 31 01 sw s3, 16(sp) -80000720: 23 26 41 01 sw s4, 12(sp) -80000724: 23 24 51 01 sw s5, 8(sp) -80000728: 13 08 00 00 mv a6, zero -8000072c: 03 27 c5 00 lw a4, 12(a0) -80000730: 83 27 05 00 lw a5, 0(a0) -80000734: 83 24 45 00 lw s1, 4(a0) -80000738: 83 2f 85 00 lw t6, 8(a0) -8000073c: 83 29 07 00 lw s3, 0(a4) -80000740: 03 aa 85 01 lw s4, 24(a1) -80000744: 83 a8 c5 01 lw a7, 28(a1) -80000748: 03 a5 c5 00 lw a0, 12(a1) -8000074c: 83 a2 05 02 lw t0, 32(a1) -80000750: 83 a5 05 01 lw a1, 16(a1) -80000754: 33 06 ca 02 mul a2, s4, a2 -80000758: 33 09 c5 00 add s2, a0, a2 -8000075c: 33 85 d8 02 mul a0, a7, a3 -80000760: 33 8e a5 00 add t3, a1, a0 -80000764: 33 85 c9 03 mul a0, s3, t3 -80000768: b3 05 a9 00 add a1, s2, a0 -8000076c: 93 95 25 00 slli a1, a1, 2 -80000770: 33 83 bf 00 add t1, t6, a1 -80000774: 13 15 25 00 slli a0, a0, 2 -80000778: b3 83 a4 00 add t2, s1, a0 -8000077c: 37 15 00 80 lui a0, 524289 -80000780: 13 05 c5 43 addi a0, a0, 1084 -80000784: 07 20 05 00 flw ft0, 0(a0) -80000788: 13 15 29 00 slli a0, s2, 2 -8000078c: b3 8e a7 00 add t4, a5, a0 -80000790: 13 96 29 00 slli a2, s3, 2 -80000794: 6f 00 c0 00 j 12 -80000798: 13 08 18 00 addi a6, a6, 1 -8000079c: 63 78 58 0a bgeu a6, t0, 176 -800007a0: 63 5e 30 07 blez s3, 124 -800007a4: 13 0f 00 00 mv t5, zero -800007a8: 93 8a 03 00 mv s5, t2 -800007ac: 13 05 00 00 mv a0, zero -800007b0: b3 05 ee 01 add a1, t3, t5 -800007b4: 33 84 35 03 mul s0, a1, s3 -800007b8: 93 87 0e 00 mv a5, t4 -800007bc: b3 05 a9 00 add a1, s2, a0 -800007c0: 93 84 07 00 mv s1, a5 -800007c4: 93 86 0a 00 mv a3, s5 -800007c8: 13 87 09 00 mv a4, s3 -800007cc: d3 00 00 20 fmv.s ft1, ft0 -800007d0: 07 a1 04 00 flw ft2, 0(s1) -800007d4: 87 a1 06 00 flw ft3, 0(a3) -800007d8: 53 71 31 10 fmul.s ft2, ft2, ft3 -800007dc: d3 70 11 00 fadd.s ft1, ft2, ft1 -800007e0: 13 07 f7 ff addi a4, a4, -1 -800007e4: 93 86 46 00 addi a3, a3, 4 -800007e8: b3 84 c4 00 add s1, s1, a2 -800007ec: e3 12 07 fe bnez a4, -28 -800007f0: b3 85 85 00 add a1, a1, s0 -800007f4: 93 95 25 00 slli a1, a1, 2 -800007f8: b3 85 bf 00 add a1, t6, a1 -800007fc: 27 a0 15 00 fsw ft1, 0(a1) -80000800: 13 05 15 00 addi a0, a0, 1 -80000804: 93 87 47 00 addi a5, a5, 4 -80000808: e3 6a 45 fb bltu a0, s4, -76 -8000080c: 13 0f 1f 00 addi t5, t5, 1 -80000810: b3 8a ca 00 add s5, s5, a2 -80000814: e3 6c 1f f9 bltu t5, a7, -104 -80000818: 6f f0 1f f8 j -128 -8000081c: 13 05 00 00 mv a0, zero -80000820: 93 05 03 00 mv a1, t1 -80000824: 93 06 00 00 mv a3, zero -80000828: 13 87 05 00 mv a4, a1 -8000082c: 23 20 07 00 sw zero, 0(a4) -80000830: 93 86 16 00 addi a3, a3, 1 -80000834: 13 07 47 00 addi a4, a4, 4 -80000838: e3 ea 46 ff bltu a3, s4, -12 -8000083c: 13 05 15 00 addi a0, a0, 1 -80000840: b3 85 c5 00 add a1, a1, a2 -80000844: e3 60 15 ff bltu a0, a7, -32 -80000848: 6f f0 1f f5 j -176 -8000084c: 83 2a 81 00 lw s5, 8(sp) -80000850: 03 2a c1 00 lw s4, 12(sp) -80000854: 83 29 01 01 lw s3, 16(sp) -80000858: 03 29 41 01 lw s2, 20(sp) -8000085c: 83 24 81 01 lw s1, 24(sp) -80000860: 03 24 c1 01 lw s0, 28(sp) -80000864: 13 01 01 02 addi sp, sp, 32 +800006c4 _pocl_kernel_sgemm_workgroup_fast: +800006c4: 13 01 01 fe addi sp, sp, -32 +800006c8: 23 2e 81 00 sw s0, 28(sp) +800006cc: 23 2c 91 00 sw s1, 24(sp) +800006d0: 23 2a 21 01 sw s2, 20(sp) +800006d4: 23 28 31 01 sw s3, 16(sp) +800006d8: 23 26 41 01 sw s4, 12(sp) +800006dc: 23 24 51 01 sw s5, 8(sp) +800006e0: 13 08 00 00 mv a6, zero +800006e4: 03 27 c5 00 lw a4, 12(a0) +800006e8: 83 27 05 00 lw a5, 0(a0) +800006ec: 83 24 45 00 lw s1, 4(a0) +800006f0: 83 2f 85 00 lw t6, 8(a0) +800006f4: 83 29 07 00 lw s3, 0(a4) +800006f8: 03 aa 85 01 lw s4, 24(a1) +800006fc: 83 a8 c5 01 lw a7, 28(a1) +80000700: 03 a5 c5 00 lw a0, 12(a1) +80000704: 83 a2 05 02 lw t0, 32(a1) +80000708: 83 a5 05 01 lw a1, 16(a1) +8000070c: 33 06 ca 02 mul a2, s4, a2 +80000710: 33 09 c5 00 add s2, a0, a2 +80000714: 33 85 d8 02 mul a0, a7, a3 +80000718: 33 8e a5 00 add t3, a1, a0 +8000071c: 33 85 c9 03 mul a0, s3, t3 +80000720: b3 05 a9 00 add a1, s2, a0 +80000724: 93 95 25 00 slli a1, a1, 2 +80000728: 33 83 bf 00 add t1, t6, a1 +8000072c: 13 15 25 00 slli a0, a0, 2 +80000730: b3 83 a4 00 add t2, s1, a0 +80000734: 37 15 00 80 lui a0, 524289 +80000738: 13 05 c5 43 addi a0, a0, 1084 +8000073c: 07 20 05 00 flw ft0, 0(a0) +80000740: 13 15 29 00 slli a0, s2, 2 +80000744: b3 8e a7 00 add t4, a5, a0 +80000748: 13 96 29 00 slli a2, s3, 2 +8000074c: 6f 00 c0 00 j 12 +80000750: 13 08 18 00 addi a6, a6, 1 +80000754: 63 78 58 0a bgeu a6, t0, 176 +80000758: 63 5e 30 07 blez s3, 124 +8000075c: 13 0f 00 00 mv t5, zero +80000760: 93 8a 03 00 mv s5, t2 +80000764: 13 05 00 00 mv a0, zero +80000768: b3 05 ee 01 add a1, t3, t5 +8000076c: 33 84 35 03 mul s0, a1, s3 +80000770: 93 87 0e 00 mv a5, t4 +80000774: b3 05 a9 00 add a1, s2, a0 +80000778: 93 84 07 00 mv s1, a5 +8000077c: 93 86 0a 00 mv a3, s5 +80000780: 13 87 09 00 mv a4, s3 +80000784: d3 00 00 20 fmv.s ft1, ft0 +80000788: 07 a1 04 00 flw ft2, 0(s1) +8000078c: 87 a1 06 00 flw ft3, 0(a3) +80000790: 53 71 31 10 fmul.s ft2, ft2, ft3 +80000794: d3 70 11 00 fadd.s ft1, ft2, ft1 +80000798: 13 07 f7 ff addi a4, a4, -1 +8000079c: 93 86 46 00 addi a3, a3, 4 +800007a0: b3 84 c4 00 add s1, s1, a2 +800007a4: e3 12 07 fe bnez a4, -28 +800007a8: b3 85 85 00 add a1, a1, s0 +800007ac: 93 95 25 00 slli a1, a1, 2 +800007b0: b3 85 bf 00 add a1, t6, a1 +800007b4: 27 a0 15 00 fsw ft1, 0(a1) +800007b8: 13 05 15 00 addi a0, a0, 1 +800007bc: 93 87 47 00 addi a5, a5, 4 +800007c0: e3 6a 45 fb bltu a0, s4, -76 +800007c4: 13 0f 1f 00 addi t5, t5, 1 +800007c8: b3 8a ca 00 add s5, s5, a2 +800007cc: e3 6c 1f f9 bltu t5, a7, -104 +800007d0: 6f f0 1f f8 j -128 +800007d4: 13 05 00 00 mv a0, zero +800007d8: 93 05 03 00 mv a1, t1 +800007dc: 93 06 00 00 mv a3, zero +800007e0: 13 87 05 00 mv a4, a1 +800007e4: 23 20 07 00 sw zero, 0(a4) +800007e8: 93 86 16 00 addi a3, a3, 1 +800007ec: 13 07 47 00 addi a4, a4, 4 +800007f0: e3 ea 46 ff bltu a3, s4, -12 +800007f4: 13 05 15 00 addi a0, a0, 1 +800007f8: b3 85 c5 00 add a1, a1, a2 +800007fc: e3 60 15 ff bltu a0, a7, -32 +80000800: 6f f0 1f f5 j -176 +80000804: 83 2a 81 00 lw s5, 8(sp) +80000808: 03 2a c1 00 lw s4, 12(sp) +8000080c: 83 29 01 01 lw s3, 16(sp) +80000810: 03 29 41 01 lw s2, 20(sp) +80000814: 83 24 81 01 lw s1, 24(sp) +80000818: 03 24 c1 01 lw s0, 28(sp) +8000081c: 13 01 01 02 addi sp, sp, 32 +80000820: 67 80 00 00 ret + +80000824 _exit: +80000824: 13 05 00 00 mv a0, zero +80000828: 6b 00 05 00 + +8000082c vx_set_sp: +8000082c: 73 25 50 02 csrr a0, 37 +80000830: 6b 00 05 00 +80000834: 97 11 00 00 auipc gp, 1 +80000838: 93 81 41 fd addi gp, gp, -44 +8000083c: f3 25 20 02 csrr a1, 34 +80000840: 93 95 a5 00 slli a1, a1, 10 +80000844: 73 26 00 02 csrr a2, 32 +80000848: 13 16 26 00 slli a2, a2, 2 +8000084c: 37 f1 ff 6f lui sp, 458751 +80000850: 33 01 b1 40 sub sp, sp, a1 +80000854: 33 01 c1 00 add sp, sp, a2 +80000858: f3 26 10 02 csrr a3, 33 +8000085c: 63 86 06 00 beqz a3, 12 +80000860: 13 05 00 00 mv a0, zero +80000864: 6b 00 05 00 + +80000868 RETURN: 80000868: 67 80 00 00 ret -8000086c _exit: -8000086c: 13 05 00 00 mv a0, zero -80000870: 6b 00 05 00 +8000086c vx_wspawn: +8000086c: 6b 10 b5 00 +80000870: 67 80 00 00 ret -80000874 vx_set_sp: -80000874: 73 25 50 02 csrr a0, 37 -80000878: 6b 00 05 00 -8000087c: 97 11 00 00 auipc gp, 1 -80000880: 93 81 c1 f8 addi gp, gp, -116 -80000884: f3 25 20 02 csrr a1, 34 -80000888: 93 95 a5 00 slli a1, a1, 10 -8000088c: 73 26 00 02 csrr a2, 32 -80000890: 13 16 26 00 slli a2, a2, 2 -80000894: 37 f1 ff 6f lui sp, 458751 -80000898: 33 01 b1 40 sub sp, sp, a1 -8000089c: 33 01 c1 00 add sp, sp, a2 -800008a0: f3 26 10 02 csrr a3, 33 -800008a4: 63 86 06 00 beqz a3, 12 -800008a8: 13 05 00 00 mv a0, zero -800008ac: 6b 00 05 00 +80000874 vx_tmc: +80000874: 6b 00 05 00 +80000878: 67 80 00 00 ret -800008b0 RETURN: +8000087c vx_barrier: +8000087c: 6b 40 b5 00 +80000880: 67 80 00 00 ret + +80000884 vx_split: +80000884: 6b 20 05 00 +80000888: 67 80 00 00 ret + +8000088c vx_join: +8000088c: 6b 30 00 00 +80000890: 67 80 00 00 ret + +80000894 vx_warp_id: +80000894: 73 25 10 02 csrr a0, 33 +80000898: 67 80 00 00 ret + +8000089c vx_warp_gid: +8000089c: 73 25 30 02 csrr a0, 35 +800008a0: 67 80 00 00 ret + +800008a4 vx_thread_id: +800008a4: 73 25 00 02 csrr a0, 32 +800008a8: 67 80 00 00 ret + +800008ac vx_thread_gid: +800008ac: 73 25 20 02 csrr a0, 34 800008b0: 67 80 00 00 ret -800008b4 vx_wspawn: -800008b4: 6b 10 b5 00 +800008b4 vx_core_id: +800008b4: 73 25 40 02 csrr a0, 36 800008b8: 67 80 00 00 ret -800008bc vx_tmc: -800008bc: 6b 00 05 00 +800008bc vx_num_threads: +800008bc: 73 25 50 02 csrr a0, 37 800008c0: 67 80 00 00 ret -800008c4 vx_barrier: -800008c4: 6b 40 b5 00 +800008c4 vx_num_warps: +800008c4: 73 25 60 02 csrr a0, 38 800008c8: 67 80 00 00 ret -800008cc vx_split: -800008cc: 6b 20 05 00 +800008cc vx_num_cores: +800008cc: 73 25 70 02 csrr a0, 39 800008d0: 67 80 00 00 ret -800008d4 vx_join: -800008d4: 6b 30 00 00 +800008d4 vx_num_cycles: +800008d4: 73 25 00 b0 csrr a0, mcycle 800008d8: 67 80 00 00 ret -800008dc vx_warp_id: -800008dc: 73 25 10 02 csrr a0, 33 +800008dc vx_num_instrs: +800008dc: 73 25 20 b0 csrr a0, minstret 800008e0: 67 80 00 00 ret -800008e4 vx_warp_gid: -800008e4: 73 25 30 02 csrr a0, 35 -800008e8: 67 80 00 00 ret +800008e4 atexit: +800008e4: 93 05 05 00 mv a1, a0 +800008e8: 93 06 00 00 mv a3, zero +800008ec: 13 06 00 00 mv a2, zero +800008f0: 13 05 00 00 mv a0, zero +800008f4: 6f 00 80 20 j 520 -800008ec vx_thread_id: -800008ec: 73 25 00 02 csrr a0, 32 -800008f0: 67 80 00 00 ret +800008f8 exit: +800008f8: 13 01 01 ff addi sp, sp, -16 +800008fc: 93 05 00 00 mv a1, zero +80000900: 23 24 81 00 sw s0, 8(sp) +80000904: 23 26 11 00 sw ra, 12(sp) +80000908: 13 04 05 00 mv s0, a0 +8000090c: ef 00 80 28 jal 648 +80000910: 03 a5 81 c2 lw a0, -984(gp) +80000914: 83 27 c5 03 lw a5, 60(a0) +80000918: 63 84 07 00 beqz a5, 8 +8000091c: e7 80 07 00 jalr a5 +80000920: 13 05 04 00 mv a0, s0 +80000924: ef f0 1f f0 jal -256 -800008f4 vx_thread_gid: -800008f4: 73 25 20 02 csrr a0, 34 -800008f8: 67 80 00 00 ret +80000928 __libc_fini_array: +80000928: 13 01 01 ff addi sp, sp, -16 +8000092c: 23 24 81 00 sw s0, 8(sp) +80000930: b7 17 00 80 lui a5, 524289 +80000934: 37 14 00 80 lui s0, 524289 +80000938: 13 04 44 00 addi s0, s0, 4 +8000093c: 93 87 47 00 addi a5, a5, 4 +80000940: b3 87 87 40 sub a5, a5, s0 +80000944: 23 22 91 00 sw s1, 4(sp) +80000948: 23 26 11 00 sw ra, 12(sp) +8000094c: 93 d4 27 40 srai s1, a5, 2 +80000950: 63 80 04 02 beqz s1, 32 +80000954: 93 87 c7 ff addi a5, a5, -4 +80000958: 33 84 87 00 add s0, a5, s0 +8000095c: 83 27 04 00 lw a5, 0(s0) +80000960: 93 84 f4 ff addi s1, s1, -1 +80000964: 13 04 c4 ff addi s0, s0, -4 +80000968: e7 80 07 00 jalr a5 +8000096c: e3 98 04 fe bnez s1, -16 +80000970: 83 20 c1 00 lw ra, 12(sp) +80000974: 03 24 81 00 lw s0, 8(sp) +80000978: 83 24 41 00 lw s1, 4(sp) +8000097c: 13 01 01 01 addi sp, sp, 16 +80000980: 67 80 00 00 ret -800008fc vx_core_id: -800008fc: 73 25 40 02 csrr a0, 36 -80000900: 67 80 00 00 ret +80000984 __libc_init_array: +80000984: 13 01 01 ff addi sp, sp, -16 +80000988: 23 24 81 00 sw s0, 8(sp) +8000098c: 23 20 21 01 sw s2, 0(sp) +80000990: 37 14 00 80 lui s0, 524289 +80000994: 37 19 00 80 lui s2, 524289 +80000998: 93 07 04 00 mv a5, s0 +8000099c: 13 09 09 00 mv s2, s2 +800009a0: 33 09 f9 40 sub s2, s2, a5 +800009a4: 23 26 11 00 sw ra, 12(sp) +800009a8: 23 22 91 00 sw s1, 4(sp) +800009ac: 13 59 29 40 srai s2, s2, 2 +800009b0: 63 00 09 02 beqz s2, 32 +800009b4: 13 04 04 00 mv s0, s0 +800009b8: 93 04 00 00 mv s1, zero +800009bc: 83 27 04 00 lw a5, 0(s0) +800009c0: 93 84 14 00 addi s1, s1, 1 +800009c4: 13 04 44 00 addi s0, s0, 4 +800009c8: e7 80 07 00 jalr a5 +800009cc: e3 18 99 fe bne s2, s1, -16 +800009d0: 37 14 00 80 lui s0, 524289 +800009d4: 37 19 00 80 lui s2, 524289 +800009d8: 93 07 04 00 mv a5, s0 +800009dc: 13 09 49 00 addi s2, s2, 4 +800009e0: 33 09 f9 40 sub s2, s2, a5 +800009e4: 13 59 29 40 srai s2, s2, 2 +800009e8: 63 00 09 02 beqz s2, 32 +800009ec: 13 04 04 00 mv s0, s0 +800009f0: 93 04 00 00 mv s1, zero +800009f4: 83 27 04 00 lw a5, 0(s0) +800009f8: 93 84 14 00 addi s1, s1, 1 +800009fc: 13 04 44 00 addi s0, s0, 4 +80000a00: e7 80 07 00 jalr a5 +80000a04: e3 18 99 fe bne s2, s1, -16 +80000a08: 83 20 c1 00 lw ra, 12(sp) +80000a0c: 03 24 81 00 lw s0, 8(sp) +80000a10: 83 24 41 00 lw s1, 4(sp) +80000a14: 03 29 01 00 lw s2, 0(sp) +80000a18: 13 01 01 01 addi sp, sp, 16 +80000a1c: 67 80 00 00 ret -80000904 vx_num_threads: -80000904: 73 25 50 02 csrr a0, 37 -80000908: 67 80 00 00 ret +80000a20 memset: +80000a20: 13 03 f0 00 addi t1, zero, 15 +80000a24: 13 07 05 00 mv a4, a0 +80000a28: 63 7e c3 02 bgeu t1, a2, 60 +80000a2c: 93 77 f7 00 andi a5, a4, 15 +80000a30: 63 90 07 0a bnez a5, 160 +80000a34: 63 92 05 08 bnez a1, 132 +80000a38: 93 76 06 ff andi a3, a2, -16 +80000a3c: 13 76 f6 00 andi a2, a2, 15 +80000a40: b3 86 e6 00 add a3, a3, a4 +80000a44: 23 20 b7 00 sw a1, 0(a4) +80000a48: 23 22 b7 00 sw a1, 4(a4) +80000a4c: 23 24 b7 00 sw a1, 8(a4) +80000a50: 23 26 b7 00 sw a1, 12(a4) +80000a54: 13 07 07 01 addi a4, a4, 16 +80000a58: e3 66 d7 fe bltu a4, a3, -20 +80000a5c: 63 14 06 00 bnez a2, 8 +80000a60: 67 80 00 00 ret +80000a64: b3 06 c3 40 sub a3, t1, a2 +80000a68: 93 96 26 00 slli a3, a3, 2 +80000a6c: 97 02 00 00 auipc t0, 0 +80000a70: b3 86 56 00 add a3, a3, t0 +80000a74: 67 80 c6 00 jr 12(a3) +80000a78: 23 07 b7 00 sb a1, 14(a4) +80000a7c: a3 06 b7 00 sb a1, 13(a4) +80000a80: 23 06 b7 00 sb a1, 12(a4) +80000a84: a3 05 b7 00 sb a1, 11(a4) +80000a88: 23 05 b7 00 sb a1, 10(a4) +80000a8c: a3 04 b7 00 sb a1, 9(a4) +80000a90: 23 04 b7 00 sb a1, 8(a4) +80000a94: a3 03 b7 00 sb a1, 7(a4) +80000a98: 23 03 b7 00 sb a1, 6(a4) +80000a9c: a3 02 b7 00 sb a1, 5(a4) +80000aa0: 23 02 b7 00 sb a1, 4(a4) +80000aa4: a3 01 b7 00 sb a1, 3(a4) +80000aa8: 23 01 b7 00 sb a1, 2(a4) +80000aac: a3 00 b7 00 sb a1, 1(a4) +80000ab0: 23 00 b7 00 sb a1, 0(a4) +80000ab4: 67 80 00 00 ret +80000ab8: 93 f5 f5 0f andi a1, a1, 255 +80000abc: 93 96 85 00 slli a3, a1, 8 +80000ac0: b3 e5 d5 00 or a1, a1, a3 +80000ac4: 93 96 05 01 slli a3, a1, 16 +80000ac8: b3 e5 d5 00 or a1, a1, a3 +80000acc: 6f f0 df f6 j -148 +80000ad0: 93 96 27 00 slli a3, a5, 2 +80000ad4: 97 02 00 00 auipc t0, 0 +80000ad8: b3 86 56 00 add a3, a3, t0 +80000adc: 93 82 00 00 mv t0, ra +80000ae0: e7 80 06 fa jalr -96(a3) +80000ae4: 93 80 02 00 mv ra, t0 +80000ae8: 93 87 07 ff addi a5, a5, -16 +80000aec: 33 07 f7 40 sub a4, a4, a5 +80000af0: 33 06 f6 00 add a2, a2, a5 +80000af4: e3 78 c3 f6 bgeu t1, a2, -144 +80000af8: 6f f0 df f3 j -196 -8000090c vx_num_warps: -8000090c: 73 25 60 02 csrr a0, 38 -80000910: 67 80 00 00 ret +80000afc __register_exitproc: +80000afc: 03 a7 81 c2 lw a4, -984(gp) +80000b00: 83 27 87 14 lw a5, 328(a4) +80000b04: 63 8c 07 04 beqz a5, 88 +80000b08: 03 a7 47 00 lw a4, 4(a5) +80000b0c: 13 08 f0 01 addi a6, zero, 31 +80000b10: 63 4e e8 06 blt a6, a4, 124 +80000b14: 13 18 27 00 slli a6, a4, 2 +80000b18: 63 06 05 02 beqz a0, 44 +80000b1c: 33 83 07 01 add t1, a5, a6 +80000b20: 23 24 c3 08 sw a2, 136(t1) +80000b24: 83 a8 87 18 lw a7, 392(a5) +80000b28: 13 06 10 00 addi a2, zero, 1 +80000b2c: 33 16 e6 00 sll a2, a2, a4 +80000b30: b3 e8 c8 00 or a7, a7, a2 +80000b34: 23 a4 17 19 sw a7, 392(a5) +80000b38: 23 24 d3 10 sw a3, 264(t1) +80000b3c: 93 06 20 00 addi a3, zero, 2 +80000b40: 63 04 d5 02 beq a0, a3, 40 +80000b44: 13 07 17 00 addi a4, a4, 1 +80000b48: 23 a2 e7 00 sw a4, 4(a5) +80000b4c: b3 87 07 01 add a5, a5, a6 +80000b50: 23 a4 b7 00 sw a1, 8(a5) +80000b54: 13 05 00 00 mv a0, zero +80000b58: 67 80 00 00 ret +80000b5c: 93 07 c7 14 addi a5, a4, 332 +80000b60: 23 24 f7 14 sw a5, 328(a4) +80000b64: 6f f0 5f fa j -92 +80000b68: 83 a6 c7 18 lw a3, 396(a5) +80000b6c: 13 07 17 00 addi a4, a4, 1 +80000b70: 23 a2 e7 00 sw a4, 4(a5) +80000b74: 33 e6 c6 00 or a2, a3, a2 +80000b78: 23 a6 c7 18 sw a2, 396(a5) +80000b7c: b3 87 07 01 add a5, a5, a6 +80000b80: 23 a4 b7 00 sw a1, 8(a5) +80000b84: 13 05 00 00 mv a0, zero +80000b88: 67 80 00 00 ret +80000b8c: 13 05 f0 ff addi a0, zero, -1 +80000b90: 67 80 00 00 ret -80000914 vx_num_cores: -80000914: 73 25 70 02 csrr a0, 39 -80000918: 67 80 00 00 ret - -8000091c vx_num_cycles: -8000091c: 73 25 00 c0 rdcycle a0 -80000920: 67 80 00 00 ret - -80000924 vx_num_instrs: -80000924: 73 25 20 c0 rdinstret a0 -80000928: 67 80 00 00 ret - -8000092c atexit: -8000092c: 93 05 05 00 mv a1, a0 -80000930: 93 06 00 00 mv a3, zero -80000934: 13 06 00 00 mv a2, zero -80000938: 13 05 00 00 mv a0, zero -8000093c: 6f 00 80 20 j 520 - -80000940 exit: -80000940: 13 01 01 ff addi sp, sp, -16 -80000944: 93 05 00 00 mv a1, zero -80000948: 23 24 81 00 sw s0, 8(sp) -8000094c: 23 26 11 00 sw ra, 12(sp) -80000950: 13 04 05 00 mv s0, a0 -80000954: ef 00 80 28 jal 648 -80000958: 03 a5 81 c2 lw a0, -984(gp) -8000095c: 83 27 c5 03 lw a5, 60(a0) -80000960: 63 84 07 00 beqz a5, 8 -80000964: e7 80 07 00 jalr a5 -80000968: 13 05 04 00 mv a0, s0 -8000096c: ef f0 1f f0 jal -256 - -80000970 __libc_fini_array: -80000970: 13 01 01 ff addi sp, sp, -16 -80000974: 23 24 81 00 sw s0, 8(sp) -80000978: b7 17 00 80 lui a5, 524289 -8000097c: 37 14 00 80 lui s0, 524289 -80000980: 13 04 44 00 addi s0, s0, 4 -80000984: 93 87 47 00 addi a5, a5, 4 -80000988: b3 87 87 40 sub a5, a5, s0 -8000098c: 23 22 91 00 sw s1, 4(sp) -80000990: 23 26 11 00 sw ra, 12(sp) -80000994: 93 d4 27 40 srai s1, a5, 2 -80000998: 63 80 04 02 beqz s1, 32 -8000099c: 93 87 c7 ff addi a5, a5, -4 -800009a0: 33 84 87 00 add s0, a5, s0 -800009a4: 83 27 04 00 lw a5, 0(s0) -800009a8: 93 84 f4 ff addi s1, s1, -1 -800009ac: 13 04 c4 ff addi s0, s0, -4 -800009b0: e7 80 07 00 jalr a5 -800009b4: e3 98 04 fe bnez s1, -16 -800009b8: 83 20 c1 00 lw ra, 12(sp) -800009bc: 03 24 81 00 lw s0, 8(sp) -800009c0: 83 24 41 00 lw s1, 4(sp) -800009c4: 13 01 01 01 addi sp, sp, 16 -800009c8: 67 80 00 00 ret - -800009cc __libc_init_array: -800009cc: 13 01 01 ff addi sp, sp, -16 -800009d0: 23 24 81 00 sw s0, 8(sp) -800009d4: 23 20 21 01 sw s2, 0(sp) -800009d8: 37 14 00 80 lui s0, 524289 -800009dc: 37 19 00 80 lui s2, 524289 -800009e0: 93 07 04 00 mv a5, s0 -800009e4: 13 09 09 00 mv s2, s2 -800009e8: 33 09 f9 40 sub s2, s2, a5 -800009ec: 23 26 11 00 sw ra, 12(sp) -800009f0: 23 22 91 00 sw s1, 4(sp) -800009f4: 13 59 29 40 srai s2, s2, 2 -800009f8: 63 00 09 02 beqz s2, 32 -800009fc: 13 04 04 00 mv s0, s0 -80000a00: 93 04 00 00 mv s1, zero -80000a04: 83 27 04 00 lw a5, 0(s0) -80000a08: 93 84 14 00 addi s1, s1, 1 -80000a0c: 13 04 44 00 addi s0, s0, 4 -80000a10: e7 80 07 00 jalr a5 -80000a14: e3 18 99 fe bne s2, s1, -16 -80000a18: 37 14 00 80 lui s0, 524289 -80000a1c: 37 19 00 80 lui s2, 524289 -80000a20: 93 07 04 00 mv a5, s0 -80000a24: 13 09 49 00 addi s2, s2, 4 -80000a28: 33 09 f9 40 sub s2, s2, a5 -80000a2c: 13 59 29 40 srai s2, s2, 2 -80000a30: 63 00 09 02 beqz s2, 32 -80000a34: 13 04 04 00 mv s0, s0 -80000a38: 93 04 00 00 mv s1, zero -80000a3c: 83 27 04 00 lw a5, 0(s0) -80000a40: 93 84 14 00 addi s1, s1, 1 -80000a44: 13 04 44 00 addi s0, s0, 4 -80000a48: e7 80 07 00 jalr a5 -80000a4c: e3 18 99 fe bne s2, s1, -16 -80000a50: 83 20 c1 00 lw ra, 12(sp) -80000a54: 03 24 81 00 lw s0, 8(sp) -80000a58: 83 24 41 00 lw s1, 4(sp) -80000a5c: 03 29 01 00 lw s2, 0(sp) -80000a60: 13 01 01 01 addi sp, sp, 16 -80000a64: 67 80 00 00 ret - -80000a68 memset: -80000a68: 13 03 f0 00 addi t1, zero, 15 -80000a6c: 13 07 05 00 mv a4, a0 -80000a70: 63 7e c3 02 bgeu t1, a2, 60 -80000a74: 93 77 f7 00 andi a5, a4, 15 -80000a78: 63 90 07 0a bnez a5, 160 -80000a7c: 63 92 05 08 bnez a1, 132 -80000a80: 93 76 06 ff andi a3, a2, -16 -80000a84: 13 76 f6 00 andi a2, a2, 15 -80000a88: b3 86 e6 00 add a3, a3, a4 -80000a8c: 23 20 b7 00 sw a1, 0(a4) -80000a90: 23 22 b7 00 sw a1, 4(a4) -80000a94: 23 24 b7 00 sw a1, 8(a4) -80000a98: 23 26 b7 00 sw a1, 12(a4) -80000a9c: 13 07 07 01 addi a4, a4, 16 -80000aa0: e3 66 d7 fe bltu a4, a3, -20 -80000aa4: 63 14 06 00 bnez a2, 8 -80000aa8: 67 80 00 00 ret -80000aac: b3 06 c3 40 sub a3, t1, a2 -80000ab0: 93 96 26 00 slli a3, a3, 2 -80000ab4: 97 02 00 00 auipc t0, 0 -80000ab8: b3 86 56 00 add a3, a3, t0 -80000abc: 67 80 c6 00 jr 12(a3) -80000ac0: 23 07 b7 00 sb a1, 14(a4) -80000ac4: a3 06 b7 00 sb a1, 13(a4) -80000ac8: 23 06 b7 00 sb a1, 12(a4) -80000acc: a3 05 b7 00 sb a1, 11(a4) -80000ad0: 23 05 b7 00 sb a1, 10(a4) -80000ad4: a3 04 b7 00 sb a1, 9(a4) -80000ad8: 23 04 b7 00 sb a1, 8(a4) -80000adc: a3 03 b7 00 sb a1, 7(a4) -80000ae0: 23 03 b7 00 sb a1, 6(a4) -80000ae4: a3 02 b7 00 sb a1, 5(a4) -80000ae8: 23 02 b7 00 sb a1, 4(a4) -80000aec: a3 01 b7 00 sb a1, 3(a4) -80000af0: 23 01 b7 00 sb a1, 2(a4) -80000af4: a3 00 b7 00 sb a1, 1(a4) -80000af8: 23 00 b7 00 sb a1, 0(a4) -80000afc: 67 80 00 00 ret -80000b00: 93 f5 f5 0f andi a1, a1, 255 -80000b04: 93 96 85 00 slli a3, a1, 8 -80000b08: b3 e5 d5 00 or a1, a1, a3 -80000b0c: 93 96 05 01 slli a3, a1, 16 -80000b10: b3 e5 d5 00 or a1, a1, a3 -80000b14: 6f f0 df f6 j -148 -80000b18: 93 96 27 00 slli a3, a5, 2 -80000b1c: 97 02 00 00 auipc t0, 0 -80000b20: b3 86 56 00 add a3, a3, t0 -80000b24: 93 82 00 00 mv t0, ra -80000b28: e7 80 06 fa jalr -96(a3) -80000b2c: 93 80 02 00 mv ra, t0 -80000b30: 93 87 07 ff addi a5, a5, -16 -80000b34: 33 07 f7 40 sub a4, a4, a5 -80000b38: 33 06 f6 00 add a2, a2, a5 -80000b3c: e3 78 c3 f6 bgeu t1, a2, -144 -80000b40: 6f f0 df f3 j -196 - -80000b44 __register_exitproc: -80000b44: 03 a7 81 c2 lw a4, -984(gp) -80000b48: 83 27 87 14 lw a5, 328(a4) -80000b4c: 63 8c 07 04 beqz a5, 88 -80000b50: 03 a7 47 00 lw a4, 4(a5) -80000b54: 13 08 f0 01 addi a6, zero, 31 -80000b58: 63 4e e8 06 blt a6, a4, 124 -80000b5c: 13 18 27 00 slli a6, a4, 2 -80000b60: 63 06 05 02 beqz a0, 44 -80000b64: 33 83 07 01 add t1, a5, a6 -80000b68: 23 24 c3 08 sw a2, 136(t1) -80000b6c: 83 a8 87 18 lw a7, 392(a5) -80000b70: 13 06 10 00 addi a2, zero, 1 -80000b74: 33 16 e6 00 sll a2, a2, a4 -80000b78: b3 e8 c8 00 or a7, a7, a2 -80000b7c: 23 a4 17 19 sw a7, 392(a5) -80000b80: 23 24 d3 10 sw a3, 264(t1) -80000b84: 93 06 20 00 addi a3, zero, 2 -80000b88: 63 04 d5 02 beq a0, a3, 40 -80000b8c: 13 07 17 00 addi a4, a4, 1 -80000b90: 23 a2 e7 00 sw a4, 4(a5) -80000b94: b3 87 07 01 add a5, a5, a6 -80000b98: 23 a4 b7 00 sw a1, 8(a5) -80000b9c: 13 05 00 00 mv a0, zero -80000ba0: 67 80 00 00 ret -80000ba4: 93 07 c7 14 addi a5, a4, 332 -80000ba8: 23 24 f7 14 sw a5, 328(a4) -80000bac: 6f f0 5f fa j -92 -80000bb0: 83 a6 c7 18 lw a3, 396(a5) -80000bb4: 13 07 17 00 addi a4, a4, 1 -80000bb8: 23 a2 e7 00 sw a4, 4(a5) -80000bbc: 33 e6 c6 00 or a2, a3, a2 -80000bc0: 23 a6 c7 18 sw a2, 396(a5) -80000bc4: b3 87 07 01 add a5, a5, a6 -80000bc8: 23 a4 b7 00 sw a1, 8(a5) -80000bcc: 13 05 00 00 mv a0, zero -80000bd0: 67 80 00 00 ret -80000bd4: 13 05 f0 ff addi a0, zero, -1 -80000bd8: 67 80 00 00 ret - -80000bdc __call_exitprocs: -80000bdc: 13 01 01 fd addi sp, sp, -48 -80000be0: 23 2c 41 01 sw s4, 24(sp) -80000be4: 03 aa 81 c2 lw s4, -984(gp) -80000be8: 23 20 21 03 sw s2, 32(sp) -80000bec: 23 26 11 02 sw ra, 44(sp) -80000bf0: 03 29 8a 14 lw s2, 328(s4) -80000bf4: 23 24 81 02 sw s0, 40(sp) -80000bf8: 23 22 91 02 sw s1, 36(sp) -80000bfc: 23 2e 31 01 sw s3, 28(sp) -80000c00: 23 2a 51 01 sw s5, 20(sp) -80000c04: 23 28 61 01 sw s6, 16(sp) -80000c08: 23 26 71 01 sw s7, 12(sp) -80000c0c: 23 24 81 01 sw s8, 8(sp) -80000c10: 63 00 09 04 beqz s2, 64 -80000c14: 13 0b 05 00 mv s6, a0 -80000c18: 93 8b 05 00 mv s7, a1 -80000c1c: 93 0a 10 00 addi s5, zero, 1 -80000c20: 93 09 f0 ff addi s3, zero, -1 -80000c24: 83 24 49 00 lw s1, 4(s2) -80000c28: 13 84 f4 ff addi s0, s1, -1 -80000c2c: 63 42 04 02 bltz s0, 36 -80000c30: 93 94 24 00 slli s1, s1, 2 -80000c34: b3 04 99 00 add s1, s2, s1 -80000c38: 63 84 0b 04 beqz s7, 72 -80000c3c: 83 a7 44 10 lw a5, 260(s1) -80000c40: 63 80 77 05 beq a5, s7, 64 -80000c44: 13 04 f4 ff addi s0, s0, -1 -80000c48: 93 84 c4 ff addi s1, s1, -4 -80000c4c: e3 16 34 ff bne s0, s3, -20 -80000c50: 83 20 c1 02 lw ra, 44(sp) -80000c54: 03 24 81 02 lw s0, 40(sp) -80000c58: 83 24 41 02 lw s1, 36(sp) -80000c5c: 03 29 01 02 lw s2, 32(sp) -80000c60: 83 29 c1 01 lw s3, 28(sp) -80000c64: 03 2a 81 01 lw s4, 24(sp) -80000c68: 83 2a 41 01 lw s5, 20(sp) -80000c6c: 03 2b 01 01 lw s6, 16(sp) -80000c70: 83 2b c1 00 lw s7, 12(sp) -80000c74: 03 2c 81 00 lw s8, 8(sp) -80000c78: 13 01 01 03 addi sp, sp, 48 -80000c7c: 67 80 00 00 ret -80000c80: 83 27 49 00 lw a5, 4(s2) -80000c84: 83 a6 44 00 lw a3, 4(s1) -80000c88: 93 87 f7 ff addi a5, a5, -1 -80000c8c: 63 8e 87 04 beq a5, s0, 92 -80000c90: 23 a2 04 00 sw zero, 4(s1) -80000c94: e3 88 06 fa beqz a3, -80 -80000c98: 83 27 89 18 lw a5, 392(s2) -80000c9c: 33 97 8a 00 sll a4, s5, s0 -80000ca0: 03 2c 49 00 lw s8, 4(s2) -80000ca4: b3 77 f7 00 and a5, a4, a5 -80000ca8: 63 92 07 02 bnez a5, 36 +80000b94 __call_exitprocs: +80000b94: 13 01 01 fd addi sp, sp, -48 +80000b98: 23 2c 41 01 sw s4, 24(sp) +80000b9c: 03 aa 81 c2 lw s4, -984(gp) +80000ba0: 23 20 21 03 sw s2, 32(sp) +80000ba4: 23 26 11 02 sw ra, 44(sp) +80000ba8: 03 29 8a 14 lw s2, 328(s4) +80000bac: 23 24 81 02 sw s0, 40(sp) +80000bb0: 23 22 91 02 sw s1, 36(sp) +80000bb4: 23 2e 31 01 sw s3, 28(sp) +80000bb8: 23 2a 51 01 sw s5, 20(sp) +80000bbc: 23 28 61 01 sw s6, 16(sp) +80000bc0: 23 26 71 01 sw s7, 12(sp) +80000bc4: 23 24 81 01 sw s8, 8(sp) +80000bc8: 63 00 09 04 beqz s2, 64 +80000bcc: 13 0b 05 00 mv s6, a0 +80000bd0: 93 8b 05 00 mv s7, a1 +80000bd4: 93 0a 10 00 addi s5, zero, 1 +80000bd8: 93 09 f0 ff addi s3, zero, -1 +80000bdc: 83 24 49 00 lw s1, 4(s2) +80000be0: 13 84 f4 ff addi s0, s1, -1 +80000be4: 63 42 04 02 bltz s0, 36 +80000be8: 93 94 24 00 slli s1, s1, 2 +80000bec: b3 04 99 00 add s1, s2, s1 +80000bf0: 63 84 0b 04 beqz s7, 72 +80000bf4: 83 a7 44 10 lw a5, 260(s1) +80000bf8: 63 80 77 05 beq a5, s7, 64 +80000bfc: 13 04 f4 ff addi s0, s0, -1 +80000c00: 93 84 c4 ff addi s1, s1, -4 +80000c04: e3 16 34 ff bne s0, s3, -20 +80000c08: 83 20 c1 02 lw ra, 44(sp) +80000c0c: 03 24 81 02 lw s0, 40(sp) +80000c10: 83 24 41 02 lw s1, 36(sp) +80000c14: 03 29 01 02 lw s2, 32(sp) +80000c18: 83 29 c1 01 lw s3, 28(sp) +80000c1c: 03 2a 81 01 lw s4, 24(sp) +80000c20: 83 2a 41 01 lw s5, 20(sp) +80000c24: 03 2b 01 01 lw s6, 16(sp) +80000c28: 83 2b c1 00 lw s7, 12(sp) +80000c2c: 03 2c 81 00 lw s8, 8(sp) +80000c30: 13 01 01 03 addi sp, sp, 48 +80000c34: 67 80 00 00 ret +80000c38: 83 27 49 00 lw a5, 4(s2) +80000c3c: 83 a6 44 00 lw a3, 4(s1) +80000c40: 93 87 f7 ff addi a5, a5, -1 +80000c44: 63 8e 87 04 beq a5, s0, 92 +80000c48: 23 a2 04 00 sw zero, 4(s1) +80000c4c: e3 88 06 fa beqz a3, -80 +80000c50: 83 27 89 18 lw a5, 392(s2) +80000c54: 33 97 8a 00 sll a4, s5, s0 +80000c58: 03 2c 49 00 lw s8, 4(s2) +80000c5c: b3 77 f7 00 and a5, a4, a5 +80000c60: 63 92 07 02 bnez a5, 36 +80000c64: e7 80 06 00 jalr a3 +80000c68: 03 27 49 00 lw a4, 4(s2) +80000c6c: 83 27 8a 14 lw a5, 328(s4) +80000c70: 63 14 87 01 bne a4, s8, 8 +80000c74: e3 04 f9 f8 beq s2, a5, -120 +80000c78: e3 88 07 f8 beqz a5, -112 +80000c7c: 13 89 07 00 mv s2, a5 +80000c80: 6f f0 df f5 j -164 +80000c84: 83 27 c9 18 lw a5, 396(s2) +80000c88: 83 a5 44 08 lw a1, 132(s1) +80000c8c: 33 77 f7 00 and a4, a4, a5 +80000c90: 63 1c 07 00 bnez a4, 24 +80000c94: 13 05 0b 00 mv a0, s6 +80000c98: e7 80 06 00 jalr a3 +80000c9c: 6f f0 df fc j -52 +80000ca0: 23 22 89 00 sw s0, 4(s2) +80000ca4: 6f f0 9f fa j -88 +80000ca8: 13 85 05 00 mv a0, a1 80000cac: e7 80 06 00 jalr a3 -80000cb0: 03 27 49 00 lw a4, 4(s2) -80000cb4: 83 27 8a 14 lw a5, 328(s4) -80000cb8: 63 14 87 01 bne a4, s8, 8 -80000cbc: e3 04 f9 f8 beq s2, a5, -120 -80000cc0: e3 88 07 f8 beqz a5, -112 -80000cc4: 13 89 07 00 mv s2, a5 -80000cc8: 6f f0 df f5 j -164 -80000ccc: 83 27 c9 18 lw a5, 396(s2) -80000cd0: 83 a5 44 08 lw a1, 132(s1) -80000cd4: 33 77 f7 00 and a4, a4, a5 -80000cd8: 63 1c 07 00 bnez a4, 24 -80000cdc: 13 05 0b 00 mv a0, s6 -80000ce0: e7 80 06 00 jalr a3 -80000ce4: 6f f0 df fc j -52 -80000ce8: 23 22 89 00 sw s0, 4(s2) -80000cec: 6f f0 9f fa j -88 -80000cf0: 13 85 05 00 mv a0, a1 -80000cf4: e7 80 06 00 jalr a3 -80000cf8: 6f f0 9f fb j -72 +80000cb0: 6f f0 9f fb j -72 Disassembly of section .init_array: @@ -1078,7 +1060,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: b0 08 + a4: 68 08 a6: 00 80 a8: 00 00 aa: 00 00 @@ -1196,7 +1178,7 @@ Disassembly of section .symtab: 1de: f1 ff 1e0: 15 01 1e2: 00 00 - 1e4: fc 08 + 1e4: b4 08 1e6: 00 80 1e8: 00 00 1ea: 00 00 @@ -1204,7 +1186,7 @@ Disassembly of section .symtab: 1ee: 02 00 1f0: 20 01 1f2: 00 00 - 1f4: b4 08 + 1f4: 6c 08 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1214,7 +1196,7 @@ Disassembly of section .symtab: 202: 00 00 204: 60 00 206: 00 80 - 208: 3c 01 + 208: 48 01 20a: 00 00 20c: 12 00 20e: 02 00 @@ -1222,13 +1204,13 @@ Disassembly of section .symtab: 212: 00 00 214: 40 14 216: 00 80 - 218: 04 00 + 218: 20 00 21a: 00 00 21c: 11 00 21e: 06 00 220: 4e 01 222: 00 00 - 224: bc 08 + 224: 74 08 226: 00 80 228: 00 00 22a: 00 00 @@ -1236,7 +1218,7 @@ Disassembly of section .symtab: 22e: 02 00 230: 55 01 232: 00 00 - 234: a0 05 + 234: 58 05 236: 00 80 238: 6c 01 23a: 00 00 @@ -1252,7 +1234,7 @@ Disassembly of section .symtab: 24e: 05 00 250: 82 01 252: 00 00 - 254: d4 08 + 254: 8c 08 256: 00 80 258: 00 00 25a: 00 00 @@ -1260,7 +1242,7 @@ Disassembly of section .symtab: 25e: 02 00 260: 8a 01 262: 00 00 - 264: 0c 09 + 264: c4 08 266: 00 80 268: 00 00 26a: 00 00 @@ -1269,13 +1251,13 @@ Disassembly of section .symtab: 270: 97 01 00 00 auipc gp, 0 274: 3c 02 276: 00 80 - 278: cc 01 + 278: 84 01 27a: 00 00 27c: 12 00 27e: 02 00 280: a4 01 282: 00 00 - 284: cc 08 + 284: 84 08 286: 00 80 288: 00 00 28a: 00 00 @@ -1291,7 +1273,7 @@ Disassembly of section .symtab: 29e: 05 00 2a0: c0 01 2a2: 00 00 - 2a4: cc 09 + 2a4: 84 09 2a6: 00 80 2a8: 9c 00 2aa: 00 00 @@ -1299,7 +1281,7 @@ Disassembly of section .symtab: 2ae: 02 00 2b0: d2 01 2b2: 00 00 - 2b4: 04 09 + 2b4: bc 08 2b6: 00 80 2b8: 00 00 2ba: 00 00 @@ -1307,7 +1289,7 @@ Disassembly of section .symtab: 2be: 02 00 2c0: e1 01 2c2: 00 00 - 2c4: dc 08 + 2c4: 94 08 2c6: 00 80 2c8: 00 00 2ca: 00 00 @@ -1315,7 +1297,7 @@ Disassembly of section .symtab: 2ce: 02 00 2d0: ec 01 2d2: 00 00 - 2d4: ec 08 + 2d4: a4 08 2d6: 00 80 2d8: 00 00 2da: 00 00 @@ -1323,14 +1305,14 @@ Disassembly of section .symtab: 2de: 02 00 2e0: f9 01 2e2: 00 00 - 2e4: 70 09 + 2e4: 28 09 2e6: 00 80 2e8: 5c 00 2ea: 00 00 2ec: 12 00 2ee: 02 00 2f0: 0b 02 00 00 - 2f4: 74 08 + 2f4: 2c 08 2f6: 00 80 2f8: 00 00 2fa: 00 00 @@ -1338,7 +1320,7 @@ Disassembly of section .symtab: 2fe: 02 00 300: 15 02 302: 00 00 - 304: c4 08 + 304: 7c 08 306: 00 80 308: 00 00 30a: 00 00 @@ -1346,7 +1328,7 @@ Disassembly of section .symtab: 30e: 02 00 310: 20 02 312: 00 00 - 314: dc 0b + 314: 94 0b 316: 00 80 318: 20 01 31a: 00 00 @@ -1362,7 +1344,7 @@ Disassembly of section .symtab: 32e: 01 00 330: 31 02 332: 00 00 - 334: 44 0b + 334: fc 0a 336: 00 80 338: 98 00 33a: 00 00 @@ -1370,7 +1352,7 @@ Disassembly of section .symtab: 33e: 02 00 340: 45 02 342: 00 00 - 344: 44 14 + 344: 60 14 346: 00 80 348: 00 00 34a: 00 00 @@ -1386,14 +1368,14 @@ Disassembly of section .symtab: 35e: 06 00 360: 5d 02 362: 00 00 - 364: 0c 07 + 364: c4 06 366: 00 80 368: 60 01 36a: 00 00 36c: 12 00 36e: 02 00 370: 7f 02 00 00 - 374: 68 0a + 374: 20 0a 376: 00 80 378: dc 00 37a: 00 00 @@ -1401,14 +1383,14 @@ Disassembly of section .symtab: 37e: 02 00 380: 86 02 382: 00 00 - 384: 08 04 + 384: c0 03 386: 00 80 388: 30 00 38a: 00 00 38c: 12 00 38e: 02 00 390: 8b 02 00 00 - 394: 1c 09 + 394: d4 08 396: 00 80 398: 00 00 39a: 00 00 @@ -1416,7 +1398,7 @@ Disassembly of section .symtab: 39e: 02 00 3a0: 99 02 3a2: 00 00 - 3a4: 2c 09 + 3a4: e4 08 3a6: 00 80 3a8: 14 00 3aa: 00 00 @@ -1424,7 +1406,7 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: a0 02 3b2: 00 00 - 3b4: f4 08 + 3b4: ac 08 3b6: 00 80 3b8: 00 00 3ba: 00 00 @@ -1432,23 +1414,23 @@ Disassembly of section .symtab: 3be: 02 00 3c0: ae 02 3c2: 00 00 - 3c4: 14 09 + 3c4: cc 08 3c6: 00 80 3c8: 00 00 3ca: 00 00 3cc: 12 00 3ce: 02 00 3d0: bb 02 00 00 - 3d4: e4 08 + 3d4: 9c 08 3d6: 00 80 3d8: 00 00 3da: 00 00 3dc: 12 00 3de: 02 00 3e0: c7 02 00 00 fmsub.s ft5, ft0, ft0, ft0, rne - 3e4: 9c 01 + 3e4: a8 01 3e6: 00 80 - 3e8: a0 00 + 3e8: 94 00 3ea: 00 00 3ec: 12 00 3ee: 02 00 @@ -1469,7 +1451,7 @@ Disassembly of section .symtab: 40e: 05 00 410: 9d 00 412: 00 00 - 414: 44 14 + 414: 60 14 416: 00 80 418: 00 00 41a: 00 00 @@ -1477,7 +1459,7 @@ Disassembly of section .symtab: 41e: 06 00 420: 0a 03 422: 00 00 - 424: 40 09 + 424: f8 08 426: 00 80 428: 30 00 42a: 00 00 @@ -1485,7 +1467,7 @@ Disassembly of section .symtab: 42e: 02 00 430: f6 02 432: 00 00 - 434: 38 04 + 434: f0 03 436: 00 80 438: 68 01 43a: 00 00 @@ -1493,14 +1475,14 @@ Disassembly of section .symtab: 43e: 02 00 440: 09 03 442: 00 00 - 444: 6c 08 + 444: 24 08 446: 00 80 448: 00 00 44a: 00 00 44c: 12 00 44e: 02 00 450: 0f 03 00 00 - 454: 24 09 + 454: dc 08 456: 00 80 458: 00 00 45a: 00 00 @@ -1540,12 +1522,13 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 37 38 2d 33 lui a6, 209619 - 4a: 36 2d - 4c: 32 62 - 4e: 2d 35 + 46: 34 62 + 48: 2d 64 + 4a: 39 2d + 4c: 37 38 2d 65 lui a6, 414419 50: 32 2d - 52: 63 39 2e 63 + 52: 32 38 + 54: 2e 63 56: 00 70 58: 61 72 5a: 61 6c diff --git a/benchmarks/opencl/transpose/Makefile b/benchmarks/opencl/transpose/Makefile index dce57c43..7faeefdc 100644 --- a/benchmarks/opencl/transpose/Makefile +++ b/benchmarks/opencl/transpose/Makefile @@ -52,7 +52,7 @@ clean: rm -rf $(PROJECT) *.o .depend clean-all: clean - rm *.pocl *.dump + rm -rf *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/vecadd/Makefile b/benchmarks/opencl/vecadd/Makefile index 6ddf59dd..31500ac6 100644 --- a/benchmarks/opencl/vecadd/Makefile +++ b/benchmarks/opencl/vecadd/Makefile @@ -54,7 +54,7 @@ clean: rm -rf $(PROJECT) *.o .depend clean-all: clean - rm *.pocl *.dump + rm -rf *.pocl *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/benchmarks/opencl/vecadd/kernel.pocl b/benchmarks/opencl/vecadd/kernel.pocl index 2656fb37..5d31b213 100644 Binary files a/benchmarks/opencl/vecadd/kernel.pocl and b/benchmarks/opencl/vecadd/kernel.pocl differ diff --git a/benchmarks/opencl/vecadd/vecadd.dump b/benchmarks/opencl/vecadd/vecadd.dump index ea5e1946..ebd00651 100644 --- a/benchmarks/opencl/vecadd/vecadd.dump +++ b/benchmarks/opencl/vecadd/vecadd.dump @@ -1,28 +1,28 @@ -/tmp/pocl_vortex_kernel-4b-aa-30-de-b5.elf: file format ELF32-riscv +/tmp/pocl_vortex_kernel-32-e0-bf-fa-01.elf: file format ELF32-riscv Disassembly of section .init: 80000000 _start: 80000000: 97 05 00 00 auipc a1, 0 -80000004: 93 85 05 5f addi a1, a1, 1520 +80000004: 93 85 85 5a addi a1, a1, 1448 80000008: 73 25 60 02 csrr a0, 38 8000000c: 6b 10 b5 00 -80000010: ef 00 00 5e jal 1504 +80000010: ef 00 80 59 jal 1432 80000014: 13 05 10 00 addi a0, zero, 1 80000018: 6b 00 05 00 8000001c: 13 85 c1 c2 addi a0, gp, -980 -80000020: 13 86 01 c3 addi a2, gp, -976 +80000020: 13 86 c1 c4 addi a2, gp, -948 80000024: 33 06 a6 40 sub a2, a2, a0 80000028: 93 05 00 00 mv a1, zero -8000002c: ef 00 80 7b jal 1976 +8000002c: ef 00 00 77 jal 1904 80000030: 17 05 00 00 auipc a0, 0 -80000034: 13 05 c5 6b addi a0, a0, 1724 -80000038: ef 00 00 67 jal 1648 -8000003c: ef 00 c0 70 jal 1804 -80000040: ef 00 80 3c jal 968 -80000044: 6f 00 80 67 j 1656 +80000034: 13 05 45 67 addi a0, a0, 1652 +80000038: ef 00 80 62 jal 1576 +8000003c: ef 00 40 6c jal 1732 +80000040: ef 00 00 38 jal 896 +80000044: 6f 00 00 63 j 1584 Disassembly of section .text: @@ -30,8 +30,8 @@ Disassembly of section .text: 80000048: 93 07 00 00 mv a5, zero 8000004c: 63 88 07 00 beqz a5, 16 80000050: 37 05 00 80 lui a0, 524288 -80000054: 13 05 c5 6e addi a0, a0, 1772 -80000058: 6f 00 00 65 j 1616 +80000054: 13 05 45 6a addi a0, a0, 1700 +80000058: 6f 00 80 60 j 1544 8000005c: 67 80 00 00 ret 80000060 kernel_spawn_run_warp: @@ -46,116 +46,116 @@ Disassembly of section .text: 80000080: 23 28 61 01 sw s6, 16(sp) 80000084: 23 26 71 01 sw s7, 12(sp) 80000088: 23 24 81 01 sw s8, 8(sp) -8000008c: ef 00 40 5f jal 1524 -80000090: ef 00 80 5a jal 1448 -80000094: ef 00 c0 5e jal 1516 -80000098: 83 a5 c1 c2 lw a1, -980(gp) -8000009c: 83 a5 05 00 lw a1, 0(a1) -800000a0: 83 aa 05 00 lw s5, 0(a1) -800000a4: 03 ab 45 00 lw s6, 4(a1) +8000008c: ef 00 c0 5a jal 1452 +80000090: ef 00 00 56 jal 1376 +80000094: ef 00 c0 59 jal 1436 +80000098: 93 04 05 00 mv s1, a0 +8000009c: ef 00 40 57 jal 1396 +800000a0: 93 09 05 00 mv s3, a0 +800000a4: ef 00 c0 57 jal 1404 800000a8: 13 09 05 00 mv s2, a0 -800000ac: ef 00 c0 5a jal 1452 -800000b0: 93 09 05 00 mv s3, a0 -800000b4: ef 00 40 5b jal 1460 -800000b8: 03 a8 c1 c2 lw a6, -980(gp) -800000bc: 03 27 48 01 lw a4, 20(a6) -800000c0: 83 25 08 01 lw a1, 16(a6) -800000c4: 93 06 07 00 mv a3, a4 -800000c8: 63 44 37 01 blt a4, s3, 8 -800000cc: 93 86 09 00 mv a3, s3 -800000d0: 33 a7 e9 00 slt a4, s3, a4 -800000d4: 33 87 e5 00 add a4, a1, a4 -800000d8: 93 07 10 00 addi a5, zero, 1 -800000dc: 63 46 f7 08 blt a4, a5, 140 -800000e0: 33 0a 5b 03 mul s4, s6, s5 -800000e4: 83 27 c8 00 lw a5, 12(a6) -800000e8: 13 0c f7 ff addi s8, a4, -1 -800000ec: b3 85 b9 02 mul a1, s3, a1 -800000f0: b3 85 b6 00 add a1, a3, a1 -800000f4: b3 05 b9 02 mul a1, s2, a1 -800000f8: b3 85 b7 00 add a1, a5, a1 -800000fc: 33 05 e5 02 mul a0, a0, a4 -80000100: b3 84 a5 00 add s1, a1, a0 -80000104: 33 09 60 41 neg s2, s6 -80000108: 33 0b 40 41 neg s6, s4 -8000010c: 33 c7 44 03 div a4, s1, s4 -80000110: 83 25 08 00 lw a1, 0(a6) -80000114: 33 05 eb 02 mul a0, s6, a4 -80000118: 33 85 a4 00 add a0, s1, a0 -8000011c: b3 46 55 03 div a3, a0, s5 -80000120: 03 a5 c5 00 lw a0, 12(a1) -80000124: b3 07 e9 02 mul a5, s2, a4 -80000128: b3 87 d7 40 sub a5, a5, a3 -8000012c: b3 87 fa 02 mul a5, s5, a5 -80000130: b3 08 f5 00 add a7, a0, a5 -80000134: 03 a4 05 01 lw s0, 16(a1) -80000138: 03 a6 45 01 lw a2, 20(a1) -8000013c: 83 27 48 00 lw a5, 4(a6) -80000140: 03 25 88 00 lw a0, 8(a6) -80000144: b3 06 d4 00 add a3, s0, a3 -80000148: 33 07 e6 00 add a4, a2, a4 -8000014c: 33 86 14 01 add a2, s1, a7 -80000150: e7 80 07 00 jalr a5 -80000154: 63 0a 0c 00 beqz s8, 20 -80000158: 03 a8 c1 c2 lw a6, -980(gp) -8000015c: 13 0c fc ff addi s8, s8, -1 -80000160: 93 84 14 00 addi s1, s1, 1 -80000164: 6f f0 9f fa j -88 -80000168: 13 b5 19 00 seqz a0, s3 -8000016c: 03 2c 81 00 lw s8, 8(sp) -80000170: 83 2b c1 00 lw s7, 12(sp) -80000174: 03 2b 01 01 lw s6, 16(sp) -80000178: 83 2a 41 01 lw s5, 20(sp) -8000017c: 03 2a 81 01 lw s4, 24(sp) -80000180: 83 29 c1 01 lw s3, 28(sp) -80000184: 03 29 01 02 lw s2, 32(sp) -80000188: 83 24 41 02 lw s1, 36(sp) -8000018c: 03 24 81 02 lw s0, 40(sp) -80000190: 83 20 c1 02 lw ra, 44(sp) -80000194: 13 01 01 03 addi sp, sp, 48 -80000198: 6f 00 00 4a j 1184 +800000ac: ef 00 c0 58 jal 1420 +800000b0: 93 85 c1 c2 addi a1, gp, -980 +800000b4: 13 96 24 00 slli a2, s1, 2 +800000b8: b3 05 b6 00 add a1, a2, a1 +800000bc: 03 ab 05 00 lw s6, 0(a1) +800000c0: 83 25 4b 01 lw a1, 20(s6) +800000c4: 03 26 0b 01 lw a2, 16(s6) +800000c8: 93 86 05 00 mv a3, a1 +800000cc: 63 c4 35 01 blt a1, s3, 8 +800000d0: 93 86 09 00 mv a3, s3 +800000d4: b3 a5 b9 00 slt a1, s3, a1 +800000d8: 33 07 b6 00 add a4, a2, a1 +800000dc: 93 05 10 00 addi a1, zero, 1 +800000e0: 63 4a b7 08 blt a4, a1, 148 +800000e4: 83 25 0b 00 lw a1, 0(s6) +800000e8: 83 aa 05 00 lw s5, 0(a1) +800000ec: 83 a7 45 00 lw a5, 4(a1) +800000f0: 83 24 cb 00 lw s1, 12(s6) +800000f4: 33 8a 57 03 mul s4, a5, s5 +800000f8: 13 0c f7 ff addi s8, a4, -1 +800000fc: 33 86 c9 02 mul a2, s3, a2 +80000100: 33 86 c6 00 add a2, a3, a2 +80000104: 33 05 c5 02 mul a0, a0, a2 +80000108: 33 85 a4 00 add a0, s1, a0 +8000010c: 33 06 e9 02 mul a2, s2, a4 +80000110: b3 04 c5 00 add s1, a0, a2 +80000114: 33 09 f0 40 neg s2, a5 +80000118: b3 0b 40 41 neg s7, s4 +8000011c: 33 c6 44 03 div a2, s1, s4 +80000120: 33 85 cb 02 mul a0, s7, a2 +80000124: 33 85 a4 00 add a0, s1, a0 +80000128: b3 46 55 03 div a3, a0, s5 +8000012c: 03 a5 c5 00 lw a0, 12(a1) +80000130: 33 07 c9 02 mul a4, s2, a2 +80000134: 33 07 d7 40 sub a4, a4, a3 +80000138: 33 87 ea 02 mul a4, s5, a4 +8000013c: 33 08 e5 00 add a6, a0, a4 +80000140: 03 a7 05 01 lw a4, 16(a1) +80000144: 03 a4 45 01 lw s0, 20(a1) +80000148: 83 27 4b 00 lw a5, 4(s6) +8000014c: 03 25 8b 00 lw a0, 8(s6) +80000150: b3 06 d7 00 add a3, a4, a3 +80000154: 33 07 c4 00 add a4, s0, a2 +80000158: 33 86 04 01 add a2, s1, a6 +8000015c: e7 80 07 00 jalr a5 +80000160: 63 0a 0c 00 beqz s8, 20 +80000164: 83 25 0b 00 lw a1, 0(s6) +80000168: 13 0c fc ff addi s8, s8, -1 +8000016c: 93 84 14 00 addi s1, s1, 1 +80000170: 6f f0 df fa j -84 +80000174: 13 b5 19 00 seqz a0, s3 +80000178: 03 2c 81 00 lw s8, 8(sp) +8000017c: 83 2b c1 00 lw s7, 12(sp) +80000180: 03 2b 01 01 lw s6, 16(sp) +80000184: 83 2a 41 01 lw s5, 20(sp) +80000188: 03 2a 81 01 lw s4, 24(sp) +8000018c: 83 29 c1 01 lw s3, 28(sp) +80000190: 03 29 01 02 lw s2, 32(sp) +80000194: 83 24 41 02 lw s1, 36(sp) +80000198: 03 24 81 02 lw s0, 40(sp) +8000019c: 83 20 c1 02 lw ra, 44(sp) +800001a0: 13 01 01 03 addi sp, sp, 48 +800001a4: 6f 00 c0 44 j 1100 -8000019c kernel_spawn_run_threads: -8000019c: 13 01 01 fe addi sp, sp, -32 -800001a0: 23 2e 11 00 sw ra, 28(sp) -800001a4: 23 2c 81 00 sw s0, 24(sp) -800001a8: 23 2a 91 00 sw s1, 20(sp) -800001ac: 23 28 21 01 sw s2, 16(sp) -800001b0: 23 26 31 01 sw s3, 12(sp) -800001b4: 93 89 05 00 mv s3, a1 -800001b8: ef 00 00 48 jal 1152 -800001bc: 03 a5 c1 c2 lw a0, -980(gp) -800001c0: 03 25 05 00 lw a0, 0(a0) -800001c4: 83 24 05 00 lw s1, 0(a0) -800001c8: 03 25 45 00 lw a0, 4(a0) -800001cc: 33 04 95 02 mul s0, a0, s1 -800001d0: ef 00 00 4a jal 1184 -800001d4: 33 05 35 01 add a0, a0, s3 -800001d8: 03 a6 c1 c2 lw a2, -980(gp) -800001dc: 33 47 85 02 div a4, a0, s0 -800001e0: b3 05 87 02 mul a1, a4, s0 -800001e4: 33 05 b5 40 sub a0, a0, a1 -800001e8: 83 25 06 00 lw a1, 0(a2) -800001ec: b3 46 95 02 div a3, a0, s1 -800001f0: b3 87 96 02 mul a5, a3, s1 -800001f4: 33 08 f5 40 sub a6, a0, a5 -800001f8: 83 a4 c5 00 lw s1, 12(a1) -800001fc: 03 a4 05 01 lw s0, 16(a1) -80000200: 83 a7 45 01 lw a5, 20(a1) -80000204: 83 28 46 00 lw a7, 4(a2) -80000208: 03 25 86 00 lw a0, 8(a2) -8000020c: 33 06 98 00 add a2, a6, s1 -80000210: b3 06 d4 00 add a3, s0, a3 -80000214: 33 87 e7 00 add a4, a5, a4 -80000218: e7 80 08 00 jalr a7 -8000021c: 13 05 10 00 addi a0, zero, 1 -80000220: 83 29 c1 00 lw s3, 12(sp) -80000224: 03 29 01 01 lw s2, 16(sp) -80000228: 83 24 41 01 lw s1, 20(sp) -8000022c: 03 24 81 01 lw s0, 24(sp) -80000230: 83 20 c1 01 lw ra, 28(sp) -80000234: 13 01 01 02 addi sp, sp, 32 -80000238: 6f 00 00 40 j 1024 +800001a8 kernel_spawn_run_threads: +800001a8: 13 01 01 ff addi sp, sp, -16 +800001ac: 23 26 11 00 sw ra, 12(sp) +800001b0: 23 24 81 00 sw s0, 8(sp) +800001b4: ef 00 c0 43 jal 1084 +800001b8: ef 00 80 47 jal 1144 +800001bc: 13 04 05 00 mv s0, a0 +800001c0: ef 00 80 46 jal 1128 +800001c4: 93 85 c1 c2 addi a1, gp, -980 +800001c8: 13 16 24 00 slli a2, s0, 2 +800001cc: b3 05 b6 00 add a1, a2, a1 +800001d0: 03 a6 05 00 lw a2, 0(a1) +800001d4: 83 25 06 00 lw a1, 0(a2) +800001d8: 83 26 c6 00 lw a3, 12(a2) +800001dc: 03 a7 05 00 lw a4, 0(a1) +800001e0: 83 a7 45 00 lw a5, 4(a1) +800001e4: 33 85 a6 00 add a0, a3, a0 +800001e8: b3 86 e7 02 mul a3, a5, a4 +800001ec: b3 47 d5 02 div a5, a0, a3 +800001f0: b3 86 d7 02 mul a3, a5, a3 +800001f4: 03 a4 c5 00 lw s0, 12(a1) +800001f8: 33 05 d5 40 sub a0, a0, a3 +800001fc: b3 46 e5 02 div a3, a0, a4 +80000200: 33 88 e6 02 mul a6, a3, a4 +80000204: b3 08 a4 00 add a7, s0, a0 +80000208: 03 a7 05 01 lw a4, 16(a1) +8000020c: 03 a4 45 01 lw s0, 20(a1) +80000210: 83 22 46 00 lw t0, 4(a2) +80000214: 03 25 86 00 lw a0, 8(a2) +80000218: 33 86 08 41 sub a2, a7, a6 +8000021c: b3 06 d7 00 add a3, a4, a3 +80000220: 33 07 f4 00 add a4, s0, a5 +80000224: e7 80 02 00 jalr t0 +80000228: 13 05 10 00 addi a0, zero, 1 +8000022c: 03 24 81 00 lw s0, 8(sp) +80000230: 83 20 c1 00 lw ra, 12(sp) +80000234: 13 01 01 01 addi sp, sp, 16 +80000238: 6f 00 80 3b j 952 8000023c kernel_spawn: 8000023c: 13 01 01 fc addi sp, sp, -64 @@ -167,623 +167,605 @@ Disassembly of section .text: 80000254: 23 24 41 03 sw s4, 40(sp) 80000258: 23 22 51 03 sw s5, 36(sp) 8000025c: 23 20 61 03 sw s6, 32(sp) -80000260: 13 0b 05 00 mv s6, a0 -80000264: 03 25 05 00 lw a0, 0(a0) -80000268: 83 26 4b 00 lw a3, 4(s6) -8000026c: 03 27 8b 00 lw a4, 8(s6) -80000270: 13 09 06 00 mv s2, a2 -80000274: 93 89 05 00 mv s3, a1 -80000278: 33 85 a6 02 mul a0, a3, a0 -8000027c: b3 04 e5 02 mul s1, a0, a4 -80000280: ef 00 00 41 jal 1040 -80000284: 13 04 05 00 mv s0, a0 -80000288: ef 00 00 40 jal 1024 +80000260: 23 2e 71 01 sw s7, 28(sp) +80000264: 23 2c 81 01 sw s8, 24(sp) +80000268: 93 04 05 00 mv s1, a0 +8000026c: 83 2b 05 00 lw s7, 0(a0) +80000270: 03 24 45 00 lw s0, 4(a0) +80000274: 03 2c 85 00 lw s8, 8(a0) +80000278: 13 09 06 00 mv s2, a2 +8000027c: 93 89 05 00 mv s3, a1 +80000280: ef 00 80 3c jal 968 +80000284: 13 0b 05 00 mv s6, a0 +80000288: ef 00 80 3b jal 952 8000028c: 13 0a 05 00 mv s4, a0 -80000290: ef 00 00 3f jal 1008 +80000290: ef 00 80 3a jal 936 80000294: 93 0a 05 00 mv s5, a0 -80000298: ef 00 00 3e jal 992 -8000029c: b3 85 4a 03 mul a1, s5, s4 -800002a0: 13 06 10 00 addi a2, zero, 1 -800002a4: 63 c8 95 00 blt a1, s1, 16 -800002a8: 63 5a 86 00 bge a2, s0, 20 -800002ac: 63 4c c5 00 blt a0, a2, 24 -800002b0: 6f 00 00 13 j 304 -800002b4: 33 c6 b4 02 div a2, s1, a1 -800002b8: e3 4a 86 fe blt a2, s0, -12 -800002bc: 13 06 04 00 mv a2, s0 -800002c0: 63 50 c5 12 bge a0, a2, 288 -800002c4: 93 06 f4 ff addi a3, s0, -1 -800002c8: b3 c5 c4 02 div a1, s1, a2 -800002cc: 63 0e d5 00 beq a0, a3, 28 -800002d0: 13 06 00 00 mv a2, zero -800002d4: b3 06 b6 00 add a3, a2, a1 -800002d8: 33 c6 56 03 div a2, a3, s5 -800002dc: 13 07 00 00 mv a4, zero -800002e0: 63 50 46 03 bge a2, s4, 32 -800002e4: 6f 00 00 02 j 32 -800002e8: 33 86 c5 02 mul a2, a1, a2 -800002ec: 33 86 c4 40 sub a2, s1, a2 -800002f0: b3 06 b6 00 add a3, a2, a1 -800002f4: 33 c6 56 03 div a2, a3, s5 -800002f8: 13 07 00 00 mv a4, zero -800002fc: 63 44 46 01 blt a2, s4, 8 -80000300: 33 47 46 03 div a4, a2, s4 -80000304: 93 07 00 00 mv a5, zero -80000308: b3 0a 56 03 mul s5, a2, s5 -8000030c: 93 04 10 00 addi s1, zero, 1 -80000310: 63 08 07 00 beqz a4, 16 -80000314: b3 07 47 03 mul a5, a4, s4 -80000318: b3 07 f6 40 sub a5, a2, a5 -8000031c: 93 04 07 00 mv s1, a4 -80000320: 33 84 56 41 sub s0, a3, s5 -80000324: 23 24 61 01 sw s6, 8(sp) -80000328: 23 26 31 01 sw s3, 12(sp) -8000032c: 23 28 21 01 sw s2, 16(sp) -80000330: 33 85 a5 02 mul a0, a1, a0 -80000334: 23 2a a1 00 sw a0, 20(sp) -80000338: 23 2c 91 00 sw s1, 24(sp) -8000033c: 23 2e f1 00 sw a5, 28(sp) -80000340: 93 05 81 00 addi a1, sp, 8 -80000344: 93 06 20 00 addi a3, zero, 2 -80000348: 23 a6 b1 c2 sw a1, -980(gp) -8000034c: 63 40 d6 02 blt a2, a3, 32 -80000350: 63 44 46 01 blt a2, s4, 8 -80000354: 13 06 0a 00 mv a2, s4 -80000358: 37 05 00 80 lui a0, 524288 -8000035c: 93 05 05 06 addi a1, a0, 96 -80000360: 13 05 06 00 mv a0, a2 -80000364: ef 00 c0 2c jal 716 -80000368: ef f0 9f cf jal -776 -8000036c: 63 0a 04 06 beqz s0, 116 -80000370: 13 05 04 00 mv a0, s0 -80000374: ef 00 40 2c jal 708 -80000378: 03 a5 c1 c2 lw a0, -980(gp) -8000037c: 03 25 05 00 lw a0, 0(a0) -80000380: 83 24 05 00 lw s1, 0(a0) -80000384: 03 25 45 00 lw a0, 4(a0) -80000388: 33 04 95 02 mul s0, a0, s1 -8000038c: ef 00 40 2e jal 740 -80000390: 33 05 55 01 add a0, a0, s5 -80000394: 03 a6 c1 c2 lw a2, -980(gp) -80000398: 33 47 85 02 div a4, a0, s0 -8000039c: b3 05 87 02 mul a1, a4, s0 -800003a0: 33 05 b5 40 sub a0, a0, a1 -800003a4: 83 25 06 00 lw a1, 0(a2) -800003a8: b3 46 95 02 div a3, a0, s1 -800003ac: b3 87 96 02 mul a5, a3, s1 -800003b0: 33 08 f5 40 sub a6, a0, a5 -800003b4: 83 a4 c5 00 lw s1, 12(a1) -800003b8: 03 a4 05 01 lw s0, 16(a1) -800003bc: 83 a7 45 01 lw a5, 20(a1) -800003c0: 83 28 46 00 lw a7, 4(a2) -800003c4: 03 25 86 00 lw a0, 8(a2) -800003c8: 33 06 98 00 add a2, a6, s1 -800003cc: b3 06 d4 00 add a3, s0, a3 -800003d0: 33 87 e7 00 add a4, a5, a4 -800003d4: e7 80 08 00 jalr a7 -800003d8: 13 05 10 00 addi a0, zero, 1 -800003dc: ef 00 c0 25 jal 604 -800003e0: 03 2b 01 02 lw s6, 32(sp) -800003e4: 83 2a 41 02 lw s5, 36(sp) -800003e8: 03 2a 81 02 lw s4, 40(sp) -800003ec: 83 29 c1 02 lw s3, 44(sp) -800003f0: 03 29 01 03 lw s2, 48(sp) -800003f4: 83 24 41 03 lw s1, 52(sp) -800003f8: 03 24 81 03 lw s0, 56(sp) -800003fc: 83 20 c1 03 lw ra, 60(sp) -80000400: 13 01 01 04 addi sp, sp, 64 -80000404: 67 80 00 00 ret +80000298: ef 00 80 39 jal 920 +8000029c: 93 05 70 00 addi a1, zero, 7 +800002a0: 63 c8 a5 0e blt a1, a0, 240 +800002a4: b3 05 74 03 mul a1, s0, s7 +800002a8: 33 86 85 03 mul a2, a1, s8 +800002ac: b3 85 4a 03 mul a1, s5, s4 +800002b0: 93 06 10 00 addi a3, zero, 1 +800002b4: 63 c8 c5 00 blt a1, a2, 16 +800002b8: 63 da 66 01 bge a3, s6, 20 +800002bc: 63 4c d5 00 blt a0, a3, 24 +800002c0: 6f 00 00 0d j 208 +800002c4: b3 46 b6 02 div a3, a2, a1 +800002c8: e3 ca 66 ff blt a3, s6, -12 +800002cc: 93 06 0b 00 mv a3, s6 +800002d0: 63 50 d5 0c bge a0, a3, 192 +800002d4: 13 07 fb ff addi a4, s6, -1 +800002d8: b3 45 d6 02 div a1, a2, a3 +800002dc: 63 0e e5 00 beq a0, a4, 28 +800002e0: 13 06 00 00 mv a2, zero +800002e4: 33 0b b6 00 add s6, a2, a1 +800002e8: 33 46 5b 03 div a2, s6, s5 +800002ec: 93 06 00 00 mv a3, zero +800002f0: 63 50 46 03 bge a2, s4, 32 +800002f4: 6f 00 00 02 j 32 +800002f8: b3 86 d5 02 mul a3, a1, a3 +800002fc: 33 06 d6 40 sub a2, a2, a3 +80000300: 33 0b b6 00 add s6, a2, a1 +80000304: 33 46 5b 03 div a2, s6, s5 +80000308: 93 06 00 00 mv a3, zero +8000030c: 63 44 46 01 blt a2, s4, 8 +80000310: b3 46 46 03 div a3, a2, s4 +80000314: 13 07 00 00 mv a4, zero +80000318: 93 07 10 00 addi a5, zero, 1 +8000031c: 63 88 06 00 beqz a3, 16 +80000320: 33 87 46 03 mul a4, a3, s4 +80000324: 33 07 e6 40 sub a4, a2, a4 +80000328: 93 87 06 00 mv a5, a3 +8000032c: 33 04 56 03 mul s0, a2, s5 +80000330: 23 20 91 00 sw s1, 0(sp) +80000334: 23 22 31 01 sw s3, 4(sp) +80000338: 23 24 21 01 sw s2, 8(sp) +8000033c: b3 85 a5 02 mul a1, a1, a0 +80000340: 23 26 b1 00 sw a1, 12(sp) +80000344: 23 28 f1 00 sw a5, 16(sp) +80000348: 23 2a e1 00 sw a4, 20(sp) +8000034c: 93 85 c1 c2 addi a1, gp, -980 +80000350: 13 15 25 00 slli a0, a0, 2 +80000354: 33 05 b5 00 add a0, a0, a1 +80000358: 93 05 01 00 mv a1, sp +8000035c: 93 06 20 00 addi a3, zero, 2 +80000360: 23 20 b5 00 sw a1, 0(a0) +80000364: 63 40 d6 02 blt a2, a3, 32 +80000368: 63 44 46 01 blt a2, s4, 8 +8000036c: 13 06 0a 00 mv a2, s4 +80000370: 37 05 00 80 lui a0, 524288 +80000374: 93 05 05 06 addi a1, a0, 96 +80000378: 13 05 06 00 mv a0, a2 +8000037c: ef 00 c0 26 jal 620 +80000380: ef f0 1f ce jal -800 +80000384: 63 06 8b 00 beq s6, s0, 12 +80000388: 23 26 81 00 sw s0, 12(sp) +8000038c: ef f0 5f cd jal -812 +80000390: 03 2c 81 01 lw s8, 24(sp) +80000394: 83 2b c1 01 lw s7, 28(sp) +80000398: 03 2b 01 02 lw s6, 32(sp) +8000039c: 83 2a 41 02 lw s5, 36(sp) +800003a0: 03 2a 81 02 lw s4, 40(sp) +800003a4: 83 29 c1 02 lw s3, 44(sp) +800003a8: 03 29 01 03 lw s2, 48(sp) +800003ac: 83 24 41 03 lw s1, 52(sp) +800003b0: 03 24 81 03 lw s0, 56(sp) +800003b4: 83 20 c1 03 lw ra, 60(sp) +800003b8: 13 01 01 04 addi sp, sp, 64 +800003bc: 67 80 00 00 ret -80000408 main: -80000408: 13 01 01 ff addi sp, sp, -16 -8000040c: 23 26 11 00 sw ra, 12(sp) -80000410: 37 05 00 80 lui a0, 524288 -80000414: 93 05 45 4d addi a1, a0, 1236 -80000418: 37 05 ff 7f lui a0, 524272 -8000041c: 13 06 45 03 addi a2, a0, 52 -80000420: 37 05 ff 7f lui a0, 524272 -80000424: ef f0 9f e1 jal -488 -80000428: 13 05 00 00 mv a0, zero -8000042c: 83 20 c1 00 lw ra, 12(sp) -80000430: 13 01 01 01 addi sp, sp, 16 -80000434: 67 80 00 00 ret +800003c0 main: +800003c0: 13 01 01 ff addi sp, sp, -16 +800003c4: 23 26 11 00 sw ra, 12(sp) +800003c8: 37 05 00 80 lui a0, 524288 +800003cc: 93 05 c5 48 addi a1, a0, 1164 +800003d0: 37 05 ff 7f lui a0, 524272 +800003d4: 13 06 45 03 addi a2, a0, 52 +800003d8: 37 05 ff 7f lui a0, 524272 +800003dc: ef f0 1f e6 jal -416 +800003e0: 13 05 00 00 mv a0, zero +800003e4: 83 20 c1 00 lw ra, 12(sp) +800003e8: 13 01 01 01 addi sp, sp, 16 +800003ec: 67 80 00 00 ret -80000438 _pocl_kernel_vecadd: -80000438: 13 01 01 ff addi sp, sp, -16 -8000043c: 23 26 11 00 sw ra, 12(sp) -80000440: 23 24 81 00 sw s0, 8(sp) -80000444: 13 04 01 01 addi s0, sp, 16 -80000448: 13 71 c1 ff andi sp, sp, -4 -8000044c: 83 a7 86 01 lw a5, 24(a3) -80000450: 83 a8 c6 00 lw a7, 12(a3) -80000454: 13 08 00 00 mv a6, zero -80000458: 33 87 e7 02 mul a4, a5, a4 -8000045c: 33 87 e8 00 add a4, a7, a4 -80000460: 83 a2 c6 01 lw t0, 28(a3) -80000464: 83 a8 06 02 lw a7, 32(a3) -80000468: 93 16 27 00 slli a3, a4, 2 -8000046c: 33 03 d6 00 add t1, a2, a3 -80000470: b3 83 d5 00 add t2, a1, a3 -80000474: 33 0e d5 00 add t3, a0, a3 -80000478: 93 06 00 00 mv a3, zero -8000047c: 13 07 00 00 mv a4, zero -80000480: 13 05 0e 00 mv a0, t3 -80000484: 93 85 03 00 mv a1, t2 -80000488: 13 06 03 00 mv a2, t1 -8000048c: 07 20 05 00 flw ft0, 0(a0) -80000490: 87 a0 05 00 flw ft1, 0(a1) -80000494: 53 70 10 00 fadd.s ft0, ft0, ft1 -80000498: 27 20 06 00 fsw ft0, 0(a2) -8000049c: 13 07 17 00 addi a4, a4, 1 -800004a0: 13 06 46 00 addi a2, a2, 4 -800004a4: 93 85 45 00 addi a1, a1, 4 -800004a8: 13 05 45 00 addi a0, a0, 4 -800004ac: e3 60 f7 fe bltu a4, a5, -32 -800004b0: 93 86 16 00 addi a3, a3, 1 -800004b4: e3 e4 56 fc bltu a3, t0, -56 -800004b8: 13 08 18 00 addi a6, a6, 1 -800004bc: e3 6e 18 fb bltu a6, a7, -68 -800004c0: 13 01 04 ff addi sp, s0, -16 -800004c4: 03 24 81 00 lw s0, 8(sp) -800004c8: 83 20 c1 00 lw ra, 12(sp) -800004cc: 13 01 01 01 addi sp, sp, 16 -800004d0: 67 80 00 00 ret +800003f0 _pocl_kernel_vecadd: +800003f0: 13 01 01 ff addi sp, sp, -16 +800003f4: 23 26 11 00 sw ra, 12(sp) +800003f8: 23 24 81 00 sw s0, 8(sp) +800003fc: 13 04 01 01 addi s0, sp, 16 +80000400: 13 71 c1 ff andi sp, sp, -4 +80000404: 83 a7 86 01 lw a5, 24(a3) +80000408: 83 a8 c6 00 lw a7, 12(a3) +8000040c: 13 08 00 00 mv a6, zero +80000410: 33 87 e7 02 mul a4, a5, a4 +80000414: 33 87 e8 00 add a4, a7, a4 +80000418: 83 a2 c6 01 lw t0, 28(a3) +8000041c: 83 a8 06 02 lw a7, 32(a3) +80000420: 93 16 27 00 slli a3, a4, 2 +80000424: 33 03 d6 00 add t1, a2, a3 +80000428: b3 83 d5 00 add t2, a1, a3 +8000042c: 33 0e d5 00 add t3, a0, a3 +80000430: 93 06 00 00 mv a3, zero +80000434: 13 07 00 00 mv a4, zero +80000438: 13 05 0e 00 mv a0, t3 +8000043c: 93 85 03 00 mv a1, t2 +80000440: 13 06 03 00 mv a2, t1 +80000444: 07 20 05 00 flw ft0, 0(a0) +80000448: 87 a0 05 00 flw ft1, 0(a1) +8000044c: 53 70 10 00 fadd.s ft0, ft0, ft1 +80000450: 27 20 06 00 fsw ft0, 0(a2) +80000454: 13 07 17 00 addi a4, a4, 1 +80000458: 13 06 46 00 addi a2, a2, 4 +8000045c: 93 85 45 00 addi a1, a1, 4 +80000460: 13 05 45 00 addi a0, a0, 4 +80000464: e3 60 f7 fe bltu a4, a5, -32 +80000468: 93 86 16 00 addi a3, a3, 1 +8000046c: e3 e4 56 fc bltu a3, t0, -56 +80000470: 13 08 18 00 addi a6, a6, 1 +80000474: e3 6e 18 fb bltu a6, a7, -68 +80000478: 13 01 04 ff addi sp, s0, -16 +8000047c: 03 24 81 00 lw s0, 8(sp) +80000480: 83 20 c1 00 lw ra, 12(sp) +80000484: 13 01 01 01 addi sp, sp, 16 +80000488: 67 80 00 00 ret -800004d4 _pocl_kernel_vecadd_workgroup: -800004d4: 83 26 05 00 lw a3, 0(a0) -800004d8: 93 08 00 00 mv a7, zero -800004dc: 03 ae 06 00 lw t3, 0(a3) -800004e0: 03 27 45 00 lw a4, 4(a0) -800004e4: 83 27 85 00 lw a5, 8(a0) -800004e8: 03 a5 85 01 lw a0, 24(a1) -800004ec: 83 a6 c5 00 lw a3, 12(a1) -800004f0: 03 27 07 00 lw a4, 0(a4) -800004f4: 83 a7 07 00 lw a5, 0(a5) -800004f8: 33 06 c5 02 mul a2, a0, a2 -800004fc: 33 86 c6 00 add a2, a3, a2 -80000500: 83 a2 c5 01 lw t0, 28(a1) -80000504: 03 a8 05 02 lw a6, 32(a1) -80000508: 93 15 26 00 slli a1, a2, 2 -8000050c: 33 83 b7 00 add t1, a5, a1 -80000510: b3 03 b7 00 add t2, a4, a1 -80000514: 33 0e be 00 add t3, t3, a1 -80000518: 93 06 00 00 mv a3, zero -8000051c: 13 06 00 00 mv a2, zero -80000520: 93 05 0e 00 mv a1, t3 -80000524: 93 87 03 00 mv a5, t2 -80000528: 13 07 03 00 mv a4, t1 -8000052c: 07 a0 05 00 flw ft0, 0(a1) -80000530: 87 a0 07 00 flw ft1, 0(a5) -80000534: 53 70 10 00 fadd.s ft0, ft0, ft1 -80000538: 27 20 07 00 fsw ft0, 0(a4) -8000053c: 13 06 16 00 addi a2, a2, 1 -80000540: 13 07 47 00 addi a4, a4, 4 -80000544: 93 87 47 00 addi a5, a5, 4 -80000548: 93 85 45 00 addi a1, a1, 4 -8000054c: e3 60 a6 fe bltu a2, a0, -32 -80000550: 93 86 16 00 addi a3, a3, 1 -80000554: e3 e4 56 fc bltu a3, t0, -56 -80000558: 93 88 18 00 addi a7, a7, 1 -8000055c: e3 ee 08 fb bltu a7, a6, -68 -80000560: 67 80 00 00 ret +8000048c _pocl_kernel_vecadd_workgroup: +8000048c: 83 26 05 00 lw a3, 0(a0) +80000490: 93 08 00 00 mv a7, zero +80000494: 03 ae 06 00 lw t3, 0(a3) +80000498: 03 27 45 00 lw a4, 4(a0) +8000049c: 83 27 85 00 lw a5, 8(a0) +800004a0: 03 a5 85 01 lw a0, 24(a1) +800004a4: 83 a6 c5 00 lw a3, 12(a1) +800004a8: 03 27 07 00 lw a4, 0(a4) +800004ac: 83 a7 07 00 lw a5, 0(a5) +800004b0: 33 06 c5 02 mul a2, a0, a2 +800004b4: 33 86 c6 00 add a2, a3, a2 +800004b8: 83 a2 c5 01 lw t0, 28(a1) +800004bc: 03 a8 05 02 lw a6, 32(a1) +800004c0: 93 15 26 00 slli a1, a2, 2 +800004c4: 33 83 b7 00 add t1, a5, a1 +800004c8: b3 03 b7 00 add t2, a4, a1 +800004cc: 33 0e be 00 add t3, t3, a1 +800004d0: 93 06 00 00 mv a3, zero +800004d4: 13 06 00 00 mv a2, zero +800004d8: 93 05 0e 00 mv a1, t3 +800004dc: 93 87 03 00 mv a5, t2 +800004e0: 13 07 03 00 mv a4, t1 +800004e4: 07 a0 05 00 flw ft0, 0(a1) +800004e8: 87 a0 07 00 flw ft1, 0(a5) +800004ec: 53 70 10 00 fadd.s ft0, ft0, ft1 +800004f0: 27 20 07 00 fsw ft0, 0(a4) +800004f4: 13 06 16 00 addi a2, a2, 1 +800004f8: 13 07 47 00 addi a4, a4, 4 +800004fc: 93 87 47 00 addi a5, a5, 4 +80000500: 93 85 45 00 addi a1, a1, 4 +80000504: e3 60 a6 fe bltu a2, a0, -32 +80000508: 93 86 16 00 addi a3, a3, 1 +8000050c: e3 e4 56 fc bltu a3, t0, -56 +80000510: 93 88 18 00 addi a7, a7, 1 +80000514: e3 ee 08 fb bltu a7, a6, -68 +80000518: 67 80 00 00 ret -80000564 _pocl_kernel_vecadd_workgroup_fast: -80000564: 93 08 00 00 mv a7, zero -80000568: 03 2e 05 00 lw t3, 0(a0) -8000056c: 03 a7 85 01 lw a4, 24(a1) -80000570: 83 a7 c5 00 lw a5, 12(a1) -80000574: 83 26 45 00 lw a3, 4(a0) -80000578: 03 25 85 00 lw a0, 8(a0) -8000057c: 33 06 c7 02 mul a2, a4, a2 -80000580: 33 86 c7 00 add a2, a5, a2 -80000584: 83 a2 c5 01 lw t0, 28(a1) -80000588: 03 a8 05 02 lw a6, 32(a1) -8000058c: 93 15 26 00 slli a1, a2, 2 -80000590: 33 03 b5 00 add t1, a0, a1 -80000594: b3 83 b6 00 add t2, a3, a1 -80000598: 33 0e be 00 add t3, t3, a1 -8000059c: 93 06 00 00 mv a3, zero +8000051c _pocl_kernel_vecadd_workgroup_fast: +8000051c: 93 08 00 00 mv a7, zero +80000520: 03 2e 05 00 lw t3, 0(a0) +80000524: 03 a7 85 01 lw a4, 24(a1) +80000528: 83 a7 c5 00 lw a5, 12(a1) +8000052c: 83 26 45 00 lw a3, 4(a0) +80000530: 03 25 85 00 lw a0, 8(a0) +80000534: 33 06 c7 02 mul a2, a4, a2 +80000538: 33 86 c7 00 add a2, a5, a2 +8000053c: 83 a2 c5 01 lw t0, 28(a1) +80000540: 03 a8 05 02 lw a6, 32(a1) +80000544: 93 15 26 00 slli a1, a2, 2 +80000548: 33 03 b5 00 add t1, a0, a1 +8000054c: b3 83 b6 00 add t2, a3, a1 +80000550: 33 0e be 00 add t3, t3, a1 +80000554: 93 06 00 00 mv a3, zero +80000558: 13 05 00 00 mv a0, zero +8000055c: 93 05 0e 00 mv a1, t3 +80000560: 93 87 03 00 mv a5, t2 +80000564: 13 06 03 00 mv a2, t1 +80000568: 07 a0 05 00 flw ft0, 0(a1) +8000056c: 87 a0 07 00 flw ft1, 0(a5) +80000570: 53 70 10 00 fadd.s ft0, ft0, ft1 +80000574: 27 20 06 00 fsw ft0, 0(a2) +80000578: 13 05 15 00 addi a0, a0, 1 +8000057c: 13 06 46 00 addi a2, a2, 4 +80000580: 93 87 47 00 addi a5, a5, 4 +80000584: 93 85 45 00 addi a1, a1, 4 +80000588: e3 60 e5 fe bltu a0, a4, -32 +8000058c: 93 86 16 00 addi a3, a3, 1 +80000590: e3 e4 56 fc bltu a3, t0, -56 +80000594: 93 88 18 00 addi a7, a7, 1 +80000598: e3 ee 08 fb bltu a7, a6, -68 +8000059c: 67 80 00 00 ret + +800005a0 _exit: 800005a0: 13 05 00 00 mv a0, zero -800005a4: 93 05 0e 00 mv a1, t3 -800005a8: 93 87 03 00 mv a5, t2 -800005ac: 13 06 03 00 mv a2, t1 -800005b0: 07 a0 05 00 flw ft0, 0(a1) -800005b4: 87 a0 07 00 flw ft1, 0(a5) -800005b8: 53 70 10 00 fadd.s ft0, ft0, ft1 -800005bc: 27 20 06 00 fsw ft0, 0(a2) -800005c0: 13 05 15 00 addi a0, a0, 1 -800005c4: 13 06 46 00 addi a2, a2, 4 -800005c8: 93 87 47 00 addi a5, a5, 4 -800005cc: 93 85 45 00 addi a1, a1, 4 -800005d0: e3 60 e5 fe bltu a0, a4, -32 -800005d4: 93 86 16 00 addi a3, a3, 1 -800005d8: e3 e4 56 fc bltu a3, t0, -56 -800005dc: 93 88 18 00 addi a7, a7, 1 -800005e0: e3 ee 08 fb bltu a7, a6, -68 +800005a4: 6b 00 05 00 + +800005a8 vx_set_sp: +800005a8: 73 25 50 02 csrr a0, 37 +800005ac: 6b 00 05 00 +800005b0: 97 21 00 00 auipc gp, 2 +800005b4: 93 81 81 c8 addi gp, gp, -888 +800005b8: f3 25 20 02 csrr a1, 34 +800005bc: 93 95 a5 00 slli a1, a1, 10 +800005c0: 73 26 00 02 csrr a2, 32 +800005c4: 13 16 26 00 slli a2, a2, 2 +800005c8: 37 f1 ff 6f lui sp, 458751 +800005cc: 33 01 b1 40 sub sp, sp, a1 +800005d0: 33 01 c1 00 add sp, sp, a2 +800005d4: f3 26 10 02 csrr a3, 33 +800005d8: 63 86 06 00 beqz a3, 12 +800005dc: 13 05 00 00 mv a0, zero +800005e0: 6b 00 05 00 + +800005e4 RETURN: 800005e4: 67 80 00 00 ret -800005e8 _exit: -800005e8: 13 05 00 00 mv a0, zero -800005ec: 6b 00 05 00 +800005e8 vx_wspawn: +800005e8: 6b 10 b5 00 +800005ec: 67 80 00 00 ret -800005f0 vx_set_sp: -800005f0: 73 25 50 02 csrr a0, 37 -800005f4: 6b 00 05 00 -800005f8: 97 21 00 00 auipc gp, 2 -800005fc: 93 81 81 c8 addi gp, gp, -888 -80000600: f3 25 20 02 csrr a1, 34 -80000604: 93 95 a5 00 slli a1, a1, 10 -80000608: 73 26 00 02 csrr a2, 32 -8000060c: 13 16 26 00 slli a2, a2, 2 -80000610: 37 f1 ff 6f lui sp, 458751 -80000614: 33 01 b1 40 sub sp, sp, a1 -80000618: 33 01 c1 00 add sp, sp, a2 -8000061c: f3 26 10 02 csrr a3, 33 -80000620: 63 86 06 00 beqz a3, 12 -80000624: 13 05 00 00 mv a0, zero -80000628: 6b 00 05 00 +800005f0 vx_tmc: +800005f0: 6b 00 05 00 +800005f4: 67 80 00 00 ret -8000062c RETURN: +800005f8 vx_barrier: +800005f8: 6b 40 b5 00 +800005fc: 67 80 00 00 ret + +80000600 vx_split: +80000600: 6b 20 05 00 +80000604: 67 80 00 00 ret + +80000608 vx_join: +80000608: 6b 30 00 00 +8000060c: 67 80 00 00 ret + +80000610 vx_warp_id: +80000610: 73 25 10 02 csrr a0, 33 +80000614: 67 80 00 00 ret + +80000618 vx_warp_gid: +80000618: 73 25 30 02 csrr a0, 35 +8000061c: 67 80 00 00 ret + +80000620 vx_thread_id: +80000620: 73 25 00 02 csrr a0, 32 +80000624: 67 80 00 00 ret + +80000628 vx_thread_gid: +80000628: 73 25 20 02 csrr a0, 34 8000062c: 67 80 00 00 ret -80000630 vx_wspawn: -80000630: 6b 10 b5 00 +80000630 vx_core_id: +80000630: 73 25 40 02 csrr a0, 36 80000634: 67 80 00 00 ret -80000638 vx_tmc: -80000638: 6b 00 05 00 +80000638 vx_num_threads: +80000638: 73 25 50 02 csrr a0, 37 8000063c: 67 80 00 00 ret -80000640 vx_barrier: -80000640: 6b 40 b5 00 +80000640 vx_num_warps: +80000640: 73 25 60 02 csrr a0, 38 80000644: 67 80 00 00 ret -80000648 vx_split: -80000648: 6b 20 05 00 +80000648 vx_num_cores: +80000648: 73 25 70 02 csrr a0, 39 8000064c: 67 80 00 00 ret -80000650 vx_join: -80000650: 6b 30 00 00 +80000650 vx_num_cycles: +80000650: 73 25 00 b0 csrr a0, mcycle 80000654: 67 80 00 00 ret -80000658 vx_warp_id: -80000658: 73 25 10 02 csrr a0, 33 +80000658 vx_num_instrs: +80000658: 73 25 20 b0 csrr a0, minstret 8000065c: 67 80 00 00 ret -80000660 vx_warp_gid: -80000660: 73 25 30 02 csrr a0, 35 -80000664: 67 80 00 00 ret +80000660 atexit: +80000660: 93 05 05 00 mv a1, a0 +80000664: 93 06 00 00 mv a3, zero +80000668: 13 06 00 00 mv a2, zero +8000066c: 13 05 00 00 mv a0, zero +80000670: 6f 00 80 20 j 520 -80000668 vx_thread_id: -80000668: 73 25 00 02 csrr a0, 32 -8000066c: 67 80 00 00 ret +80000674 exit: +80000674: 13 01 01 ff addi sp, sp, -16 +80000678: 93 05 00 00 mv a1, zero +8000067c: 23 24 81 00 sw s0, 8(sp) +80000680: 23 26 11 00 sw ra, 12(sp) +80000684: 13 04 05 00 mv s0, a0 +80000688: ef 00 80 28 jal 648 +8000068c: 03 a5 81 c2 lw a0, -984(gp) +80000690: 83 27 c5 03 lw a5, 60(a0) +80000694: 63 84 07 00 beqz a5, 8 +80000698: e7 80 07 00 jalr a5 +8000069c: 13 05 04 00 mv a0, s0 +800006a0: ef f0 1f f0 jal -256 -80000670 vx_thread_gid: -80000670: 73 25 20 02 csrr a0, 34 -80000674: 67 80 00 00 ret +800006a4 __libc_fini_array: +800006a4: 13 01 01 ff addi sp, sp, -16 +800006a8: 23 24 81 00 sw s0, 8(sp) +800006ac: b7 27 00 80 lui a5, 524290 +800006b0: 37 24 00 80 lui s0, 524290 +800006b4: 13 04 44 a3 addi s0, s0, -1484 +800006b8: 93 87 47 a3 addi a5, a5, -1484 +800006bc: b3 87 87 40 sub a5, a5, s0 +800006c0: 23 22 91 00 sw s1, 4(sp) +800006c4: 23 26 11 00 sw ra, 12(sp) +800006c8: 93 d4 27 40 srai s1, a5, 2 +800006cc: 63 80 04 02 beqz s1, 32 +800006d0: 93 87 c7 ff addi a5, a5, -4 +800006d4: 33 84 87 00 add s0, a5, s0 +800006d8: 83 27 04 00 lw a5, 0(s0) +800006dc: 93 84 f4 ff addi s1, s1, -1 +800006e0: 13 04 c4 ff addi s0, s0, -4 +800006e4: e7 80 07 00 jalr a5 +800006e8: e3 98 04 fe bnez s1, -16 +800006ec: 83 20 c1 00 lw ra, 12(sp) +800006f0: 03 24 81 00 lw s0, 8(sp) +800006f4: 83 24 41 00 lw s1, 4(sp) +800006f8: 13 01 01 01 addi sp, sp, 16 +800006fc: 67 80 00 00 ret -80000678 vx_core_id: -80000678: 73 25 40 02 csrr a0, 36 -8000067c: 67 80 00 00 ret +80000700 __libc_init_array: +80000700: 13 01 01 ff addi sp, sp, -16 +80000704: 23 24 81 00 sw s0, 8(sp) +80000708: 23 20 21 01 sw s2, 0(sp) +8000070c: 37 24 00 80 lui s0, 524290 +80000710: 37 29 00 80 lui s2, 524290 +80000714: 93 07 04 a3 addi a5, s0, -1488 +80000718: 13 09 09 a3 addi s2, s2, -1488 +8000071c: 33 09 f9 40 sub s2, s2, a5 +80000720: 23 26 11 00 sw ra, 12(sp) +80000724: 23 22 91 00 sw s1, 4(sp) +80000728: 13 59 29 40 srai s2, s2, 2 +8000072c: 63 00 09 02 beqz s2, 32 +80000730: 13 04 04 a3 addi s0, s0, -1488 +80000734: 93 04 00 00 mv s1, zero +80000738: 83 27 04 00 lw a5, 0(s0) +8000073c: 93 84 14 00 addi s1, s1, 1 +80000740: 13 04 44 00 addi s0, s0, 4 +80000744: e7 80 07 00 jalr a5 +80000748: e3 18 99 fe bne s2, s1, -16 +8000074c: 37 24 00 80 lui s0, 524290 +80000750: 37 29 00 80 lui s2, 524290 +80000754: 93 07 04 a3 addi a5, s0, -1488 +80000758: 13 09 49 a3 addi s2, s2, -1484 +8000075c: 33 09 f9 40 sub s2, s2, a5 +80000760: 13 59 29 40 srai s2, s2, 2 +80000764: 63 00 09 02 beqz s2, 32 +80000768: 13 04 04 a3 addi s0, s0, -1488 +8000076c: 93 04 00 00 mv s1, zero +80000770: 83 27 04 00 lw a5, 0(s0) +80000774: 93 84 14 00 addi s1, s1, 1 +80000778: 13 04 44 00 addi s0, s0, 4 +8000077c: e7 80 07 00 jalr a5 +80000780: e3 18 99 fe bne s2, s1, -16 +80000784: 83 20 c1 00 lw ra, 12(sp) +80000788: 03 24 81 00 lw s0, 8(sp) +8000078c: 83 24 41 00 lw s1, 4(sp) +80000790: 03 29 01 00 lw s2, 0(sp) +80000794: 13 01 01 01 addi sp, sp, 16 +80000798: 67 80 00 00 ret -80000680 vx_num_threads: -80000680: 73 25 50 02 csrr a0, 37 -80000684: 67 80 00 00 ret +8000079c memset: +8000079c: 13 03 f0 00 addi t1, zero, 15 +800007a0: 13 07 05 00 mv a4, a0 +800007a4: 63 7e c3 02 bgeu t1, a2, 60 +800007a8: 93 77 f7 00 andi a5, a4, 15 +800007ac: 63 90 07 0a bnez a5, 160 +800007b0: 63 92 05 08 bnez a1, 132 +800007b4: 93 76 06 ff andi a3, a2, -16 +800007b8: 13 76 f6 00 andi a2, a2, 15 +800007bc: b3 86 e6 00 add a3, a3, a4 +800007c0: 23 20 b7 00 sw a1, 0(a4) +800007c4: 23 22 b7 00 sw a1, 4(a4) +800007c8: 23 24 b7 00 sw a1, 8(a4) +800007cc: 23 26 b7 00 sw a1, 12(a4) +800007d0: 13 07 07 01 addi a4, a4, 16 +800007d4: e3 66 d7 fe bltu a4, a3, -20 +800007d8: 63 14 06 00 bnez a2, 8 +800007dc: 67 80 00 00 ret +800007e0: b3 06 c3 40 sub a3, t1, a2 +800007e4: 93 96 26 00 slli a3, a3, 2 +800007e8: 97 02 00 00 auipc t0, 0 +800007ec: b3 86 56 00 add a3, a3, t0 +800007f0: 67 80 c6 00 jr 12(a3) +800007f4: 23 07 b7 00 sb a1, 14(a4) +800007f8: a3 06 b7 00 sb a1, 13(a4) +800007fc: 23 06 b7 00 sb a1, 12(a4) +80000800: a3 05 b7 00 sb a1, 11(a4) +80000804: 23 05 b7 00 sb a1, 10(a4) +80000808: a3 04 b7 00 sb a1, 9(a4) +8000080c: 23 04 b7 00 sb a1, 8(a4) +80000810: a3 03 b7 00 sb a1, 7(a4) +80000814: 23 03 b7 00 sb a1, 6(a4) +80000818: a3 02 b7 00 sb a1, 5(a4) +8000081c: 23 02 b7 00 sb a1, 4(a4) +80000820: a3 01 b7 00 sb a1, 3(a4) +80000824: 23 01 b7 00 sb a1, 2(a4) +80000828: a3 00 b7 00 sb a1, 1(a4) +8000082c: 23 00 b7 00 sb a1, 0(a4) +80000830: 67 80 00 00 ret +80000834: 93 f5 f5 0f andi a1, a1, 255 +80000838: 93 96 85 00 slli a3, a1, 8 +8000083c: b3 e5 d5 00 or a1, a1, a3 +80000840: 93 96 05 01 slli a3, a1, 16 +80000844: b3 e5 d5 00 or a1, a1, a3 +80000848: 6f f0 df f6 j -148 +8000084c: 93 96 27 00 slli a3, a5, 2 +80000850: 97 02 00 00 auipc t0, 0 +80000854: b3 86 56 00 add a3, a3, t0 +80000858: 93 82 00 00 mv t0, ra +8000085c: e7 80 06 fa jalr -96(a3) +80000860: 93 80 02 00 mv ra, t0 +80000864: 93 87 07 ff addi a5, a5, -16 +80000868: 33 07 f7 40 sub a4, a4, a5 +8000086c: 33 06 f6 00 add a2, a2, a5 +80000870: e3 78 c3 f6 bgeu t1, a2, -144 +80000874: 6f f0 df f3 j -196 -80000688 vx_num_warps: -80000688: 73 25 60 02 csrr a0, 38 -8000068c: 67 80 00 00 ret +80000878 __register_exitproc: +80000878: 03 a7 81 c2 lw a4, -984(gp) +8000087c: 83 27 87 14 lw a5, 328(a4) +80000880: 63 8c 07 04 beqz a5, 88 +80000884: 03 a7 47 00 lw a4, 4(a5) +80000888: 13 08 f0 01 addi a6, zero, 31 +8000088c: 63 4e e8 06 blt a6, a4, 124 +80000890: 13 18 27 00 slli a6, a4, 2 +80000894: 63 06 05 02 beqz a0, 44 +80000898: 33 83 07 01 add t1, a5, a6 +8000089c: 23 24 c3 08 sw a2, 136(t1) +800008a0: 83 a8 87 18 lw a7, 392(a5) +800008a4: 13 06 10 00 addi a2, zero, 1 +800008a8: 33 16 e6 00 sll a2, a2, a4 +800008ac: b3 e8 c8 00 or a7, a7, a2 +800008b0: 23 a4 17 19 sw a7, 392(a5) +800008b4: 23 24 d3 10 sw a3, 264(t1) +800008b8: 93 06 20 00 addi a3, zero, 2 +800008bc: 63 04 d5 02 beq a0, a3, 40 +800008c0: 13 07 17 00 addi a4, a4, 1 +800008c4: 23 a2 e7 00 sw a4, 4(a5) +800008c8: b3 87 07 01 add a5, a5, a6 +800008cc: 23 a4 b7 00 sw a1, 8(a5) +800008d0: 13 05 00 00 mv a0, zero +800008d4: 67 80 00 00 ret +800008d8: 93 07 c7 14 addi a5, a4, 332 +800008dc: 23 24 f7 14 sw a5, 328(a4) +800008e0: 6f f0 5f fa j -92 +800008e4: 83 a6 c7 18 lw a3, 396(a5) +800008e8: 13 07 17 00 addi a4, a4, 1 +800008ec: 23 a2 e7 00 sw a4, 4(a5) +800008f0: 33 e6 c6 00 or a2, a3, a2 +800008f4: 23 a6 c7 18 sw a2, 396(a5) +800008f8: b3 87 07 01 add a5, a5, a6 +800008fc: 23 a4 b7 00 sw a1, 8(a5) +80000900: 13 05 00 00 mv a0, zero +80000904: 67 80 00 00 ret +80000908: 13 05 f0 ff addi a0, zero, -1 +8000090c: 67 80 00 00 ret -80000690 vx_num_cores: -80000690: 73 25 70 02 csrr a0, 39 -80000694: 67 80 00 00 ret - -80000698 vx_num_cycles: -80000698: 73 25 00 c0 rdcycle a0 -8000069c: 67 80 00 00 ret - -800006a0 vx_num_instrs: -800006a0: 73 25 20 c0 rdinstret a0 -800006a4: 67 80 00 00 ret - -800006a8 atexit: -800006a8: 93 05 05 00 mv a1, a0 -800006ac: 93 06 00 00 mv a3, zero -800006b0: 13 06 00 00 mv a2, zero -800006b4: 13 05 00 00 mv a0, zero -800006b8: 6f 00 80 20 j 520 - -800006bc exit: -800006bc: 13 01 01 ff addi sp, sp, -16 -800006c0: 93 05 00 00 mv a1, zero -800006c4: 23 24 81 00 sw s0, 8(sp) -800006c8: 23 26 11 00 sw ra, 12(sp) -800006cc: 13 04 05 00 mv s0, a0 -800006d0: ef 00 80 28 jal 648 -800006d4: 03 a5 81 c2 lw a0, -984(gp) -800006d8: 83 27 c5 03 lw a5, 60(a0) -800006dc: 63 84 07 00 beqz a5, 8 -800006e0: e7 80 07 00 jalr a5 -800006e4: 13 05 04 00 mv a0, s0 -800006e8: ef f0 1f f0 jal -256 - -800006ec __libc_fini_array: -800006ec: 13 01 01 ff addi sp, sp, -16 -800006f0: 23 24 81 00 sw s0, 8(sp) -800006f4: b7 27 00 80 lui a5, 524290 -800006f8: 37 24 00 80 lui s0, 524290 -800006fc: 13 04 c4 a7 addi s0, s0, -1412 -80000700: 93 87 c7 a7 addi a5, a5, -1412 -80000704: b3 87 87 40 sub a5, a5, s0 -80000708: 23 22 91 00 sw s1, 4(sp) -8000070c: 23 26 11 00 sw ra, 12(sp) -80000710: 93 d4 27 40 srai s1, a5, 2 -80000714: 63 80 04 02 beqz s1, 32 -80000718: 93 87 c7 ff addi a5, a5, -4 -8000071c: 33 84 87 00 add s0, a5, s0 -80000720: 83 27 04 00 lw a5, 0(s0) -80000724: 93 84 f4 ff addi s1, s1, -1 -80000728: 13 04 c4 ff addi s0, s0, -4 -8000072c: e7 80 07 00 jalr a5 -80000730: e3 98 04 fe bnez s1, -16 -80000734: 83 20 c1 00 lw ra, 12(sp) -80000738: 03 24 81 00 lw s0, 8(sp) -8000073c: 83 24 41 00 lw s1, 4(sp) -80000740: 13 01 01 01 addi sp, sp, 16 -80000744: 67 80 00 00 ret - -80000748 __libc_init_array: -80000748: 13 01 01 ff addi sp, sp, -16 -8000074c: 23 24 81 00 sw s0, 8(sp) -80000750: 23 20 21 01 sw s2, 0(sp) -80000754: 37 24 00 80 lui s0, 524290 -80000758: 37 29 00 80 lui s2, 524290 -8000075c: 93 07 84 a7 addi a5, s0, -1416 -80000760: 13 09 89 a7 addi s2, s2, -1416 -80000764: 33 09 f9 40 sub s2, s2, a5 -80000768: 23 26 11 00 sw ra, 12(sp) -8000076c: 23 22 91 00 sw s1, 4(sp) -80000770: 13 59 29 40 srai s2, s2, 2 -80000774: 63 00 09 02 beqz s2, 32 -80000778: 13 04 84 a7 addi s0, s0, -1416 -8000077c: 93 04 00 00 mv s1, zero -80000780: 83 27 04 00 lw a5, 0(s0) -80000784: 93 84 14 00 addi s1, s1, 1 -80000788: 13 04 44 00 addi s0, s0, 4 -8000078c: e7 80 07 00 jalr a5 -80000790: e3 18 99 fe bne s2, s1, -16 -80000794: 37 24 00 80 lui s0, 524290 -80000798: 37 29 00 80 lui s2, 524290 -8000079c: 93 07 84 a7 addi a5, s0, -1416 -800007a0: 13 09 c9 a7 addi s2, s2, -1412 -800007a4: 33 09 f9 40 sub s2, s2, a5 -800007a8: 13 59 29 40 srai s2, s2, 2 -800007ac: 63 00 09 02 beqz s2, 32 -800007b0: 13 04 84 a7 addi s0, s0, -1416 -800007b4: 93 04 00 00 mv s1, zero -800007b8: 83 27 04 00 lw a5, 0(s0) -800007bc: 93 84 14 00 addi s1, s1, 1 -800007c0: 13 04 44 00 addi s0, s0, 4 -800007c4: e7 80 07 00 jalr a5 -800007c8: e3 18 99 fe bne s2, s1, -16 -800007cc: 83 20 c1 00 lw ra, 12(sp) -800007d0: 03 24 81 00 lw s0, 8(sp) -800007d4: 83 24 41 00 lw s1, 4(sp) -800007d8: 03 29 01 00 lw s2, 0(sp) -800007dc: 13 01 01 01 addi sp, sp, 16 -800007e0: 67 80 00 00 ret - -800007e4 memset: -800007e4: 13 03 f0 00 addi t1, zero, 15 -800007e8: 13 07 05 00 mv a4, a0 -800007ec: 63 7e c3 02 bgeu t1, a2, 60 -800007f0: 93 77 f7 00 andi a5, a4, 15 -800007f4: 63 90 07 0a bnez a5, 160 -800007f8: 63 92 05 08 bnez a1, 132 -800007fc: 93 76 06 ff andi a3, a2, -16 -80000800: 13 76 f6 00 andi a2, a2, 15 -80000804: b3 86 e6 00 add a3, a3, a4 -80000808: 23 20 b7 00 sw a1, 0(a4) -8000080c: 23 22 b7 00 sw a1, 4(a4) -80000810: 23 24 b7 00 sw a1, 8(a4) -80000814: 23 26 b7 00 sw a1, 12(a4) -80000818: 13 07 07 01 addi a4, a4, 16 -8000081c: e3 66 d7 fe bltu a4, a3, -20 -80000820: 63 14 06 00 bnez a2, 8 -80000824: 67 80 00 00 ret -80000828: b3 06 c3 40 sub a3, t1, a2 -8000082c: 93 96 26 00 slli a3, a3, 2 -80000830: 97 02 00 00 auipc t0, 0 -80000834: b3 86 56 00 add a3, a3, t0 -80000838: 67 80 c6 00 jr 12(a3) -8000083c: 23 07 b7 00 sb a1, 14(a4) -80000840: a3 06 b7 00 sb a1, 13(a4) -80000844: 23 06 b7 00 sb a1, 12(a4) -80000848: a3 05 b7 00 sb a1, 11(a4) -8000084c: 23 05 b7 00 sb a1, 10(a4) -80000850: a3 04 b7 00 sb a1, 9(a4) -80000854: 23 04 b7 00 sb a1, 8(a4) -80000858: a3 03 b7 00 sb a1, 7(a4) -8000085c: 23 03 b7 00 sb a1, 6(a4) -80000860: a3 02 b7 00 sb a1, 5(a4) -80000864: 23 02 b7 00 sb a1, 4(a4) -80000868: a3 01 b7 00 sb a1, 3(a4) -8000086c: 23 01 b7 00 sb a1, 2(a4) -80000870: a3 00 b7 00 sb a1, 1(a4) -80000874: 23 00 b7 00 sb a1, 0(a4) -80000878: 67 80 00 00 ret -8000087c: 93 f5 f5 0f andi a1, a1, 255 -80000880: 93 96 85 00 slli a3, a1, 8 -80000884: b3 e5 d5 00 or a1, a1, a3 -80000888: 93 96 05 01 slli a3, a1, 16 -8000088c: b3 e5 d5 00 or a1, a1, a3 -80000890: 6f f0 df f6 j -148 -80000894: 93 96 27 00 slli a3, a5, 2 -80000898: 97 02 00 00 auipc t0, 0 -8000089c: b3 86 56 00 add a3, a3, t0 -800008a0: 93 82 00 00 mv t0, ra -800008a4: e7 80 06 fa jalr -96(a3) -800008a8: 93 80 02 00 mv ra, t0 -800008ac: 93 87 07 ff addi a5, a5, -16 -800008b0: 33 07 f7 40 sub a4, a4, a5 -800008b4: 33 06 f6 00 add a2, a2, a5 -800008b8: e3 78 c3 f6 bgeu t1, a2, -144 -800008bc: 6f f0 df f3 j -196 - -800008c0 __register_exitproc: -800008c0: 03 a7 81 c2 lw a4, -984(gp) -800008c4: 83 27 87 14 lw a5, 328(a4) -800008c8: 63 8c 07 04 beqz a5, 88 -800008cc: 03 a7 47 00 lw a4, 4(a5) -800008d0: 13 08 f0 01 addi a6, zero, 31 -800008d4: 63 4e e8 06 blt a6, a4, 124 -800008d8: 13 18 27 00 slli a6, a4, 2 -800008dc: 63 06 05 02 beqz a0, 44 -800008e0: 33 83 07 01 add t1, a5, a6 -800008e4: 23 24 c3 08 sw a2, 136(t1) -800008e8: 83 a8 87 18 lw a7, 392(a5) -800008ec: 13 06 10 00 addi a2, zero, 1 -800008f0: 33 16 e6 00 sll a2, a2, a4 -800008f4: b3 e8 c8 00 or a7, a7, a2 -800008f8: 23 a4 17 19 sw a7, 392(a5) -800008fc: 23 24 d3 10 sw a3, 264(t1) -80000900: 93 06 20 00 addi a3, zero, 2 -80000904: 63 04 d5 02 beq a0, a3, 40 -80000908: 13 07 17 00 addi a4, a4, 1 -8000090c: 23 a2 e7 00 sw a4, 4(a5) -80000910: b3 87 07 01 add a5, a5, a6 -80000914: 23 a4 b7 00 sw a1, 8(a5) -80000918: 13 05 00 00 mv a0, zero -8000091c: 67 80 00 00 ret -80000920: 93 07 c7 14 addi a5, a4, 332 -80000924: 23 24 f7 14 sw a5, 328(a4) -80000928: 6f f0 5f fa j -92 -8000092c: 83 a6 c7 18 lw a3, 396(a5) -80000930: 13 07 17 00 addi a4, a4, 1 -80000934: 23 a2 e7 00 sw a4, 4(a5) -80000938: 33 e6 c6 00 or a2, a3, a2 -8000093c: 23 a6 c7 18 sw a2, 396(a5) -80000940: b3 87 07 01 add a5, a5, a6 -80000944: 23 a4 b7 00 sw a1, 8(a5) -80000948: 13 05 00 00 mv a0, zero -8000094c: 67 80 00 00 ret -80000950: 13 05 f0 ff addi a0, zero, -1 -80000954: 67 80 00 00 ret - -80000958 __call_exitprocs: -80000958: 13 01 01 fd addi sp, sp, -48 -8000095c: 23 2c 41 01 sw s4, 24(sp) -80000960: 03 aa 81 c2 lw s4, -984(gp) -80000964: 23 20 21 03 sw s2, 32(sp) -80000968: 23 26 11 02 sw ra, 44(sp) -8000096c: 03 29 8a 14 lw s2, 328(s4) -80000970: 23 24 81 02 sw s0, 40(sp) -80000974: 23 22 91 02 sw s1, 36(sp) -80000978: 23 2e 31 01 sw s3, 28(sp) -8000097c: 23 2a 51 01 sw s5, 20(sp) -80000980: 23 28 61 01 sw s6, 16(sp) -80000984: 23 26 71 01 sw s7, 12(sp) -80000988: 23 24 81 01 sw s8, 8(sp) -8000098c: 63 00 09 04 beqz s2, 64 -80000990: 13 0b 05 00 mv s6, a0 -80000994: 93 8b 05 00 mv s7, a1 -80000998: 93 0a 10 00 addi s5, zero, 1 -8000099c: 93 09 f0 ff addi s3, zero, -1 -800009a0: 83 24 49 00 lw s1, 4(s2) -800009a4: 13 84 f4 ff addi s0, s1, -1 -800009a8: 63 42 04 02 bltz s0, 36 -800009ac: 93 94 24 00 slli s1, s1, 2 -800009b0: b3 04 99 00 add s1, s2, s1 -800009b4: 63 84 0b 04 beqz s7, 72 -800009b8: 83 a7 44 10 lw a5, 260(s1) -800009bc: 63 80 77 05 beq a5, s7, 64 -800009c0: 13 04 f4 ff addi s0, s0, -1 -800009c4: 93 84 c4 ff addi s1, s1, -4 -800009c8: e3 16 34 ff bne s0, s3, -20 -800009cc: 83 20 c1 02 lw ra, 44(sp) -800009d0: 03 24 81 02 lw s0, 40(sp) -800009d4: 83 24 41 02 lw s1, 36(sp) -800009d8: 03 29 01 02 lw s2, 32(sp) -800009dc: 83 29 c1 01 lw s3, 28(sp) -800009e0: 03 2a 81 01 lw s4, 24(sp) -800009e4: 83 2a 41 01 lw s5, 20(sp) -800009e8: 03 2b 01 01 lw s6, 16(sp) -800009ec: 83 2b c1 00 lw s7, 12(sp) -800009f0: 03 2c 81 00 lw s8, 8(sp) -800009f4: 13 01 01 03 addi sp, sp, 48 -800009f8: 67 80 00 00 ret -800009fc: 83 27 49 00 lw a5, 4(s2) -80000a00: 83 a6 44 00 lw a3, 4(s1) -80000a04: 93 87 f7 ff addi a5, a5, -1 -80000a08: 63 8e 87 04 beq a5, s0, 92 -80000a0c: 23 a2 04 00 sw zero, 4(s1) -80000a10: e3 88 06 fa beqz a3, -80 -80000a14: 83 27 89 18 lw a5, 392(s2) -80000a18: 33 97 8a 00 sll a4, s5, s0 -80000a1c: 03 2c 49 00 lw s8, 4(s2) -80000a20: b3 77 f7 00 and a5, a4, a5 -80000a24: 63 92 07 02 bnez a5, 36 +80000910 __call_exitprocs: +80000910: 13 01 01 fd addi sp, sp, -48 +80000914: 23 2c 41 01 sw s4, 24(sp) +80000918: 03 aa 81 c2 lw s4, -984(gp) +8000091c: 23 20 21 03 sw s2, 32(sp) +80000920: 23 26 11 02 sw ra, 44(sp) +80000924: 03 29 8a 14 lw s2, 328(s4) +80000928: 23 24 81 02 sw s0, 40(sp) +8000092c: 23 22 91 02 sw s1, 36(sp) +80000930: 23 2e 31 01 sw s3, 28(sp) +80000934: 23 2a 51 01 sw s5, 20(sp) +80000938: 23 28 61 01 sw s6, 16(sp) +8000093c: 23 26 71 01 sw s7, 12(sp) +80000940: 23 24 81 01 sw s8, 8(sp) +80000944: 63 00 09 04 beqz s2, 64 +80000948: 13 0b 05 00 mv s6, a0 +8000094c: 93 8b 05 00 mv s7, a1 +80000950: 93 0a 10 00 addi s5, zero, 1 +80000954: 93 09 f0 ff addi s3, zero, -1 +80000958: 83 24 49 00 lw s1, 4(s2) +8000095c: 13 84 f4 ff addi s0, s1, -1 +80000960: 63 42 04 02 bltz s0, 36 +80000964: 93 94 24 00 slli s1, s1, 2 +80000968: b3 04 99 00 add s1, s2, s1 +8000096c: 63 84 0b 04 beqz s7, 72 +80000970: 83 a7 44 10 lw a5, 260(s1) +80000974: 63 80 77 05 beq a5, s7, 64 +80000978: 13 04 f4 ff addi s0, s0, -1 +8000097c: 93 84 c4 ff addi s1, s1, -4 +80000980: e3 16 34 ff bne s0, s3, -20 +80000984: 83 20 c1 02 lw ra, 44(sp) +80000988: 03 24 81 02 lw s0, 40(sp) +8000098c: 83 24 41 02 lw s1, 36(sp) +80000990: 03 29 01 02 lw s2, 32(sp) +80000994: 83 29 c1 01 lw s3, 28(sp) +80000998: 03 2a 81 01 lw s4, 24(sp) +8000099c: 83 2a 41 01 lw s5, 20(sp) +800009a0: 03 2b 01 01 lw s6, 16(sp) +800009a4: 83 2b c1 00 lw s7, 12(sp) +800009a8: 03 2c 81 00 lw s8, 8(sp) +800009ac: 13 01 01 03 addi sp, sp, 48 +800009b0: 67 80 00 00 ret +800009b4: 83 27 49 00 lw a5, 4(s2) +800009b8: 83 a6 44 00 lw a3, 4(s1) +800009bc: 93 87 f7 ff addi a5, a5, -1 +800009c0: 63 8e 87 04 beq a5, s0, 92 +800009c4: 23 a2 04 00 sw zero, 4(s1) +800009c8: e3 88 06 fa beqz a3, -80 +800009cc: 83 27 89 18 lw a5, 392(s2) +800009d0: 33 97 8a 00 sll a4, s5, s0 +800009d4: 03 2c 49 00 lw s8, 4(s2) +800009d8: b3 77 f7 00 and a5, a4, a5 +800009dc: 63 92 07 02 bnez a5, 36 +800009e0: e7 80 06 00 jalr a3 +800009e4: 03 27 49 00 lw a4, 4(s2) +800009e8: 83 27 8a 14 lw a5, 328(s4) +800009ec: 63 14 87 01 bne a4, s8, 8 +800009f0: e3 04 f9 f8 beq s2, a5, -120 +800009f4: e3 88 07 f8 beqz a5, -112 +800009f8: 13 89 07 00 mv s2, a5 +800009fc: 6f f0 df f5 j -164 +80000a00: 83 27 c9 18 lw a5, 396(s2) +80000a04: 83 a5 44 08 lw a1, 132(s1) +80000a08: 33 77 f7 00 and a4, a4, a5 +80000a0c: 63 1c 07 00 bnez a4, 24 +80000a10: 13 05 0b 00 mv a0, s6 +80000a14: e7 80 06 00 jalr a3 +80000a18: 6f f0 df fc j -52 +80000a1c: 23 22 89 00 sw s0, 4(s2) +80000a20: 6f f0 9f fa j -88 +80000a24: 13 85 05 00 mv a0, a1 80000a28: e7 80 06 00 jalr a3 -80000a2c: 03 27 49 00 lw a4, 4(s2) -80000a30: 83 27 8a 14 lw a5, 328(s4) -80000a34: 63 14 87 01 bne a4, s8, 8 -80000a38: e3 04 f9 f8 beq s2, a5, -120 -80000a3c: e3 88 07 f8 beqz a5, -112 -80000a40: 13 89 07 00 mv s2, a5 -80000a44: 6f f0 df f5 j -164 -80000a48: 83 27 c9 18 lw a5, 396(s2) -80000a4c: 83 a5 44 08 lw a1, 132(s1) -80000a50: 33 77 f7 00 and a4, a4, a5 -80000a54: 63 1c 07 00 bnez a4, 24 -80000a58: 13 05 0b 00 mv a0, s6 -80000a5c: e7 80 06 00 jalr a3 -80000a60: 6f f0 df fc j -52 -80000a64: 23 22 89 00 sw s0, 4(s2) -80000a68: 6f f0 9f fa j -88 -80000a6c: 13 85 05 00 mv a0, a1 -80000a70: e7 80 06 00 jalr a3 -80000a74: 6f f0 9f fb j -72 +80000a2c: 6f f0 9f fb j -72 Disassembly of section .init_array: -80001a78 __preinit_array_start: -80001a78: 48 00 -80001a7a: 00 80 +80001a30 __preinit_array_start: +80001a30: 48 00 +80001a32: 00 80 Disassembly of section .data: -80001a80 impure_data: -80001a80: 00 00 -80001a82: 00 00 -80001a84: 6c 1d -80001a86: 00 80 -80001a88: d4 1d -80001a8a: 00 80 -80001a8c: 3c 1e -80001a8e: 00 80 +80001a38 impure_data: +80001a38: 00 00 +80001a3a: 00 00 +80001a3c: 24 1d +80001a3e: 00 80 +80001a40: 8c 1d +80001a42: 00 80 +80001a44: f4 1d +80001a46: 00 80 ... -80001b28: 01 00 -80001b2a: 00 00 -80001b2c: 00 00 -80001b2e: 00 00 -80001b30: 0e 33 -80001b32: cd ab -80001b34: 34 12 -80001b36: 6d e6 -80001b38: ec de -80001b3a: 05 00 -80001b3c: 0b 00 00 00 +80001ae0: 01 00 +80001ae2: 00 00 +80001ae4: 00 00 +80001ae6: 00 00 +80001ae8: 0e 33 +80001aea: cd ab +80001aec: 34 12 +80001aee: 6d e6 +80001af0: ec de +80001af2: 05 00 +80001af4: 0b 00 00 00 ... Disassembly of section .sdata: -80001ea8 _global_impure_ptr: -80001ea8: 80 1a -80001eaa: 00 80 +80001e60 _global_impure_ptr: +80001e60: 38 1a +80001e62: 00 80 Disassembly of section .bss: -80001eac g_wspawn_args: +80001e64 g_wspawn_args: ... Disassembly of section .comment: @@ -879,28 +861,28 @@ Disassembly of section .symtab: 2c: 03 00 02 00 lb zero, 0(tp) 30: 00 00 32: 00 00 - 34: 78 1a + 34: 30 1a 36: 00 80 38: 00 00 3a: 00 00 3c: 03 00 03 00 lb zero, 0(t1) 40: 00 00 42: 00 00 - 44: 80 1a + 44: 38 1a 46: 00 80 48: 00 00 4a: 00 00 4c: 03 00 04 00 lb zero, 0(s0) 50: 00 00 52: 00 00 - 54: a8 1e + 54: 60 1e 56: 00 80 58: 00 00 5a: 00 00 5c: 03 00 05 00 lb zero, 0(a0) 60: 00 00 62: 00 00 - 64: ac 1e + 64: 64 1e 66: 00 80 68: 00 00 6a: 00 00 @@ -916,7 +898,7 @@ Disassembly of section .symtab: 9e: f1 ff a0: 0e 00 a2: 00 00 - a4: 2c 06 + a4: e4 05 a6: 00 80 a8: 00 00 aa: 00 00 @@ -973,7 +955,7 @@ Disassembly of section .symtab: 14e: f1 ff 150: 85 00 152: 00 00 - 154: 80 1a + 154: 38 1a 156: 00 80 158: 28 04 15a: 00 00 @@ -984,49 +966,49 @@ Disassembly of section .symtab: 16e: f1 ff 170: 91 00 172: 00 00 - 174: 7c 1a + 174: 34 1a 176: 00 80 178: 00 00 17a: 00 00 17c: 00 00 17e: 03 00 a2 00 lb zero, 10(tp) 182: 00 00 - 184: 7c 1a + 184: 34 1a 186: 00 80 188: 00 00 18a: 00 00 18c: 00 00 18e: 03 00 b5 00 lb zero, 11(a0) 192: 00 00 - 194: 7c 1a + 194: 34 1a 196: 00 80 198: 00 00 19a: 00 00 19c: 00 00 19e: 03 00 c6 00 lb zero, 12(a2) 1a2: 00 00 - 1a4: 78 1a + 1a4: 30 1a 1a6: 00 80 1a8: 00 00 1aa: 00 00 1ac: 00 00 1ae: 03 00 da 00 lb zero, 13(s4) 1b2: 00 00 - 1b4: 78 1a + 1b4: 30 1a 1b6: 00 80 1b8: 00 00 1ba: 00 00 1bc: 00 00 1be: 03 00 ed 00 lb zero, 14(s10) 1c2: 00 00 - 1c4: 78 1a + 1c4: 30 1a 1c6: 00 80 1c8: 00 00 1ca: 00 00 1cc: 00 00 1ce: 03 00 03 01 lb zero, 16(t1) 1d2: 00 00 - 1d4: 80 22 + 1d4: 38 22 1d6: 00 80 1d8: 00 00 1da: 00 00 @@ -1034,7 +1016,7 @@ Disassembly of section .symtab: 1de: f1 ff 1e0: 15 01 1e2: 00 00 - 1e4: 78 06 + 1e4: 30 06 1e6: 00 80 1e8: 00 00 1ea: 00 00 @@ -1042,7 +1024,7 @@ Disassembly of section .symtab: 1ee: 02 00 1f0: 20 01 1f2: 00 00 - 1f4: 30 06 + 1f4: e8 05 1f6: 00 80 1f8: 00 00 1fa: 00 00 @@ -1050,7 +1032,7 @@ Disassembly of section .symtab: 1fe: 02 00 200: 2a 01 202: 00 00 - 204: 38 04 + 204: f0 03 206: 00 80 208: 9c 00 20a: 00 00 @@ -1060,21 +1042,21 @@ Disassembly of section .symtab: 212: 00 00 214: 60 00 216: 00 80 - 218: 3c 01 + 218: 48 01 21a: 00 00 21c: 12 00 21e: 02 00 220: 54 01 222: 00 00 - 224: ac 1e + 224: 64 1e 226: 00 80 - 228: 04 00 + 228: 20 00 22a: 00 00 22c: 11 00 22e: 06 00 230: 62 01 232: 00 00 - 234: 38 06 + 234: f0 05 236: 00 80 238: 00 00 23a: 00 00 @@ -1082,7 +1064,7 @@ Disassembly of section .symtab: 23e: 02 00 240: 69 01 242: 00 00 - 244: a8 1e + 244: 60 1e 246: 00 80 248: 00 00 24a: 00 00 @@ -1090,7 +1072,7 @@ Disassembly of section .symtab: 24e: 05 00 250: 79 01 252: 00 00 - 254: 50 06 + 254: 08 06 256: 00 80 258: 00 00 25a: 00 00 @@ -1098,7 +1080,7 @@ Disassembly of section .symtab: 25e: 02 00 260: 81 01 262: 00 00 - 264: 88 06 + 264: 40 06 266: 00 80 268: 00 00 26a: 00 00 @@ -1108,12 +1090,12 @@ Disassembly of section .symtab: 272: 00 00 274: 3c 02 276: 00 80 - 278: cc 01 + 278: 84 01 27a: 00 00 27c: 12 00 27e: 02 00 280: 9b 01 00 00 - 284: 48 06 + 284: 00 06 286: 00 80 288: 00 00 28a: 00 00 @@ -1121,14 +1103,14 @@ Disassembly of section .symtab: 28e: 02 00 290: a4 01 292: 00 00 - 294: a8 1e + 294: 60 1e 296: 00 80 298: 04 00 29a: 00 00 29c: 11 00 29e: 05 00 2a0: b7 01 00 00 lui gp, 0 - 2a4: 48 07 + 2a4: 00 07 2a6: 00 80 2a8: 9c 00 2aa: 00 00 @@ -1136,7 +1118,7 @@ Disassembly of section .symtab: 2ae: 02 00 2b0: c9 01 2b2: 00 00 - 2b4: 80 06 + 2b4: 38 06 2b6: 00 80 2b8: 00 00 2ba: 00 00 @@ -1144,14 +1126,14 @@ Disassembly of section .symtab: 2be: 02 00 2c0: d8 01 2c2: 00 00 - 2c4: 58 06 + 2c4: 10 06 2c6: 00 80 2c8: 00 00 2ca: 00 00 2cc: 12 00 2ce: 02 00 2d0: e3 01 00 00 beqz zero, 2050 - 2d4: 68 06 + 2d4: 20 06 2d6: 00 80 2d8: 00 00 2da: 00 00 @@ -1159,7 +1141,7 @@ Disassembly of section .symtab: 2de: 02 00 2e0: f0 01 2e2: 00 00 - 2e4: ec 06 + 2e4: a4 06 2e6: 00 80 2e8: 5c 00 2ea: 00 00 @@ -1167,7 +1149,7 @@ Disassembly of section .symtab: 2ee: 02 00 2f0: 02 02 2f2: 00 00 - 2f4: f0 05 + 2f4: a8 05 2f6: 00 80 2f8: 00 00 2fa: 00 00 @@ -1175,14 +1157,14 @@ Disassembly of section .symtab: 2fe: 02 00 300: 0c 02 302: 00 00 - 304: 40 06 + 304: f8 05 306: 00 80 308: 00 00 30a: 00 00 30c: 12 00 30e: 02 00 310: 17 02 00 00 auipc tp, 0 - 314: 58 09 + 314: 10 09 316: 00 80 318: 20 01 31a: 00 00 @@ -1198,7 +1180,7 @@ Disassembly of section .symtab: 32e: 01 00 330: 28 02 332: 00 00 - 334: c0 08 + 334: 78 08 336: 00 80 338: 98 00 33a: 00 00 @@ -1206,7 +1188,7 @@ Disassembly of section .symtab: 33e: 02 00 340: 3c 02 342: 00 00 - 344: b0 1e + 344: 84 1e 346: 00 80 348: 00 00 34a: 00 00 @@ -1214,7 +1196,7 @@ Disassembly of section .symtab: 34e: 06 00 350: 48 02 352: 00 00 - 354: ac 1e + 354: 64 1e 356: 00 80 358: 00 00 35a: 00 00 @@ -1222,14 +1204,14 @@ Disassembly of section .symtab: 35e: 06 00 360: 54 02 362: 00 00 - 364: e4 07 + 364: 9c 07 366: 00 80 368: dc 00 36a: 00 00 36c: 12 00 36e: 02 00 370: 5b 02 00 00 - 374: 08 04 + 374: c0 03 376: 00 80 378: 30 00 37a: 00 00 @@ -1237,14 +1219,14 @@ Disassembly of section .symtab: 37e: 02 00 380: 60 02 382: 00 00 - 384: 64 05 + 384: 1c 05 386: 00 80 388: 84 00 38a: 00 00 38c: 12 00 38e: 02 00 390: 83 02 00 00 lb t0, 0(zero) - 394: 98 06 + 394: 50 06 396: 00 80 398: 00 00 39a: 00 00 @@ -1252,7 +1234,7 @@ Disassembly of section .symtab: 39e: 02 00 3a0: 91 02 3a2: 00 00 - 3a4: a8 06 + 3a4: 60 06 3a6: 00 80 3a8: 14 00 3aa: 00 00 @@ -1260,7 +1242,7 @@ Disassembly of section .symtab: 3ae: 02 00 3b0: 98 02 3b2: 00 00 - 3b4: 70 06 + 3b4: 28 06 3b6: 00 80 3b8: 00 00 3ba: 00 00 @@ -1268,14 +1250,14 @@ Disassembly of section .symtab: 3be: 02 00 3c0: a6 02 3c2: 00 00 - 3c4: 90 06 + 3c4: 48 06 3c6: 00 80 3c8: 00 00 3ca: 00 00 3cc: 12 00 3ce: 02 00 3d0: b3 02 00 00 add t0, zero, zero - 3d4: d4 04 + 3d4: 8c 04 3d6: 00 80 3d8: 90 00 3da: 00 00 @@ -1283,7 +1265,7 @@ Disassembly of section .symtab: 3de: 02 00 3e0: d1 02 3e2: 00 00 - 3e4: 60 06 + 3e4: 18 06 3e6: 00 80 3e8: 00 00 3ea: 00 00 @@ -1291,15 +1273,15 @@ Disassembly of section .symtab: 3ee: 02 00 3f0: dd 02 3f2: 00 00 - 3f4: 9c 01 + 3f4: a8 01 3f6: 00 80 - 3f8: a0 00 + 3f8: 94 00 3fa: 00 00 3fc: 12 00 3fe: 02 00 400: f6 02 402: 00 00 - 404: 80 1a + 404: 38 1a 406: 00 80 408: 00 00 40a: 00 00 @@ -1307,7 +1289,7 @@ Disassembly of section .symtab: 40e: 04 00 410: 05 03 412: 00 00 - 414: ac 1e + 414: 64 1e 416: 00 80 418: 00 00 41a: 00 00 @@ -1315,7 +1297,7 @@ Disassembly of section .symtab: 41e: 05 00 420: 9d 00 422: 00 00 - 424: b0 1e + 424: 84 1e 426: 00 80 428: 00 00 42a: 00 00 @@ -1323,7 +1305,7 @@ Disassembly of section .symtab: 42e: 06 00 430: 0d 03 432: 00 00 - 434: bc 06 + 434: 74 06 436: 00 80 438: 30 00 43a: 00 00 @@ -1331,7 +1313,7 @@ Disassembly of section .symtab: 43e: 02 00 440: 0c 03 442: 00 00 - 444: e8 05 + 444: a0 05 446: 00 80 448: 00 00 44a: 00 00 @@ -1339,7 +1321,7 @@ Disassembly of section .symtab: 44e: 02 00 450: 12 03 452: 00 00 - 454: a0 06 + 454: 58 06 456: 00 80 458: 00 00 45a: 00 00 @@ -1379,12 +1361,12 @@ Disassembly of section .strtab: 3e: 5f 6b 65 72 42: 6e 65 44: 6c 2d - 46: 32 33 - 48: 2d 34 - 4a: 62 2d - 4c: 37 63 2d 39 lui t1, 234198 - 50: 32 2d - 52: 30 30 + 46: 63 37 2d 31 + 4a: 32 2d + 4c: 64 32 + 4e: 2d 62 + 50: 34 2d + 52: 64 30 54: 2e 63 56: 00 70 58: 61 72 diff --git a/driver/tests/basic/Makefile b/driver/tests/basic/Makefile index 92456e48..3db5d4b4 100644 --- a/driver/tests/basic/Makefile +++ b/driver/tests/basic/Makefile @@ -61,8 +61,8 @@ run-simx: $(PROJECT) clean: rm -rf $(PROJECT) *.o .depend -clean-all: - rm -rf $(PROJECT) *.o *.elf *.bin *.dump .depend +clean-all: clean + rm -rf *.elf *.bin *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/driver/tests/basic/kernel.bin b/driver/tests/basic/kernel.bin index af234296..0485cf18 100755 Binary files a/driver/tests/basic/kernel.bin and b/driver/tests/basic/kernel.bin differ diff --git a/driver/tests/basic/kernel.dump b/driver/tests/basic/kernel.dump index 6294fb65..8e623c63 100644 --- a/driver/tests/basic/kernel.dump +++ b/driver/tests/basic/kernel.dump @@ -390,7 +390,7 @@ Disassembly of section .text: 80000518: 00492703 lw a4,4(s2) 8000051c: 148a2783 lw a5,328(s4) 80000520: 01871463 bne a4,s8,80000528 <__call_exitprocs+0xe4> -80000524: f92784e3 beq a5,s2,800004ac <__call_exitprocs+0x68> +80000524: f8f904e3 beq s2,a5,800004ac <__call_exitprocs+0x68> 80000528: f80788e3 beqz a5,800004b8 <__call_exitprocs+0x74> 8000052c: 00078913 mv s2,a5 80000530: f5dff06f j 8000048c <__call_exitprocs+0x48> @@ -450,20 +450,21 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3120 fld fs0,96(a0) - c: 2e30 fld fa2,88(a2) - e: 2e32 fld ft8,264(sp) - 10: 0030 addi a2,sp,8 + a: 3920 fld fs0,112(a0) + c: 322e fld ft4,232(sp) + e: 302e fld ft0,232(sp) + ... Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2941 jal 490 <_start-0x7ffffb70> + 0: 2541 jal 680 <_start-0x7ffff980> 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec> - c: 001f 0000 1004 0x10040000001f + c: 0000001b 0x1b + 10: 1004 addi s1,sp,32 12: 7205 lui tp,0xfffe1 14: 3376 fld ft6,376(sp) 16: 6932 flw fs2,12(sp) @@ -472,5 +473,3 @@ Disassembly of section .riscv.attributes: 1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdc5e> 1e: 3070 fld fa2,224(s0) 20: 665f 7032 0030 0x307032665f - 26: 0108 addi a0,sp,128 - 28: 0b0a slli s6,s6,0x2 diff --git a/driver/tests/basic/kernel.elf b/driver/tests/basic/kernel.elf index 0cad586a..53214a03 100755 Binary files a/driver/tests/basic/kernel.elf and b/driver/tests/basic/kernel.elf differ diff --git a/driver/tests/demo/Makefile b/driver/tests/demo/Makefile index 244bcd14..9effa380 100644 --- a/driver/tests/demo/Makefile +++ b/driver/tests/demo/Makefile @@ -59,8 +59,8 @@ run-simx: $(PROJECT) clean: rm -rf $(PROJECT) *.o .depend -clean-all: - rm -rf $(PROJECT) *.o *.elf *.bin *.dump .depend +clean-all: clean + rm -rf *.elf *.bin *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/driver/tests/demo/kernel.bin b/driver/tests/demo/kernel.bin index aeeeab99..1874f61d 100755 Binary files a/driver/tests/demo/kernel.bin and b/driver/tests/demo/kernel.bin differ diff --git a/driver/tests/demo/kernel.dump b/driver/tests/demo/kernel.dump index 98cb45a2..d8f0278d 100644 --- a/driver/tests/demo/kernel.dump +++ b/driver/tests/demo/kernel.dump @@ -461,7 +461,7 @@ Disassembly of section .text: 8000061c: 00492703 lw a4,4(s2) 80000620: 148a2783 lw a5,328(s4) 80000624: 01871463 bne a4,s8,8000062c <__call_exitprocs+0xe4> -80000628: f92784e3 beq a5,s2,800005b0 <__call_exitprocs+0x68> +80000628: f8f904e3 beq s2,a5,800005b0 <__call_exitprocs+0x68> 8000062c: f80788e3 beqz a5,800005bc <__call_exitprocs+0x74> 80000630: 00078913 mv s2,a5 80000634: f5dff06f j 80000590 <__call_exitprocs+0x48> @@ -527,20 +527,21 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3120 fld fs0,96(a0) - c: 2e30 fld fa2,88(a2) - e: 2e32 fld ft8,264(sp) - 10: 0030 addi a2,sp,8 + a: 3920 fld fs0,112(a0) + c: 322e fld ft4,232(sp) + e: 302e fld ft0,232(sp) + ... Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2941 jal 490 <_start-0x7ffffb70> + 0: 2541 jal 680 <_start-0x7ffff980> 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec> - c: 001f 0000 1004 0x10040000001f + c: 0000001b 0x1b + 10: 1004 addi s1,sp,32 12: 7205 lui tp,0xfffe1 14: 3376 fld ft6,376(sp) 16: 6932 flw fs2,12(sp) @@ -549,5 +550,3 @@ Disassembly of section .riscv.attributes: 1c: 326d jal fffff9c6 <__global_pointer$+0x7fffdb56> 1e: 3070 fld fa2,224(s0) 20: 665f 7032 0030 0x307032665f - 26: 0108 addi a0,sp,128 - 28: 0b0a slli s6,s6,0x2 diff --git a/driver/tests/demo/kernel.elf b/driver/tests/demo/kernel.elf index dd166d7e..879adf4e 100755 Binary files a/driver/tests/demo/kernel.elf and b/driver/tests/demo/kernel.elf differ diff --git a/driver/tests/dogfood/Makefile b/driver/tests/dogfood/Makefile index 1bf81cc1..c12ede08 100644 --- a/driver/tests/dogfood/Makefile +++ b/driver/tests/dogfood/Makefile @@ -60,8 +60,8 @@ run-simx: $(PROJECT) clean: rm -rf $(PROJECT) *.o .depend -clean-all: - rm -rf $(PROJECT) *.o *.elf *.bin *.dump .depend +clean-all: clean + rm -rf *.elf *.bin *.dump ifneq ($(MAKECMDGOALS),clean) -include .depend diff --git a/driver/tests/dogfood/kernel.bin b/driver/tests/dogfood/kernel.bin index 599ab919..58d0974e 100755 Binary files a/driver/tests/dogfood/kernel.bin and b/driver/tests/dogfood/kernel.bin differ diff --git a/driver/tests/dogfood/kernel.dump b/driver/tests/dogfood/kernel.dump index 8fcd6374..220ce9b3 100644 --- a/driver/tests/dogfood/kernel.dump +++ b/driver/tests/dogfood/kernel.dump @@ -6,23 +6,23 @@ Disassembly of section .init: 80000000 <_start>: 80000000: 00001597 auipc a1,0x1 -80000004: b8458593 addi a1,a1,-1148 # 80000b84 +80000004: b8c58593 addi a1,a1,-1140 # 80000b8c 80000008: 02602573 csrr a0,0x26 8000000c: 00b5106b 0xb5106b -80000010: 375000ef jal ra,80000b84 +80000010: 37d000ef jal ra,80000b8c 80000014: 00100513 li a0,1 80000018: 0005006b 0x5006b 8000001c: c3018513 addi a0,gp,-976 # 800026c8 80000020: c3818613 addi a2,gp,-968 # 800026d0 <__BSS_END__> 80000024: 40a60633 sub a2,a2,a0 80000028: 00000593 li a1,0 -8000002c: 785000ef jal ra,80000fb0 +8000002c: 789000ef jal ra,80000fb4 80000030: 00001517 auipc a0,0x1 -80000034: e8850513 addi a0,a0,-376 # 80000eb8 <__libc_fini_array> -80000038: 635000ef jal ra,80000e6c -8000003c: 6d9000ef jal ra,80000f14 <__libc_init_array> +80000034: e8c50513 addi a0,a0,-372 # 80000ebc <__libc_fini_array> +80000038: 639000ef jal ra,80000e70 +8000003c: 6dd000ef jal ra,80000f18 <__libc_init_array> 80000040: 008000ef jal ra,80000048
-80000044: 6450006f j 80000e88 +80000044: 6490006f j 80000e8c Disassembly of section .text: @@ -30,30 +30,30 @@ Disassembly of section .text: 80000048: ff010113 addi sp,sp,-16 8000004c: 00112623 sw ra,12(sp) 80000050: 00812423 sw s0,8(sp) -80000054: 3c9000ef jal ra,80000c1c +80000054: 3d1000ef jal ra,80000c24 80000058: 00050413 mv s0,a0 -8000005c: 3b9000ef jal ra,80000c14 +8000005c: 3c1000ef jal ra,80000c1c 80000060: 7ffff7b7 lui a5,0x7ffff 80000064: 0007a783 lw a5,0(a5) # 7ffff000 <_start-0x1000> 80000068: 00050593 mv a1,a0 8000006c: 00040513 mv a0,s0 80000070: 00279713 slli a4,a5,0x2 80000074: 800017b7 lui a5,0x80001 -80000078: 24478793 addi a5,a5,580 # 80001244 <__global_pointer$+0xffffe7ac> +80000078: 24878793 addi a5,a5,584 # 80001248 <__global_pointer$+0xffffe7b0> 8000007c: 00812403 lw s0,8(sp) 80000080: 00e787b3 add a5,a5,a4 80000084: 00c12083 lw ra,12(sp) 80000088: 0007a603 lw a2,0(a5) 8000008c: 7ffff6b7 lui a3,0x7ffff 80000090: 01010113 addi sp,sp,16 -80000094: 3e90006f j 80000c7c +80000094: 3f10006f j 80000c84 80000098 : 80000098: 00000793 li a5,0 8000009c: 00078863 beqz a5,800000ac 800000a0: 80001537 lui a0,0x80001 -800000a4: eb850513 addi a0,a0,-328 # 80000eb8 <__global_pointer$+0xffffe420> -800000a8: 5c50006f j 80000e6c +800000a4: ebc50513 addi a0,a0,-324 # 80000ebc <__global_pointer$+0xffffe424> +800000a8: 5c90006f j 80000e70 800000ac: 00008067 ret 800000b0 : @@ -67,7 +67,7 @@ Disassembly of section .text: 800000cc: 00852983 lw s3,8(a0) 800000d0: 01052903 lw s2,16(a0) 800000d4: 00112e23 sw ra,28(sp) -800000d8: 32d000ef jal ra,80000c04 +800000d8: 335000ef jal ra,80000c0c 800000dc: 02850533 mul a0,a0,s0 800000e0: 04040063 beqz s0,80000120 800000e4: 00a406b3 add a3,s0,a0 @@ -104,7 +104,7 @@ Disassembly of section .text: 80000158: 00852983 lw s3,8(a0) 8000015c: 01052903 lw s2,16(a0) 80000160: 00112e23 sw ra,28(sp) -80000164: 2a1000ef jal ra,80000c04 +80000164: 2a9000ef jal ra,80000c0c 80000168: 02850533 mul a0,a0,s0 8000016c: 04040063 beqz s0,800001ac 80000170: 00a406b3 add a3,s0,a0 @@ -141,7 +141,7 @@ Disassembly of section .text: 800001e4: 00852983 lw s3,8(a0) 800001e8: 01052903 lw s2,16(a0) 800001ec: 00112e23 sw ra,28(sp) -800001f0: 215000ef jal ra,80000c04 +800001f0: 21d000ef jal ra,80000c0c 800001f4: 02850533 mul a0,a0,s0 800001f8: 04040063 beqz s0,80000238 800001fc: 00a406b3 add a3,s0,a0 @@ -178,7 +178,7 @@ Disassembly of section .text: 80000270: 00852983 lw s3,8(a0) 80000274: 01052483 lw s1,16(a0) 80000278: 00112e23 sw ra,28(sp) -8000027c: 189000ef jal ra,80000c04 +8000027c: 191000ef jal ra,80000c0c 80000280: 02850533 mul a0,a0,s0 80000284: 04040463 beqz s0,800002cc 80000288: 00a405b3 add a1,s0,a0 @@ -217,7 +217,7 @@ Disassembly of section .text: 80000304: 00852983 lw s3,8(a0) 80000308: 01052903 lw s2,16(a0) 8000030c: 00112e23 sw ra,28(sp) -80000310: 0f5000ef jal ra,80000c04 +80000310: 0fd000ef jal ra,80000c0c 80000314: 02850533 mul a0,a0,s0 80000318: 04040063 beqz s0,80000358 8000031c: 00a40733 add a4,s0,a0 @@ -254,7 +254,7 @@ Disassembly of section .text: 80000390: 00852983 lw s3,8(a0) 80000394: 01052903 lw s2,16(a0) 80000398: 00112e23 sw ra,28(sp) -8000039c: 069000ef jal ra,80000c04 +8000039c: 071000ef jal ra,80000c0c 800003a0: 02850533 mul a0,a0,s0 800003a4: 04040063 beqz s0,800003e4 800003a8: 00a40733 add a4,s0,a0 @@ -291,7 +291,7 @@ Disassembly of section .text: 8000041c: 00852983 lw s3,8(a0) 80000420: 01052903 lw s2,16(a0) 80000424: 00112e23 sw ra,28(sp) -80000428: 7dc000ef jal ra,80000c04 +80000428: 7e4000ef jal ra,80000c0c 8000042c: 02850533 mul a0,a0,s0 80000430: 04040063 beqz s0,80000470 80000434: 00a40733 add a4,s0,a0 @@ -328,7 +328,7 @@ Disassembly of section .text: 800004a8: 00852983 lw s3,8(a0) 800004ac: 01052483 lw s1,16(a0) 800004b0: 00112e23 sw ra,28(sp) -800004b4: 750000ef jal ra,80000c04 +800004b4: 758000ef jal ra,80000c0c 800004b8: 02850533 mul a0,a0,s0 800004bc: 04040063 beqz s0,800004fc 800004c0: 00a40733 add a4,s0,a0 @@ -365,7 +365,7 @@ Disassembly of section .text: 80000534: 00852983 lw s3,8(a0) 80000538: 01052483 lw s1,16(a0) 8000053c: 00112e23 sw ra,28(sp) -80000540: 6c4000ef jal ra,80000c04 +80000540: 6cc000ef jal ra,80000c0c 80000544: 02850533 mul a0,a0,s0 80000548: 04040063 beqz s0,80000588 8000054c: 00a40733 add a4,s0,a0 @@ -402,7 +402,7 @@ Disassembly of section .text: 800005c0: 00852983 lw s3,8(a0) 800005c4: 01052483 lw s1,16(a0) 800005c8: 00112e23 sw ra,28(sp) -800005cc: 638000ef jal ra,80000c04 +800005cc: 640000ef jal ra,80000c0c 800005d0: 02850533 mul a0,a0,s0 800005d4: 04040063 beqz s0,80000614 800005d8: 00a40733 add a4,s0,a0 @@ -439,7 +439,7 @@ Disassembly of section .text: 8000064c: 00852983 lw s3,8(a0) 80000650: 01052483 lw s1,16(a0) 80000654: 00112e23 sw ra,28(sp) -80000658: 5ac000ef jal ra,80000c04 +80000658: 5b4000ef jal ra,80000c0c 8000065c: 02850533 mul a0,a0,s0 80000660: 04040063 beqz s0,800006a0 80000664: 00a40733 add a4,s0,a0 @@ -476,7 +476,7 @@ Disassembly of section .text: 800006d8: 00852983 lw s3,8(a0) 800006dc: 01052483 lw s1,16(a0) 800006e0: 00112e23 sw ra,28(sp) -800006e4: 520000ef jal ra,80000c04 +800006e4: 528000ef jal ra,80000c0c 800006e8: 02850533 mul a0,a0,s0 800006ec: 04040463 beqz s0,80000734 800006f0: 00a40733 add a4,s0,a0 @@ -515,7 +515,7 @@ Disassembly of section .text: 8000076c: 00852983 lw s3,8(a0) 80000770: 01052903 lw s2,16(a0) 80000774: 00112e23 sw ra,28(sp) -80000778: 48c000ef jal ra,80000c04 +80000778: 494000ef jal ra,80000c0c 8000077c: 02850533 mul a0,a0,s0 80000780: 04040063 beqz s0,800007c0 80000784: 00a40733 add a4,s0,a0 @@ -552,7 +552,7 @@ Disassembly of section .text: 800007f8: 00852983 lw s3,8(a0) 800007fc: 01052483 lw s1,16(a0) 80000800: 00112e23 sw ra,28(sp) -80000804: 400000ef jal ra,80000c04 +80000804: 408000ef jal ra,80000c0c 80000808: 02850533 mul a0,a0,s0 8000080c: 04040463 beqz s0,80000854 80000810: 00a40733 add a4,s0,a0 @@ -591,25 +591,25 @@ Disassembly of section .text: 8000088c: 00852983 lw s3,8(a0) 80000890: 01052903 lw s2,16(a0) 80000894: 00112e23 sw ra,28(sp) -80000898: 36c000ef jal ra,80000c04 +80000898: 374000ef jal ra,80000c0c 8000089c: 02850533 mul a0,a0,s0 800008a0: 04040263 beqz s0,800008e4 -800008a4: 00a40733 add a4,s0,a0 -800008a8: 00271713 slli a4,a4,0x2 +800008a4: 00a406b3 add a3,s0,a0 +800008a8: 00269693 slli a3,a3,0x2 800008ac: 00251513 slli a0,a0,0x2 800008b0: 013507b3 add a5,a0,s3 -800008b4: 01370733 add a4,a4,s3 +800008b4: 013686b3 add a3,a3,s3 800008b8: 41390533 sub a0,s2,s3 800008bc: 413485b3 sub a1,s1,s3 -800008c0: 00f586b3 add a3,a1,a5 +800008c0: 00f58733 add a4,a1,a5 800008c4: 0007a707 flw fa4,0(a5) -800008c8: 0006a787 flw fa5,0(a3) -800008cc: 00f506b3 add a3,a0,a5 +800008c8: 00072787 flw fa5,0(a4) +800008cc: 00f50633 add a2,a0,a5 800008d0: 00478793 addi a5,a5,4 800008d4: 00e7f7d3 fadd.s fa5,fa5,fa4 -800008d8: c0079653 fcvt.w.s a2,fa5,rtz -800008dc: 00c6a023 sw a2,0(a3) -800008e0: fef710e3 bne a4,a5,800008c0 +800008d8: c0079753 fcvt.w.s a4,fa5,rtz +800008dc: 00e62023 sw a4,0(a2) +800008e0: fef690e3 bne a3,a5,800008c0 800008e4: 01c12083 lw ra,28(sp) 800008e8: 01812403 lw s0,24(sp) 800008ec: 01412483 lw s1,20(sp) @@ -629,25 +629,25 @@ Disassembly of section .text: 8000091c: 00852983 lw s3,8(a0) 80000920: 01052903 lw s2,16(a0) 80000924: 00112e23 sw ra,28(sp) -80000928: 2dc000ef jal ra,80000c04 +80000928: 2e4000ef jal ra,80000c0c 8000092c: 02850533 mul a0,a0,s0 80000930: 04040263 beqz s0,80000974 -80000934: 00a40733 add a4,s0,a0 -80000938: 00271713 slli a4,a4,0x2 +80000934: 00a406b3 add a3,s0,a0 +80000938: 00269693 slli a3,a3,0x2 8000093c: 00251513 slli a0,a0,0x2 80000940: 013507b3 add a5,a0,s3 -80000944: 01370733 add a4,a4,s3 +80000944: 013686b3 add a3,a3,s3 80000948: 41390533 sub a0,s2,s3 8000094c: 413485b3 sub a1,s1,s3 -80000950: 00f586b3 add a3,a1,a5 +80000950: 00f58733 add a4,a1,a5 80000954: 0007a707 flw fa4,0(a5) -80000958: 0006a787 flw fa5,0(a3) -8000095c: 00f506b3 add a3,a0,a5 +80000958: 00072787 flw fa5,0(a4) +8000095c: 00f50633 add a2,a0,a5 80000960: 00478793 addi a5,a5,4 80000964: 00e7f7d3 fadd.s fa5,fa5,fa4 -80000968: c0179653 fcvt.wu.s a2,fa5,rtz -8000096c: 00c6a023 sw a2,0(a3) -80000970: fef710e3 bne a4,a5,80000950 +80000968: c0179753 fcvt.wu.s a4,fa5,rtz +8000096c: 00e62023 sw a4,0(a2) +80000970: fef690e3 bne a3,a5,80000950 80000974: 01c12083 lw ra,28(sp) 80000978: 01812403 lw s0,24(sp) 8000097c: 01412483 lw s1,20(sp) @@ -667,7 +667,7 @@ Disassembly of section .text: 800009ac: 00852983 lw s3,8(a0) 800009b0: 01052903 lw s2,16(a0) 800009b4: 00112e23 sw ra,28(sp) -800009b8: 24c000ef jal ra,80000c04 +800009b8: 254000ef jal ra,80000c0c 800009bc: 02850533 mul a0,a0,s0 800009c0: 04040263 beqz s0,80000a04 800009c4: 00a406b3 add a3,s0,a0 @@ -705,7 +705,7 @@ Disassembly of section .text: 80000a3c: 00852983 lw s3,8(a0) 80000a40: 01052903 lw s2,16(a0) 80000a44: 00112e23 sw ra,28(sp) -80000a48: 1bc000ef jal ra,80000c04 +80000a48: 1c4000ef jal ra,80000c0c 80000a4c: 02850533 mul a0,a0,s0 80000a50: 04040263 beqz s0,80000a94 80000a54: 00a406b3 add a3,s0,a0 @@ -744,588 +744,589 @@ Disassembly of section .text: 80000ad0: 01052903 lw s2,16(a0) 80000ad4: 02112e23 sw ra,60(sp) 80000ad8: 00812e27 fsw fs0,28(sp) -80000adc: 128000ef jal ra,80000c04 -80000ae0: 02950533 mul a0,a0,s1 -80000ae4: 04048c63 beqz s1,80000b3c -80000ae8: 00a484b3 add s1,s1,a0 -80000aec: f0000453 fmv.w.x fs0,zero -80000af0: 00251513 slli a0,a0,0x2 -80000af4: 00249493 slli s1,s1,0x2 -80000af8: 01450533 add a0,a0,s4 -80000afc: 014484b3 add s1,s1,s4 -80000b00: 414989b3 sub s3,s3,s4 -80000b04: 41490933 sub s2,s2,s4 -80000b08: 00a987b3 add a5,s3,a0 -80000b0c: 0007a507 flw fa0,0(a5) -80000b10: 00052787 flw fa5,0(a0) -80000b14: 00a90733 add a4,s2,a0 -80000b18: 10f57553 fmul.s fa0,fa0,fa5 -80000b1c: 001026f3 frflags a3 -80000b20: a08517d3 flt.s a5,fa0,fs0 -80000b24: 00169073 fsflags a3 -80000b28: 02079a63 bnez a5,80000b5c -80000b2c: 58057553 fsqrt.s fa0,fa0 -80000b30: 00450513 addi a0,a0,4 -80000b34: 00a72027 fsw fa0,0(a4) -80000b38: fc9518e3 bne a0,s1,80000b08 -80000b3c: 03c12083 lw ra,60(sp) -80000b40: 03812483 lw s1,56(sp) -80000b44: 03412903 lw s2,52(sp) -80000b48: 03012983 lw s3,48(sp) -80000b4c: 02c12a03 lw s4,44(sp) -80000b50: 01c12407 flw fs0,28(sp) -80000b54: 04010113 addi sp,sp,64 -80000b58: 00008067 ret -80000b5c: 00a12623 sw a0,12(sp) -80000b60: 18c000ef jal ra,80000cec -80000b64: 00c12503 lw a0,12(sp) -80000b68: 00a907b3 add a5,s2,a0 -80000b6c: 00a7a027 fsw fa0,0(a5) -80000b70: 00450513 addi a0,a0,4 -80000b74: f8a49ae3 bne s1,a0,80000b08 -80000b78: fc5ff06f j 80000b3c +80000adc: 00912c27 fsw fs1,24(sp) +80000ae0: 12c000ef jal ra,80000c0c +80000ae4: 02950533 mul a0,a0,s1 +80000ae8: 04048c63 beqz s1,80000b40 +80000aec: 00a484b3 add s1,s1,a0 +80000af0: f00004d3 fmv.w.x fs1,zero +80000af4: 00251513 slli a0,a0,0x2 +80000af8: 00249493 slli s1,s1,0x2 +80000afc: 01450533 add a0,a0,s4 +80000b00: 014484b3 add s1,s1,s4 +80000b04: 414989b3 sub s3,s3,s4 +80000b08: 41490933 sub s2,s2,s4 +80000b0c: 00a987b3 add a5,s3,a0 +80000b10: 0007a507 flw fa0,0(a5) +80000b14: 00052787 flw fa5,0(a0) +80000b18: 00a90733 add a4,s2,a0 +80000b1c: 10f57553 fmul.s fa0,fa0,fa5 +80000b20: 001026f3 frflags a3 +80000b24: a09517d3 flt.s a5,fa0,fs1 +80000b28: 00169073 fsflags a3 +80000b2c: 58057453 fsqrt.s fs0,fa0 +80000b30: 02079a63 bnez a5,80000b64 +80000b34: 00450513 addi a0,a0,4 +80000b38: 00872027 fsw fs0,0(a4) +80000b3c: fc9518e3 bne a0,s1,80000b0c +80000b40: 03c12083 lw ra,60(sp) +80000b44: 03812483 lw s1,56(sp) +80000b48: 03412903 lw s2,52(sp) +80000b4c: 03012983 lw s3,48(sp) +80000b50: 02c12a03 lw s4,44(sp) +80000b54: 01c12407 flw fs0,28(sp) +80000b58: 01812487 flw fs1,24(sp) +80000b5c: 04010113 addi sp,sp,64 +80000b60: 00008067 ret +80000b64: 00a12623 sw a0,12(sp) +80000b68: 18c000ef jal ra,80000cf4 +80000b6c: 00c12503 lw a0,12(sp) +80000b70: 00a907b3 add a5,s2,a0 +80000b74: 0087a027 fsw fs0,0(a5) +80000b78: 00450513 addi a0,a0,4 +80000b7c: f8a498e3 bne s1,a0,80000b0c +80000b80: fc1ff06f j 80000b40 -80000b7c <_exit>: -80000b7c: 00000513 li a0,0 -80000b80: 0005006b 0x5006b - -80000b84 : -80000b84: 02502573 csrr a0,0x25 +80000b84 <_exit>: +80000b84: 00000513 li a0,0 80000b88: 0005006b 0x5006b -80000b8c: 00002197 auipc gp,0x2 -80000b90: f0c18193 addi gp,gp,-244 # 80002a98 <__global_pointer$> -80000b94: 022025f3 csrr a1,0x22 -80000b98: 00a59593 slli a1,a1,0xa -80000b9c: 02002673 csrr a2,0x20 -80000ba0: 00261613 slli a2,a2,0x2 -80000ba4: 6ffff137 lui sp,0x6ffff -80000ba8: 40b10133 sub sp,sp,a1 -80000bac: 00c10133 add sp,sp,a2 -80000bb0: 021026f3 csrr a3,0x21 -80000bb4: 00068663 beqz a3,80000bc0 -80000bb8: 00000513 li a0,0 -80000bbc: 0005006b 0x5006b -80000bc0 : -80000bc0: 00008067 ret +80000b8c : +80000b8c: 02502573 csrr a0,0x25 +80000b90: 0005006b 0x5006b +80000b94: 00002197 auipc gp,0x2 +80000b98: f0418193 addi gp,gp,-252 # 80002a98 <__global_pointer$> +80000b9c: 022025f3 csrr a1,0x22 +80000ba0: 00a59593 slli a1,a1,0xa +80000ba4: 02002673 csrr a2,0x20 +80000ba8: 00261613 slli a2,a2,0x2 +80000bac: 6ffff137 lui sp,0x6ffff +80000bb0: 40b10133 sub sp,sp,a1 +80000bb4: 00c10133 add sp,sp,a2 +80000bb8: 021026f3 csrr a3,0x21 +80000bbc: 00068663 beqz a3,80000bc8 +80000bc0: 00000513 li a0,0 +80000bc4: 0005006b 0x5006b -80000bc4 : -80000bc4: 00b5106b 0xb5106b +80000bc8 : 80000bc8: 00008067 ret -80000bcc : -80000bcc: 0005006b 0x5006b +80000bcc : +80000bcc: 00b5106b 0xb5106b 80000bd0: 00008067 ret -80000bd4 : -80000bd4: 00b5406b 0xb5406b +80000bd4 : +80000bd4: 0005006b 0x5006b 80000bd8: 00008067 ret -80000bdc : -80000bdc: 0005206b 0x5206b +80000bdc : +80000bdc: 00b5406b 0xb5406b 80000be0: 00008067 ret -80000be4 : -80000be4: 0000306b 0x306b +80000be4 : +80000be4: 0005206b 0x5206b 80000be8: 00008067 ret -80000bec : -80000bec: 02102573 csrr a0,0x21 +80000bec : +80000bec: 0000306b 0x306b 80000bf0: 00008067 ret -80000bf4 : -80000bf4: 02302573 csrr a0,0x23 +80000bf4 : +80000bf4: 02102573 csrr a0,0x21 80000bf8: 00008067 ret -80000bfc : -80000bfc: 02002573 csrr a0,0x20 +80000bfc : +80000bfc: 02302573 csrr a0,0x23 80000c00: 00008067 ret -80000c04 : -80000c04: 02202573 csrr a0,0x22 +80000c04 : +80000c04: 02002573 csrr a0,0x20 80000c08: 00008067 ret -80000c0c : -80000c0c: 02402573 csrr a0,0x24 +80000c0c : +80000c0c: 02202573 csrr a0,0x22 80000c10: 00008067 ret -80000c14 : -80000c14: 02502573 csrr a0,0x25 +80000c14 : +80000c14: 02402573 csrr a0,0x24 80000c18: 00008067 ret -80000c1c : -80000c1c: 02602573 csrr a0,0x26 +80000c1c : +80000c1c: 02502573 csrr a0,0x25 80000c20: 00008067 ret -80000c24 : -80000c24: 02702573 csrr a0,0x27 +80000c24 : +80000c24: 02602573 csrr a0,0x26 80000c28: 00008067 ret -80000c2c : -80000c2c: b0002573 csrr a0,mcycle +80000c2c : +80000c2c: 02702573 csrr a0,0x27 80000c30: 00008067 ret -80000c34 : -80000c34: b0202573 csrr a0,minstret +80000c34 : +80000c34: b0002573 csrr a0,mcycle 80000c38: 00008067 ret -80000c3c : -80000c3c: ff010113 addi sp,sp,-16 # 6fffeff0 <_start-0x10001010> -80000c40: 00812423 sw s0,8(sp) -80000c44: c301a783 lw a5,-976(gp) # 800026c8 -80000c48: 00112623 sw ra,12(sp) -80000c4c: 0087a503 lw a0,8(a5) -80000c50: f7dff0ef jal ra,80000bcc -80000c54: c301a783 lw a5,-976(gp) # 800026c8 -80000c58: 0047a503 lw a0,4(a5) -80000c5c: 0007a783 lw a5,0(a5) -80000c60: 000780e7 jalr a5 -80000c64: f89ff0ef jal ra,80000bec -80000c68: 00812403 lw s0,8(sp) -80000c6c: 00c12083 lw ra,12(sp) -80000c70: 00153513 seqz a0,a0 -80000c74: 01010113 addi sp,sp,16 -80000c78: f55ff06f j 80000bcc +80000c3c : +80000c3c: b0202573 csrr a0,minstret +80000c40: 00008067 ret -80000c7c : -80000c7c: fe010113 addi sp,sp,-32 -80000c80: 00410793 addi a5,sp,4 -80000c84: 00812c23 sw s0,24(sp) -80000c88: c2f1a823 sw a5,-976(gp) # 800026c8 -80000c8c: 00112e23 sw ra,28(sp) -80000c90: 00c12223 sw a2,4(sp) -80000c94: 00d12423 sw a3,8(sp) -80000c98: 00b12623 sw a1,12(sp) -80000c9c: 00100793 li a5,1 -80000ca0: 00a7dc63 bge a5,a0,80000cb8 -80000ca4: 800015b7 lui a1,0x80001 -80000ca8: c3c58593 addi a1,a1,-964 # 80000c3c <__global_pointer$+0xffffe1a4> -80000cac: f19ff0ef jal ra,80000bc4 -80000cb0: c301a783 lw a5,-976(gp) # 800026c8 -80000cb4: 0087a583 lw a1,8(a5) -80000cb8: 00058513 mv a0,a1 -80000cbc: f11ff0ef jal ra,80000bcc -80000cc0: c301a783 lw a5,-976(gp) # 800026c8 -80000cc4: 0047a503 lw a0,4(a5) -80000cc8: 0007a783 lw a5,0(a5) -80000ccc: 000780e7 jalr a5 -80000cd0: f1dff0ef jal ra,80000bec -80000cd4: 00153513 seqz a0,a0 -80000cd8: ef5ff0ef jal ra,80000bcc -80000cdc: 01c12083 lw ra,28(sp) -80000ce0: 01812403 lw s0,24(sp) -80000ce4: 02010113 addi sp,sp,32 -80000ce8: 00008067 ret +80000c44 : +80000c44: ff010113 addi sp,sp,-16 # 6fffeff0 <_start-0x10001010> +80000c48: 00812423 sw s0,8(sp) +80000c4c: c301a783 lw a5,-976(gp) # 800026c8 +80000c50: 00112623 sw ra,12(sp) +80000c54: 0087a503 lw a0,8(a5) +80000c58: f7dff0ef jal ra,80000bd4 +80000c5c: c301a783 lw a5,-976(gp) # 800026c8 +80000c60: 0047a503 lw a0,4(a5) +80000c64: 0007a783 lw a5,0(a5) +80000c68: 000780e7 jalr a5 +80000c6c: f89ff0ef jal ra,80000bf4 +80000c70: 00812403 lw s0,8(sp) +80000c74: 00c12083 lw ra,12(sp) +80000c78: 00153513 seqz a0,a0 +80000c7c: 01010113 addi sp,sp,16 +80000c80: f55ff06f j 80000bd4 -80000cec : -80000cec: fe010113 addi sp,sp,-32 -80000cf0: 00812627 fsw fs0,12(sp) -80000cf4: 00112e23 sw ra,28(sp) -80000cf8: 20a50453 fmv.s fs0,fa0 -80000cfc: 00912427 fsw fs1,8(sp) -80000d00: 05c000ef jal ra,80000d5c <__ieee754_sqrtf> -80000d04: c341a703 lw a4,-972(gp) # 800026cc <__fdlib_version> -80000d08: fff00793 li a5,-1 -80000d0c: 00f70c63 beq a4,a5,80000d24 -80000d10: a08427d3 feq.s a5,fs0,fs0 -80000d14: 00078863 beqz a5,80000d24 -80000d18: f00004d3 fmv.w.x fs1,zero -80000d1c: a09417d3 flt.s a5,fs0,fs1 -80000d20: 00079c63 bnez a5,80000d38 -80000d24: 01c12083 lw ra,28(sp) -80000d28: 00c12407 flw fs0,12(sp) -80000d2c: 00812487 flw fs1,8(sp) -80000d30: 02010113 addi sp,sp,32 -80000d34: 00008067 ret -80000d38: 148000ef jal ra,80000e80 <__errno> -80000d3c: 01c12083 lw ra,28(sp) -80000d40: 02100793 li a5,33 -80000d44: 00f52023 sw a5,0(a0) -80000d48: 1894f553 fdiv.s fa0,fs1,fs1 -80000d4c: 00c12407 flw fs0,12(sp) -80000d50: 00812487 flw fs1,8(sp) -80000d54: 02010113 addi sp,sp,32 -80000d58: 00008067 ret +80000c84 : +80000c84: fe010113 addi sp,sp,-32 +80000c88: 00410793 addi a5,sp,4 +80000c8c: 00812c23 sw s0,24(sp) +80000c90: c2f1a823 sw a5,-976(gp) # 800026c8 +80000c94: 00112e23 sw ra,28(sp) +80000c98: 00c12223 sw a2,4(sp) +80000c9c: 00d12423 sw a3,8(sp) +80000ca0: 00b12623 sw a1,12(sp) +80000ca4: 00100793 li a5,1 +80000ca8: 00a7dc63 bge a5,a0,80000cc0 +80000cac: 800015b7 lui a1,0x80001 +80000cb0: c4458593 addi a1,a1,-956 # 80000c44 <__global_pointer$+0xffffe1ac> +80000cb4: f19ff0ef jal ra,80000bcc +80000cb8: c301a783 lw a5,-976(gp) # 800026c8 +80000cbc: 0087a583 lw a1,8(a5) +80000cc0: 00058513 mv a0,a1 +80000cc4: f11ff0ef jal ra,80000bd4 +80000cc8: c301a783 lw a5,-976(gp) # 800026c8 +80000ccc: 0047a503 lw a0,4(a5) +80000cd0: 0007a783 lw a5,0(a5) +80000cd4: 000780e7 jalr a5 +80000cd8: f1dff0ef jal ra,80000bf4 +80000cdc: 00153513 seqz a0,a0 +80000ce0: ef5ff0ef jal ra,80000bd4 +80000ce4: 01c12083 lw ra,28(sp) +80000ce8: 01812403 lw s0,24(sp) +80000cec: 02010113 addi sp,sp,32 +80000cf0: 00008067 ret -80000d5c <__ieee754_sqrtf>: -80000d5c: e00506d3 fmv.x.w a3,fa0 -80000d60: 7f800737 lui a4,0x7f800 -80000d64: 00169793 slli a5,a3,0x1 -80000d68: 0017d793 srli a5,a5,0x1 -80000d6c: 0ce7f463 bgeu a5,a4,80000e34 <__ieee754_sqrtf+0xd8> -80000d70: e0050553 fmv.x.w a0,fa0 -80000d74: 0a078c63 beqz a5,80000e2c <__ieee754_sqrtf+0xd0> -80000d78: 00068793 mv a5,a3 -80000d7c: 0c06c863 bltz a3,80000e4c <__ieee754_sqrtf+0xf0> -80000d80: 00d77633 and a2,a4,a3 -80000d84: 4176d713 srai a4,a3,0x17 -80000d88: 02061663 bnez a2,80000db4 <__ieee754_sqrtf+0x58> -80000d8c: 00800637 lui a2,0x800 -80000d90: 00d676b3 and a3,a2,a3 -80000d94: 0c069663 bnez a3,80000e60 <__ieee754_sqrtf+0x104> -80000d98: 00800537 lui a0,0x800 -80000d9c: 00179793 slli a5,a5,0x1 -80000da0: 00a7f633 and a2,a5,a0 -80000da4: 00068593 mv a1,a3 -80000da8: 00168693 addi a3,a3,1 -80000dac: fe0608e3 beqz a2,80000d9c <__ieee754_sqrtf+0x40> -80000db0: 40b70733 sub a4,a4,a1 -80000db4: 008006b7 lui a3,0x800 -80000db8: fff68613 addi a2,a3,-1 # 7fffff <_start-0x7f800001> -80000dbc: 00c7f7b3 and a5,a5,a2 -80000dc0: f8170713 addi a4,a4,-127 # 7f7fff81 <_start-0x80007f> -80000dc4: 00d7e6b3 or a3,a5,a3 -80000dc8: 00177613 andi a2,a4,1 -80000dcc: 00169793 slli a5,a3,0x1 -80000dd0: 06061a63 bnez a2,80000e44 <__ieee754_sqrtf+0xe8> -80000dd4: 40175813 srai a6,a4,0x1 -80000dd8: 01900693 li a3,25 -80000ddc: 00000513 li a0,0 -80000de0: 00000593 li a1,0 -80000de4: 01000737 lui a4,0x1000 -80000de8: 00e58633 add a2,a1,a4 -80000dec: fff68693 addi a3,a3,-1 -80000df0: 00c7c863 blt a5,a2,80000e00 <__ieee754_sqrtf+0xa4> -80000df4: 00e605b3 add a1,a2,a4 -80000df8: 40c787b3 sub a5,a5,a2 -80000dfc: 00e50533 add a0,a0,a4 -80000e00: 00179793 slli a5,a5,0x1 -80000e04: 00175713 srli a4,a4,0x1 -80000e08: fe0690e3 bnez a3,80000de8 <__ieee754_sqrtf+0x8c> -80000e0c: 00078663 beqz a5,80000e18 <__ieee754_sqrtf+0xbc> -80000e10: 00150513 addi a0,a0,1 # 800001 <_start-0x7f7fffff> -80000e14: ffe57513 andi a0,a0,-2 -80000e18: 40155513 srai a0,a0,0x1 -80000e1c: 3f0007b7 lui a5,0x3f000 -80000e20: 00f50533 add a0,a0,a5 -80000e24: 01781713 slli a4,a6,0x17 -80000e28: 00a70533 add a0,a4,a0 -80000e2c: f0050553 fmv.w.x fa0,a0 -80000e30: 00008067 ret -80000e34: 50a577c3 fmadd.s fa5,fa0,fa0,fa0 -80000e38: e0078553 fmv.x.w a0,fa5 -80000e3c: f0050553 fmv.w.x fa0,a0 -80000e40: 00008067 ret -80000e44: 00269793 slli a5,a3,0x2 -80000e48: f8dff06f j 80000dd4 <__ieee754_sqrtf+0x78> -80000e4c: 08a577d3 fsub.s fa5,fa0,fa0 -80000e50: 18f7f7d3 fdiv.s fa5,fa5,fa5 -80000e54: e0078553 fmv.x.w a0,fa5 -80000e58: f0050553 fmv.w.x fa0,a0 -80000e5c: 00008067 ret -80000e60: fff00593 li a1,-1 -80000e64: 40b70733 sub a4,a4,a1 -80000e68: f4dff06f j 80000db4 <__ieee754_sqrtf+0x58> +80000cf4 : +80000cf4: fe010113 addi sp,sp,-32 +80000cf8: 00812627 fsw fs0,12(sp) +80000cfc: 00112e23 sw ra,28(sp) +80000d00: 20a50453 fmv.s fs0,fa0 +80000d04: 00912427 fsw fs1,8(sp) +80000d08: 05c000ef jal ra,80000d64 <__ieee754_sqrtf> +80000d0c: c341a703 lw a4,-972(gp) # 800026cc <__fdlib_version> +80000d10: fff00793 li a5,-1 +80000d14: 00f70c63 beq a4,a5,80000d2c +80000d18: a08427d3 feq.s a5,fs0,fs0 +80000d1c: 00078863 beqz a5,80000d2c +80000d20: f00004d3 fmv.w.x fs1,zero +80000d24: a09417d3 flt.s a5,fs0,fs1 +80000d28: 00079c63 bnez a5,80000d40 +80000d2c: 01c12083 lw ra,28(sp) +80000d30: 00c12407 flw fs0,12(sp) +80000d34: 00812487 flw fs1,8(sp) +80000d38: 02010113 addi sp,sp,32 +80000d3c: 00008067 ret +80000d40: 144000ef jal ra,80000e84 <__errno> +80000d44: 01c12083 lw ra,28(sp) +80000d48: 02100793 li a5,33 +80000d4c: 00f52023 sw a5,0(a0) +80000d50: 1894f553 fdiv.s fa0,fs1,fs1 +80000d54: 00c12407 flw fs0,12(sp) +80000d58: 00812487 flw fs1,8(sp) +80000d5c: 02010113 addi sp,sp,32 +80000d60: 00008067 ret -80000e6c : -80000e6c: 00050593 mv a1,a0 -80000e70: 00000693 li a3,0 -80000e74: 00000613 li a2,0 -80000e78: 00000513 li a0,0 -80000e7c: 2100006f j 8000108c <__register_exitproc> +80000d64 <__ieee754_sqrtf>: +80000d64: e00506d3 fmv.x.w a3,fa0 +80000d68: 7f800737 lui a4,0x7f800 +80000d6c: 00169793 slli a5,a3,0x1 +80000d70: 0017d793 srli a5,a5,0x1 +80000d74: 0ce7f263 bgeu a5,a4,80000e38 <__ieee754_sqrtf+0xd4> +80000d78: e0050553 fmv.x.w a0,fa0 +80000d7c: 0a078a63 beqz a5,80000e30 <__ieee754_sqrtf+0xcc> +80000d80: 00068793 mv a5,a3 +80000d84: 0c06c663 bltz a3,80000e50 <__ieee754_sqrtf+0xec> +80000d88: 00d77633 and a2,a4,a3 +80000d8c: 4176d713 srai a4,a3,0x17 +80000d90: 02061463 bnez a2,80000db8 <__ieee754_sqrtf+0x54> +80000d94: 00800637 lui a2,0x800 +80000d98: 00d676b3 and a3,a2,a3 +80000d9c: 0c069463 bnez a3,80000e64 <__ieee754_sqrtf+0x100> +80000da0: 00179793 slli a5,a5,0x1 +80000da4: 00879593 slli a1,a5,0x8 +80000da8: 00068613 mv a2,a3 +80000dac: 00168693 addi a3,a3,1 +80000db0: fe05d8e3 bgez a1,80000da0 <__ieee754_sqrtf+0x3c> +80000db4: 40c70733 sub a4,a4,a2 +80000db8: 008006b7 lui a3,0x800 +80000dbc: fff68613 addi a2,a3,-1 # 7fffff <_start-0x7f800001> +80000dc0: 00c7f7b3 and a5,a5,a2 +80000dc4: f8170713 addi a4,a4,-127 # 7f7fff81 <_start-0x80007f> +80000dc8: 00d7e6b3 or a3,a5,a3 +80000dcc: 00177613 andi a2,a4,1 +80000dd0: 00169793 slli a5,a3,0x1 +80000dd4: 06061a63 bnez a2,80000e48 <__ieee754_sqrtf+0xe4> +80000dd8: 40175813 srai a6,a4,0x1 +80000ddc: 01900693 li a3,25 +80000de0: 00000513 li a0,0 +80000de4: 00000593 li a1,0 +80000de8: 01000737 lui a4,0x1000 +80000dec: 00e58633 add a2,a1,a4 +80000df0: fff68693 addi a3,a3,-1 +80000df4: 00c7c863 blt a5,a2,80000e04 <__ieee754_sqrtf+0xa0> +80000df8: 00e605b3 add a1,a2,a4 +80000dfc: 40c787b3 sub a5,a5,a2 +80000e00: 00e50533 add a0,a0,a4 +80000e04: 00179793 slli a5,a5,0x1 +80000e08: 00175713 srli a4,a4,0x1 +80000e0c: fe0690e3 bnez a3,80000dec <__ieee754_sqrtf+0x88> +80000e10: 00078663 beqz a5,80000e1c <__ieee754_sqrtf+0xb8> +80000e14: 00150513 addi a0,a0,1 +80000e18: ffe57513 andi a0,a0,-2 +80000e1c: 40155513 srai a0,a0,0x1 +80000e20: 3f0007b7 lui a5,0x3f000 +80000e24: 00f50533 add a0,a0,a5 +80000e28: 01781713 slli a4,a6,0x17 +80000e2c: 00a70533 add a0,a4,a0 +80000e30: f0050553 fmv.w.x fa0,a0 +80000e34: 00008067 ret +80000e38: 50a577c3 fmadd.s fa5,fa0,fa0,fa0 +80000e3c: e0078553 fmv.x.w a0,fa5 +80000e40: f0050553 fmv.w.x fa0,a0 +80000e44: 00008067 ret +80000e48: 00269793 slli a5,a3,0x2 +80000e4c: f8dff06f j 80000dd8 <__ieee754_sqrtf+0x74> +80000e50: 08a577d3 fsub.s fa5,fa0,fa0 +80000e54: 18f7f7d3 fdiv.s fa5,fa5,fa5 +80000e58: e0078553 fmv.x.w a0,fa5 +80000e5c: f0050553 fmv.w.x fa0,a0 +80000e60: 00008067 ret +80000e64: fff00613 li a2,-1 +80000e68: 40c70733 sub a4,a4,a2 +80000e6c: f4dff06f j 80000db8 <__ieee754_sqrtf+0x54> -80000e80 <__errno>: -80000e80: c2c1a503 lw a0,-980(gp) # 800026c4 <_impure_ptr> -80000e84: 00008067 ret +80000e70 : +80000e70: 00050593 mv a1,a0 +80000e74: 00000693 li a3,0 +80000e78: 00000613 li a2,0 +80000e7c: 00000513 li a0,0 +80000e80: 2100006f j 80001090 <__register_exitproc> -80000e88 : -80000e88: ff010113 addi sp,sp,-16 -80000e8c: 00000593 li a1,0 -80000e90: 00812423 sw s0,8(sp) -80000e94: 00112623 sw ra,12(sp) -80000e98: 00050413 mv s0,a0 -80000e9c: 288000ef jal ra,80001124 <__call_exitprocs> -80000ea0: c281a503 lw a0,-984(gp) # 800026c0 <_global_impure_ptr> -80000ea4: 03c52783 lw a5,60(a0) -80000ea8: 00078463 beqz a5,80000eb0 -80000eac: 000780e7 jalr a5 # 3f000000 <_start-0x41000000> -80000eb0: 00040513 mv a0,s0 -80000eb4: cc9ff0ef jal ra,80000b7c <_exit> +80000e84 <__errno>: +80000e84: c2c1a503 lw a0,-980(gp) # 800026c4 <_impure_ptr> +80000e88: 00008067 ret -80000eb8 <__libc_fini_array>: -80000eb8: ff010113 addi sp,sp,-16 -80000ebc: 00812423 sw s0,8(sp) -80000ec0: 800027b7 lui a5,0x80002 -80000ec4: 80002437 lui s0,0x80002 -80000ec8: 29440413 addi s0,s0,660 # 80002294 <__global_pointer$+0xfffff7fc> -80000ecc: 29478793 addi a5,a5,660 # 80002294 <__global_pointer$+0xfffff7fc> -80000ed0: 408787b3 sub a5,a5,s0 -80000ed4: 00912223 sw s1,4(sp) -80000ed8: 00112623 sw ra,12(sp) -80000edc: 4027d493 srai s1,a5,0x2 -80000ee0: 02048063 beqz s1,80000f00 <__libc_fini_array+0x48> -80000ee4: ffc78793 addi a5,a5,-4 -80000ee8: 00878433 add s0,a5,s0 -80000eec: 00042783 lw a5,0(s0) -80000ef0: fff48493 addi s1,s1,-1 -80000ef4: ffc40413 addi s0,s0,-4 -80000ef8: 000780e7 jalr a5 -80000efc: fe0498e3 bnez s1,80000eec <__libc_fini_array+0x34> -80000f00: 00c12083 lw ra,12(sp) -80000f04: 00812403 lw s0,8(sp) -80000f08: 00412483 lw s1,4(sp) -80000f0c: 01010113 addi sp,sp,16 -80000f10: 00008067 ret +80000e8c : +80000e8c: ff010113 addi sp,sp,-16 +80000e90: 00000593 li a1,0 +80000e94: 00812423 sw s0,8(sp) +80000e98: 00112623 sw ra,12(sp) +80000e9c: 00050413 mv s0,a0 +80000ea0: 288000ef jal ra,80001128 <__call_exitprocs> +80000ea4: c281a503 lw a0,-984(gp) # 800026c0 <_global_impure_ptr> +80000ea8: 03c52783 lw a5,60(a0) +80000eac: 00078463 beqz a5,80000eb4 +80000eb0: 000780e7 jalr a5 # 3f000000 <_start-0x41000000> +80000eb4: 00040513 mv a0,s0 +80000eb8: ccdff0ef jal ra,80000b84 <_exit> -80000f14 <__libc_init_array>: -80000f14: ff010113 addi sp,sp,-16 -80000f18: 00812423 sw s0,8(sp) -80000f1c: 01212023 sw s2,0(sp) -80000f20: 80002437 lui s0,0x80002 -80000f24: 80002937 lui s2,0x80002 -80000f28: 29040793 addi a5,s0,656 # 80002290 <__global_pointer$+0xfffff7f8> -80000f2c: 29090913 addi s2,s2,656 # 80002290 <__global_pointer$+0xfffff7f8> -80000f30: 40f90933 sub s2,s2,a5 -80000f34: 00112623 sw ra,12(sp) -80000f38: 00912223 sw s1,4(sp) -80000f3c: 40295913 srai s2,s2,0x2 -80000f40: 02090063 beqz s2,80000f60 <__libc_init_array+0x4c> -80000f44: 29040413 addi s0,s0,656 -80000f48: 00000493 li s1,0 -80000f4c: 00042783 lw a5,0(s0) -80000f50: 00148493 addi s1,s1,1 -80000f54: 00440413 addi s0,s0,4 -80000f58: 000780e7 jalr a5 -80000f5c: fe9918e3 bne s2,s1,80000f4c <__libc_init_array+0x38> -80000f60: 80002437 lui s0,0x80002 -80000f64: 80002937 lui s2,0x80002 -80000f68: 29040793 addi a5,s0,656 # 80002290 <__global_pointer$+0xfffff7f8> -80000f6c: 29490913 addi s2,s2,660 # 80002294 <__global_pointer$+0xfffff7fc> -80000f70: 40f90933 sub s2,s2,a5 -80000f74: 40295913 srai s2,s2,0x2 -80000f78: 02090063 beqz s2,80000f98 <__libc_init_array+0x84> -80000f7c: 29040413 addi s0,s0,656 -80000f80: 00000493 li s1,0 -80000f84: 00042783 lw a5,0(s0) -80000f88: 00148493 addi s1,s1,1 -80000f8c: 00440413 addi s0,s0,4 -80000f90: 000780e7 jalr a5 -80000f94: fe9918e3 bne s2,s1,80000f84 <__libc_init_array+0x70> -80000f98: 00c12083 lw ra,12(sp) -80000f9c: 00812403 lw s0,8(sp) -80000fa0: 00412483 lw s1,4(sp) -80000fa4: 00012903 lw s2,0(sp) -80000fa8: 01010113 addi sp,sp,16 -80000fac: 00008067 ret +80000ebc <__libc_fini_array>: +80000ebc: ff010113 addi sp,sp,-16 +80000ec0: 00812423 sw s0,8(sp) +80000ec4: 800027b7 lui a5,0x80002 +80000ec8: 80002437 lui s0,0x80002 +80000ecc: 29840413 addi s0,s0,664 # 80002298 <__global_pointer$+0xfffff800> +80000ed0: 29878793 addi a5,a5,664 # 80002298 <__global_pointer$+0xfffff800> +80000ed4: 408787b3 sub a5,a5,s0 +80000ed8: 00912223 sw s1,4(sp) +80000edc: 00112623 sw ra,12(sp) +80000ee0: 4027d493 srai s1,a5,0x2 +80000ee4: 02048063 beqz s1,80000f04 <__libc_fini_array+0x48> +80000ee8: ffc78793 addi a5,a5,-4 +80000eec: 00878433 add s0,a5,s0 +80000ef0: 00042783 lw a5,0(s0) +80000ef4: fff48493 addi s1,s1,-1 +80000ef8: ffc40413 addi s0,s0,-4 +80000efc: 000780e7 jalr a5 +80000f00: fe0498e3 bnez s1,80000ef0 <__libc_fini_array+0x34> +80000f04: 00c12083 lw ra,12(sp) +80000f08: 00812403 lw s0,8(sp) +80000f0c: 00412483 lw s1,4(sp) +80000f10: 01010113 addi sp,sp,16 +80000f14: 00008067 ret -80000fb0 : -80000fb0: 00f00313 li t1,15 -80000fb4: 00050713 mv a4,a0 -80000fb8: 02c37e63 bgeu t1,a2,80000ff4 -80000fbc: 00f77793 andi a5,a4,15 -80000fc0: 0a079063 bnez a5,80001060 -80000fc4: 08059263 bnez a1,80001048 -80000fc8: ff067693 andi a3,a2,-16 -80000fcc: 00f67613 andi a2,a2,15 -80000fd0: 00e686b3 add a3,a3,a4 -80000fd4: 00b72023 sw a1,0(a4) # 1000000 <_start-0x7f000000> -80000fd8: 00b72223 sw a1,4(a4) -80000fdc: 00b72423 sw a1,8(a4) -80000fe0: 00b72623 sw a1,12(a4) -80000fe4: 01070713 addi a4,a4,16 -80000fe8: fed766e3 bltu a4,a3,80000fd4 -80000fec: 00061463 bnez a2,80000ff4 -80000ff0: 00008067 ret -80000ff4: 40c306b3 sub a3,t1,a2 -80000ff8: 00269693 slli a3,a3,0x2 -80000ffc: 00000297 auipc t0,0x0 -80001000: 005686b3 add a3,a3,t0 -80001004: 00c68067 jr 12(a3) -80001008: 00b70723 sb a1,14(a4) -8000100c: 00b706a3 sb a1,13(a4) -80001010: 00b70623 sb a1,12(a4) -80001014: 00b705a3 sb a1,11(a4) -80001018: 00b70523 sb a1,10(a4) -8000101c: 00b704a3 sb a1,9(a4) -80001020: 00b70423 sb a1,8(a4) -80001024: 00b703a3 sb a1,7(a4) -80001028: 00b70323 sb a1,6(a4) -8000102c: 00b702a3 sb a1,5(a4) -80001030: 00b70223 sb a1,4(a4) -80001034: 00b701a3 sb a1,3(a4) -80001038: 00b70123 sb a1,2(a4) -8000103c: 00b700a3 sb a1,1(a4) -80001040: 00b70023 sb a1,0(a4) -80001044: 00008067 ret -80001048: 0ff5f593 andi a1,a1,255 -8000104c: 00859693 slli a3,a1,0x8 -80001050: 00d5e5b3 or a1,a1,a3 -80001054: 01059693 slli a3,a1,0x10 -80001058: 00d5e5b3 or a1,a1,a3 -8000105c: f6dff06f j 80000fc8 -80001060: 00279693 slli a3,a5,0x2 -80001064: 00000297 auipc t0,0x0 -80001068: 005686b3 add a3,a3,t0 -8000106c: 00008293 mv t0,ra -80001070: fa0680e7 jalr -96(a3) -80001074: 00028093 mv ra,t0 -80001078: ff078793 addi a5,a5,-16 -8000107c: 40f70733 sub a4,a4,a5 -80001080: 00f60633 add a2,a2,a5 -80001084: f6c378e3 bgeu t1,a2,80000ff4 -80001088: f3dff06f j 80000fc4 +80000f18 <__libc_init_array>: +80000f18: ff010113 addi sp,sp,-16 +80000f1c: 00812423 sw s0,8(sp) +80000f20: 01212023 sw s2,0(sp) +80000f24: 80002437 lui s0,0x80002 +80000f28: 80002937 lui s2,0x80002 +80000f2c: 29440793 addi a5,s0,660 # 80002294 <__global_pointer$+0xfffff7fc> +80000f30: 29490913 addi s2,s2,660 # 80002294 <__global_pointer$+0xfffff7fc> +80000f34: 40f90933 sub s2,s2,a5 +80000f38: 00112623 sw ra,12(sp) +80000f3c: 00912223 sw s1,4(sp) +80000f40: 40295913 srai s2,s2,0x2 +80000f44: 02090063 beqz s2,80000f64 <__libc_init_array+0x4c> +80000f48: 29440413 addi s0,s0,660 +80000f4c: 00000493 li s1,0 +80000f50: 00042783 lw a5,0(s0) +80000f54: 00148493 addi s1,s1,1 +80000f58: 00440413 addi s0,s0,4 +80000f5c: 000780e7 jalr a5 +80000f60: fe9918e3 bne s2,s1,80000f50 <__libc_init_array+0x38> +80000f64: 80002437 lui s0,0x80002 +80000f68: 80002937 lui s2,0x80002 +80000f6c: 29440793 addi a5,s0,660 # 80002294 <__global_pointer$+0xfffff7fc> +80000f70: 29890913 addi s2,s2,664 # 80002298 <__global_pointer$+0xfffff800> +80000f74: 40f90933 sub s2,s2,a5 +80000f78: 40295913 srai s2,s2,0x2 +80000f7c: 02090063 beqz s2,80000f9c <__libc_init_array+0x84> +80000f80: 29440413 addi s0,s0,660 +80000f84: 00000493 li s1,0 +80000f88: 00042783 lw a5,0(s0) +80000f8c: 00148493 addi s1,s1,1 +80000f90: 00440413 addi s0,s0,4 +80000f94: 000780e7 jalr a5 +80000f98: fe9918e3 bne s2,s1,80000f88 <__libc_init_array+0x70> +80000f9c: 00c12083 lw ra,12(sp) +80000fa0: 00812403 lw s0,8(sp) +80000fa4: 00412483 lw s1,4(sp) +80000fa8: 00012903 lw s2,0(sp) +80000fac: 01010113 addi sp,sp,16 +80000fb0: 00008067 ret -8000108c <__register_exitproc>: -8000108c: c281a703 lw a4,-984(gp) # 800026c0 <_global_impure_ptr> -80001090: 14872783 lw a5,328(a4) -80001094: 04078c63 beqz a5,800010ec <__register_exitproc+0x60> -80001098: 0047a703 lw a4,4(a5) -8000109c: 01f00813 li a6,31 -800010a0: 06e84e63 blt a6,a4,8000111c <__register_exitproc+0x90> -800010a4: 00271813 slli a6,a4,0x2 -800010a8: 02050663 beqz a0,800010d4 <__register_exitproc+0x48> -800010ac: 01078333 add t1,a5,a6 -800010b0: 08c32423 sw a2,136(t1) -800010b4: 1887a883 lw a7,392(a5) -800010b8: 00100613 li a2,1 -800010bc: 00e61633 sll a2,a2,a4 -800010c0: 00c8e8b3 or a7,a7,a2 -800010c4: 1917a423 sw a7,392(a5) -800010c8: 10d32423 sw a3,264(t1) -800010cc: 00200693 li a3,2 -800010d0: 02d50463 beq a0,a3,800010f8 <__register_exitproc+0x6c> -800010d4: 00170713 addi a4,a4,1 -800010d8: 00e7a223 sw a4,4(a5) -800010dc: 010787b3 add a5,a5,a6 -800010e0: 00b7a423 sw a1,8(a5) -800010e4: 00000513 li a0,0 -800010e8: 00008067 ret -800010ec: 14c70793 addi a5,a4,332 -800010f0: 14f72423 sw a5,328(a4) -800010f4: fa5ff06f j 80001098 <__register_exitproc+0xc> -800010f8: 18c7a683 lw a3,396(a5) -800010fc: 00170713 addi a4,a4,1 -80001100: 00e7a223 sw a4,4(a5) -80001104: 00c6e633 or a2,a3,a2 -80001108: 18c7a623 sw a2,396(a5) -8000110c: 010787b3 add a5,a5,a6 -80001110: 00b7a423 sw a1,8(a5) -80001114: 00000513 li a0,0 -80001118: 00008067 ret -8000111c: fff00513 li a0,-1 -80001120: 00008067 ret +80000fb4 : +80000fb4: 00f00313 li t1,15 +80000fb8: 00050713 mv a4,a0 +80000fbc: 02c37e63 bgeu t1,a2,80000ff8 +80000fc0: 00f77793 andi a5,a4,15 +80000fc4: 0a079063 bnez a5,80001064 +80000fc8: 08059263 bnez a1,8000104c +80000fcc: ff067693 andi a3,a2,-16 +80000fd0: 00f67613 andi a2,a2,15 +80000fd4: 00e686b3 add a3,a3,a4 +80000fd8: 00b72023 sw a1,0(a4) # 1000000 <_start-0x7f000000> +80000fdc: 00b72223 sw a1,4(a4) +80000fe0: 00b72423 sw a1,8(a4) +80000fe4: 00b72623 sw a1,12(a4) +80000fe8: 01070713 addi a4,a4,16 +80000fec: fed766e3 bltu a4,a3,80000fd8 +80000ff0: 00061463 bnez a2,80000ff8 +80000ff4: 00008067 ret +80000ff8: 40c306b3 sub a3,t1,a2 +80000ffc: 00269693 slli a3,a3,0x2 +80001000: 00000297 auipc t0,0x0 +80001004: 005686b3 add a3,a3,t0 +80001008: 00c68067 jr 12(a3) +8000100c: 00b70723 sb a1,14(a4) +80001010: 00b706a3 sb a1,13(a4) +80001014: 00b70623 sb a1,12(a4) +80001018: 00b705a3 sb a1,11(a4) +8000101c: 00b70523 sb a1,10(a4) +80001020: 00b704a3 sb a1,9(a4) +80001024: 00b70423 sb a1,8(a4) +80001028: 00b703a3 sb a1,7(a4) +8000102c: 00b70323 sb a1,6(a4) +80001030: 00b702a3 sb a1,5(a4) +80001034: 00b70223 sb a1,4(a4) +80001038: 00b701a3 sb a1,3(a4) +8000103c: 00b70123 sb a1,2(a4) +80001040: 00b700a3 sb a1,1(a4) +80001044: 00b70023 sb a1,0(a4) +80001048: 00008067 ret +8000104c: 0ff5f593 andi a1,a1,255 +80001050: 00859693 slli a3,a1,0x8 +80001054: 00d5e5b3 or a1,a1,a3 +80001058: 01059693 slli a3,a1,0x10 +8000105c: 00d5e5b3 or a1,a1,a3 +80001060: f6dff06f j 80000fcc +80001064: 00279693 slli a3,a5,0x2 +80001068: 00000297 auipc t0,0x0 +8000106c: 005686b3 add a3,a3,t0 +80001070: 00008293 mv t0,ra +80001074: fa0680e7 jalr -96(a3) +80001078: 00028093 mv ra,t0 +8000107c: ff078793 addi a5,a5,-16 +80001080: 40f70733 sub a4,a4,a5 +80001084: 00f60633 add a2,a2,a5 +80001088: f6c378e3 bgeu t1,a2,80000ff8 +8000108c: f3dff06f j 80000fc8 -80001124 <__call_exitprocs>: -80001124: fd010113 addi sp,sp,-48 -80001128: 01412c23 sw s4,24(sp) -8000112c: c281aa03 lw s4,-984(gp) # 800026c0 <_global_impure_ptr> -80001130: 03212023 sw s2,32(sp) -80001134: 02112623 sw ra,44(sp) -80001138: 148a2903 lw s2,328(s4) -8000113c: 02812423 sw s0,40(sp) -80001140: 02912223 sw s1,36(sp) -80001144: 01312e23 sw s3,28(sp) -80001148: 01512a23 sw s5,20(sp) -8000114c: 01612823 sw s6,16(sp) -80001150: 01712623 sw s7,12(sp) -80001154: 01812423 sw s8,8(sp) -80001158: 04090063 beqz s2,80001198 <__call_exitprocs+0x74> -8000115c: 00050b13 mv s6,a0 -80001160: 00058b93 mv s7,a1 -80001164: 00100a93 li s5,1 -80001168: fff00993 li s3,-1 -8000116c: 00492483 lw s1,4(s2) -80001170: fff48413 addi s0,s1,-1 -80001174: 02044263 bltz s0,80001198 <__call_exitprocs+0x74> -80001178: 00249493 slli s1,s1,0x2 -8000117c: 009904b3 add s1,s2,s1 -80001180: 040b8463 beqz s7,800011c8 <__call_exitprocs+0xa4> -80001184: 1044a783 lw a5,260(s1) -80001188: 05778063 beq a5,s7,800011c8 <__call_exitprocs+0xa4> -8000118c: fff40413 addi s0,s0,-1 -80001190: ffc48493 addi s1,s1,-4 -80001194: ff3416e3 bne s0,s3,80001180 <__call_exitprocs+0x5c> -80001198: 02c12083 lw ra,44(sp) -8000119c: 02812403 lw s0,40(sp) -800011a0: 02412483 lw s1,36(sp) -800011a4: 02012903 lw s2,32(sp) -800011a8: 01c12983 lw s3,28(sp) -800011ac: 01812a03 lw s4,24(sp) -800011b0: 01412a83 lw s5,20(sp) -800011b4: 01012b03 lw s6,16(sp) -800011b8: 00c12b83 lw s7,12(sp) -800011bc: 00812c03 lw s8,8(sp) -800011c0: 03010113 addi sp,sp,48 -800011c4: 00008067 ret -800011c8: 00492783 lw a5,4(s2) -800011cc: 0044a683 lw a3,4(s1) -800011d0: fff78793 addi a5,a5,-1 -800011d4: 04878e63 beq a5,s0,80001230 <__call_exitprocs+0x10c> -800011d8: 0004a223 sw zero,4(s1) -800011dc: fa0688e3 beqz a3,8000118c <__call_exitprocs+0x68> -800011e0: 18892783 lw a5,392(s2) -800011e4: 008a9733 sll a4,s5,s0 -800011e8: 00492c03 lw s8,4(s2) -800011ec: 00f777b3 and a5,a4,a5 -800011f0: 02079263 bnez a5,80001214 <__call_exitprocs+0xf0> -800011f4: 000680e7 jalr a3 -800011f8: 00492703 lw a4,4(s2) -800011fc: 148a2783 lw a5,328(s4) -80001200: 01871463 bne a4,s8,80001208 <__call_exitprocs+0xe4> -80001204: f92784e3 beq a5,s2,8000118c <__call_exitprocs+0x68> -80001208: f80788e3 beqz a5,80001198 <__call_exitprocs+0x74> -8000120c: 00078913 mv s2,a5 -80001210: f5dff06f j 8000116c <__call_exitprocs+0x48> -80001214: 18c92783 lw a5,396(s2) -80001218: 0844a583 lw a1,132(s1) -8000121c: 00f77733 and a4,a4,a5 -80001220: 00071c63 bnez a4,80001238 <__call_exitprocs+0x114> -80001224: 000b0513 mv a0,s6 -80001228: 000680e7 jalr a3 -8000122c: fcdff06f j 800011f8 <__call_exitprocs+0xd4> -80001230: 00892223 sw s0,4(s2) -80001234: fa9ff06f j 800011dc <__call_exitprocs+0xb8> -80001238: 00058513 mv a0,a1 -8000123c: 000680e7 jalr a3 -80001240: fb9ff06f j 800011f8 <__call_exitprocs+0xd4> +80001090 <__register_exitproc>: +80001090: c281a703 lw a4,-984(gp) # 800026c0 <_global_impure_ptr> +80001094: 14872783 lw a5,328(a4) +80001098: 04078c63 beqz a5,800010f0 <__register_exitproc+0x60> +8000109c: 0047a703 lw a4,4(a5) +800010a0: 01f00813 li a6,31 +800010a4: 06e84e63 blt a6,a4,80001120 <__register_exitproc+0x90> +800010a8: 00271813 slli a6,a4,0x2 +800010ac: 02050663 beqz a0,800010d8 <__register_exitproc+0x48> +800010b0: 01078333 add t1,a5,a6 +800010b4: 08c32423 sw a2,136(t1) +800010b8: 1887a883 lw a7,392(a5) +800010bc: 00100613 li a2,1 +800010c0: 00e61633 sll a2,a2,a4 +800010c4: 00c8e8b3 or a7,a7,a2 +800010c8: 1917a423 sw a7,392(a5) +800010cc: 10d32423 sw a3,264(t1) +800010d0: 00200693 li a3,2 +800010d4: 02d50463 beq a0,a3,800010fc <__register_exitproc+0x6c> +800010d8: 00170713 addi a4,a4,1 +800010dc: 00e7a223 sw a4,4(a5) +800010e0: 010787b3 add a5,a5,a6 +800010e4: 00b7a423 sw a1,8(a5) +800010e8: 00000513 li a0,0 +800010ec: 00008067 ret +800010f0: 14c70793 addi a5,a4,332 +800010f4: 14f72423 sw a5,328(a4) +800010f8: fa5ff06f j 8000109c <__register_exitproc+0xc> +800010fc: 18c7a683 lw a3,396(a5) +80001100: 00170713 addi a4,a4,1 +80001104: 00e7a223 sw a4,4(a5) +80001108: 00c6e633 or a2,a3,a2 +8000110c: 18c7a623 sw a2,396(a5) +80001110: 010787b3 add a5,a5,a6 +80001114: 00b7a423 sw a1,8(a5) +80001118: 00000513 li a0,0 +8000111c: 00008067 ret +80001120: fff00513 li a0,-1 +80001124: 00008067 ret + +80001128 <__call_exitprocs>: +80001128: fd010113 addi sp,sp,-48 +8000112c: 01412c23 sw s4,24(sp) +80001130: c281aa03 lw s4,-984(gp) # 800026c0 <_global_impure_ptr> +80001134: 03212023 sw s2,32(sp) +80001138: 02112623 sw ra,44(sp) +8000113c: 148a2903 lw s2,328(s4) +80001140: 02812423 sw s0,40(sp) +80001144: 02912223 sw s1,36(sp) +80001148: 01312e23 sw s3,28(sp) +8000114c: 01512a23 sw s5,20(sp) +80001150: 01612823 sw s6,16(sp) +80001154: 01712623 sw s7,12(sp) +80001158: 01812423 sw s8,8(sp) +8000115c: 04090063 beqz s2,8000119c <__call_exitprocs+0x74> +80001160: 00050b13 mv s6,a0 +80001164: 00058b93 mv s7,a1 +80001168: 00100a93 li s5,1 +8000116c: fff00993 li s3,-1 +80001170: 00492483 lw s1,4(s2) +80001174: fff48413 addi s0,s1,-1 +80001178: 02044263 bltz s0,8000119c <__call_exitprocs+0x74> +8000117c: 00249493 slli s1,s1,0x2 +80001180: 009904b3 add s1,s2,s1 +80001184: 040b8463 beqz s7,800011cc <__call_exitprocs+0xa4> +80001188: 1044a783 lw a5,260(s1) +8000118c: 05778063 beq a5,s7,800011cc <__call_exitprocs+0xa4> +80001190: fff40413 addi s0,s0,-1 +80001194: ffc48493 addi s1,s1,-4 +80001198: ff3416e3 bne s0,s3,80001184 <__call_exitprocs+0x5c> +8000119c: 02c12083 lw ra,44(sp) +800011a0: 02812403 lw s0,40(sp) +800011a4: 02412483 lw s1,36(sp) +800011a8: 02012903 lw s2,32(sp) +800011ac: 01c12983 lw s3,28(sp) +800011b0: 01812a03 lw s4,24(sp) +800011b4: 01412a83 lw s5,20(sp) +800011b8: 01012b03 lw s6,16(sp) +800011bc: 00c12b83 lw s7,12(sp) +800011c0: 00812c03 lw s8,8(sp) +800011c4: 03010113 addi sp,sp,48 +800011c8: 00008067 ret +800011cc: 00492783 lw a5,4(s2) +800011d0: 0044a683 lw a3,4(s1) +800011d4: fff78793 addi a5,a5,-1 +800011d8: 04878e63 beq a5,s0,80001234 <__call_exitprocs+0x10c> +800011dc: 0004a223 sw zero,4(s1) +800011e0: fa0688e3 beqz a3,80001190 <__call_exitprocs+0x68> +800011e4: 18892783 lw a5,392(s2) +800011e8: 008a9733 sll a4,s5,s0 +800011ec: 00492c03 lw s8,4(s2) +800011f0: 00f777b3 and a5,a4,a5 +800011f4: 02079263 bnez a5,80001218 <__call_exitprocs+0xf0> +800011f8: 000680e7 jalr a3 +800011fc: 00492703 lw a4,4(s2) +80001200: 148a2783 lw a5,328(s4) +80001204: 01871463 bne a4,s8,8000120c <__call_exitprocs+0xe4> +80001208: f8f904e3 beq s2,a5,80001190 <__call_exitprocs+0x68> +8000120c: f80788e3 beqz a5,8000119c <__call_exitprocs+0x74> +80001210: 00078913 mv s2,a5 +80001214: f5dff06f j 80001170 <__call_exitprocs+0x48> +80001218: 18c92783 lw a5,396(s2) +8000121c: 0844a583 lw a1,132(s1) +80001220: 00f77733 and a4,a4,a5 +80001224: 00071c63 bnez a4,8000123c <__call_exitprocs+0x114> +80001228: 000b0513 mv a0,s6 +8000122c: 000680e7 jalr a3 +80001230: fcdff06f j 800011fc <__call_exitprocs+0xd4> +80001234: 00892223 sw s0,4(s2) +80001238: fa9ff06f j 800011e0 <__call_exitprocs+0xb8> +8000123c: 00058513 mv a0,a1 +80001240: 000680e7 jalr a3 +80001244: fb9ff06f j 800011fc <__call_exitprocs+0xd4> Disassembly of section .rodata: -80001244 : -80001244: 00b0 addi a2,sp,72 -80001246: 8000 0x8000 -80001248: 013c addi a5,sp,136 +80001248 : +80001248: 00b0 addi a2,sp,72 8000124a: 8000 0x8000 -8000124c: 01c8 addi a0,sp,196 +8000124c: 013c addi a5,sp,136 8000124e: 8000 0x8000 -80001250: 0254 addi a3,sp,260 +80001250: 01c8 addi a0,sp,196 80001252: 8000 0x8000 -80001254: 02e8 addi a0,sp,332 +80001254: 0254 addi a3,sp,260 80001256: 8000 0x8000 -80001258: 0374 addi a3,sp,396 +80001258: 02e8 addi a0,sp,332 8000125a: 8000 0x8000 -8000125c: 0400 addi s0,sp,512 +8000125c: 0374 addi a3,sp,396 8000125e: 8000 0x8000 -80001260: 048c addi a1,sp,576 +80001260: 0400 addi s0,sp,512 80001262: 8000 0x8000 -80001264: 0518 addi a4,sp,640 +80001264: 048c addi a1,sp,576 80001266: 8000 0x8000 -80001268: 05a4 addi s1,sp,712 +80001268: 0518 addi a4,sp,640 8000126a: 8000 0x8000 -8000126c: 0630 addi a2,sp,776 +8000126c: 05a4 addi s1,sp,712 8000126e: 8000 0x8000 -80001270: 06bc addi a5,sp,840 +80001270: 0630 addi a2,sp,776 80001272: 8000 0x8000 -80001274: 0750 addi a2,sp,900 +80001274: 06bc addi a5,sp,840 80001276: 8000 0x8000 -80001278: 07dc addi a5,sp,964 +80001278: 0750 addi a2,sp,900 8000127a: 8000 0x8000 -8000127c: 0ab0 addi a2,sp,344 +8000127c: 07dc addi a5,sp,964 8000127e: 8000 0x8000 -80001280: 0870 addi a2,sp,28 +80001280: 0ab0 addi a2,sp,344 80001282: 8000 0x8000 -80001284: 0900 addi s0,sp,144 +80001284: 0870 addi a2,sp,28 80001286: 8000 0x8000 -80001288: 0990 addi a2,sp,208 +80001288: 0900 addi s0,sp,144 8000128a: 8000 0x8000 -8000128c: 0a20 addi s0,sp,280 +8000128c: 0990 addi a2,sp,208 8000128e: 8000 0x8000 +80001290: 0a20 addi s0,sp,280 +80001292: 8000 0x8000 Disassembly of section .init_array: -80002290 <__init_array_start>: -80002290: 0098 addi a4,sp,64 -80002292: 8000 0x8000 +80002294 <__init_array_start>: +80002294: 0098 addi a4,sp,64 +80002296: 8000 0x8000 Disassembly of section .data: @@ -1378,20 +1379,21 @@ Disassembly of section .comment: 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm 4: 2820 fld fs0,80(s0) 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm - a: 3120 fld fs0,96(a0) - c: 2e30 fld fa2,88(a2) - e: 2e32 fld ft8,264(sp) - 10: 0030 addi a2,sp,8 + a: 3920 fld fs0,112(a0) + c: 322e fld ft4,232(sp) + e: 302e fld ft0,232(sp) + ... Disassembly of section .riscv.attributes: 00000000 <.riscv.attributes>: - 0: 2941 jal 490 <_start-0x7ffffb70> + 0: 2541 jal 680 <_start-0x7ffff980> 2: 0000 unimp 4: 7200 flw fs0,32(a2) 6: 7369 lui t1,0xffffa 8: 01007663 bgeu zero,a6,14 <_start-0x7fffffec> - c: 001f 0000 1004 0x10040000001f + c: 0000001b 0x1b + 10: 1004 addi s1,sp,32 12: 7205 lui tp,0xfffe1 14: 3376 fld ft6,376(sp) 16: 6932 flw fs2,12(sp) @@ -1400,5 +1402,3 @@ Disassembly of section .riscv.attributes: 1c: 326d jal fffff9c6 <__global_pointer$+0x7fffcf2e> 1e: 3070 fld fa2,224(s0) 20: 665f 7032 0030 0x307032665f - 26: 0108 addi a0,sp,128 - 28: 0b0a slli s6,s6,0x2 diff --git a/driver/tests/dogfood/kernel.elf b/driver/tests/dogfood/kernel.elf index 81caa976..899a1092 100755 Binary files a/driver/tests/dogfood/kernel.elf and b/driver/tests/dogfood/kernel.elf differ diff --git a/runtime/src/vx_spawn.c b/runtime/src/vx_spawn.c index a215d1b3..50150b31 100644 --- a/runtime/src/vx_spawn.c +++ b/runtime/src/vx_spawn.c @@ -6,35 +6,61 @@ extern "C" { #endif +#define NUM_CORES_MAX 8 + typedef struct { func_t function; void * arguments; int nthreads; } spawn_t; -spawn_t* g_spawn = NULL; +spawn_t* g_spawn[NUM_CORES_MAX]; -void spawn_warp_runonce() { +void spawn_warp_all() { // active all threads - vx_tmc(g_spawn->nthreads); + int num_threads = vx_num_threads(); + vx_tmc(num_threads); + + int core_id = vx_core_id(); + spawn_t* p_spawn = g_spawn[core_id]; // call user routine - g_spawn->function(g_spawn->arguments); + p_spawn->function(p_spawn->arguments); - // resume single-thread execution on exit + // resume single-warp execution on exit + int wid = vx_warp_id(); + unsigned tmask = (0 == wid) ? 0x1 : 0x0; + vx_tmc(tmask); +} + +void spawn_warp_threads(int num_threads) { + // active all threads + vx_tmc(num_threads); + + int core_id = vx_core_id(); + spawn_t* p_spawn = g_spawn[core_id]; + + // call user routine + p_spawn->function(p_spawn->arguments); + + // resume single-warp execution on exit int wid = vx_warp_id(); unsigned tmask = (0 == wid) ? 0x1 : 0x0; vx_tmc(tmask); } void vx_spawn_warps(int num_warps, int num_threads, func_t func_ptr , void * args) { - spawn_t spawn = { func_ptr, args, num_threads }; - g_spawn = &spawn; + int core_id = vx_core_id(); + if (core_id >= NUM_CORES_MAX) + return; + + spawn_t spawn = { func_ptr, args, num_threads }; + g_spawn[core_id] = &spawn; if (num_warps > 1) { - vx_wspawn(num_warps, (unsigned)spawn_warp_runonce); + vx_wspawn(num_warps, (unsigned)spawn_warp_all); } - spawn_warp_runonce(); + spawn_warp_threads(num_threads); } #ifdef __cplusplus diff --git a/runtime/src/vx_start.S b/runtime/src/vx_start.S index 62eb8025..f24a83f6 100644 --- a/runtime/src/vx_start.S +++ b/runtime/src/vx_start.S @@ -4,24 +4,37 @@ .global _start .type _start, @function _start: + + # execute stack initialization on all warps la a1, vx_set_sp csrr a0, CSR_NW # get num warps - .word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN) + .word 0x00b5106b # wspawn a0, a1 jal vx_set_sp + + # return back to single thread execution li a0, 1 - .word 0x0005006b # back to single thread - # Initialize global pointerp - # call __cxx_global_var_init + .word 0x0005006b # tmc a0 + # Clear the bss segment la a0, _edata la a2, _end sub a2, a2, a0 li a1, 0 call memset - la a0, __libc_fini_array # Register global termination functions - call atexit # to be called upon exit - call __libc_init_array # Run global initialization functions + + # Register global termination functions + la a0, __libc_fini_array + + # to be called upon exit + call atexit + + # Run global initialization functions + call __libc_init_array + + # call main program routine call main + + # call exit routine tail exit .size _start, .-_start @@ -29,34 +42,39 @@ _start: .type _exit, @function .global _exit _exit: + # disable all threads in current warp li a0, 0 - .word 0x0005006b # disable all threads + .word 0x0005006b # tmc a0 .section .text .type vx_set_sp, @function .global vx_set_sp vx_set_sp: + # activate all threads csrr a0, CSR_NT # get num threads - .word 0x0005006b # activate all threads + .word 0x0005006b # set thread mask + # set global pointer register .option push .option norelax - 1:auipc gp, %pcrel_hi(__global_pointer$) - addi gp, gp, %pcrel_lo(1b) + la gp, __global_pointer$ .option pop + # allocate stack region for a threads on the processor + # set stack pointer csrr a1, CSR_GTID # get global thread id slli a1, a1, 10 # multiply by 1024 csrr a2, CSR_LTID # get local thread id slli a2, a2, 2 # multiply by 4 - lui sp, (SHARED_MEM_BASE_ADDR>>12) # load base sp + la sp, __stack_top$ # load stack base address sub sp, sp, a1 # sub thread block add sp, sp, a2 # reduce addr collision for perf - csrr a3, CSR_LWID # get wid + # disable active warps except warp0 + csrr a3, CSR_LWID # get local wid beqz a3, RETURN li a0, 0 - .word 0x0005006b # tmc 0 + .word 0x0005006b # tmc a0 RETURN: ret diff --git a/runtime/tests/dev/vx_dev_main.dump b/runtime/tests/dev/vx_dev_main.dump index 47005603..787fdb48 100644 --- a/runtime/tests/dev/vx_dev_main.dump +++ b/runtime/tests/dev/vx_dev_main.dump @@ -271,11 +271,11 @@ Disassembly of section .text: 80000374: 00008067 ret 80000378 : -80000378: c0002573 rdcycle a0 +80000378: b0002573 csrr a0,mcycle 8000037c: 00008067 ret 80000380 : -80000380: c0202573 rdinstret a0 +80000380: b0202573 csrr a0,minstret 80000384: 00008067 ret 80000388 : diff --git a/runtime/tests/dev/vx_dev_main.elf b/runtime/tests/dev/vx_dev_main.elf index a8b09e98..de978030 100755 Binary files a/runtime/tests/dev/vx_dev_main.elf and b/runtime/tests/dev/vx_dev_main.elf differ diff --git a/runtime/tests/dev/vx_dev_main.hex b/runtime/tests/dev/vx_dev_main.hex index 8a3c3809..a3a0c81d 100644 --- a/runtime/tests/dev/vx_dev_main.hex +++ b/runtime/tests/dev/vx_dev_main.hex @@ -55,7 +55,7 @@ :100348007325000267800000732520026780000083 :100358007325400267800000732550026780000003 :1003680073256002678000007325700267800000B3 -:10037800732500C067800000732520C067800000D7 +:10037800732500B067800000732520B067800000F7 :1003880063060520130101F52324810A232E310970 :100398002326110A2322910A2320210B232C410909 :1003A800232A510923286109232671099309050085 diff --git a/runtime/tests/nlTest/vx_nl_main.dump b/runtime/tests/nlTest/vx_nl_main.dump index 4ff45d35..6cb16f2a 100644 --- a/runtime/tests/nlTest/vx_nl_main.dump +++ b/runtime/tests/nlTest/vx_nl_main.dump @@ -132,11 +132,11 @@ Disassembly of section .text: 80000158: 00008067 ret 8000015c : -8000015c: c0002573 rdcycle a0 +8000015c: b0002573 csrr a0,mcycle 80000160: 00008067 ret 80000164 : -80000164: c0202573 rdinstret a0 +80000164: b0202573 csrr a0,minstret 80000168: 00008067 ret 8000016c : diff --git a/runtime/tests/nlTest/vx_nl_main.elf b/runtime/tests/nlTest/vx_nl_main.elf index a6291fe1..34b9aee6 100755 Binary files a/runtime/tests/nlTest/vx_nl_main.elf and b/runtime/tests/nlTest/vx_nl_main.elf differ diff --git a/runtime/tests/nlTest/vx_nl_main.hex b/runtime/tests/nlTest/vx_nl_main.hex index 7c2e61cc..2473deac 100644 --- a/runtime/tests/nlTest/vx_nl_main.hex +++ b/runtime/tests/nlTest/vx_nl_main.hex @@ -21,7 +21,7 @@ :1001280067800000732500026780000073252002A5 :100138006780000073254002678000007325500225 :1001480067800000732560026780000073257002D5 -:1001580067800000732500C067800000732520C0F9 +:1001580067800000732500B067800000732520B019 :100168006780000063060520130101F52324810A36 :10017800232E31092326110A2322910A2320210B39 :10018800232C4109232A51092328610923267109AF diff --git a/runtime/tests/simple/vx_simple.dump b/runtime/tests/simple/vx_simple.dump index 56562e87..3b107f4f 100644 --- a/runtime/tests/simple/vx_simple.dump +++ b/runtime/tests/simple/vx_simple.dump @@ -407,11 +407,11 @@ Disassembly of section .text: 80000574: 00008067 ret 80000578 : -80000578: c0002573 rdcycle a0 +80000578: b0002573 csrr a0,mcycle 8000057c: 00008067 ret 80000580 : -80000580: c0202573 rdinstret a0 +80000580: b0202573 csrr a0,minstret 80000584: 00008067 ret 80000588 : diff --git a/runtime/tests/simple/vx_simple.elf b/runtime/tests/simple/vx_simple.elf index 4a61e26e..58937e6b 100755 Binary files a/runtime/tests/simple/vx_simple.elf and b/runtime/tests/simple/vx_simple.elf differ diff --git a/runtime/tests/simple/vx_simple.hex b/runtime/tests/simple/vx_simple.hex index 524875ec..5a2d59d3 100644 --- a/runtime/tests/simple/vx_simple.hex +++ b/runtime/tests/simple/vx_simple.hex @@ -87,7 +87,7 @@ :100548007325000267800000732520026780000081 :100558007325400267800000732550026780000001 :1005680073256002678000007325700267800000B1 -:10057800732500C067800000732520C067800000D5 +:10057800732500B067800000732520B067800000F5 :1005880063060520130101F52324810A232E31096E :100598002326110A2322910A2320210B232C410907 :1005A800232A510923286109232671099309050083