Files
kernels/evaluation/perf_2021_03_07/4c/sgemm.result
2021-03-30 16:44:57 -04:00

147 lines
5.9 KiB
Plaintext

CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
Create context
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 3 ms
Download destination buffer
Verify result
PASSED!
PERF: core0: instrs=90890, cycles=51133, IPC=1.777521
PERF: core0: ibuffer stalls=10132
PERF: core0: scoreboard stalls=15251
PERF: core0: alu unit stalls=2423
PERF: core0: lsu unit stalls=3859
PERF: core0: csr unit stalls=0
PERF: core0: fpu unit stalls=0
PERF: core0: gpu unit stalls=0
PERF: core0: icache reads=23003
PERF: core0: icache read misses=73 (hit ratio=99%)
PERF: core0: icache pipeline stalls=7639
PERF: core0: icache reponse stalls=10132
PERF: core0: dcache reads=17502
PERF: core0: dcache writes=293
PERF: core0: dcache read misses=1041 (hit ratio=94%)
PERF: core0: dcache write misses=289 (hit ratio=1%)
PERF: core0: dcache bank stalls=8464 (utilization=67%)
PERF: core0: dcache mshr stalls=4228
PERF: core0: dcache pipeline stalls=9676
PERF: core0: dcache reponse stalls=76
PERF: core0: smem reads=2026
PERF: core0: smem writes=1599
PERF: core0: smem bank stalls=0 (utilization=100%)
PERF: core0: dram requests=479 (reads=186, writes=293)
PERF: core0: dram stalls=789 (utilization=37%)
PERF: core0: dram average latency=32 cycles
PERF: core1: instrs=90890, cycles=51143, IPC=1.777174
PERF: core1: ibuffer stalls=10158
PERF: core1: scoreboard stalls=15244
PERF: core1: alu unit stalls=2440
PERF: core1: lsu unit stalls=3894
PERF: core1: csr unit stalls=0
PERF: core1: fpu unit stalls=0
PERF: core1: gpu unit stalls=0
PERF: core1: icache reads=23003
PERF: core1: icache read misses=73 (hit ratio=99%)
PERF: core1: icache pipeline stalls=7685
PERF: core1: icache reponse stalls=10158
PERF: core1: dcache reads=17502
PERF: core1: dcache writes=293
PERF: core1: dcache read misses=1101 (hit ratio=93%)
PERF: core1: dcache write misses=289 (hit ratio=1%)
PERF: core1: dcache bank stalls=8464 (utilization=67%)
PERF: core1: dcache mshr stalls=4330
PERF: core1: dcache pipeline stalls=9347
PERF: core1: dcache reponse stalls=67
PERF: core1: smem reads=2026
PERF: core1: smem writes=1599
PERF: core1: smem bank stalls=0 (utilization=100%)
PERF: core1: dram requests=509 (reads=216, writes=293)
PERF: core1: dram stalls=715 (utilization=41%)
PERF: core1: dram average latency=32 cycles
PERF: core2: instrs=90890, cycles=51135, IPC=1.777452
PERF: core2: ibuffer stalls=10120
PERF: core2: scoreboard stalls=15237
PERF: core2: alu unit stalls=2406
PERF: core2: lsu unit stalls=3881
PERF: core2: csr unit stalls=0
PERF: core2: fpu unit stalls=0
PERF: core2: gpu unit stalls=0
PERF: core2: icache reads=23003
PERF: core2: icache read misses=73 (hit ratio=99%)
PERF: core2: icache pipeline stalls=7651
PERF: core2: icache reponse stalls=10120
PERF: core2: dcache reads=17502
PERF: core2: dcache writes=293
PERF: core2: dcache read misses=1040 (hit ratio=94%)
PERF: core2: dcache write misses=289 (hit ratio=1%)
PERF: core2: dcache bank stalls=8464 (utilization=67%)
PERF: core2: dcache mshr stalls=4234
PERF: core2: dcache pipeline stalls=9580
PERF: core2: dcache reponse stalls=75
PERF: core2: smem reads=2026
PERF: core2: smem writes=1599
PERF: core2: smem bank stalls=0 (utilization=100%)
PERF: core2: dram requests=478 (reads=185, writes=293)
PERF: core2: dram stalls=776 (utilization=38%)
PERF: core2: dram average latency=32 cycles
PERF: core3: instrs=90892, cycles=51134, IPC=1.777526
PERF: core3: ibuffer stalls=10116
PERF: core3: scoreboard stalls=15282
PERF: core3: alu unit stalls=2380
PERF: core3: lsu unit stalls=3862
PERF: core3: csr unit stalls=0
PERF: core3: fpu unit stalls=0
PERF: core3: gpu unit stalls=0
PERF: core3: icache reads=23005
PERF: core3: icache read misses=73 (hit ratio=99%)
PERF: core3: icache pipeline stalls=7688
PERF: core3: icache reponse stalls=10116
PERF: core3: dcache reads=17502
PERF: core3: dcache writes=293
PERF: core3: dcache read misses=1040 (hit ratio=94%)
PERF: core3: dcache write misses=289 (hit ratio=1%)
PERF: core3: dcache bank stalls=8464 (utilization=67%)
PERF: core3: dcache mshr stalls=4421
PERF: core3: dcache pipeline stalls=9647
PERF: core3: dcache reponse stalls=76
PERF: core3: smem reads=2026
PERF: core3: smem writes=1599
PERF: core3: smem bank stalls=0 (utilization=100%)
PERF: core3: dram requests=478 (reads=185, writes=293)
PERF: core3: dram stalls=684 (utilization=41%)
PERF: core3: dram average latency=32 cycles
PERF: instrs=363562, cycles=51143, IPC=7.108734
PERF: ibuffer stalls=40526
PERF: scoreboard stalls=61014
PERF: alu unit stalls=9649
PERF: lsu unit stalls=15496
PERF: csr unit stalls=0
PERF: fpu unit stalls=0
PERF: gpu unit stalls=0
PERF: icache reads=92014
PERF: icache read misses=292 (hit ratio=99%)
PERF: icache pipeline stalls=30663
PERF: icache reponse stalls=40526
PERF: dcache reads=70008
PERF: dcache writes=1172
PERF: dcache read misses=4222 (hit ratio=93%)
PERF: dcache write misses=1156 (hit ratio=1%)
PERF: dcache bank stalls=33856 (utilization=67%)
PERF: dcache mshr stalls=17213
PERF: dcache pipeline stalls=38250
PERF: dcache reponse stalls=294
PERF: smem reads=8104
PERF: smem writes=6396
PERF: smem bank stalls=0 (utilization=100%)
PERF: dram requests=1944 (reads=772, writes=1172)
PERF: dram stalls=2964 (utilization=39%)
PERF: dram average latency=32 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'