adding support for non-cacheable memory addressing
This commit is contained in:
@@ -26,6 +26,7 @@ make -s
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l3cache --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=demo --args="-n1"
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=io_addr --args="-n1"
|
||||
|
||||
# build flags
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
||||
|
||||
@@ -3,22 +3,26 @@ all:
|
||||
$(MAKE) -C demo
|
||||
$(MAKE) -C dogfood
|
||||
$(MAKE) -C stress
|
||||
$(MAKE) -C io_addr
|
||||
|
||||
# Invoke the run-vlsim target of every test sub-directory, in order.
# Declared phony: "run" is a command, not a file, so a stray file or
# directory with that name must never make the target look up to date.
.PHONY: run
run:
	$(MAKE) -C basic run-vlsim
	$(MAKE) -C demo run-vlsim
	$(MAKE) -C dogfood run-vlsim
	$(MAKE) -C stress run-vlsim
	$(MAKE) -C io_addr run-vlsim
|
||||
|
||||
# Invoke the clean target of every test sub-directory.
# Declared phony so that a file named "clean" cannot silently disable it.
.PHONY: clean
clean:
	$(MAKE) -C basic clean
	$(MAKE) -C demo clean
	$(MAKE) -C dogfood clean
	$(MAKE) -C stress clean
	$(MAKE) -C io_addr clean
|
||||
|
||||
# Invoke the clean-all target of every test sub-directory.
# Declared phony so that a file named "clean-all" cannot silently disable it.
.PHONY: clean-all
clean-all:
	$(MAKE) -C basic clean-all
	$(MAKE) -C demo clean-all
	$(MAKE) -C dogfood clean-all
	$(MAKE) -C stress clean-all
	$(MAKE) -C io_addr clean-all
|
||||
|
||||
|
||||
67
driver/tests/io_addr/Makefile
Normal file
67
driver/tests/io_addr/Makefile
Normal file
@@ -0,0 +1,67 @@
|
||||
# Tool and runtime locations; "?=" lets the environment or the command line override them.
RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain
VORTEX_RT_PATH ?= $(wildcard ../../../runtime)

# Arguments handed to the test program by the run-* targets.
OPTS ?= -n1

# Cross tools used for the device kernel (compiler, disassembler, image extractor).
vx_tool_prefix = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-
VX_CC  = $(vx_tool_prefix)gcc
VX_CXX = $(vx_tool_prefix)g++
VX_DP  = $(vx_tool_prefix)objdump
VX_CP  = $(vx_tool_prefix)objcopy

# Kernel compile flags, followed by the runtime and hardware include directories.
VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections \
             -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw

# Kernel link flags: static link with the runtime linker script, unused sections dropped.
VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld \
              -Wl,--gc-sections \
              $(VORTEX_RT_PATH)/libvortexrt.a

VX_SRCS = kernel.c

# Host program flags (debug build), followed by the driver and hardware include directories.
# Optimized alternative kept for reference:
#   CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors \
            -I../../include -I$(VORTEX_RT_PATH)/../hw

# Name of the host executable and the sources it is built from.
PROJECT = io_addr

SRCS = main.cpp
|
||||
|
||||
# Default goal: build the host test program, the raw kernel image and its disassembly.
# Declared phony because "all" names a command, not a file this Makefile produces.
.PHONY: all
all: $(PROJECT) kernel.bin kernel.dump
|
||||
|
||||
# Disassemble the linked kernel into a text listing.
# The shell creates $@ before objdump runs, so on failure the partial file is
# removed; otherwise a truncated listing would look newer than kernel.elf and
# never be regenerated.
kernel.dump: kernel.elf
	$(VX_DP) -D $< > $@ || { rm -f $@; exit 1; }
|
||||
|
||||
# Extract the raw binary image (the file the host program uploads) from the linked kernel.
kernel.bin: kernel.elf
	$(VX_CP) -O binary $< $@
|
||||
|
||||
# Cross-compile and link the device kernel against the Vortex runtime.
# common.h is a prerequisite because kernel.c includes it: without it a change
# to the shared kernel-argument layout would not rebuild the kernel.
# The recipe names $(VX_SRCS) explicitly so the header is never passed to the compiler.
kernel.elf: $(VX_SRCS) common.h
	$(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o $@
|
||||
|
||||
# Build the host test program and link it against the driver stub library.
# common.h is a prerequisite because main.cpp includes it: without it a change
# to the shared kernel-argument layout would not rebuild the host side.
# The recipe names $(SRCS) rather than $^ so that header prerequisites are
# never handed to the compiler as inputs.
$(PROJECT): $(SRCS) common.h
	$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -L../../stub -lvortex -o $@
|
||||
|
||||
# Run the test with the library found in ../../opae first on the loader path.
# Declared phony: this is a command, not a file to be produced.
.PHONY: run-fpga
run-fpga: $(PROJECT)
	LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
# Run the test with the library found in ../../opae/ase first on the loader path
# and ASE_LOG=0 set in the program's environment.
# Declared phony: this is a command, not a file to be produced.
.PHONY: run-asesim
run-asesim: $(PROJECT)
	ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
# Run the test with the library found in ../../opae/vlsim first on the loader path.
# Declared phony: this is a command, not a file to be produced.
.PHONY: run-vlsim
run-vlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
# Run the test with the library found in ../../rtlsim first on the loader path.
# Declared phony: this is a command, not a file to be produced.
.PHONY: run-rtlsim
run-rtlsim: $(PROJECT)
	LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
# Run the test with the library found in ../../simx first on the loader path.
# Declared phony: this is a command, not a file to be produced.
.PHONY: run-simx
run-simx: $(PROJECT)
	LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
# Ask the compiler (-MM) for the user-header dependencies of the host sources
# and store them in the file this rule is named after.
.depend: $(SRCS)
	$(CXX) $(CXXFLAGS) -MM $^ > $@
|
||||
|
||||
# Remove the host-side build products: the executable, object files and the
# generated dependency file. Kernel artifacts are left in place.
# Declared phony so that a file named "clean" cannot silently disable it.
.PHONY: clean
clean:
	rm -rf $(PROJECT) *.o .depend
|
||||
|
||||
# Everything "clean" removes, plus the kernel artifacts (ELF, raw image, listing).
# Declared phony so that a file named "clean-all" cannot silently disable it.
.PHONY: clean-all
clean-all: clean
	rm -rf *.elf *.bin *.dump
|
||||
|
||||
# Pull in the generated header dependencies unless the only goals are cleaning
# goals. Including .depend makes make (re)generate it by running the compiler,
# which is wasted work right before clean / clean-all delete it again; the
# previous test only recognised "clean", so "make clean-all" still did that.
# An empty goal list means the default goal, which is a build.
build_goals := $(if $(MAKECMDGOALS),$(filter-out clean clean-all,$(MAKECMDGOALS)),all)
ifneq ($(strip $(build_goals)),)
-include .depend
endif
|
||||
12
driver/tests/io_addr/common.h
Normal file
12
driver/tests/io_addr/common.h
Normal file
@@ -0,0 +1,12 @@
|
||||
#ifndef _COMMON_H_
#define _COMMON_H_

// uint32_t is used below; include its header here so this file does not
// depend on what the including source happened to include first.
#include <stdint.h>

// Device address of the kernel argument block: the host copies a kernel_arg_t
// to this address and the kernel's main() reads it back from the same place.
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000

// Argument block shared between the host program and the device kernel.
struct kernel_arg_t {
  uint32_t num_points; // number of tasks to spawn / entries in each buffer
  uint32_t src_ptr;    // device address of the table of addresses to read from
  uint32_t dst_ptr;    // device address of the buffer receiving the values read
};

#endif
|
||||
BIN
driver/tests/io_addr/kernel.bin
Executable file
BIN
driver/tests/io_addr/kernel.bin
Executable file
Binary file not shown.
19
driver/tests/io_addr/kernel.c
Normal file
19
driver/tests/io_addr/kernel.c
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <stdint.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <vx_spawn.h>
|
||||
#include "common.h"
|
||||
|
||||
void kernel_body(int task_id, void* arg) {
|
||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||
uint32_t* src_ptr = (uint32_t*)_arg->src_ptr;
|
||||
uint32_t* dst_ptr = (uint32_t*)_arg->dst_ptr;
|
||||
|
||||
int32_t* addr_ptr = (int32_t*)(src_ptr[task_id]);
|
||||
|
||||
dst_ptr[task_id] = *addr_ptr;
|
||||
}
|
||||
|
||||
void main() {
|
||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||
vx_spawn_tasks(arg->num_points, kernel_body, arg);
|
||||
}
|
||||
547
driver/tests/io_addr/kernel.dump
Normal file
547
driver/tests/io_addr/kernel.dump
Normal file
@@ -0,0 +1,547 @@
|
||||
|
||||
kernel.elf: file format elf32-littleriscv
|
||||
|
||||
|
||||
Disassembly of section .init:
|
||||
|
||||
80000000 <_start>:
|
||||
80000000: 00000597 auipc a1,0x0
|
||||
80000004: 0ac58593 addi a1,a1,172 # 800000ac <vx_set_sp>
|
||||
80000008: fc102573 csrr a0,0xfc1
|
||||
8000000c: 00b5106b 0xb5106b
|
||||
80000010: 09c000ef jal ra,800000ac <vx_set_sp>
|
||||
80000014: 00100513 li a0,1
|
||||
80000018: 0005006b 0x5006b
|
||||
8000001c: 00002517 auipc a0,0x2
|
||||
80000020: ae050513 addi a0,a0,-1312 # 80001afc <g_wspawn_args>
|
||||
80000024: 00002617 auipc a2,0x2
|
||||
80000028: b5860613 addi a2,a2,-1192 # 80001b7c <__BSS_END__>
|
||||
8000002c: 40a60633 sub a2,a2,a0
|
||||
80000030: 00000593 li a1,0
|
||||
80000034: 3fc000ef jal ra,80000430 <memset>
|
||||
80000038: 00000517 auipc a0,0x0
|
||||
8000003c: 30050513 addi a0,a0,768 # 80000338 <__libc_fini_array>
|
||||
80000040: 2b0000ef jal ra,800002f0 <atexit>
|
||||
80000044: 350000ef jal ra,80000394 <__libc_init_array>
|
||||
80000048: 008000ef jal ra,80000050 <main>
|
||||
8000004c: 2b80006f j 80000304 <exit>
|
||||
|
||||
Disassembly of section .text:
|
||||
|
||||
80000050 <main>:
|
||||
80000050: 7ffff7b7 lui a5,0x7ffff
|
||||
80000054: 0007a503 lw a0,0(a5) # 7ffff000 <__stack_size+0x7fffec00>
|
||||
80000058: 800005b7 lui a1,0x80000
|
||||
8000005c: 7ffff637 lui a2,0x7ffff
|
||||
80000060: 08058593 addi a1,a1,128 # 80000080 <__stack_top+0x81000080>
|
||||
80000064: 1440006f j 800001a8 <vx_spawn_tasks>
|
||||
|
||||
80000068 <register_fini>:
|
||||
80000068: 00000793 li a5,0
|
||||
8000006c: 00078863 beqz a5,8000007c <register_fini+0x14>
|
||||
80000070: 80000537 lui a0,0x80000
|
||||
80000074: 33850513 addi a0,a0,824 # 80000338 <__stack_top+0x81000338>
|
||||
80000078: 2780006f j 800002f0 <atexit>
|
||||
8000007c: 00008067 ret
|
||||
|
||||
80000080 <kernel_body>:
|
||||
80000080: 0045a783 lw a5,4(a1)
|
||||
80000084: 00251513 slli a0,a0,0x2
|
||||
80000088: 00a787b3 add a5,a5,a0
|
||||
8000008c: 0007a703 lw a4,0(a5)
|
||||
80000090: 0085a783 lw a5,8(a1)
|
||||
80000094: 00072703 lw a4,0(a4)
|
||||
80000098: 00a78533 add a0,a5,a0
|
||||
8000009c: 00e52023 sw a4,0(a0)
|
||||
800000a0: 00008067 ret
|
||||
|
||||
800000a4 <_exit>:
|
||||
800000a4: 00000513 li a0,0
|
||||
800000a8: 0005006b 0x5006b
|
||||
|
||||
800000ac <vx_set_sp>:
|
||||
800000ac: fc002573 csrr a0,0xfc0
|
||||
800000b0: 0005006b 0x5006b
|
||||
800000b4: 00002197 auipc gp,0x2
|
||||
800000b8: e1c18193 addi gp,gp,-484 # 80001ed0 <__global_pointer>
|
||||
800000bc: 7f000117 auipc sp,0x7f000
|
||||
800000c0: f4410113 addi sp,sp,-188 # ff000000 <__stack_top>
|
||||
800000c4: 40000593 li a1,1024
|
||||
800000c8: cc102673 csrr a2,0xcc1
|
||||
800000cc: 02c585b3 mul a1,a1,a2
|
||||
800000d0: 40b10133 sub sp,sp,a1
|
||||
800000d4: cc3026f3 csrr a3,0xcc3
|
||||
800000d8: 00068663 beqz a3,800000e4 <RETURN>
|
||||
800000dc: 00000513 li a0,0
|
||||
800000e0: 0005006b 0x5006b
|
||||
|
||||
800000e4 <RETURN>:
|
||||
800000e4: 00008067 ret
|
||||
|
||||
800000e8 <spawn_tasks_callback>:
|
||||
800000e8: fe010113 addi sp,sp,-32
|
||||
800000ec: 00112e23 sw ra,28(sp)
|
||||
800000f0: 00812c23 sw s0,24(sp)
|
||||
800000f4: 00912a23 sw s1,20(sp)
|
||||
800000f8: 01212823 sw s2,16(sp)
|
||||
800000fc: 01312623 sw s3,12(sp)
|
||||
80000100: fc0027f3 csrr a5,0xfc0
|
||||
80000104: 0007806b 0x7806b
|
||||
80000108: cc5027f3 csrr a5,0xcc5
|
||||
8000010c: cc3029f3 csrr s3,0xcc3
|
||||
80000110: cc002773 csrr a4,0xcc0
|
||||
80000114: fc002673 csrr a2,0xfc0
|
||||
80000118: 00279693 slli a3,a5,0x2
|
||||
8000011c: 800027b7 lui a5,0x80002
|
||||
80000120: afc78793 addi a5,a5,-1284 # 80001afc <__stack_top+0x81001afc>
|
||||
80000124: 00d787b3 add a5,a5,a3
|
||||
80000128: 0007a483 lw s1,0(a5)
|
||||
8000012c: 0104a403 lw s0,16(s1)
|
||||
80000130: 00c4a683 lw a3,12(s1)
|
||||
80000134: 0089a933 slt s2,s3,s0
|
||||
80000138: 00040793 mv a5,s0
|
||||
8000013c: 00d90933 add s2,s2,a3
|
||||
80000140: 03368433 mul s0,a3,s3
|
||||
80000144: 00f9d463 bge s3,a5,8000014c <spawn_tasks_callback+0x64>
|
||||
80000148: 00098793 mv a5,s3
|
||||
8000014c: 00f40433 add s0,s0,a5
|
||||
80000150: 0084a683 lw a3,8(s1)
|
||||
80000154: 02c40433 mul s0,s0,a2
|
||||
80000158: 02e907b3 mul a5,s2,a4
|
||||
8000015c: 00d40433 add s0,s0,a3
|
||||
80000160: 00f40433 add s0,s0,a5
|
||||
80000164: 00890933 add s2,s2,s0
|
||||
80000168: 01245e63 bge s0,s2,80000184 <spawn_tasks_callback+0x9c>
|
||||
8000016c: 0004a783 lw a5,0(s1)
|
||||
80000170: 0044a583 lw a1,4(s1)
|
||||
80000174: 00040513 mv a0,s0
|
||||
80000178: 00140413 addi s0,s0,1
|
||||
8000017c: 000780e7 jalr a5
|
||||
80000180: fe8916e3 bne s2,s0,8000016c <spawn_tasks_callback+0x84>
|
||||
80000184: 0019b993 seqz s3,s3
|
||||
80000188: 0009806b 0x9806b
|
||||
8000018c: 01c12083 lw ra,28(sp)
|
||||
80000190: 01812403 lw s0,24(sp)
|
||||
80000194: 01412483 lw s1,20(sp)
|
||||
80000198: 01012903 lw s2,16(sp)
|
||||
8000019c: 00c12983 lw s3,12(sp)
|
||||
800001a0: 02010113 addi sp,sp,32
|
||||
800001a4: 00008067 ret
|
||||
|
||||
800001a8 <vx_spawn_tasks>:
|
||||
800001a8: fc010113 addi sp,sp,-64
|
||||
800001ac: 02112e23 sw ra,60(sp)
|
||||
800001b0: 02812c23 sw s0,56(sp)
|
||||
800001b4: 02912a23 sw s1,52(sp)
|
||||
800001b8: 03212823 sw s2,48(sp)
|
||||
800001bc: 03312623 sw s3,44(sp)
|
||||
800001c0: fc2026f3 csrr a3,0xfc2
|
||||
800001c4: fc102873 csrr a6,0xfc1
|
||||
800001c8: fc002473 csrr s0,0xfc0
|
||||
800001cc: cc5027f3 csrr a5,0xcc5
|
||||
800001d0: 01f00713 li a4,31
|
||||
800001d4: 0cf74463 blt a4,a5,8000029c <vx_spawn_tasks+0xf4>
|
||||
800001d8: 030408b3 mul a7,s0,a6
|
||||
800001dc: 00100713 li a4,1
|
||||
800001e0: 00a8d463 bge a7,a0,800001e8 <vx_spawn_tasks+0x40>
|
||||
800001e4: 03154733 div a4,a0,a7
|
||||
800001e8: 0ce6c863 blt a3,a4,800002b8 <vx_spawn_tasks+0x110>
|
||||
800001ec: 0ae7d863 bge a5,a4,8000029c <vx_spawn_tasks+0xf4>
|
||||
800001f0: fff68693 addi a3,a3,-1
|
||||
800001f4: 02e54333 div t1,a0,a4
|
||||
800001f8: 00030893 mv a7,t1
|
||||
800001fc: 00f69663 bne a3,a5,80000208 <vx_spawn_tasks+0x60>
|
||||
80000200: 02e56533 rem a0,a0,a4
|
||||
80000204: 006508b3 add a7,a0,t1
|
||||
80000208: 0288c4b3 div s1,a7,s0
|
||||
8000020c: 0288e933 rem s2,a7,s0
|
||||
80000210: 0b04ca63 blt s1,a6,800002c4 <vx_spawn_tasks+0x11c>
|
||||
80000214: 00100693 li a3,1
|
||||
80000218: 0304c733 div a4,s1,a6
|
||||
8000021c: 00070663 beqz a4,80000228 <vx_spawn_tasks+0x80>
|
||||
80000220: 00070693 mv a3,a4
|
||||
80000224: 0304e733 rem a4,s1,a6
|
||||
80000228: 800029b7 lui s3,0x80002
|
||||
8000022c: afc98993 addi s3,s3,-1284 # 80001afc <__stack_top+0x81001afc>
|
||||
80000230: 00e12e23 sw a4,28(sp)
|
||||
80000234: 00c10713 addi a4,sp,12
|
||||
80000238: 00b12623 sw a1,12(sp)
|
||||
8000023c: 00c12823 sw a2,16(sp)
|
||||
80000240: 00d12c23 sw a3,24(sp)
|
||||
80000244: 02f30333 mul t1,t1,a5
|
||||
80000248: 00279793 slli a5,a5,0x2
|
||||
8000024c: 00f987b3 add a5,s3,a5
|
||||
80000250: 00e7a023 sw a4,0(a5)
|
||||
80000254: 00612a23 sw t1,20(sp)
|
||||
80000258: 06904c63 bgtz s1,800002d0 <vx_spawn_tasks+0x128>
|
||||
8000025c: 04090063 beqz s2,8000029c <vx_spawn_tasks+0xf4>
|
||||
80000260: 02848433 mul s0,s1,s0
|
||||
80000264: 00812a23 sw s0,20(sp)
|
||||
80000268: 0009006b 0x9006b
|
||||
8000026c: cc5027f3 csrr a5,0xcc5
|
||||
80000270: cc202573 csrr a0,0xcc2
|
||||
80000274: 00279793 slli a5,a5,0x2
|
||||
80000278: 00f989b3 add s3,s3,a5
|
||||
8000027c: 0009a783 lw a5,0(s3)
|
||||
80000280: 0087a683 lw a3,8(a5)
|
||||
80000284: 0007a703 lw a4,0(a5)
|
||||
80000288: 0047a583 lw a1,4(a5)
|
||||
8000028c: 00d50533 add a0,a0,a3
|
||||
80000290: 000700e7 jalr a4
|
||||
80000294: 00100793 li a5,1
|
||||
80000298: 0007806b 0x7806b
|
||||
8000029c: 03c12083 lw ra,60(sp)
|
||||
800002a0: 03812403 lw s0,56(sp)
|
||||
800002a4: 03412483 lw s1,52(sp)
|
||||
800002a8: 03012903 lw s2,48(sp)
|
||||
800002ac: 02c12983 lw s3,44(sp)
|
||||
800002b0: 04010113 addi sp,sp,64
|
||||
800002b4: 00008067 ret
|
||||
800002b8: 00068713 mv a4,a3
|
||||
800002bc: f2e7cae3 blt a5,a4,800001f0 <vx_spawn_tasks+0x48>
|
||||
800002c0: fddff06f j 8000029c <vx_spawn_tasks+0xf4>
|
||||
800002c4: 00000713 li a4,0
|
||||
800002c8: 00100693 li a3,1
|
||||
800002cc: f5dff06f j 80000228 <vx_spawn_tasks+0x80>
|
||||
800002d0: 00048713 mv a4,s1
|
||||
800002d4: 00985463 bge a6,s1,800002dc <vx_spawn_tasks+0x134>
|
||||
800002d8: 00080713 mv a4,a6
|
||||
800002dc: 800007b7 lui a5,0x80000
|
||||
800002e0: 0e878793 addi a5,a5,232 # 800000e8 <__stack_top+0x810000e8>
|
||||
800002e4: 00f7106b 0xf7106b
|
||||
800002e8: e01ff0ef jal ra,800000e8 <spawn_tasks_callback>
|
||||
800002ec: f71ff06f j 8000025c <vx_spawn_tasks+0xb4>
|
||||
|
||||
800002f0 <atexit>:
|
||||
800002f0: 00050593 mv a1,a0
|
||||
800002f4: 00000693 li a3,0
|
||||
800002f8: 00000613 li a2,0
|
||||
800002fc: 00000513 li a0,0
|
||||
80000300: 20c0006f j 8000050c <__register_exitproc>
|
||||
|
||||
80000304 <exit>:
|
||||
80000304: ff010113 addi sp,sp,-16
|
||||
80000308: 00000593 li a1,0
|
||||
8000030c: 00812423 sw s0,8(sp)
|
||||
80000310: 00112623 sw ra,12(sp)
|
||||
80000314: 00050413 mv s0,a0
|
||||
80000318: 290000ef jal ra,800005a8 <__call_exitprocs>
|
||||
8000031c: 800027b7 lui a5,0x80002
|
||||
80000320: af87a503 lw a0,-1288(a5) # 80001af8 <__stack_top+0x81001af8>
|
||||
80000324: 03c52783 lw a5,60(a0)
|
||||
80000328: 00078463 beqz a5,80000330 <exit+0x2c>
|
||||
8000032c: 000780e7 jalr a5
|
||||
80000330: 00040513 mv a0,s0
|
||||
80000334: d71ff0ef jal ra,800000a4 <_exit>
|
||||
|
||||
80000338 <__libc_fini_array>:
|
||||
80000338: ff010113 addi sp,sp,-16
|
||||
8000033c: 00812423 sw s0,8(sp)
|
||||
80000340: 800017b7 lui a5,0x80001
|
||||
80000344: 80001437 lui s0,0x80001
|
||||
80000348: 6d040413 addi s0,s0,1744 # 800016d0 <__stack_top+0x810016d0>
|
||||
8000034c: 6d078793 addi a5,a5,1744 # 800016d0 <__stack_top+0x810016d0>
|
||||
80000350: 408787b3 sub a5,a5,s0
|
||||
80000354: 00912223 sw s1,4(sp)
|
||||
80000358: 00112623 sw ra,12(sp)
|
||||
8000035c: 4027d493 srai s1,a5,0x2
|
||||
80000360: 02048063 beqz s1,80000380 <__libc_fini_array+0x48>
|
||||
80000364: ffc78793 addi a5,a5,-4
|
||||
80000368: 00878433 add s0,a5,s0
|
||||
8000036c: 00042783 lw a5,0(s0)
|
||||
80000370: fff48493 addi s1,s1,-1
|
||||
80000374: ffc40413 addi s0,s0,-4
|
||||
80000378: 000780e7 jalr a5
|
||||
8000037c: fe0498e3 bnez s1,8000036c <__libc_fini_array+0x34>
|
||||
80000380: 00c12083 lw ra,12(sp)
|
||||
80000384: 00812403 lw s0,8(sp)
|
||||
80000388: 00412483 lw s1,4(sp)
|
||||
8000038c: 01010113 addi sp,sp,16
|
||||
80000390: 00008067 ret
|
||||
|
||||
80000394 <__libc_init_array>:
|
||||
80000394: ff010113 addi sp,sp,-16
|
||||
80000398: 00812423 sw s0,8(sp)
|
||||
8000039c: 01212023 sw s2,0(sp)
|
||||
800003a0: 80001437 lui s0,0x80001
|
||||
800003a4: 80001937 lui s2,0x80001
|
||||
800003a8: 6cc40793 addi a5,s0,1740 # 800016cc <__stack_top+0x810016cc>
|
||||
800003ac: 6cc90913 addi s2,s2,1740 # 800016cc <__stack_top+0x810016cc>
|
||||
800003b0: 40f90933 sub s2,s2,a5
|
||||
800003b4: 00112623 sw ra,12(sp)
|
||||
800003b8: 00912223 sw s1,4(sp)
|
||||
800003bc: 40295913 srai s2,s2,0x2
|
||||
800003c0: 02090063 beqz s2,800003e0 <__libc_init_array+0x4c>
|
||||
800003c4: 6cc40413 addi s0,s0,1740
|
||||
800003c8: 00000493 li s1,0
|
||||
800003cc: 00042783 lw a5,0(s0)
|
||||
800003d0: 00148493 addi s1,s1,1
|
||||
800003d4: 00440413 addi s0,s0,4
|
||||
800003d8: 000780e7 jalr a5
|
||||
800003dc: fe9918e3 bne s2,s1,800003cc <__libc_init_array+0x38>
|
||||
800003e0: 80001437 lui s0,0x80001
|
||||
800003e4: 80001937 lui s2,0x80001
|
||||
800003e8: 6cc40793 addi a5,s0,1740 # 800016cc <__stack_top+0x810016cc>
|
||||
800003ec: 6d090913 addi s2,s2,1744 # 800016d0 <__stack_top+0x810016d0>
|
||||
800003f0: 40f90933 sub s2,s2,a5
|
||||
800003f4: 40295913 srai s2,s2,0x2
|
||||
800003f8: 02090063 beqz s2,80000418 <__libc_init_array+0x84>
|
||||
800003fc: 6cc40413 addi s0,s0,1740
|
||||
80000400: 00000493 li s1,0
|
||||
80000404: 00042783 lw a5,0(s0)
|
||||
80000408: 00148493 addi s1,s1,1
|
||||
8000040c: 00440413 addi s0,s0,4
|
||||
80000410: 000780e7 jalr a5
|
||||
80000414: fe9918e3 bne s2,s1,80000404 <__libc_init_array+0x70>
|
||||
80000418: 00c12083 lw ra,12(sp)
|
||||
8000041c: 00812403 lw s0,8(sp)
|
||||
80000420: 00412483 lw s1,4(sp)
|
||||
80000424: 00012903 lw s2,0(sp)
|
||||
80000428: 01010113 addi sp,sp,16
|
||||
8000042c: 00008067 ret
|
||||
|
||||
80000430 <memset>:
|
||||
80000430: 00f00313 li t1,15
|
||||
80000434: 00050713 mv a4,a0
|
||||
80000438: 02c37e63 bgeu t1,a2,80000474 <memset+0x44>
|
||||
8000043c: 00f77793 andi a5,a4,15
|
||||
80000440: 0a079063 bnez a5,800004e0 <memset+0xb0>
|
||||
80000444: 08059263 bnez a1,800004c8 <memset+0x98>
|
||||
80000448: ff067693 andi a3,a2,-16
|
||||
8000044c: 00f67613 andi a2,a2,15
|
||||
80000450: 00e686b3 add a3,a3,a4
|
||||
80000454: 00b72023 sw a1,0(a4)
|
||||
80000458: 00b72223 sw a1,4(a4)
|
||||
8000045c: 00b72423 sw a1,8(a4)
|
||||
80000460: 00b72623 sw a1,12(a4)
|
||||
80000464: 01070713 addi a4,a4,16
|
||||
80000468: fed766e3 bltu a4,a3,80000454 <memset+0x24>
|
||||
8000046c: 00061463 bnez a2,80000474 <memset+0x44>
|
||||
80000470: 00008067 ret
|
||||
80000474: 40c306b3 sub a3,t1,a2
|
||||
80000478: 00269693 slli a3,a3,0x2
|
||||
8000047c: 00000297 auipc t0,0x0
|
||||
80000480: 005686b3 add a3,a3,t0
|
||||
80000484: 00c68067 jr 12(a3)
|
||||
80000488: 00b70723 sb a1,14(a4)
|
||||
8000048c: 00b706a3 sb a1,13(a4)
|
||||
80000490: 00b70623 sb a1,12(a4)
|
||||
80000494: 00b705a3 sb a1,11(a4)
|
||||
80000498: 00b70523 sb a1,10(a4)
|
||||
8000049c: 00b704a3 sb a1,9(a4)
|
||||
800004a0: 00b70423 sb a1,8(a4)
|
||||
800004a4: 00b703a3 sb a1,7(a4)
|
||||
800004a8: 00b70323 sb a1,6(a4)
|
||||
800004ac: 00b702a3 sb a1,5(a4)
|
||||
800004b0: 00b70223 sb a1,4(a4)
|
||||
800004b4: 00b701a3 sb a1,3(a4)
|
||||
800004b8: 00b70123 sb a1,2(a4)
|
||||
800004bc: 00b700a3 sb a1,1(a4)
|
||||
800004c0: 00b70023 sb a1,0(a4)
|
||||
800004c4: 00008067 ret
|
||||
800004c8: 0ff5f593 andi a1,a1,255
|
||||
800004cc: 00859693 slli a3,a1,0x8
|
||||
800004d0: 00d5e5b3 or a1,a1,a3
|
||||
800004d4: 01059693 slli a3,a1,0x10
|
||||
800004d8: 00d5e5b3 or a1,a1,a3
|
||||
800004dc: f6dff06f j 80000448 <memset+0x18>
|
||||
800004e0: 00279693 slli a3,a5,0x2
|
||||
800004e4: 00000297 auipc t0,0x0
|
||||
800004e8: 005686b3 add a3,a3,t0
|
||||
800004ec: 00008293 mv t0,ra
|
||||
800004f0: fa0680e7 jalr -96(a3)
|
||||
800004f4: 00028093 mv ra,t0
|
||||
800004f8: ff078793 addi a5,a5,-16
|
||||
800004fc: 40f70733 sub a4,a4,a5
|
||||
80000500: 00f60633 add a2,a2,a5
|
||||
80000504: f6c378e3 bgeu t1,a2,80000474 <memset+0x44>
|
||||
80000508: f3dff06f j 80000444 <memset+0x14>
|
||||
|
||||
8000050c <__register_exitproc>:
|
||||
8000050c: 800027b7 lui a5,0x80002
|
||||
80000510: af87a703 lw a4,-1288(a5) # 80001af8 <__stack_top+0x81001af8>
|
||||
80000514: 14872783 lw a5,328(a4)
|
||||
80000518: 04078c63 beqz a5,80000570 <__register_exitproc+0x64>
|
||||
8000051c: 0047a703 lw a4,4(a5)
|
||||
80000520: 01f00813 li a6,31
|
||||
80000524: 06e84e63 blt a6,a4,800005a0 <__register_exitproc+0x94>
|
||||
80000528: 00271813 slli a6,a4,0x2
|
||||
8000052c: 02050663 beqz a0,80000558 <__register_exitproc+0x4c>
|
||||
80000530: 01078333 add t1,a5,a6
|
||||
80000534: 08c32423 sw a2,136(t1)
|
||||
80000538: 1887a883 lw a7,392(a5)
|
||||
8000053c: 00100613 li a2,1
|
||||
80000540: 00e61633 sll a2,a2,a4
|
||||
80000544: 00c8e8b3 or a7,a7,a2
|
||||
80000548: 1917a423 sw a7,392(a5)
|
||||
8000054c: 10d32423 sw a3,264(t1)
|
||||
80000550: 00200693 li a3,2
|
||||
80000554: 02d50463 beq a0,a3,8000057c <__register_exitproc+0x70>
|
||||
80000558: 00170713 addi a4,a4,1
|
||||
8000055c: 00e7a223 sw a4,4(a5)
|
||||
80000560: 010787b3 add a5,a5,a6
|
||||
80000564: 00b7a423 sw a1,8(a5)
|
||||
80000568: 00000513 li a0,0
|
||||
8000056c: 00008067 ret
|
||||
80000570: 14c70793 addi a5,a4,332
|
||||
80000574: 14f72423 sw a5,328(a4)
|
||||
80000578: fa5ff06f j 8000051c <__register_exitproc+0x10>
|
||||
8000057c: 18c7a683 lw a3,396(a5)
|
||||
80000580: 00170713 addi a4,a4,1
|
||||
80000584: 00e7a223 sw a4,4(a5)
|
||||
80000588: 00c6e633 or a2,a3,a2
|
||||
8000058c: 18c7a623 sw a2,396(a5)
|
||||
80000590: 010787b3 add a5,a5,a6
|
||||
80000594: 00b7a423 sw a1,8(a5)
|
||||
80000598: 00000513 li a0,0
|
||||
8000059c: 00008067 ret
|
||||
800005a0: fff00513 li a0,-1
|
||||
800005a4: 00008067 ret
|
||||
|
||||
800005a8 <__call_exitprocs>:
|
||||
800005a8: fd010113 addi sp,sp,-48
|
||||
800005ac: 800027b7 lui a5,0x80002
|
||||
800005b0: 01412c23 sw s4,24(sp)
|
||||
800005b4: af87aa03 lw s4,-1288(a5) # 80001af8 <__stack_top+0x81001af8>
|
||||
800005b8: 03212023 sw s2,32(sp)
|
||||
800005bc: 02112623 sw ra,44(sp)
|
||||
800005c0: 148a2903 lw s2,328(s4)
|
||||
800005c4: 02812423 sw s0,40(sp)
|
||||
800005c8: 02912223 sw s1,36(sp)
|
||||
800005cc: 01312e23 sw s3,28(sp)
|
||||
800005d0: 01512a23 sw s5,20(sp)
|
||||
800005d4: 01612823 sw s6,16(sp)
|
||||
800005d8: 01712623 sw s7,12(sp)
|
||||
800005dc: 01812423 sw s8,8(sp)
|
||||
800005e0: 04090063 beqz s2,80000620 <__call_exitprocs+0x78>
|
||||
800005e4: 00050b13 mv s6,a0
|
||||
800005e8: 00058b93 mv s7,a1
|
||||
800005ec: 00100a93 li s5,1
|
||||
800005f0: fff00993 li s3,-1
|
||||
800005f4: 00492483 lw s1,4(s2)
|
||||
800005f8: fff48413 addi s0,s1,-1
|
||||
800005fc: 02044263 bltz s0,80000620 <__call_exitprocs+0x78>
|
||||
80000600: 00249493 slli s1,s1,0x2
|
||||
80000604: 009904b3 add s1,s2,s1
|
||||
80000608: 040b8463 beqz s7,80000650 <__call_exitprocs+0xa8>
|
||||
8000060c: 1044a783 lw a5,260(s1)
|
||||
80000610: 05778063 beq a5,s7,80000650 <__call_exitprocs+0xa8>
|
||||
80000614: fff40413 addi s0,s0,-1
|
||||
80000618: ffc48493 addi s1,s1,-4
|
||||
8000061c: ff3416e3 bne s0,s3,80000608 <__call_exitprocs+0x60>
|
||||
80000620: 02c12083 lw ra,44(sp)
|
||||
80000624: 02812403 lw s0,40(sp)
|
||||
80000628: 02412483 lw s1,36(sp)
|
||||
8000062c: 02012903 lw s2,32(sp)
|
||||
80000630: 01c12983 lw s3,28(sp)
|
||||
80000634: 01812a03 lw s4,24(sp)
|
||||
80000638: 01412a83 lw s5,20(sp)
|
||||
8000063c: 01012b03 lw s6,16(sp)
|
||||
80000640: 00c12b83 lw s7,12(sp)
|
||||
80000644: 00812c03 lw s8,8(sp)
|
||||
80000648: 03010113 addi sp,sp,48
|
||||
8000064c: 00008067 ret
|
||||
80000650: 00492783 lw a5,4(s2)
|
||||
80000654: 0044a683 lw a3,4(s1)
|
||||
80000658: fff78793 addi a5,a5,-1
|
||||
8000065c: 04878e63 beq a5,s0,800006b8 <__call_exitprocs+0x110>
|
||||
80000660: 0004a223 sw zero,4(s1)
|
||||
80000664: fa0688e3 beqz a3,80000614 <__call_exitprocs+0x6c>
|
||||
80000668: 18892783 lw a5,392(s2)
|
||||
8000066c: 008a9733 sll a4,s5,s0
|
||||
80000670: 00492c03 lw s8,4(s2)
|
||||
80000674: 00f777b3 and a5,a4,a5
|
||||
80000678: 02079263 bnez a5,8000069c <__call_exitprocs+0xf4>
|
||||
8000067c: 000680e7 jalr a3
|
||||
80000680: 00492703 lw a4,4(s2)
|
||||
80000684: 148a2783 lw a5,328(s4)
|
||||
80000688: 01871463 bne a4,s8,80000690 <__call_exitprocs+0xe8>
|
||||
8000068c: f92784e3 beq a5,s2,80000614 <__call_exitprocs+0x6c>
|
||||
80000690: f80788e3 beqz a5,80000620 <__call_exitprocs+0x78>
|
||||
80000694: 00078913 mv s2,a5
|
||||
80000698: f5dff06f j 800005f4 <__call_exitprocs+0x4c>
|
||||
8000069c: 18c92783 lw a5,396(s2)
|
||||
800006a0: 0844a583 lw a1,132(s1)
|
||||
800006a4: 00f77733 and a4,a4,a5
|
||||
800006a8: 00071c63 bnez a4,800006c0 <__call_exitprocs+0x118>
|
||||
800006ac: 000b0513 mv a0,s6
|
||||
800006b0: 000680e7 jalr a3
|
||||
800006b4: fcdff06f j 80000680 <__call_exitprocs+0xd8>
|
||||
800006b8: 00892223 sw s0,4(s2)
|
||||
800006bc: fa9ff06f j 80000664 <__call_exitprocs+0xbc>
|
||||
800006c0: 00058513 mv a0,a1
|
||||
800006c4: 000680e7 jalr a3
|
||||
800006c8: fb9ff06f j 80000680 <__call_exitprocs+0xd8>
|
||||
|
||||
Disassembly of section .init_array:
|
||||
|
||||
800016cc <__init_array_start>:
|
||||
800016cc: 0068 addi a0,sp,12
|
||||
800016ce: 8000 0x8000
|
||||
|
||||
Disassembly of section .data:
|
||||
|
||||
800016d0 <impure_data>:
|
||||
800016d0: 0000 unimp
|
||||
800016d2: 0000 unimp
|
||||
800016d4: 19bc addi a5,sp,248
|
||||
800016d6: 8000 0x8000
|
||||
800016d8: 1a24 addi s1,sp,312
|
||||
800016da: 8000 0x8000
|
||||
800016dc: 1a8c addi a1,sp,368
|
||||
800016de: 8000 0x8000
|
||||
...
|
||||
80001778: 0001 nop
|
||||
8000177a: 0000 unimp
|
||||
8000177c: 0000 unimp
|
||||
8000177e: 0000 unimp
|
||||
80001780: 330e fld ft6,224(sp)
|
||||
80001782: abcd j 80001d74 <__BSS_END__+0x1f8>
|
||||
80001784: 1234 addi a3,sp,296
|
||||
80001786: e66d bnez a2,80001870 <impure_data+0x1a0>
|
||||
80001788: deec sw a1,124(a3)
|
||||
8000178a: 0005 c.nop 1
|
||||
8000178c: 0000000b 0xb
|
||||
...
|
||||
|
||||
Disassembly of section .sdata:
|
||||
|
||||
80001af8 <_global_impure_ptr>:
|
||||
80001af8: 16d0 addi a2,sp,868
|
||||
80001afa: 8000 0x8000
|
||||
|
||||
Disassembly of section .bss:
|
||||
|
||||
80001afc <g_wspawn_args>:
|
||||
...
|
||||
|
||||
Disassembly of section .comment:
|
||||
|
||||
00000000 <.comment>:
|
||||
0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm
|
||||
4: 2820 fld fs0,80(s0)
|
||||
6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm
|
||||
a: 3120 fld fs0,96(a0)
|
||||
c: 2e30 fld fa2,88(a2)
|
||||
e: 2e32 fld ft8,264(sp)
|
||||
10: 0030 addi a2,sp,8
|
||||
|
||||
Disassembly of section .riscv.attributes:
|
||||
|
||||
00000000 <.riscv.attributes>:
|
||||
0: 2941 jal 490 <__stack_size+0x90>
|
||||
2: 0000 unimp
|
||||
4: 7200 flw fs0,32(a2)
|
||||
6: 7369 lui t1,0xffffa
|
||||
8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14>
|
||||
c: 001f 0000 1004 0x10040000001f
|
||||
12: 7205 lui tp,0xfffe1
|
||||
14: 3376 fld ft6,376(sp)
|
||||
16: 6932 flw fs2,12(sp)
|
||||
18: 7032 flw ft0,44(sp)
|
||||
1a: 5f30 lw a2,120(a4)
|
||||
1c: 326d jal fffff9c6 <__stack_top+0xfff9c6>
|
||||
1e: 3070 fld fa2,224(s0)
|
||||
20: 665f 7032 0030 0x307032665f
|
||||
26: 0108 addi a0,sp,128
|
||||
28: 0b0a slli s6,s6,0x2
|
||||
BIN
driver/tests/io_addr/kernel.elf
Executable file
BIN
driver/tests/io_addr/kernel.elf
Executable file
Binary file not shown.
246
driver/tests/io_addr/main.cpp
Normal file
246
driver/tests/io_addr/main.cpp
Normal file
@@ -0,0 +1,246 @@
|
||||
#include <iostream>
#include <vector>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <vortex.h>
#include <VX_config.h>
#include "common.h"
|
||||
|
||||
#define NUM_ADDRS 16
|
||||
|
||||
// Evaluate a runtime call once; a zero result means success. On any other
// result, print the failing expression with its return code, release the
// resources via cleanup() and terminate the process with exit code -1.
#define RT_CHECK(_expr)                                          \
  do {                                                           \
    int _ret = (_expr);                                          \
    if (_ret != 0) {                                             \
      printf("Error: '%s' returned %d!\n", #_expr, (int)_ret);   \
      cleanup();                                                 \
      exit(-1);                                                  \
    }                                                            \
  } while (false)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const char* kernel_file = "kernel.bin";
|
||||
uint32_t count = 0;
|
||||
|
||||
size_t usr_test_mem;
|
||||
|
||||
std::vector<uint32_t> src_data;
|
||||
std::vector<int32_t> ref_data;
|
||||
|
||||
vx_device_h device = nullptr;
|
||||
vx_buffer_h staging_buf = nullptr;
|
||||
|
||||
// Print the program banner and the list of accepted command-line options.
static void show_usage() {
  std::cout << "Vortex Driver Test." << std::endl
            << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl;
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:k:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
count = atoi(optarg);
|
||||
break;
|
||||
case 'k':
|
||||
kernel_file = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cleanup() {
|
||||
if (staging_buf) {
|
||||
vx_buf_release(staging_buf);
|
||||
}
|
||||
if (device) {
|
||||
vx_dev_close(device);
|
||||
}
|
||||
}
|
||||
|
||||
void gen_input_data(uint32_t num_points) {
|
||||
src_data.resize(num_points);
|
||||
|
||||
uint32_t u = 0, k = 0;
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
if (0 ==(i % 4)) {
|
||||
k = (i + u) % NUM_ADDRS;
|
||||
++u;
|
||||
}
|
||||
uint32_t j = i % NUM_ADDRS;
|
||||
uint32_t v = ((j == k) ? usr_test_mem : IO_BASE_ADDR) + j * sizeof(uint32_t);
|
||||
src_data[i] = v;
|
||||
std::cout << std::dec << i << "," << k << ": value=0x" << std::hex << v << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void gen_ref_data(uint32_t num_points) {
|
||||
ref_data.resize(num_points);
|
||||
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
uint32_t j = i % NUM_ADDRS;
|
||||
ref_data[i] = j * j;
|
||||
}
|
||||
}
|
||||
|
||||
int run_test(const kernel_arg_t& kernel_arg,
|
||||
uint32_t buf_size,
|
||||
uint32_t num_points) {
|
||||
// start device
|
||||
std::cout << "start device" << std::endl;
|
||||
RT_CHECK(vx_start(device));
|
||||
|
||||
// wait for completion
|
||||
std::cout << "wait for completion" << std::endl;
|
||||
RT_CHECK(vx_ready_wait(device, -1));
|
||||
|
||||
// download destination buffer
|
||||
std::cout << "download destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0));
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
{
|
||||
int errors = 0;
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
int ref = ref_data.at(i);
|
||||
int cur = buf_ptr[i];
|
||||
if (cur != ref) {
|
||||
std::cout << "error at result #" << std::dec << i
|
||||
<< std::hex << ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
std::cout << "Found " << std::dec << errors << " errors!" << std::endl;
|
||||
std::cout << "FAILED!" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t value;
|
||||
kernel_arg_t kernel_arg;
|
||||
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
if (count == 0) {
|
||||
count = 1;
|
||||
}
|
||||
|
||||
std::srand(50);
|
||||
|
||||
// open device connection
|
||||
std::cout << "open device connection" << std::endl;
|
||||
RT_CHECK(vx_dev_open(&device));
|
||||
|
||||
unsigned max_cores, max_warps, max_threads;
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps));
|
||||
RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads));
|
||||
|
||||
uint32_t num_tasks = max_cores * max_warps * max_threads;
|
||||
uint32_t num_points = count * num_tasks;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, NUM_ADDRS * sizeof(uint32_t), &usr_test_mem));
|
||||
|
||||
// generate input data
|
||||
gen_input_data(num_points);
|
||||
|
||||
// generate reference data
|
||||
gen_ref_data(num_points);
|
||||
|
||||
uint32_t src_buf_size = src_data.size() * sizeof(int32_t);
|
||||
uint32_t dst_buf_size = src_data.size() * sizeof(int32_t);
|
||||
|
||||
std::cout << "number of points: " << num_points << std::endl;
|
||||
std::cout << "buffer size: " << dst_buf_size << " bytes" << std::endl;
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
RT_CHECK(vx_upload_kernel_file(device, kernel_file));
|
||||
|
||||
// allocate device memory
|
||||
std::cout << "allocate device memory" << std::endl;
|
||||
|
||||
RT_CHECK(vx_alloc_dev_mem(device, src_buf_size, &value));
|
||||
kernel_arg.src_ptr = value;
|
||||
RT_CHECK(vx_alloc_dev_mem(device, dst_buf_size, &value));
|
||||
kernel_arg.dst_ptr = value;
|
||||
|
||||
kernel_arg.num_points = num_points;
|
||||
|
||||
std::cout << "dev_src=" << std::hex << kernel_arg.src_ptr << std::endl;
|
||||
std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl;
|
||||
|
||||
// allocate shared memory
|
||||
std::cout << "allocate shared memory" << std::endl;
|
||||
uint32_t staging_buf_size = std::max<uint32_t>(src_buf_size,
|
||||
std::max<uint32_t>(dst_buf_size,
|
||||
sizeof(kernel_arg_t)));
|
||||
RT_CHECK(vx_alloc_shared_mem(device, staging_buf_size, &staging_buf));
|
||||
|
||||
// upload kernel argument
|
||||
std::cout << "upload kernel argument" << std::endl;
|
||||
{
|
||||
auto buf_ptr = (int*)vx_host_ptr(staging_buf);
|
||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||
}
|
||||
|
||||
// upload test address data
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < NUM_ADDRS; ++i) {
|
||||
buf_ptr[i] = i * i;
|
||||
}
|
||||
}
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, 0xFF000000, NUM_ADDRS * sizeof(uint32_t), 0));
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, usr_test_mem, NUM_ADDRS * sizeof(uint32_t), 0));
|
||||
|
||||
// upload source buffer
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = src_data.at(i);
|
||||
}
|
||||
}
|
||||
std::cout << "upload source buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src_ptr, src_buf_size, 0));
|
||||
|
||||
// clear destination buffer
|
||||
{
|
||||
auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
buf_ptr[i] = 0xdeadbeef;
|
||||
}
|
||||
}
|
||||
std::cout << "clear destination buffer" << std::endl;
|
||||
RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, dst_buf_size, 0));
|
||||
|
||||
// run tests
|
||||
std::cout << "run tests" << std::endl;
|
||||
RT_CHECK(run_test(kernel_arg, dst_buf_size, num_points));
|
||||
|
||||
// cleanup
|
||||
std::cout << "cleanup" << std::endl;
|
||||
cleanup();
|
||||
|
||||
std::cout << "PASSED!" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -41,6 +41,7 @@ module VX_cluster #(
|
||||
output wire busy,
|
||||
output wire ebreak
|
||||
);
|
||||
`STATIC_ASSERT((`L2_ENABLE == 0 || `NUM_CORES > 1), ("invalid parameter"))
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_valid;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_req_rw;
|
||||
@@ -166,7 +167,7 @@ module VX_cluster #(
|
||||
.CACHE_LINE_SIZE (`L2CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L2NUM_BANKS),
|
||||
.WORD_SIZE (`L2WORD_SIZE),
|
||||
.NUM_REQS (`NUM_CORES),
|
||||
.NUM_REQS (`L2NUM_REQS),
|
||||
.CREQ_SIZE (`L2CREQ_SIZE),
|
||||
.MSHR_SIZE (`L2MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L2MRSQ_SIZE),
|
||||
@@ -174,15 +175,14 @@ module VX_cluster #(
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`XMEM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.MEM_TAG_WIDTH (`L2MEM_TAG_WIDTH)
|
||||
.MEM_TAG_WIDTH (`L2MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) l2cache (
|
||||
`SCOPE_BIND_VX_cluster_l2cache
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.flush (1'b0),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_l2cache_if),
|
||||
`endif
|
||||
|
||||
@@ -45,20 +45,20 @@
|
||||
`define STARTUP_ADDR 32'h80000000
|
||||
`endif
|
||||
|
||||
`ifndef IO_BUS_BASE_ADDR
|
||||
`define IO_BUS_BASE_ADDR 32'hFF000000
|
||||
`ifndef IO_BASE_ADDR
|
||||
`define IO_BASE_ADDR 32'hFF000000
|
||||
`endif
|
||||
|
||||
`ifndef SHARED_MEM_BASE_ADDR
|
||||
`define SHARED_MEM_BASE_ADDR `IO_BUS_BASE_ADDR
|
||||
`ifndef IO_ADDR_SIZE
|
||||
`define IO_ADDR_SIZE (32'hFFFFFFFF - 32'hFF000000 + 1)
|
||||
`endif
|
||||
|
||||
`ifndef SHARED_MEM_BASE_ADDR_ALIGN
|
||||
`define SHARED_MEM_BASE_ADDR_ALIGN 64
|
||||
`ifndef IO_ADDR_COUT
|
||||
`define IO_ADDR_COUT 32'hFFFFFFFC
|
||||
`endif
|
||||
|
||||
`ifndef IO_BUS_ADDR_COUT
|
||||
`define IO_BUS_ADDR_COUT 32'hFFFFFFFC
|
||||
`ifndef SMEM_BASE_ADDR
|
||||
`define SMEM_BASE_ADDR `IO_BASE_ADDR
|
||||
`endif
|
||||
|
||||
`ifndef FRAME_BUFFER_BASE_ADDR
|
||||
|
||||
@@ -71,13 +71,13 @@ module VX_core #(
|
||||
//--
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.NUM_REQS(`DNUM_REQUESTS),
|
||||
.NUM_REQS(`DNUM_REQS),
|
||||
.WORD_SIZE(`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
|
||||
) dcache_core_req_if();
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.NUM_REQS(`DNUM_REQUESTS),
|
||||
.NUM_REQS(`DNUM_REQS),
|
||||
.WORD_SIZE(`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
|
||||
) dcache_core_rsp_if();
|
||||
|
||||
@@ -18,40 +18,36 @@ module VX_databus_arb (
|
||||
// output response
|
||||
VX_dcache_core_rsp_if core_rsp_if
|
||||
);
|
||||
localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN);
|
||||
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
|
||||
localparam REQ_ADDRW = 32 - REQ_ASHIFT;
|
||||
localparam REQ_DATAW = 1 + REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
|
||||
localparam REQ_ADDRW = 32 - REQ_ASHIFT;
|
||||
localparam REQ_DATAW = 1 + REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
|
||||
|
||||
//
|
||||
// handle requests
|
||||
//
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
||||
wire cache_req_valid_out, cache_req_ready_out;
|
||||
wire is_smem_addr_in, is_smem_addr_out;
|
||||
|
||||
// select shared memory bus
|
||||
assign is_smem_addr_in = `SM_ENABLE
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_req_if.valid[i]),
|
||||
.data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
||||
.ready_in (core_req_if.ready[i]),
|
||||
.valid_out (cache_req_valid_out),
|
||||
.data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
|
||||
.ready_out (cache_req_ready_out)
|
||||
);
|
||||
|
||||
if (`SM_ENABLE) begin
|
||||
wire cache_req_valid_out;
|
||||
wire cache_req_ready_out;
|
||||
wire is_smem_addr_out;
|
||||
|
||||
wire is_smem_addr_in = core_req_if.tag[i][1];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_req_if.valid[i]),
|
||||
.data_in ({is_smem_addr_in, core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
||||
.ready_in (core_req_if.ready[i]),
|
||||
.valid_out (cache_req_valid_out),
|
||||
.data_out ({is_smem_addr_out, cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
|
||||
.ready_out (cache_req_ready_out)
|
||||
);
|
||||
|
||||
assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out;
|
||||
assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out;
|
||||
assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i];
|
||||
@@ -61,10 +57,22 @@ module VX_databus_arb (
|
||||
assign smem_req_if.byteen[i] = cache_req_if.byteen[i];
|
||||
assign smem_req_if.data[i] = cache_req_if.data[i];
|
||||
assign smem_req_if.tag[i] = cache_req_if.tag[i];
|
||||
|
||||
end else begin
|
||||
`UNUSED_VAR (is_smem_addr_out)
|
||||
assign cache_req_if.valid[i] = cache_req_valid_out;
|
||||
assign cache_req_ready_out = cache_req_if.ready[i];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW)
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (core_req_if.valid[i]),
|
||||
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
|
||||
.ready_in (core_req_if.ready[i]),
|
||||
.valid_out (cache_req_if.valid[i]),
|
||||
.data_out ({cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
|
||||
.ready_out (cache_req_if.ready[i])
|
||||
);
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
@@ -90,7 +98,7 @@ module VX_databus_arb (
|
||||
VX_stream_arbiter #(
|
||||
.NUM_REQS (2),
|
||||
.DATAW (RSP_DATAW),
|
||||
.BUFFERED (0)
|
||||
.BUFFERED (1)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -237,6 +237,9 @@
|
||||
`define DBG_CACHE_REQ_MDATAW 0
|
||||
`endif
|
||||
|
||||
// Shared memory and non-cacheable flags
|
||||
`define SM_NC_BITS 2
|
||||
|
||||
////////////////////////// Icache Configurable Knobs //////////////////////////
|
||||
|
||||
// Cache ID
|
||||
@@ -281,9 +284,10 @@
|
||||
`define DWORD_SIZE 4
|
||||
|
||||
// TAG sharing enable
|
||||
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
`define DCORE_TAG_ID_BITS (`LSUQ_ADDR_BITS + `SM_NC_BITS)
|
||||
|
||||
// Core request tag bits
|
||||
// Input request tag bits
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
|
||||
// Memory request data bits
|
||||
@@ -295,11 +299,13 @@
|
||||
// Memory byte enable bits
|
||||
`define DMEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
|
||||
|
||||
// Memory request tag bits
|
||||
`define DMEM_TAG_WIDTH `DMEM_ADDR_WIDTH
|
||||
// Input request size
|
||||
`define DNUM_REQS `NUM_THREADS
|
||||
|
||||
// Core request size
|
||||
`define DNUM_REQUESTS `NUM_THREADS
|
||||
// Memory request tag bits
|
||||
`define _DMEM_ADDR_RATIO_W $clog2(`DCACHE_LINE_SIZE / `DWORD_SIZE)
|
||||
`define _DNC_MEM_TAG_WIDTH ($clog2(`DNUM_REQS) + `_DMEM_ADDR_RATIO_W + `DCORE_TAG_WIDTH)
|
||||
`define DMEM_TAG_WIDTH `MAX((`DMEM_ADDR_WIDTH + `SM_NC_BITS), `_DNC_MEM_TAG_WIDTH)
|
||||
|
||||
////////////////////////// SM Configurable Knobs //////////////////////////////
|
||||
|
||||
@@ -312,11 +318,8 @@
|
||||
// bank address offset
|
||||
`define SBANK_ADDR_OFFSET `CLOG2(`STACK_SIZE / `SWORD_SIZE)
|
||||
|
||||
// Core request size
|
||||
`define SNUM_REQUESTS `NUM_THREADS
|
||||
|
||||
// Core request size
|
||||
`define SNUM_REQUESTS `NUM_THREADS
|
||||
// Input request size
|
||||
`define SNUM_REQS `NUM_THREADS
|
||||
|
||||
////////////////////////// L2cache Configurable Knobs /////////////////////////
|
||||
|
||||
@@ -324,12 +327,12 @@
|
||||
`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
|
||||
|
||||
// Block size in bytes
|
||||
`define L2CACHE_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`define L2CACHE_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
|
||||
// Word size in bytes
|
||||
`define L2WORD_SIZE `DCACHE_LINE_SIZE
|
||||
|
||||
// Core request tag bits
|
||||
// Input request tag bits
|
||||
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
|
||||
|
||||
// Memory request data bits
|
||||
@@ -341,8 +344,14 @@
|
||||
// Memory byte enable bits
|
||||
`define L2MEM_BYTEEN_WIDTH `L2CACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define L2NUM_REQS `NUM_CORES
|
||||
|
||||
// Memory request tag bits
|
||||
`define L2MEM_TAG_WIDTH (`L2_ENABLE ? `L2MEM_ADDR_WIDTH : (`XMEM_TAG_WIDTH+`CLOG2(`NUM_CORES)))
|
||||
`define _L2MEM_ADDR_RATIO_W $clog2(`L2CACHE_LINE_SIZE / `L2WORD_SIZE)
|
||||
`define _L2NC_MEM_TAG_WIDTH ($clog2(`L2NUM_REQS) + `_L2MEM_ADDR_RATIO_W + `XMEM_TAG_WIDTH)
|
||||
`define _L2MEM_TAG_WIDTH `MAX((`L2MEM_ADDR_WIDTH + `SM_NC_BITS), `_L2NC_MEM_TAG_WIDTH)
|
||||
`define L2MEM_TAG_WIDTH (`L2_ENABLE ? `_L2MEM_TAG_WIDTH : (`XMEM_TAG_WIDTH + `CLOG2(`L2NUM_REQS)))
|
||||
|
||||
////////////////////////// L3cache Configurable Knobs /////////////////////////
|
||||
|
||||
@@ -350,12 +359,12 @@
|
||||
`define L3CACHE_ID 0
|
||||
|
||||
// Block size in bytes
|
||||
`define L3CACHE_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
`define L3CACHE_LINE_SIZE `MEM_BLOCK_SIZE
|
||||
|
||||
// Word size in bytes
|
||||
`define L3WORD_SIZE `L2CACHE_LINE_SIZE
|
||||
|
||||
// Core request tag bits
|
||||
// Input request tag bits
|
||||
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
|
||||
|
||||
// Memory request data bits
|
||||
@@ -367,21 +376,28 @@
|
||||
// Memory byte enable bits
|
||||
`define L3MEM_BYTEEN_WIDTH `L3CACHE_LINE_SIZE
|
||||
|
||||
// Input request size
|
||||
`define L3NUM_REQS `NUM_CLUSTERS
|
||||
|
||||
// Memory request tag bits
|
||||
`define L3MEM_TAG_WIDTH (`L3_ENABLE ? `L3MEM_ADDR_WIDTH : (`L2MEM_TAG_WIDTH+`CLOG2(`NUM_CLUSTERS)))
|
||||
`define _L3MEM_ADDR_RATIO_W $clog2(`L3CACHE_LINE_SIZE / `L3WORD_SIZE)
|
||||
`define _L3NC_MEM_TAG_WIDTH ($clog2(`L3NUM_REQS) + `_L3MEM_ADDR_RATIO_W + `L2MEM_TAG_WIDTH)
|
||||
`define _L3MEM_TAG_WIDTH `MAX((`L3MEM_ADDR_WIDTH + `SM_NC_BITS), `_L3NC_MEM_TAG_WIDTH)
|
||||
`define L3MEM_TAG_WIDTH (`L3_ENABLE ? `_L3MEM_TAG_WIDTH : (`L2MEM_TAG_WIDTH + `CLOG2(`L3NUM_REQS)))
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3MEM_BYTEEN_WIDTH
|
||||
`define VX_MEM_ADDR_WIDTH `L3MEM_ADDR_WIDTH
|
||||
`define VX_MEM_LINE_WIDTH `L3MEM_LINE_WIDTH
|
||||
`define VX_MEM_TAG_WIDTH `L3MEM_TAG_WIDTH
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3MEM_BYTEEN_WIDTH
|
||||
`define VX_MEM_ADDR_WIDTH `L3MEM_ADDR_WIDTH
|
||||
`define VX_MEM_LINE_WIDTH `L3MEM_LINE_WIDTH
|
||||
`define VX_MEM_TAG_WIDTH `L3MEM_TAG_WIDTH
|
||||
`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH
|
||||
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
|
||||
`define XMEM_TAG_WIDTH (`DMEM_TAG_WIDTH+`CLOG2(2))
|
||||
// Merged D-cache/I-cache memory tag
|
||||
`define XMEM_TAG_WIDTH (`DMEM_TAG_WIDTH + `CLOG2(2))
|
||||
|
||||
`include "VX_types.vh"
|
||||
|
||||
|
||||
@@ -19,8 +19,17 @@ module VX_lsu_unit #(
|
||||
VX_commit_if ld_commit_if,
|
||||
VX_commit_if st_commit_if
|
||||
);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = 32 - MEM_ASHIFT;
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam REQ_ASHIFT = `CLOG2(`DWORD_SIZE);
|
||||
localparam REQ_ADDRW = 32 - REQ_ASHIFT;
|
||||
|
||||
localparam ADDR_TYPEW = 1 + `SM_ENABLE;
|
||||
|
||||
// The IO and shared-memory base addresses are compared below at memory-block
// granularity (addr >> MEM_ASHIFT), so they must be multiples of the block
// size. Checking modulo MEM_ASHIFT (the shift amount) does not test alignment.
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
`STATIC_ASSERT(0 == (`SMEM_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
// the shared-memory size must be a whole number of memory blocks
`STATIC_ASSERT(`SMEM_SIZE == `MEM_BLOCK_SIZE * (`SMEM_SIZE / `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
|
||||
wire req_valid;
|
||||
wire [`NUM_THREADS-1:0] req_tmask;
|
||||
@@ -33,29 +42,53 @@ module VX_lsu_unit #(
|
||||
wire [31:0] req_pc;
|
||||
wire req_is_dup;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] full_address;
|
||||
wire [`NUM_THREADS-1:0][ADDR_TYPEW-1:0] lsu_addr_type, req_addr_type;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] full_addr;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign full_address[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
|
||||
assign full_addr[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
|
||||
end
|
||||
|
||||
wire [`NUM_THREADS-1:0][REQ_ADDRW-1:0] word_addr;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign word_addr[i] = full_addr[i][REQ_ASHIFT +: REQ_ADDRW];
|
||||
end
|
||||
|
||||
wire [`NUM_THREADS-1:0] addr_matches;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign addr_matches[i] = (full_address[0][31:2] == full_address[i][31:2]) || ~lsu_req_if.tmask[i];
|
||||
assign addr_matches[i] = (word_addr[0] == word_addr[i]) || ~lsu_req_if.tmask[i];
|
||||
end
|
||||
wire is_dup_load = lsu_req_if.wb && lsu_req_if.tmask[0] && (& addr_matches);
|
||||
|
||||
wire [`NUM_THREADS-1:0] is_addr_sm, is_addr_nc;
|
||||
|
||||
// Classify each thread's request address at memory-block granularity and
// pack the resulting flags into lsu_addr_type (bit 0 = non-cacheable,
// bit 1 = shared memory when `SM_ENABLE is set).
for (genvar i = 0; i < `NUM_THREADS; i++) begin
    // block-aligned part of the word address (drops the in-block word offset)
    wire [MEM_ADDRW-1:0] block_addr = word_addr[i][(MEM_ASHIFT-REQ_ASHIFT) +: MEM_ADDRW];

    // is shared memory address: [SMEM_BASE_ADDR - SMEM_SIZE, SMEM_BASE_ADDR)
    assign is_addr_sm[i] = (block_addr >= MEM_ADDRW'((`SMEM_BASE_ADDR - `SMEM_SIZE) >> MEM_ASHIFT))
                         & (block_addr <  MEM_ADDRW'(`SMEM_BASE_ADDR >> MEM_ASHIFT));

    // is non-cacheable address: at or above IO_BASE_ADDR
    assign is_addr_nc[i] = (block_addr >= MEM_ADDRW'(`IO_BASE_ADDR >> MEM_ASHIFT));

    if (`SM_ENABLE) begin
        assign lsu_addr_type[i] = {is_addr_sm[i], is_addr_nc[i]};
    end else begin
        // ADDR_TYPEW is 1 here, so drive the single bit directly instead of
        // relying on truncation of a 2-bit concatenation
        assign lsu_addr_type[i] = is_addr_nc[i];
    end
end
|
||||
|
||||
wire ready_in;
|
||||
wire stall_in = ~ready_in && req_valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + `LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * ADDR_TYPEW) + `LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.RESETW (1)
|
||||
) req_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_in),
|
||||
.data_in ({lsu_req_if.valid, is_dup_load, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_address, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}),
|
||||
.data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_type, req_rd, req_wb, req_data})
|
||||
.data_in ({lsu_req_if.valid, is_dup_load, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}),
|
||||
.data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data})
|
||||
);
|
||||
|
||||
// Can accept new request?
|
||||
@@ -77,10 +110,10 @@ module VX_lsu_unit #(
|
||||
reg [`NUM_THREADS-1:0] req_sent_mask;
|
||||
wire req_ready_all;
|
||||
|
||||
wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr;
|
||||
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
|
||||
wire mbuf_full;
|
||||
|
||||
wire [`NUM_THREADS-1:0][1:0] req_offset, rsp_offset;
|
||||
wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_offset, rsp_offset;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign req_offset[i] = req_addr[i][1:0];
|
||||
end
|
||||
@@ -95,10 +128,10 @@ module VX_lsu_unit #(
|
||||
|
||||
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
|
||||
|
||||
assign mbuf_raddr = dcache_rsp_if.tag[`DCORE_TAG_ID_BITS-1:0];
|
||||
assign mbuf_raddr = dcache_rsp_if.tag[ADDR_TYPEW +: `LSUQ_ADDR_BITS];
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * 2) + 1),
|
||||
.DATAW (`NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1),
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
) req_metadata (
|
||||
.clk (clk),
|
||||
@@ -132,8 +165,8 @@ module VX_lsu_unit #(
|
||||
wire is_req_start = (0 == req_sent_mask);
|
||||
|
||||
// need to hold the acquired tag index until the full request is submitted
|
||||
reg [`DCORE_TAG_ID_BITS-1:0] req_tag_hold;
|
||||
wire [`DCORE_TAG_ID_BITS-1:0] req_tag = is_req_start ? mbuf_waddr : req_tag_hold;
|
||||
reg [`LSUQ_ADDR_BITS-1:0] req_tag_hold;
|
||||
wire [`LSUQ_ADDR_BITS-1:0] req_tag = is_req_start ? mbuf_waddr : req_tag_hold;
|
||||
always @(posedge clk) begin
|
||||
if (mbuf_push) begin
|
||||
req_tag_hold <= mbuf_waddr;
|
||||
@@ -193,11 +226,13 @@ module VX_lsu_unit #(
|
||||
assign dcache_req_if.byteen = mem_req_byteen;
|
||||
assign dcache_req_if.data = mem_req_data;
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign dcache_req_if.tag = {`NUM_THREADS{req_pc, req_wid, req_tag}};
|
||||
`else
|
||||
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
|
||||
`endif
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
assign dcache_req_if.tag[i] = {req_pc, req_wid, req_tag, req_addr_type[i]};
|
||||
`else
|
||||
assign dcache_req_if.tag[i] = {req_tag, req_addr_type[i]};
|
||||
`endif
|
||||
end
|
||||
|
||||
assign ready_in = req_dep_ready && req_ready_all;
|
||||
|
||||
@@ -293,18 +328,22 @@ module VX_lsu_unit #(
|
||||
if (dcache_req_if.rw[0]) begin
|
||||
$write("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
|
||||
`PRINT_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
$write(", tag=%0h, byteen=%0h, data=", dcache_req_if.tag[0], dcache_req_if.byteen);
|
||||
$write(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`PRINT_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
$write(", data=");
|
||||
`PRINT_ARRAY1D(dcache_req_if.data, `NUM_THREADS);
|
||||
$write("\n");
|
||||
end else begin
|
||||
$write("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
|
||||
`PRINT_ARRAY1D(req_addr, `NUM_THREADS);
|
||||
$write(", tag=%0h, byteen=%0h, rd=%0d, is_dup=%b\n", dcache_req_if.tag[0], dcache_req_if.byteen, req_rd, req_is_dup);
|
||||
$write(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
|
||||
`PRINT_ARRAY1D(req_addr_type, `NUM_THREADS);
|
||||
$write(", rd=%0d, is_dup=%b\n", req_rd, req_is_dup);
|
||||
end
|
||||
end
|
||||
if (dcache_rsp_fire) begin
|
||||
$write("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=",
|
||||
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd);
|
||||
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, mbuf_raddr, rsp_rd);
|
||||
`PRINT_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS);
|
||||
$write(", is_dup=%b\n", rsp_is_dup);
|
||||
end
|
||||
|
||||
@@ -41,25 +41,25 @@ module VX_mem_unit # (
|
||||
) dcache_mem_rsp_if(), icache_mem_rsp_if();
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.NUM_REQS (`DNUM_REQUESTS),
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
) dcache_req_if();
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.NUM_REQS (`DNUM_REQUESTS),
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
) dcache_rsp_if();
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.NUM_REQS (`DNUM_REQUESTS),
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
) smem_req_if();
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.NUM_REQS (`DNUM_REQUESTS),
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
) smem_rsp_if();
|
||||
@@ -108,8 +108,6 @@ module VX_mem_unit # (
|
||||
.clk (clk),
|
||||
.reset (icache_reset),
|
||||
|
||||
.flush (1'b0),
|
||||
|
||||
// Core request
|
||||
.core_req_valid (icache_core_req_if.valid),
|
||||
.core_req_rw (1'b0),
|
||||
@@ -152,7 +150,7 @@ module VX_mem_unit # (
|
||||
.NUM_BANKS (`DNUM_BANKS),
|
||||
.NUM_PORTS (`DNUM_PORTS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.NUM_REQS (`DNUM_REQUESTS),
|
||||
.NUM_REQS (`DNUM_REQS),
|
||||
.CREQ_SIZE (`DCREQ_SIZE),
|
||||
.MSHR_SIZE (`DMSHR_SIZE),
|
||||
.MRSQ_SIZE (`DMRSQ_SIZE),
|
||||
@@ -160,15 +158,14 @@ module VX_mem_unit # (
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
.MEM_TAG_WIDTH (`DMEM_TAG_WIDTH)
|
||||
.MEM_TAG_WIDTH (`DMEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) dcache (
|
||||
`SCOPE_BIND_VX_mem_unit_dcache
|
||||
|
||||
.clk (clk),
|
||||
.reset (dcache_reset),
|
||||
|
||||
.flush (1'b0),
|
||||
|
||||
// Core req
|
||||
.core_req_valid (dcache_req_if.valid),
|
||||
.core_req_rw (dcache_req_if.rw),
|
||||
@@ -219,7 +216,7 @@ module VX_mem_unit # (
|
||||
.CACHE_SIZE (`SMEM_SIZE),
|
||||
.NUM_BANKS (`SNUM_BANKS),
|
||||
.WORD_SIZE (`SWORD_SIZE),
|
||||
.NUM_REQS (`SNUM_REQUESTS),
|
||||
.NUM_REQS (`SNUM_REQS),
|
||||
.CREQ_SIZE (`SCREQ_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
|
||||
|
||||
@@ -39,6 +39,7 @@ module Vortex (
|
||||
output wire busy,
|
||||
output wire ebreak
|
||||
);
|
||||
`STATIC_ASSERT((`L3_ENABLE == 0 || `NUM_CLUSTERS > 1), ("invalid parameter"))
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_rw;
|
||||
@@ -168,7 +169,7 @@ module Vortex (
|
||||
.CACHE_LINE_SIZE (`L3CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (`L3NUM_BANKS),
|
||||
.WORD_SIZE (`L3WORD_SIZE),
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.NUM_REQS (`L3NUM_REQS),
|
||||
.CREQ_SIZE (`L3CREQ_SIZE),
|
||||
.MSHR_SIZE (`L3MSHR_SIZE),
|
||||
.MRSQ_SIZE (`L3MRSQ_SIZE),
|
||||
@@ -176,15 +177,14 @@ module Vortex (
|
||||
.WRITE_ENABLE (1),
|
||||
.CORE_TAG_WIDTH (`L2MEM_TAG_WIDTH),
|
||||
.CORE_TAG_ID_BITS (0),
|
||||
.MEM_TAG_WIDTH (`L3MEM_TAG_WIDTH)
|
||||
.MEM_TAG_WIDTH (`L3MEM_TAG_WIDTH),
|
||||
.NC_ENABLE (1)
|
||||
) l3cache (
|
||||
`SCOPE_BIND_Vortex_l3cache
|
||||
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.flush (1'b0),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_cache_if (perf_l3cache_if),
|
||||
`endif
|
||||
@@ -267,7 +267,6 @@ module Vortex (
|
||||
end
|
||||
|
||||
`SCOPE_ASSIGN (reset, reset);
|
||||
|
||||
`SCOPE_ASSIGN (mem_req_fire, mem_req_valid && mem_req_ready);
|
||||
`SCOPE_ASSIGN (mem_req_addr, `TO_FULL_ADDR(mem_req_addr));
|
||||
`SCOPE_ASSIGN (mem_req_rw, mem_req_rw);
|
||||
|
||||
322
hw/rtl/cache/VX_cache.v
vendored
322
hw/rtl/cache/VX_cache.v
vendored
@@ -36,30 +36,38 @@ module VX_cache #(
|
||||
parameter CORE_TAG_ID_BITS = CORE_TAG_WIDTH,
|
||||
|
||||
// Memory request tag size
|
||||
parameter MEM_TAG_WIDTH = (32 - $clog2(CACHE_LINE_SIZE)),
|
||||
parameter MEM_TAG_WIDTH = (32 - $clog2(CACHE_LINE_SIZE)),
|
||||
|
||||
// bank offset from beginning of index range
|
||||
parameter BANK_ADDR_OFFSET = 0
|
||||
parameter BANK_ADDR_OFFSET = 0,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0
|
||||
) (
|
||||
`SCOPE_IO_VX_cache
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_cache_if,
|
||||
`endif
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Core request
|
||||
input wire [NUM_REQS-1:0] core_req_valid,
|
||||
input wire [NUM_REQS-1:0] core_req_rw,
|
||||
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire [NUM_REQS-1:0] core_req_ready,
|
||||
input wire [NUM_REQS-1:0] core_req_valid,
|
||||
input wire [NUM_REQS-1:0] core_req_rw,
|
||||
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire [NUM_REQS-1:0] core_req_ready,
|
||||
|
||||
// Core response
|
||||
output wire [NUM_REQS-1:0] core_rsp_valid,
|
||||
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [NUM_REQS-1:0] core_rsp_valid,
|
||||
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
|
||||
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
|
||||
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready,
|
||||
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready,
|
||||
|
||||
// Memory request
|
||||
output wire mem_req_valid,
|
||||
@@ -74,19 +82,207 @@ module VX_cache #(
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// PERF
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_cache_if,
|
||||
`endif
|
||||
|
||||
// device flush
|
||||
input wire flush
|
||||
output wire mem_rsp_ready
|
||||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank;
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Core request
|
||||
wire [NUM_REQS-1:0] core_req_valid_out;
|
||||
wire [NUM_REQS-1:0] core_req_rw_out;
|
||||
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr_out;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen_out;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data_out;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag_out;
|
||||
wire [NUM_REQS-1:0] core_req_ready_out;
|
||||
|
||||
// Core response
|
||||
wire [NUM_REQS-1:0] core_rsp_valid_in;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_in;
|
||||
wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready_in;
|
||||
|
||||
// Memory request
|
||||
wire mem_req_valid_in;
|
||||
wire mem_req_rw_in;
|
||||
wire [CACHE_LINE_SIZE-1:0] mem_req_byteen_in;
|
||||
wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr_in;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_in;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_in;
|
||||
wire mem_req_ready_in;
|
||||
|
||||
// Memory response
|
||||
wire mem_rsp_valid_out;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_out;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_out;
|
||||
wire mem_rsp_ready_out;
|
||||
|
||||
if (NC_ENABLE) begin
|
||||
VX_nc_bypass #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_RSP_TAGS (`CORE_REQ_TAG_COUNT),
|
||||
.NC_TAG_BIT (0),
|
||||
|
||||
.CORE_ADDR_WIDTH(`WORD_ADDR_WIDTH),
|
||||
.CORE_DATA_SIZE (WORD_SIZE),
|
||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH),
|
||||
.MEM_DATA_SIZE (CACHE_LINE_SIZE),
|
||||
.MEM_TAG_WIDTH (MEM_TAG_WIDTH)
|
||||
) nc_bypass (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core request in
|
||||
.core_req_valid_in (core_req_valid),
|
||||
.core_req_rw_in (core_req_rw),
|
||||
.core_req_byteen_in (core_req_byteen),
|
||||
.core_req_addr_in (core_req_addr),
|
||||
.core_req_data_in (core_req_data),
|
||||
.core_req_tag_in (core_req_tag),
|
||||
.core_req_ready_in (core_req_ready),
|
||||
|
||||
// Core request out
|
||||
.core_req_valid_out (core_req_valid_out),
|
||||
.core_req_rw_out (core_req_rw_out),
|
||||
.core_req_byteen_out(core_req_byteen_out),
|
||||
.core_req_addr_out (core_req_addr_out),
|
||||
.core_req_data_out (core_req_data_out),
|
||||
.core_req_tag_out (core_req_tag_out),
|
||||
.core_req_ready_out (core_req_ready_out),
|
||||
|
||||
// Core response in
|
||||
.core_rsp_valid_in (core_rsp_valid_in),
|
||||
.core_rsp_data_in (core_rsp_data_in),
|
||||
.core_rsp_tag_in (core_rsp_tag_in),
|
||||
.core_rsp_ready_in (core_rsp_ready_in),
|
||||
|
||||
// Core response out
|
||||
.core_rsp_valid_out (core_rsp_valid),
|
||||
.core_rsp_data_out (core_rsp_data),
|
||||
.core_rsp_tag_out (core_rsp_tag),
|
||||
.core_rsp_ready_out (core_rsp_ready),
|
||||
|
||||
// Memory request in
|
||||
.mem_req_valid_in (mem_req_valid_in),
|
||||
.mem_req_rw_in (mem_req_rw_in),
|
||||
.mem_req_byteen_in (mem_req_byteen_in),
|
||||
.mem_req_addr_in (mem_req_addr_in),
|
||||
.mem_req_data_in (mem_req_data_in),
|
||||
.mem_req_tag_in (mem_req_tag_in),
|
||||
.mem_req_ready_in (mem_req_ready_in),
|
||||
|
||||
// Memory request out
|
||||
.mem_req_valid_out (mem_req_valid),
|
||||
.mem_req_rw_out (mem_req_rw),
|
||||
.mem_req_byteen_out (mem_req_byteen),
|
||||
.mem_req_addr_out (mem_req_addr),
|
||||
.mem_req_data_out (mem_req_data),
|
||||
.mem_req_tag_out (mem_req_tag),
|
||||
.mem_req_ready_out (mem_req_ready),
|
||||
|
||||
// Memory response in
|
||||
.mem_rsp_valid_in (mem_rsp_valid),
|
||||
.mem_rsp_data_in (mem_rsp_data),
|
||||
.mem_rsp_tag_in (mem_rsp_tag),
|
||||
.mem_rsp_ready_in (mem_rsp_ready),
|
||||
|
||||
// Memory response out
|
||||
.mem_rsp_valid_out (mem_rsp_valid_out),
|
||||
.mem_rsp_data_out (mem_rsp_data_out),
|
||||
.mem_rsp_tag_out (mem_rsp_tag_out),
|
||||
.mem_rsp_ready_out (mem_rsp_ready_out)
|
||||
);
|
||||
end else begin
|
||||
assign core_req_valid_out = core_req_valid;
|
||||
assign core_req_rw_out = core_req_rw;
|
||||
assign core_req_addr_out = core_req_addr;
|
||||
assign core_req_byteen_out = core_req_byteen;
|
||||
assign core_req_data_out = core_req_data;
|
||||
assign core_req_tag_out = core_req_tag;
|
||||
assign core_req_ready = core_req_ready_out;
|
||||
|
||||
assign core_rsp_valid = core_rsp_valid_in;
|
||||
assign core_rsp_data = core_rsp_data_in;
|
||||
assign core_rsp_tag = core_rsp_tag_in;
|
||||
assign core_rsp_ready_in = core_rsp_ready;
|
||||
|
||||
assign mem_req_valid = mem_req_valid_in;
|
||||
assign mem_req_rw = mem_req_rw_in;
|
||||
assign mem_req_addr = mem_req_addr_in;
|
||||
assign mem_req_byteen = mem_req_byteen_in;
|
||||
assign mem_req_data = mem_req_data_in;
|
||||
assign mem_req_tag = mem_req_tag_in;
|
||||
assign mem_req_ready_in = mem_req_ready;
|
||||
|
||||
assign mem_rsp_valid_out = mem_rsp_valid;
|
||||
assign mem_rsp_data_out = mem_rsp_data;
|
||||
assign mem_rsp_tag_out = mem_rsp_tag;
|
||||
assign mem_rsp_ready = mem_rsp_ready_out;
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual;
|
||||
wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_out_a, mem_rsp_tag_qual;
|
||||
|
||||
wire mrsq_full, mrsq_empty;
|
||||
wire mrsq_push, mrsq_pop;
|
||||
|
||||
assign mrsq_push = mem_rsp_valid_out && mem_rsp_ready_out;
|
||||
assign mem_rsp_ready_out = !mrsq_full;
|
||||
|
||||
// trim out shared memory and non-cacheable flags
|
||||
assign mem_rsp_tag_out_a = mem_rsp_tag_out[2 +: `MEM_ADDR_WIDTH];
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH),
|
||||
.SIZE (MRSQ_SIZE),
|
||||
.BUFFERED (1)
|
||||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (mrsq_push),
|
||||
.pop (mrsq_pop),
|
||||
.data_in ({mem_rsp_tag_out_a, mem_rsp_data_out}),
|
||||
.data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}),
|
||||
.empty (mrsq_empty),
|
||||
.full (mrsq_full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (mem_rsp_tag_out)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`LINE_SELECT_BITS-1:0] flush_addr;
|
||||
wire flush_enable;
|
||||
|
||||
VX_flush_ctrl #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS)
|
||||
) flush_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.addr_out (flush_addr),
|
||||
.valid_out (flush_enable)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel;
|
||||
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
|
||||
@@ -113,44 +309,6 @@ module VX_cache #(
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_qual;
|
||||
wire [`LINE_SELECT_BITS-1:0] flush_addr;
|
||||
wire flush_enable;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank;
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire mrsq_full, mrsq_empty;
|
||||
wire mrsq_push, mrsq_pop;
|
||||
|
||||
assign mrsq_push = mem_rsp_valid && mem_rsp_ready;
|
||||
assign mem_rsp_ready = !mrsq_full;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (MEM_TAG_WIDTH + `CACHE_LINE_WIDTH),
|
||||
.SIZE (MRSQ_SIZE),
|
||||
.BUFFERED (1)
|
||||
) mem_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (mrsq_push),
|
||||
.pop (mrsq_pop),
|
||||
.data_in ({mem_rsp_tag, mem_rsp_data}),
|
||||
.data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}),
|
||||
.empty (mrsq_empty),
|
||||
.full (mrsq_full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
`UNUSED_VAR (mem_rsp_tag_qual)
|
||||
assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready;
|
||||
@@ -158,21 +316,6 @@ module VX_cache #(
|
||||
assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)];
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_flush_ctrl #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS)
|
||||
) flush_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset || flush),
|
||||
.addr_out (flush_addr),
|
||||
.valid_out (flush_enable)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_cache_core_req_bank_sel #(
|
||||
.CACHE_ID (CACHE_ID),
|
||||
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
|
||||
@@ -188,13 +331,13 @@ module VX_cache #(
|
||||
`ifdef PERF_ENABLE
|
||||
.bank_stalls(perf_cache_if.bank_stalls),
|
||||
`endif
|
||||
.core_req_valid (core_req_valid),
|
||||
.core_req_rw (core_req_rw),
|
||||
.core_req_addr (core_req_addr),
|
||||
.core_req_byteen(core_req_byteen),
|
||||
.core_req_data (core_req_data),
|
||||
.core_req_tag (core_req_tag),
|
||||
.core_req_ready (core_req_ready),
|
||||
.core_req_valid (core_req_valid_out),
|
||||
.core_req_rw (core_req_rw_out),
|
||||
.core_req_addr (core_req_addr_out),
|
||||
.core_req_byteen(core_req_byteen_out),
|
||||
.core_req_data (core_req_data_out),
|
||||
.core_req_tag (core_req_tag_out),
|
||||
.core_req_ready (core_req_ready_out),
|
||||
.per_bank_core_req_valid (per_bank_core_req_valid),
|
||||
.per_bank_core_req_rw (per_bank_core_req_rw),
|
||||
.per_bank_core_req_addr (per_bank_core_req_addr),
|
||||
@@ -365,10 +508,10 @@ module VX_cache #(
|
||||
.per_bank_core_rsp_tag (per_bank_core_rsp_tag),
|
||||
.per_bank_core_rsp_tid (per_bank_core_rsp_tid),
|
||||
.per_bank_core_rsp_ready (per_bank_core_rsp_ready),
|
||||
.core_rsp_valid (core_rsp_valid),
|
||||
.core_rsp_tag (core_rsp_tag),
|
||||
.core_rsp_data (core_rsp_data),
|
||||
.core_rsp_ready (core_rsp_ready)
|
||||
.core_rsp_valid (core_rsp_valid_in),
|
||||
.core_rsp_tag (core_rsp_tag_in),
|
||||
.core_rsp_data (core_rsp_data_in),
|
||||
.core_rsp_ready (core_rsp_ready_in)
|
||||
);
|
||||
|
||||
wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
|
||||
@@ -386,12 +529,13 @@ module VX_cache #(
|
||||
.valid_in (per_bank_mem_req_valid),
|
||||
.data_in (data_in),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.valid_out (mem_req_valid),
|
||||
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data}),
|
||||
.ready_out (mem_req_ready)
|
||||
.valid_out (mem_req_valid_in),
|
||||
.data_out ({mem_req_addr_in, mem_req_rw_in, mem_req_byteen_in, mem_req_data_in}),
|
||||
.ready_out (mem_req_ready_in)
|
||||
);
|
||||
|
||||
assign mem_req_tag = mem_req_addr;
|
||||
// build memory tag adding shared memory and non-cacheable flags
|
||||
assign mem_req_tag_in = MEM_TAG_WIDTH'({mem_req_addr_in, 1'b0, 1'b0});
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_reads, core_writes
|
||||
|
||||
301
hw/rtl/cache/VX_nc_bypass.v
vendored
Normal file
301
hw/rtl/cache/VX_nc_bypass.v
vendored
Normal file
@@ -0,0 +1,301 @@
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// Non-cacheable (NC) request bypass.
//
// The module is placed around a cache: the "*_in"/"*_out" core ports wrap the
// core-facing side and the "*_in"/"*_out" memory ports wrap the memory-facing
// side.
//
// - Core requests whose tag has bit NC_TAG_BIT set are hidden from the
//   downstream core request port (their valid is forced low) and are issued
//   directly on the memory request port instead, one request at a time,
//   chosen by a priority encoder.
// - Memory responses whose tag has bit NC_TAG_BIT set are hidden from the
//   downstream memory response port and are returned directly on the core
//   response port instead.
// - All other traffic is passed through unchanged.
//
// While an NC request is pending, the cache-side memory request is stalled
// (mem_req_ready_in = 0). While an NC response is being returned, the
// cache-side core response is stalled (core_rsp_ready_in = 0).
//
// The memory tag of an NC request is built as
//     { requester index, word index inside the memory line (D bits, only when
//       D != 0), original core tag }
// and resized to MEM_TAG_WIDTH. The response path decodes the same layout.
// NOTE(review): this assumes MEM_TAG_WIDTH is wide enough to hold all of these
// fields - confirm at the instantiation site.
//
// Parameters:
//   NUM_REQS        number of core request lanes.
//   NUM_RSP_TAGS    number of core response tag/ready lanes; must be 1 or
//                   NUM_REQS (checked below). The default of 0 does not
//                   satisfy that check, so it has to be set explicitly.
//   NC_TAG_BIT      tag bit that marks a request/response as non-cacheable.
//   CORE_*          address width, data size in bytes and tag width of the
//                   core-side interface.
//   MEM_*           address width, data size in bytes and tag width of the
//                   memory-side interface.
//   CORE_DATA_WIDTH, MEM_DATA_WIDTH
//                   derived data widths in bits.
module VX_nc_bypass #(
    parameter NUM_REQS          = 1,
    parameter NUM_RSP_TAGS      = 0,
    parameter NC_TAG_BIT        = 0,

    parameter CORE_ADDR_WIDTH   = 1,
    parameter CORE_DATA_SIZE    = 1,
    parameter CORE_TAG_WIDTH    = 1,

    parameter MEM_ADDR_WIDTH    = 1,
    parameter MEM_DATA_SIZE     = 1,
    parameter MEM_TAG_WIDTH     = 1,

    parameter CORE_DATA_WIDTH   = CORE_DATA_SIZE * 8,
    parameter MEM_DATA_WIDTH    = MEM_DATA_SIZE * 8
) (
    input wire clk,
    input wire reset,

    // Core request in
    input wire [NUM_REQS-1:0]                       core_req_valid_in,
    input wire [NUM_REQS-1:0]                       core_req_rw_in,
    input wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0]  core_req_addr_in,
    input wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0]   core_req_byteen_in,
    input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0]  core_req_data_in,
    input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0]   core_req_tag_in,
    output wire [NUM_REQS-1:0]                      core_req_ready_in,

    // Core request out
    output wire [NUM_REQS-1:0]                      core_req_valid_out,
    output wire [NUM_REQS-1:0]                      core_req_rw_out,
    output wire [NUM_REQS-1:0][CORE_ADDR_WIDTH-1:0] core_req_addr_out,
    output wire [NUM_REQS-1:0][CORE_DATA_SIZE-1:0]  core_req_byteen_out,
    output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_req_data_out,
    output wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0]  core_req_tag_out,
    input wire [NUM_REQS-1:0]                       core_req_ready_out,

    // Core response in
    input wire [NUM_REQS-1:0]                       core_rsp_valid_in,
    input wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0]  core_rsp_data_in,
    input wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_in,
    output wire [NUM_RSP_TAGS-1:0]                  core_rsp_ready_in,

    // Core response out
    output wire [NUM_REQS-1:0]                      core_rsp_valid_out,
    output wire [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out,
    output wire [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out,
    input wire [NUM_RSP_TAGS-1:0]                   core_rsp_ready_out,

    // Memory request in
    input wire                          mem_req_valid_in,
    input wire                          mem_req_rw_in,
    input wire [MEM_ADDR_WIDTH-1:0]     mem_req_addr_in,
    input wire [MEM_DATA_SIZE-1:0]      mem_req_byteen_in,
    input wire [MEM_DATA_WIDTH-1:0]     mem_req_data_in,
    input wire [MEM_TAG_WIDTH-1:0]      mem_req_tag_in,
    output wire                         mem_req_ready_in,

    // Memory request out
    output wire                         mem_req_valid_out,
    output wire                         mem_req_rw_out,
    output wire [MEM_ADDR_WIDTH-1:0]    mem_req_addr_out,
    output wire [MEM_DATA_SIZE-1:0]     mem_req_byteen_out,
    output wire [MEM_DATA_WIDTH-1:0]    mem_req_data_out,
    output wire [MEM_TAG_WIDTH-1:0]     mem_req_tag_out,
    input wire                          mem_req_ready_out,

    // Memory response in
    input wire                          mem_rsp_valid_in,
    input wire [MEM_DATA_WIDTH-1:0]     mem_rsp_data_in,
    input wire [MEM_TAG_WIDTH-1:0]      mem_rsp_tag_in,
    output wire                         mem_rsp_ready_in,

    // Memory response out
    output wire                         mem_rsp_valid_out,
    output wire [MEM_DATA_WIDTH-1:0]    mem_rsp_data_out,
    output wire [MEM_TAG_WIDTH-1:0]     mem_rsp_tag_out,
    input wire                          mem_rsp_ready_out
);
    `STATIC_ASSERT((NUM_RSP_TAGS == 1 || NUM_RSP_TAGS == NUM_REQS), ("invalid parameter"))

    // The module is purely combinational; clk/reset are only part of the interface.
    `UNUSED_VAR (clk)
    `UNUSED_VAR (reset)

    // Number of tag bits that carry the requester index. $clog2(1) is 0, so
    // this is 0 when there is a single requester.
    localparam CORE_REQ_TIDW  = $clog2(NUM_REQS);

    // Same value clamped to at least 1. Used for signal declarations and size
    // casts, which would otherwise degenerate into a [-1:0] vector / zero-width
    // cast when NUM_REQS == 1.
    localparam CORE_REQ_TIDWU = (CORE_REQ_TIDW > 0) ? CORE_REQ_TIDW : 1;

    localparam CORE_LDATAW = $clog2(CORE_DATA_WIDTH);
    localparam MEM_LDATAW  = $clog2(MEM_DATA_WIDTH);

    // D: number of low core-address bits that select a core word inside one
    //    memory line; P: number of core words per memory line.
    // NOTE(review): assumes both data widths are powers of two - confirm.
    localparam D = MEM_LDATAW - CORE_LDATAW;
    localparam P = 2**D;

    // core request handling

    reg [NUM_REQS-1:0] core_req_valid_out_r;
    reg [NUM_REQS-1:0] core_req_ready_in_r;

    wire [NUM_REQS-1:0]       core_req_valid_in_nc; // per-lane: valid NC request
    wire [CORE_REQ_TIDWU-1:0] core_req_nc_tid;      // lane selected for the memory port

    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign core_req_valid_in_nc[i] = core_req_valid_in[i] && core_req_tag_in[i][NC_TAG_BIT];
    end

    always @(*) begin
        for (integer i = 0; i < NUM_REQS; ++i) begin
            if (core_req_valid_in_nc[i]) begin
                // NC request: hide it from the downstream core port and accept it
                // only when it is the selected lane and memory is ready.
                core_req_valid_out_r[i] = 0;
                core_req_ready_in_r[i]  = mem_req_ready_out && (core_req_nc_tid == CORE_REQ_TIDWU'(i));
            end else begin
                core_req_valid_out_r[i] = core_req_valid_in[i];
                core_req_ready_in_r[i]  = core_req_ready_out[i];
            end
        end
    end

    assign core_req_valid_out  = core_req_valid_out_r;
    assign core_req_rw_out     = core_req_rw_in;
    assign core_req_addr_out   = core_req_addr_in;
    assign core_req_byteen_out = core_req_byteen_in;
    assign core_req_data_out   = core_req_data_in;
    assign core_req_tag_out    = core_req_tag_in;
    assign core_req_ready_in   = core_req_ready_in_r;

    // memory request handling

    reg                         mem_req_valid_out_r;
    reg                         mem_req_rw_out_r;
    reg [MEM_DATA_SIZE-1:0]     mem_req_byteen_out_r;
    reg [MEM_ADDR_WIDTH-1:0]    mem_req_addr_out_r;
    reg [MEM_DATA_WIDTH-1:0]    mem_req_data_out_r;
    reg [MEM_TAG_WIDTH-1:0]     mem_req_tag_out_r;
    reg                         mem_req_ready_in_r;

    wire core_req_nc_valid; // at least one NC request is pending

    // Picks one of the pending NC requests.
    // NOTE(review): the index port is presumably at least 1 bit wide and 0 when
    // N == 1 - confirm against VX_priority_encoder.
    VX_priority_encoder #(
        .N (NUM_REQS)
    ) core_req_sel (
        .data_in   (core_req_valid_in_nc),
        .index     (core_req_nc_tid),
        `UNUSED_PIN (onehot),
        .valid_out (core_req_nc_valid)
    );

    always @(*) begin
        if (core_req_nc_valid) begin
            // NC request takes the memory port; the cache-side request is stalled.
            mem_req_valid_out_r = 1;
            mem_req_rw_out_r    = core_req_rw_in[core_req_nc_tid];
            // drop the word-select bits to obtain the memory line address
            mem_req_addr_out_r  = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH];
            // replicate the core word into every slot; byteen selects the slot written
            for (integer i = 0; i < P; ++i) begin
                mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid];
            end
            mem_req_ready_in_r  = 0;
        end else begin
            mem_req_valid_out_r = mem_req_valid_in;
            mem_req_rw_out_r    = mem_req_rw_in;
            mem_req_addr_out_r  = mem_req_addr_in;
            mem_req_data_out_r  = mem_req_data_in;
            mem_req_ready_in_r  = mem_req_ready_out;
        end
    end

    if (D != 0) begin
        // index of the addressed core word inside the memory line
        wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0];
        always @(*) begin
            if (core_req_nc_valid) begin
                // enable only the bytes of the addressed core word
                mem_req_byteen_out_r = 0;
                mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid];
                // the tag carries what the response path needs: lane, word index, core tag
                mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]});
            end else begin
                mem_req_byteen_out_r = mem_req_byteen_in;
                mem_req_tag_out_r    = mem_req_tag_in;
            end
        end
    end else begin
        always @(*) begin
            if (core_req_nc_valid) begin
                mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid];
                mem_req_tag_out_r    = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]});
            end else begin
                mem_req_byteen_out_r = mem_req_byteen_in;
                mem_req_tag_out_r    = mem_req_tag_in;
            end
        end
    end

    assign mem_req_valid_out  = mem_req_valid_out_r;
    assign mem_req_rw_out     = mem_req_rw_out_r;
    assign mem_req_addr_out   = mem_req_addr_out_r;
    assign mem_req_byteen_out = mem_req_byteen_out_r;
    assign mem_req_data_out   = mem_req_data_out_r;
    assign mem_req_tag_out    = mem_req_tag_out_r;
    assign mem_req_ready_in   = mem_req_ready_in_r;

    // core response handling

    reg [NUM_REQS-1:0]                          core_rsp_valid_out_r;
    reg [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0]     core_rsp_data_out_r;
    reg [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0]  core_rsp_tag_out_r;
    reg [NUM_RSP_TAGS-1:0]                      core_rsp_ready_in_r;

    // Lane the NC response belongs to, recovered from the response tag.
    // With a single requester the tag holds no index bits that need decoding,
    // so no (zero-width) part-select is generated and the lane is always 0.
    wire [CORE_REQ_TIDWU-1:0] rsp_tid;
    if (NUM_REQS > 1) begin
        assign rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
    end else begin
        assign rsp_tid = 0;
        `UNUSED_VAR (rsp_tid)
    end

    wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];

    if (NUM_REQS > 1) begin
        always @(*) begin
            if (is_mem_rsp_nc) begin
                // NC response goes to exactly one lane; cache response is stalled.
                core_rsp_valid_out_r = 0;
                core_rsp_valid_out_r[rsp_tid] = 1;
                for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin
                    core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
                end
                core_rsp_ready_in_r = 0;
            end else begin
                core_rsp_valid_out_r = core_rsp_valid_in;
                core_rsp_tag_out_r   = core_rsp_tag_in;
                core_rsp_ready_in_r  = core_rsp_ready_out;
            end
        end
    end else begin
        always @(*) begin
            if (is_mem_rsp_nc) begin
                core_rsp_valid_out_r = 1;
                core_rsp_tag_out_r   = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
                core_rsp_ready_in_r  = 0;
            end else begin
                core_rsp_valid_out_r = core_rsp_valid_in;
                core_rsp_tag_out_r   = core_rsp_tag_in;
                core_rsp_ready_in_r  = core_rsp_ready_out;
            end
        end
    end

    if (D != 0) begin
        // index of the requested core word inside the returned memory line
        wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
        always @(*) begin
            if (is_mem_rsp_nc) begin
                // the same word is driven on every lane; only one lane is valid
                for (integer i = 0; i < NUM_REQS; ++i) begin
                    core_rsp_data_out_r[i] = mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
                end
            end else begin
                core_rsp_data_out_r = core_rsp_data_in;
            end
        end
    end else begin
        always @(*) begin
            if (is_mem_rsp_nc) begin
                for (integer i = 0; i < NUM_REQS; ++i) begin
                    core_rsp_data_out_r[i] = mem_rsp_data_in;
                end
            end else begin
                core_rsp_data_out_r = core_rsp_data_in;
            end
        end
    end

    assign core_rsp_valid_out = core_rsp_valid_out_r;
    assign core_rsp_data_out  = core_rsp_data_out_r;
    assign core_rsp_tag_out   = core_rsp_tag_out_r;
    assign core_rsp_ready_in  = core_rsp_ready_in_r;

    // memory response handling

    reg mem_rsp_valid_out_r;
    reg mem_rsp_ready_in_r;

    always @(*) begin
        if (is_mem_rsp_nc) begin
            // NC responses are not forwarded to the cache side
            mem_rsp_valid_out_r = 0;
        end else begin
            mem_rsp_valid_out_r = mem_rsp_valid_in;
        end
    end

    if (NUM_RSP_TAGS > 1) begin
        always @(*) begin
            if (is_mem_rsp_nc) begin
                // per-lane ready: wait for the destination lane
                mem_rsp_ready_in_r = core_rsp_ready_out[rsp_tid];
            end else begin
                mem_rsp_ready_in_r = mem_rsp_ready_out;
            end
        end
    end else begin
        always @(*) begin
            if (is_mem_rsp_nc) begin
                mem_rsp_ready_in_r = core_rsp_ready_out;
            end else begin
                mem_rsp_ready_in_r = mem_rsp_ready_out;
            end
        end
    end

    assign mem_rsp_valid_out = mem_rsp_valid_out_r;
    assign mem_rsp_data_out  = mem_rsp_data_in;
    assign mem_rsp_tag_out   = mem_rsp_tag_in;
    assign mem_rsp_ready_in  = mem_rsp_ready_in_r;

endmodule
|
||||
@@ -111,9 +111,9 @@
|
||||
"!cci_pending_writes_full": 1,
|
||||
"?afu_mem_req_fire": 1,
|
||||
"afu_mem_req_addr": 26,
|
||||
"afu_mem_req_tag": 28,
|
||||
"afu_mem_req_tag": 30,
|
||||
"?afu_mem_rsp_fire": 1,
|
||||
"afu_mem_rsp_tag": 28
|
||||
"afu_mem_rsp_tag": 30
|
||||
},
|
||||
"afu/vortex": {
|
||||
"!reset": 1,
|
||||
@@ -167,10 +167,10 @@
|
||||
"dcache_req_rw": 1,
|
||||
"dcache_req_byteen":"`NUM_THREADS * 4",
|
||||
"dcache_req_data": "`NUM_THREADS * 32",
|
||||
"dcache_req_tag":"`DCORE_TAG_ID_BITS",
|
||||
"dcache_req_tag":"`LSUQ_ADDR_BITS",
|
||||
"?dcache_rsp_fire":"`NUM_THREADS",
|
||||
"dcache_rsp_data":"`NUM_THREADS * 32",
|
||||
"dcache_rsp_tag":"`DCORE_TAG_ID_BITS"
|
||||
"dcache_rsp_tag":"`LSUQ_ADDR_BITS"
|
||||
},
|
||||
"afu/vortex/cluster/core/pipeline/issue": {
|
||||
"?issue_fire": 1,
|
||||
|
||||
@@ -309,7 +309,7 @@ void Simulator::run() {
|
||||
}
|
||||
|
||||
int Simulator::get_last_wb_value(int reg) const {
|
||||
return (int)vortex_->Vortex->genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||
return (int)vortex_->Vortex->genblk2__BRA__0__KET____DOT__cluster->genblk2__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
|
||||
}
|
||||
|
||||
void Simulator::load_bin(const char* program_file) {
|
||||
|
||||
@@ -50,7 +50,7 @@ private:
|
||||
int cycles_left;
|
||||
std::array<uint8_t, MEM_BLOCK_SIZE> block;
|
||||
uint32_t addr;
|
||||
uint32_t tag;
|
||||
uint64_t tag;
|
||||
} mem_req_t;
|
||||
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
|
||||
@@ -321,8 +321,8 @@ Word Core::dcache_read(Addr addr, Size size) {
|
||||
++loads_;
|
||||
Word data = 0;
|
||||
#ifdef SM_ENABLE
|
||||
if ((addr >= (SHARED_MEM_BASE_ADDR - SMEM_SIZE))
|
||||
&& ((addr + 3) < SHARED_MEM_BASE_ADDR)) {
|
||||
if ((addr >= (SMEM_BASE_ADDR - SMEM_SIZE))
|
||||
&& ((addr + 3) < SMEM_BASE_ADDR)) {
|
||||
shared_mem_.read(addr & (SMEM_SIZE-1), &data, size);
|
||||
return data;
|
||||
}
|
||||
@@ -334,8 +334,8 @@ Word Core::dcache_read(Addr addr, Size size) {
|
||||
void Core::dcache_write(Addr addr, Word data, Size size) {
|
||||
++stores_;
|
||||
#ifdef SM_ENABLE
|
||||
if ((addr >= (SHARED_MEM_BASE_ADDR - SMEM_SIZE))
|
||||
&& ((addr + 3) < SHARED_MEM_BASE_ADDR)) {
|
||||
if ((addr >= (SMEM_BASE_ADDR - SMEM_SIZE))
|
||||
&& ((addr + 3) < SMEM_BASE_ADDR)) {
|
||||
shared_mem_.write(addr & (SMEM_SIZE-1), &data, size);
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user