Merge branch 'fpga_synthesis' of github.gatech.edu:casl/Vortex into fpga_synthesis
This commit is contained in:
@@ -7,7 +7,7 @@ CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-error
|
||||
|
||||
CXXFLAGS += -I$(POCLRT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/dummy -lOpenCL -lvortex
|
||||
|
||||
PROJECT = bfs
|
||||
|
||||
@@ -25,7 +25,7 @@ run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-ase: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
Binary file not shown.
1
benchmarks/new_opencl/compiler/lib/libOpenCL.so
Symbolic link
1
benchmarks/new_opencl/compiler/lib/libOpenCL.so
Symbolic link
@@ -0,0 +1 @@
|
||||
libOpenCL.so.2
|
||||
Binary file not shown.
1
benchmarks/new_opencl/compiler/lib/libOpenCL.so.2
Symbolic link
1
benchmarks/new_opencl/compiler/lib/libOpenCL.so.2
Symbolic link
@@ -0,0 +1 @@
|
||||
libOpenCL.so.2.5.0
|
||||
Binary file not shown.
@@ -7,7 +7,7 @@ CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-error
|
||||
|
||||
CXXFLAGS += -I$(POCLRT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/dummy -lOpenCL -lvortex
|
||||
|
||||
PROJECT = guassian
|
||||
|
||||
@@ -25,7 +25,7 @@ run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-ase: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
@@ -7,7 +7,7 @@ CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-error
|
||||
|
||||
CXXFLAGS += -I$(POCLRT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/dummy -lOpenCL -lvortex
|
||||
|
||||
PROJECT = kmeans
|
||||
|
||||
@@ -25,7 +25,7 @@ run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-ase: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
@@ -7,7 +7,7 @@ CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-error
|
||||
|
||||
CXXFLAGS += -I$(POCLRT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/dummy -lOpenCL -lvortex
|
||||
|
||||
PROJECT = nearn
|
||||
|
||||
@@ -25,7 +25,7 @@ run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-ase: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
@@ -7,7 +7,7 @@ CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-error
|
||||
|
||||
CXXFLAGS += -I$(POCLRT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/dummy -lOpenCL -lvortex
|
||||
|
||||
PROJECT = saxpy
|
||||
|
||||
@@ -25,7 +25,7 @@ run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-ase: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
@@ -7,7 +7,7 @@ CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-error
|
||||
|
||||
CXXFLAGS += -I$(POCLRT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/dummy -lOpenCL -lvortex
|
||||
|
||||
PROJECT = sfilter
|
||||
|
||||
@@ -25,7 +25,7 @@ run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-ase: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
@@ -7,7 +7,7 @@ CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-error
|
||||
|
||||
CXXFLAGS += -I$(POCLRT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/dummy -lOpenCL -lvortex
|
||||
|
||||
PROJECT = sgemm
|
||||
|
||||
@@ -25,7 +25,7 @@ run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-ase: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
@@ -7,7 +7,7 @@ CXXFLAGS += -std=c++11 -O0 -g -fpermissive -Wall -Wextra -pedantic -Wfatal-error
|
||||
|
||||
CXXFLAGS += -I$(POCLRT_PATH)/include
|
||||
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/simx -lOpenCL -lvortex
|
||||
LDFLAGS += -L$(POCLRT_PATH)/lib -L$(DRIVER_PATH)/dummy -lOpenCL -lvortex
|
||||
|
||||
PROJECT = vecadd
|
||||
|
||||
@@ -25,7 +25,7 @@ run-fpga: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-ase: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCLRT_PATH)/lib:$(DRIVER_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
Binary file not shown.
@@ -1,35 +1,36 @@
|
||||
|
||||
BUILD_DIR=build_ase
|
||||
ASE_BUILD_DIR=build_ase
|
||||
FPGA_BUILD_DIR=build_fpga
|
||||
|
||||
all: ase fpga
|
||||
|
||||
ase: setup-ase
|
||||
make -C $(BUILD_DIR)
|
||||
make -C $(ASE_BUILD_DIR)
|
||||
|
||||
fpga: setup-fpga
|
||||
cd build_fpga && qsub-synth
|
||||
cd $(FPGA_BUILD_DIR) && qsub-synth
|
||||
|
||||
setup-ase: build_ase/Makefile
|
||||
setup-ase: $(ASE_BUILD_DIR)/Makefile
|
||||
|
||||
setup-fpga: build_fpga/build/dcp.qpf
|
||||
setup-fpga: $(FPGA_BUILD_DIR)/build/dcp.qpf
|
||||
|
||||
build_ase/Makefile:
|
||||
afu_sim_setup --s sources.txt build_ase
|
||||
$(ASE_BUILD_DIR)/Makefile:
|
||||
afu_sim_setup --s sources.txt $(ASE_BUILD_DIR)
|
||||
|
||||
build_fpga/build/dcp.qpf:
|
||||
afu_synth_setup -s sources.txt build_fpga
|
||||
$(FPGA_BUILD_DIR)/build/dcp.qpf:
|
||||
afu_synth_setup -s sources.txt $(FPGA_BUILD_DIR)
|
||||
|
||||
run-ase:
|
||||
cd build_ase && make sim
|
||||
cd $(ASE_BUILD_DIR) && make sim
|
||||
|
||||
wave:
|
||||
vsim -view build_ase/work/vsim.wlf -do wave.do
|
||||
vsim -view $(ASE_BUILD_DIR)/work/vsim.wlf -do wave.do
|
||||
|
||||
run-fpga:
|
||||
# TODO
|
||||
|
||||
clean-ase:
|
||||
rm -rf build_ase
|
||||
rm -rf $(ASE_BUILD_DIR)
|
||||
|
||||
clean-fpga:
|
||||
rm -rf build_fpga
|
||||
rm -rf $(FPGA_BUILD_DIR)
|
||||
@@ -68,6 +68,7 @@ vortex_afu.json
|
||||
../../rtl/VX_cache/VX_cache_miss_resrv.v
|
||||
../../rtl/VX_cache/VX_fill_invalidator.v
|
||||
../../rtl/VX_cache/VX_tag_data_structure.v
|
||||
../../rtl/VX_cache/VX_prefetcher.v
|
||||
../../rtl/cache/VX_generic_pe.v
|
||||
../../rtl/cache/cache_set.v
|
||||
../../rtl/cache/VX_d_cache.v
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
"cmd-type-read": 1,
|
||||
"cmd-type-write": 2,
|
||||
"cmd-type-run": 3,
|
||||
"cmd-type-snoop": 4,
|
||||
"cmd-type-clflush": 4,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
|
||||
@@ -34,7 +34,9 @@ module vortex_afu #(
|
||||
);
|
||||
|
||||
localparam AVS_RD_QUEUE_SIZE = 16;
|
||||
localparam VX_SNOOPING_DELAY = 300;
|
||||
|
||||
localparam VX_SNOOP_DELAY = 300;
|
||||
localparam VX_SNOOP_LEVELS = 2;
|
||||
|
||||
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
||||
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
|
||||
@@ -42,7 +44,7 @@ localparam AFU_ID_H = 16'h0004; // AFU ID Higher
|
||||
localparam CMD_TYPE_READ = `AFU_IMAGE_CMD_TYPE_READ;
|
||||
localparam CMD_TYPE_WRITE = `AFU_IMAGE_CMD_TYPE_WRITE;
|
||||
localparam CMD_TYPE_RUN = `AFU_IMAGE_CMD_TYPE_RUN;
|
||||
localparam CMD_TYPE_SNOOP = `AFU_IMAGE_CMD_TYPE_SNOOP;
|
||||
localparam CMD_TYPE_CLFLUSH = `AFU_IMAGE_CMD_TYPE_CLFLUSH;
|
||||
|
||||
localparam MMIO_CSR_CMD = `AFU_IMAGE_MMIO_CSR_CMD;
|
||||
localparam MMIO_CSR_STATUS = `AFU_IMAGE_MMIO_CSR_STATUS;
|
||||
@@ -52,13 +54,12 @@ localparam MMIO_CSR_DATA_SIZE = `AFU_IMAGE_MMIO_CSR_DATA_SIZE;
|
||||
|
||||
logic [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
typedef enum logic[2:0] {
|
||||
typedef enum logic[3:0] {
|
||||
STATE_IDLE,
|
||||
STATE_READ,
|
||||
STATE_WRITE,
|
||||
STATE_RUN,
|
||||
STATE_SNOOP1,
|
||||
STATE_SNOOP2
|
||||
STATE_CLFLUSH
|
||||
} state_t;
|
||||
|
||||
state_t state;
|
||||
@@ -192,7 +193,7 @@ logic [31:0] cci_write_ctr;
|
||||
logic [31:0] avs_read_ctr;
|
||||
logic [31:0] avs_write_ctr;
|
||||
logic [31:0] vx_snoop_ctr;
|
||||
logic [31:0] vx_snoop_delay;
|
||||
logic [9:0] vx_snoop_delay;
|
||||
logic vx_reset;
|
||||
|
||||
always_ff @(posedge clk)
|
||||
@@ -210,21 +211,21 @@ begin
|
||||
STATE_IDLE: begin
|
||||
case (csr_cmd)
|
||||
CMD_TYPE_READ: begin
|
||||
$display("%t: CMD READ: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
|
||||
$display("%t: STATE READ: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
|
||||
state <= STATE_READ;
|
||||
end
|
||||
CMD_TYPE_WRITE: begin
|
||||
$display("%t: CMD WRITE: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
|
||||
$display("%t: STATE WRITE: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
|
||||
state <= STATE_WRITE;
|
||||
end
|
||||
CMD_TYPE_RUN: begin
|
||||
$display("%t: CMD START", $time);
|
||||
$display("%t: STATE START", $time);
|
||||
vx_reset <= 1;
|
||||
state <= STATE_RUN;
|
||||
end
|
||||
CMD_TYPE_SNOOP: begin
|
||||
$display("%t: CMD SNOOP: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size);
|
||||
state <= STATE_SNOOP1;
|
||||
CMD_TYPE_CLFLUSH: begin
|
||||
$display("%t: STATE CFLUSH: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size);
|
||||
state <= STATE_CLFLUSH;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
@@ -250,15 +251,8 @@ begin
|
||||
end
|
||||
end
|
||||
|
||||
STATE_SNOOP1: begin
|
||||
if (vx_snoop_delay >= VX_SNOOPING_DELAY)
|
||||
begin
|
||||
state <= STATE_SNOOP2;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_SNOOP2: begin
|
||||
if (vx_snoop_delay >= VX_SNOOPING_DELAY)
|
||||
STATE_CLFLUSH: begin
|
||||
if (vx_snoop_delay >= VX_SNOOP_DELAY)
|
||||
begin
|
||||
state <= STATE_IDLE;
|
||||
end
|
||||
@@ -320,7 +314,7 @@ begin
|
||||
end
|
||||
end
|
||||
|
||||
STATE_RUN: begin
|
||||
STATE_RUN, STATE_CLFLUSH: begin
|
||||
if (vx_dram_req_read
|
||||
&& !vx_dram_req_delay)
|
||||
begin
|
||||
@@ -348,15 +342,20 @@ begin
|
||||
end
|
||||
|
||||
// Vortex DRAM requests stalling
|
||||
assign vx_dram_req_delay = !((STATE_RUN == state)
|
||||
&& !avs_waitrequest
|
||||
&& !avs_raq_full
|
||||
&& !avs_rdq_full);
|
||||
|
||||
// Vortex DRAM fill response
|
||||
logic vortex_enabled;
|
||||
|
||||
always_comb
|
||||
begin
|
||||
vx_dram_fill_rsp = (STATE_RUN == state) && !avs_rdq_empty && vx_dram_fill_accept;
|
||||
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
||||
vx_dram_req_delay = !vortex_enabled || avs_waitrequest || avs_raq_full || avs_rdq_full;
|
||||
end
|
||||
|
||||
// Vortex DRAM fill response
|
||||
|
||||
always_comb
|
||||
begin
|
||||
vx_dram_fill_rsp = vortex_enabled && !avs_rdq_empty && vx_dram_fill_accept;
|
||||
vx_dram_fill_rsp_addr = (avs_raq_dout << 6);
|
||||
{>>{vx_dram_fill_rsp_data}} = avs_rdq_dout;
|
||||
end
|
||||
@@ -524,32 +523,25 @@ begin
|
||||
else begin
|
||||
if (STATE_IDLE == state)
|
||||
begin
|
||||
vx_snoop_ctr <= 0;
|
||||
vx_snoop_ctr <= 0;
|
||||
vx_snoop_delay <= 0;
|
||||
end
|
||||
|
||||
vx_snp_req <= 0;
|
||||
|
||||
if ((STATE_SNOOP1 == state
|
||||
|| STATE_SNOOP2 == state)
|
||||
if ((STATE_CLFLUSH == state)
|
||||
&& vx_snoop_ctr < csr_data_size
|
||||
&& vx_snp_req_delay)
|
||||
&& !vx_snp_req_delay)
|
||||
begin
|
||||
vx_snp_req <= 1;
|
||||
vx_snoop_ctr <= vx_snoop_ctr + 1;
|
||||
vx_snp_req_addr <= (csr_mem_addr + vx_snoop_ctr) << 6;
|
||||
vx_snp_req <= 1;
|
||||
vx_snoop_ctr <= vx_snoop_ctr + 1;
|
||||
end
|
||||
|
||||
if ((vx_snoop_ctr >= csr_data_size)
|
||||
&& (vx_snoop_delay < VX_SNOOPING_DELAY))
|
||||
if (vx_snoop_ctr == csr_data_size)
|
||||
begin
|
||||
vx_snoop_delay <= vx_snoop_delay + 1;
|
||||
end
|
||||
|
||||
if (vx_snoop_delay >= VX_SNOOPING_DELAY)
|
||||
begin
|
||||
vx_snoop_ctr <= 0;
|
||||
vx_snoop_delay <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -27,12 +27,17 @@ add wave -noupdate -label avs_raq_full /ase_top/ase_top_generic/platform_shim_cc
|
||||
add wave -noupdate -label avs_rdq_full /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_full
|
||||
add wave -noupdate -label avs_raq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_raq_empty
|
||||
add wave -noupdate -label avs_rdq_empty /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/avs_rdq_empty
|
||||
add wave -noupdate -label vortex_enabled /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vortex_enabled
|
||||
add wave -noupdate -label vx_reset /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/reset
|
||||
add wave -noupdate -label vx_dram_req_read /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_read
|
||||
add wave -noupdate -label vx_dram_req_write /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_write
|
||||
add wave -noupdate -label vx_dram_req_delay /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_delay
|
||||
add wave -noupdate -label vx_dram_req_read /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_read
|
||||
add wave -noupdate -label vx_reset /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/reset
|
||||
add wave -noupdate -label vx_dram_req_addr -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_addr
|
||||
add wave -noupdate -label vx_draw_req_data -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_req_data
|
||||
add wave -noupdate -label out_dram_fill_rsp /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_dram_fill_rsp
|
||||
add wave -noupdate -label out_dram_fill_accept /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_dram_fill_accept
|
||||
add wave -noupdate -label vx_draw_fill_rsp_data -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_fill_rsp_data
|
||||
add wave -noupdate -label vx_dram_fill_rsp_addr -radix hexadecimal /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_dram_fill_rsp_addr
|
||||
add wave -noupdate -label llc_snp_req /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/llc_snp_req
|
||||
add wave -noupdate -label llc_snp_req_delay /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/llc_snp_req_delay
|
||||
add wave -noupdate -label out_break /ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/out_ebreak
|
||||
@@ -45,7 +50,7 @@ add wave -noupdate -label warp_stalled {/ase_top/ase_top_generic/platform_shim_c
|
||||
add wave -noupdate -label warp_lock {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/warp_lock}
|
||||
add wave -noupdate -label use_active {/ase_top/ase_top_generic/platform_shim_ccip_std_afu/ccip_std_afu/vortex_afu_inst/vx_soc/genblk1/Vortex_Cluster/genblk1[0]/vortex_core/vx_front_end/vx_fetch/warp_scheduler/use_active}
|
||||
TreeUpdate [SetDefaultTree]
|
||||
WaveRestoreCursors {{Cursor 2} {66234495 ps} 0}
|
||||
WaveRestoreCursors {{Cursor 2} {360293 ps} 0}
|
||||
quietly wave cursor active 1
|
||||
configure wave -namecolwidth 195
|
||||
configure wave -valuecolwidth 100
|
||||
@@ -61,4 +66,4 @@ configure wave -griddelta 40
|
||||
configure wave -timeline 0
|
||||
configure wave -timelineunits ps
|
||||
update
|
||||
WaveRestoreZoom {66041656 ps} {66406344 ps}
|
||||
WaveRestoreZoom {346453 ps} {711141 ps}
|
||||
|
||||
@@ -65,4 +65,4 @@ clean:
|
||||
|
||||
ifneq ($(MAKECMDGOALS),clean)
|
||||
-include .depend
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -22,8 +22,8 @@
|
||||
|
||||
#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ
|
||||
#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE
|
||||
#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN
|
||||
#define CMD_TYPE_SNOOP AFU_IMAGE_CMD_TYPE_SNOOP
|
||||
#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN
|
||||
#define CMD_TYPE_CLFLUSH AFU_IMAGE_CMD_TYPE_CLFLUSH
|
||||
|
||||
#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4)
|
||||
#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4)
|
||||
@@ -313,7 +313,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) {
|
||||
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_SNOOP));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_CLFLUSH));
|
||||
|
||||
// Wait for the write operation to finish
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
|
||||
@@ -1,3 +1,21 @@
|
||||
RISCV_TOOL_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops)
|
||||
VX_RT_PATH ?= $(wildcard ../../../runtime)
|
||||
|
||||
VX_CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
|
||||
VX_CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
|
||||
VX_DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
|
||||
VX_CPY = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
|
||||
|
||||
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
|
||||
VX_STR = $(VX_RT_PATH)/startup/vx_start.S
|
||||
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
|
||||
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
|
||||
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
|
||||
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
|
||||
|
||||
VX_CFLAGS = -march=rv32im -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
|
||||
|
||||
VX_SRCS = kernel.c
|
||||
|
||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
@@ -11,6 +29,18 @@ SRCS = basic.cpp
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
kernel.dump: kernel.elf
|
||||
$(VX_DMP) -D kernel.elf > kernel.dump
|
||||
|
||||
kernel.hex: kernel.elf
|
||||
$(VX_CPY) -O ihex kernel.elf kernel.hex
|
||||
|
||||
kernel.bin: kernel.elf
|
||||
$(VX_CPY) -O binary kernel.elf kernel.bin
|
||||
|
||||
kernel.elf: $(SRCS)
|
||||
$(VX_CC) $(VX_CFLAGS) $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_SRCS) -I$(VX_RT_PATH) -o kernel.elf
|
||||
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../sw/dummy -lvortex -o $@
|
||||
|
||||
@@ -18,7 +48,7 @@ run-fpga: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-ase: $(PROJECT)
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -t 1
|
||||
ASE_LOG=0 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
run-rtlsim: $(PROJECT)
|
||||
LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
|
||||
|
||||
Binary file not shown.
@@ -27,11 +27,11 @@ uint64_t shuffle(int i, uint64_t value) {
|
||||
return (value << i) | (value & ((1 << i)-1));;
|
||||
}
|
||||
|
||||
int run_test_0(vx_buffer_h sbuf,
|
||||
vx_buffer_h dbuf,
|
||||
uint32_t address,
|
||||
uint64_t value,
|
||||
int num_blocks) {
|
||||
int run_memcopy_test(vx_buffer_h sbuf,
|
||||
vx_buffer_h dbuf,
|
||||
uint32_t address,
|
||||
uint64_t value,
|
||||
int num_blocks) {
|
||||
int ret;
|
||||
int errors = 0;
|
||||
|
||||
@@ -73,8 +73,29 @@ int run_test_0(vx_buffer_h sbuf,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int run_test_1(vx_device_h device, const char* program) {
|
||||
int run_kernel_test(vx_device_h device,
|
||||
vx_buffer_h sbuf,
|
||||
vx_buffer_h dbuf,
|
||||
const char* program) {
|
||||
int ret;
|
||||
int errors = 0;
|
||||
|
||||
uint64_t seed = 0x0badf00d40ff40ff;
|
||||
int num_blocks = 4;
|
||||
|
||||
unsigned src_dev_addr = 0x10000000;
|
||||
unsigned dest_dev_addr = 0x20000000;
|
||||
|
||||
// write sbuf data
|
||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, seed);
|
||||
}
|
||||
|
||||
// write buffer to local memory
|
||||
std::cout << "write buffer to local memory" << std::endl;
|
||||
ret = vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
// upload program
|
||||
std::cout << "upload program" << std::endl;
|
||||
@@ -97,6 +118,37 @@ int run_test_1(vx_device_h device, const char* program) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// flush the caches
|
||||
std::cout << "flush the caches" << std::endl;
|
||||
ret = vx_flush_caches(device, dest_dev_addr, 64 * num_blocks);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// read buffer from local memory
|
||||
std::cout << "read buffer from local memory" << std::endl;
|
||||
ret = vx_copy_from_dev(dbuf, dest_dev_addr, 64 * num_blocks, 0);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
// verify result
|
||||
std::cout << "verify result" << std::endl;
|
||||
for (int i = 0; i < 8 * num_blocks; ++i) {
|
||||
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
|
||||
auto ref = shuffle(i, seed);
|
||||
if (curr != ref) {
|
||||
std::cout << "error @ " << std::hex << (dest_dev_addr + 64 * i)
|
||||
<< ": actual " << curr << ", expected " << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
||||
if (errors != 0) {
|
||||
std::cout << "Found " << errors << " errors!" << std::endl;
|
||||
std::cout << "FAILED!" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -147,27 +199,15 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// run tests
|
||||
if (0 == test || -1 == test) {
|
||||
std::cout << "run test suite 0" << std::endl;
|
||||
std::cout << "run memcopy test" << std::endl;
|
||||
|
||||
ret = run_test_0(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
|
||||
ret = run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = run_test_0(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 2);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = run_test_0(sbuf, dbuf, 0x20000000, 0xff00ff00ff00ff00, 4);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = run_test_0(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
|
||||
ret = run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8);
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
@@ -175,17 +215,8 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
if (1 == test || -1 == test) {
|
||||
std::cout << "run test suite 1" << std::endl;
|
||||
ret = run_test_1(device, "rv32ui-p-lw.bin");
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (2 == test || -1 == test) {
|
||||
std::cout << "run test suite 1" << std::endl;
|
||||
ret = run_test_1(device, "rv32ui-p-sw.bin");
|
||||
std::cout << "run kernel test" << std::endl;
|
||||
ret = run_kernel_test(device, sbuf, dbuf, "kernel.bin");
|
||||
if (ret != 0) {
|
||||
cleanup();
|
||||
return ret;
|
||||
|
||||
BIN
driver/tests/basic/rv32ui-p-sw.bin → driver/tests/basic/kernel.bin
Normal file → Executable file
BIN
driver/tests/basic/rv32ui-p-sw.bin → driver/tests/basic/kernel.bin
Normal file → Executable file
Binary file not shown.
9
driver/tests/basic/kernel.c
Normal file
9
driver/tests/basic/kernel.c
Normal file
@@ -0,0 +1,9 @@
|
||||
#include <stdint.h>
|
||||
|
||||
void main() {
|
||||
int64_t* x = (int64_t*)0x10000000;
|
||||
int64_t* y = (int64_t*)0x20000000;
|
||||
for (int i = 0; i < 8 * 4; ++i) {
|
||||
y[i] = x[i];
|
||||
}
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
@@ -80,7 +80,7 @@ int run_test(vx_device_h device,
|
||||
int errors = 0;
|
||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
int ref = i * i;
|
||||
int ref = i + i;
|
||||
int cur = buf_ptr[i];
|
||||
if (cur != ref) {
|
||||
++errors;
|
||||
|
||||
Binary file not shown.
@@ -16,7 +16,7 @@ void kernel_body(void* arg) {
|
||||
unsigned i = ((wNo * _arg->num_threads) + tid) * _arg->stride;
|
||||
|
||||
for (unsigned j = 0; j < _arg->stride; ++j) {
|
||||
z[i+j] = x[i+j] * y[i+j];
|
||||
z[i+j] = x[i+j] + y[i+j];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BIN
driver/tests/demo/kernel.elf
Executable file
BIN
driver/tests/demo/kernel.elf
Executable file
Binary file not shown.
@@ -135,7 +135,7 @@ module VX_alu(
|
||||
|
||||
assign upper_immed = {in_upper_immed, {12{1'b0}}};
|
||||
|
||||
always @(in_alu_op or ALU_in1 or ALU_in2) begin
|
||||
always @(*) begin
|
||||
case(in_alu_op)
|
||||
`ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
|
||||
`SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
|
||||
@@ -177,7 +177,7 @@ module VX_alu(
|
||||
|
||||
assign upper_immed = {in_upper_immed, {12{1'b0}}};
|
||||
|
||||
always @(in_alu_op or ALU_in1 or ALU_in2) begin
|
||||
always @(*) begin
|
||||
case(in_alu_op)
|
||||
`ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
|
||||
`SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
|
||||
|
||||
@@ -106,6 +106,15 @@ module VX_bank
|
||||
);
|
||||
|
||||
|
||||
reg snoop_state = 0;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
snoop_state <= 0;
|
||||
end else begin
|
||||
snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID));
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
wire snrq_pop;
|
||||
@@ -498,13 +507,17 @@ module VX_bank
|
||||
.out ({is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , pc_st2 , inst_meta_st2 })
|
||||
);
|
||||
|
||||
|
||||
wire should_flush;
|
||||
wire dwbq_push;
|
||||
|
||||
wire cwbq_full;
|
||||
wire dwbq_full;
|
||||
wire ffsq_full;
|
||||
wire invalidate_fill;
|
||||
|
||||
// Enqueue to miss reserv if it's a valid miss
|
||||
assign miss_add = valid_st2 && !is_snp_st2 && miss_st2 && !mrvq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
assign miss_add = valid_st2 && !is_snp_st2 && miss_st2 && !mrvq_full && !(should_flush && dwbq_push) && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
assign miss_add_pc = pc_st2;
|
||||
assign miss_add_addr = addr_st2;
|
||||
assign miss_add_data = writeword_st2;
|
||||
@@ -535,12 +548,23 @@ module VX_bank
|
||||
.full (cwbq_full)
|
||||
);
|
||||
|
||||
assign should_flush = snoop_state && valid_st2 && (miss_add_mem_write != `NO_MEM_WRITE) && !is_snp_st2 && !is_fill_st2;
|
||||
// Enqueue to DWB Queue
|
||||
wire dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && !dwbq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
wire[31:0] dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK;
|
||||
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data = readdata_st2;
|
||||
assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush) && !dwbq_full && !((is_snp_st2 && valid_st2 && ffsq_full) ||((valid_st2 && !miss_st2) && cwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
wire[31:0] dwbq_req_addr;
|
||||
wire dwbq_empty;
|
||||
|
||||
|
||||
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data;
|
||||
if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
|
||||
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
|
||||
assign dwbq_req_addr = (should_flush && dwbq_push) ? (addr_st2) : ({readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK);
|
||||
end else begin
|
||||
assign dwbq_req_data = readdata_st2;
|
||||
assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK;
|
||||
end
|
||||
|
||||
|
||||
|
||||
wire possible_fill = valid_st2 && miss_st2 && !dram_fill_req_queue_full && !is_snp_st2;
|
||||
wire[31:0] fill_invalidator_addr = addr_st2 & `BASE_ADDR_MASK;
|
||||
VX_fill_invalidator #(
|
||||
|
||||
@@ -82,6 +82,9 @@ module VX_cache_dram_req_arb
|
||||
wire pref_pop;
|
||||
wire pref_valid;
|
||||
wire[31:0] pref_addr;
|
||||
|
||||
wire dwb_valid;
|
||||
wire dfqq_req;
|
||||
|
||||
assign pref_pop = !dwb_valid && !dfqq_req && !dram_req_delay && pref_valid;
|
||||
VX_prefetcher #(
|
||||
@@ -105,10 +108,8 @@ module VX_cache_dram_req_arb
|
||||
|
||||
);
|
||||
|
||||
wire dfqq_req;
|
||||
wire[31:0] dfqq_req_addr;
|
||||
wire dfqq_empty;
|
||||
wire dwb_valid;
|
||||
wire dfqq_empty;
|
||||
wire dfqq_pop = !dwb_valid && dfqq_req && !dram_req_delay; // If no dwb, and dfqq has valids, then pop
|
||||
wire dfqq_push = (|per_bank_dram_fill_req);
|
||||
|
||||
@@ -139,8 +140,8 @@ module VX_cache_dram_req_arb
|
||||
|
||||
|
||||
assign dram_req = dwb_valid || dfqq_req || pref_pop;
|
||||
assign dram_req_write = dwb_valid;
|
||||
assign dram_req_read = (dfqq_req && !dwb_valid) || pref_pop;
|
||||
assign dram_req_write = dwb_valid && dram_req;
|
||||
assign dram_req_read = ((dfqq_req && !dwb_valid) || pref_pop) && dram_req;
|
||||
assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : (dfqq_req ? dfqq_req_addr : pref_addr)) & `BASE_ADDR_MASK;
|
||||
assign dram_req_size = BANK_LINE_SIZE_BYTES;
|
||||
assign {dram_req_data} = dwb_valid ? {per_bank_dram_wb_req_data[dwb_bank] }: 0;
|
||||
|
||||
@@ -105,15 +105,34 @@ module VX_cache_wb_sel_merge
|
||||
core_wb_pc = 0;
|
||||
core_wb_address = 0;
|
||||
for (this_bank = 0; this_bank < NUMBER_BANKS; this_bank = this_bank + 1) begin
|
||||
if (((FUNC_ID == `LLFUNC_ID) && found_bank && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) || ((FUNC_ID != `LLFUNC_ID) && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index])) && found_bank && (per_bank_wb_valid[this_bank]) && (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index]) && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index]))) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
|
||||
core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
|
||||
core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
|
||||
per_bank_wb_pop_unqual[this_bank] = 1;
|
||||
if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
|
||||
|
||||
if (found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
|
||||
core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
|
||||
core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
|
||||
per_bank_wb_pop_unqual[this_bank] = 1;
|
||||
end else begin
|
||||
per_bank_wb_pop_unqual[this_bank] = 0;
|
||||
end
|
||||
|
||||
end else begin
|
||||
per_bank_wb_pop_unqual[this_bank] = 0;
|
||||
|
||||
|
||||
if (((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index])) && found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && (per_bank_wb_valid[this_bank]) && (per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index]) && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index])) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
core_wb_readdata[per_bank_wb_tid[this_bank]] = per_bank_wb_data[this_bank];
|
||||
core_wb_pc[per_bank_wb_tid[this_bank]] = per_bank_wb_pc[this_bank];
|
||||
core_wb_address[per_bank_wb_tid[this_bank]] = per_bank_wb_address[this_bank];
|
||||
per_bank_wb_pop_unqual[this_bank] = 1;
|
||||
end else begin
|
||||
per_bank_wb_pop_unqual[this_bank] = 0;
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
@@ -290,6 +290,6 @@ module VX_tag_data_access
|
||||
assign readtag_st1e = use_read_tag_st1e;
|
||||
assign fill_sent = miss_st1e;
|
||||
assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
|
||||
assign invalidate_line = is_snp_st1e && miss_st1e;
|
||||
assign invalidate_line = snoop_hit;
|
||||
|
||||
endmodule
|
||||
@@ -88,9 +88,9 @@ module VX_tag_data_structure
|
||||
if (reset) begin
|
||||
for (l = 0; l < `BANK_LINE_COUNT; l=l+1) begin
|
||||
valid[l] <= 0;
|
||||
tag [l] <= 0;
|
||||
// tag [l] <= 0;
|
||||
dirty[l] <= 0;
|
||||
data [l] <= 0;
|
||||
// data [l] <= 0;
|
||||
end
|
||||
end else if (!stall_bank_pipe) begin
|
||||
if (going_to_write) begin
|
||||
|
||||
@@ -253,6 +253,15 @@
|
||||
`define DFFSQ_SIZE 32
|
||||
`endif
|
||||
|
||||
// Prefetcher
|
||||
`ifndef DPRFQ_SIZE
|
||||
`define DPRFQ_SIZE 32
|
||||
`endif
|
||||
|
||||
`ifndef DPRFQ_STRIDE
|
||||
`define DPRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`ifndef DFILL_INVALIDAOR_SIZE
|
||||
`define DFILL_INVALIDAOR_SIZE 32
|
||||
@@ -361,6 +370,15 @@
|
||||
`define IFFSQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Prefetcher
|
||||
`ifndef IPRFQ_SIZE
|
||||
`define IPRFQ_SIZE 32
|
||||
`endif
|
||||
|
||||
`ifndef IPRFQ_STRIDE
|
||||
`define IPRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`ifndef IFILL_INVALIDAOR_SIZE
|
||||
`define IFILL_INVALIDAOR_SIZE 32
|
||||
@@ -467,6 +485,15 @@
|
||||
`define SFFSQ_SIZE 16
|
||||
`endif
|
||||
|
||||
// Prefetcher
|
||||
`ifndef SPRFQ_SIZE
|
||||
`define SPRFQ_SIZE 4
|
||||
`endif
|
||||
|
||||
`ifndef SPRFQ_STRIDE
|
||||
`define SPRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`ifndef SFILL_INVALIDAOR_SIZE
|
||||
`define SFILL_INVALIDAOR_SIZE 32
|
||||
@@ -572,6 +599,15 @@
|
||||
`define LLFFSQ_SIZE 32
|
||||
`endif
|
||||
|
||||
// Prefetcher
|
||||
`ifndef LLPRFQ_SIZE
|
||||
`define LLPRFQ_SIZE 32
|
||||
`endif
|
||||
|
||||
`ifndef LLPRFQ_STRIDE
|
||||
`define LLPRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`ifndef LLFILL_INVALIDAOR_SIZE
|
||||
`define LLFILL_INVALIDAOR_SIZE 32
|
||||
@@ -677,6 +713,15 @@
|
||||
`define L3FFSQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Prefetcher
|
||||
`ifndef L3PRFQ_SIZE
|
||||
`define L3PRFQ_SIZE 32
|
||||
`endif
|
||||
|
||||
`ifndef L3PRFQ_STRIDE
|
||||
`define L3PRFQ_STRIDE 0
|
||||
`endif
|
||||
|
||||
// Fill Invalidator Size {Fill invalidator must be active}
|
||||
`ifndef L3FILL_INVALIDAOR_SIZE
|
||||
`define L3FILL_INVALIDAOR_SIZE 32
|
||||
|
||||
@@ -95,6 +95,8 @@ module VX_dmem_controller (
|
||||
.DFQQ_SIZE (`SDFQQ_SIZE),
|
||||
.LLVQ_SIZE (`SLLVQ_SIZE),
|
||||
.FFSQ_SIZE (`SFFSQ_SIZE),
|
||||
.PRFQ_SIZE (`SPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`SPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`SSIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
@@ -153,6 +155,7 @@ module VX_dmem_controller (
|
||||
// Snoop Request
|
||||
.snp_req (0),
|
||||
.snp_req_addr (0),
|
||||
.snp_req_delay (),
|
||||
|
||||
// Snoop Forward
|
||||
.snp_fwd (),
|
||||
@@ -177,6 +180,8 @@ module VX_dmem_controller (
|
||||
.DFQQ_SIZE (`DDFQQ_SIZE),
|
||||
.LLVQ_SIZE (`DLLVQ_SIZE),
|
||||
.FFSQ_SIZE (`DFFSQ_SIZE),
|
||||
.PRFQ_SIZE (`DPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`DPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`DSIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
@@ -263,6 +268,8 @@ module VX_dmem_controller (
|
||||
.DFQQ_SIZE (`IDFQQ_SIZE),
|
||||
.LLVQ_SIZE (`ILLVQ_SIZE),
|
||||
.FFSQ_SIZE (`IFFSQ_SIZE),
|
||||
.PRFQ_SIZE (`IPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`IPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`ISIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
|
||||
@@ -6,45 +6,47 @@ module VX_generic_queue_ll
|
||||
parameter SIZE = 277
|
||||
)
|
||||
(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire push,
|
||||
input wire[DATAW-1:0] in_data,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire push,
|
||||
input wire [DATAW-1:0] in_data,
|
||||
|
||||
input wire pop,
|
||||
output wire[DATAW-1:0] out_data,
|
||||
output wire empty,
|
||||
output wire full
|
||||
);
|
||||
input wire pop,
|
||||
output wire [DATAW-1:0] out_data,
|
||||
output wire empty,
|
||||
output wire full
|
||||
);
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
|
||||
if (SIZE == 0) begin
|
||||
|
||||
assign empty = 1;
|
||||
assign out_data = 0;
|
||||
assign full = 0;
|
||||
end else begin
|
||||
|
||||
`ifdef QUEUE_FORCE_MLAB
|
||||
end else begin // (SIZE > 0)
|
||||
|
||||
`ifdef QUEUE_FORCE_MLAB
|
||||
(* syn_ramstyle = "mlab" *) reg[DATAW-1:0] data[SIZE-1:0];
|
||||
`else
|
||||
reg[DATAW-1:0] data[SIZE-1:0];
|
||||
`endif
|
||||
`else
|
||||
reg[ DATAW-1:0] data[SIZE-1:0];
|
||||
`endif
|
||||
|
||||
reg[DATAW-1:0] curr_r, head_r;
|
||||
reg[$clog2(SIZE+1)-1:0] size_r;
|
||||
reg[$clog2(SIZE)-1:0] wr_ctr_r;
|
||||
reg[$clog2(SIZE)-1:0] rd_ptr_r, rd_next_ptr_r;
|
||||
reg empty_r, full_r, bypass_r;
|
||||
wire reading, writing;
|
||||
reg [DATAW-1:0] head_r;
|
||||
reg [$clog2(SIZE+1)-1:0] size_r;
|
||||
wire reading;
|
||||
wire writing;
|
||||
|
||||
assign reading = pop && !empty;
|
||||
assign writing = push && !full;
|
||||
|
||||
if (SIZE == 1) begin
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
size_r <= 0;
|
||||
size_r <= 0;
|
||||
head_r <= 0;
|
||||
end else begin
|
||||
if (writing && !reading) begin
|
||||
size_r <= 1;
|
||||
@@ -59,9 +61,19 @@ module VX_generic_queue_ll
|
||||
end
|
||||
|
||||
assign out_data = head_r;
|
||||
assign empty = (size_r == 0);
|
||||
assign full = (size_r != 0) && !pop;
|
||||
end else begin
|
||||
assign empty = (size_r == 0);
|
||||
assign full = (size_r != 0) && !pop;
|
||||
|
||||
end else begin // (SIZE > 1)
|
||||
|
||||
reg [DATAW-1:0] curr_r;
|
||||
reg [$clog2(SIZE)-1:0] wr_ctr_r;
|
||||
reg [$clog2(SIZE)-1:0] rd_ptr_r;
|
||||
reg [$clog2(SIZE)-1:0] rd_next_ptr_r;
|
||||
reg empty_r;
|
||||
reg full_r;
|
||||
reg bypass_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_ctr_r <= 0;
|
||||
@@ -99,9 +111,10 @@ module VX_generic_queue_ll
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr_r <= 0;
|
||||
curr_r <= 0;
|
||||
rd_ptr_r <= 0;
|
||||
rd_next_ptr_r <= 1;
|
||||
bypass_r <= 0;
|
||||
bypass_r <= 0;
|
||||
end else begin
|
||||
if (reading) begin
|
||||
if (SIZE == 2) begin
|
||||
@@ -123,7 +136,6 @@ module VX_generic_queue_ll
|
||||
assign empty = empty_r;
|
||||
assign full = full_r;
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
/* verilator lint_on WIDTH */
|
||||
|
||||
@@ -230,6 +230,8 @@ module Vortex_Cluster
|
||||
.DFQQ_SIZE (`LLDFQQ_SIZE),
|
||||
.LLVQ_SIZE (`LLLLVQ_SIZE),
|
||||
.FFSQ_SIZE (`LLFFSQ_SIZE),
|
||||
.PRFQ_SIZE (`LLPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`LLPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`LLFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`LLSIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
|
||||
@@ -234,6 +234,8 @@ module Vortex_SOC (
|
||||
.DFQQ_SIZE (`L3DFQQ_SIZE),
|
||||
.LLVQ_SIZE (`L3LLVQ_SIZE),
|
||||
.FFSQ_SIZE (`L3FFSQ_SIZE),
|
||||
.PRFQ_SIZE (`L3PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`L3PRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`L3SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
|
||||
@@ -25,9 +25,9 @@ module byte_enabled_simple_dual_port_ram
|
||||
always @(posedge clk, posedge reset) begin
|
||||
// TODO Clearing ram not currently supported on FPGA.
|
||||
if (reset) begin
|
||||
`ifdef ASIC
|
||||
// `ifdef ASIC
|
||||
for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] <= 0;
|
||||
`endif
|
||||
// `endif
|
||||
end
|
||||
else if(we) begin
|
||||
integer thread_ind;
|
||||
|
||||
@@ -357,8 +357,8 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||
|
||||
// #if NUMBER_CORES != 1
|
||||
// send snoops for L2 flush
|
||||
this->send_snoops(mem_addr, size);
|
||||
this->wait(PIPELINE_FLUSH_LATENCY);
|
||||
// this->send_snoops(mem_addr, size);
|
||||
// this->wait(PIPELINE_FLUSH_LATENCY);
|
||||
// #endif
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user