opae build fixes
This commit is contained in:
@@ -16,12 +16,12 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
|||||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||||
|
|
||||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||||
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||||
|
|
||||||
DEBUG=1
|
#DEBUG=1
|
||||||
#AFU=1
|
#AFU=1
|
||||||
|
|
||||||
CFLAGS += -fPIC
|
CFLAGS += -fPIC
|
||||||
|
|||||||
@@ -4,13 +4,16 @@ FPGA_BUILD_DIR=build_fpga
|
|||||||
|
|
||||||
all: ase-1c
|
all: ase-1c
|
||||||
|
|
||||||
ase-1c: setup-ase-1c
|
sources.txt:
|
||||||
|
./gen_sources.sh
|
||||||
|
|
||||||
|
ase-1c: setup-ase-1c sources.txt
|
||||||
make -C $(ASE_BUILD_DIR)_1c
|
make -C $(ASE_BUILD_DIR)_1c
|
||||||
|
|
||||||
ase-2c: setup-ase-2c
|
ase-2c: setup-ase-2c sources.txt
|
||||||
make -C $(ASE_BUILD_DIR)_2c
|
make -C $(ASE_BUILD_DIR)_2c
|
||||||
|
|
||||||
ase-4c: setup-ase-4c
|
ase-4c: setup-ase-4c sources.txt
|
||||||
make -C $(ASE_BUILD_DIR)_4c
|
make -C $(ASE_BUILD_DIR)_4c
|
||||||
|
|
||||||
setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile
|
setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile
|
||||||
@@ -28,13 +31,13 @@ $(ASE_BUILD_DIR)_2c/Makefile:
|
|||||||
$(ASE_BUILD_DIR)_4c/Makefile:
|
$(ASE_BUILD_DIR)_4c/Makefile:
|
||||||
afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c
|
afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c
|
||||||
|
|
||||||
fpga-1c: setup-fpga-1c
|
fpga-1c: setup-fpga-1c sources.txt
|
||||||
cd $(FPGA_BUILD_DIR)_1c && qsub-synth
|
cd $(FPGA_BUILD_DIR)_1c && qsub-synth
|
||||||
|
|
||||||
fpga-2c: setup-fpga-2c
|
fpga-2c: setup-fpga-2c sources.txt
|
||||||
cd $(FPGA_BUILD_DIR)_2c && qsub-synth
|
cd $(FPGA_BUILD_DIR)_2c && qsub-synth
|
||||||
|
|
||||||
fpga-4c: setup-fpga-4c
|
fpga-4c: setup-fpga-4c sources.txt
|
||||||
cd $(FPGA_BUILD_DIR)_4c && qsub-synth
|
cd $(FPGA_BUILD_DIR)_4c && qsub-synth
|
||||||
|
|
||||||
setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf
|
setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf
|
||||||
|
|||||||
@@ -60,8 +60,8 @@ qsub-sim
|
|||||||
make ase
|
make ase
|
||||||
|
|
||||||
# tests
|
# tests
|
||||||
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic
|
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256
|
||||||
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo
|
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
|
||||||
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
||||||
|
|
||||||
# modify "vsim_run.tcl" to dump VCD trace
|
# modify "vsim_run.tcl" to dump VCD trace
|
||||||
|
|||||||
21
hw/opae/gen_sources.sh
Executable file
21
hw/opae/gen_sources.sh
Executable file
@@ -0,0 +1,21 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl'
|
||||||
|
|
||||||
|
inc_list=""
|
||||||
|
for dir in $dir_list; do
|
||||||
|
inc_list="$inc_list -I$dir"
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "inc_list=$inc_list"
|
||||||
|
|
||||||
|
{
|
||||||
|
# read design sources
|
||||||
|
for dir in $dir_list; do
|
||||||
|
echo "+incdir+$dir"
|
||||||
|
for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f)
|
||||||
|
do
|
||||||
|
echo $file
|
||||||
|
done
|
||||||
|
done
|
||||||
|
} > sources.txt
|
||||||
@@ -1,46 +1,34 @@
|
|||||||
vortex_afu.json
|
|
||||||
|
|
||||||
QI:vortex_afu.qsf
|
|
||||||
|
|
||||||
#+define+SCOPE
|
|
||||||
|
|
||||||
#+define+DBG_PRINT_CORE_ICACHE
|
|
||||||
#+define+DBG_PRINT_CORE_DCACHE
|
|
||||||
#+define+DBG_PRINT_CACHE_BANK
|
|
||||||
#+define+DBG_PRINT_CACHE_SNP
|
|
||||||
#+define+DBG_PRINT_CACHE_MSRQ
|
|
||||||
#+define+DBG_PRINT_DRAM
|
|
||||||
#+define+DBG_PRINT_PIPELINE
|
|
||||||
#+define+DBG_PRINT_OPAE
|
|
||||||
#+define+DBG_PRINT_SCOPE
|
|
||||||
|
|
||||||
+incdir+.
|
|
||||||
+incdir+../rtl
|
|
||||||
+incdir+../rtl/interfaces
|
|
||||||
+incdir+../rtl/pipe_regs
|
|
||||||
+incdir+../rtl/cache
|
|
||||||
+incdir+../rtl/libs
|
+incdir+../rtl/libs
|
||||||
|
../rtl/libs/VX_countones.v
|
||||||
../rtl/VX_user_config.vh
|
../rtl/libs/VX_divide.v
|
||||||
../rtl/VX_config.vh
|
../rtl/libs/VX_fair_arbiter.v
|
||||||
../rtl/VX_define.vh
|
../rtl/libs/VX_fixed_arbiter.v
|
||||||
|
../rtl/libs/VX_generic_queue.v
|
||||||
../rtl/cache/VX_cache_config.vh
|
../rtl/libs/VX_generic_register.v
|
||||||
../rtl/cache/VX_cache.v
|
../rtl/libs/VX_generic_stack.v
|
||||||
../rtl/cache/VX_cache_core_rsp_merge.v
|
../rtl/libs/VX_index_queue.v
|
||||||
../rtl/cache/VX_cache_core_req_bank_sel.v
|
../rtl/libs/VX_matrix_arbiter.v
|
||||||
../rtl/cache/VX_cache_dram_req_arb.v
|
../rtl/libs/VX_mult.v
|
||||||
../rtl/cache/VX_cache_dram_fill_arb.v
|
../rtl/libs/VX_priority_encoder.v
|
||||||
../rtl/cache/VX_cache_miss_resrv.v
|
../rtl/libs/VX_rr_arbiter.v
|
||||||
|
../rtl/libs/VX_onehot_encooder.v
|
||||||
|
+incdir+../rtl/cache
|
||||||
../rtl/cache/VX_bank.v
|
../rtl/cache/VX_bank.v
|
||||||
../rtl/cache/VX_bank_core_req_arb.v
|
../rtl/cache/VX_bank_core_req_arb.v
|
||||||
|
../rtl/cache/VX_cache.v
|
||||||
|
../rtl/cache/VX_cache_core_req_bank_sel.v
|
||||||
|
../rtl/cache/VX_cache_core_rsp_merge.v
|
||||||
|
../rtl/cache/VX_cache_dram_fill_arb.v
|
||||||
|
../rtl/cache/VX_cache_dram_req_arb.v
|
||||||
|
../rtl/cache/VX_cache_miss_resrv.v
|
||||||
|
../rtl/cache/VX_prefetcher.v
|
||||||
|
../rtl/cache/VX_snp_forwarder.v
|
||||||
../rtl/cache/VX_snp_rsp_arb.v
|
../rtl/cache/VX_snp_rsp_arb.v
|
||||||
../rtl/cache/VX_tag_data_access.v
|
../rtl/cache/VX_tag_data_access.v
|
||||||
../rtl/cache/VX_tag_data_structure.v
|
../rtl/cache/VX_tag_data_structure.v
|
||||||
../rtl/cache/VX_snp_forwarder.v
|
+incdir+../rtl/interfaces
|
||||||
../rtl/cache/VX_prefetcher.v
|
../rtl/interfaces/VX_alu_req_if.v
|
||||||
|
../rtl/interfaces/VX_branch_ctl_if.v
|
||||||
../rtl/interfaces/VX_branch_rsp_if.v
|
|
||||||
../rtl/interfaces/VX_cache_core_req_if.v
|
../rtl/interfaces/VX_cache_core_req_if.v
|
||||||
../rtl/interfaces/VX_cache_core_rsp_if.v
|
../rtl/interfaces/VX_cache_core_rsp_if.v
|
||||||
../rtl/interfaces/VX_cache_dram_req_if.v
|
../rtl/interfaces/VX_cache_dram_req_if.v
|
||||||
@@ -48,65 +36,46 @@ QI:vortex_afu.qsf
|
|||||||
../rtl/interfaces/VX_cache_snp_req_if.v
|
../rtl/interfaces/VX_cache_snp_req_if.v
|
||||||
../rtl/interfaces/VX_cache_snp_rsp_if.v
|
../rtl/interfaces/VX_cache_snp_rsp_if.v
|
||||||
../rtl/interfaces/VX_csr_req_if.v
|
../rtl/interfaces/VX_csr_req_if.v
|
||||||
|
../rtl/interfaces/VX_commit_if.v
|
||||||
../rtl/interfaces/VX_csr_io_req_if.v
|
../rtl/interfaces/VX_csr_io_req_if.v
|
||||||
../rtl/interfaces/VX_csr_io_rsp_if.v
|
../rtl/interfaces/VX_decode_if.v
|
||||||
../rtl/interfaces/VX_exec_unit_req_if.v
|
../rtl/interfaces/VX_gpr_data_if.v
|
||||||
../rtl/interfaces/VX_backend_req_if.v
|
../rtl/interfaces/VX_gpu_req_if.v
|
||||||
../rtl/interfaces/VX_gpr_read_if.v
|
|
||||||
../rtl/interfaces/VX_gpu_inst_req_if.v
|
|
||||||
../rtl/interfaces/VX_inst_meta_if.v
|
|
||||||
../rtl/interfaces/VX_jal_rsp_if.v
|
|
||||||
../rtl/interfaces/VX_join_if.v
|
../rtl/interfaces/VX_join_if.v
|
||||||
../rtl/interfaces/VX_lsu_req_if.v
|
../rtl/interfaces/VX_lsu_req_if.v
|
||||||
../rtl/interfaces/VX_warp_ctl_if.v
|
../rtl/interfaces/VX_warp_ctl_if.v
|
||||||
../rtl/interfaces/VX_wb_if.v
|
../rtl/interfaces/VX_wb_if.v
|
||||||
../rtl/interfaces/VX_wstall_if.v
|
../rtl/interfaces/VX_wstall_if.v
|
||||||
|
../rtl/interfaces/VX_csr_io_rsp_if.v
|
||||||
../rtl/libs/VX_generic_register.v
|
../rtl/interfaces/VX_ifetch_req_if.v
|
||||||
../rtl/libs/VX_mult.v
|
../rtl/interfaces/VX_ifetch_rsp_if.v
|
||||||
../rtl/libs/VX_divide.v
|
../rtl/interfaces/VX_mul_req_if.v
|
||||||
../rtl/libs/VX_generic_stack.v
|
../rtl/interfaces/VX_perf_cntrs_if.v
|
||||||
../rtl/libs/VX_priority_encoder.v
|
+incdir+../rtl
|
||||||
../rtl/libs/VX_generic_queue.v
|
../rtl/VX_alu_unit.v
|
||||||
../rtl/libs/VX_indexable_queue.v
|
../rtl/VX_commit.v
|
||||||
../rtl/libs/VX_fair_arbiter.v
|
|
||||||
../rtl/libs/VX_fixed_arbiter.v
|
|
||||||
../rtl/libs/VX_rr_arbiter.v
|
|
||||||
../rtl/libs/VX_countones.v
|
|
||||||
../rtl/libs/VX_scope.v
|
|
||||||
|
|
||||||
../rtl/Vortex.v
|
|
||||||
../rtl/VX_cluster.v
|
../rtl/VX_cluster.v
|
||||||
../rtl/VX_core.v
|
../rtl/VX_core.v
|
||||||
../rtl/VX_mem_unit.v
|
|
||||||
../rtl/VX_pipeline.v
|
|
||||||
../rtl/VX_front_end.v
|
|
||||||
../rtl/VX_back_end.v
|
|
||||||
../rtl/VX_fetch.v
|
|
||||||
../rtl/VX_scheduler.v
|
|
||||||
../rtl/VX_exec_unit.v
|
|
||||||
../rtl/VX_warp.v
|
|
||||||
../rtl/VX_icache_stage.v
|
|
||||||
../rtl/VX_gpr_wrapper.v
|
|
||||||
../rtl/VX_gpu_inst.v
|
|
||||||
../rtl/VX_writeback.v
|
|
||||||
../rtl/VX_csr_pipe.v
|
|
||||||
../rtl/VX_csr_data.v
|
../rtl/VX_csr_data.v
|
||||||
../rtl/VX_csr_arb.v
|
../rtl/VX_csr_arb.v
|
||||||
|
../rtl/VX_dcache_arb.v
|
||||||
|
../rtl/VX_decode.v
|
||||||
../rtl/VX_csr_io_arb.v
|
../rtl/VX_csr_io_arb.v
|
||||||
../rtl/VX_warp_sched.v
|
../rtl/VX_fetch.v
|
||||||
|
../rtl/VX_csr_unit.v
|
||||||
../rtl/VX_gpr_ram.v
|
../rtl/VX_gpr_ram.v
|
||||||
../rtl/VX_gpr_stage.v
|
../rtl/VX_gpr_stage.v
|
||||||
../rtl/VX_alu_unit.v
|
../rtl/VX_execute.v
|
||||||
|
../rtl/VX_gpu_unit.v
|
||||||
|
../rtl/VX_icache_stage.v
|
||||||
|
../rtl/VX_issue.v
|
||||||
../rtl/VX_lsu_unit.v
|
../rtl/VX_lsu_unit.v
|
||||||
../rtl/VX_decode.v
|
|
||||||
../rtl/VX_inst_multiplex.v
|
|
||||||
../rtl/VX_dcache_arb.v
|
|
||||||
../rtl/VX_mem_arb.v
|
../rtl/VX_mem_arb.v
|
||||||
../rtl/VX_f_d_reg.v
|
../rtl/VX_mem_unit.v
|
||||||
../rtl/VX_i_d_reg.v
|
../rtl/VX_pipeline.v
|
||||||
../rtl/VX_d_e_reg.v
|
../rtl/VX_scheduler.v
|
||||||
|
../rtl/VX_issue_mux.v
|
||||||
ccip_interface_reg.sv
|
../rtl/VX_warp_sched.v
|
||||||
ccip_std_afu.sv
|
../rtl/VX_writeback.v
|
||||||
vortex_afu.sv
|
../rtl/Vortex.v
|
||||||
|
../rtl/VX_mul_unit.v
|
||||||
|
|||||||
@@ -1,3 +1,21 @@
|
|||||||
+define+NUM_CORES=1
|
+define+NUM_CORES=1
|
||||||
|
|
||||||
|
#+define+SCOPE
|
||||||
|
|
||||||
|
#+define+DBG_PRINT_CORE_ICACHE
|
||||||
|
#+define+DBG_PRINT_CORE_DCACHE
|
||||||
|
#+define+DBG_PRINT_CACHE_BANK
|
||||||
|
#+define+DBG_PRINT_CACHE_SNP
|
||||||
|
#+define+DBG_PRINT_CACHE_MSRQ
|
||||||
|
#+define+DBG_PRINT_DRAM
|
||||||
|
#+define+DBG_PRINT_PIPELINE
|
||||||
|
#+define+DBG_PRINT_OPAE
|
||||||
|
#+define+DBG_PRINT_SCOPE
|
||||||
|
|
||||||
|
vortex_afu.json
|
||||||
|
QI:vortex_afu.qsf
|
||||||
|
ccip_interface_reg.sv
|
||||||
|
ccip_std_afu.sv
|
||||||
|
vortex_afu.sv
|
||||||
|
|
||||||
C:sources.txt
|
C:sources.txt
|
||||||
@@ -1,4 +1,10 @@
|
|||||||
+define+NUM_CORES=2
|
+define+NUM_CORES=2
|
||||||
+define+L2_ENABLE=0
|
+define+L2_ENABLE=0
|
||||||
|
|
||||||
|
vortex_afu.json
|
||||||
|
QI:vortex_afu.qsf
|
||||||
|
ccip_interface_reg.sv
|
||||||
|
ccip_std_afu.sv
|
||||||
|
vortex_afu.sv
|
||||||
|
|
||||||
C:sources.txt
|
C:sources.txt
|
||||||
@@ -1,4 +1,10 @@
|
|||||||
+define+NUM_CORES=4
|
+define+NUM_CORES=4
|
||||||
+define+L2_ENABLE=0
|
+define+L2_ENABLE=0
|
||||||
|
|
||||||
|
vortex_afu.json
|
||||||
|
QI:vortex_afu.qsf
|
||||||
|
ccip_interface_reg.sv
|
||||||
|
ccip_std_afu.sv
|
||||||
|
vortex_afu.sv
|
||||||
|
|
||||||
C:sources.txt
|
C:sources.txt
|
||||||
@@ -13,7 +13,7 @@ module VX_alu_unit #(
|
|||||||
VX_branch_ctl_if branch_ctl_if,
|
VX_branch_ctl_if branch_ctl_if,
|
||||||
VX_commit_if alu_commit_if
|
VX_commit_if alu_commit_if
|
||||||
);
|
);
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||||
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
||||||
wire [`NUM_THREADS-1:0][32:0] shift_result;
|
wire [`NUM_THREADS-1:0][32:0] shift_result;
|
||||||
|
|
||||||
@@ -99,7 +99,7 @@ module VX_alu_unit #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)),
|
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32))
|
||||||
) alu_reg (
|
) alu_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|||||||
@@ -76,7 +76,7 @@
|
|||||||
|
|
||||||
`define CSR_WIDTH 12
|
`define CSR_WIDTH 12
|
||||||
|
|
||||||
`define DIV_LATENCY 2
|
`define DIV_LATENCY 21
|
||||||
|
|
||||||
`define MUL_LATENCY 2
|
`define MUL_LATENCY 2
|
||||||
|
|
||||||
@@ -390,6 +390,8 @@
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
task print_ex_type;
|
task print_ex_type;
|
||||||
input [`EX_BITS-1:0] ex;
|
input [`EX_BITS-1:0] ex;
|
||||||
begin
|
begin
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ module VX_lsu_unit #(
|
|||||||
wire [`NW_BITS-1:0] use_warp_num;
|
wire [`NW_BITS-1:0] use_warp_num;
|
||||||
wire [`WB_BITS-1:0] use_wb;
|
wire [`WB_BITS-1:0] use_wb;
|
||||||
wire [31:0] use_pc;
|
wire [31:0] use_pc;
|
||||||
|
wire mrq_full;
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
|
|
||||||
@@ -83,8 +84,7 @@ module VX_lsu_unit #(
|
|||||||
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, dbg_mrq_write_addr;
|
wire [`LOG2UP(`DCREQ_SIZE)-1:0] mrq_write_addr, dbg_mrq_write_addr;
|
||||||
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
|
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset;
|
||||||
wire [`BYTEEN_BITS-1:0] core_rsp_mem_read;
|
wire [`BYTEEN_BITS-1:0] core_rsp_mem_read;
|
||||||
wire mrq_full;
|
|
||||||
|
|
||||||
wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready
|
wire mrq_push = (| dcache_req_if.valid) && dcache_req_if.ready
|
||||||
&& (0 == use_req_rw); // only push read requests
|
&& (0 == use_req_rw); // only push read requests
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ module VX_mul_unit #(
|
|||||||
// Outputs
|
// Outputs
|
||||||
VX_commit_if mul_commit_if
|
VX_commit_if mul_commit_if
|
||||||
);
|
);
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_result;
|
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||||
wire [`NUM_THREADS-1:0][63:0] mul_result;
|
wire [`NUM_THREADS-1:0][63:0] mul_result;
|
||||||
wire [`NUM_THREADS-1:0][31:0] div_result;
|
wire [`NUM_THREADS-1:0][31:0] div_result;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rem_result;
|
wire [`NUM_THREADS-1:0][31:0] rem_result;
|
||||||
@@ -77,6 +77,8 @@ module VX_mul_unit #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
wire stall;
|
||||||
|
|
||||||
reg result_avail;
|
reg result_avail;
|
||||||
reg [4:0] pending_ctr;
|
reg [4:0] pending_ctr;
|
||||||
wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `DIV_LATENCY : `MUL_LATENCY;
|
wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `DIV_LATENCY : `MUL_LATENCY;
|
||||||
@@ -104,13 +106,13 @@ module VX_mul_unit #(
|
|||||||
|
|
||||||
wire pipeline_stall = ~result_avail && (| mul_req_if.valid);
|
wire pipeline_stall = ~result_avail && (| mul_req_if.valid);
|
||||||
|
|
||||||
wire stall = (~mul_commit_if.ready && (| mul_commit_if.valid))
|
assign stall = (~mul_commit_if.ready && (| mul_commit_if.valid))
|
||||||
|| pipeline_stall;
|
|| pipeline_stall;
|
||||||
|
|
||||||
wire flush = mul_commit_if.ready && pipeline_stall;
|
wire flush = mul_commit_if.ready && pipeline_stall;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32)),
|
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32))
|
||||||
) mul_reg (
|
) mul_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ module VX_warp_sched #(
|
|||||||
);
|
);
|
||||||
wire update_use_wspawn;
|
wire update_use_wspawn;
|
||||||
wire update_visible_active;
|
wire update_visible_active;
|
||||||
wire scheduled_warp;
|
|
||||||
|
|
||||||
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
|
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
|
||||||
|
|
||||||
|
|||||||
116
hw/rtl/Vortex.v
116
hw/rtl/Vortex.v
@@ -139,54 +139,54 @@ module Vortex (
|
|||||||
|
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
wire per_cluster_dram_req_valid [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid;
|
||||||
wire per_cluster_dram_req_rw [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw;
|
||||||
wire [`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen;
|
||||||
wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
|
||||||
wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
|
||||||
wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
|
||||||
wire l3_core_req_ready;
|
wire l3_core_req_ready;
|
||||||
|
|
||||||
wire per_cluster_dram_rsp_valid [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
|
||||||
wire [`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
|
||||||
wire [`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
|
||||||
wire per_cluster_dram_rsp_ready [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
|
||||||
|
|
||||||
wire per_cluster_snp_req_valid [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid;
|
||||||
wire [`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr;
|
||||||
wire per_cluster_snp_req_invalidate [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate;
|
||||||
wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag;
|
||||||
wire per_cluster_snp_req_ready [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready;
|
||||||
|
|
||||||
wire per_cluster_snp_rsp_valid [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid;
|
||||||
wire [`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
|
||||||
wire per_cluster_snp_rsp_ready [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
|
||||||
|
|
||||||
wire per_cluster_io_req_valid [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_valid;
|
||||||
wire per_cluster_io_req_rw [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
|
||||||
wire [3:0] per_cluster_io_req_byteen [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][3:0] per_cluster_io_req_byteen;
|
||||||
wire [29:0] per_cluster_io_req_addr [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][29:0] per_cluster_io_req_addr;
|
||||||
wire [31:0] per_cluster_io_req_data [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data;
|
||||||
wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;
|
||||||
wire per_cluster_io_req_ready [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready;
|
||||||
|
|
||||||
wire per_cluster_io_rsp_valid [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid;
|
||||||
wire [`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag;
|
||||||
wire [31:0] per_cluster_io_rsp_data [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data;
|
||||||
wire per_cluster_io_rsp_ready [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
|
||||||
|
|
||||||
wire per_cluster_csr_io_req_valid [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid;
|
||||||
wire [11:0] per_cluster_csr_io_req_addr [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr;
|
||||||
wire per_cluster_csr_io_req_rw [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw;
|
||||||
wire [31:0] per_cluster_csr_io_req_data [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data;
|
||||||
wire per_cluster_csr_io_req_ready [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_ready;
|
||||||
|
|
||||||
wire per_cluster_csr_io_rsp_valid [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_valid;
|
||||||
wire [31:0] per_cluster_csr_io_rsp_data [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_rsp_data;
|
||||||
wire per_cluster_csr_io_rsp_ready [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_rsp_ready;
|
||||||
|
|
||||||
wire per_cluster_busy [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||||
wire per_cluster_ebreak [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak;
|
||||||
|
|
||||||
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
|
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
|
||||||
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
|
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
|
||||||
@@ -336,27 +336,27 @@ module Vortex (
|
|||||||
|
|
||||||
// L3 Cache ///////////////////////////////////////////////////////////
|
// L3 Cache ///////////////////////////////////////////////////////////
|
||||||
|
|
||||||
wire l3_core_req_valid [`L3NUM_REQUESTS-1:0];
|
wire [`L3NUM_REQUESTS-1:0] l3_core_req_valid;
|
||||||
wire l3_core_req_rw [`L3NUM_REQUESTS-1:0];
|
wire [`L3NUM_REQUESTS-1:0] l3_core_req_rw;
|
||||||
wire [`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen [`L3NUM_REQUESTS-1:0];
|
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen;
|
||||||
wire [`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr [`L3NUM_REQUESTS-1:0];
|
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr;
|
||||||
wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data [`L3NUM_REQUESTS-1:0];
|
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data;
|
||||||
wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag [`L3NUM_REQUESTS-1:0];
|
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag;
|
||||||
|
|
||||||
wire l3_core_rsp_valid [`L3NUM_REQUESTS-1:0];
|
wire [`L3NUM_REQUESTS-1:0] l3_core_rsp_valid;
|
||||||
wire [`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data [`L3NUM_REQUESTS-1:0];
|
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data;
|
||||||
wire [`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag [`L3NUM_REQUESTS-1:0];
|
wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
|
||||||
wire l3_core_rsp_ready;
|
wire l3_core_rsp_ready;
|
||||||
|
|
||||||
wire l3_snp_fwdout_valid [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid;
|
||||||
wire [`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr;
|
||||||
wire l3_snp_fwdout_invalidate [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate;
|
||||||
wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag;
|
||||||
wire l3_snp_fwdout_ready [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready;
|
||||||
|
|
||||||
wire l3_snp_fwdin_valid [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid;
|
||||||
wire [`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
|
||||||
wire l3_snp_fwdin_ready [`NUM_CLUSTERS-1:0];
|
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
|
||||||
|
|
||||||
for (i = 0; i < `L3NUM_REQUESTS; i++) begin
|
for (i = 0; i < `L3NUM_REQUESTS; i++) begin
|
||||||
// Core Request
|
// Core Request
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
module VX_tex_mgr (
|
module VX_tex_mgr (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset
|
||||||
);
|
);
|
||||||
|
|
||||||
//--
|
//--
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ module VX_tex_unit #(
|
|||||||
parameter MAXAMW = 2,
|
parameter MAXAMW = 2,
|
||||||
parameter TAGW = 16,
|
parameter TAGW = 16,
|
||||||
|
|
||||||
parameter NUMCRQS = 32,
|
parameter NUMCRQS = 32
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ echo "inc_list=$inc_list"
|
|||||||
{
|
{
|
||||||
# read design sources
|
# read design sources
|
||||||
for dir in $dir_list; do
|
for dir in $dir_list; do
|
||||||
for file in $(find $dir -name '*.v' -o -name '*.sv' -type f)
|
for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f)
|
||||||
do
|
do
|
||||||
echo "read_verilog -sv $inc_list $file"
|
echo "read_verilog -sv $inc_list $file"
|
||||||
done
|
done
|
||||||
|
|||||||
Reference in New Issue
Block a user