diff --git a/hw/opae/Makefile b/hw/opae/Makefile index fb6cbe25..21fd1c74 100644 --- a/hw/opae/Makefile +++ b/hw/opae/Makefile @@ -44,6 +44,12 @@ fpga-2c: gen_sources setup-fpga-2c fpga-4c: gen_sources setup-fpga-4c cd $(FPGA_BUILD_DIR)_4c && qsub-synth + +fpga-8c: gen_sources setup-fpga-8c + cd $(FPGA_BUILD_DIR)_8c && qsub-synth + +fpga-16c: gen_sources setup-fpga-16c + cd $(FPGA_BUILD_DIR)_16c && qsub-synth setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf @@ -51,6 +57,10 @@ setup-fpga-2c: $(FPGA_BUILD_DIR)_2c/build/dcp.qpf setup-fpga-4c: $(FPGA_BUILD_DIR)_4c/build/dcp.qpf +setup-fpga-8c: $(FPGA_BUILD_DIR)_8c/build/dcp.qpf + +setup-fpga-16c: $(FPGA_BUILD_DIR)_16c/build/dcp.qpf + $(FPGA_BUILD_DIR)_1c/build/dcp.qpf: afu_synth_setup -s sources_1c.txt $(FPGA_BUILD_DIR)_1c @@ -60,6 +70,12 @@ $(FPGA_BUILD_DIR)_2c/build/dcp.qpf: $(FPGA_BUILD_DIR)_4c/build/dcp.qpf: afu_synth_setup -s sources_4c.txt $(FPGA_BUILD_DIR)_4c +$(FPGA_BUILD_DIR)_8c/build/dcp.qpf: + afu_synth_setup -s sources_8c.txt $(FPGA_BUILD_DIR)_8c + +$(FPGA_BUILD_DIR)_16c/build/dcp.qpf: + afu_synth_setup -s sources_16c.txt $(FPGA_BUILD_DIR)_16c + run-ase-1c: cd $(ASE_BUILD_DIR)_1c && make sim @@ -87,5 +103,11 @@ clean-fpga-2c: clean-fpga-4c: rm -rf $(FPGA_BUILD_DIR)_4c sources.txt -clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c +clean-fpga-8c: + rm -rf $(FPGA_BUILD_DIR)_8c sources.txt + +clean-fpga-16c: + rm -rf $(FPGA_BUILD_DIR)_16c sources.txt + +clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c rm sources.txt \ No newline at end of file diff --git a/hw/opae/README b/hw/opae/README index 8e30eec6..33c0a918 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -101,6 +101,7 @@ kill -9 lsof +D build_ase_1c # quick off synthesis +make -C unittest clean && make -C unittest > unittest/build.log 2>&1 & make -C pipeline clean && make -C pipeline > pipeline/build.log 2>&1 & make -C cache clean && make -C cache > cache/build.log 2>&1 & make -C core clean && make -C core > core/build.log 2>&1 & @@ -110,6 +111,7 @@ make -C top clean && make -C top > top/build.log 2>&1 & make -C top1 clean && make -C top1 > top1/build.log 2>&1 & make -C top8 clean && make -C top8 > top8/build.log 2>&1 & make -C top16 clean && make -C top16 > top16/build.log 2>&1 & +make -C top32 clean && make -C top32 > top32/build.log 2>&1 & # How to calculate the maximum operating frequency? 200 Mhz -> period = 1/200x10^6 = 5ns diff --git a/hw/opae/sources_16c.txt b/hw/opae/sources_16c.txt new file mode 100644 index 00000000..637db1ac --- /dev/null +++ b/hw/opae/sources_16c.txt @@ -0,0 +1,11 @@ ++define+NUM_CORES=4 ++define+NUM_CLUSTERS=4 + ++define+SYNTHESIS ++define+QUARTUS ++define+FPU_FAST + +vortex_afu.json +QI:vortex_afu.qsf + +C:sources.txt \ No newline at end of file diff --git a/hw/opae/sources_8c.txt b/hw/opae/sources_8c.txt new file mode 100644 index 00000000..fa02d08c --- /dev/null +++ b/hw/opae/sources_8c.txt @@ -0,0 +1,10 @@ ++define+NUM_CORES=8 + ++define+SYNTHESIS ++define+QUARTUS ++define+FPU_FAST + +vortex_afu.json +QI:vortex_afu.qsf + +C:sources.txt \ No newline at end of file diff --git a/hw/rtl/VX_databus_arb.v b/hw/rtl/VX_databus_arb.v index 3eec3de5..ac7e3ff5 100644 --- a/hw/rtl/VX_databus_arb.v +++ b/hw/rtl/VX_databus_arb.v @@ -69,37 +69,48 @@ module VX_databus_arb ( // handle responses // - wire [1:0][RSP_DATAW-1:0] rsp_data_in; - wire [1:0] rsp_valid_in; - wire [1:0] rsp_ready_in; - - wire core_rsp_valid; - wire [`NUM_THREADS-1:0] core_rsp_valid_tmask; + if (`SM_ENABLE ) begin - assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, cache_rsp_if.tag}; - assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, smem_rsp_if.tag}; + wire [1:0][RSP_DATAW-1:0] rsp_data_in; + wire [1:0] rsp_valid_in; + wire [1:0] rsp_ready_in; + + wire core_rsp_valid; + wire [`NUM_THREADS-1:0] core_rsp_valid_tmask; - assign rsp_valid_in[0] = (| cache_rsp_if.valid); - assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE; + assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, cache_rsp_if.tag}; + assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, smem_rsp_if.tag}; - VX_stream_arbiter #( - .NUM_REQS ((`SM_ENABLE ? 2 : 1)), - .DATAW (RSP_DATAW), - .BUFFERED (0) - ) rsp_arb ( - .clk (clk), - .reset (reset), - .valid_in (rsp_valid_in), - .data_in (rsp_data_in), - .ready_in (rsp_ready_in), - .valid_out (core_rsp_valid), - .data_out ({core_rsp_valid_tmask, core_rsp_if.data, core_rsp_if.tag}), - .ready_out (core_rsp_if.ready) - ); + assign rsp_valid_in[0] = (| cache_rsp_if.valid); + assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE; - assign cache_rsp_if.ready = rsp_ready_in[0]; - assign smem_rsp_if.ready = rsp_ready_in[1]; + VX_stream_arbiter #( + .NUM_REQS (2), + .DATAW (RSP_DATAW), + .BUFFERED (0) + ) rsp_arb ( + .clk (clk), + .reset (reset), + .valid_in (rsp_valid_in), + .data_in (rsp_data_in), + .ready_in (rsp_ready_in), + .valid_out (core_rsp_valid), + .data_out ({core_rsp_valid_tmask, core_rsp_if.data, core_rsp_if.tag}), + .ready_out (core_rsp_if.ready) + ); - assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_valid_tmask; + assign cache_rsp_if.ready = rsp_ready_in[0]; + assign smem_rsp_if.ready = rsp_ready_in[1]; + + assign core_rsp_if.valid = {`NUM_THREADS{core_rsp_valid}} & core_rsp_valid_tmask; + + end else begin + + assign core_rsp_if.valid = cache_rsp_if.valid; + assign core_rsp_if.tag = cache_rsp_if.tag; + assign core_rsp_if.data = cache_rsp_if.data; + assign cache_rsp_if.ready = core_rsp_if.ready; + + end endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 7da1480f..2f8cac57 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -110,7 +110,8 @@ module VX_bank #( VX_input_queue #( .DATAW ($bits(dram_rsp_data)), - .SIZE (DRSQ_SIZE) + .SIZE (DRSQ_SIZE), + .FASTRAM (1) ) dram_rsp_queue ( .clk (clk), .reset (reset), @@ -164,7 +165,8 @@ module VX_bank #( VX_input_queue #( .DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + `WORD_WIDTH), - .SIZE (CREQ_SIZE) + .SIZE (CREQ_SIZE), + .FASTRAM (1) ) core_req_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index 5fc4c91b..f6c3b34c 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -2,18 +2,18 @@ module VX_cache_core_req_bank_sel #( // Size of line inside a bank in bytes - parameter CACHE_LINE_SIZE= 1, + parameter CACHE_LINE_SIZE = 64, // Size of a word in bytes - parameter WORD_SIZE = 1, + parameter WORD_SIZE = 4, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 4, // Number of Word requests per cycle - parameter NUM_REQS = 1, + parameter NUM_REQS = 4, // core request tag size - parameter CORE_TAG_WIDTH = 1, + parameter CORE_TAG_WIDTH = 3, // bank offset from beginning of index range - parameter BANK_ADDR_OFFSET = 0 + parameter BANK_ADDR_OFFSET = 0 ) ( input wire clk, input wire reset, @@ -62,7 +62,7 @@ module VX_cache_core_req_bank_sel #( per_bank_core_req_addr_r = 'x; per_bank_core_req_tag_r = 'x; per_bank_core_req_data_r = 'x; - + for (integer i = NUM_REQS-1; i >= 0; --i) begin if (core_req_valid[i]) begin per_bank_core_req_valid_r[core_req_bid[i]] = 1; diff --git a/hw/rtl/cache/VX_input_queue.v b/hw/rtl/cache/VX_input_queue.v index 5f94f21f..6685e725 100644 --- a/hw/rtl/cache/VX_input_queue.v +++ b/hw/rtl/cache/VX_input_queue.v @@ -4,7 +4,8 @@ module VX_input_queue #( parameter DATAW = 1, parameter SIZE = 2, parameter ADDRW = $clog2(SIZE), - parameter SIZEW = $clog2(SIZE+1) + parameter SIZEW = $clog2(SIZE+1), + parameter FASTRAM = 0 ) ( input wire clk, input wire reset, @@ -97,7 +98,7 @@ module VX_input_queue #( .SIZE(SIZE), .BUFFERED(0), .RWCHECK(1), - .FASTRAM(1) + .FASTRAM(FASTRAM) ) dp_ram ( .clk(clk), .waddr(wr_ptr_r), diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index 002dab64..fc97ef50 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -55,7 +55,7 @@ module VX_miss_resrv #( // dequeue input wire dequeue ); - `USE_FAST_BRAM reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; + reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table; reg [MSHR_SIZE-1:0] valid_table; reg [MSHR_SIZE-1:0] ready_table; diff --git a/hw/syn/quartus/.gitignore b/hw/syn/quartus/.gitignore index e1a705fb..e79a1802 100644 --- a/hw/syn/quartus/.gitignore +++ b/hw/syn/quartus/.gitignore @@ -1,3 +1,6 @@ +/unittest/* +!/unittest/Makefile + /cache/* !/cache/Makefile @@ -23,4 +26,10 @@ !/top2/Makefile /top8/* -!/top8/Makefile \ No newline at end of file +!/top8/Makefile + +/top16/* +!/top16/Makefile + +/top32/* +!/top32/Makefile \ No newline at end of file diff --git a/hw/syn/quartus/top16/Makefile b/hw/syn/quartus/top16/Makefile index 3583a832..9c411fa6 100644 --- a/hw/syn/quartus/top16/Makefile +++ b/hw/syn/quartus/top16/Makefile @@ -1,10 +1,10 @@ -FAMILY = "Arria 10" -DEVICE = 10AX115N3F40E2SG -FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 +#FAMILY = "Arria 10" +#DEVICE = 10AX115N3F40E2SG +#FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 -#FAMILY = "Stratix 10" -#DEVICE = 1SX280HN2F43E2VG -#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10 +FAMILY = "Stratix 10" +DEVICE = 1SX280HN2F43E2VG +FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10 PROJECT = vortex_afu TOP_LEVEL_ENTITY = vortex_afu diff --git a/hw/syn/quartus/top32/Makefile b/hw/syn/quartus/top32/Makefile new file mode 100644 index 00000000..17b0b04c --- /dev/null +++ b/hw/syn/quartus/top32/Makefile @@ -0,0 +1,76 @@ +#FAMILY = "Arria 10" +#DEVICE = 10AX115N3F40E2SG +#FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 + +FAMILY = "Stratix 10" +DEVICE = 1SX280HN2F43E2VG +FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10 + +PROJECT = vortex_afu +TOP_LEVEL_ENTITY = vortex_afu +SRC_FILE = vortex_afu.sv +FPU_INCLUDE = ../../../rtl/fp_cores;$(FPU_CORE_PATH);../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src +RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip +PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf + +# Executable Configuration +SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1 +FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on +ASM_ARGS = +STA_ARGS = --parallel --do_report_timing + +# Build targets +all: $(PROJECT).sta.rpt + +syn: $(PROJECT).syn.rpt + +fit: $(PROJECT).fit.rpt + +asm: $(PROJECT).asm.rpt + +sta: $(PROJECT).sta.rpt + +smart: smart.log + +# Target implementations +STAMP = echo done > + +$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES) + quartus_syn $(PROJECT) $(SYN_ARGS) + $(STAMP) fit.chg + +$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt + quartus_fit $(PROJECT) $(FIT_ARGS) + $(STAMP) asm.chg + $(STAMP) sta.chg + +$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt + quartus_asm $(PROJECT) $(ASM_ARGS) + +$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt + quartus_sta $(PROJECT) $(STA_ARGS) + +smart.log: $(PROJECT_FILES) + quartus_sh --determine_smart_action $(PROJECT) > smart.log + +# Project initialization +$(PROJECT_FILES): + quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=4" + +syn.chg: + $(STAMP) syn.chg + +fit.chg: + $(STAMP) fit.chg + +sta.chg: + $(STAMP) sta.chg + +asm.chg: + $(STAMP) asm.chg + +program: $(PROJECT).sof + quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof" + +clean: + rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox diff --git a/hw/syn/quartus/unittest/Makefile b/hw/syn/quartus/unittest/Makefile new file mode 100644 index 00000000..9644cf52 --- /dev/null +++ b/hw/syn/quartus/unittest/Makefile @@ -0,0 +1,72 @@ +PROJECT = Unittest +TOP_LEVEL_ENTITY = VX_cache_core_req_bank_sel +SRC_FILE = VX_cache_core_req_bank_sel.v +FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera/arria10;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src +RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache +PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf + +# Part, Family +FAMILY = "Arria 10" +DEVICE = 10AX115N3F40E2SG + +# Executable Configuration +SYN_ARGS = --parallel --read_settings_files=on +FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on +ASM_ARGS = +STA_ARGS = --parallel --do_report_timing + +# Build targets +all: $(PROJECT).sta.rpt + +syn: $(PROJECT).syn.rpt + +fit: $(PROJECT).fit.rpt + +asm: $(PROJECT).asm.rpt + +sta: $(PROJECT).sta.rpt + +smart: smart.log + +# Target implementations +STAMP = echo done > + +$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES) + quartus_syn $(PROJECT) $(SYN_ARGS) + $(STAMP) fit.chg + +$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt + quartus_fit $(PROJECT) $(FIT_ARGS) + $(STAMP) asm.chg + $(STAMP) sta.chg + +$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt + quartus_asm $(PROJECT) $(ASM_ARGS) + +$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt + quartus_sta $(PROJECT) $(STA_ARGS) + +smart.log: $(PROJECT_FILES) + quartus_sh --determine_smart_action $(PROJECT) > smart.log + +# Project initialization +$(PROJECT_FILES): + quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" + +syn.chg: + $(STAMP) syn.chg + +fit.chg: + $(STAMP) fit.chg + +sta.chg: + $(STAMP) sta.chg + +asm.chg: + $(STAMP) asm.chg + +program: $(PROJECT).sof + quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof" + +clean: + rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox