8Warp 32Threads for GTCAD synthesis
This commit is contained in:
70
rtl/VX_alu.v
70
rtl/VX_alu.v
@@ -1,4 +1,3 @@
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
module VX_alu(
|
||||
@@ -13,6 +12,71 @@ module VX_alu(
|
||||
);
|
||||
|
||||
|
||||
`ifdef SYN_FUNC
|
||||
wire which_in2;
|
||||
|
||||
wire[31:0] ALU_in1;
|
||||
wire[31:0] ALU_in2;
|
||||
wire[63:0] ALU_in1_mult;
|
||||
wire[63:0] ALU_in2_mult;
|
||||
wire[31:0] upper_immed;
|
||||
wire[31:0] div_result;
|
||||
wire[31:0] rem_result;
|
||||
|
||||
|
||||
assign which_in2 = in_rs2_src == `RS2_IMMED;
|
||||
|
||||
assign ALU_in1 = in_1;
|
||||
|
||||
assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
|
||||
|
||||
|
||||
assign upper_immed = {in_upper_immed, {12{1'b0}}};
|
||||
|
||||
|
||||
|
||||
//always @(posedge `MUL) begin
|
||||
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
|
||||
|
||||
wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};
|
||||
wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2};
|
||||
assign ALU_in1_mult = (in_alu_op == `MULHU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in1} : alu_in1_signed;
|
||||
assign ALU_in2_mult = (in_alu_op == `MULHU || in_alu_op == `MULHSU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in2} : alu_in2_signed;
|
||||
wire[63:0] mult_result = ALU_in1_mult * ALU_in2_mult;
|
||||
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
// Combinational result mux for the SYN_FUNC build: selects out_alu_result
// according to in_alu_op.
//
// The sensitivity list is the implicit @(*) form. The previous explicit list
// (in_alu_op, ALU_in1, ALU_in2) omitted upper_immed, in_curr_PC and
// mult_result, all of which are read below, so a simulator could hold a stale
// result when only one of those signals changed.
always @(*) begin
    case(in_alu_op)
        `ADD:       out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
        `SUB:       out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
        // Shift amounts use only the low five bits of the second operand.
        `SLLA:      out_alu_result = ALU_in1 << ALU_in2[4:0];
        `SLT:       out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
        `SLTU:      out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
        `XOR:       out_alu_result = ALU_in1 ^ ALU_in2;
        `SRL:       out_alu_result = ALU_in1 >> ALU_in2[4:0];
        `SRA:       out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
        `OR:        out_alu_result = ALU_in1 | ALU_in2;
        `AND:       out_alu_result = ALU_in2 & ALU_in1;
        // Unsigned compare: all zeros when in1 >= in2, all ones otherwise.
        `SUBU:      out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
        `LUI_ALU:   out_alu_result = upper_immed;
        `AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
        // Multiplies share the 64-bit product; only the selected half differs.
        `MUL:       out_alu_result = mult_result[31:0];
        `MULH:      out_alu_result = mult_result[63:32];
        `MULHSU:    out_alu_result = mult_result[63:32];
        `MULHU:     out_alu_result = mult_result[63:32];
        // Divide by zero yields all ones for the quotient and the dividend for
        // the remainder. The outer $signed() keeps the inner signed operation
        // self-determined, so the unsigned branch of the conditional cannot
        // turn it into an unsigned divide/modulo.
        `DIV:       out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : $signed($signed(ALU_in1) / $signed(ALU_in2));
        `DIVU:      out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : ALU_in1 / ALU_in2;
        `REM:       out_alu_result = (ALU_in2 == 0) ? ALU_in1 : $signed($signed(ALU_in1) % $signed(ALU_in2));
        `REMU:      out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
        default:    out_alu_result = 32'h0;
    endcase // in_alu_op
end
|
||||
|
||||
`else
|
||||
wire which_in2;
|
||||
|
||||
wire[31:0] ALU_in1;
|
||||
@@ -69,7 +133,7 @@ module VX_alu(
|
||||
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
|
||||
default: out_alu_result = 32'h0;
|
||||
endcase // in_alu_op
|
||||
end
|
||||
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule // VX_alu
|
||||
@@ -1,11 +1,10 @@
|
||||
`include "./VX_define_synth.v"
|
||||
|
||||
|
||||
|
||||
`define NT 4
|
||||
`define NT_M1 (`NT-1)
|
||||
|
||||
// NW_M1 is actually log2(NW)
|
||||
//`define NW_M1 (4-1)
|
||||
|
||||
`define NW 8
|
||||
`define NW_M1 (`CLOG2(`NW))
|
||||
|
||||
// Uncomment the below line if NW=1
|
||||
@@ -13,6 +12,7 @@
|
||||
|
||||
// `define SYN 1
|
||||
// `define ASIC 1
|
||||
// `define SYN_FUNC 1
|
||||
|
||||
`define NUM_BARRIERS 4
|
||||
|
||||
|
||||
2
rtl/VX_define_synth.v
Normal file
2
rtl/VX_define_synth.v
Normal file
@@ -0,0 +1,2 @@
|
||||
`define NT 32
|
||||
`define NW 8
|
||||
156
rtl/VX_gpr.v
156
rtl/VX_gpr.v
@@ -85,83 +85,87 @@ module VX_gpr (
|
||||
|
||||
wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
|
||||
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(temp_a),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(VX_gpr_read.rs1),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask),
|
||||
.AB(VX_writeback_inter.rd),
|
||||
.DB(to_write),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
genvar curr_base_thread;
|
||||
// Step through the threads four at a time; `NT is a macro, so it must be
// referenced with a backtick (an apostrophe here is a syntax error).
for (curr_base_thread = 0; curr_base_thread < `NT; curr_base_thread=curr_base_thread+4)
|
||||
begin
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(VX_gpr_read.rs1[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 second_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(temp_b),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(VX_gpr_read.rs2),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask),
|
||||
.AB(VX_writeback_inter.rd),
|
||||
.DB(to_write),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 second_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(VX_gpr_read.rs2[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
|
||||
8
rtl/cache/VX_d_cache.v
vendored
8
rtl/cache/VX_d_cache.v
vendored
@@ -304,9 +304,15 @@ module VX_d_cache
|
||||
// 0;
|
||||
|
||||
wire[1:0] byte_select = bank_addr[1:0];
|
||||
wire[TAG_SIZE_END:TAG_SIZE_START] cache_tag = bank_addr[ADDR_TAG_END:ADDR_TAG_START];
|
||||
|
||||
`ifdef SYN_FUNC
|
||||
wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = 0;
|
||||
wire[IND_SIZE_END:IND_SIZE_START] cache_index = 0;
|
||||
`else
|
||||
wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = bank_addr[ADDR_OFFSET_END:ADDR_OFFSET_START];
|
||||
wire[IND_SIZE_END:IND_SIZE_START] cache_index = bank_addr[ADDR_IND_END:ADDR_IND_START];
|
||||
wire[TAG_SIZE_END:TAG_SIZE_START] cache_tag = bank_addr[ADDR_TAG_END:ADDR_TAG_START];
|
||||
`endif
|
||||
|
||||
|
||||
wire normal_valid_in = valid_per_bank[bank_id];
|
||||
|
||||
@@ -7,6 +7,7 @@ SRC = \
|
||||
vortex_dpi.cpp \
|
||||
vortex_tb.v \
|
||||
../VX_define.v \
|
||||
../VX_define_synth.v \
|
||||
../interfaces/VX_branch_response_inter.v \
|
||||
../interfaces/VX_csr_req_inter.v \
|
||||
../interfaces/VX_csr_wb_inter.v \
|
||||
|
||||
32
syn/Makefile
32
syn/Makefile
@@ -1,7 +1,33 @@
|
||||
|
||||
|
||||
all: syn
|
||||
SCRIPT_DIR=./scripts
|
||||
|
||||
all: dc
|
||||
|
||||
|
||||
syn:
|
||||
dc_shell-t -f fsyn.tcl 2>&1 | tee vortex_syn.log
|
||||
#syn:
|
||||
#dc_shell-t -f esyn.tcl 2>&1 | tee vortex_syn.log
|
||||
#dc_shell -f esyn.tcl 2>&1 | tee vortex_syn.log
|
||||
#dc_shell -f $(SCRIPT_DIR)/dc/dc_script.tcl
|
||||
|
||||
dc:
|
||||
rm -rf rpt
|
||||
mkdir rpt
|
||||
dc_shell -f esyn.tcl 2>&1 | tee vortex_syn.log
|
||||
|
||||
clean:
|
||||
rm -f simv
|
||||
rm -f *.vcd
|
||||
rm -f *.key
|
||||
rm -rf csrc/
|
||||
rm -rf *.rpt
|
||||
rm -rf *.log
|
||||
rm -rf *.svf
|
||||
rm -rf *.ddc
|
||||
rm -rf results_synthesized.v
|
||||
rm -rf results_synthesized.sdc
|
||||
rm -rf alib-52/
|
||||
rm -rf rpt/
|
||||
rm -rf simv.daidir/
|
||||
rm -rf encounter*
|
||||
rm -rf ./synth_out
|
||||
53
syn/esyn.tcl
Normal file
53
syn/esyn.tcl
Normal file
@@ -0,0 +1,53 @@
|
||||
#set search_path [concat /nethome/dshim8/Desktop/GTCAD-3DPKG-v3/example/tech/cln28hpm/2d_db/ /nethome/dshim8/Desktop/GTCAD-3DPKG-v3/example/tech/cln28hpm/2d_hard_db/ ../rtl/ ../rtl/interfaces ../rtl/pipe_regs ../rtl/shared_memory ../rtl/cache ../models/memory/cln28hpm/2d_hardmacro_db]
|
||||
set search_path [concat ../rtl/ ../rtl/interfaces ../rtl/pipe_regs ../rtl/shared_memory ../rtl/cache ../models/memory/cln28hpm/2d_hardmacro_db]
|
||||
set link_library [concat ./NanGate_15nm_OCL.db]
|
||||
set symbol_library {}
|
||||
set target_library [concat ./NanGate_15nm_OCL.db]
|
||||
|
||||
set verilog_files [ list VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v VX_cache_data_per_index.v VX_Cache_Bank.v VX_cache_data.v VX_d_cache.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_define_synth.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v Vortex.v VX_cache_bank_valid.v \
|
||||
]
|
||||
# set verilog_files [ list Vortex.v VX_countones.v VX_priority_encoder_w_mask.v VX_dram_req_rsp_inter.v cache_set.v VX_Cache_Bank.v VX_Cache_Block_DM.v VX_cache_data.v VX_d_cache.v VX_generic_pc.v VX_bank_valids.v VX_priority_encoder_sm.v VX_shared_memory.v VX_shared_memory_block.v VX_dmem_controller.v VX_generic_priority_encoder.v VX_generic_stack.v VX_join_inter.v VX_csr_wrapper.v VX_csr_req_inter.v VX_csr_wb_inter.v VX_gpgpu_inst.v VX_gpu_inst_req_inter.v VX_wstall_inter.v VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp_scheduler.v VX_writeback.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \
|
||||
# ]
|
||||
|
||||
set top_level Vortex
|
||||
analyze -format sverilog $verilog_files
|
||||
#analyze -format sverilog -error=LINT-66 $verilog_files
|
||||
elaborate Vortex
|
||||
link
|
||||
|
||||
set clk_freq 0.4
|
||||
set clk_period [expr 1000.0 / $clk_freq / 1.0]
|
||||
create_clock [get_ports clk] -period $clk_period
|
||||
set_max_fanout 20 [get_ports clk]
|
||||
set_ideal_network [get_ports clk]
|
||||
|
||||
set_max_fanout 20 [get_ports reset]
|
||||
set_false_path -from [get_ports reset]
|
||||
all_high_fanout -net -threshold 20
|
||||
|
||||
# set_register_merging Vortex FALSE
|
||||
# set compile_seqmap_propagate_constants false
|
||||
# set compile_seqmap_propagate_high_effort false
|
||||
|
||||
check_design
|
||||
compile_ultra -no_autoungroup
|
||||
ungroup -all -flatten
|
||||
uniquify
|
||||
|
||||
define_name_rules verilog -remove_internal_net_bus -remove_port_bus
|
||||
change_names -rule verilog -hierarchy
|
||||
|
||||
# report_qor
|
||||
report_area
|
||||
report_hierarchy
|
||||
report_cell
|
||||
report_reference
|
||||
report_port
|
||||
report_power
|
||||
|
||||
write -hierarchy -format verilog -output Vortex.netlist.v
|
||||
remove_ideal_network [get_ports clk]
|
||||
set_propagated_clock [get_ports clk]
|
||||
write_sdc -version 1.9 Vortex.sdc
|
||||
write_file -format ddc -output Vortex.ddc
|
||||
exit
|
||||
28
syn/run_mult_synth.sh
Normal file
28
syn/run_mult_synth.sh
Normal file
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
# Runs `make dc` once for every warp-count / thread-count combination.
# Before each run ../rtl/VX_define_synth.v is rewritten with the NT and NW
# defines for that combination; afterwards vortex_syn.log is moved two
# directories up under a name that records the configuration.

# Plain bash assignments. The csh-style `set name = value` form only
# overwrites bash's positional parameters and leaves the variable unset.
top_level=Vortex

# NOTE(review): the file name suggests a csh environment script; confirm it can
# be sourced from bash, or replace it with a bash equivalent.
source /tools/synopsys/synthesis/j201409/cshrc.syn
cur_dir=$(pwd)
echo "$cur_dir"

for number_of_warps in 2 4 8 16 32; do
    for number_of_threads in 2 4 8 16 32; do

        echo "Warp Count: $number_of_warps Thread Count: $number_of_threads Launched"
        # The escaped backtick emits a literal ` so the file contains Verilog defines.
        echo "\`define NT $number_of_threads" > ../rtl/VX_define_synth.v
        echo "\`define NW $number_of_warps" >> ../rtl/VX_define_synth.v
        make dc | tee run.log
        sleep 30
        moved_filename="${number_of_warps}_Warps__${number_of_threads}_threads__400MHz.log"
        mv ./vortex_syn.log "../../$moved_filename"
        sleep 30

        echo "Warp Count: $number_of_warps Thread Count: $number_of_threads Finished"
    done
done

echo "Done!"
|
||||
Reference in New Issue
Block a user