diff --git a/hw/rtl/VX_alu_unit.v b/hw/rtl/VX_alu_unit.v index 9751bb48..425a5dbf 100644 --- a/hw/rtl/VX_alu_unit.v +++ b/hw/rtl/VX_alu_unit.v @@ -25,7 +25,7 @@ module VX_alu_unit #( wire stall_in, stall_out; `UNUSED_VAR (alu_req_if.op_mod) - wire is_br_op = `IS_BR_MOD(alu_req_if.op_mod); + wire is_br_op = `ALU_IS_BR(alu_req_if.op_mod); wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type); wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type); wire alu_signed = `ALU_SIGNED(alu_op); @@ -117,7 +117,7 @@ module VX_alu_unit #( wire mul_wb; wire [`NUM_THREADS-1:0][31:0] mul_data; - wire is_mul_op = `IS_MUL_MOD(alu_req_if.op_mod); + wire is_mul_op = `ALU_IS_MUL(alu_req_if.op_mod); VX_muldiv muldiv ( .clk (clk), diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index b0209dec..7217e77c 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -49,8 +49,8 @@ module VX_csr_data #( end if (fpu_to_csr_if.write_enable) begin - fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fpu_to_csr_if.write_fflags - | fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0]; + fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] + | fpu_to_csr_if.write_fflags; end if (write_enable) begin diff --git a/hw/rtl/VX_csr_unit.v b/hw/rtl/VX_csr_unit.v index b14a7e71..6bf5277e 100644 --- a/hw/rtl/VX_csr_unit.v +++ b/hw/rtl/VX_csr_unit.v @@ -59,11 +59,11 @@ module VX_csr_unit #( wire [31:0] csr_read_data_qual = write_hazard ? csr_updated_data_s1 : csr_read_data; - reg [31:0] csr_updated_data; - + reg [31:0] csr_updated_data; reg csr_we_s0_unqual; always @(*) begin + csr_we_s0_unqual = (csr_req_data != 0); case (csr_req_if.op_type) `CSR_RW: begin csr_updated_data = csr_req_data; @@ -71,15 +71,10 @@ module VX_csr_unit #( end `CSR_RS: begin csr_updated_data = csr_read_data_qual | csr_req_data; - csr_we_s0_unqual = (csr_req_data != 0); - end - `CSR_RC: begin - csr_updated_data = csr_read_data_qual & ~csr_req_data; - csr_we_s0_unqual = (csr_req_data != 0); end + //`CSR_RC default: begin - csr_updated_data = 'x; - csr_we_s0_unqual = 0; + csr_updated_data = csr_read_data_qual & ~csr_req_data; end endcase end diff --git a/hw/rtl/VX_decode.v b/hw/rtl/VX_decode.v index 628a5f27..3dcc0517 100644 --- a/hw/rtl/VX_decode.v +++ b/hw/rtl/VX_decode.v @@ -2,9 +2,17 @@ `include "VX_print_instr.vh" `ifdef EXT_F_ENABLE - `define USED_REGS(f,r) used_regs[{f,r}] = 1 + `define SET_REG(d,f,s) \ + d = {f, s} + `define USED_REG(d,f,s) \ + `SET_REG(d,f,s); \ + used_regs[{f, s}] = 1 `else - `define USED_REGS(f,r) used_regs[r] = 1 + `define SET_REG(d,f,s) \ + d = s + `define USED_REG(d,f,s) \ + `SET_REG(d,f,s); \ + used_regs[s] = 1 `endif module VX_decode #( @@ -28,10 +36,9 @@ module VX_decode #( reg [`EX_BITS-1:0] ex_type; reg [`OP_BITS-1:0] op_type; reg [`MOD_BITS-1:0] op_mod; - reg [4:0] rd_r, rs1_r, rs2_r, rs3_r; + reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r; reg [31:0] imm; reg use_rd, use_PC, use_imm; - reg rd_fp, rs1_fp, rs2_fp; reg is_join, is_wstall; reg [`NUM_REGS-1:0] used_regs; @@ -43,8 +50,10 @@ module VX_decode #( wire [4:0] rd = instr[11:7]; wire [4:0] rs1 = instr[19:15]; - wire [4:0] rs2 = instr[24:20]; + wire [4:0] rs2 = instr[24:20]; +`ifdef EXT_F_ENABLE wire [4:0] rs3 = instr[31:27]; +`endif wire [19:0] upper_imm = {func7, rs2, rs1, func3}; wire [11:0] alu_imm = ((func3 == 3'h1) || (func3 == 3'h5)) ? {{7{1'b0}}, rs2} : u_12; @@ -56,20 +65,17 @@ module VX_decode #( ex_type = 0; op_type = 'x; op_mod = 'x; + rd_r = 'x; + rs1_r = 'x; + rs2_r = 'x; + rs3_r = 'x; imm = 'x; + use_imm = 'x; + use_PC = 'x; use_rd = 0; - use_PC = 0; - use_imm = 0; - rd_fp = 0; - rs1_fp = 0; - rs2_fp = 0; is_join = 0; is_wstall = 0; - used_regs = 0; - rd_r = rd; - rs1_r = rs1; - rs2_r = rs2; - rs3_r = rs3; + used_regs = 0; case (opcode) `INST_I: begin @@ -86,11 +92,12 @@ module VX_decode #( default:; endcase op_mod = 0; - imm = {{20{alu_imm[11]}}, alu_imm}; use_rd = 1; use_imm = 1; - `USED_REGS (1'b0, rd); - `USED_REGS (1'b0, rs1); + use_PC = 0; + imm = {{20{alu_imm[11]}}, alu_imm}; + `USED_REG (rd_r, 1'b0, rd); + `USED_REG (rs1_r, 1'b0, rs1); end `INST_R: begin ex_type = `EX_ALU; @@ -123,54 +130,57 @@ module VX_decode #( default:; endcase op_mod = 0; - end - use_rd = 1; - `USED_REGS (1'b0, rd); - `USED_REGS (1'b0, rs1); - `USED_REGS (1'b0, rs2); + end + use_rd = 1; + use_imm = 0; + use_PC = 0; + `USED_REG (rd_r, 1'b0, rd); + `USED_REG (rs1_r, 1'b0, rs1); + `USED_REG (rs2_r, 1'b0, rs2); end `INST_LUI: begin ex_type = `EX_ALU; op_type = `OP_BITS'(`ALU_LUI); op_mod = 0; - rs1_r = 0; - imm = {upper_imm, 12'(0)}; use_rd = 1; - use_imm = 1; - `USED_REGS (1'b0, rd); - `USED_REGS (1'b0, 5'b0); + use_imm = 1; + use_PC = 0; + imm = {upper_imm, 12'(0)}; + `USED_REG (rd_r, 1'b0, rd); + `USED_REG (rs1_r, 1'b0, 5'b0); end `INST_AUIPC: begin ex_type = `EX_ALU; op_type = `OP_BITS'(`ALU_AUIPC); op_mod = 0; - imm = {upper_imm, 12'(0)}; use_rd = 1; - use_PC = 1; use_imm = 1; - `USED_REGS (1'b0, rd); + use_PC = 1; + imm = {upper_imm, 12'(0)}; + `USED_REG (rd_r, 1'b0, rd); end `INST_JAL: begin ex_type = `EX_ALU; op_type = `OP_BITS'(`BR_JAL); op_mod = 1; - imm = {{11{jal_imm[20]}}, jal_imm}; use_rd = 1; - use_PC = 1; use_imm = 1; + use_PC = 1; is_wstall = 1; - `USED_REGS (1'b0, rd); + imm = {{11{jal_imm[20]}}, jal_imm}; + `USED_REG (rd_r, 1'b0, rd); end `INST_JALR: begin ex_type = `EX_ALU; op_type = `OP_BITS'(`BR_JALR); op_mod = 1; - imm = {{20{jalr_imm[11]}}, jalr_imm}; use_rd = 1; use_imm = 1; + use_PC = 0; is_wstall = 1; - `USED_REGS (1'b0, rd); - `USED_REGS (1'b0, rs1); + imm = {{20{jalr_imm[11]}}, jalr_imm}; + `USED_REG (rd_r, 1'b0, rd); + `USED_REG (rs1_r, 1'b0, rs1); end `INST_B: begin ex_type = `EX_ALU; @@ -184,12 +194,16 @@ module VX_decode #( default:; endcase op_mod = 1; - imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0}; - use_PC = 1; use_imm = 1; + use_PC = 1; is_wstall = 1; - `USED_REGS (1'b0, rs1); - `USED_REGS (1'b0, rs2); + imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0}; + `USED_REG (rs1_r, 1'b0, rs1); + `USED_REG (rs2_r, 1'b0, rs2); + end + `INST_F: begin + ex_type = `EX_LSU; + op_mod = `MOD_BITS'(0 == func3); // data fence end `INST_SYS : begin if (func3 == 0) begin @@ -203,26 +217,28 @@ module VX_decode #( default:; endcase op_mod = 1; - imm = 32'd4; use_rd = 1; - use_PC = 1; use_imm = 1; - `USED_REGS (1'b0, rd); + use_PC = 1; + imm = 32'd4; + `USED_REG (rd_r, 1'b0, rd); end else begin ex_type = `EX_CSR; case (func3[1:0]) - 2'h0: op_type = `OP_BITS'(`CSR_RW); 2'h1: op_type = `OP_BITS'(`CSR_RW); 2'h2: op_type = `OP_BITS'(`CSR_RS); 2'h3: op_type = `OP_BITS'(`CSR_RC); default:; endcase - imm = 32'(u_12); use_rd = 1; - use_imm = func3[2]; - `USED_REGS (1'b0, rd); - if (!func3[2]) - `USED_REGS (1'b0, rs1); + use_imm = func3[2]; + imm = 32'(u_12); // addr + `USED_REG (rd_r, 1'b0, rd); + if (func3[2]) begin + `SET_REG(rs1_r, 1'b0, rs1); // imm + end else begin + `USED_REG (rs1_r, 1'b0, rs1); + end end end `ifdef EXT_F_ENABLE @@ -231,13 +247,11 @@ module VX_decode #( `INST_L: begin ex_type = `EX_LSU; op_type = `OP_BITS'({1'b0, func3}); + op_mod = 0; + use_rd = 1; imm = {{20{u_12[11]}}, u_12}; - use_rd = 1; - `USED_REGS (1'b0, rs1); - `USED_REGS ((opcode == `INST_FL), rd); - `ifdef EXT_F_ENABLE - rd_fp = (opcode == `INST_FL); - `endif + `USED_REG (rd_r, (opcode == `INST_FL), rd); + `USED_REG (rs1_r, 1'b0, rs1); end `ifdef EXT_F_ENABLE `INST_FS, @@ -245,12 +259,10 @@ module VX_decode #( `INST_S: begin ex_type = `EX_LSU; op_type = `OP_BITS'({1'b1, func3}); + op_mod = 0; imm = {{20{func7[6]}}, func7, rd}; - `USED_REGS (1'b0, rs1); - `USED_REGS ((opcode == `INST_FS), rs2); - `ifdef EXT_F_ENABLE - rs2_fp = (opcode == `INST_FS); - `endif + `USED_REG (rs1_r, 1'b0, rs1); + `USED_REG (rs2_r, (opcode == `INST_FS), rs2); end `ifdef EXT_F_ENABLE `INST_FMADD, @@ -261,80 +273,61 @@ module VX_decode #( op_type = `OP_BITS'(opcode[3:0]); op_mod = func3; use_rd = 1; - rd_fp = 1; - rs1_fp = 1; - rs2_fp = 1; - `USED_REGS (1'b1, rd); - `USED_REGS (1'b1, rs1); - `USED_REGS (1'b1, rs2); - `USED_REGS (1'b1, rs3); + `USED_REG (rd_r, 1'b1, rd); + `USED_REG (rs1_r, 1'b1, rs1); + `USED_REG (rs2_r, 1'b1, rs2); + `USED_REG (rs3_r, 1'b1, rs3); end `INST_FCI: begin ex_type = `EX_FPU; - op_mod = func3; - use_rd = 1; + op_mod = func3; + use_rd = 1; case (func7) - 7'h00, // FADD - 7'h04, // FSUB - 7'h08, // FMUL - 7'h0C: // FDIV - begin + 7'h00, // FADD + 7'h04, // FSUB + 7'h08, // FMUL + 7'h0C: begin // FDIV op_type = `OP_BITS'(func7[3:0]); - rd_fp = 1; - rs1_fp = 1; - rs2_fp = 1; - `USED_REGS (1'b1, rd); - `USED_REGS (1'b1, rs1); - `USED_REGS (1'b1, rs2); + `USED_REG (rd_r, 1'b1, rd); + `USED_REG (rs1_r, 1'b1, rs1); + `USED_REG (rs2_r, 1'b1, rs2); end 7'h2C: begin op_type = `OP_BITS'(`FPU_SQRT); - rd_fp = 1; - rs1_fp = 1; - `USED_REGS (1'b1, rd); - `USED_REGS (1'b1, rs1); + `USED_REG (rd_r, 1'b1, rd); + `USED_REG (rs1_r, 1'b1, rs1); end 7'h50: begin op_type = `OP_BITS'(`FPU_CMP); - rs1_fp = 1; - rs2_fp = 1; - `USED_REGS (1'b0, rd); - `USED_REGS (1'b1, rs1); - `USED_REGS (1'b1, rs2); + `USED_REG (rd_r, 1'b0, rd); + `USED_REG (rs1_r, 1'b1, rs1); + `USED_REG (rs2_r, 1'b1, rs2); end 7'h60: begin op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS); - rs1_fp = 1; - `USED_REGS (1'b0, rd); - `USED_REGS (1'b1, rs1); + `USED_REG (rd_r, 1'b0, rd); + `USED_REG (rs1_r, 1'b1, rs1); end 7'h68: begin op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW); - rd_fp = 1; - `USED_REGS (1'b1, rd); - `USED_REGS (1'b0, rs1); + `USED_REG (rd_r, 1'b1, rd); + `USED_REG (rs1_r, 1'b0, rs1); end 7'h10: begin // FSGNJ=0, FSGNJN=1, FSGNJX=2 op_type = `OP_BITS'(`FPU_MISC); op_mod = {1'b0, func3[1:0]}; - rd_fp = 1; - rs1_fp = 1; - rs2_fp = 1; - `USED_REGS (1'b1, rd); - `USED_REGS (1'b1, rs1); - `USED_REGS (1'b1, rs2); + `USED_REG (rd_r, 1'b1, rd); + `USED_REG (rs1_r, 1'b1, rs1); + `USED_REG (rs2_r, 1'b1, rs2); end 7'h14: begin // FMIN=3, FMAX=4 op_type = `OP_BITS'(`FPU_MISC); op_mod = func3[0] ? 4 : 3; - rd_fp = 1; - rs1_fp = 1; - rs2_fp = 1; - `USED_REGS (1'b1, rd); - `USED_REGS (1'b1, rs1); - `USED_REGS (1'b1, rs2); + `USED_REG (rd_r, 1'b1, rd); + `USED_REG (rs1_r, 1'b1, rs1); + `USED_REG (rs2_r, 1'b1, rs2); end 7'h70: begin if (func3[0]) begin @@ -344,17 +337,16 @@ module VX_decode #( // FMV.X.W=5 op_type = `OP_BITS'(`FPU_MISC); op_mod = 5; - end - rs1_fp = 1; - `USED_REGS (1'b0, rd); - `USED_REGS (1'b1, rs1); + end + `USED_REG (rd_r, 1'b0, rd); + `USED_REG (rs1_r, 1'b1, rs1); end 7'h78: begin // FMV.W.X=6 op_type = `OP_BITS'(`FPU_MISC); - op_mod = 6; - rd_fp = 1; - `USED_REGS (1'b1, rd); + op_mod = 6; + `USED_REG (rd_r, 1'b1, rd); + `USED_REG (rs1_r, 1'b0, rs1); end default:; endcase @@ -366,17 +358,17 @@ module VX_decode #( 3'h0: begin op_type = `OP_BITS'(`GPU_TMC); is_wstall = 1; - `USED_REGS (1'b0, rs1); + `USED_REG (rs1_r, 1'b0, rs1); end 3'h1: begin op_type = `OP_BITS'(`GPU_WSPAWN); - `USED_REGS (1'b0, rs1); - `USED_REGS (1'b0, rs2); + `USED_REG (rs1_r, 1'b0, rs1); + `USED_REG (rs2_r, 1'b0, rs2); end 3'h2: begin op_type = `OP_BITS'(`GPU_SPLIT); is_wstall = 1; - `USED_REGS (1'b0, rs1); + `USED_REG (rs1_r, 1'b0, rs1); end 3'h3: begin op_type = `OP_BITS'(`GPU_JOIN); @@ -385,8 +377,8 @@ module VX_decode #( 3'h4: begin op_type = `OP_BITS'(`GPU_BAR); is_wstall = 1; - `USED_REGS (1'b0, rs1); - `USED_REGS (1'b0, rs2); + `USED_REG (rs1_r, 1'b0, rs1); + `USED_REG (rs2_r, 1'b0, rs2); end default:; endcase @@ -396,32 +388,20 @@ module VX_decode #( end // disable write to integer register r0 - wire wb = use_rd && (rd_fp || (rd_r != 0)); - - assign decode_if.valid = ifetch_rsp_if.valid; - assign decode_if.wid = ifetch_rsp_if.wid; - assign decode_if.tmask = ifetch_rsp_if.tmask; - assign decode_if.PC = ifetch_rsp_if.PC; - assign decode_if.ex_type = ex_type; - assign decode_if.op_type = op_type; - assign decode_if.op_mod = op_mod; - assign decode_if.wb = wb; - -`ifdef EXT_F_ENABLE - assign decode_if.rd = {rd_fp, rd_r}; - assign decode_if.rs1 = {rs1_fp, rs1_r}; - assign decode_if.rs2 = {rs2_fp, rs2_r}; - assign decode_if.rs3 = {1'b1, rs3_r}; -`else - `UNUSED_VAR (rd_fp) - `UNUSED_VAR (rs1_fp) - `UNUSED_VAR (rs2_fp) - assign decode_if.rd = rd_r; - assign decode_if.rs1 = rs1_r; - assign decode_if.rs2 = rs2_r; - assign decode_if.rs3 = rs3_r; -`endif + wire wb = use_rd && (| rd_r); + assign decode_if.valid = ifetch_rsp_if.valid; + assign decode_if.wid = ifetch_rsp_if.wid; + assign decode_if.tmask = ifetch_rsp_if.tmask; + assign decode_if.PC = ifetch_rsp_if.PC; + assign decode_if.ex_type = ex_type; + assign decode_if.op_type = op_type; + assign decode_if.op_mod = op_mod; + assign decode_if.wb = wb; + assign decode_if.rd = rd_r; + assign decode_if.rs1 = rs1_r; + assign decode_if.rs2 = rs2_r; + assign decode_if.rs3 = rs3_r; assign decode_if.imm = imm; assign decode_if.use_PC = use_PC; assign decode_if.use_imm = use_imm; diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 2e7004db..dd8926ac 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -99,6 +99,8 @@ `define ALU_OP(x) x[`ALU_BITS-1:0] `define ALU_OP_CLASS(x) x[3:2] `define ALU_SIGNED(x) x[0] +`define ALU_IS_BR(x) x[0] +`define ALU_IS_MUL(x) x[1] `define BR_EQ 4'b0000 `define BR_NE 4'b0010 @@ -119,7 +121,6 @@ `define BR_NEG(x) x[1] `define BR_LESS(x) x[2] `define BR_STATIC(x) x[3] -`define IS_BR_MOD(x) x[0] `define MUL_MUL 3'h0 `define MUL_MULH 3'h1 @@ -131,8 +132,7 @@ `define MUL_REMU 3'h7 `define MUL_BITS 3 `define MUL_OP(x) x[`MUL_BITS-1:0] -`define IS_DIV_OP(x) x[2] -`define IS_MUL_MOD(x) x[1] +`define MUL_IS_DIV(x) x[2] `define FMT_B 3'b000 `define FMT_H 3'b001 @@ -152,6 +152,7 @@ `define LSU_FMT(x) x[2:0] `define LSU_WSIZE(x) x[1:0] `define LSU_OP(x) x[`LSU_BITS-1:0] +`define LSU_IS_FENCE(x) x[0] `define CSR_RW 2'h0 `define CSR_RS 2'h1 diff --git a/hw/rtl/VX_execute.v b/hw/rtl/VX_execute.v index c8178ffd..6ea747b4 100644 --- a/hw/rtl/VX_execute.v +++ b/hw/rtl/VX_execute.v @@ -128,7 +128,7 @@ module VX_execute #( // special workaround to get RISC-V tests Pass/Fail status wire ebreak /* verilator public */; assign ebreak = alu_req_if.valid && alu_req_if.ready - && `IS_BR_MOD(alu_req_if.op_mod) + && `ALU_IS_BR(alu_req_if.op_mod) && (`BR_OP(alu_req_if.op_type) == `BR_EBREAK || `BR_OP(alu_req_if.op_type) == `BR_ECALL); diff --git a/hw/rtl/VX_instr_demux.v b/hw/rtl/VX_instr_demux.v index ff00971f..42d4fc5e 100644 --- a/hw/rtl/VX_instr_demux.v +++ b/hw/rtl/VX_instr_demux.v @@ -53,16 +53,17 @@ module VX_instr_demux ( // lsu unit wire lsu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_LSU); + wire lsu_is_fence = `LSU_IS_FENCE(ibuffer_if.op_mod); VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)) ) lsu_buffer ( .clk (clk), .reset (reset), .valid_in (lsu_req_valid), .ready_in (lsu_req_ready), - .data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `LSU_OP(ibuffer_if.op_type), ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), - .data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}), + .data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `LSU_OP(ibuffer_if.op_type), lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}), + .data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}), .valid_out (lsu_req_if.valid), .ready_out (lsu_req_if.ready) ); @@ -88,7 +89,7 @@ module VX_instr_demux ( `ifdef EXT_F_ENABLE wire fpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_FPU); - + VX_skid_buffer #( .DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) ) fpu_buffer ( diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 572de2fd..908c4e8e 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -41,6 +41,8 @@ module VX_lsu_unit #( wire [`NW_BITS-1:0] req_wid; wire [31:0] req_pc; wire req_is_dup; + + wire mbuf_empty; wire [`NUM_THREADS-1:0][ADDR_TYPEW-1:0] lsu_addr_type, req_addr_type; @@ -69,9 +71,14 @@ module VX_lsu_unit #( assign lsu_addr_type[i] = is_addr_nc; end end + + // fence stalls the pipeline until all pending requests are sent + wire fence_wait = lsu_req_if.is_fence && (req_valid || !mbuf_empty); wire ready_in; wire stall_in = ~ready_in && req_valid; + + wire lsu_valid = lsu_req_if.valid && ~fence_wait; VX_pipe_register #( .DATAW (1 + 1 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * ADDR_TYPEW) + `LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)), @@ -80,12 +87,12 @@ module VX_lsu_unit #( .clk (clk), .reset (reset), .enable (!stall_in), - .data_in ({lsu_req_if.valid, lsu_is_dup, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}), - .data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data}) + .data_in ({lsu_valid, lsu_is_dup, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, full_addr, lsu_addr_type, lsu_req_if.op_type, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.store_data}), + .data_out ({req_valid, req_is_dup, req_wid, req_tmask, req_pc, req_addr, req_addr_type, req_type, req_rd, req_wb, req_data}) ); // Can accept new request? - assign lsu_req_if.ready = ~stall_in; + assign lsu_req_if.ready = ~stall_in && ~fence_wait; wire [`NW_BITS-1:0] rsp_wid; wire [31:0] rsp_pc; @@ -137,7 +144,7 @@ module VX_lsu_unit #( .release_addr (mbuf_raddr), .release_slot (mbuf_pop), .full (mbuf_full), - `UNUSED_PIN (empty) + .empty (mbuf_empty) ); wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1}; @@ -309,7 +316,10 @@ module VX_lsu_unit #( end end - always @(posedge clk) begin + always @(posedge clk) begin + if (lsu_req_if.valid && fence_wait) begin + $display("%t: *** D$%0d fence wait", $time, CORE_ID); + end if ((| dcache_req_fire)) begin if (dcache_req_if.rw[0]) begin $write("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire); diff --git a/hw/rtl/VX_muldiv.v b/hw/rtl/VX_muldiv.v index 36ba9d28..189d596f 100644 --- a/hw/rtl/VX_muldiv.v +++ b/hw/rtl/VX_muldiv.v @@ -33,7 +33,7 @@ module VX_muldiv ( input wire ready_out ); - wire is_div_op = `IS_DIV_OP(alu_op); + wire is_div_op = `MUL_IS_DIV(alu_op); wire [`NUM_THREADS-1:0][31:0] mul_result; wire [`NW_BITS-1:0] mul_wid_out; diff --git a/hw/rtl/VX_print_instr.vh b/hw/rtl/VX_print_instr.vh index 81f63aeb..4559c199 100644 --- a/hw/rtl/VX_print_instr.vh +++ b/hw/rtl/VX_print_instr.vh @@ -23,7 +23,7 @@ task print_ex_op ( ); case (ex_type) `EX_ALU: begin - if (`IS_BR_MOD(op_mod)) begin + if (`ALU_IS_BR(op_mod)) begin case (`BR_BITS'(op_type)) `BR_EQ: $write("BEQ"); `BR_NE: $write("BNE"); @@ -40,7 +40,7 @@ task print_ex_op ( `BR_DRET: $write("DRET"); default: $write("?"); endcase - end else if (`IS_MUL_MOD(op_mod)) begin + end else if (`ALU_IS_MUL(op_mod)) begin case (`MUL_BITS'(op_type)) `MUL_MUL: $write("MUL"); `MUL_MULH: $write("MULH"); diff --git a/hw/rtl/interfaces/VX_lsu_req_if.v b/hw/rtl/interfaces/VX_lsu_req_if.v index ad84c277..c9797b0f 100644 --- a/hw/rtl/interfaces/VX_lsu_req_if.v +++ b/hw/rtl/interfaces/VX_lsu_req_if.v @@ -10,6 +10,7 @@ interface VX_lsu_req_if (); wire [`NUM_THREADS-1:0] tmask; wire [31:0] PC; wire [`LSU_BITS-1:0] op_type; + wire is_fence; wire [`NUM_THREADS-1:0][31:0] store_data; wire [`NUM_THREADS-1:0][31:0] base_addr; wire [31:0] offset; diff --git a/runtime/include/vx_intrinsics.h b/runtime/include/vx_intrinsics.h index 402e167f..68c3911d 100644 --- a/runtime/include/vx_intrinsics.h +++ b/runtime/include/vx_intrinsics.h @@ -140,6 +140,10 @@ inline int vx_num_cores() { return result; } +inline void vx_fence() { + asm volatile ("fence iorw, iorw"); +} + #define __if(b) vx_split(b); \ if (b) diff --git a/tests/regression/Makefile b/tests/regression/Makefile index 280c9b4b..114c2173 100644 --- a/tests/regression/Makefile +++ b/tests/regression/Makefile @@ -6,6 +6,7 @@ all: $(MAKE) -C io_addr $(MAKE) -C printf $(MAKE) -C diverge + $(MAKE) -C fence run: $(MAKE) -C basic run-vlsim @@ -15,6 +16,7 @@ run: $(MAKE) -C io_addr run-vlsim $(MAKE) -C printf run-vlsim $(MAKE) -C diverge run-vlsim + $(MAKE) -C fence run-vlsim clean: $(MAKE) -C basic clean @@ -24,6 +26,7 @@ clean: $(MAKE) -C io_addr clean $(MAKE) -C printf clean $(MAKE) -C diverge clean + $(MAKE) -C fence clean clean-all: $(MAKE) -C basic clean-all @@ -33,4 +36,5 @@ clean-all: $(MAKE) -C io_addr clean-all $(MAKE) -C printf clean-all $(MAKE) -C diverge clean-all + $(MAKE) -C fence clean-all diff --git a/tests/regression/fence/Makefile b/tests/regression/fence/Makefile new file mode 100644 index 00000000..7440669f --- /dev/null +++ b/tests/regression/fence/Makefile @@ -0,0 +1,70 @@ +RISCV_TOOLCHAIN_PATH ?= /opt/riscv-gnu-toolchain +VORTEX_DRV_PATH ?= $(realpath ../../../driver) +VORTEX_RT_PATH ?= $(realpath ../../../runtime) + +OPTS ?= -n64 + +VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc +VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++ +VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump +VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy + +VX_CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections +VX_CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw + +VX_LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a + +VX_SRCS = kernel.c + +#CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors +CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors + +CXXFLAGS += -I$(VORTEX_DRV_PATH)/include + +LDFLAGS += -L$(VORTEX_DRV_PATH)/stub -lvortex + +PROJECT = fence + +SRCS = main.cpp + +all: $(PROJECT) kernel.bin kernel.dump + +kernel.dump: kernel.elf + $(VX_DP) -D kernel.elf > kernel.dump + +kernel.bin: kernel.elf + $(VX_CP) -O binary kernel.elf kernel.bin + +kernel.elf: $(VX_SRCS) + $(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +run-simx: $(PROJECT) kernel.bin + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-fpga: $(PROJECT) kernel.bin + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-asesim: $(PROJECT) kernel.bin + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-vlsim: $(PROJECT) kernel.bin + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-rtlsim: $(PROJECT) kernel.bin + LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; + +clean: + rm -rf $(PROJECT) *.o .depend + +clean-all: clean + rm -rf *.elf *.bin *.dump + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/tests/regression/fence/common.h b/tests/regression/fence/common.h new file mode 100644 index 00000000..d6540ae1 --- /dev/null +++ b/tests/regression/fence/common.h @@ -0,0 +1,14 @@ +#ifndef _COMMON_H_ +#define _COMMON_H_ + +#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 + +struct kernel_arg_t { + uint32_t num_tasks; + uint32_t task_size; + uint32_t src0_ptr; + uint32_t src1_ptr; + uint32_t dst_ptr; +}; + +#endif \ No newline at end of file diff --git a/tests/regression/fence/kernel.bin b/tests/regression/fence/kernel.bin new file mode 100755 index 00000000..667365f5 Binary files /dev/null and b/tests/regression/fence/kernel.bin differ diff --git a/tests/regression/fence/kernel.c b/tests/regression/fence/kernel.c new file mode 100644 index 00000000..1401bc87 --- /dev/null +++ b/tests/regression/fence/kernel.c @@ -0,0 +1,25 @@ +#include +#include +#include +#include "common.h" + +void kernel_body(int task_id, void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->task_size; + int32_t* src0_ptr = (int32_t*)_arg->src0_ptr; + int32_t* src1_ptr = (int32_t*)_arg->src1_ptr; + int32_t* dst_ptr = (int32_t*)_arg->dst_ptr; + + uint32_t offset = task_id * count; + + for (uint32_t i = 0; i < count; ++i) { + dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i]; + } + + vx_fence(); +} + +void main() { + struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; + vx_spawn_tasks(arg->num_tasks, kernel_body, arg); +} \ No newline at end of file diff --git a/tests/regression/fence/kernel.dump b/tests/regression/fence/kernel.dump new file mode 100644 index 00000000..c871403f --- /dev/null +++ b/tests/regression/fence/kernel.dump @@ -0,0 +1,697 @@ + +kernel.elf: file format elf32-littleriscv + + +Disassembly of section .init: + +80000000 <_start>: +80000000: 00000597 auipc a1,0x0 +80000004: 0e858593 addi a1,a1,232 # 800000e8 +80000008: fc102573 csrr a0,0xfc1 +8000000c: 00b5106b 0xb5106b +80000010: 0d8000ef jal ra,800000e8 +80000014: 00100513 li a0,1 +80000018: 0005006b 0x5006b +8000001c: 00002517 auipc a0,0x2 +80000020: d3050513 addi a0,a0,-720 # 80001d4c +80000024: 00002617 auipc a2,0x2 +80000028: da860613 addi a2,a2,-600 # 80001dcc <__BSS_END__> +8000002c: 40a60633 sub a2,a2,a0 +80000030: 00000593 li a1,0 +80000034: 64c000ef jal ra,80000680 +80000038: 00000517 auipc a0,0x0 +8000003c: 55050513 addi a0,a0,1360 # 80000588 <__libc_fini_array> +80000040: 500000ef jal ra,80000540 +80000044: 5a0000ef jal ra,800005e4 <__libc_init_array> +80000048: 008000ef jal ra,80000050
+8000004c: 5080006f j 80000554 + +Disassembly of section .text: + +80000050
: +80000050: 7ffff7b7 lui a5,0x7ffff +80000054: 0007a503 lw a0,0(a5) # 7ffff000 <__stack_size+0x7fffec00> +80000058: 800005b7 lui a1,0x80000 +8000005c: 7ffff637 lui a2,0x7ffff +80000060: 08058593 addi a1,a1,128 # 80000080 <__stack_top+0x81000080> +80000064: 1800006f j 800001e4 + +80000068 : +80000068: 00000793 li a5,0 +8000006c: 00078863 beqz a5,8000007c +80000070: 80000537 lui a0,0x80000 +80000074: 58850513 addi a0,a0,1416 # 80000588 <__stack_top+0x81000588> +80000078: 4c80006f j 80000540 +8000007c: 00008067 ret + +80000080 : +80000080: 0045a683 lw a3,4(a1) +80000084: 0085a603 lw a2,8(a1) +80000088: 00c5a703 lw a4,12(a1) +8000008c: 02d50533 mul a0,a0,a3 +80000090: 0105a803 lw a6,16(a1) +80000094: 04068063 beqz a3,800000d4 +80000098: 00a686b3 add a3,a3,a0 +8000009c: 00269693 slli a3,a3,0x2 +800000a0: 00251513 slli a0,a0,0x2 +800000a4: 00c507b3 add a5,a0,a2 +800000a8: 00c686b3 add a3,a3,a2 +800000ac: 40c80833 sub a6,a6,a2 +800000b0: 40c70533 sub a0,a4,a2 +800000b4: 00f50733 add a4,a0,a5 +800000b8: 0007a583 lw a1,0(a5) +800000bc: 00072703 lw a4,0(a4) +800000c0: 00f80633 add a2,a6,a5 +800000c4: 00478793 addi a5,a5,4 +800000c8: 00b70733 add a4,a4,a1 +800000cc: 00e62023 sw a4,0(a2) # 7ffff000 <__stack_size+0x7fffec00> +800000d0: fef692e3 bne a3,a5,800000b4 +800000d4: 0ff0000f fence +800000d8: 00008067 ret + +800000dc <_exit>: +800000dc: 250000ef jal ra,8000032c +800000e0: 00000513 li a0,0 +800000e4: 0005006b 0x5006b + +800000e8 : +800000e8: fc002573 csrr a0,0xfc0 +800000ec: 0005006b 0x5006b +800000f0: 00002197 auipc gp,0x2 +800000f4: 03018193 addi gp,gp,48 # 80002120 <__global_pointer> +800000f8: 7f000117 auipc sp,0x7f000 +800000fc: f0810113 addi sp,sp,-248 # ff000000 <__stack_top> +80000100: 40000593 li a1,1024 +80000104: cc102673 csrr a2,0xcc1 +80000108: 02c585b3 mul a1,a1,a2 +8000010c: 40b10133 sub sp,sp,a1 +80000110: cc3026f3 csrr a3,0xcc3 +80000114: 00068663 beqz a3,80000120 +80000118: 00000513 li a0,0 +8000011c: 0005006b 0x5006b + +80000120 : +80000120: 00008067 ret + +80000124 : +80000124: fe010113 addi sp,sp,-32 +80000128: 00112e23 sw ra,28(sp) +8000012c: 00812c23 sw s0,24(sp) +80000130: 00912a23 sw s1,20(sp) +80000134: 01212823 sw s2,16(sp) +80000138: 01312623 sw s3,12(sp) +8000013c: fc0027f3 csrr a5,0xfc0 +80000140: 0007806b 0x7806b +80000144: cc5027f3 csrr a5,0xcc5 +80000148: cc3029f3 csrr s3,0xcc3 +8000014c: cc002773 csrr a4,0xcc0 +80000150: fc002673 csrr a2,0xfc0 +80000154: 00279693 slli a3,a5,0x2 +80000158: 800027b7 lui a5,0x80002 +8000015c: d4c78793 addi a5,a5,-692 # 80001d4c <__stack_top+0x81001d4c> +80000160: 00d787b3 add a5,a5,a3 +80000164: 0007a483 lw s1,0(a5) +80000168: 0104a403 lw s0,16(s1) +8000016c: 00c4a683 lw a3,12(s1) +80000170: 0089a933 slt s2,s3,s0 +80000174: 00040793 mv a5,s0 +80000178: 00d90933 add s2,s2,a3 +8000017c: 03368433 mul s0,a3,s3 +80000180: 00f9d463 bge s3,a5,80000188 +80000184: 00098793 mv a5,s3 +80000188: 00f40433 add s0,s0,a5 +8000018c: 0084a683 lw a3,8(s1) +80000190: 02c40433 mul s0,s0,a2 +80000194: 02e907b3 mul a5,s2,a4 +80000198: 00d40433 add s0,s0,a3 +8000019c: 00f40433 add s0,s0,a5 +800001a0: 00890933 add s2,s2,s0 +800001a4: 01245e63 bge s0,s2,800001c0 +800001a8: 0004a783 lw a5,0(s1) +800001ac: 0044a583 lw a1,4(s1) +800001b0: 00040513 mv a0,s0 +800001b4: 00140413 addi s0,s0,1 +800001b8: 000780e7 jalr a5 +800001bc: fe8916e3 bne s2,s0,800001a8 +800001c0: 0019b993 seqz s3,s3 +800001c4: 0009806b 0x9806b +800001c8: 01c12083 lw ra,28(sp) +800001cc: 01812403 lw s0,24(sp) +800001d0: 01412483 lw s1,20(sp) +800001d4: 01012903 lw s2,16(sp) +800001d8: 00c12983 lw s3,12(sp) +800001dc: 02010113 addi sp,sp,32 +800001e0: 00008067 ret + +800001e4 : +800001e4: fc010113 addi sp,sp,-64 +800001e8: 02112e23 sw ra,60(sp) +800001ec: 02812c23 sw s0,56(sp) +800001f0: 02912a23 sw s1,52(sp) +800001f4: 03212823 sw s2,48(sp) +800001f8: 03312623 sw s3,44(sp) +800001fc: fc2026f3 csrr a3,0xfc2 +80000200: fc102873 csrr a6,0xfc1 +80000204: fc002473 csrr s0,0xfc0 +80000208: cc5027f3 csrr a5,0xcc5 +8000020c: 01f00713 li a4,31 +80000210: 0cf74463 blt a4,a5,800002d8 +80000214: 030408b3 mul a7,s0,a6 +80000218: 00100713 li a4,1 +8000021c: 00a8d463 bge a7,a0,80000224 +80000220: 03154733 div a4,a0,a7 +80000224: 0ce6c863 blt a3,a4,800002f4 +80000228: 0ae7d863 bge a5,a4,800002d8 +8000022c: fff68693 addi a3,a3,-1 +80000230: 02e54333 div t1,a0,a4 +80000234: 00030893 mv a7,t1 +80000238: 00f69663 bne a3,a5,80000244 +8000023c: 02e56533 rem a0,a0,a4 +80000240: 006508b3 add a7,a0,t1 +80000244: 0288c4b3 div s1,a7,s0 +80000248: 0288e933 rem s2,a7,s0 +8000024c: 0b04ca63 blt s1,a6,80000300 +80000250: 00100693 li a3,1 +80000254: 0304c733 div a4,s1,a6 +80000258: 00070663 beqz a4,80000264 +8000025c: 00070693 mv a3,a4 +80000260: 0304e733 rem a4,s1,a6 +80000264: 800029b7 lui s3,0x80002 +80000268: d4c98993 addi s3,s3,-692 # 80001d4c <__stack_top+0x81001d4c> +8000026c: 00e12e23 sw a4,28(sp) +80000270: 00c10713 addi a4,sp,12 +80000274: 00b12623 sw a1,12(sp) +80000278: 00c12823 sw a2,16(sp) +8000027c: 00d12c23 sw a3,24(sp) +80000280: 02f30333 mul t1,t1,a5 +80000284: 00279793 slli a5,a5,0x2 +80000288: 00f987b3 add a5,s3,a5 +8000028c: 00e7a023 sw a4,0(a5) +80000290: 00612a23 sw t1,20(sp) +80000294: 06904c63 bgtz s1,8000030c +80000298: 04090063 beqz s2,800002d8 +8000029c: 02848433 mul s0,s1,s0 +800002a0: 00812a23 sw s0,20(sp) +800002a4: 0009006b 0x9006b +800002a8: cc5027f3 csrr a5,0xcc5 +800002ac: cc202573 csrr a0,0xcc2 +800002b0: 00279793 slli a5,a5,0x2 +800002b4: 00f989b3 add s3,s3,a5 +800002b8: 0009a783 lw a5,0(s3) +800002bc: 0087a683 lw a3,8(a5) +800002c0: 0007a703 lw a4,0(a5) +800002c4: 0047a583 lw a1,4(a5) +800002c8: 00d50533 add a0,a0,a3 +800002cc: 000700e7 jalr a4 +800002d0: 00100793 li a5,1 +800002d4: 0007806b 0x7806b +800002d8: 03c12083 lw ra,60(sp) +800002dc: 03812403 lw s0,56(sp) +800002e0: 03412483 lw s1,52(sp) +800002e4: 03012903 lw s2,48(sp) +800002e8: 02c12983 lw s3,44(sp) +800002ec: 04010113 addi sp,sp,64 +800002f0: 00008067 ret +800002f4: 00068713 mv a4,a3 +800002f8: f2e7cae3 blt a5,a4,8000022c +800002fc: fddff06f j 800002d8 +80000300: 00000713 li a4,0 +80000304: 00100693 li a3,1 +80000308: f5dff06f j 80000264 +8000030c: 00048713 mv a4,s1 +80000310: 00985463 bge a6,s1,80000318 +80000314: 00080713 mv a4,a6 +80000318: 800007b7 lui a5,0x80000 +8000031c: 12478793 addi a5,a5,292 # 80000124 <__stack_top+0x81000124> +80000320: 00f7106b 0xf7106b +80000324: e01ff0ef jal ra,80000124 +80000328: f71ff06f j 80000298 + +8000032c : +8000032c: cc5027f3 csrr a5,0xcc5 +80000330: 00ff0737 lui a4,0xff0 +80000334: 00e787b3 add a5,a5,a4 +80000338: 00879793 slli a5,a5,0x8 +8000033c: b0002773 csrr a4,mcycle +80000340: 00e7a023 sw a4,0(a5) +80000344: b0102773 csrr a4,0xb01 +80000348: 00e7a223 sw a4,4(a5) +8000034c: b0202773 csrr a4,minstret +80000350: 00e7a423 sw a4,8(a5) +80000354: b0302773 csrr a4,mhpmcounter3 +80000358: 00e7a623 sw a4,12(a5) +8000035c: b0402773 csrr a4,mhpmcounter4 +80000360: 00e7a823 sw a4,16(a5) +80000364: b0502773 csrr a4,mhpmcounter5 +80000368: 00e7aa23 sw a4,20(a5) +8000036c: b0602773 csrr a4,mhpmcounter6 +80000370: 00e7ac23 sw a4,24(a5) +80000374: b0702773 csrr a4,mhpmcounter7 +80000378: 00e7ae23 sw a4,28(a5) +8000037c: b0802773 csrr a4,mhpmcounter8 +80000380: 02e7a023 sw a4,32(a5) +80000384: b0902773 csrr a4,mhpmcounter9 +80000388: 02e7a223 sw a4,36(a5) +8000038c: b0a02773 csrr a4,mhpmcounter10 +80000390: 02e7a423 sw a4,40(a5) +80000394: b0b02773 csrr a4,mhpmcounter11 +80000398: 02e7a623 sw a4,44(a5) +8000039c: b0c02773 csrr a4,mhpmcounter12 +800003a0: 02e7a823 sw a4,48(a5) +800003a4: b0d02773 csrr a4,mhpmcounter13 +800003a8: 02e7aa23 sw a4,52(a5) +800003ac: b0e02773 csrr a4,mhpmcounter14 +800003b0: 02e7ac23 sw a4,56(a5) +800003b4: b0f02773 csrr a4,mhpmcounter15 +800003b8: 02e7ae23 sw a4,60(a5) +800003bc: b1002773 csrr a4,mhpmcounter16 +800003c0: 04e7a023 sw a4,64(a5) +800003c4: b1102773 csrr a4,mhpmcounter17 +800003c8: 04e7a223 sw a4,68(a5) +800003cc: b1202773 csrr a4,mhpmcounter18 +800003d0: 04e7a423 sw a4,72(a5) +800003d4: b1302773 csrr a4,mhpmcounter19 +800003d8: 04e7a623 sw a4,76(a5) +800003dc: b1402773 csrr a4,mhpmcounter20 +800003e0: 04e7a823 sw a4,80(a5) +800003e4: b1502773 csrr a4,mhpmcounter21 +800003e8: 04e7aa23 sw a4,84(a5) +800003ec: b1602773 csrr a4,mhpmcounter22 +800003f0: 04e7ac23 sw a4,88(a5) +800003f4: b1702773 csrr a4,mhpmcounter23 +800003f8: 04e7ae23 sw a4,92(a5) +800003fc: b1802773 csrr a4,mhpmcounter24 +80000400: 06e7a023 sw a4,96(a5) +80000404: b1902773 csrr a4,mhpmcounter25 +80000408: 06e7a223 sw a4,100(a5) +8000040c: b1a02773 csrr a4,mhpmcounter26 +80000410: 06e7a423 sw a4,104(a5) +80000414: b1b02773 csrr a4,mhpmcounter27 +80000418: 06e7a623 sw a4,108(a5) +8000041c: b1c02773 csrr a4,mhpmcounter28 +80000420: 06e7a823 sw a4,112(a5) +80000424: b1d02773 csrr a4,mhpmcounter29 +80000428: 06e7aa23 sw a4,116(a5) +8000042c: b1e02773 csrr a4,mhpmcounter30 +80000430: 06e7ac23 sw a4,120(a5) +80000434: b1f02773 csrr a4,mhpmcounter31 +80000438: 06e7ae23 sw a4,124(a5) +8000043c: b8002773 csrr a4,mcycleh +80000440: 08e7a023 sw a4,128(a5) +80000444: b8102773 csrr a4,0xb81 +80000448: 08e7a223 sw a4,132(a5) +8000044c: b8202773 csrr a4,minstreth +80000450: 08e7a423 sw a4,136(a5) +80000454: b8302773 csrr a4,mhpmcounter3h +80000458: 08e7a623 sw a4,140(a5) +8000045c: b8402773 csrr a4,mhpmcounter4h +80000460: 08e7a823 sw a4,144(a5) +80000464: b8502773 csrr a4,mhpmcounter5h +80000468: 08e7aa23 sw a4,148(a5) +8000046c: b8602773 csrr a4,mhpmcounter6h +80000470: 08e7ac23 sw a4,152(a5) +80000474: b8702773 csrr a4,mhpmcounter7h +80000478: 08e7ae23 sw a4,156(a5) +8000047c: b8802773 csrr a4,mhpmcounter8h +80000480: 0ae7a023 sw a4,160(a5) +80000484: b8902773 csrr a4,mhpmcounter9h +80000488: 0ae7a223 sw a4,164(a5) +8000048c: b8a02773 csrr a4,mhpmcounter10h +80000490: 0ae7a423 sw a4,168(a5) +80000494: b8b02773 csrr a4,mhpmcounter11h +80000498: 0ae7a623 sw a4,172(a5) +8000049c: b8c02773 csrr a4,mhpmcounter12h +800004a0: 0ae7a823 sw a4,176(a5) +800004a4: b8d02773 csrr a4,mhpmcounter13h +800004a8: 0ae7aa23 sw a4,180(a5) +800004ac: b8e02773 csrr a4,mhpmcounter14h +800004b0: 0ae7ac23 sw a4,184(a5) +800004b4: b8f02773 csrr a4,mhpmcounter15h +800004b8: 0ae7ae23 sw a4,188(a5) +800004bc: b9002773 csrr a4,mhpmcounter16h +800004c0: 0ce7a023 sw a4,192(a5) +800004c4: b9102773 csrr a4,mhpmcounter17h +800004c8: 0ce7a223 sw a4,196(a5) +800004cc: b9202773 csrr a4,mhpmcounter18h +800004d0: 0ce7a423 sw a4,200(a5) +800004d4: b9302773 csrr a4,mhpmcounter19h +800004d8: 0ce7a623 sw a4,204(a5) +800004dc: b9402773 csrr a4,mhpmcounter20h +800004e0: 0ce7a823 sw a4,208(a5) +800004e4: b9502773 csrr a4,mhpmcounter21h +800004e8: 0ce7aa23 sw a4,212(a5) +800004ec: b9602773 csrr a4,mhpmcounter22h +800004f0: 0ce7ac23 sw a4,216(a5) +800004f4: b9702773 csrr a4,mhpmcounter23h +800004f8: 0ce7ae23 sw a4,220(a5) +800004fc: b9802773 csrr a4,mhpmcounter24h +80000500: 0ee7a023 sw a4,224(a5) +80000504: b9902773 csrr a4,mhpmcounter25h +80000508: 0ee7a223 sw a4,228(a5) +8000050c: b9a02773 csrr a4,mhpmcounter26h +80000510: 0ee7a423 sw a4,232(a5) +80000514: b9b02773 csrr a4,mhpmcounter27h +80000518: 0ee7a623 sw a4,236(a5) +8000051c: b9c02773 csrr a4,mhpmcounter28h +80000520: 0ee7a823 sw a4,240(a5) +80000524: b9d02773 csrr a4,mhpmcounter29h +80000528: 0ee7aa23 sw a4,244(a5) +8000052c: b9e02773 csrr a4,mhpmcounter30h +80000530: 0ee7ac23 sw a4,248(a5) +80000534: b9f02773 csrr a4,mhpmcounter31h +80000538: 0ee7ae23 sw a4,252(a5) +8000053c: 00008067 ret + +80000540 : +80000540: 00050593 mv a1,a0 +80000544: 00000693 li a3,0 +80000548: 00000613 li a2,0 +8000054c: 00000513 li a0,0 +80000550: 20c0006f j 8000075c <__register_exitproc> + +80000554 : +80000554: ff010113 addi sp,sp,-16 +80000558: 00000593 li a1,0 +8000055c: 00812423 sw s0,8(sp) +80000560: 00112623 sw ra,12(sp) +80000564: 00050413 mv s0,a0 +80000568: 290000ef jal ra,800007f8 <__call_exitprocs> +8000056c: 800027b7 lui a5,0x80002 +80000570: d487a503 lw a0,-696(a5) # 80001d48 <__stack_top+0x81001d48> +80000574: 03c52783 lw a5,60(a0) +80000578: 00078463 beqz a5,80000580 +8000057c: 000780e7 jalr a5 +80000580: 00040513 mv a0,s0 +80000584: b59ff0ef jal ra,800000dc <_exit> + +80000588 <__libc_fini_array>: +80000588: ff010113 addi sp,sp,-16 +8000058c: 00812423 sw s0,8(sp) +80000590: 800027b7 lui a5,0x80002 +80000594: 80002437 lui s0,0x80002 +80000598: 92040413 addi s0,s0,-1760 # 80001920 <__stack_top+0x81001920> +8000059c: 92078793 addi a5,a5,-1760 # 80001920 <__stack_top+0x81001920> +800005a0: 408787b3 sub a5,a5,s0 +800005a4: 00912223 sw s1,4(sp) +800005a8: 00112623 sw ra,12(sp) +800005ac: 4027d493 srai s1,a5,0x2 +800005b0: 02048063 beqz s1,800005d0 <__libc_fini_array+0x48> +800005b4: ffc78793 addi a5,a5,-4 +800005b8: 00878433 add s0,a5,s0 +800005bc: 00042783 lw a5,0(s0) +800005c0: fff48493 addi s1,s1,-1 +800005c4: ffc40413 addi s0,s0,-4 +800005c8: 000780e7 jalr a5 +800005cc: fe0498e3 bnez s1,800005bc <__libc_fini_array+0x34> +800005d0: 00c12083 lw ra,12(sp) +800005d4: 00812403 lw s0,8(sp) +800005d8: 00412483 lw s1,4(sp) +800005dc: 01010113 addi sp,sp,16 +800005e0: 00008067 ret + +800005e4 <__libc_init_array>: +800005e4: ff010113 addi sp,sp,-16 +800005e8: 00812423 sw s0,8(sp) +800005ec: 01212023 sw s2,0(sp) +800005f0: 80002437 lui s0,0x80002 +800005f4: 80002937 lui s2,0x80002 +800005f8: 91c40793 addi a5,s0,-1764 # 8000191c <__stack_top+0x8100191c> +800005fc: 91c90913 addi s2,s2,-1764 # 8000191c <__stack_top+0x8100191c> +80000600: 40f90933 sub s2,s2,a5 +80000604: 00112623 sw ra,12(sp) +80000608: 00912223 sw s1,4(sp) +8000060c: 40295913 srai s2,s2,0x2 +80000610: 02090063 beqz s2,80000630 <__libc_init_array+0x4c> +80000614: 91c40413 addi s0,s0,-1764 +80000618: 00000493 li s1,0 +8000061c: 00042783 lw a5,0(s0) +80000620: 00148493 addi s1,s1,1 +80000624: 00440413 addi s0,s0,4 +80000628: 000780e7 jalr a5 +8000062c: fe9918e3 bne s2,s1,8000061c <__libc_init_array+0x38> +80000630: 80002437 lui s0,0x80002 +80000634: 80002937 lui s2,0x80002 +80000638: 91c40793 addi a5,s0,-1764 # 8000191c <__stack_top+0x8100191c> +8000063c: 92090913 addi s2,s2,-1760 # 80001920 <__stack_top+0x81001920> +80000640: 40f90933 sub s2,s2,a5 +80000644: 40295913 srai s2,s2,0x2 +80000648: 02090063 beqz s2,80000668 <__libc_init_array+0x84> +8000064c: 91c40413 addi s0,s0,-1764 +80000650: 00000493 li s1,0 +80000654: 00042783 lw a5,0(s0) +80000658: 00148493 addi s1,s1,1 +8000065c: 00440413 addi s0,s0,4 +80000660: 000780e7 jalr a5 +80000664: fe9918e3 bne s2,s1,80000654 <__libc_init_array+0x70> +80000668: 00c12083 lw ra,12(sp) +8000066c: 00812403 lw s0,8(sp) +80000670: 00412483 lw s1,4(sp) +80000674: 00012903 lw s2,0(sp) +80000678: 01010113 addi sp,sp,16 +8000067c: 00008067 ret + +80000680 : +80000680: 00f00313 li t1,15 +80000684: 00050713 mv a4,a0 +80000688: 02c37e63 bgeu t1,a2,800006c4 +8000068c: 00f77793 andi a5,a4,15 +80000690: 0a079063 bnez a5,80000730 +80000694: 08059263 bnez a1,80000718 +80000698: ff067693 andi a3,a2,-16 +8000069c: 00f67613 andi a2,a2,15 +800006a0: 00e686b3 add a3,a3,a4 +800006a4: 00b72023 sw a1,0(a4) # ff0000 <__stack_size+0xfefc00> +800006a8: 00b72223 sw a1,4(a4) +800006ac: 00b72423 sw a1,8(a4) +800006b0: 00b72623 sw a1,12(a4) +800006b4: 01070713 addi a4,a4,16 +800006b8: fed766e3 bltu a4,a3,800006a4 +800006bc: 00061463 bnez a2,800006c4 +800006c0: 00008067 ret +800006c4: 40c306b3 sub a3,t1,a2 +800006c8: 00269693 slli a3,a3,0x2 +800006cc: 00000297 auipc t0,0x0 +800006d0: 005686b3 add a3,a3,t0 +800006d4: 00c68067 jr 12(a3) +800006d8: 00b70723 sb a1,14(a4) +800006dc: 00b706a3 sb a1,13(a4) +800006e0: 00b70623 sb a1,12(a4) +800006e4: 00b705a3 sb a1,11(a4) +800006e8: 00b70523 sb a1,10(a4) +800006ec: 00b704a3 sb a1,9(a4) +800006f0: 00b70423 sb a1,8(a4) +800006f4: 00b703a3 sb a1,7(a4) +800006f8: 00b70323 sb a1,6(a4) +800006fc: 00b702a3 sb a1,5(a4) +80000700: 00b70223 sb a1,4(a4) +80000704: 00b701a3 sb a1,3(a4) +80000708: 00b70123 sb a1,2(a4) +8000070c: 00b700a3 sb a1,1(a4) +80000710: 00b70023 sb a1,0(a4) +80000714: 00008067 ret +80000718: 0ff5f593 andi a1,a1,255 +8000071c: 00859693 slli a3,a1,0x8 +80000720: 00d5e5b3 or a1,a1,a3 +80000724: 01059693 slli a3,a1,0x10 +80000728: 00d5e5b3 or a1,a1,a3 +8000072c: f6dff06f j 80000698 +80000730: 00279693 slli a3,a5,0x2 +80000734: 00000297 auipc t0,0x0 +80000738: 005686b3 add a3,a3,t0 +8000073c: 00008293 mv t0,ra +80000740: fa0680e7 jalr -96(a3) +80000744: 00028093 mv ra,t0 +80000748: ff078793 addi a5,a5,-16 +8000074c: 40f70733 sub a4,a4,a5 +80000750: 00f60633 add a2,a2,a5 +80000754: f6c378e3 bgeu t1,a2,800006c4 +80000758: f3dff06f j 80000694 + +8000075c <__register_exitproc>: +8000075c: 800027b7 lui a5,0x80002 +80000760: d487a703 lw a4,-696(a5) # 80001d48 <__stack_top+0x81001d48> +80000764: 14872783 lw a5,328(a4) +80000768: 04078c63 beqz a5,800007c0 <__register_exitproc+0x64> +8000076c: 0047a703 lw a4,4(a5) +80000770: 01f00813 li a6,31 +80000774: 06e84e63 blt a6,a4,800007f0 <__register_exitproc+0x94> +80000778: 00271813 slli a6,a4,0x2 +8000077c: 02050663 beqz a0,800007a8 <__register_exitproc+0x4c> +80000780: 01078333 add t1,a5,a6 +80000784: 08c32423 sw a2,136(t1) +80000788: 1887a883 lw a7,392(a5) +8000078c: 00100613 li a2,1 +80000790: 00e61633 sll a2,a2,a4 +80000794: 00c8e8b3 or a7,a7,a2 +80000798: 1917a423 sw a7,392(a5) +8000079c: 10d32423 sw a3,264(t1) +800007a0: 00200693 li a3,2 +800007a4: 02d50463 beq a0,a3,800007cc <__register_exitproc+0x70> +800007a8: 00170713 addi a4,a4,1 +800007ac: 00e7a223 sw a4,4(a5) +800007b0: 010787b3 add a5,a5,a6 +800007b4: 00b7a423 sw a1,8(a5) +800007b8: 00000513 li a0,0 +800007bc: 00008067 ret +800007c0: 14c70793 addi a5,a4,332 +800007c4: 14f72423 sw a5,328(a4) +800007c8: fa5ff06f j 8000076c <__register_exitproc+0x10> +800007cc: 18c7a683 lw a3,396(a5) +800007d0: 00170713 addi a4,a4,1 +800007d4: 00e7a223 sw a4,4(a5) +800007d8: 00c6e633 or a2,a3,a2 +800007dc: 18c7a623 sw a2,396(a5) +800007e0: 010787b3 add a5,a5,a6 +800007e4: 00b7a423 sw a1,8(a5) +800007e8: 00000513 li a0,0 +800007ec: 00008067 ret +800007f0: fff00513 li a0,-1 +800007f4: 00008067 ret + +800007f8 <__call_exitprocs>: +800007f8: fd010113 addi sp,sp,-48 +800007fc: 800027b7 lui a5,0x80002 +80000800: 01412c23 sw s4,24(sp) +80000804: d487aa03 lw s4,-696(a5) # 80001d48 <__stack_top+0x81001d48> +80000808: 03212023 sw s2,32(sp) +8000080c: 02112623 sw ra,44(sp) +80000810: 148a2903 lw s2,328(s4) +80000814: 02812423 sw s0,40(sp) +80000818: 02912223 sw s1,36(sp) +8000081c: 01312e23 sw s3,28(sp) +80000820: 01512a23 sw s5,20(sp) +80000824: 01612823 sw s6,16(sp) +80000828: 01712623 sw s7,12(sp) +8000082c: 01812423 sw s8,8(sp) +80000830: 04090063 beqz s2,80000870 <__call_exitprocs+0x78> +80000834: 00050b13 mv s6,a0 +80000838: 00058b93 mv s7,a1 +8000083c: 00100a93 li s5,1 +80000840: fff00993 li s3,-1 +80000844: 00492483 lw s1,4(s2) +80000848: fff48413 addi s0,s1,-1 +8000084c: 02044263 bltz s0,80000870 <__call_exitprocs+0x78> +80000850: 00249493 slli s1,s1,0x2 +80000854: 009904b3 add s1,s2,s1 +80000858: 040b8463 beqz s7,800008a0 <__call_exitprocs+0xa8> +8000085c: 1044a783 lw a5,260(s1) +80000860: 05778063 beq a5,s7,800008a0 <__call_exitprocs+0xa8> +80000864: fff40413 addi s0,s0,-1 +80000868: ffc48493 addi s1,s1,-4 +8000086c: ff3416e3 bne s0,s3,80000858 <__call_exitprocs+0x60> +80000870: 02c12083 lw ra,44(sp) +80000874: 02812403 lw s0,40(sp) +80000878: 02412483 lw s1,36(sp) +8000087c: 02012903 lw s2,32(sp) +80000880: 01c12983 lw s3,28(sp) +80000884: 01812a03 lw s4,24(sp) +80000888: 01412a83 lw s5,20(sp) +8000088c: 01012b03 lw s6,16(sp) +80000890: 00c12b83 lw s7,12(sp) +80000894: 00812c03 lw s8,8(sp) +80000898: 03010113 addi sp,sp,48 +8000089c: 00008067 ret +800008a0: 00492783 lw a5,4(s2) +800008a4: 0044a683 lw a3,4(s1) +800008a8: fff78793 addi a5,a5,-1 +800008ac: 04878e63 beq a5,s0,80000908 <__call_exitprocs+0x110> +800008b0: 0004a223 sw zero,4(s1) +800008b4: fa0688e3 beqz a3,80000864 <__call_exitprocs+0x6c> +800008b8: 18892783 lw a5,392(s2) +800008bc: 008a9733 sll a4,s5,s0 +800008c0: 00492c03 lw s8,4(s2) +800008c4: 00f777b3 and a5,a4,a5 +800008c8: 02079263 bnez a5,800008ec <__call_exitprocs+0xf4> +800008cc: 000680e7 jalr a3 +800008d0: 00492703 lw a4,4(s2) +800008d4: 148a2783 lw a5,328(s4) +800008d8: 01871463 bne a4,s8,800008e0 <__call_exitprocs+0xe8> +800008dc: f92784e3 beq a5,s2,80000864 <__call_exitprocs+0x6c> +800008e0: f80788e3 beqz a5,80000870 <__call_exitprocs+0x78> +800008e4: 00078913 mv s2,a5 +800008e8: f5dff06f j 80000844 <__call_exitprocs+0x4c> +800008ec: 18c92783 lw a5,396(s2) +800008f0: 0844a583 lw a1,132(s1) +800008f4: 00f77733 and a4,a4,a5 +800008f8: 00071c63 bnez a4,80000910 <__call_exitprocs+0x118> +800008fc: 000b0513 mv a0,s6 +80000900: 000680e7 jalr a3 +80000904: fcdff06f j 800008d0 <__call_exitprocs+0xd8> +80000908: 00892223 sw s0,4(s2) +8000090c: fa9ff06f j 800008b4 <__call_exitprocs+0xbc> +80000910: 00058513 mv a0,a1 +80000914: 000680e7 jalr a3 +80000918: fb9ff06f j 800008d0 <__call_exitprocs+0xd8> + +Disassembly of section .init_array: + +8000191c <__init_array_start>: +8000191c: 0068 addi a0,sp,12 +8000191e: 8000 0x8000 + +Disassembly of section .data: + +80001920 : +80001920: 0000 unimp +80001922: 0000 unimp +80001924: 1c0c addi a1,sp,560 +80001926: 8000 0x8000 +80001928: 1c74 addi a3,sp,572 +8000192a: 8000 0x8000 +8000192c: 1cdc addi a5,sp,628 +8000192e: 8000 0x8000 + ... +800019c8: 0001 nop +800019ca: 0000 unimp +800019cc: 0000 unimp +800019ce: 0000 unimp +800019d0: 330e fld ft6,224(sp) +800019d2: abcd j 80001fc4 <__BSS_END__+0x1f8> +800019d4: 1234 addi a3,sp,296 +800019d6: e66d bnez a2,80001ac0 +800019d8: deec sw a1,124(a3) +800019da: 0005 c.nop 1 +800019dc: 0000000b 0xb + ... + +Disassembly of section .sdata: + +80001d48 <_global_impure_ptr>: +80001d48: 1920 addi s0,sp,184 +80001d4a: 8000 0x8000 + +Disassembly of section .bss: + +80001d4c : + ... + +Disassembly of section .comment: + +00000000 <.comment>: + 0: 3a434347 fmsub.d ft6,ft6,ft4,ft7,rmm + 4: 2820 fld fs0,80(s0) + 6: 29554e47 fmsub.s ft8,fa0,fs5,ft5,rmm + a: 3120 fld fs0,96(a0) + c: 2e30 fld fa2,88(a2) + e: 2e32 fld ft8,264(sp) + 10: 0030 addi a2,sp,8 + +Disassembly of section .riscv.attributes: + +00000000 <.riscv.attributes>: + 0: 2941 jal 490 <__stack_size+0x90> + 2: 0000 unimp + 4: 7200 flw fs0,32(a2) + 6: 7369 lui t1,0xffffa + 8: 01007663 bgeu zero,a6,14 <__stack_usage+0x14> + c: 001f 0000 1004 0x10040000001f + 12: 7205 lui tp,0xfffe1 + 14: 3376 fld ft6,376(sp) + 16: 6932 flw fs2,12(sp) + 18: 7032 flw ft0,44(sp) + 1a: 5f30 lw a2,120(a4) + 1c: 326d jal fffff9c6 <__stack_top+0xfff9c6> + 1e: 3070 fld fa2,224(s0) + 20: 665f 7032 0030 0x307032665f + 26: 0108 addi a0,sp,128 + 28: 0b0a slli s6,s6,0x2 diff --git a/tests/regression/fence/kernel.elf b/tests/regression/fence/kernel.elf new file mode 100755 index 00000000..0b635250 Binary files /dev/null and b/tests/regression/fence/kernel.elf differ diff --git a/tests/regression/fence/main.cpp b/tests/regression/fence/main.cpp new file mode 100644 index 00000000..2961b517 --- /dev/null +++ b/tests/regression/fence/main.cpp @@ -0,0 +1,202 @@ +#include +#include +#include +#include +#include "common.h" + +#define RT_CHECK(_expr) \ + do { \ + int _ret = _expr; \ + if (0 == _ret) \ + break; \ + printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ + cleanup(); \ + exit(-1); \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// + +const char* kernel_file = "kernel.bin"; +uint32_t count = 0; + +vx_device_h device = nullptr; +vx_buffer_h staging_buf = nullptr; + +static void show_usage() { + std::cout << "Vortex Test." << std::endl; + std::cout << "Usage: [-k: kernel] [-n words] [-h: help]" << std::endl; +} + +static void parse_args(int argc, char **argv) { + int c; + while ((c = getopt(argc, argv, "n:k:h?")) != -1) { + switch (c) { + case 'n': + count = atoi(optarg); + break; + case 'k': + kernel_file = optarg; + break; + case 'h': + case '?': { + show_usage(); + exit(0); + } break; + default: + show_usage(); + exit(-1); + } + } +} + +void cleanup() { + if (staging_buf) { + vx_buf_release(staging_buf); + } + if (device) { + vx_dev_close(device); + } +} + +int run_test(const kernel_arg_t& kernel_arg, + uint32_t buf_size, + uint32_t num_points) { + // start device + std::cout << "start device" << std::endl; + RT_CHECK(vx_start(device)); + + // wait for completion + std::cout << "wait for completion" << std::endl; + RT_CHECK(vx_ready_wait(device, -1)); + + // download destination buffer + std::cout << "download destination buffer" << std::endl; + RT_CHECK(vx_copy_from_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + + // verify result + std::cout << "verify result" << std::endl; + { + int errors = 0; + auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf); + for (uint32_t i = 0; i < num_points; ++i) { + int ref = i + i; + int cur = buf_ptr[i]; + if (cur != ref) { + std::cout << "error at result #" << std::dec << i + << std::hex << ": actual 0x" << cur << ", expected 0x" << ref << std::endl; + ++errors; + } + } + if (errors != 0) { + std::cout << "Found " << std::dec << errors << " errors!" << std::endl; + std::cout << "FAILED!" << std::endl; + return 1; + } + } + + return 0; +} + +int main(int argc, char *argv[]) { + size_t value; + kernel_arg_t kernel_arg; + + // parse command arguments + parse_args(argc, argv); + + if (count == 0) { + count = 1; + } + + // open device connection + std::cout << "open device connection" << std::endl; + RT_CHECK(vx_dev_open(&device)); + + unsigned max_cores, max_warps, max_threads; + RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads)); + + uint32_t num_tasks = max_cores * max_warps * max_threads; + uint32_t num_points = count * num_tasks; + uint32_t buf_size = num_points * sizeof(int32_t); + + std::cout << "number of points: " << num_points << std::endl; + std::cout << "buffer size: " << buf_size << " bytes" << std::endl; + + // upload program + std::cout << "upload program" << std::endl; + RT_CHECK(vx_upload_kernel_file(device, kernel_file)); + + // allocate device memory + std::cout << "allocate device memory" << std::endl; + + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.src0_ptr = value; + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.src1_ptr = value; + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.dst_ptr = value; + + kernel_arg.num_tasks = num_tasks; + kernel_arg.task_size = count; + + std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; + std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + + // allocate shared memory + std::cout << "allocate shared memory" << std::endl; + uint32_t alloc_size = std::max(buf_size, sizeof(kernel_arg_t)); + RT_CHECK(vx_alloc_shared_mem(device, alloc_size, &staging_buf)); + + // upload kernel argument + std::cout << "upload kernel argument" << std::endl; + { + auto buf_ptr = (int*)vx_host_ptr(staging_buf); + memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t)); + RT_CHECK(vx_copy_to_dev(staging_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0)); + } + + // upload source buffer0 + { + auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = i-1; + } + } + std::cout << "upload source buffer0" << std::endl; + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src0_ptr, buf_size, 0)); + + // upload source buffer1 + { + auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = i+1; + } + } + std::cout << "upload source buffer1" << std::endl; + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.src1_ptr, buf_size, 0)); + + // clear destination buffer + { + auto buf_ptr = (int32_t*)vx_host_ptr(staging_buf); + for (uint32_t i = 0; i < num_points; ++i) { + buf_ptr[i] = 0xdeadbeef; + } + } + std::cout << "clear destination buffer" << std::endl; + RT_CHECK(vx_copy_to_dev(staging_buf, kernel_arg.dst_ptr, buf_size, 0)); + + // run tests + std::cout << "run tests" << std::endl; + RT_CHECK(run_test(kernel_arg, buf_size, num_points)); + + // cleanup + std::cout << "cleanup" << std::endl; + cleanup(); + + std::cout << "PASSED!" << std::endl; + + return 0; +} \ No newline at end of file