This commit is contained in:
felsabbagh3
2020-06-28 20:01:49 -07:00
8 changed files with 193 additions and 265 deletions

0
driver/tests/demo/kernel.bin Executable file → Normal file
View File

View File

@@ -91,4 +91,4 @@ lsof +D build_ase_1c
make -C pipeline > pipeline/build.log 2>&1 &
make -C cache > cache/build.log 2>&1 &
make -C vortex > vortex/build.log 2>&1 &
make -C pipeline > pipeline/build.log 2>&1 &
make -C top > top/build.log 2>&1 &

View File

@@ -16,190 +16,121 @@ module VX_alu_unit (
localparam DIV_PIPELINE_LEN = 20;
localparam MUL_PIPELINE_LEN = 8;
wire[31:0] unsigned_div_result;
wire[31:0] unsigned_rem_result;
wire[31:0] signed_div_result;
wire[31:0] signed_rem_result;
wire[31:0] div_result_unsigned;
wire[31:0] div_result_signed;
wire[63:0] mul_data_a, mul_data_b;
wire[63:0] mul_result;
wire[31:0] rem_result_unsigned;
wire[31:0] rem_result_signed;
wire[63:0] mul_result;
wire[31:0] ALU_in1;
wire[31:0] ALU_in2;
wire[31:0] alu_in1 = src_a;
wire[31:0] alu_in2 = (src_rs2 == `RS2_IMMED) ? itype_immed : src_b;
VX_divide #(
.WIDTHN(32),
.WIDTHD(32),
.REP("UNSIGNED"),
.PIPELINE(DIV_PIPELINE_LEN)
) unsigned_div (
.clk(clk),
.reset(reset),
.clken(1'b1), // TODO this could be disabled on inactive instructions
.numer(ALU_in1),
.denom(ALU_in2),
.quotient(unsigned_div_result),
.remainder(unsigned_rem_result)
);
VX_divide #(
.WIDTHN(32),
.WIDTHD(32),
.REP("SIGNED"),
.PIPELINE(DIV_PIPELINE_LEN)
) signed_div (
.clk(clk),
.reset(reset),
.clken(1'b1), // TODO this could be disabled on inactive instructions
.numer(ALU_in1),
.denom(ALU_in2),
.quotient(signed_div_result),
.remainder(signed_rem_result)
);
VX_mult #(
.WIDTHA(64),
.WIDTHB(64),
.WIDTHP(64),
.REP("UNSIGNED"),
.PIPELINE(MUL_PIPELINE_LEN)
) multiplier (
.clk(clk),
.reset(reset),
.clken(1'b1), // TODO this could be disabled on inactive instructions
.dataa(mul_data_a),
.datab(mul_data_b),
.result(mul_result)
);
// ALU_MUL, ALU_MULH (signed*signed), ALU_MULHSU (signed*unsigned), ALU_MULHU (unsigned*unsigned)
wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};
wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2};
assign mul_data_a = (alu_op == `ALU_MULHU) ? {32'b0, ALU_in1} : alu_in1_signed;
assign mul_data_b = (alu_op == `ALU_MULHU || alu_op == `ALU_MULHSU) ? {32'b0, ALU_in2} : alu_in2_signed;
reg [15:0] curr_inst_delay;
reg [15:0] inst_delay;
reg inst_was_stalling;
wire inst_delay_stall = inst_was_stalling ? (inst_delay != 0) : (curr_inst_delay != 0);
assign alu_stall = inst_delay_stall;
wire[31:0] upper_immed_s = {upper_immed, {12{1'b0}}};
reg [7:0] inst_delay;
reg [7:0] curr_inst_delay;
always @(*) begin
case (alu_op)
`ALU_DIV,
`ALU_DIVU,
`ALU_REM,
`ALU_REMU: curr_inst_delay = DIV_PIPELINE_LEN;
`ALU_REMU: inst_delay = DIV_PIPELINE_LEN;
`ALU_MUL,
`ALU_MULH,
`ALU_MULHSU,
`ALU_MULHU: curr_inst_delay = MUL_PIPELINE_LEN;
default: curr_inst_delay = 0;
endcase // alu_op
`ALU_MULHU: inst_delay = MUL_PIPELINE_LEN;
default: inst_delay = 0;
endcase
end
wire inst_stalled = (curr_inst_delay != inst_delay);
always @(posedge clk) begin
if (reset) begin
inst_delay <= 0;
inst_was_stalling <= 0;
end
else if (inst_delay_stall) begin
if (inst_was_stalling) begin
if (inst_delay > 0)
inst_delay <= inst_delay - 1;
end
else begin
inst_was_stalling <= 1;
inst_delay <= curr_inst_delay - 1;
end
end
else begin
inst_was_stalling <= 0;
end
if (reset) begin
curr_inst_delay <= 0;
end else begin
curr_inst_delay <= inst_stalled ? (curr_inst_delay + 1) : 0;
end
end
`ifdef SYN_FUNC
wire which_in2;
wire[31:0] upper_immed;
assign which_in2 = (src_rs2 == `RS2_IMMED);
assign ALU_in1 = src_a;
assign ALU_in2 = which_in2 ? itype_immed : src_b;
assign upper_immed = {upper_immed, {12{1'b0}}};
assign alu_stall = inst_stalled;
always @(*) begin
case (alu_op)
`ALU_ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2);
`ALU_SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2);
`ALU_SLLA: alu_result = ALU_in1 << ALU_in2[4:0];
`ALU_SLT: alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
`ALU_SLTU: alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
`ALU_XOR: alu_result = ALU_in1 ^ ALU_in2;
`ALU_SRL: alu_result = ALU_in1 >> ALU_in2[4:0];
`ALU_SRA: alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
`ALU_OR: alu_result = ALU_in1 | ALU_in2;
`ALU_AND: alu_result = ALU_in2 & ALU_in1;
`ALU_SUBU: alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
`ALU_LUI: alu_result = upper_immed;
`ALU_AUIPC: alu_result = $signed(curr_PC) + $signed(upper_immed);
// TODO: profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
`ALU_MUL: alu_result = mul_result[31:0];
`ALU_MULH: alu_result = mul_result[63:32];
`ALU_MULHSU: alu_result = mul_result[63:32];
`ALU_MULHU: alu_result = mul_result[63:32];
`ALU_DIV: alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
`ALU_DIVU: alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
`ALU_REM: alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
`ALU_REMU: alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
default: alu_result = 32'h0;
endcase // alu_op
end
`else
wire which_in2;
wire[31:0] upper_immed_s;
assign which_in2 = (src_rs2 == `RS2_IMMED);
assign ALU_in1 = src_a;
assign ALU_in2 = which_in2 ? itype_immed : src_b;
assign upper_immed_s = {upper_immed, {12{1'b0}}};
always @(*) begin
case (alu_op)
`ALU_ADD: alu_result = $signed(ALU_in1) + $signed(ALU_in2);
`ALU_SUB: alu_result = $signed(ALU_in1) - $signed(ALU_in2);
`ALU_SLLA: alu_result = ALU_in1 << ALU_in2[4:0];
`ALU_SLT: alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
`ALU_SLTU: alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
`ALU_XOR: alu_result = ALU_in1 ^ ALU_in2;
`ALU_SRL: alu_result = ALU_in1 >> ALU_in2[4:0];
`ALU_SRA: alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
`ALU_OR: alu_result = ALU_in1 | ALU_in2;
`ALU_AND: alu_result = ALU_in2 & ALU_in1;
`ALU_SUBU: alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
`ALU_ADD: alu_result = $signed(alu_in1) + $signed(alu_in2);
`ALU_SUB: alu_result = $signed(alu_in1) - $signed(alu_in2);
`ALU_SLLA: alu_result = alu_in1 << alu_in2[4:0];
`ALU_SLT: alu_result = ($signed(alu_in1) < $signed(alu_in2)) ? 32'h1 : 32'h0;
`ALU_SLTU: alu_result = alu_in1 < alu_in2 ? 32'h1 : 32'h0;
`ALU_XOR: alu_result = alu_in1 ^ alu_in2;
`ALU_SRL: alu_result = alu_in1 >> alu_in2[4:0];
`ALU_SRA: alu_result = $signed(alu_in1) >>> alu_in2[4:0];
`ALU_OR: alu_result = alu_in1 | alu_in2;
`ALU_AND: alu_result = alu_in2 & alu_in1;
`ALU_SUBU: alu_result = (alu_in1 >= alu_in2) ? 32'h0 : 32'hffffffff;
`ALU_LUI: alu_result = upper_immed_s;
`ALU_AUIPC: alu_result = $signed(curr_PC) + $signed(upper_immed_s);
// TODO: profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
// TODO: profitable to roll these exceptional cases into inst_delay_tmp to avoid pipeline when possible?
`ALU_MUL: alu_result = mul_result[31:0];
`ALU_MULH: alu_result = mul_result[63:32];
`ALU_MULHSU: alu_result = mul_result[63:32];
`ALU_MULHU: alu_result = mul_result[63:32];
`ALU_DIV: alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
`ALU_DIVU: alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
`ALU_REM: alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
`ALU_REMU: alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
`ALU_DIV: alu_result = (alu_in2 == 0) ? 32'hffffffff : div_result_signed;
`ALU_DIVU: alu_result = (alu_in2 == 0) ? 32'hffffffff : div_result_unsigned;
`ALU_REM: alu_result = (alu_in2 == 0) ? alu_in1 : rem_result_signed;
`ALU_REMU: alu_result = (alu_in2 == 0) ? alu_in1 : rem_result_unsigned;
default: alu_result = 32'h0;
endcase // alu_op
end
`endif
// Unsigned 32-bit / 32-bit divider: both numerator and denominator are
// treated as unsigned (NSIGNED = 0, DSIGNED = 0) and the PIPELINE parameter
// is set to DIV_PIPELINE_LEN.
// Operands are the shared ALU inputs alu_in1 / alu_in2; the quotient and
// remainder land on div_result_unsigned / rem_result_unsigned, which the
// ALU result mux selects for ALU_DIVU and ALU_REMU.
VX_divide #(
.WIDTHN(32),
.WIDTHD(32),
.NSIGNED(0),
.DSIGNED(0),
.PIPELINE(DIV_PIPELINE_LEN)
) udiv (
.clk(clk),
.reset(reset),
.numer(alu_in1),
.denom(alu_in2),
.quotient(div_result_unsigned),
.remainder(rem_result_unsigned)
);
// Signed 32-bit / 32-bit divider: both numerator and denominator are
// treated as two's-complement values (NSIGNED = 1, DSIGNED = 1) and the
// PIPELINE parameter is set to DIV_PIPELINE_LEN.
// It is fed from the same alu_in1 / alu_in2 operands as the unsigned
// divider; its outputs div_result_signed / rem_result_signed are the ones
// the ALU result mux selects for ALU_DIV and ALU_REM.
VX_divide #(
.WIDTHN(32),
.WIDTHD(32),
.NSIGNED(1),
.DSIGNED(1),
.PIPELINE(DIV_PIPELINE_LEN)
) sdiv (
.clk(clk),
.reset(reset),
.numer(alu_in1),
.denom(alu_in2),
.quotient(div_result_signed),
.remainder(rem_result_signed)
);
// Multiplier operands are widened from 32 to 33 bits so that one signed
// multiplier can serve every multiply flavour:
//   - bit 32 of mul_dataa is 0 for ALU_MULHU (operand A unsigned),
//     otherwise a copy of alu_in1[31] (sign extension);
//   - bit 32 of mul_datab is 0 for ALU_MULHU and ALU_MULHSU (operand B
//     unsigned), otherwise a copy of alu_in2[31] (sign extension).
wire [32:0] mul_dataa = {(alu_op == `ALU_MULHU) ? 1'b0 : alu_in1[31], alu_in1};
wire [32:0] mul_datab = {(alu_op == `ALU_MULHU || alu_op == `ALU_MULHSU) ? 1'b0 : alu_in2[31], alu_in2};
// Signed 33 x 33 multiplier producing a 64-bit result, with the PIPELINE
// parameter set to MUL_PIPELINE_LEN. The ALU result mux takes
// mul_result[31:0] for ALU_MUL and mul_result[63:32] for the MULH* variants.
VX_mult #(
.WIDTHA(33),
.WIDTHB(33),
.WIDTHP(64),
.SIGNED(1),
.PIPELINE(MUL_PIPELINE_LEN)
) multiplier (
.clk(clk),
.reset(reset),
.dataa(mul_dataa),
.datab(mul_datab),
.result(mul_result)
);
endmodule

View File

@@ -4,10 +4,10 @@
`include "VX_config.vh"
`include "VX_scope.vh"
// `define QUEUE_FORCE_MLAB 1
// `define SYN 1
`define QUEUE_FORCE_MLAB 1
// `define SYNTHESIS 1
// `define ASIC 1
// `define SYN_FUNC 1
///////////////////////////////////////////////////////////////////////////////

View File

@@ -54,7 +54,7 @@ module VX_gpr_ram (
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] tmp_a;
wire [`NUM_THREADS-1:0][`NUM_GPRS-1:0] tmp_b;
`ifndef SYN
`ifndef SYNTHESIS
genvar j;
for (i = 0; i < `NUM_THREADS; i++) begin
for (j = 0; j < `NUM_GPRS; j++) begin

View File

@@ -161,8 +161,6 @@ module VX_lsu_unit #(
// Can't accept new response
assign dcache_rsp_if.core_rsp_ready = !(no_slot_mem & (|mem_wb_if_p1.valid));
// From LSU to WB
localparam WB_REQ_SIZE = (`NUM_THREADS) + (`NUM_THREADS * 32) + (`NW_BITS) + (5) + (2) + 32;
VX_generic_register #(.N(WB_REQ_SIZE)) lsu_to_wb(

View File

@@ -3,12 +3,12 @@
module VX_divide #(
parameter WIDTHN = 1,
parameter WIDTHD = 1,
parameter REP = "UNSIGNED",
parameter NSIGNED = 0,
parameter DSIGNED = 0,
parameter PIPELINE = 0
) (
input wire clk,
input wire reset,
input wire clken,
input [WIDTHN-1:0] numer,
input [WIDTHD-1:0] denom,
@@ -19,84 +19,90 @@ module VX_divide #(
`ifdef QUARTUS
lpm_divide #(
.LPM_WIDTHN(WIDTHN),
.LPM_WIDTHD(WIDTHD),
.LPM_NREPRESENTATION(REP),
.LPM_DREPRESENTATION(REP),
.LPM_PIPELINE(PIPELINE),
.DSP_BLOCK_BALANCING("LOGIC ELEMENTS"),
.MAXIMIZE_SPEED(9)
) quartus_divider (
.clock(clk),
.aclr(reset),
.clken(clken),
.numer(numer),
.denom(denom),
.quotient(quotient),
.remain(remainder)
// Quartus build: delegate the division to the lpm_divide primitive.
// aclr is tied low and clken is tied high here, so the divider is always
// enabled and this module's reset input is not connected to the primitive.
lpm_divide quartus_div (
.clock (clk),
.numer (numer),
.denom (denom),
.quotient (quotient),
.remain (remainder),
.aclr (1'b0),
.clken (1'b1)
);
// Primitive configuration: operand widths and pipeline depth are forwarded
// from this module's parameters, and the NSIGNED / DSIGNED flags are mapped
// to the "SIGNED" / "UNSIGNED" representation strings.
// NOTE(review): LPM_REMAINDERPOSITIVE=FALSE presumably allows a negative
// remainder for signed operands - confirm against the Intel LPM_DIVIDE docs.
defparam
quartus_div.lpm_type = "LPM_DIVIDE",
quartus_div.lpm_widthn = WIDTHN,
quartus_div.lpm_widthd = WIDTHD,
quartus_div.lpm_nrepresentation = NSIGNED ? "SIGNED" : "UNSIGNED",
quartus_div.lpm_drepresentation = DSIGNED ? "SIGNED" : "UNSIGNED",
quartus_div.lpm_hint = "LPM_REMAINDERPOSITIVE=FALSE,MAXIMIZE_SPEED=9",
quartus_div.lpm_pipeline = PIPELINE;
`else
wire [WIDTHN-1:0] numer_pipe_end;
wire [WIDTHD-1:0] denom_pipe_end;
reg [WIDTHN-1:0] quotient_unqual;
reg [WIDTHD-1:0] remainder_unqual;
// Behavioral (non-QUARTUS) divider core.
//
// Combinationally computes the unregistered quotient_unqual /
// remainder_unqual from numer and denom, using the signedness selected by
// the NSIGNED / DSIGNED parameters.
//
// Division by zero is not special-cased here; the visible ALU call site
// overrides the result when its denominator is zero.
always @(*) begin
`ifndef SYNTHESIS
    // Signed overflow (most-negative numerator divided by -1) can make
    // verilator raise a host division-overflow exception on x86, so it is
    // answered directly instead of being evaluated.
    // Only the signed/signed configuration can overflow; applying the guard
    // to the other configurations would corrupt legitimate results
    // (e.g. unsigned 0x8000_0000 % 0xFFFF_FFFF must be 0x8000_0000).
    // The substituted values are the two's-complement wrap-around result
    // (quotient == numer, remainder == 0), which is also what the RISC-V
    // M extension specifies for DIV / REM overflow.
    if (NSIGNED && DSIGNED
     && numer == {1'b1, (WIDTHN-1)'(0)}
     && denom == {WIDTHD{1'b1}}) begin
        quotient_unqual  = numer;
        remainder_unqual = 0;
    end else
`endif
    begin
        if (NSIGNED && DSIGNED) begin
            quotient_unqual  = $signed(numer) / $signed(denom);
            remainder_unqual = $signed(numer) % $signed(denom);
        end
        else if (NSIGNED && !DSIGNED) begin
            // A Verilog expression is only signed when every operand is
            // signed. Zero-extend the unsigned denominator by one bit and
            // cast it, so the numerator keeps its sign without changing
            // the denominator's value.
            quotient_unqual  = $signed(numer) / $signed({1'b0, denom});
            remainder_unqual = $signed(numer) % $signed({1'b0, denom});
        end
        else if (!NSIGNED && DSIGNED) begin
            // Same reasoning, with the unsigned numerator zero-extended.
            quotient_unqual  = $signed({1'b0, numer}) / $signed(denom);
            remainder_unqual = $signed({1'b0, numer}) % $signed(denom);
        end
        else begin
            quotient_unqual  = numer / denom;
            remainder_unqual = numer % denom;
        end
    end
end
if (PIPELINE == 0) begin
assign numer_pipe_end = numer;
assign denom_pipe_end = denom;
assign quotient = quotient_unqual;
assign remainder = remainder_unqual;
end else begin
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
reg [WIDTHN-1:0] quotient_pipe [0:PIPELINE-1];
reg [WIDTHD-1:0] remainder_pipe [0:PIPELINE-1];
genvar i;
for (i = 0; i < PIPELINE; i++) begin
always @(posedge clk) begin
if (reset) begin
numer_pipe[i] <= 0;
denom_pipe[i] <= 0;
quotient_pipe[i] <= 0;
remainder_pipe[i] <= 0;
end
else if (clken) begin
else begin
if (i == 0) begin
numer_pipe[0] <= numer;
denom_pipe[0] <= denom;
quotient_pipe[0] <= quotient_unqual;
remainder_pipe[0] <= remainder_unqual;
end else begin
numer_pipe[i] <= numer_pipe[i-1];
denom_pipe[i] <= denom_pipe[i-1];
quotient_pipe[i] <= quotient_pipe[i-1];
remainder_pipe[i] <= remainder_pipe[i-1];
end
end
end
end
assign numer_pipe_end = numer_pipe[PIPELINE-1];
assign denom_pipe_end = denom_pipe[PIPELINE-1];
end
always @(*) begin
if (denom_pipe_end == 0) begin
quotient = {WIDTHN{1'b1}};
remainder = numer_pipe_end;
end
`ifndef SYNTHESIS
// this edge case kills verilator in some cases by causing a division
// overflow exception. INT_MIN / -1 (on x86)
else if (numer_pipe_end == {1'b1, (WIDTHN-1)'(0)}
&& denom_pipe_end == {WIDTHD{1'b1}}) begin
quotient = 0;
remainder = 0;
end
`endif
else begin
if (REP == "SIGNED") begin
quotient = $signed(numer_pipe_end) / $signed(denom_pipe_end);
remainder = $signed(numer_pipe_end) % $signed(denom_pipe_end);
end else begin
quotient = numer_pipe_end / denom_pipe_end;
remainder = numer_pipe_end % denom_pipe_end;
end
end
end
assign quotient = quotient_pipe[PIPELINE-1];
assign remainder = remainder_pipe[PIPELINE-1];
end
`endif
endmodule : VX_divide
endmodule

View File

@@ -4,12 +4,11 @@ module VX_mult #(
parameter WIDTHA = 1,
parameter WIDTHB = 1,
parameter WIDTHP = 1,
parameter REP = "UNSIGNED",
parameter SIGNED = 0,
parameter PIPELINE = 0
) (
input clk,
input reset,
input clken,
input [WIDTHA-1:0] dataa,
input [WIDTHB-1:0] datab,
@@ -19,65 +18,59 @@ module VX_mult #(
`ifdef QUARTUS
lpm_mult #(
.LPM_WIDTHA(WIDTHA),
.LPM_WIDTHB(WIDTHB),
.LPM_WIDTHP(WIDTHP),
.LPM_REPRESENTATION(REP),
.LPM_PIPELINE(PIPELINE),
.DSP_BLOCK_BALANCING("LOGIC ELEMENTS"),
.MAXIMIZE_SPEED(9)
) quartus_mult (
.clock(clk),
.aclr(reset),
.clken(clken),
.dataa(dataa),
.datab(datab),
.result(result)
// Quartus build: delegate the multiplication to the lpm_mult primitive.
// aclr and sclr are tied low and clken is tied high, so the multiplier is
// always enabled and this module's reset input is not connected to the
// primitive. The sum input is tied to 0.
lpm_mult quartus_mult (
.clock (clk),
.dataa (dataa),
.datab (datab),
.result (result),
.aclr (1'b0),
.clken (1'b1),
.sclr (1'b0),
.sum (1'b0)
);
// Primitive configuration: operand/result widths and pipeline depth are
// forwarded from this module's parameters, and the SIGNED flag is mapped to
// the "SIGNED" / "UNSIGNED" representation string.
defparam quartus_mult.lpm_type = "LPM_MULT",
quartus_mult.lpm_widtha = WIDTHA,
quartus_mult.lpm_widthb = WIDTHB,
quartus_mult.lpm_widthp = WIDTHP,
quartus_mult.lpm_representation = SIGNED ? "SIGNED" : "UNSIGNED",
quartus_mult.lpm_pipeline = PIPELINE,
quartus_mult.lpm_hint = "MAXIMIZE_SPEED=9";
`else
wire [WIDTHA-1:0] dataa_pipe_end;
wire [WIDTHB-1:0] datab_pipe_end;
if (PIPELINE == 0) begin
assign dataa_pipe_end = dataa;
assign datab_pipe_end = datab;
wire [WIDTHP-1:0] result_unqual;
if (SIGNED) begin
assign result_unqual = $signed(dataa) * $signed(datab);
end else begin
reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
assign result_unqual = dataa * datab;
end
if (PIPELINE == 0) begin
assign result = result_unqual;
end else begin
reg [WIDTHP-1:0] result_pipe [0:PIPELINE-1];
genvar i;
for (i = 0; i < PIPELINE; i++) begin
always @(posedge clk) begin
if (reset) begin
dataa_pipe[i] <= 0;
datab_pipe[i] <= 0;
result_pipe[i] <= 0;
end
else if (clken) begin
else begin
if (i == 0) begin
dataa_pipe[0] <= dataa;
datab_pipe[0] <= datab;
result_pipe[0] <= result_unqual;
end else begin
dataa_pipe[i] <= dataa_pipe[i-1];
datab_pipe[i] <= datab_pipe[i-1];
result_pipe[i] <= result_pipe[i-1];
end
end
end
end
assign dataa_pipe_end = dataa_pipe[PIPELINE-1];
assign datab_pipe_end = datab_pipe[PIPELINE-1];
end
if (REP == "SIGNED") begin
assign result = $signed(dataa_pipe_end) * $signed(datab_pipe_end);
end
else begin
assign result = dataa_pipe_end * datab_pipe_end;
assign result = result_pipe[PIPELINE-1];
end
`endif
endmodule: VX_mult
endmodule