Files
vortex/driver/hw/vortex_afu.sv
2020-04-02 15:41:12 -07:00

585 lines
16 KiB
Systemverilog

// Interface between the CSRs and the command FSM.
// All MMIO reads/writes are handled by the CSR logic and passed on to the FSM for state transitions.
// To be done:
// Change the address size to the buffer's address size, and the data size based on the IO address size. Check against hello_world.
`include "platform_if.vh"
import local_mem_cfg_pkg::*;
`include "afu_json_info.vh"
// vortex_afu: AFU top level. Decodes CCI-P MMIO accesses into CSRs/commands,
// moves data between host memory (CCI-P) and local memory (Avalon), and hosts
// the Vortex_SOC instance whose DRAM port is served from local memory.
module vortex_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2 // only used here to size mem_bank_select
) (
// global signals
input clk, // single clock: every always_ff in this module is posedge clk
input SoftReset, // active-high reset, sampled synchronously inside the clocked blocks
// IF signals between CCI and AFU
input t_if_ccip_Rx cp2af_sRxPort, // c0: MMIO requests and read responses; c1: write responses; c0TxAlmFull/c1TxAlmFull: back-pressure
output t_if_ccip_Tx af2cp_sTxPort, // c0: read requests; c1: write requests; c2: MMIO read responses
// Avalon signals for local memory access
output t_local_mem_data avs_writedata,
input t_local_mem_data avs_readdata,
output t_local_mem_addr avs_address, // addresses are in units of one local-memory data word (byte address >> 6 for Vortex requests)
input logic avs_waitrequest,
output logic avs_write, // one-cycle pulse per write
output logic avs_read, // one-cycle pulse per read
output t_local_mem_byte_mask avs_byteenable, // set to all ones at reset and never changed in this module
output t_local_mem_burst_cnt avs_burstcount, // set to 1 at reset and never changed in this module
input avs_readdatavalid,
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select // set to 0 at reset and never changed in this module
);
// SIZE parameter of the two local-memory read queues instantiated below.
localparam AVS_RD_QUEUE_SIZE = 16;
// Value vx_snoop_delay has to reach (one increment per clock, counted once all
// snoop requests of a pass were issued) before the FSM leaves a snoop state.
localparam VX_SNOOPING_DELAY = 300;
// MMIO addresses at which the two 64-bit halves of afu_id are returned.
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
// Command codes accepted through the CMD CSR.
// NOTE(review): the AFU_IMAGE_* macros are presumably generated into afu_json_info.vh -- confirm.
localparam CMD_TYPE_READ = `AFU_IMAGE_CMD_TYPE_READ;
localparam CMD_TYPE_WRITE = `AFU_IMAGE_CMD_TYPE_WRITE;
localparam CMD_TYPE_RUN = `AFU_IMAGE_CMD_TYPE_RUN;
localparam CMD_TYPE_SNOOP = `AFU_IMAGE_CMD_TYPE_SNOOP;
// MMIO addresses of the CSRs decoded by the MMIO controller.
localparam MMIO_CSR_CMD = `AFU_IMAGE_MMIO_CSR_CMD;
localparam MMIO_CSR_STATUS = `AFU_IMAGE_MMIO_CSR_STATUS;
localparam MMIO_CSR_IO_ADDR = `AFU_IMAGE_MMIO_CSR_IO_ADDR;
localparam MMIO_CSR_MEM_ADDR = `AFU_IMAGE_MMIO_CSR_MEM_ADDR;
localparam MMIO_CSR_DATA_SIZE = `AFU_IMAGE_MMIO_CSR_DATA_SIZE;
// 128-bit AFU UUID, read back through AFU_ID_L / AFU_ID_H.
logic [127:0] afu_id = `AFU_ACCEL_UUID;
// Command FSM states; the current state is also what a STATUS CSR read returns.
typedef enum logic[2:0] {
STATE_IDLE, // waiting for a command; progress counters are cleared here
STATE_READ, // local memory -> host: Avalon reads feeding CCI writes
STATE_WRITE, // host -> local memory: CCI reads feeding Avalon writes
STATE_RUN, // Vortex is running; left when vx_ebreak is seen
STATE_SNOOP1, // first snoop pass over the selected range
STATE_SNOOP2 // second snoop pass over the same range
} state_t;
state_t state;
// Vortex signals /////////////////////////////////////////////////////////////
// All of these are bound to the Vortex_SOC instance at the end of the module.
logic vx_dram_req_read;
logic vx_dram_req_write;
logic [31:0] vx_dram_req_addr; // shifted right by 6 to form the Avalon address
logic [31:0] vx_dram_req_data[15:0]; // 16 x 32-bit words, streamed into avs_writedata
logic vx_dram_req_delay; // driven by this module: high whenever a Vortex DRAM request cannot be accepted
logic vx_dram_fill_accept; // qualifies vx_dram_fill_rsp
logic vx_dram_fill_rsp; // driven by this module: fill response valid
logic [31:0] vx_dram_fill_rsp_addr; // driven by this module from the read address queue
logic [31:0] vx_dram_fill_rsp_data[15:0]; // driven by this module from the read data queue
logic vx_snp_req; // one-cycle snoop request pulse
logic [31:0] vx_snp_req_addr; // bound to llc_snp_req_addr
logic vx_snp_req_delay; // bound to llc_snp_req_delay
logic vx_ebreak; // ends STATE_RUN
// AVS Queues /////////////////////////////////////////////////////////////////
// raq: addresses of issued Avalon reads; rdq: data returned by Avalon reads.
// Both queues are popped together.
logic avs_raq_push;
t_local_mem_addr avs_raq_din;
logic avs_raq_pop;
t_local_mem_addr avs_raq_dout;
logic avs_raq_empty;
logic avs_raq_full;
logic avs_rdq_push;
t_local_mem_data avs_rdq_din;
logic avs_rdq_pop;
t_local_mem_data avs_rdq_dout;
logic avs_rdq_empty;
logic avs_rdq_full;
// CSR variables //////////////////////////////////////////////////////////////
logic [2:0] csr_cmd; // pulses for one cycle after a CMD write, otherwise 0
t_ccip_clAddr csr_io_addr; // host buffer address: MMIO value >> 6
t_local_mem_addr csr_mem_addr; // local memory address: MMIO value >> 6
logic [31:0] csr_data_size; // (MMIO value + 63) >> 6, i.e. rounded up to whole 64-unit lines
// MMIO controller ////////////////////////////////////////////////////////////
// Serves CCI-P MMIO traffic received on channel c0. Writes update the CSRs
// (csr_cmd is only held for a single cycle); reads are answered on channel c2
// in the following cycle.

// Incoming c0 header reinterpreted as an MMIO request header
t_ccip_c0_ReqMmioHdr mmioHdr;
always_comb mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);

always_ff @(posedge clk) begin
  if (SoftReset) begin
    csr_cmd       <= 0;
    csr_io_addr   <= 0;
    csr_mem_addr  <= 0;
    csr_data_size <= 0;
    af2cp_sTxPort.c2.hdr         <= 0;
    af2cp_sTxPort.c2.data        <= 0;
    af2cp_sTxPort.c2.mmioRdValid <= 0;
  end else begin
    // Defaults: no pending command and no read response unless set below
    csr_cmd                      <= 0;
    af2cp_sTxPort.c2.mmioRdValid <= 0;

    // MMIO write: update the addressed CSR
    if (cp2af_sRxPort.c0.mmioWrValid) begin
      case (mmioHdr.address)
        MMIO_CSR_IO_ADDR: begin
          // stored right-shifted by 6
          csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6);
          $display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6));
        end
        MMIO_CSR_MEM_ADDR: begin
          // stored right-shifted by 6
          csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6);
          $display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6));
        end
        MMIO_CSR_DATA_SIZE: begin
          // rounded up to a whole number of 64-unit lines
          csr_data_size <= $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6);
          $display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6));
        end
        MMIO_CSR_CMD: begin
          // overrides the default above for exactly one cycle
          csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data);
          $display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data));
        end
      endcase
    end

    // MMIO read: every read request gets a response, unknown addresses read 0
    if (cp2af_sRxPort.c0.mmioRdValid) begin
      af2cp_sTxPort.c2.mmioRdValid <= 1;           // post response
      af2cp_sTxPort.c2.hdr.tid     <= mmioHdr.tid; // copy TID
      case (mmioHdr.address)
        // AFU header
        16'h0000: af2cp_sTxPort.c2.data <= {
          4'b0001, // Feature type = AFU
          8'b0,    // reserved
          4'b0,    // afu minor revision = 0
          7'b0,    // reserved
          1'b1,    // end of DFH list = 1
          24'b0,   // next DFH offset = 0
          4'b0,    // afu major revision = 0
          12'b0    // feature ID = 0
        };
        AFU_ID_L: af2cp_sTxPort.c2.data <= afu_id[63:0];   // afu id low
        AFU_ID_H: af2cp_sTxPort.c2.data <= afu_id[127:64]; // afu id hi
        // next AFU (16'h0006) and reserved (16'h0008) both read as zero
        16'h0006, 16'h0008: af2cp_sTxPort.c2.data <= 64'h0;
        MMIO_CSR_STATUS: begin
          // trace only when the reported value differs from the last response data
          if (state != af2cp_sTxPort.c2.data)
            $display("%t: STATUS: state=%0d", $time, state);
          af2cp_sTxPort.c2.data <= state;
        end
        default: af2cp_sTxPort.c2.data <= 64'h0;
      endcase
    end
  end
end
// COMMAND FSM ////////////////////////////////////////////////////////////////
// A command pulse on csr_cmd moves the FSM out of STATE_IDLE; each working
// state returns to STATE_IDLE once its completion condition is met.

// Progress counters; each is updated by the block that owns the matching
// interface, the FSM only compares them.
logic [31:0] cci_write_ctr;
logic [31:0] avs_read_ctr;
logic [31:0] avs_write_ctr;
logic [31:0] vx_snoop_ctr;
logic [31:0] vx_snoop_delay;
// Extra reset for Vortex_SOC, pulsed for one cycle when a RUN command is taken
logic vx_reset;

always_ff @(posedge clk) begin
  // low in every cycle except the one following an accepted RUN command
  vx_reset <= 0;

  if (SoftReset) begin
    state <= STATE_IDLE;
  end else begin
    case (state)
      STATE_IDLE: begin
        // csr_cmd values that match no command leave the FSM in IDLE
        case (csr_cmd)
          CMD_TYPE_READ: begin
            $display("%t: CMD READ: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
            state <= STATE_READ;
          end
          CMD_TYPE_WRITE: begin
            $display("%t: CMD WRITE: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
            state <= STATE_WRITE;
          end
          CMD_TYPE_RUN: begin
            $display("%t: CMD START", $time);
            vx_reset <= 1;
            state    <= STATE_RUN;
          end
          CMD_TYPE_SNOOP: begin
            $display("%t: CMD SNOOP: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size);
            state <= STATE_SNOOP1;
          end
        endcase
      end

      // done once every line has been acknowledged by a CCI write response
      STATE_READ:
        if (cci_write_ctr >= csr_data_size) state <= STATE_IDLE;

      // done once every line has been written to local memory
      STATE_WRITE:
        if (avs_write_ctr >= csr_data_size) state <= STATE_IDLE;

      // done when Vortex signals program exit
      STATE_RUN:
        if (vx_ebreak) state <= STATE_IDLE;

      // first snoop pass finished: start the second one
      STATE_SNOOP1:
        if (vx_snoop_delay >= VX_SNOOPING_DELAY) state <= STATE_SNOOP2;

      // second snoop pass finished
      STATE_SNOOP2:
        if (vx_snoop_delay >= VX_SNOOPING_DELAY) state <= STATE_IDLE;
    endcase
  end
end
// AVS Controller /////////////////////////////////////////////////////////////
// Drives the Avalon master signals. Depending on the FSM state the requests
// come from the READ/WRITE transfer engines or from the Vortex DRAM port.
// NOTE(review): avs_read / avs_write are registered one-cycle pulses that are
// gated by the *previous* cycle's avs_waitrequest (and, for WRITE, not gated
// at response time at all) -- confirm the attached Avalon slave tolerates this.
always_ff @(posedge clk) begin
  // request strobes are pulses: low unless raised further down
  avs_read  <= 0;
  avs_write <= 0;

  if (SoftReset) begin
    avs_read_ctr    <= 0;
    avs_write_ctr   <= 0;
    avs_address     <= 0;
    avs_writedata   <= 0;
    avs_burstcount  <= 1;
    avs_byteenable  <= 64'hffffffffffffffff;
    mem_bank_select <= 0;
  end else begin
    case (state)
      // rearm both transfer counters for the next command
      STATE_IDLE: begin
        avs_read_ctr  <= 0;
        avs_write_ctr <= 0;
      end

      // local memory -> host: issue one read per line while there is room
      STATE_READ: begin
        if (!(avs_raq_full || avs_rdq_full || avs_waitrequest)
         && (avs_read_ctr < csr_data_size)) begin
          avs_read     <= 1;
          avs_address  <= csr_mem_addr + avs_read_ctr;
          avs_read_ctr <= avs_read_ctr + 1;
          $display("%t: AVS Rd Req: addr=%h", $time, csr_mem_addr + avs_read_ctr);
        end
      end

      // host -> local memory: every CCI read response becomes one write
      STATE_WRITE: begin
        if (cp2af_sRxPort.c0.rspValid && (avs_write_ctr < csr_data_size)) begin
          avs_write     <= 1;
          avs_address   <= csr_mem_addr + avs_write_ctr;
          avs_writedata <= cp2af_sRxPort.c0.data;
          avs_write_ctr <= avs_write_ctr + 1;
          $display("%t: AVS Wr Req: addr=%h (%0d/%0d)", $time, csr_mem_addr + avs_write_ctr, avs_write_ctr + 1, csr_data_size);
        end
      end

      // Vortex running: forward its DRAM requests (byte address >> 6).
      // The write branch comes last, so it wins on avs_address if both fire.
      STATE_RUN: begin
        if (vx_dram_req_read && !vx_dram_req_delay) begin
          avs_read    <= 1;
          avs_address <= (vx_dram_req_addr >> 6);
          $display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr >> 6);
        end
        if (vx_dram_req_write && !vx_dram_req_delay) begin
          avs_write     <= 1;
          avs_address   <= (vx_dram_req_addr >> 6);
          avs_writedata <= {>>{vx_dram_req_data}};
          $display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr >> 6);
        end
      end
    endcase

    // trace only: the returned data is captured by the read data queue
    if (avs_readdatavalid) begin
      $display("%t: AVS Rd Rsp: value=%h", $time, avs_readdata[63:0]);
    end
  end
end
// Vortex DRAM requests stalling
// A request can only be taken in STATE_RUN, and only while the Avalon
// interface and both read queues are able to accept it.
assign vx_dram_req_delay = !((STATE_RUN == state)
                          && !avs_waitrequest
                          && !avs_raq_full
                          && !avs_rdq_full);

// Vortex DRAM fill response
// The heads of the address and data queues are presented to Vortex whenever
// read data is available and Vortex accepts fills; the same condition pops
// both queues (see avs_raq_pop / avs_rdq_pop).
always_comb begin
  vx_dram_fill_rsp = (STATE_RUN == state) && !avs_rdq_empty && vx_dram_fill_accept;
  // The queue holds Avalon line addresses (vx_dram_req_addr >> 6). Convert
  // back so the response carries the address in the format Vortex issued it;
  // the widening cast keeps the upper bits from being lost by the shift.
  vx_dram_fill_rsp_addr = $bits(vx_dram_fill_rsp_addr)'(avs_raq_dout) << 6;
  {>>{vx_dram_fill_rsp_data}} = avs_rdq_dout;
end
// AVS address read request queue /////////////////////////////////////////////
// Remembers the address of every issued Avalon read until its data has been
// consumed, either by a Vortex fill response or by a CCI write request.
logic cci_write_req;

assign avs_raq_push = avs_read;
assign avs_raq_din  = avs_address;
assign avs_raq_pop  = vx_dram_fill_rsp || cci_write_req;

VX_generic_queue_ll #(
  .DATAW ($bits(t_local_mem_addr)),
  .SIZE  (AVS_RD_QUEUE_SIZE)
) vx_rd_addr_queue (
  .clk      (clk),
  .reset    (SoftReset),
  .push     (avs_raq_push),
  .in_data  (avs_raq_din),
  .pop      (avs_raq_pop),
  .out_data (avs_raq_dout),
  .empty    (avs_raq_empty),
  .full     (avs_raq_full)
);
// AVS data read response queue ///////////////////////////////////////////////
// Captures every word returned by Avalon; it is popped in lock-step with the
// address queue so both heads always describe the same read.
assign avs_rdq_push = avs_readdatavalid;
assign avs_rdq_din  = avs_readdata;
assign avs_rdq_pop  = avs_raq_pop;

VX_generic_queue_ll #(
  .DATAW ($bits(t_local_mem_data)),
  .SIZE  (AVS_RD_QUEUE_SIZE)
) vx_rd_data_queue (
  .clk      (clk),
  .reset    (SoftReset),
  .push     (avs_rdq_push),
  .in_data  (avs_rdq_din),
  .pop      (avs_rdq_pop),
  .out_data (avs_rdq_dout),
  .empty    (avs_rdq_empty),
  .full     (avs_rdq_full)
);
// CCI Read Request ///////////////////////////////////////////////////////////
// Host -> AFU path used in STATE_WRITE. Only one read is outstanding at a
// time; the returned line is written to local memory by the AVS controller.
t_ccip_c0_ReqMemHdr rd_hdr;
logic cci_read_pending; // a read was issued and its response has not arrived yet

// Request header: every field zero except the address of the next line
always_comb begin
  rd_hdr = t_ccip_c0_ReqMemHdr'(0);
  rd_hdr.address = csr_io_addr + avs_write_ctr;
end

// Send read requests to CCI
always_ff @(posedge clk) begin
  if (SoftReset) begin
    cci_read_pending       <= 0;
    af2cp_sTxPort.c0.valid <= 0;
    af2cp_sTxPort.c0.hdr   <= 0;
  end else begin
    // valid is a one-cycle pulse
    af2cp_sTxPort.c0.valid <= 0;

    // Response for the outstanding read: allow the next request.
    // (Mutually exclusive with the issue condition below, which needs
    // cci_read_pending to be low.)
    if (cci_read_pending && cp2af_sRxPort.c0.rspValid) begin
      $display("%t: CCI Rd Rsp", $time);
      cci_read_pending <= 0;
    end

    // Issue the next read
    if ((STATE_WRITE == state)
     && (avs_write_ctr < csr_data_size) // ensure not done
     && !cci_read_pending               // ensure no read pending
     && !cp2af_sRxPort.c0TxAlmFull      // ensure read queue not full
     && !avs_waitrequest)               // ensure AVS write queue not full
    begin
      af2cp_sTxPort.c0.valid <= 1;
      af2cp_sTxPort.c0.hdr   <= rd_hdr;
      cci_read_pending       <= 1;
      $display("%t: CCI Rd Req: addr=%h", $time, rd_hdr.address);
    end
  end
end
// CCI Write Request //////////////////////////////////////////////////////////
// AFU -> host path used in STATE_READ. Lines waiting in the read data queue
// are written to host memory one at a time; cci_write_ctr counts the write
// responses and is what the FSM uses to detect completion.
t_ccip_c1_ReqMemHdr wr_hdr;
logic cci_write_pending; // a write was issued and its response has not arrived yet

always_comb begin
  // A write can be issued when data is queued, the channel has room, nothing
  // is outstanding and the transfer is not complete yet. This signal also
  // pops the read queues.
  cci_write_req = (STATE_READ == state)
               && (cci_write_ctr < csr_data_size)
               && !cci_write_pending
               && !avs_rdq_empty
               && !cp2af_sRxPort.c1TxAlmFull;

  // Request header: every field zero except address and sop
  wr_hdr = t_ccip_c1_ReqMemHdr'(0);
  wr_hdr.address = csr_io_addr + cci_write_ctr;
  wr_hdr.sop = 1; // single line write mode
end

// Send write requests to CCI
always_ff @(posedge clk) begin
  if (SoftReset) begin
    cci_write_ctr          <= 0;
    cci_write_pending      <= 0;
    af2cp_sTxPort.c1.valid <= 0;
    af2cp_sTxPort.c1.hdr   <= 0;
    af2cp_sTxPort.c1.data  <= 0;
  end else begin
    // valid is a one-cycle pulse
    af2cp_sTxPort.c1.valid <= 0;

    // rearm the counter for the next command; a response handled further
    // down in the same cycle would take precedence
    if (STATE_IDLE == state) begin
      cci_write_ctr <= 0;
    end

    // Issue: send the line at the head of the read data queue
    if (cci_write_req) begin
      af2cp_sTxPort.c1.valid <= 1;
      af2cp_sTxPort.c1.hdr   <= wr_hdr;
      af2cp_sTxPort.c1.data  <= t_ccip_clData'(avs_rdq_dout);
      cci_write_pending      <= 1;
      $display("%t: CCI Wr Req: addr=%h", $time, wr_hdr.address);
    end

    // Response: the line is done, move on to the next one
    if (cci_write_pending && cp2af_sRxPort.c1.rspValid) begin
      cci_write_pending <= 0;
      cci_write_ctr     <= cci_write_ctr + 1;
      $display("%t: CCI Wr Rsp (%0d/%0d)", $time, cci_write_ctr + 1, csr_data_size);
    end
  end
end
// Vortex cache snooping //////////////////////////////////////////////////////
// One snoop pass (executed once in STATE_SNOOP1 and once in STATE_SNOOP2):
//   1. issue one snoop request per line, csr_data_size lines starting at
//      csr_mem_addr, whenever Vortex is not stalling snoop requests;
//   2. then count VX_SNOOPING_DELAY cycles in vx_snoop_delay;
//   3. when the delay has elapsed the command FSM advances (it tests the same
//      condition) and both counters are cleared for the next pass.
// Changes relative to the previous revision:
//   - vx_snp_req_addr is now driven (it was left unassigned), as a byte
//     address to mirror the ">> 6" conversion applied to Vortex DRAM requests;
//   - requests are issued while vx_snp_req_delay is LOW, matching the stall
//     meaning of the other *_delay signal in this module;
//   - the counters only run in the snoop states, so vx_snoop_delay no longer
//     free-runs outside of a snoop command when csr_data_size is 0.
// NOTE(review): the polarity of llc_snp_req_delay and the byte-address format
// of llc_snp_req_addr are inferred from this file only -- confirm against
// Vortex_SOC.
always_ff @(posedge clk) begin
  if (SoftReset) begin
    vx_snp_req      <= 0;
    vx_snp_req_addr <= 0;
    vx_snoop_ctr    <= 0;
    vx_snoop_delay  <= 0;
  end else begin
    // request strobe is a one-cycle pulse
    vx_snp_req <= 0;

    // rearm both counters while no command is running
    if (STATE_IDLE == state) begin
      vx_snoop_ctr   <= 0;
      vx_snoop_delay <= 0;
    end

    if ((STATE_SNOOP1 == state) || (STATE_SNOOP2 == state)) begin
      // 1. issue the next snoop request unless Vortex is stalling
      if ((vx_snoop_ctr < csr_data_size) && !vx_snp_req_delay) begin
        vx_snp_req      <= 1;
        vx_snp_req_addr <= ($bits(vx_snp_req_addr)'(csr_mem_addr) + vx_snoop_ctr) << 6;
        vx_snoop_ctr    <= vx_snoop_ctr + 1;
      end

      // 2. all requests issued: let them settle
      if ((vx_snoop_ctr >= csr_data_size)
       && (vx_snoop_delay < VX_SNOOPING_DELAY)) begin
        vx_snoop_delay <= vx_snoop_delay + 1;
      end

      // 3. pass complete: clear the counters for the next pass
      if (vx_snoop_delay >= VX_SNOOPING_DELAY) begin
        vx_snoop_ctr   <= 0;
        vx_snoop_delay <= 0;
      end
    end
  end
end
// Vortex binding /////////////////////////////////////////////////////////////
// Vortex_SOC instance with default parameters. It is reset by SoftReset and
// additionally by the one-cycle vx_reset pulse raised when a RUN command is
// accepted.
// NOTE(review): port directions are not visible from this file; the comments
// below describe how this module uses each signal.
Vortex_SOC #() vx_soc (
.clk (clk),
.reset (SoftReset || vx_reset),
// DRAM Req
// read/write/addr/data are consumed by the AVS controller in STATE_RUN;
// vx_dram_req_delay is generated by this module.
.out_dram_req_write (vx_dram_req_write),
.out_dram_req_read (vx_dram_req_read),
.out_dram_req_addr (vx_dram_req_addr),
.out_dram_req_data (vx_dram_req_data),
.out_dram_req_delay (vx_dram_req_delay),
// DRAM Rsp
// fill_rsp / fill_rsp_addr / fill_rsp_data are generated by this module from
// the read queues; fill_accept gates fill_rsp.
.out_dram_fill_accept (vx_dram_fill_accept),
.out_dram_fill_rsp (vx_dram_fill_rsp),
.out_dram_fill_rsp_addr (vx_dram_fill_rsp_addr),
.out_dram_fill_rsp_data (vx_dram_fill_rsp_data),
// Cache Snooping Req
.llc_snp_req (vx_snp_req),
.llc_snp_req_addr (vx_snp_req_addr),
.llc_snp_req_delay (vx_snp_req_delay),
// program exit signal
.out_ebreak (vx_ebreak) // returns the command FSM from STATE_RUN to STATE_IDLE
);
endmodule