diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index cf40c94c..71ffb1c9 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -309,11 +309,16 @@ module VX_cluster #( wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] per_core_dram_req_tag_qual; wire [`NUM_CORES-1:0] per_core_dram_req_ready_qual; + wire [`NUM_CORES-1:0] per_core_dram_rsp_valid_unqual; + wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_dram_rsp_data_unqual; + wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] per_core_dram_rsp_tag_unqual; + wire [`NUM_CORES-1:0] per_core_dram_rsp_ready_unqual; + for (genvar i = 0; i < `NUM_CORES; i++) begin VX_skid_buffer #( .DATAW (1 + `DDRAM_BYTEEN_WIDTH + `DDRAM_ADDR_WIDTH + `DDRAM_LINE_WIDTH + `XDRAM_TAG_WIDTH), .PASSTHRU (`NUM_CORES < 4) - ) dram_req_buffer ( + ) core_req_buffer ( .clk (clk), .reset (reset), .valid_in (per_core_dram_req_valid[i]), @@ -323,6 +328,20 @@ module VX_cluster #( .data_out ({per_core_dram_req_rw_qual[i], per_core_dram_req_byteen_qual[i], per_core_dram_req_addr_qual[i], per_core_dram_req_data_qual[i], per_core_dram_req_tag_qual[i]}), .ready_out (per_core_dram_req_ready_qual[i]) ); + + VX_skid_buffer #( + .DATAW (`DDRAM_LINE_WIDTH + `XDRAM_TAG_WIDTH), + .PASSTHRU (1) + ) core_rsp_buffer ( + .clk (clk), + .reset (reset), + .valid_in (per_core_dram_rsp_valid_unqual[i]), + .data_in ({per_core_dram_rsp_data_unqual[i], per_core_dram_rsp_tag_unqual[i]}), + .ready_in (per_core_dram_rsp_ready_unqual[i]), + .valid_out (per_core_dram_rsp_valid[i]), + .data_out ({per_core_dram_rsp_data[i], per_core_dram_rsp_tag[i]}), + .ready_out (per_core_dram_rsp_ready[i]) + ); end VX_cache #( @@ -366,10 +385,10 @@ module VX_cluster #( .core_req_ready (per_core_dram_req_ready_qual), // Core response - .core_rsp_valid (per_core_dram_rsp_valid), - .core_rsp_data (per_core_dram_rsp_data), - .core_rsp_tag (per_core_dram_rsp_tag), - .core_rsp_ready (per_core_dram_rsp_ready), + .core_rsp_valid (per_core_dram_rsp_valid_unqual), + .core_rsp_data 
(per_core_dram_rsp_data_unqual), + .core_rsp_tag (per_core_dram_rsp_tag_unqual), + .core_rsp_ready (per_core_dram_rsp_ready_unqual), // DRAM request .dram_req_valid (dram_req_valid), diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index d9625e25..cf9d8dff 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -308,9 +308,14 @@ module Vortex ( wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen_qual; wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr_qual; wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data_qual; - wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag_qual; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag_qual; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_ready_qual; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid_unqual; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data_unqual; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag_unqual; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready_unqual; + for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin VX_skid_buffer #( .DATAW (1 + `L2DRAM_BYTEEN_WIDTH + `L2DRAM_ADDR_WIDTH + `L2DRAM_LINE_WIDTH + `L2DRAM_TAG_WIDTH), @@ -325,6 +330,20 @@ module Vortex ( .data_out ({per_cluster_dram_req_rw_qual[i], per_cluster_dram_req_byteen_qual[i], per_cluster_dram_req_addr_qual[i], per_cluster_dram_req_data_qual[i], per_cluster_dram_req_tag_qual[i]}), .ready_out (per_cluster_dram_req_ready_qual[i]) ); + + VX_skid_buffer #( + .DATAW (`L2DRAM_LINE_WIDTH + `L2DRAM_TAG_WIDTH), + .PASSTHRU (1) + ) core_rsp_buffer ( + .clk (clk), + .reset (reset), + .valid_in (per_cluster_dram_rsp_valid_unqual[i]), + .data_in ({per_cluster_dram_rsp_data_unqual[i], per_cluster_dram_rsp_tag_unqual[i]}), + .ready_in (per_cluster_dram_rsp_ready_unqual[i]), + .valid_out (per_cluster_dram_rsp_valid[i]), + .data_out ({per_cluster_dram_rsp_data[i], per_cluster_dram_rsp_tag[i]}), 
+ .ready_out (per_cluster_dram_rsp_ready[i]) + ); end VX_cache #( @@ -368,10 +387,10 @@ module Vortex ( .core_req_ready (per_cluster_dram_req_ready_qual), // Core response - .core_rsp_valid (per_cluster_dram_rsp_valid), - .core_rsp_data (per_cluster_dram_rsp_data), - .core_rsp_tag (per_cluster_dram_rsp_tag), - .core_rsp_ready (per_cluster_dram_rsp_ready), + .core_rsp_valid (per_cluster_dram_rsp_valid_unqual), + .core_rsp_data (per_cluster_dram_rsp_data_unqual), + .core_rsp_tag (per_cluster_dram_rsp_tag_unqual), + .core_rsp_ready (per_cluster_dram_rsp_ready_unqual), // DRAM request .dram_req_valid (dram_req_valid), 
diff --git a/hw/rtl/afu/VX_avs_wrapper.v b/hw/rtl/afu/VX_avs_wrapper.v
new file mode 100644
index 00000000..ca814e47
--- /dev/null
+++ b/hw/rtl/afu/VX_avs_wrapper.v
@@ -0,0 +1,155 @@
+`include "VX_define.vh"
+
+// Bridges a valid/ready DRAM request/response interface onto an Avalon (AVS)
+// memory port.
+//
+// Requests are forwarded to the AVS bus combinationally. The tag of every
+// accepted read is pushed into rd_req_queue and every returned data beat
+// (avs_readdatavalid) is pushed into rd_rsp_queue; both queues are popped
+// together when a response is handed over, pairing tags and data in FIFO
+// order.
+// NOTE(review): this pairing presumes the AVS slave returns read data in
+// request order - confirm for the target platform.
+//
+// avs_pending_reads counts accepted reads whose response has not been popped
+// yet. Once it reaches RD_QUEUE_SIZE all new requests (reads and writes) are
+// held off.
+//
+// avs_burstcount and avs_bankselect come from registers that are written
+// only in reset (to 1 and 0 respectively).
+module VX_avs_wrapper #(
+    parameter AVS_DATAW     = 1,    // AVS data width in bits
+    parameter AVS_ADDRW     = 1,    // AVS address width in bits
+    parameter AVS_BURSTW    = 1,    // AVS burst count width in bits
+    parameter AVS_BANKS     = 1,    // number of banks selectable via avs_bankselect
+    parameter REQ_TAGW      = 1,    // request/response tag width in bits
+    parameter RD_QUEUE_SIZE = 1,    // depth of the read tag/data queues
+
+    // derived widths
+    parameter AVS_BYTEENW   = (AVS_DATAW / 8),
+    parameter RD_QUEUE_ADDRW= $clog2(RD_QUEUE_SIZE+1),
+    parameter AVS_BANKS_BITS= $clog2(AVS_BANKS)
+) (
+    input wire                          clk,
+    input wire                          reset,
+
+    // AVS bus
+    output wire [AVS_DATAW-1:0]         avs_writedata,
+    input wire [AVS_DATAW-1:0]          avs_readdata,
+    output wire [AVS_ADDRW-1:0]         avs_address,
+    input wire                          avs_waitrequest,
+    output wire                         avs_write,
+    output wire                         avs_read,
+    output wire [AVS_BYTEENW-1:0]       avs_byteenable,
+    output wire [AVS_BURSTW-1:0]        avs_burstcount,
+    input wire                          avs_readdatavalid,
+    output wire [AVS_BANKS_BITS-1:0]    avs_bankselect,
+
+    // DRAM request
+    input wire                          dram_req_valid,
+    input wire                          dram_req_rw,
+    input wire [AVS_BYTEENW-1:0]        dram_req_byteen,
+    input wire [AVS_ADDRW-1:0]          dram_req_addr,
+    input wire [AVS_DATAW-1:0]          dram_req_data,
+    input wire [REQ_TAGW-1:0]           dram_req_tag,
+    output wire                         dram_req_ready,
+
+    // DRAM response
+    output wire                         dram_rsp_valid,
+    output wire [AVS_DATAW-1:0]         dram_rsp_data,
+    output wire [REQ_TAGW-1:0]          dram_rsp_tag,
+    input wire                          dram_rsp_ready
+);
+    reg [AVS_BANKS_BITS-1:0] avs_bankselect_r;
+    reg [AVS_BURSTW-1:0]     avs_burstcount_r;
+
+    // a read is tracked once it is accepted and retired when its response is consumed
+    wire avs_reqq_push = dram_req_valid && dram_req_ready && !dram_req_rw;
+    wire avs_reqq_pop  = dram_rsp_valid && dram_rsp_ready;
+
+    // the tag queue and the data queue are popped in lock-step
+    wire avs_rspq_push = avs_readdatavalid;
+    wire avs_rspq_pop  = avs_reqq_pop;
+    wire avs_rspq_empty;
+
+    reg  [RD_QUEUE_ADDRW-1:0] avs_pending_reads;
+    wire [RD_QUEUE_ADDRW-1:0] avs_pending_reads_n;
+
+    // +1 on push only, -1 on pop only, unchanged when both or neither happen
+    assign avs_pending_reads_n = avs_pending_reads
+                               + RD_QUEUE_ADDRW'((avs_reqq_push && !avs_rspq_pop) ? 1 :
+                                                 (avs_rspq_pop && !avs_reqq_push) ? -1 : 0);
+
+    always @(posedge clk) begin
+        if (reset) begin
+            avs_burstcount_r  <= 1;
+            avs_bankselect_r  <= 0;
+            avs_pending_reads <= 0;
+        end else begin
+            avs_pending_reads <= avs_pending_reads_n;
+        end
+    end
+
+    // tags of the reads that are in flight
+    VX_generic_queue #(
+        .DATAW    (REQ_TAGW),
+        .SIZE     (RD_QUEUE_SIZE),
+        .BUFFERED (1)
+    ) rd_req_queue (
+        .clk      (clk),
+        .reset    (reset),
+        .push     (avs_reqq_push),
+        .pop      (avs_reqq_pop),
+        .data_in  (dram_req_tag),
+        .data_out (dram_rsp_tag),
+        `UNUSED_PIN (empty),
+        `UNUSED_PIN (full),
+        `UNUSED_PIN (size)
+    );
+
+    // read data returned by the AVS bus
+    VX_generic_queue #(
+        .DATAW    (AVS_DATAW),
+        .SIZE     (RD_QUEUE_SIZE),
+        .BUFFERED (1)
+    ) rd_rsp_queue (
+        .clk      (clk),
+        .reset    (reset),
+        .push     (avs_rspq_push),
+        .pop      (avs_rspq_pop),
+        .data_in  (avs_readdata),
+        .data_out (dram_rsp_data),
+        .empty    (avs_rspq_empty),
+        `UNUSED_PIN (full),
+        `UNUSED_PIN (size)
+    );
+
+    // hold off new requests while RD_QUEUE_SIZE reads are outstanding
+    wire rsp_queue_ready = (avs_pending_reads != RD_QUEUE_ADDRW'(RD_QUEUE_SIZE));
+
+    assign avs_read       = dram_req_valid && !dram_req_rw && rsp_queue_ready;
+    assign avs_write      = dram_req_valid && dram_req_rw && rsp_queue_ready;
+    assign avs_address    = dram_req_addr;
+    assign avs_byteenable = dram_req_byteen;
+    assign avs_writedata  = dram_req_data;
+    assign dram_req_ready = !avs_waitrequest && rsp_queue_ready;
+    assign avs_burstcount = avs_burstcount_r;
+    assign avs_bankselect = avs_bankselect_r;
+
+    assign dram_rsp_valid = !avs_rspq_empty;
+
+`ifdef DBG_PRINT_AVS
+    always @(posedge clk) begin
+        if (dram_req_valid && dram_req_ready) begin
+            if (dram_req_rw)
+                $display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, dram_req_data);
+            else
+                $display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, avs_pending_reads_n);
+        end
+        if (dram_rsp_valid && dram_rsp_ready) begin
+            $display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, dram_rsp_tag, dram_rsp_data, avs_pending_reads_n);
+        end
+    end
+`endif
+
+endmodule
\ No newline at end of file
diff --git a/hw/rtl/afu/ccip/ccip_if_pkg.sv b/hw/rtl/afu/ccip/ccip_if_pkg.sv new file mode 100644 index 00000000..930eaecb --- /dev/null +++ b/hw/rtl/afu/ccip/ccip_if_pkg.sv @@ -0,0 +1,244 @@ +// Date: 02/2/2016 +// Compliant with CCI-P spec v0.71 +package ccip_if_pkg; + +//===================================================================== +// CCI-P interface defines +//===================================================================== +parameter CCIP_VERSION_NUMBER = 12'h071; + +parameter CCIP_CLADDR_WIDTH = 42; +parameter CCIP_CLDATA_WIDTH = 512; + +parameter CCIP_MMIOADDR_WIDTH = 16; +parameter CCIP_MMIODATA_WIDTH = 64; +parameter CCIP_TID_WIDTH = 9; + +parameter CCIP_MDATA_WIDTH = 16; + + +// Number of requests that can be accepted after almost full is asserted. 
+parameter CCIP_TX_ALMOST_FULL_THRESHOLD = 8; + +parameter CCIP_MMIO_RD_TIMEOUT = 512; + +parameter CCIP_SYNC_RESET_POLARITY=1; // Active High Reset + +// Base types +//---------------------------------------------------------------------- +typedef logic [CCIP_CLADDR_WIDTH-1:0] t_ccip_clAddr; +typedef logic [CCIP_CLDATA_WIDTH-1:0] t_ccip_clData; + + +typedef logic [CCIP_MMIOADDR_WIDTH-1:0] t_ccip_mmioAddr; +typedef logic [CCIP_MMIODATA_WIDTH-1:0] t_ccip_mmioData; +typedef logic [CCIP_TID_WIDTH-1:0] t_ccip_tid; + + +typedef logic [CCIP_MDATA_WIDTH-1:0] t_ccip_mdata; +typedef logic [1:0] t_ccip_clNum; +typedef logic [2:0] t_ccip_qwIdx; + + +// Request Type Encodings +//---------------------------------------------------------------------- +// Channel 0 +typedef enum logic [3:0] { + eREQ_RDLINE_I = 4'h0, // Memory Read with FPGA Cache Hint=Invalid + eREQ_RDLINE_S = 4'h1 // Memory Read with FPGA Cache Hint=Shared +} t_ccip_c0_req; + +// Channel 1 +typedef enum logic [3:0] { + eREQ_WRLINE_I = 4'h0, // Memory Write with FPGA Cache Hint=Invalid + eREQ_WRLINE_M = 4'h1, // Memory Write with FPGA Cache Hint=Modified + eREQ_WRPUSH_I = 4'h2, // Memory Write with DDIO Hint ** NOT SUPPORTED CURRENTLY ** + eREQ_WRFENCE = 4'h4, // Memory Write Fence +// eREQ_ATOMIC = 4'h5, // Atomic operation: Compare-Exchange for Memory Addr ** NOT SUPPORTED CURRENTELY ** + eREQ_INTR = 4'h6 // Interrupt the CPU ** NOT SUPPORTED CURRENTLY ** +} t_ccip_c1_req; + +// Response Type Encodings +//---------------------------------------------------------------------- +// Channel 0 +typedef enum logic [3:0] { + eRSP_RDLINE = 4'h0, // Memory Read + eRSP_UMSG = 4'h4 // UMsg received +// eRSP_ATOMIC = 4'h5 // Atomic Operation: Compare-Exchange for Memory Addr +} t_ccip_c0_rsp; + +// Channel 1 +typedef enum logic [3:0] { + eRSP_WRLINE = 4'h0, // Memory Write + eRSP_WRFENCE = 4'h4, // Memory Write Fence + eRSP_INTR = 4'h6 // Interrupt delivered to the CPU ** NOT SUPPORTED CURRENTLY ** +} t_ccip_c1_rsp; + +// 
+// Virtual Channel Select +//---------------------------------------------------------------------- +typedef enum logic [1:0] { + eVC_VA = 2'b00, + eVC_VL0 = 2'b01, + eVC_VH0 = 2'b10, + eVC_VH1 = 2'b11 +} t_ccip_vc; + +// Multi-CL Memory Request +//---------------------------------------------------------------------- +typedef enum logic [1:0] { + eCL_LEN_1 = 2'b00, + eCL_LEN_2 = 2'b01, + eCL_LEN_4 = 2'b11 +} t_ccip_clLen; + +// +// Structures for Request and Response headers +//---------------------------------------------------------------------- +typedef struct packed { + t_ccip_vc vc_sel; + logic [1:0] rsvd1; // reserved, drive 0 + t_ccip_clLen cl_len; + t_ccip_c0_req req_type; + logic [5:0] rsvd0; // reserved, drive 0 + t_ccip_clAddr address; + t_ccip_mdata mdata; +} t_ccip_c0_ReqMemHdr; +parameter CCIP_C0TX_HDR_WIDTH = $bits(t_ccip_c0_ReqMemHdr); + +typedef struct packed { + logic [5:0] rsvd2; + t_ccip_vc vc_sel; + logic sop; + logic rsvd1; // reserved, drive 0 + t_ccip_clLen cl_len; + t_ccip_c1_req req_type; + logic [5:0] rsvd0; // reserved, drive 0 + t_ccip_clAddr address; + t_ccip_mdata mdata; +} t_ccip_c1_ReqMemHdr; +parameter CCIP_C1TX_HDR_WIDTH = $bits(t_ccip_c1_ReqMemHdr); + +typedef struct packed { + logic [5:0] rsvd2; // reserved, drive 0 + t_ccip_vc vc_sel; + logic [3:0] rsvd1; // reserved, drive 0 + t_ccip_c1_req req_type; + logic [47:0] rsvd0; // reserved, drive 0 + t_ccip_mdata mdata; +}t_ccip_c1_ReqFenceHdr; + +typedef struct packed { + t_ccip_vc vc_used; + logic rsvd1; // reserved, don't care + logic hit_miss; + logic [1:0] rsvd0; // reserved, don't care + t_ccip_clNum cl_num; + t_ccip_c0_rsp resp_type; + t_ccip_mdata mdata; +} t_ccip_c0_RspMemHdr; +parameter CCIP_C0RX_HDR_WIDTH = $bits(t_ccip_c0_RspMemHdr); + +typedef struct packed { + t_ccip_vc vc_used; + logic rsvd1; // reserved, don't care + logic hit_miss; + logic format; + logic rsvd0; // reserved, don't care + t_ccip_clNum cl_num; + t_ccip_c1_rsp resp_type; + t_ccip_mdata mdata; +} 
t_ccip_c1_RspMemHdr; +parameter CCIP_C1RX_HDR_WIDTH = $bits(t_ccip_c1_RspMemHdr); + +typedef struct packed { + logic [7:0] rsvd0; // reserved, don't care + t_ccip_c1_rsp resp_type; + t_ccip_mdata mdata; +} t_ccip_c1_RspFenceHdr; + +// Alternate Channel 0 MMIO request from host : +// MMIO requests arrive on the same channel as read responses, sharing +// t_if_ccip_c0_Rx below. When either mmioRdValid or mmioWrValid is set +// the message is an MMIO request and should be processed by casting +// t_if_ccip_c0_Rx.hdr to t_ccip_c0_ReqMmioHdr. +typedef struct packed { + t_ccip_mmioAddr address; // 4B aligned Mmio address + logic [1:0] length; // 2'b00- 4B, 2'b01- 8B, 2'b10- 64B + logic rsvd; // reserved, don't care + t_ccip_tid tid; +} t_ccip_c0_ReqMmioHdr; + +typedef struct packed { + t_ccip_tid tid; // Returned back from ReqMmioHdr +} t_ccip_c2_RspMmioHdr; +parameter CCIP_C2TX_HDR_WIDTH = $bits(t_ccip_c2_RspMmioHdr); + +//------------------------------------------------------------------------ +// CCI-P Input & Output bus structures +// +// Users are encouraged to use these for AFU development +//------------------------------------------------------------------------ +// Channel 0 : Memory Reads +typedef struct packed { + t_ccip_c0_ReqMemHdr hdr; // Request Header + logic valid; // Request Valid +} t_if_ccip_c0_Tx; + + +// Channel 1 : Memory Writes, Interrupts, CmpXchg +typedef struct packed { + t_ccip_c1_ReqMemHdr hdr; // Request Header + t_ccip_clData data; // Request Data + logic valid; // Request Wr Valid +} t_if_ccip_c1_Tx; + +// Channel 2 : MMIO Read response +typedef struct packed { + t_ccip_c2_RspMmioHdr hdr; // Response Header + logic mmioRdValid; // Response Read Valid + t_ccip_mmioData data; // Response Data +} t_if_ccip_c2_Tx; + +// Wrap all Tx channels +typedef struct packed { + t_if_ccip_c0_Tx c0; + t_if_ccip_c1_Tx c1; + t_if_ccip_c2_Tx c2; +} t_if_ccip_Tx; + +// Channel 0: Memory Read response, MMIO Request +typedef struct packed { + t_ccip_c0_RspMemHdr 
hdr; // Rd Response/ MMIO req Header + t_ccip_clData data; // Rd Data / MMIO req Data + // Only one of valid, mmioRdValid and mmioWrValid may be set + // in a cycle. When either mmioRdValid or mmioWrValid are true + // the hdr must be processed specially. See t_ccip_c0_ReqMmioHdr + // above. + logic rspValid; // Rd Response Valid + logic mmioRdValid; // MMIO Read Valid + logic mmioWrValid; // MMIO Write Valid +} t_if_ccip_c0_Rx; + +// Channel 1: Memory Writes +typedef struct packed { + t_ccip_c1_RspMemHdr hdr; // Response Header + logic rspValid; // Response Valid +} t_if_ccip_c1_Rx; + +// Wrap all channels +typedef struct packed { + logic c0TxAlmFull; // C0 Request Channel Almost Full + logic c1TxAlmFull; // C1 Request Channel Almost Full + + t_if_ccip_c0_Rx c0; + t_if_ccip_c1_Rx c1; +} t_if_ccip_Rx; + + +typedef union packed { + t_ccip_c0_RspMemHdr rspMemHdr; + t_ccip_c0_ReqMmioHdr reqMmioHdr; +} t_if_ccip_c0_RxHdr; + +endpackage \ No newline at end of file diff --git a/hw/rtl/afu/ccip/local_mem_cfg_pkg.sv b/hw/rtl/afu/ccip/local_mem_cfg_pkg.sv new file mode 100644 index 00000000..97205e3c --- /dev/null +++ b/hw/rtl/afu/ccip/local_mem_cfg_pkg.sv @@ -0,0 +1,61 @@ +// +// Copyright (c) 2017, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// Neither the name of the Intel Corporation nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. + +//`include "platform_afu_top_config.vh" + +`ifdef PLATFORM_PROVIDES_LOCAL_MEMORY + +package local_mem_cfg_pkg; + + parameter LOCAL_MEM_VERSION_NUMBER = 1; + + parameter LOCAL_MEM_ADDR_WIDTH = `PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH; + parameter LOCAL_MEM_DATA_WIDTH = `PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH; + + parameter LOCAL_MEM_BURST_CNT_WIDTH = `PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH; + + // Number of bytes in a data line + parameter LOCAL_MEM_DATA_N_BYTES = LOCAL_MEM_DATA_WIDTH / 8; + + + // Base types + // -------------------------------------------------------------------- + + typedef logic [LOCAL_MEM_ADDR_WIDTH-1:0] t_local_mem_addr; + typedef logic [LOCAL_MEM_DATA_WIDTH-1:0] t_local_mem_data; + + typedef logic [LOCAL_MEM_BURST_CNT_WIDTH-1:0] t_local_mem_burst_cnt; + + // Byte-level mask of a data line + typedef logic [LOCAL_MEM_DATA_N_BYTES-1:0] t_local_mem_byte_mask; + +endpackage // local_mem_cfg_pkg + +`endif // PLATFORM_PROVIDES_LOCAL_MEMORY \ No newline at end of file diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv new file mode 100644 index 00000000..59d54136 --- /dev/null +++ b/hw/rtl/afu/vortex_afu.sv @@ -0,0 +1,1091 @@ +`ifndef NOPAE 
+`include "platform_if.vh" +import local_mem_cfg_pkg::*; +`include "afu_json_info.vh" +`else +`include "vortex_afu.vh" +/* verilator lint_off IMPORTSTAR */ +import ccip_if_pkg::*; +import local_mem_cfg_pkg::*; +/* verilator lint_on IMPORTSTAR */ +`endif + +`include "VX_define.vh" + +module vortex_afu #( + parameter NUM_LOCAL_MEM_BANKS = 2 +) ( + // global signals + input clk, + input reset, + + // IF signals between CCI and AFU + input t_if_ccip_Rx cp2af_sRxPort, + output t_if_ccip_Tx af2cp_sTxPort, + + // Avalon signals for local memory access + output t_local_mem_data avs_writedata, + input t_local_mem_data avs_readdata, + output t_local_mem_addr avs_address, + input logic avs_waitrequest, + output logic avs_write, + output logic avs_read, + output t_local_mem_byte_mask avs_byteenable, + output t_local_mem_burst_cnt avs_burstcount, + input avs_readdatavalid, + + output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select +); + +localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr); +localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data); +localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH); + +localparam VX_DRAM_LINE_LW = $clog2(`VX_DRAM_LINE_WIDTH); +localparam VX_DRAM_LINE_IDX = (DRAM_LINE_LW - VX_DRAM_LINE_LW); + +localparam AVS_RD_QUEUE_SIZE = 16; +localparam AVS_REQ_TAGW = `VX_DRAM_TAG_WIDTH + VX_DRAM_LINE_IDX; + +localparam CCI_RD_WINDOW_SIZE = 8; +localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE; +localparam CCI_RW_QUEUE_SIZE = 1024; + +localparam AFU_ID_L = 16'h0002; // AFU ID Lower +localparam AFU_ID_H = 16'h0004; // AFU ID Higher + +localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ; +localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE; +localparam CMD_RUN = `AFU_IMAGE_CMD_RUN; +localparam CMD_CLFLUSH = `AFU_IMAGE_CMD_CLFLUSH; +localparam CMD_CSR_READ = `AFU_IMAGE_CMD_CSR_READ; +localparam CMD_CSR_WRITE = `AFU_IMAGE_CMD_CSR_WRITE; + +localparam MMIO_CMD_TYPE = `AFU_IMAGE_MMIO_CMD_TYPE; +localparam MMIO_IO_ADDR = `AFU_IMAGE_MMIO_IO_ADDR; +localparam 
MMIO_MEM_ADDR = `AFU_IMAGE_MMIO_MEM_ADDR; +localparam MMIO_DATA_SIZE = `AFU_IMAGE_MMIO_DATA_SIZE; +localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS; + +localparam MMIO_SCOPE_READ = `AFU_IMAGE_MMIO_SCOPE_READ; +localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE; + +localparam MMIO_CSR_CORE = `AFU_IMAGE_MMIO_CSR_CORE; +localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR; +localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA; +localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ; + +localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE); +localparam CCI_RD_RQ_DATAW = $bits(t_ccip_clData) + CCI_RD_RQ_TAGW; + +localparam STATE_IDLE = 0; +localparam STATE_READ = 1; +localparam STATE_WRITE = 2; +localparam STATE_START = 3; +localparam STATE_RUN = 4; +localparam STATE_CLFLUSH = 5; +localparam STATE_CSR_READ = 6; +localparam STATE_CSR_WRITE = 7; +localparam STATE_MAX_VALUE = 8; +localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE); + +`ifdef SCOPE +`SCOPE_DECL_SIGNALS +`endif + +wire [127:0] afu_id = `AFU_ACCEL_UUID; + +reg [STATE_WIDTH-1:0] state; + +// Vortex ports /////////////////////////////////////////////////////////////// + +wire vx_dram_req_valid; +wire vx_dram_req_rw; +wire [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen; +wire [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr; +wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data; +wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag; +wire vx_dram_req_ready; + +wire vx_dram_rsp_valid; +wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data; +wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag; +wire vx_dram_rsp_ready; + +reg vx_snp_req_valid; +reg [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; +wire vx_snp_req_inv = 0; +wire [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; +wire vx_snp_req_ready; + +wire vx_snp_rsp_valid; +`DEBUG_BEGIN +wire [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; +`DEBUG_END +reg vx_snp_rsp_ready; + +wire vx_csr_io_req_valid; +wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; +wire [11:0] vx_csr_io_req_addr; +wire vx_csr_io_req_rw; +wire 
[31:0] vx_csr_io_req_data; +wire vx_csr_io_req_ready; + +wire vx_csr_io_rsp_valid; +wire [31:0] vx_csr_io_rsp_data; +wire vx_csr_io_rsp_ready; + +wire vx_busy; + +reg vx_reset; +reg vx_enabled; + +// CMD variables ////////////////////////////////////////////////////////////// + +t_ccip_clAddr cmd_io_addr; +reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr; +reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size; + +`ifdef SCOPE +wire [63:0] cmd_scope_rdata; +wire [63:0] cmd_scope_wdata; +wire cmd_scope_read; +wire cmd_scope_write; +`endif + +reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; +reg [11:0] cmd_csr_addr; +reg [31:0] cmd_csr_rdata; +reg [31:0] cmd_csr_wdata; + +// MMIO controller //////////////////////////////////////////////////////////// + +`IGNORE_WARNINGS_BEGIN +t_ccip_c0_ReqMmioHdr mmio_hdr; +`IGNORE_WARNINGS_END +assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); + +`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!")) + +t_if_ccip_c2_Tx mmio_tx; +assign af2cp_sTxPort.c2 = mmio_tx; + +`ifdef SCOPE +assign cmd_scope_wdata = 64'(cp2af_sRxPort.c0.data); +assign cmd_scope_read = cp2af_sRxPort.c0.mmioRdValid && (MMIO_SCOPE_READ == mmio_hdr.address); +assign cmd_scope_write = cp2af_sRxPort.c0.mmioWrValid && (MMIO_SCOPE_WRITE == mmio_hdr.address); +`endif + +/* +`DEBUG_BEGIN +wire cp2af_sRxPort_c0_mmioWrValid = cp2af_sRxPort.c0.mmioWrValid; +wire cp2af_sRxPort_c0_mmioRdValid = cp2af_sRxPort.c0.mmioRdValid; +wire cp2af_sRxPort_c0_rspValid = cp2af_sRxPort.c0.rspValid; +wire cp2af_sRxPort_c1_rspValid = cp2af_sRxPort.c1.rspValid; +wire cp2af_sRxPort_c0TxAlmFull = cp2af_sRxPort.c0TxAlmFull; +wire cp2af_sRxPort_c1TxAlmFull = cp2af_sRxPort.c1TxAlmFull; +wire[$bits(mmio_hdr.address)-1:0] mmio_hdr_address = mmio_hdr.address; +wire[$bits(mmio_hdr.length)-1:0] mmio_hdr_length = mmio_hdr.length; +wire[$bits(mmio_hdr.tid)-1:0] mmio_hdr_tid = mmio_hdr.tid; +wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_sRxPort.c0.hdr.mdata; 
+`DEBUG_END +*/ + +wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address)) ? 3'(cp2af_sRxPort.c0.data) : 3'h0; + +`ifdef SCOPE +reg scope_start; +`endif + +// disable assertions until reset +`ifndef VERILATOR +initial begin + $assertoff; +end +`endif + +always @(posedge clk) begin + if (reset) begin + `ifndef VERILATOR + $asserton; // enable assertions + `endif + mmio_tx.mmioRdValid <= 0; + mmio_tx.hdr <= 0; + `ifdef SCOPE + scope_start <= 0; + `endif + end else begin + mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid; + mmio_tx.hdr.tid <= mmio_hdr.tid; + `ifdef SCOPE + scope_start <= cp2af_sRxPort.c0.mmioWrValid; + `endif + end + + // serve MMIO write request + if (cp2af_sRxPort.c0.mmioWrValid) begin + case (mmio_hdr.address) + MMIO_IO_ADDR: begin + cmd_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_IO_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_ccip_clAddr'(cp2af_sRxPort.c0.data)); + `endif + end + MMIO_MEM_ADDR: begin + cmd_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_local_mem_addr'(cp2af_sRxPort.c0.data)); + `endif + end + MMIO_DATA_SIZE: begin + cmd_data_size <= $bits(cmd_data_size)'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_DATA_SIZE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data)); + `endif + end + MMIO_CMD_TYPE: begin + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CMD_TYPE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_type)'(cp2af_sRxPort.c0.data)); + `endif + end + `ifdef SCOPE + MMIO_SCOPE_WRITE: begin + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_SCOPE_WRITE: addr=%0h, data=%0h", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)); + `endif + end + `endif + MMIO_CSR_CORE: begin + cmd_csr_core <= $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + 
$display("%t: MMIO_CSR_CORE: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data)); + `endif + end + MMIO_CSR_ADDR: begin + cmd_csr_addr <= $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CSR_ADDR: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data)); + `endif + end + MMIO_CSR_DATA: begin + cmd_csr_wdata <= $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CSR_DATA: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); + `endif + end + default: begin + `ifdef DBG_PRINT_OPAE + $display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); + `endif + end + endcase + end + + // serve MMIO read requests + if (cp2af_sRxPort.c0.mmioRdValid) begin + case (mmio_hdr.address) + // AFU header + 16'h0000: mmio_tx.data <= { + 4'b0001, // Feature type = AFU + 8'b0, // reserved + 4'b0, // afu minor revision = 0 + 7'b0, // reserved + 1'b1, // end of DFH list = 1 + 24'b0, // next DFH offset = 0 + 4'b0, // afu major revision = 0 + 12'b0 // feature ID = 0 + }; + AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low + AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi + 16'h0006: mmio_tx.data <= 64'h0; // next AFU + 16'h0008: mmio_tx.data <= 64'h0; // reserved + MMIO_STATUS: begin + mmio_tx.data <= 64'(state); + `ifdef DBG_PRINT_OPAE + if (state != STATE_WIDTH'(mmio_tx.data)) begin + $display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state); + end + `endif + end + MMIO_CSR_READ: begin + mmio_tx.data <= 64'(cmd_csr_rdata); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CSR_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_csr_rdata); + `endif + end + `ifdef SCOPE + MMIO_SCOPE_READ: begin + mmio_tx.data <= cmd_scope_rdata; + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_SCOPE_READ: addr=%0h, data=%0h", $time, 
mmio_hdr.address, cmd_scope_rdata); + `endif + end + `endif + default: begin + mmio_tx.data <= 64'h0; + `ifdef DBG_PRINT_OPAE + $display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address); + `endif + end + endcase + end +end + +// COMMAND FSM //////////////////////////////////////////////////////////////// + +wire cmd_read_done; +wire cmd_write_done; +wire cmd_clflush_done; +wire cmd_csr_done; +wire cmd_run_done; + +always @(posedge clk) begin + if (reset) begin + state <= STATE_IDLE; + vx_reset <= 0; + vx_enabled <= 0; + end else begin + + vx_reset <= 0; + + case (state) + STATE_IDLE: begin + case (cmd_type) + CMD_MEM_READ: begin + `ifdef DBG_PRINT_OPAE + $display("%t: STATE READ: ia=%0h addr=%0h size=%0d", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size); + `endif + state <= STATE_READ; + end + CMD_MEM_WRITE: begin + `ifdef DBG_PRINT_OPAE + $display("%t: STATE WRITE: ia=%0h addr=%0h size=%0d", $time, cmd_io_addr, cmd_mem_addr, cmd_data_size); + `endif + state <= STATE_WRITE; + end + CMD_RUN: begin + `ifdef DBG_PRINT_OPAE + $display("%t: STATE START", $time); + `endif + vx_reset <= 1; + vx_enabled <= 1; + state <= STATE_START; + end + CMD_CLFLUSH: begin + `ifdef DBG_PRINT_OPAE + $display("%t: STATE CFLUSH: addr=%0h size=%0d", $time, cmd_mem_addr, cmd_data_size); + `endif + state <= STATE_CLFLUSH; + end + CMD_CSR_READ: begin + `ifdef DBG_PRINT_OPAE + $display("%t: STATE CSR_READ: addr=%0h", $time, cmd_csr_addr); + `endif + state <= STATE_CSR_READ; + end + CMD_CSR_WRITE: begin + `ifdef DBG_PRINT_OPAE + $display("%t: STATE CSR_WRITE: addr=%0h data=%0d", $time, cmd_csr_addr, cmd_csr_wdata); + `endif + state <= STATE_CSR_WRITE; + end + default: begin + state <= state; + end + endcase + end + + STATE_READ: begin + if (cmd_read_done) begin + state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif + end + end + + STATE_WRITE: begin + if (cmd_write_done) begin + state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE 
IDLE", $time); + `endif + end + end + + STATE_START: begin // vortex reset cycle + state <= STATE_RUN; + end + + STATE_RUN: begin + if (cmd_run_done) begin + state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif + end + end + + STATE_CLFLUSH: begin + if (cmd_clflush_done) begin + state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif + end + end + + STATE_CSR_READ: begin + if (cmd_csr_done) begin + state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif + end + end + + STATE_CSR_WRITE: begin + if (cmd_csr_done) begin + state <= STATE_IDLE; + `ifdef DBG_PRINT_OPAE + $display("%t: STATE IDLE", $time); + `endif + end + end + + default: begin + state <= state; + end + + endcase + end +end + +// AVS Controller ///////////////////////////////////////////////////////////// + +wire dram_req_valid; +wire dram_req_rw; +t_local_mem_byte_mask dram_req_byteen; +t_local_mem_addr dram_req_addr; +t_local_mem_data dram_req_data; +wire [AVS_REQ_TAGW:0] dram_req_tag; +wire dram_req_ready; + +wire dram_rsp_valid; +t_local_mem_data dram_rsp_data; +wire [AVS_REQ_TAGW:0] dram_rsp_tag; +wire dram_rsp_ready; + +wire cci_dram_req_valid; +wire cci_dram_req_rw; +t_local_mem_byte_mask cci_dram_req_byteen; +t_local_mem_addr cci_dram_req_addr; +t_local_mem_data cci_dram_req_data; +wire [AVS_REQ_TAGW-1:0] cci_dram_req_tag; +wire cci_dram_req_ready; + +wire cci_dram_rsp_valid; +t_local_mem_data cci_dram_rsp_data; +wire [AVS_REQ_TAGW-1:0] cci_dram_rsp_tag; +wire cci_dram_rsp_ready; + +wire vx_dram_req_valid_qual; +t_local_mem_addr vx_dram_req_addr_qual; +t_local_mem_byte_mask vx_dram_req_byteen_qual; +t_local_mem_data vx_dram_req_data_qual; +wire [AVS_REQ_TAGW-1:0] vx_dram_req_tag_qual; + +wire [(1 << VX_DRAM_LINE_IDX)-1:0][`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data_unqual; +wire [AVS_REQ_TAGW-1:0] vx_dram_rsp_tag_unqual; + +wire cci_dram_rd_req_valid, cci_dram_wr_req_valid; +wire 
[DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
+wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
+
+//--
+// CCI-side DRAM request: the write engine owns the port while the FSM is in STATE_WRITE, the read engine otherwise.
+assign cci_dram_req_valid = (STATE_WRITE == state) ? cci_dram_wr_req_valid : cci_dram_rd_req_valid; // compare against the FSM state, not the CMD_* opcode
+
+assign cci_dram_req_addr = (STATE_WRITE == state) ? cci_dram_wr_req_addr : cci_dram_rd_req_addr;
+
+assign cci_dram_req_rw = (STATE_WRITE == state); // 1 = write to local memory
+
+assign cci_dram_req_byteen = {64{1'b1}}; // CCI transfers always enable all 64 byte lanes
+
+assign cci_dram_req_data = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]; // upper bits of the queue entry; the low CCI_RD_RQ_TAGW bits are dropped here
+
+assign cci_dram_req_tag = AVS_REQ_TAGW'(0); // CCI requests carry a constant zero tag
+
+`UNUSED_VAR (cci_dram_rsp_tag)
+
+//--
+// Vortex-side DRAM request: masked off until vx_enabled is set.
+assign vx_dram_req_valid_qual = vx_dram_req_valid && vx_enabled;
+
+assign vx_dram_req_addr_qual = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH]; // keep the top DRAM_ADDR_WIDTH address bits
+
+if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
+    wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]; // low address bits: position of the Vortex line inside the local-memory line
+    wire [VX_DRAM_LINE_IDX-1:0] vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0]; // same index, recovered from the low bits of the response tag
+    assign vx_dram_req_byteen_qual = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]) << (VX_DRAM_LINE_LW-3)); // NOTE(review): presumably VX_DRAM_LINE_LW is log2 of the line width in bits, so "-3" converts to bytes - confirm
+    assign vx_dram_req_data_qual = DRAM_LINE_WIDTH'(vx_dram_req_data) << ((DRAM_LINE_LW'(vx_dram_req_idx)) << VX_DRAM_LINE_LW);
+    assign vx_dram_req_tag_qual = {vx_dram_req_tag, vx_dram_req_idx}; // index rides in the tag so the response can select its slice
+    assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx];
+end else begin
+    assign vx_dram_req_byteen_qual = vx_dram_req_byteen;
+    assign vx_dram_req_tag_qual = vx_dram_req_tag;
+    assign vx_dram_req_data_qual = vx_dram_req_data;
+    assign vx_dram_rsp_data = vx_dram_rsp_data_unqual;
+end
+
+assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_IDX-1:VX_DRAM_LINE_IDX]; // strip the line index to recover the original Vortex tag
+
+//--
+// Two-source arbiter in front of the AVS wrapper: source 1 = CCI, source 0 = Vortex (order of the port concatenations).
+VX_mem_arb #(
+    .NUM_REQS (2),
+    .DATA_WIDTH ($bits(t_local_mem_data)),
+    .ADDR_WIDTH ($bits(t_local_mem_addr)),
+    .TAG_IN_WIDTH (AVS_REQ_TAGW),
+    .TAG_OUT_WIDTH (AVS_REQ_TAGW+1)
+) dram_arb (
+    .clk (clk),
+    .reset (reset),
+
+    // Source request
+
.req_valid_in ({cci_dram_req_valid, vx_dram_req_valid_qual}), + .req_rw_in ({cci_dram_req_rw, vx_dram_req_rw}), + .req_byteen_in ({cci_dram_req_byteen, vx_dram_req_byteen_qual}), + .req_addr_in ({cci_dram_req_addr, vx_dram_req_addr_qual}), + .req_data_in ({cci_dram_req_data, vx_dram_req_data_qual}), + .req_tag_in ({cci_dram_req_tag, vx_dram_req_tag_qual}), + .req_ready_in ({cci_dram_req_ready, vx_dram_req_ready}), + + // DRAM request + .req_valid_out (dram_req_valid), + .req_rw_out (dram_req_rw), + .req_byteen_out (dram_req_byteen), + .req_addr_out (dram_req_addr), + .req_data_out (dram_req_data), + .req_tag_out (dram_req_tag), + .req_ready_out (dram_req_ready), + + // Source response + .rsp_valid_out ({cci_dram_rsp_valid, vx_dram_rsp_valid}), + .rsp_data_out ({cci_dram_rsp_data, vx_dram_rsp_data_unqual}), + .rsp_tag_out ({cci_dram_rsp_tag, vx_dram_rsp_tag_unqual}), + .rsp_ready_out ({cci_dram_rsp_ready, vx_dram_rsp_ready}), + + // DRAM response + .rsp_valid_in (dram_rsp_valid), + .rsp_tag_in (dram_rsp_tag), + .rsp_data_in (dram_rsp_data), + .rsp_ready_in (dram_rsp_ready) +); + +//-- + +VX_avs_wrapper #( + .AVS_DATAW ($bits(t_local_mem_data)), + .AVS_ADDRW ($bits(t_local_mem_addr)), + .AVS_BURSTW ($bits(t_local_mem_burst_cnt)), + .AVS_BANKS (NUM_LOCAL_MEM_BANKS), + .REQ_TAGW (AVS_REQ_TAGW+1), + .RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE) +) avs_wrapper ( + .clk (clk), + .reset (reset), + + // AVS bus + .avs_writedata (avs_writedata), + .avs_readdata (avs_readdata), + .avs_address (avs_address), + .avs_waitrequest (avs_waitrequest), + .avs_write (avs_write), + .avs_read (avs_read), + .avs_byteenable (avs_byteenable), + .avs_burstcount (avs_burstcount), + .avs_readdatavalid (avs_readdatavalid), + .avs_bankselect (mem_bank_select), + + // DRAM request + .dram_req_valid (dram_req_valid), + .dram_req_rw (dram_req_rw), + .dram_req_byteen (dram_req_byteen), + .dram_req_addr (dram_req_addr), + .dram_req_data (dram_req_data), + .dram_req_tag (dram_req_tag), + .dram_req_ready 
(dram_req_ready),
+
+    // DRAM response
+    .dram_rsp_valid (dram_rsp_valid),
+    .dram_rsp_data (dram_rsp_data),
+    .dram_rsp_tag (dram_rsp_tag),
+    .dram_rsp_ready (dram_rsp_ready)
+);
+
+// CCI-P Read Request ///////////////////////////////////////////////////////////
+// Used while the FSM is in STATE_WRITE: lines are read from the host over CCI channel c0, queued, then written to local memory.
+reg [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads;       // CCI reads issued whose data has not yet been popped from the queue
+wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads_next;
+reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_ctr;                  // lines written to local memory so far
+reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr;                       // CCI read requests issued so far
+wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
+reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_addr_unqual;          // local-memory base address of the current window
+wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag;
+reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr;                        // responses received in the current window
+t_ccip_clAddr cci_rd_req_addr;                                  // host address of the next CCI read
+
+reg cci_rd_req_enable, cci_rd_req_wait;
+
+wire cci_rdq_push, cci_rdq_pop;
+wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din;
+wire cci_rdq_empty;
+// c0 request header: all fields zero except the address and the mdata tag
+always @(*) begin
+    af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0);
+    af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr;
+    af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag);
+end
+
+wire cci_dram_wr_req_fire = cci_dram_wr_req_valid && cci_dram_req_ready;
+
+wire cci_rd_req_fire = af2cp_sTxPort.c0.valid; // a request is counted every cycle c0.valid is high
+wire cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
+
+assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr); // tag = low bits of the request counter
+assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
+
+assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0);
+
+assign cci_rdq_pop = cci_dram_wr_req_fire; // an entry leaves the queue when its local-memory write is accepted
+assign cci_rdq_push = cci_rd_rsp_fire;     // every CCI read response is queued
+assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag}; // entry layout: data in the upper bits, response tag in the low CCI_RD_RQ_TAGW bits
+
+assign cci_pending_reads_next = cci_pending_reads
+                              + $bits(cci_pending_reads)'((cci_rd_req_fire && !cci_rdq_pop) ? 1 :
+                                                          (!cci_rd_req_fire && cci_rdq_pop) ? -1 : 0);
+
+assign cci_dram_wr_req_valid = !cci_rdq_empty;
+
+assign cci_dram_wr_req_addr = cci_dram_wr_req_addr_unqual + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout))); // window base + tag taken from the low bits of the queue head
+
+assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait;
+
+assign cmd_write_done = (cci_dram_wr_req_ctr == cmd_data_size); // done once cmd_data_size lines have been written to local memory
+
+// Send read requests to CCI
+always @(posedge clk) begin
+    if (reset) begin
+        cci_rd_req_addr <= 0;
+        cci_rd_req_ctr <= 0;
+        cci_rd_rsp_ctr <= 0;
+        cci_pending_reads <= 0;
+        cci_rd_req_enable <= 0;
+        cci_rd_req_wait <= 0;
+        cci_dram_wr_req_ctr <= 0;
+        cci_dram_wr_req_addr_unqual <= 0;
+    end
+    else begin
+        if ((STATE_IDLE == state)
+         && (CMD_MEM_WRITE == cmd_type)) begin // latch the command parameters on the cycle the write command is seen in idle
+            cci_rd_req_addr <= cmd_io_addr;
+            cci_rd_req_ctr <= 0;
+            cci_rd_rsp_ctr <= 0;
+            cci_pending_reads <= 0;                    // NOTE(review): overridden by the unconditional update at the end of this block
+            cci_rd_req_enable <= (cmd_data_size != 0); // NOTE(review): overridden by the unconditional assignment just below
+            cci_rd_req_wait <= 0;
+            cci_dram_wr_req_ctr <= 0;
+            cci_dram_wr_req_addr_unqual <= cmd_mem_addr;
+        end
+        // request only in STATE_WRITE, while lines remain, the queue has room and channel c0 is not almost full
+        cci_rd_req_enable <= (STATE_WRITE == state)
+                          && (cci_rd_req_ctr_next < cmd_data_size)
+                          && (cci_pending_reads_next < CCI_RD_QUEUE_SIZE)
+                          && !cp2af_sRxPort.c0TxAlmFull;
+
+        if (cci_rd_req_fire) begin
+            cci_rd_req_addr <= cci_rd_req_addr + 1;
+            cci_rd_req_ctr <= cci_rd_req_ctr_next;
+            if (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
+                cci_rd_req_wait <= 1; // end current request batch
+            end
+        `ifdef DBG_PRINT_OPAE
+            $display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads_next);
+        `endif
+        end
+
+        if (cci_rd_rsp_fire) begin
+            cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1);
+            if (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
+                cci_rd_req_wait <= 0; // restart new request batch
+            end
+        `ifdef DBG_PRINT_OPAE
+            $display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data);
+        `endif
+        end
+
+        /*if (cci_rdq_pop) begin
+        `ifdef DBG_PRINT_OPAE
+            $display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads_next);
+        `endif
+        end*/
+
+        if (cci_dram_wr_req_fire) begin // the base address advances by one window after the last write of each window
+            cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
+            cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
+        end
+
+        cci_pending_reads <= cci_pending_reads_next;
+    end
+end
+// Queue holding {data, tag} of CCI read responses until they are written to local memory
+VX_generic_queue #(
+    .DATAW(CCI_RD_RQ_DATAW),
+    .SIZE(CCI_RD_QUEUE_SIZE)
+) cci_rd_req_queue (
+    .clk (clk),
+    .reset (reset),
+    .push (cci_rdq_push),
+    .data_in (cci_rdq_din),
+    .pop (cci_rdq_pop),
+    .data_out (cci_rdq_dout),
+    .empty (cci_rdq_empty),
+    `UNUSED_PIN (full),
+    `UNUSED_PIN (size)
+);
+
+`ifdef VERILATOR
+`DEBUG_BLOCK(
+    reg [CCI_RD_WINDOW_SIZE-1:0] dbg_cci_rd_rsp_mask;
+    always @(posedge clk) begin
+        if (reset) begin
+            dbg_cci_rd_rsp_mask <= 0;
+        end else begin
+            if (cci_rd_rsp_fire) begin
+                if (cci_rd_rsp_ctr == 0) begin
+                    dbg_cci_rd_rsp_mask <= (CCI_RD_WINDOW_SIZE'(1) << cci_rd_rsp_tag);
+                end else begin
+                    assert(!dbg_cci_rd_rsp_mask[cci_rd_rsp_tag]);
+                    dbg_cci_rd_rsp_mask[cci_rd_rsp_tag] <= 1;
+                end
+            end
+        end
+    end
+)
+`endif
+
+// CCI-P Write Request //////////////////////////////////////////////////////////
+// Used while the FSM is in STATE_READ: lines are read from local memory and written to the host over CCI channel c1.
+reg [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes;       // CCI writes issued and not yet acknowledged
+wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes_next;
+reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr;                  // local-memory reads still to issue
+reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;                       // CCI writes still to issue
+reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr_unqual;          // next local-memory read address
+t_ccip_clAddr cci_wr_req_addr;                                  // host address of the next CCI write
+// c1 request: header zeroed except address and sop; payload is the local-memory read response
+always @(*) begin
+    af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0);
+    af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr;
+    af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode
+    af2cp_sTxPort.c1.data = t_ccip_clData'(cci_dram_rsp_data);
+end
+
+wire cci_wr_req_fire = af2cp_sTxPort.c1.valid; // a write is counted every cycle c1.valid is high
+wire cci_wr_rsp_fire = (STATE_READ == state) &&
cp2af_sRxPort.c1.rspValid;
+
+wire cci_dram_rd_req_fire = cci_dram_rd_req_valid && cci_dram_req_ready;
+// outstanding CCI writes: +1 per request sent, -1 per write response received
+assign cci_pending_writes_next = cci_pending_writes
+                               + $bits(cci_pending_writes)'((cci_wr_req_fire && !cci_wr_rsp_fire) ? 1 :
+                                                            (!cci_wr_req_fire && cci_wr_rsp_fire) ? -1 : 0);
+
+assign cci_dram_rd_req_valid = (cci_dram_rd_req_ctr != 0); // keep reading local memory while lines remain
+
+assign cci_dram_rd_req_addr = cci_dram_rd_req_addr_unqual;
+
+assign af2cp_sTxPort.c1.valid = cci_dram_rsp_valid && cci_dram_rsp_ready; // send only when the response is consumed, so a held response is not re-sent during back-pressure
+assign cci_dram_rsp_ready = !cp2af_sRxPort.c1TxAlmFull;                   // stall local-memory responses while channel c1 is almost full
+
+assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes); // all writes issued and all of them acknowledged
+
+// Send write requests to CCI
+always @(posedge clk)
+begin
+    if (reset) begin
+        cci_wr_req_addr <= 0;
+        cci_wr_req_ctr <= 0;
+        cci_pending_writes <= 0;
+        cci_dram_rd_req_ctr <= 0;
+        cci_dram_rd_req_addr_unqual <= 0;
+    end
+    else begin
+        if ((STATE_IDLE == state)
+         && (CMD_MEM_READ == cmd_type)) begin // latch the command parameters on the cycle the read command is seen in idle
+            cci_wr_req_addr <= cmd_io_addr;
+            cci_wr_req_ctr <= cmd_data_size;
+            cci_pending_writes <= 0; // NOTE(review): overridden by the unconditional update at the end of this block
+            cci_dram_rd_req_ctr <= cmd_data_size;
+            cci_dram_rd_req_addr_unqual <= cmd_mem_addr;
+        end
+
+        if (cci_wr_req_fire) begin
+            assert(cci_wr_req_ctr != 0);
+            cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
+            cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
+        `ifdef DBG_PRINT_OPAE
+            $display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next, af2cp_sTxPort.c1.data);
+        `endif
+        end
+
+        /*`ifdef DBG_PRINT_OPAE
+        if (cci_wr_rsp_fire) begin
+            $display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes_next);
+        end
+        `endif*/
+
+        if (cci_dram_rd_req_fire) begin
+            cci_dram_rd_req_addr_unqual <= cci_dram_rd_req_addr_unqual + DRAM_ADDR_WIDTH'(1);
+            cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1);
+        end
+
+        cci_pending_writes <= cci_pending_writes_next;
+    end
+end
+
+// Vortex cache snooping //////////////////////////////////////////////////////
+
+wire [`VX_DRAM_ADDR_WIDTH-1:0]
snp_req_size;
+wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr;
+reg [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr;             // requests sent (counts up) / responses outstanding (counts down)
+wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr_next, snp_rsp_ctr_next;
+
+wire vx_snp_req_fire, vx_snp_rsp_fire;
+// Convert the command's base address and size to Vortex-side units by appending zero low bits when the line widths differ.
+if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
+    assign snp_req_baseaddr = {cmd_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
+    assign snp_req_size = {cmd_data_size, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
+end else begin
+    assign snp_req_baseaddr = cmd_mem_addr;
+    assign snp_req_size = cmd_data_size;
+end
+
+assign vx_snp_req_tag = (`VX_SNP_TAG_WIDTH)'(snp_req_ctr); // tag = request counter cast to the snoop tag width
+
+assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready;
+assign vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready;
+
+assign snp_req_ctr_next = vx_snp_req_fire ? (snp_req_ctr + `VX_DRAM_ADDR_WIDTH'(1)) : snp_req_ctr;
+assign snp_rsp_ctr_next = vx_snp_rsp_fire ? (snp_rsp_ctr - `VX_DRAM_ADDR_WIDTH'(1)) : snp_rsp_ctr;
+
+assign cmd_clflush_done = (0 == snp_rsp_ctr); // flush completes when every expected snoop response has arrived
+
+always @(posedge clk) begin
+    if (reset) begin
+        vx_snp_req_valid <= 0;
+        vx_snp_req_addr <= 0;
+        vx_snp_rsp_ready <= 0;
+        snp_req_ctr <= 0;
+        snp_rsp_ctr <= 0;
+    end else begin
+        if ((STATE_IDLE == state)
+         && (CMD_CLFLUSH == cmd_type)) begin // arm the engine on the cycle the flush command is seen in idle
+            vx_snp_req_valid <= (snp_req_size != 0);
+            vx_snp_req_addr <= snp_req_baseaddr;
+            vx_snp_rsp_ready <= (snp_req_size != 0);
+            snp_req_ctr <= 0;
+            snp_rsp_ctr <= snp_req_size;
+        end
+
+        if ((STATE_CLFLUSH == state)
+         && (snp_req_ctr_next >= snp_req_size)) begin
+            vx_snp_req_valid <= 0; // last request is going out (or already has): stop requesting
+        end
+
+        if ((STATE_CLFLUSH == state)
+         && (0 == snp_rsp_ctr_next)) begin
+            vx_snp_rsp_ready <= 0; // last response is arriving (or already has): stop accepting
+        end
+
+        if (vx_snp_req_fire) begin
+            assert(snp_req_ctr < snp_req_size);
+            vx_snp_req_addr <= vx_snp_req_addr + `VX_DRAM_ADDR_WIDTH'(1);
+            snp_req_ctr <= snp_req_ctr_next;
+        `ifdef DBG_PRINT_OPAE
+            $display("%t: AFU Snp Req: addr=%0h, tag=%0h, rem=%0d", $time, `TO_FULL_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next));
+        `endif
+        end
+
+        if ((STATE_CLFLUSH == state)
+         && vx_snp_rsp_fire) begin
+            assert(snp_rsp_ctr != 0);
+            snp_rsp_ctr <= snp_rsp_ctr_next;
+        `ifdef DBG_PRINT_OPAE
+            $display("%t: AFU Snp Rsp: tag=%0h, rem=%0d", $time, vx_snp_rsp_tag, snp_rsp_ctr_next);
+        `endif
+        end
+    end
+end
+
+// CSRs ///////////////////////////////////////////////////////////////////////
+
+reg csr_io_req_sent; // set once the request handshake happened, so the request is not presented again
+// present a CSR request while in a CSR state, until it has been accepted
+assign vx_csr_io_req_valid = !csr_io_req_sent
+                          && ((STATE_CSR_READ == state || STATE_CSR_WRITE == state));
+assign vx_csr_io_req_coreid = cmd_csr_core;
+assign vx_csr_io_req_rw = (STATE_CSR_WRITE == state); // 1 = write
+assign vx_csr_io_req_addr = cmd_csr_addr;
+assign vx_csr_io_req_data = cmd_csr_wdata;
+
+assign vx_csr_io_rsp_ready = 1; // responses are always accepted
+
+assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_io_rsp_valid; // a write is done when the request is accepted, a read when the response arrives
+
+always @(posedge clk) begin
+    if (reset) begin
+        csr_io_req_sent <= 0;
+    end else begin
+        if (vx_csr_io_req_valid && vx_csr_io_req_ready) begin
+            csr_io_req_sent <= 1;
+        end
+        if (cmd_csr_done) begin
+            csr_io_req_sent <= 0; // placed after the set above, so the clear wins when both fire in the same cycle
+        end
+    end
+    // capture the read value; cmd_csr_rdata is not cleared by reset in this block
+    if ((STATE_CSR_READ == state)
+     && vx_csr_io_rsp_ready
+     && vx_csr_io_rsp_valid) begin
+        cmd_csr_rdata <= vx_csr_io_rsp_data;
+    end
+end
+
+// Vortex /////////////////////////////////////////////////////////////////////
+
+assign cmd_run_done = !vx_busy; // STATE_RUN ends as soon as the Vortex instance reports not busy
+
+Vortex #() vortex (
+    `SCOPE_BIND_afu_vortex
+
+    .clk (clk),
+    .reset (reset | vx_reset), // also reset by the one-cycle vx_reset pulse
+
+    // DRAM request
+    .dram_req_valid (vx_dram_req_valid),
+    .dram_req_rw (vx_dram_req_rw),
+    .dram_req_byteen (vx_dram_req_byteen),
+    .dram_req_addr (vx_dram_req_addr),
+    .dram_req_data (vx_dram_req_data),
+    .dram_req_tag (vx_dram_req_tag),
+    .dram_req_ready (vx_dram_req_ready),
+
+    // DRAM response
+    .dram_rsp_valid (vx_dram_rsp_valid),
+    .dram_rsp_data (vx_dram_rsp_data),
+    .dram_rsp_tag (vx_dram_rsp_tag),
+    .dram_rsp_ready (vx_dram_rsp_ready),
+
+    // Snoop request
+    .snp_req_valid (vx_snp_req_valid),
+    .snp_req_addr (vx_snp_req_addr),
+    .snp_req_inv
(vx_snp_req_inv), + .snp_req_tag (vx_snp_req_tag), + .snp_req_ready (vx_snp_req_ready), + + // Snoop response + .snp_rsp_valid (vx_snp_rsp_valid), + .snp_rsp_tag (vx_snp_rsp_tag), + .snp_rsp_ready (vx_snp_rsp_ready), + + // I/O request + `UNUSED_PIN (io_req_valid), + `UNUSED_PIN (io_req_rw), + `UNUSED_PIN (io_req_byteen), + `UNUSED_PIN (io_req_addr), + `UNUSED_PIN (io_req_data), + `UNUSED_PIN (io_req_tag), + .io_req_ready (1'b1), + + // I/O response + .io_rsp_valid (1'b0), + .io_rsp_data (0), + .io_rsp_tag (0), + `UNUSED_PIN (io_rsp_ready), + + // CSR I/O Request + .csr_io_req_valid (vx_csr_io_req_valid), + .csr_io_req_coreid(vx_csr_io_req_coreid), + .csr_io_req_addr (vx_csr_io_req_addr), + .csr_io_req_rw (vx_csr_io_req_rw), + .csr_io_req_data (vx_csr_io_req_data), + .csr_io_req_ready (vx_csr_io_req_ready), + + // CSR I/O Response + .csr_io_rsp_valid (vx_csr_io_rsp_valid), + .csr_io_rsp_data (vx_csr_io_rsp_data), + .csr_io_rsp_ready (vx_csr_io_rsp_ready), + + // status + .busy (vx_busy), + `UNUSED_PIN (ebreak) +); + +// SCOPE ////////////////////////////////////////////////////////////////////// + +`ifdef SCOPE + +`SCOPE_ASSIGN (ccip_sRxPort_c0_mmioRdValid, cp2af_sRxPort.c0.mmioRdValid); +`SCOPE_ASSIGN (ccip_sRxPort_c0_mmioWrValid, cp2af_sRxPort.c0.mmioWrValid); +`SCOPE_ASSIGN (mmio_hdr_address, mmio_hdr.address); +`SCOPE_ASSIGN (mmio_hdr_length, mmio_hdr.length); +`SCOPE_ASSIGN (ccip_sRxPort_c0_hdr_mdata, cp2af_sRxPort.c0.hdr.mdata); +`SCOPE_ASSIGN (ccip_sRxPort_c0_rspValid, cp2af_sRxPort.c0.rspValid); +`SCOPE_ASSIGN (ccip_sRxPort_c1_rspValid, cp2af_sRxPort.c1.rspValid); +`SCOPE_ASSIGN (ccip_sTxPort_c0_valid, af2cp_sTxPort.c0.valid); +`SCOPE_ASSIGN (ccip_sTxPort_c0_hdr_address, af2cp_sTxPort.c0.hdr.address); +`SCOPE_ASSIGN (ccip_sTxPort_c0_hdr_mdata, af2cp_sTxPort.c0.hdr.mdata); +`SCOPE_ASSIGN (ccip_sTxPort_c1_valid, af2cp_sTxPort.c1.valid); +`SCOPE_ASSIGN (ccip_sTxPort_c1_hdr_address, af2cp_sTxPort.c1.hdr.address); +`SCOPE_ASSIGN (ccip_sTxPort_c2_mmioRdValid, 
af2cp_sTxPort.c2.mmioRdValid); +`SCOPE_ASSIGN (ccip_sRxPort_c0TxAlmFull, cp2af_sRxPort.c0TxAlmFull); +`SCOPE_ASSIGN (ccip_sRxPort_c1TxAlmFull, cp2af_sRxPort.c1TxAlmFull); +`SCOPE_ASSIGN (avs_address, avs_address); +`SCOPE_ASSIGN (avs_waitrequest, avs_waitrequest); +`SCOPE_ASSIGN (avs_write_fire, avs_write && !avs_waitrequest); +`SCOPE_ASSIGN (avs_read_fire, avs_read && !avs_waitrequest); +`SCOPE_ASSIGN (avs_byteenable, avs_byteenable); +`SCOPE_ASSIGN (avs_burstcount, avs_burstcount); +`SCOPE_ASSIGN (avs_readdatavalid, avs_readdatavalid); +`SCOPE_ASSIGN (mem_bank_select, mem_bank_select); +`SCOPE_ASSIGN (ccip_dram_rd_req_ctr, cci_dram_rd_req_ctr); +`SCOPE_ASSIGN (ccip_dram_wr_req_ctr, cci_dram_wr_req_ctr); +`SCOPE_ASSIGN (ccip_rd_req_ctr, cci_rd_req_ctr); +`SCOPE_ASSIGN (ccip_rd_rsp_ctr, cci_rd_rsp_ctr); +`SCOPE_ASSIGN (ccip_wr_req_ctr, cci_wr_req_ctr); +`SCOPE_ASSIGN (snp_req_ctr, snp_req_ctr); +`SCOPE_ASSIGN (snp_rsp_ctr, snp_rsp_ctr); + +wire scope_changed = `SCOPE_TRIGGER; + +VX_scope #( + .DATAW ($bits({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST})), + .BUSW (64), + .SIZE (`SCOPE_SIZE), + .UPDW ($bits({`SCOPE_UPDATE_LIST})) +) scope ( + .clk (clk), + .reset (reset), + .start (scope_start), + .stop (1'b0), + .changed (scope_changed), + .data_in ({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST}), + .bus_in (cmd_scope_wdata), + .bus_out (cmd_scope_rdata), + .bus_read (cmd_scope_read), + .bus_write(cmd_scope_write) +); + +`endif + +endmodule \ No newline at end of file diff --git a/hw/rtl/afu/vortex_afu.vh b/hw/rtl/afu/vortex_afu.vh new file mode 100644 index 00000000..15513137 --- /dev/null +++ b/hw/rtl/afu/vortex_afu.vh @@ -0,0 +1,37 @@ +`ifndef __VORTEX_AFU__ +`define __VORTEX_AFU__ + +`include "ccip_if_pkg.sv" + +`define PLATFORM_PROVIDES_LOCAL_MEMORY +`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH 26 +`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH 512 +`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4 + +`include "local_mem_cfg_pkg.sv" + +`define AFU_ACCEL_NAME "vortex_afu" 
+`define AFU_ACCEL_UUID 128'h35f9452b_25c2_434c_93d5_6f8c60db361c + +`define AFU_IMAGE_CMD_CLFLUSH 4 +`define AFU_IMAGE_CMD_CSR_READ 5 +`define AFU_IMAGE_CMD_CSR_WRITE 6 +`define AFU_IMAGE_CMD_MEM_READ 1 +`define AFU_IMAGE_CMD_MEM_WRITE 2 +`define AFU_IMAGE_CMD_RUN 3 +`define AFU_IMAGE_MMIO_CMD_TYPE 10 +`define AFU_IMAGE_MMIO_CSR_CORE 24 +`define AFU_IMAGE_MMIO_CSR_ADDR 26 +`define AFU_IMAGE_MMIO_CSR_DATA 28 +`define AFU_IMAGE_MMIO_CSR_READ 30 +`define AFU_IMAGE_MMIO_DATA_SIZE 16 +`define AFU_IMAGE_MMIO_IO_ADDR 12 +`define AFU_IMAGE_MMIO_MEM_ADDR 14 +`define AFU_IMAGE_MMIO_SCOPE_READ 20 +`define AFU_IMAGE_MMIO_SCOPE_WRITE 22 +`define AFU_IMAGE_MMIO_STATUS 18 + +`define AFU_IMAGE_POWER 0 +`define AFU_TOP_IFC "ccip_std_afu_avalon_mm" + +`endif \ No newline at end of file diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index bd2c6a30..27268895 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -138,12 +138,22 @@ module VX_snp_forwarder #( end reg [NUM_REQS-1:0] snp_fwdout_ready_other; + wire [NUM_REQS-1:0] fwdout_ready_unqual; for (genvar i = 0; i < NUM_REQS; i++) begin - assign snp_fwdout_valid[i] = fwdout_valid && snp_fwdout_ready_other[i]; - assign snp_fwdout_addr[i] = fwdout_addr; - assign snp_fwdout_inv[i] = fwdout_inv; - assign snp_fwdout_tag[i] = fwdout_tag; + VX_skid_buffer #( + .DATAW (DST_ADDR_WIDTH + 1 + TAG_OUT_WIDTH), + .PASSTHRU (NUM_REQS >= 4) + ) fwdout_buffer ( + .clk (clk), + .reset (reset), + .valid_in (fwdout_valid && snp_fwdout_ready_other[i]), + .data_in ({fwdout_addr, fwdout_inv, fwdout_tag}), + .ready_in (fwdout_ready_unqual[i]), + .valid_out (snp_fwdout_valid[i]), + .data_out ({snp_fwdout_addr[i], snp_fwdout_inv[i], snp_fwdout_tag[i]}), + .ready_out (snp_fwdout_ready[i]) + ); end always @(*) begin @@ -151,12 +161,12 @@ module VX_snp_forwarder #( for (integer i = 0; i < NUM_REQS; i++) begin for (integer j = 0; j < NUM_REQS; j++) begin if (i != j) - snp_fwdout_ready_other[i] &= 
snp_fwdout_ready[j]; + snp_fwdout_ready_other[i] &= fwdout_ready_unqual[j]; end end end - assign fwdout_ready = (& snp_fwdout_ready); + assign fwdout_ready = (& fwdout_ready_unqual); assign snp_req_ready = fwdout_ready && !sfq_full && !dispatch_hold;