diff --git a/driver/tests/dogfood/Memcpy/hw/rtl/_hdr b/driver/tests/dogfood/Memcpy/hw/rtl/_hdr deleted file mode 100644 index 39a1dd9e..00000000 --- a/driver/tests/dogfood/Memcpy/hw/rtl/_hdr +++ /dev/null @@ -1,603 +0,0 @@ -// -// Copyright (c) 2017, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// Neither the name of the Intel Corporation nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. 
-
-
-// Read from the memory locations first and then write to the memory locations
-
-`include "platform_if.vh"
-`include "afu_json_info.vh"
-
-
-//
-// ccip_std_afu
-//
-// AFU that reads a cache line from a host-supplied address, adds one to the
-// 512-bit value and writes the result to a second host-supplied address.
-//
-// Both addresses arrive as MMIO writes to address 0: the first such write
-// after reset sets the write address, every later one sets the read address
-// and raises start_read.
-//
-module ccip_std_afu
-   (
-    // CCI-P Clocks and Resets
-    input  logic        pClk,                 // 400MHz - CCI-P clock domain. Primary interface clock
-    input  logic        pClkDiv2,             // 200MHz - CCI-P clock domain.
-    input  logic        pClkDiv4,             // 100MHz - CCI-P clock domain.
-    input  logic        uClk_usr,             // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
-    input  logic        uClk_usrDiv2,         // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
-    input  logic        pck_cp2af_softReset,  // CCI-P ACTIVE HIGH Soft Reset
-    input  logic [1:0]  pck_cp2af_pwrState,   // CCI-P AFU Power State
-    input  logic        pck_cp2af_error,      // CCI-P Protocol Error Detected
-
-    // Interface structures
-    input  t_if_ccip_Rx pck_cp2af_sRx,        // CCI-P Rx Port
-    output t_if_ccip_Tx pck_af2cp_sTx         // CCI-P Tx Port
-    );
-
-
-    //
-    // Run the entire design at the standard CCI-P frequency (400 MHz).
-    //
-    logic clk;
-    assign clk = pClk;
-
-    logic reset;
-    assign reset = pck_cp2af_softReset;
-
-    logic [511:0] wr_data;
-    logic [511:0] rd_data;
-
-    logic get_write_addr;
-    logic do_update;
-    logic rd_end_of_list;
-    logic rd_needed;
-    logic wr_needed;
-    logic [15:0] cnt_list_length;
-
-    // Declared up front because they are referenced by processes that
-    // appear before the process that drives them.
-    logic start_read;
-    logic start_write;
-    logic addr_next_valid;
-    logic wr_addr_next_valid;
-    t_ccip_clAddr rd_addr;
-    t_ccip_clAddr wr_addr;
-
-    //
-    // States of the read / update / write sequence.
-    //
-    typedef enum logic [1:0]
-    {
-        STATE_IDLE,
-        STATE_READ,
-        STATE_UPDATE,
-        STATE_WRITE
-    }
-    t_state;
-
-    t_state state;
-
-    // =========================================================================
-    //
-    //   Register requests.
-    //
-    // =========================================================================
-
-    //
-    // The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
-    // registered.  Here we register pck_cp2af_sRx and assign it to sRx.
-    // We also assign pck_af2cp_sTx to sTx here but don't register it.
-    // The code below never uses combinational logic to write sTx.
-    //
-
-    t_if_ccip_Rx sRx;
-    always_ff @(posedge clk)
-    begin
-        sRx <= pck_cp2af_sRx;
-    end
-
-    t_if_ccip_Tx sTx;
-    assign pck_af2cp_sTx = sTx;
-
-
-    // =========================================================================
-    //
-    //   CSR (MMIO) handling.
-    //
-    // =========================================================================
-
-    // The AFU ID is a unique ID for a given program.  Here we generated
-    // one with the "uuidgen" program and stored it in the AFU's JSON file.
-    // ASE and synthesis setup scripts automatically invoke afu_json_mgr
-    // to extract the UUID into afu_json_info.vh.
-    logic [127:0] afu_id = `AFU_ACCEL_UUID;
-
-    //
-    // A valid AFU must implement a device feature list, starting at MMIO
-    // address 0.  Every entry in the feature list begins with 5 64-bit
-    // words: a device feature header, two AFU UUID words and two reserved
-    // words.
-    //
-
-    // Is a CSR read request active this cycle?
-    logic is_csr_read;
-    assign is_csr_read = sRx.c0.mmioRdValid;
-
-    // Is a CSR write request active this cycle?
-    logic is_csr_write;
-    assign is_csr_write = sRx.c0.mmioWrValid;
-
-    // The MMIO request header is overlaid on the normal c0 memory read
-    // response data structure.  Cast the c0Rx header to an MMIO request
-    // header.
-    t_ccip_c0_ReqMmioHdr mmio_req_hdr;
-    assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
-
-
-    //
-    // Implement the device feature list by responding to MMIO reads.
-    //
-
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c2.mmioRdValid <= 1'b0;
-        end
-        else
-        begin
-            // Always respond with something for every read request
-            sTx.c2.mmioRdValid <= is_csr_read;
-
-            // The unique transaction ID matches responses to requests
-            sTx.c2.hdr.tid <= mmio_req_hdr.tid;
-
-            // Addresses are of 32-bit objects in MMIO space.  Addresses
-            // of 64-bit objects are thus multiples of 2.
-            case (mmio_req_hdr.address)
-              0: // AFU DFH (device feature header)
-                begin
-                    // Here we define a trivial feature list.  In this
-                    // example, our AFU is the only entry in this list.
-                    sTx.c2.data <= t_ccip_mmioData'(0);
-                    // Feature type is AFU
-                    sTx.c2.data[63:60] <= 4'h1;
-                    // End of list (last entry in list)
-                    sTx.c2.data[40] <= 1'b1;
-                end
-
-              // AFU_ID_L
-              2: sTx.c2.data <= afu_id[63:0];
-
-              // AFU_ID_H
-              4: sTx.c2.data <= afu_id[127:64];
-
-              // DFH_RSVD0
-              6: sTx.c2.data <= t_ccip_mmioData'(0);
-
-              // DFH_RSVD1
-              8: sTx.c2.data <= t_ccip_mmioData'(0);
-
-              default: sTx.c2.data <= t_ccip_mmioData'(0);
-            endcase
-        end
-    end
-
-
-    //
-    // CSR write handling.  Host software must tell the AFU the memory address
-    // to which it should be writing.  The address is set by writing a CSR.
-    //
-
-    // We use MMIO address 0 to set the memory address.  The read and
-    // write MMIO spaces are logically separate so we are free to use
-    // whatever we like.  This may not be good practice for cleanly
-    // organizing the MMIO address space, but it is legal.
-    //
-    // get_write_addr is high from reset until the first write to MMIO
-    // address 0, so only that first write is treated as the write address.
-    logic is_mem_addr_csr_write;
-    assign is_mem_addr_csr_write = get_write_addr && is_csr_write &&
-                                   (mmio_req_hdr.address == t_ccip_mmioAddr'(0));
-
-    // Memory address to which this AFU will write.
-    t_ccip_clAddr write_mem_addr;
-
-    // This process is the only driver of get_write_addr, write_mem_addr and
-    // start_write.  start_write was previously read by the write logic
-    // without ever being declared or driven.
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            get_write_addr <= 1'b1;
-            start_write <= 1'b0;
-        end
-        else if (is_mem_addr_csr_write)
-        begin
-            write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
-            get_write_addr <= 1'b0;
-            start_write <= 1'b1;
-        end
-        else if (!wr_needed && wr_addr_next_valid)
-        begin
-            // In this cycle the write logic latches write_mem_addr into
-            // wr_addr, so the "first write" marker can be dropped.
-            start_write <= 1'b0;
-        end
-    end
-
-
-    // Once the write address has been received, MMIO address 0 sets the
-    // memory address for reading data.
-    logic is_mem_addr_csr_read;
-    assign is_mem_addr_csr_read = !get_write_addr && is_csr_write &&
-                                  (mmio_req_hdr.address == t_ccip_mmioAddr'(0));
-
-    // Memory address from which this AFU will read.
-    t_ccip_clAddr read_mem_addr;
-
-    // This process is the only driver of start_read.  The clear that used to
-    // live in the state machine (a second always_ff driver) is folded in
-    // here with the same condition.
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            start_read <= 1'b0;
-        end
-        else if (is_mem_addr_csr_read)
-        begin
-            read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
-            start_read <= 1'b1;
-        end
-        else if ((state == STATE_READ) && rd_needed)
-        begin
-            start_read <= 1'b0;
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Main AFU logic
-    //
-    // =========================================================================
-
-    //
-    // State machine.  It drives only "state"; start_read and rd_end_of_list
-    // are owned by the processes that compute them.
-    //
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            state <= STATE_IDLE;
-        end
-        else
-        begin
-            case (state)
-              STATE_IDLE:
-                begin
-                    // Leave IDLE once the read address CSR has been written
-                    if (start_read)
-                    begin
-                        state <= STATE_READ;
-                        $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
-                    end
-                end
-
-              STATE_READ:
-                begin
-                    if (rd_needed)
-                    begin
-                        state <= STATE_UPDATE;
-                        $display("AFU reading data and pointing to next read address...");
-                    end
-                end
-
-              STATE_UPDATE:
-                begin
-                    // Update the read value to be written back
-                    if (do_update)
-                    begin
-                        state <= STATE_WRITE;
-                        $display("AFU performing comutations on the read values...");
-                    end
-                end
-
-              STATE_WRITE:
-                begin
-                    // Return to IDLE at the end of the list, otherwise go
-                    // back to READ once a write has been requested.
-                    if (rd_end_of_list)
-                    begin
-                        state <= STATE_IDLE;
-                        $display("AFU done...");
-                    end
-                    else if (wr_needed)
-                    begin
-                        state <= STATE_READ;
-                        $display("AFU reading again from read address...");
-                    end
-                end
-            endcase
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Read logic.
-    //
-    // =========================================================================
-
-    //
-    // READ REQUEST
-    //
-
-    // Next read address
-    t_ccip_clAddr addr_next;
-
-    always_ff @(posedge clk)
-    begin
-        // The next read address becomes valid the cycle after a write
-        // response is seen on c1.
-        addr_next_valid <= sRx.c1.rspValid;
-
-        // The address stride is still a placeholder of 0, so the next
-        // address is the current one.  Tracking rd_addr (instead of adding
-        // 0 to addr_next itself) gives addr_next a defined value.
-        addr_next <= rd_addr;
-
-        // End of list once 'h10 (16) read requests have been counted.
-        // NOTE(review): the earlier comment said "10 times"; confirm whether
-        // decimal 10 was intended.
-        if (reset)
-        begin
-            rd_end_of_list <= 1'b0;
-        end
-        else
-        begin
-            rd_end_of_list <= (cnt_list_length == 'h10);
-        end
-    end
-
-    //
-    // Since back pressure may prevent an immediate read request, we must
-    // record whether a read is needed and hold it until the request can
-    // be sent to the FIU.
-    //
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            rd_needed <= 1'b0;
-        end
-        else
-        begin
-            if (rd_needed)
-            begin
-                // Hold the request only while the c0 channel is almost full
-                rd_needed <= sRx.c0TxAlmFull;
-            end
-            else
-            begin
-                // Need a read under two conditions:
-                //   - The host just supplied a read address
-                //   - A write response arrived and the list has not ended
-                rd_needed <= (start_read || (addr_next_valid && ! rd_end_of_list));
-                rd_addr <= (start_read ? read_mem_addr : addr_next);
-            end
-        end
-    end
-
-    //
-    // Emit read requests to the FIU.
-    //
-
-    // Read header defines the request to the FIU.  The t_ccip_* type name is
-    // used for consistency with the other types in this module.
-    t_ccip_c0_ReqMemHdr rd_hdr;
-
-    always_comb
-    begin
-        rd_hdr = t_ccip_c0_ReqMemHdr'(0);
-
-        // Read request type
-        rd_hdr.req_type = eREQ_RDLINE_I;
-        // Address supplied by the host
-        rd_hdr.address = rd_addr;
-        // Let the FIU pick the channel
-        rd_hdr.vc_sel = eVC_VA;
-        // Read one line: rd_data holds exactly one 512-bit line
-        rd_hdr.cl_len = eCL_LEN_1;
-    end
-
-    // Send read requests to the FIU
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c0.valid <= 1'b0;
-            cnt_list_length <= 0;
-        end
-        else
-        begin
-            // Generate a read request when needed and the FIU isn't full
-            sTx.c0.valid <= (rd_needed && ! sRx.c0TxAlmFull);
-            sTx.c0.hdr <= rd_hdr;
-
-            if (rd_needed && ! sRx.c0TxAlmFull)
-            begin
-                cnt_list_length <= cnt_list_length + 1;
-                $display("Incrementing read count...");
-            end
-        end
-    end
-
-    //
-    // READ RESPONSE HANDLING
-    //
-
-    //
-    // Receive data (read responses).
-    //
-    // NOTE(review): rd_data is captured on every cycle spent in STATE_READ,
-    // without checking sRx.c0.rspValid.  Qualifying the capture would also
-    // require changing the STATE_READ exit condition, so it is left as is.
-    //
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            do_update <= 1'b0;
-        end
-        else
-        begin
-            if (state == STATE_READ)
-            begin
-                rd_data <= sRx.c0.data;
-                do_update <= 1'b1;
-            end
-            if (state == STATE_UPDATE)
-            begin
-                // Update the read data and put it in the write data to be written
-                wr_data <= rd_data + 1;
-                do_update <= 1'b0;
-            end
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Write logic.
-    //
-    // =========================================================================
-
-
-    //
-    // WRITE REQUEST
-    //
-
-    // Next write address
-    t_ccip_clAddr wr_addr_next;
-
-    always_ff @(posedge clk)
-    begin
-        // The next write address becomes valid the cycle after a read
-        // response is seen on c0.
-        wr_addr_next_valid <= sRx.c0.rspValid;
-
-        // Placeholder stride of 0: the next write address is the current one.
-        wr_addr_next <= wr_addr;
-    end
-
-    //
-    // Since back pressure may prevent an immediate write request, we must
-    // record whether a write is needed and hold it until the request can
-    // be sent to the FIU.
-    //
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            wr_needed <= 1'b0;
-        end
-        else
-        begin
-            if (wr_needed)
-            begin
-                // Hold the request only while the c1 channel is almost full
-                wr_needed <= sRx.c1TxAlmFull;
-            end
-            else
-            begin
-                // A write is requested after each read response.
-                // start_write only selects the host-supplied address for
-                // the first write; it does not trigger a write by itself,
-                // because no data has been read at that point.
-                wr_needed <= wr_addr_next_valid;
-                wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
-            end
-        end
-    end
-
-    //
-    // Emit write requests to the FIU.
-    //
-
-    // Write header defines the request to the FIU
-    t_ccip_c1_ReqMemHdr wr_hdr;
-
-    always_comb
-    begin
-        wr_hdr = t_ccip_c1_ReqMemHdr'(0);
-
-        // Write request type (the c1 channel takes a write opcode)
-        wr_hdr.req_type = eREQ_WRLINE_I;
-        // Address supplied by the host
-        wr_hdr.address = wr_addr;
-        // Let the FIU pick the channel
-        wr_hdr.vc_sel = eVC_VA;
-        // Write one line: only one line of data is presented per request
-        wr_hdr.cl_len = eCL_LEN_1;
-        // Start of packet is true (single line write)
-        wr_hdr.sop = 1'b1;
-    end
-
-    // Send write requests to the FIU
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c1.valid <= 1'b0;
-        end
-        else
-        begin
-            // Generate a write request when needed and the FIU isn't full
-            sTx.c1.valid <= (wr_needed && ! sRx.c1TxAlmFull);
-            sTx.c1.hdr <= wr_hdr;
-            // Non-blocking, like every other sTx assignment
-            sTx.c1.data <= t_ccip_clData'(wr_data);
-        end
-    end
-
-endmodule
diff --git a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello.json b/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello.json
deleted file mode 100644
index 85d7a529..00000000
--- a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-   "version": 1,
-   "afu-image": {
-      "power": 0,
-      "afu-top-interface":
-         {
-            "name": "ccip_std_afu"
-         },
-      "accelerator-clusters":
-         [
-            {
-               "name": "cci_hello",
-               "total-contexts": 1,
-               "accelerator-type-uuid": "c6aa954a-9b91-4a37-abc1-1d9f0709dcc3"
-            }
-         ]
-   }
-}
diff --git a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu.sv b/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu.sv
deleted file mode 100644
index eaee72da..00000000
--- a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu.sv
+++ /dev/null
@@ -1,653 +0,0 @@
-//
-// Copyright (c) 2017, Intel Corporation
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-// Redistributions of source code must retain the above copyright notice, this
-// list of conditions and the following disclaimer.
-//
-// Redistributions in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// Neither the name of the Intel Corporation nor the names of its contributors
-// may be used to endorse or promote products derived from this software
-// without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-// POSSIBILITY OF SUCH DAMAGE.
-
-
-// Read from the memory locations first and then write to the memory locations
-
-`include "platform_if.vh"
-`include "afu_json_info.vh"
-
-
-//
-// ccip_std_afu
-//
-// AFU that repeatedly reads one cache line from a host-supplied address,
-// adds two to the 512-bit value and writes the result to a second
-// host-supplied address.
-//
-// MMIO writes:  address 0 - cache-line address to write to
-//               address 2 - cache-line address to read from (starts the run)
-// MMIO reads:   addresses 0..8 - device feature header and AFU ID
-//               address 10     - current value of start_read
-//
-// Every register is assigned from exactly one always_ff process.
-//
-module ccip_std_afu
-   (
-    // CCI-P Clocks and Resets
-    input  logic        pClk,                 // 400MHz - CCI-P clock domain. Primary interface clock
-    input  logic        pClkDiv2,             // 200MHz - CCI-P clock domain.
-    input  logic        pClkDiv4,             // 100MHz - CCI-P clock domain.
-    input  logic        uClk_usr,             // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock **
-    input  logic        uClk_usrDiv2,         // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock **
-    input  logic        pck_cp2af_softReset,  // CCI-P ACTIVE HIGH Soft Reset
-    input  logic [1:0]  pck_cp2af_pwrState,   // CCI-P AFU Power State
-    input  logic        pck_cp2af_error,      // CCI-P Protocol Error Detected
-
-    // Interface structures
-    input  t_if_ccip_Rx pck_cp2af_sRx,        // CCI-P Rx Port
-    output t_if_ccip_Tx pck_af2cp_sTx         // CCI-P Tx Port
-    );
-
-
-    //
-    // Run the entire design at the standard CCI-P frequency (400 MHz).
-    //
-    logic clk;
-    assign clk = pClk;
-
-    logic reset;
-    assign reset = pck_cp2af_softReset;
-
-    logic [511:0] wr_data;
-    logic [511:0] rd_data;
-
-    logic do_update;
-    logic start_read;
-    logic start_write;
-    logic wr_addr_next_valid;
-    logic addr_next_valid;
-    logic rd_end_of_list;
-    logic rd_needed;
-    logic wr_needed;
-    logic read_req;
-    logic write_req;
-    logic [15:0] cnt_list_length;
-    t_ccip_clAddr rd_addr;
-    t_ccip_clAddr wr_addr;
-    t_ccip_clAddr addr_next;
-    t_ccip_clAddr wr_addr_next;
-
-    //
-    // States of the read / update / write sequence.  Declared here because
-    // several processes ahead of the state machine test "state".
-    //
-    typedef enum logic [1:0]
-    {
-        STATE_IDLE,
-        STATE_READ,
-        STATE_UPDATE,
-        STATE_WRITE
-    }
-    t_state;
-
-    t_state state;
-
-    // =========================================================================
-    //
-    //   Register requests.
-    //
-    // =========================================================================
-
-    //
-    // The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be
-    // registered.  Here we register pck_cp2af_sRx and assign it to sRx.
-    // We also assign pck_af2cp_sTx to sTx here but don't register it.
-    // The code below never uses combinational logic to write sTx.
-    //
-
-    t_if_ccip_Rx sRx;
-    always_ff @(posedge clk)
-    begin
-        sRx <= pck_cp2af_sRx;
-    end
-
-    t_if_ccip_Tx sTx;
-    assign pck_af2cp_sTx = sTx;
-
-    // A read request is issued in this cycle.  Shared by the process that
-    // sends the request and the one that owns addr_next_valid.
-    logic issue_read;
-    assign issue_read = (state == STATE_READ) && rd_needed &&
-                        !sRx.c0TxAlmFull && !read_req;
-
-    // A write request is issued in this cycle.  Shared by the process that
-    // sends the request and the one that owns wr_addr_next_valid.
-    logic issue_write;
-    assign issue_write = (state == STATE_WRITE) && wr_needed &&
-                         !sRx.c1TxAlmFull && !write_req;
-
-
-    // =========================================================================
-    //
-    //   CSR (MMIO) handling.
-    //
-    // =========================================================================
-
-    // The AFU ID is a unique ID for a given program.  Here we generated
-    // one with the "uuidgen" program and stored it in the AFU's JSON file.
-    // ASE and synthesis setup scripts automatically invoke afu_json_mgr
-    // to extract the UUID into afu_json_info.vh.
-    logic [127:0] afu_id = `AFU_ACCEL_UUID;
-
-    //
-    // A valid AFU must implement a device feature list, starting at MMIO
-    // address 0.  Every entry in the feature list begins with 5 64-bit
-    // words: a device feature header, two AFU UUID words and two reserved
-    // words.
-    //
-
-    // Is a CSR read request active this cycle?
-    logic is_csr_read;
-    assign is_csr_read = sRx.c0.mmioRdValid;
-
-    // Is a CSR write request active this cycle?
-    logic is_csr_write;
-    assign is_csr_write = sRx.c0.mmioWrValid;
-
-    // The MMIO request header is overlaid on the normal c0 memory read
-    // response data structure.  Cast the c0Rx header to an MMIO request
-    // header.
-    t_ccip_c0_ReqMmioHdr mmio_req_hdr;
-    assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr);
-
-
-    //
-    // Implement the device feature list by responding to MMIO reads.
-    //
-
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c2.mmioRdValid <= 1'b0;
-        end
-        else
-        begin
-            // Always respond with something for every read request
-            sTx.c2.mmioRdValid <= is_csr_read;
-
-            // The unique transaction ID matches responses to requests
-            sTx.c2.hdr.tid <= mmio_req_hdr.tid;
-
-            // Addresses are of 32-bit objects in MMIO space.  Addresses
-            // of 64-bit objects are thus multiples of 2.
-            case (mmio_req_hdr.address)
-              0: // AFU DFH (device feature header)
-                begin
-                    // Here we define a trivial feature list.  In this
-                    // example, our AFU is the only entry in this list.
-                    sTx.c2.data <= t_ccip_mmioData'(0);
-                    // Feature type is AFU
-                    sTx.c2.data[63:60] <= 4'h1;
-                    // End of list (last entry in list)
-                    sTx.c2.data[40] <= 1'b1;
-                end
-
-              // AFU_ID_L
-              2: sTx.c2.data <= afu_id[63:0];
-
-              // AFU_ID_H
-              4: sTx.c2.data <= afu_id[127:64];
-
-              // DFH_RSVD0
-              6: sTx.c2.data <= t_ccip_mmioData'(0);
-
-              // DFH_RSVD1
-              8: sTx.c2.data <= t_ccip_mmioData'(0);
-
-              // Lets the host read back start_read
-              10: sTx.c2.data <= t_ccip_mmioData'(start_read);
-
-              default: sTx.c2.data <= t_ccip_mmioData'(0);
-            endcase
-        end
-    end
-
-
-    //
-    // CSR write handling.  Host software must tell the AFU the memory address
-    // to which it should be writing.  The address is set by writing a CSR.
-    //
-
-    // We use MMIO address 0 to set the memory address.  The read and
-    // write MMIO spaces are logically separate so we are free to use
-    // whatever we like.  This may not be good practice for cleanly
-    // organizing the MMIO address space, but it is legal.
-    logic is_mem_addr_csr_write;
-    assign is_mem_addr_csr_write = is_csr_write &&
-                                   (mmio_req_hdr.address == t_ccip_mmioAddr'(0));
-
-    // Memory address to which this AFU will write.
-    t_ccip_clAddr write_mem_addr;
-
-    // Sole driver of write_mem_addr and start_write.  The clear that the
-    // state machine used to perform when leaving STATE_WRITE is folded in
-    // with the same condition; a CSR write in the same cycle takes priority.
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            start_write <= 1'b0;
-        end
-        else if (is_mem_addr_csr_write)
-        begin
-            write_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
-            start_write <= 1'b1;
-        end
-        else if ((state == STATE_WRITE) && !rd_end_of_list && !wr_needed)
-        begin
-            start_write <= 1'b0;
-        end
-    end
-
-
-    // MMIO address 2 (in 32-bit units, i.e. byte offset 8) sets the memory
-    // address for reading data.
-    logic is_mem_addr_csr_read;
-    assign is_mem_addr_csr_read = is_csr_write &&
-                                  (mmio_req_hdr.address == t_ccip_mmioAddr'(2));
-
-    // Memory address from which this AFU will read.
-    t_ccip_clAddr read_mem_addr;
-
-    // Sole driver of read_mem_addr and start_read.  The clear that the read
-    // response process used to perform is folded in with the same
-    // condition; a CSR write in the same cycle takes priority.
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            start_read <= 1'b0;
-        end
-        else if (is_mem_addr_csr_read)
-        begin
-            read_mem_addr <= t_ccip_clAddr'(sRx.c0.data);
-            start_read <= 1'b1;
-        end
-        else if ((state == STATE_UPDATE) && do_update)
-        begin
-            // First read done.  Next reads should be from the updated addresses
-            start_read <= 1'b0;
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Main AFU logic
-    //
-    // =========================================================================
-
-    //
-    // State machine.  It drives only "state"; the side effects it used to
-    // have on wr_needed, start_write, write_req and rd_end_of_list now live
-    // in the processes that own those registers.
-    //
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            state <= STATE_IDLE;
-        end
-        else
-        begin
-            case (state)
-              STATE_IDLE:
-                begin
-                    // Leave IDLE once the read address CSR has been written
-                    if (start_read)
-                    begin
-                        state <= STATE_READ;
-                        $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr));
-                    end
-                end
-
-              STATE_READ:
-                begin
-                    $display("AFU in READ...");
-                    $display("do_update is %d...",do_update);
-                    $display("addr_next_valid is %d...",addr_next_valid);
-                    $display("rd_needed is %d...",rd_needed);
-                    // The read has been answered and its data captured
-                    if (!rd_needed && do_update)
-                    begin
-                        state <= STATE_UPDATE;
-                        $display("AFU moving to UPDATE...");
-                    end
-                end
-
-              STATE_UPDATE:
-                begin
-                    $display("AFU in UPDATE...");
-                    // do_update drops once wr_data has been computed
-                    if (!do_update)
-                    begin
-                        state <= STATE_WRITE;
-                        $display("AFU moving to WRITE...");
-                    end
-                end
-
-              STATE_WRITE:
-                begin
-                    // Return to IDLE at the end of the list, otherwise go
-                    // back to READ once the write is no longer pending.
-                    $display("AFU in WRITE...");
-                    if (rd_end_of_list)
-                    begin
-                        state <= STATE_IDLE;
-                        $display("AFU done...");
-                    end
-                    else if (!wr_needed)
-                    begin
-                        state <= STATE_READ;
-                        $display("AFU moving to READ from WRITE...");
-                    end
-                end
-            endcase
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Read logic.
-    //
-    // =========================================================================
-
-    //
-    // READ REQUEST
-    //
-
-    // Sole driver of addr_next_valid, addr_next and rd_end_of_list.
-    // addr_next_valid is set by a write response and cleared when a read
-    // request is issued; if both happen in one cycle the set wins.
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            addr_next_valid <= 1'b0;
-            rd_end_of_list <= 1'b0;
-        end
-        else
-        begin
-            if (issue_read)
-            begin
-                addr_next_valid <= 1'b0;
-            end
-
-            if (sRx.c1.rspValid)
-            begin
-                // Next read address is valid when we have got the write response back
-                addr_next_valid <= 1'b1;
-
-                // The address stride is still 0, so the next address is the
-                // current one.
-                addr_next <= rd_addr;
-
-                // End of list reached if we have read 5 times
-                rd_end_of_list <= (cnt_list_length == 'h5);
-            end
-        end
-    end
-
-    //
-    // Record whether a read is needed and hold it until the read response
-    // arrives.
-    //
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            rd_needed <= 1'b0;
-        end
-        else
-        begin
-            if (rd_needed)
-            begin
-                rd_needed <= !sRx.c0.rspValid;
-            end
-            else if (state == STATE_READ)
-            begin
-                // Need a read under two conditions:
-                //   - The host supplied a read address (start_read)
-                //   - A write response made the next address valid and the
-                //     list has not ended
-                rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list)));
-                rd_addr <= (start_read ? read_mem_addr : addr_next);
-            end
-        end
-    end
-
-    //
-    // Emit read requests to the FIU.
-    //
-
-    // Read header defines the request to the FIU.  Only the address is set;
-    // every other field keeps the zero value from the cast.
-    t_ccip_c0_ReqMemHdr rd_hdr;
-
-    always_comb
-    begin
-        rd_hdr = t_ccip_c0_ReqMemHdr'(0);
-        rd_hdr.address = rd_addr;
-    end
-
-    // Send read requests to the FIU.  Sole driver of sTx.c0, cnt_list_length
-    // and read_req.
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c0.valid <= 1'b0;
-            cnt_list_length <= 0;
-            read_req <= 1'b0;
-        end
-        else
-        begin
-            if (state == STATE_READ)
-            begin
-                // Generate a read request when needed and the FIU isn't full
-                sTx.c0.valid <= issue_read;
-
-                if (issue_read)
-                begin
-                    sTx.c0.hdr <= rd_hdr;
-                    cnt_list_length <= cnt_list_length + 1;
-                    // read_req blocks further requests until the data has
-                    // been consumed in STATE_UPDATE
-                    read_req <= 1'b1;
-                    $display("Incrementing read count...%d",cnt_list_length);
-                    $display("Read address is 0x%x...",rd_hdr.address);
-                end
-            end
-
-            // The read data has been consumed; allow the next request.
-            if ((state == STATE_UPDATE) && do_update)
-            begin
-                read_req <= 1'b0;
-            end
-        end
-    end
-
-    //
-    // READ RESPONSE HANDLING
-    //
-
-    //
-    // Receive data (read responses).  Sole driver of rd_data, wr_data and
-    // do_update.
-    //
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            do_update <= 1'b0;
-        end
-        else
-        begin
-            if (!do_update && sRx.c0.rspValid)
-            begin
-                rd_data <= sRx.c0.data;
-                do_update <= 1'b1;
-                $display("rd data is %d...",rd_data);
-            end
-
-            if ((state == STATE_UPDATE) && (do_update == 1'b1))
-            begin
-                // Update the read data and put it in the write data to be written
-                wr_data <= rd_data + 2;
-                do_update <= 1'b0;
-                $display("write data is %d...",wr_data);
-            end
-        end
-    end
-
-
-    // =========================================================================
-    //
-    //   Write logic.
-    //
-    // =========================================================================
-
-
-    //
-    // WRITE REQUEST
-    //
-
-    // Sole driver of wr_addr_next_valid and wr_addr_next.
-    // wr_addr_next_valid is set by a read response and cleared when a write
-    // request is issued; if both happen in one cycle the set wins.
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            wr_addr_next_valid <= 1'b0;
-        end
-        else
-        begin
-            if (issue_write)
-            begin
-                wr_addr_next_valid <= 1'b0;
-            end
-
-            if (sRx.c0.rspValid)
-            begin
-                // Next write address is valid when we have got the read response back
-                wr_addr_next_valid <= 1'b1;
-
-                // The address stride is still 0, so the next address is the
-                // current one.
-                wr_addr_next <= wr_addr;
-            end
-        end
-    end
-
-    //
-    // Record whether a write is needed and hold it until the write response
-    // arrives.  Sole driver of wr_needed and wr_addr.
-    //
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            wr_needed <= 1'b0;
-        end
-        else
-        begin
-            if (wr_needed)
-            begin
-                wr_needed <= !sRx.c1.rspValid;
-            end
-            else
-            begin
-                // Need a write under two conditions:
-                //   - The host supplied a write address (start_write)
-                //   - A read response made the next write address valid
-                wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid));
-                wr_addr <= (start_write ? write_mem_addr : wr_addr_next);
-            end
-
-            // wr_data has been computed and the state machine is moving to
-            // STATE_WRITE: force a write.  Placed last so it overrides the
-            // assignments above, as the state machine's own assignment
-            // was meant to.
-            if ((state == STATE_UPDATE) && !do_update)
-            begin
-                wr_needed <= 1'b1;
-            end
-        end
-    end
-
-    //
-    // Emit write requests to the FIU.
-    //
-
-    // Write header defines the request to the FIU.  Only the address and
-    // sop are set; every other field keeps the zero value from the cast.
-    t_ccip_c1_ReqMemHdr wr_hdr;
-
-    always_comb
-    begin
-        wr_hdr = t_ccip_c1_ReqMemHdr'(0);
-        wr_hdr.address = wr_addr;
-
-        // Start of packet is true (single line write)
-        wr_hdr.sop = 1'b1;
-    end
-
-    // Send write requests to the FIU.  Sole driver of sTx.c1 and write_req.
-    always_ff @(posedge clk)
-    begin
-        if (reset)
-        begin
-            sTx.c1.valid <= 1'b0;
-            write_req <= 1'b0;
-        end
-        else if (state == STATE_WRITE)
-        begin
-            // Generate a write request when needed and the FIU isn't full
-            sTx.c1.valid <= issue_write;
-
-            if (issue_write)
-            begin
-                sTx.c1.hdr <= wr_hdr;
-                sTx.c1.data <= t_ccip_clData'(wr_data);
-                // write_req blocks further requests for this pass
-                write_req <= 1'b1;
-                $display("Write address is 0x%x...", wr_hdr.address);
-            end
-            else if (!rd_end_of_list && !wr_needed)
-            begin
-                // The state machine returns to STATE_READ in this cycle;
-                // re-arm for the next pass.
-                write_req <= 1'b0;
-            end
-        end
-    end
-
-endmodule
diff --git a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu_working.sv b/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu_working.sv
deleted file mode 100644
index 144b430e..00000000
--- a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu_working.sv
+++ /dev/null
@@ -1,621 +0,0 @@
-//
-// Copyright (c) 2017, Intel Corporation
-// All rights reserved.
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// Neither the name of the Intel Corporation nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - - -// Read from the memory locations first and then write to the memory locations - -`include "platform_if.vh" -`include "afu_json_info.vh" - - -module ccip_std_afu - ( - // CCI-P Clocks and Resets - input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock - input logic pClkDiv2, // 200MHz - CCI-P clock domain. - input logic pClkDiv4, // 100MHz - CCI-P clock domain. - input logic uClk_usr, // User clock domain. 
Refer to clock programming guide ** Currently provides fixed 300MHz clock ** - input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock ** - input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset - input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State - input logic pck_cp2af_error, // CCI-P Protocol Error Detected - - // Interface structures - input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port - output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port - ); - - - // - // Run the entire design at the standard CCI-P frequency (400 MHz). - // - logic clk; - assign clk = pClk; - - logic reset; - assign reset = pck_cp2af_softReset; - - logic [511:0] wr_data; - logic [511:0] rd_data; - - logic do_update; - logic start_read; - logic start_write; - logic wr_addr_next_valid; - logic addr_next_valid; - logic rd_end_of_list; - logic rd_needed; - logic wr_needed; - logic [15:0] cnt_list_length; - t_ccip_clAddr rd_addr; - t_ccip_clAddr wr_addr; - t_ccip_clAddr addr_next; - t_ccip_clAddr wr_addr_next; - - // ========================================================================= - // - // Register requests. - // - // ========================================================================= - - // - // The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be - // registered. Here we register pck_cp2af_sRx and assign it to sRx. - // We also assign pck_af2cp_sTx to sTx here but don't register it. - // The code below never uses combinational logic to write sTx. - // - - t_if_ccip_Rx sRx; - always_ff @(posedge clk) - begin - sRx <= pck_cp2af_sRx; - end - - t_if_ccip_Tx sTx; - assign pck_af2cp_sTx = sTx; - - - // ========================================================================= - // - // CSR (MMIO) handling. - // - // ========================================================================= - - // The AFU ID is a unique ID for a given program. 
Here we generated - // one with the "uuidgen" program and stored it in the AFU's JSON file. - // ASE and synthesis setup scripts automatically invoke afu_json_mgr - // to extract the UUID into afu_json_info.vh. - logic [127:0] afu_id = `AFU_ACCEL_UUID; - - // - // A valid AFU must implement a device feature list, starting at MMIO - // address 0. Every entry in the feature list begins with 5 64-bit - // words: a device feature header, two AFU UUID words and two reserved - // words. - // - - // Is a CSR read request active this cycle? - logic is_csr_read; - assign is_csr_read = sRx.c0.mmioRdValid; - - // Is a CSR write request active this cycle? - logic is_csr_write; - assign is_csr_write = sRx.c0.mmioWrValid; - - // The MMIO request header is overlayed on the normal c0 memory read - // response data structure. Cast the c0Rx header to an MMIO request - // header. - t_ccip_c0_ReqMmioHdr mmio_req_hdr; - assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr); - - - // - // Implement the device feature list by responding to MMIO reads. - // - - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c2.mmioRdValid <= 1'b0; - end - else - begin - // Always respond with something for every read request - sTx.c2.mmioRdValid <= is_csr_read; - - // The unique transaction ID matches responses to requests - sTx.c2.hdr.tid <= mmio_req_hdr.tid; - - // Addresses are of 32-bit objects in MMIO space. Addresses - // of 64-bit objects are thus multiples of 2. - case (mmio_req_hdr.address) - 0: // AFU DFH (device feature header) - begin - // Here we define a trivial feature list. In this - // example, our AFU is the only entry in this list. 
- sTx.c2.data <= t_ccip_mmioData'(0); - // Feature type is AFU - sTx.c2.data[63:60] <= 4'h1; - // End of list (last entry in list) - sTx.c2.data[40] <= 1'b1; - end - - // AFU_ID_L - 2: sTx.c2.data <= afu_id[63:0]; - - // AFU_ID_H - 4: sTx.c2.data <= afu_id[127:64]; - - // DFH_RSVD0 - 6: sTx.c2.data <= t_ccip_mmioData'(0); - - // DFH_RSVD1 - 8: sTx.c2.data <= t_ccip_mmioData'(0); - - // Updated by apurve to check fpgaReadMMIO - 10: sTx.c2.data <= t_ccip_mmioData'(start_read); - - default: sTx.c2.data <= t_ccip_mmioData'(0); - endcase - end - end - - - // - // CSR write handling. Host software must tell the AFU the memory address - // to which it should be writing. The address is set by writing a CSR. - // - - // We use MMIO address 0 to set the memory address. The read and - // write MMIO spaces are logically separate so we are free to use - // whatever we like. This may not be good practice for cleanly - // organizing the MMIO address space, but it is legal. - logic is_mem_addr_csr_write; - assign is_mem_addr_csr_write = is_csr_write && - (mmio_req_hdr.address == t_ccip_mmioAddr'(0)); - - // Memory address to which this AFU will write. - t_ccip_clAddr write_mem_addr; - - always_ff @(posedge clk) - begin - if (reset) - begin - start_write <= 1'b0; - end - else if (is_mem_addr_csr_write) - begin - write_mem_addr <= t_ccip_clAddr'(sRx.c0.data); - start_write <= 1'b1; - //$display("Write mem address is 0x%x", t_ccip_clAddr'(write_mem_addr)); - end - end - - - // We use MMIO address 8 to set the memory address for reading data. - logic is_mem_addr_csr_read; - assign is_mem_addr_csr_read = is_csr_write && - (mmio_req_hdr.address == t_ccip_mmioAddr'(2)); - - // Memory address from which this AFU will read. 
- t_ccip_clAddr read_mem_addr; - - //logic start_traversal = 'b0; - //t_ccip_clAddr start_traversal_addr; - - always_ff @(posedge clk) - begin - if (reset) - begin - start_read <= 1'b0; - end - else if (is_mem_addr_csr_read) - begin - read_mem_addr <= t_ccip_clAddr'(sRx.c0.data); - start_read <= 1'b1; - //$display("Read mem address is 0x%x", t_ccip_clAddr'(read_mem_addr)); - end - end - - - // ========================================================================= - // - // Main AFU logic - // - // ========================================================================= - - // - // States in our simple example. - // - //typedef enum logic [0:0] - typedef enum logic [1:0] - { - STATE_IDLE, - STATE_READ, - STATE_UPDATE, - STATE_WRITE - } - t_state; - - t_state state; - - // - // State machine - // - always_ff @(posedge clk) - begin - if (reset) - begin - state <= STATE_IDLE; - rd_end_of_list <= 1'b0; - end - else - begin - case (state) - STATE_IDLE: - begin - // Traversal begins when CSR 1 is written - if (start_read) - begin - state <= STATE_READ; - $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr)); - end - end - - STATE_READ: - begin - $display("AFU in READ..."); - if (!rd_needed && do_update) - begin - state <= STATE_UPDATE; - $display("AFU moving to UPDATE..."); - end - end - - STATE_UPDATE: - begin - // Update the read value to be written back - $display("AFU in UPDATE..."); - if (!do_update) - begin - state <= STATE_WRITE; - wr_needed <= 1'b1; - $display("AFU moving to WRITE..."); - end - end - - STATE_WRITE: - begin - // Write the updated value to the address - // Point to new address after that - // if done then point to IDLE; else read new values - $display("AFU in WRITE..."); - if (rd_end_of_list) - begin - state <= STATE_IDLE; - $display("AFU done..."); - end - else if (!wr_needed) - begin - state <= STATE_READ; - $display("AFU moving to READ from WRITE..."); - start_write <= 1'b0; - end - end - endcase - end - end - - - // 
========================================================================= - // - // Read logic. - // - // ========================================================================= - - // - // READ REQUEST - // - - // Did a write response just arrive - - // Next read address - - always_ff @(posedge clk) - begin - // Next read address is valid when we have got the write response back - addr_next_valid <= sRx.c1.rspValid; - - // Apurve: Next address is current address plus address length - //addr_next <= addr_next + addr_size; - addr_next <= rd_addr + 0; - - // End of list reached if we have read 5 times - rd_end_of_list <= (cnt_list_length == 'h5); - end - - // - // Since back pressure may prevent an immediate read request, we must - // record whether a read is needed and hold it until the request can - // be sent to the FIU. - // - - always_ff @(posedge clk) - begin - if (reset) - begin - rd_needed <= 1'b0; - end - else - begin - // If reads are allowed this cycle then we can safely clear - // any previously requested reads. This simple AFU has only - // one read in flight at a time since it is walking a pointer - // chain. - if (rd_needed) - begin - rd_needed <= sRx.c0TxAlmFull; - end - else - begin - // Need a read under two conditions: - // - Starting a new walk - // - A read response just arrived from a line containing - // a next pointer. - rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list))); - rd_addr <= (start_read ? read_mem_addr : addr_next); - //$display("rd_addr is 0x%x", t_ccip_clAddr'(rd_addr)); - //$display("read mem addr is 0x%x", t_ccip_clAddr'(read_mem_addr)); - //$display("start read is %d", start_read); - end - end - end - - // - // Emit read requests to the FIU. 
- // - - // Read header defines the request to the FIU - t_ccip_c0_ReqMemHdr rd_hdr; - - always_comb - begin - rd_hdr = t_ccip_c0_ReqMemHdr'(0); - - // Read request type (No intention to cache) - //rd_hdr.req_type = 4'h0; - - // Virtual address (MPF virtual addressing is enabled) - rd_hdr.address = rd_addr; - - // Read over channel VA - //rd_hdr.vc_sel = 2'h0; - - // Read one cache line (64 bytes) - //rd_hdr.cl_len = 2'h0; - end - - // Send read requests to the FIU - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c0.valid <= 1'b0; - cnt_list_length <= 0; - end - else - begin - // Generate a read request when needed and the FIU isn't full - if (state == STATE_READ) - begin - sTx.c0.valid <= (rd_needed && !sRx.c0TxAlmFull); - - if (rd_needed && !sRx.c0TxAlmFull) - begin - sTx.c0.hdr <= rd_hdr; - cnt_list_length <= cnt_list_length + 1; - $display("Incrementing read count...%d",cnt_list_length); - $display("Read address is 0x%x...",rd_hdr.address); - // Apurve: Add something to stop read once this section has been accessed - end - end - end - end - - // - // READ RESPONSE HANDLING - // - - // - // Receive data (read responses). - // - always_ff @(posedge clk) - begin - if (reset) - begin - do_update <= 1'b0; - end - else - begin - if (sRx.c0.rspValid) - begin - rd_data <= sRx.c0.data; - do_update <= 1'b1; - //$display("rd data is %d...",rd_data); - end - - if (state == STATE_UPDATE) - begin - // Update the read data and put it in the write data to be written - wr_data <= rd_data + 2; - do_update <= 1'b0; - $display("write data is %d...",wr_data); - - // First read done. Next reads should be from the updated addresses - start_read <= 1'b0; - end - end - end - - - // ========================================================================= - // - // Write logic. 
- // - // ========================================================================= - - - // - // WRITE REQUEST - // - - // Did a write response just arrive - - // Next write address - - always_ff @(posedge clk) - begin - // Next write address is valid when we have got the read response back - wr_addr_next_valid <= sRx.c0.rspValid; - - // Apurve: Next address is current address plus address length - wr_addr_next <= wr_addr + 0; - - end - - // - // Since back pressure may prevent an immediate write request, we must - // record whether a write is needed and hold it until the request can - // be sent to the FIU. - // - - always_ff @(posedge clk) - begin - if (reset) - begin - wr_needed <= 1'b0; - end - else - begin - // If writes are allowed this cycle then we can safely clear - // any previously requested writes. This simple AFU has only - // one write in flight at a time since it is walking a pointer - // chain. - if (wr_needed) - begin - wr_needed <= sRx.c1TxAlmFull; - end - else - begin - // Need a write under two conditions: - // - Starting a new walk - // - A write response just arrived from a line containing - // a next pointer. - wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid)); - wr_addr <= (start_write ? write_mem_addr : wr_addr_next); - //$display("Write mem address later is 0x%x", t_ccip_clAddr'(write_mem_addr)); - end - end - end - - // - // Emit write requests to the FIU. 
- // - - // Write header defines the request to the FIU - t_ccip_c1_ReqMemHdr wr_hdr; - - always_comb - begin - wr_hdr = t_ccip_c1_ReqMemHdr'(0); - - // Write request type - //wr_hdr.req_type = 4'h0; - - // Virtual address (MPF virtual addressing is enabled) - wr_hdr.address = wr_addr; - - // Let the FIU pick the channel - //wr_hdr.vc_sel = 2'h2; - - // Write 1 cache line (64 bytes) - //wr_hdr.cl_len = 2'h0; - - // Start of packet is true (single line write) - wr_hdr.sop = 1'b1; - end - - // Send write requests to the FIU - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c1.valid <= 1'b0; - end - else - begin - // Generate a write request when needed and the FIU isn't full - if (state == STATE_WRITE) - begin - sTx.c1.valid <= (wr_needed && !sRx.c1TxAlmFull); - if (wr_needed && !sRx.c1TxAlmFull) - begin - sTx.c1.hdr <= wr_hdr; - sTx.c1.data <= t_ccip_clData'(wr_data); - end - end - end - end - - - // - // WRITE RESPONSE HANDLING - // - - // Apurve: Check if a signal is to be sent to read to start reading in case - // write response does not work - // - // Send data (write requests). - // - //always_ff @(posedge clk) - //begin - // if (state == STATE_WRITE) - // begin - // rd_data <= sRx.c0.data; - // end - // if (state == STATE_UPDATE) - // begin - // // Update the write data and put it in the write data to be written - // wr_data <= rd_data + 1; - // end - //end - -endmodule diff --git a/driver/tests/dogfood/Memcpy/hw/rtl/sources.txt b/driver/tests/dogfood/Memcpy/hw/rtl/sources.txt deleted file mode 100644 index 8a73008b..00000000 --- a/driver/tests/dogfood/Memcpy/hw/rtl/sources.txt +++ /dev/null @@ -1,2 +0,0 @@ -cci_hello.json -cci_hello_afu.sv diff --git a/driver/tests/dogfood/Memcpy/hw/sim/setup_ase b/driver/tests/dogfood/Memcpy/hw/sim/setup_ase deleted file mode 100755 index a8414ac0..00000000 --- a/driver/tests/dogfood/Memcpy/hw/sim/setup_ase +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -## -## Setup ASE environment using ../rtl/sources.txt. 
-## - -# Absolute path to this script -SCRIPT=$(readlink -f "$0") -SCRIPT_PATH=$(dirname "$SCRIPT") - -afu_sim_setup --sources="${SCRIPT_PATH}/../rtl/sources.txt" $@ diff --git a/driver/tests/dogfood/Memcpy/sw/Makefile b/driver/tests/dogfood/Memcpy/sw/Makefile deleted file mode 100644 index f3b66c12..00000000 --- a/driver/tests/dogfood/Memcpy/sw/Makefile +++ /dev/null @@ -1,41 +0,0 @@ -include ../../common/sw/common_include.mk - -# Primary test name -TEST = cci_hello - -# Build directory -OBJDIR = obj -CFLAGS += -I./$(OBJDIR) -CPPFLAGS += -I./$(OBJDIR) - -# Files and folders -SRCS = $(TEST).c -OBJS = $(addprefix $(OBJDIR)/,$(patsubst %.c,%.o,$(SRCS))) - -# Targets (build only $(TEST)_ase by default) -all: $(TEST) $(TEST)_ase - -# AFU info from JSON file, including AFU UUID -AFU_JSON_INFO = $(OBJDIR)/afu_json_info.h - -$(AFU_JSON_INFO): ../hw/rtl/$(TEST).json | objdir - afu_json_mgr json-info --afu-json=$^ --c-hdr=$@ - -$(OBJS): $(AFU_JSON_INFO) - -$(TEST): $(OBJS) - $(CC) -o $@ $^ $(LDFLAGS) $(FPGA_LIBS) - -$(TEST)_ase: $(OBJS) - $(CC) -o $@ $^ $(LDFLAGS) $(ASE_LIBS) - -$(OBJDIR)/%.o: %.c | objdir - $(CC) $(CFLAGS) -c $< -o $@ - -clean: - rm -rf $(TEST) $(TEST)_ase $(OBJDIR) - -objdir: - @mkdir -p $(OBJDIR) - -.PHONY: all clean diff --git a/driver/tests/dogfood/Memcpy/sw/cci_hello.c b/driver/tests/dogfood/Memcpy/sw/cci_hello.c deleted file mode 100644 index f12d95c2..00000000 --- a/driver/tests/dogfood/Memcpy/sw/cci_hello.c +++ /dev/null @@ -1,210 +0,0 @@ -// -// Copyright (c) 2017, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. 
-// -// Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// Neither the name of the Intel Corporation nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -#include -#include -#include -#include -#include -#include - -#include - -// State from the AFU's JSON file, extracted using OPAE's afu_json_mgr script -#include "afu_json_info.h" - -#define CACHELINE_BYTES 64 -#define CL(x) ((x) * CACHELINE_BYTES) - - -// -// Search for an accelerator matching the requested UUID and connect to it. 
-// -static fpga_handle connect_to_accel(const char *accel_uuid) -{ - fpga_properties filter = NULL; - fpga_guid guid; - fpga_token accel_token; - uint32_t num_matches; - fpga_handle accel_handle; - fpga_result r; - - // Don't print verbose messages in ASE by default - //setenv("ASE_LOG", "0", 0); - - // Set up a filter that will search for an accelerator - fpgaGetProperties(NULL, &filter); - fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR); - - // Add the desired UUID to the filter - uuid_parse(accel_uuid, guid); - fpgaPropertiesSetGUID(filter, guid); - - // Do the search across the available FPGA contexts - num_matches = 1; - fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches); - - // Not needed anymore - fpgaDestroyProperties(&filter); - - if (num_matches < 1) - { - fprintf(stderr, "Accelerator %s not found!\n", accel_uuid); - return 0; - } - - // Open accelerator - r = fpgaOpen(accel_token, &accel_handle, 0); - assert(FPGA_OK == r); - - // Done with token - fpgaDestroyToken(&accel_token); - - return accel_handle; -} - - -// -// Allocate a buffer in I/O memory, shared with the FPGA. 
-// -static volatile void* alloc_buffer(fpga_handle accel_handle, - ssize_t size, - uint64_t *wsid, - uint64_t *io_addr) -{ - fpga_result r; - volatile void* buf; - - r = fpgaPrepareBuffer(accel_handle, size, (void*)&buf, wsid, 0); - if (FPGA_OK != r) return NULL; - - // Get the physical address of the buffer in the accelerator - r = fpgaGetIOAddress(accel_handle, *wsid, io_addr); - assert(FPGA_OK == r); - - return buf; -} - - -int main(int argc, char *argv[]) -{ - fpga_handle accel_handle; - volatile char *buf; - volatile char *buf_r; - uint64_t wsid1; - uint64_t wsid2; - uint64_t buf_pa; - uint64_t ret_buf_pa; - uint64_t buf_rpa; - uint64_t ret_buf_rpa; - fpga_result r; - - // Find and connect to the accelerator - accel_handle = connect_to_accel(AFU_ACCEL_UUID); - - // Allocate a single page memory buffer for write - buf = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(), - &wsid1, &buf_pa); - // Allocate a single page memory buffer for read - buf_r = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(), - &wsid2, &buf_rpa); - assert(NULL != buf); - - //// Set the low byte of the shared buffer to 0. The FPGA will write - //// a non-zero value to it. - //buf[0] = 0; - - // Set the low byte of the shared buffer buf_r to 0. The FPGA will read - // the values and write to buf address - buf[0] = 5; - buf_r[0] = 5; - - // Tell the accelerator the address of the buffer using cache line - // addresses. The accelerator will respond by writing to the buffer. - r = fpgaWriteMMIO64(accel_handle, 0, 0, buf_pa / CL(1)); - printf("Write address is %08lx\n", buf_pa); - printf("Write address div 64 is %08lx\n", buf_pa/ CL(1)); - assert(FPGA_OK == r); - - // Wait for response from FPGA. Check using fpgaReadMMIO - //r = fpgaReadMMIO64(accel_handle, 0, 0, &ret_buf_pa); - //printf("Returned write is %08lx\n", ret_buf_pa); - //assert(FPGA_OK == r); - -///////////////////// Added to check fpgaRead - // Wait for response from FPGA. 
Check using fpgaReadMMIO - r = fpgaReadMMIO64(accel_handle, 0, 5 * sizeof(uint64_t), &ret_buf_rpa); - printf("Returned read at 10 is %08lx\n", ret_buf_rpa); - assert(FPGA_OK == r); -/////////////////////////////////////////////// - - - // Tell the accelerator the address of the buffer using cache line - // addresses. The accelerator will read from the buffer. - // Write the address to MMIO 1 - r = fpgaWriteMMIO64(accel_handle, 0, sizeof(uint64_t), buf_rpa / CL(1)); - printf("Read address is %08lx\n", buf_rpa); - printf("Read address div64 is %08lx\n", buf_rpa / CL(1)); - assert(FPGA_OK == r); - - // Wait for response from FPGA. Check using fpgaReadMMIO - //r = fpgaReadMMIO64(accel_handle, 0, sizeof(uint64_t), &ret_buf_rpa); - //printf("Returned write is %08lx\n", ret_buf_rpa); - //assert(FPGA_OK == r); - - - - - - - - - // Update this - // Spin, waiting for the value in memory to change to something non-zero. - while (5 == buf[0]) - { - // A well-behaved program would use _mm_pause(), nanosleep() or - // equivalent to save power here. 
- }; - - // Print the string written by the FPGA - printf("%d\n", buf[0]); - - do { - //printf("%d\n", buf[0]); - } while (10 != buf[0]); - - // Done - fpgaReleaseBuffer(accel_handle, wsid1); - fpgaReleaseBuffer(accel_handle, wsid2); - fpgaClose(accel_handle); - - return 0; -} diff --git a/driver/tests/dogfood/Memcpy/sw/obj/afu_json_info.h b/driver/tests/dogfood/Memcpy/sw/obj/afu_json_info.h deleted file mode 100644 index e16a5349..00000000 --- a/driver/tests/dogfood/Memcpy/sw/obj/afu_json_info.h +++ /dev/null @@ -1,13 +0,0 @@ -// -// Generated by afu_json_mgr from ../hw/rtl/cci_hello.json -// - -#ifndef __AFU_JSON_INFO__ -#define __AFU_JSON_INFO__ - -#define AFU_ACCEL_NAME "cci_hello" -#define AFU_ACCEL_UUID "C6AA954A-9B91-4A37-ABC1-1D9F0709DCC3" -#define AFU_IMAGE_POWER 0 -#define AFU_TOP_IFC "ccip_std_afu" - -#endif // __AFU_JSON_INFO__ diff --git a/driver/tests/dogfood/Memcpy/sw/obj/cci_hello.o b/driver/tests/dogfood/Memcpy/sw/obj/cci_hello.o deleted file mode 100644 index a6d79f49..00000000 Binary files a/driver/tests/dogfood/Memcpy/sw/obj/cci_hello.o and /dev/null differ diff --git a/hw/rtl/VX_commit.v b/hw/rtl/VX_commit.v index d822dc59..d4c246c3 100644 --- a/hw/rtl/VX_commit.v +++ b/hw/rtl/VX_commit.v @@ -38,7 +38,7 @@ module VX_commit #( .count (num_commits) ); - assign cmt_to_csr_if.valid = (| commited_mask); + assign cmt_to_csr_if.valid = (| commited_mask); assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num; assign cmt_to_csr_if.num_commits = num_commits; @@ -46,16 +46,16 @@ module VX_commit #( integer i; - reg [`FFG_BITS-1:0] fflags; + fflags_t fflags; always @(*) begin fflags = 0; for (i = 0; i < `NUM_THREADS; i++) begin if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin - fflags[0] |= fpu_commit_if.fflags[i][0]; - fflags[1] |= fpu_commit_if.fflags[i][1]; - fflags[2] |= fpu_commit_if.fflags[i][2]; - fflags[3] |= fpu_commit_if.fflags[i][3]; - fflags[4] |= fpu_commit_if.fflags[i][4]; + fflags.NX |= fpu_commit_if.fflags[i].NX; + fflags.UF 
|= fpu_commit_if.fflags[i].UF; + fflags.OF |= fpu_commit_if.fflags[i].OF; + fflags.DZ |= fpu_commit_if.fflags[i].DZ; + fflags.NV |= fpu_commit_if.fflags[i].NV; end end end diff --git a/hw/rtl/VX_csr_data.v b/hw/rtl/VX_csr_data.v index be090d26..71971aa4 100644 --- a/hw/rtl/VX_csr_data.v +++ b/hw/rtl/VX_csr_data.v @@ -71,8 +71,8 @@ module VX_csr_data #( `CSR_PMPADDR0: csr_pmpaddr[0] <= write_data; default: begin - assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr); - end + assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr); + end endcase end end diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 8c684ddf..8881413b 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -38,7 +38,7 @@ /////////////////////////////////////////////////////////////////////////////// -`define LATENCY_IDIV 22 +`define LATENCY_IDIV 23 `define LATENCY_IMUL 2 `define LATENCY_FDIV 16 @@ -201,13 +201,6 @@ `define FRM_DYN 3'b111 // dynamic mode `define FRM_BITS 3 -`define FFG_NX 0 // inexact -`define FFG_UF 1 // underflow -`define FFG_OF 2 // overflow -`define FFG_DZ 3 // division by zero -`define FFG_NV 4 // invalid -`define FFG_BITS 5 - `define GPU_TMC 3'h0 `define GPU_WSPAWN 3'h1 `define GPU_SPLIT 3'h2 @@ -440,4 +433,14 @@ typedef struct packed { logic is_quiet; } fp_type_t; +typedef struct packed { + logic NV; // Invalid + logic DZ; // Divide by zero + logic OF; // Overflow + logic UF; // Underflow + logic NX; // Inexact +} fflags_t; + +`define FFG_BITS $bits(fflags_t) + `endif diff --git a/hw/rtl/VX_issue.v b/hw/rtl/VX_issue.v index 7bf96889..3c1b9171 100644 --- a/hw/rtl/VX_issue.v +++ b/hw/rtl/VX_issue.v @@ -28,13 +28,16 @@ module VX_issue #( wire [`ISTAG_BITS-1:0] issue_tag, issue_tmp_tag; - wire gpr_busy = ~gpr_read_if.in_ready; - wire alu_busy = ~alu_req_if.ready; - wire lsu_busy = ~lsu_req_if.ready; - wire csr_busy = ~csr_req_if.ready; - wire mul_busy = ~mul_req_if.ready; - wire fpu_busy = 
~mul_req_if.ready; - wire gpu_busy = ~gpu_req_if.ready; + wire schedule_delay; + + wire gpr_busy = ~gpr_read_if.in_ready; + + wire ex_busy = (~alu_req_if.ready && (decode_if.ex_type == `EX_ALU)) + || (~lsu_req_if.ready && (decode_if.ex_type == `EX_LSU)) + || (~csr_req_if.ready && (decode_if.ex_type == `EX_CSR)) + || (~mul_req_if.ready && (decode_if.ex_type == `EX_MUL)) + || (~fpu_req_if.ready && (decode_if.ex_type == `EX_FPU)) + || (~gpu_req_if.ready && (decode_if.ex_type == `EX_GPU)); VX_scheduler #( .CORE_ID(CORE_ID) @@ -44,14 +47,10 @@ module VX_issue #( .decode_if (decode_if), .writeback_if (writeback_if), .cmt_to_issue_if(cmt_to_issue_if), - .gpr_busy (gpr_busy), - .alu_busy (alu_busy), - .lsu_busy (lsu_busy), - .csr_busy (csr_busy), - .mul_busy (mul_busy), - .fpu_busy (fpu_busy), - .gpu_busy (gpu_busy), - .issue_tag (issue_tag) + .ex_busy (ex_busy), + .gpr_busy (gpr_busy), + .issue_tag (issue_tag), + .schedule_delay (schedule_delay) ); VX_gpr_stage #( @@ -66,8 +65,8 @@ module VX_issue #( VX_decode_if decode_tmp_if(); VX_gpr_read_if gpr_read_tmp_if(); - wire stall = ~alu_req_if.ready || ~decode_if.ready; - wire flush = alu_req_if.ready && ~decode_if.ready; + wire stall = schedule_delay; + wire flush = schedule_delay && ~ex_busy; VX_generic_register #( .N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32)) @@ -80,17 +79,19 @@ module VX_issue #( .out ({decode_tmp_if.valid, issue_tmp_tag, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, gpr_read_tmp_if.rs1_data, gpr_read_tmp_if.rs2_data, 
gpr_read_tmp_if.rs3_data}) ); + assign decode_if.ready = ~stall; + VX_issue_demux issue_demux ( - .decode_if (decode_tmp_if), - .gpr_read_if (gpr_read_tmp_if), - .issue_tag (issue_tmp_tag), - .alu_req_if (alu_req_if), - .lsu_req_if (lsu_req_if), - .csr_req_if (csr_req_if), - .mul_req_if (mul_req_if), - .fpu_req_if (fpu_req_if), - .gpu_req_if (gpu_req_if) - ); + .decode_if (decode_tmp_if), + .gpr_read_if(gpr_read_tmp_if), + .issue_tag (issue_tmp_tag), + .alu_req_if (alu_req_if), + .lsu_req_if (lsu_req_if), + .csr_req_if (csr_req_if), + .mul_req_if (mul_req_if), + .fpu_req_if (fpu_req_if), + .gpu_req_if (gpu_req_if) + ); `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin diff --git a/hw/rtl/VX_scheduler.v b/hw/rtl/VX_scheduler.v index d922e768..d5c9ed08 100644 --- a/hw/rtl/VX_scheduler.v +++ b/hw/rtl/VX_scheduler.v @@ -8,64 +8,52 @@ module VX_scheduler #( VX_decode_if decode_if, VX_wb_if writeback_if, - VX_cmt_to_issue_if cmt_to_issue_if, + VX_cmt_to_issue_if cmt_to_issue_if, + input wire ex_busy, input wire gpr_busy, - input wire alu_busy, - input wire lsu_busy, - input wire csr_busy, - input wire mul_busy, - input wire fpu_busy, - input wire gpu_busy, - output wire [`ISTAG_BITS-1:0] issue_tag + output wire [`ISTAG_BITS-1:0] issue_tag, + output wire schedule_delay ); localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1); - reg [`NUM_THREADS-1:0] inuse_registers [`NUM_WARPS-1:0][`NUM_REGS-1:0]; + reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0]; reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0]; wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.warp_num] & decode_if.reg_use_mask; wire inuse_hazard = (inuse_mask != 0); - wire exu_stalled = (alu_busy && (decode_if.ex_type == `EX_ALU)) - || (lsu_busy && (decode_if.ex_type == `EX_LSU)) - || (csr_busy && (decode_if.ex_type == `EX_CSR)) - || (mul_busy && (decode_if.ex_type == `EX_MUL)) - || (fpu_busy && (decode_if.ex_type == `EX_FPU)) - || (gpu_busy && (decode_if.ex_type == `EX_GPU)); - 
wire issue_buf_full; - wire stall = (gpr_busy || exu_stalled || inuse_hazard || issue_buf_full) && decode_if.valid; + wire stall = gpr_busy || ex_busy || inuse_hazard || issue_buf_full; - wire acquire_rd = decode_if.valid && (decode_if.wb != 0) && ~stall; + wire issue_fire = decode_if.valid && ~stall; + + wire acquire_rd = issue_fire && (decode_if.wb != 0); wire release_rd = writeback_if.valid; - wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.thread_mask; + wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.warp_num, writeback_if.rd}] & ~writeback_if.thread_mask; always @(posedge clk) begin if (reset) begin - integer i, w; - for (w = 0; w < `NUM_WARPS; w++) begin - for (i = 0; i < `NUM_REGS; i++) begin - inuse_registers[w][i] <= 0; + for (integer w = 0; w < `NUM_WARPS; w++) begin + for (integer i = 0; i < `NUM_REGS; i++) begin + inuse_registers[w * `NUM_REGS + i] <= 0; end inuse_reg_mask[w] <= `NUM_REGS'(0); end end else begin if (acquire_rd) begin - inuse_registers[decode_if.warp_num][decode_if.rd] <= decode_if.thread_mask; + inuse_registers[{decode_if.warp_num, decode_if.rd}] <= decode_if.thread_mask; inuse_reg_mask[decode_if.warp_num][decode_if.rd] <= 1; end if (release_rd) begin assert(inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] != 0); - inuse_registers[writeback_if.warp_num][writeback_if.rd] <= inuse_registers_n; + inuse_registers[{writeback_if.warp_num, writeback_if.rd}] <= inuse_registers_n; inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] <= (| inuse_registers_n); end end end - wire issue_fire = decode_if.valid && ~stall; - VX_cam_buffer #( .DATAW ($bits(issue_data_t)), .SIZE (`ISSUEQ_SIZE), @@ -82,14 +70,14 @@ module VX_scheduler #( .full (issue_buf_full) ); - assign decode_if.ready = ~stall; + assign schedule_delay = stall; `ifdef DBG_PRINT_PIPELINE always @(posedge clk) begin - if (stall) begin - $display("%t: Core%0d-stall: warp=%0d, PC=%0h, 
rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, gpr=%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b", - $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full, inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], - inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], gpr_busy, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy); + if (decode_if.valid && stall) begin + $display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b, gpr_busy=%b", + $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full, + inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy, gpr_busy); end end `endif diff --git a/hw/rtl/VX_writeback.v b/hw/rtl/VX_writeback.v index 60e86440..2da93479 100644 --- a/hw/rtl/VX_writeback.v +++ b/hw/rtl/VX_writeback.v @@ -18,113 +18,131 @@ module VX_writeback #( // outputs VX_wb_if writeback_if ); + reg [`ISSUEQ_SIZE-1:0] wb_valid_table, wb_valid_table_n; + reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0][31:0] wb_data_table, wb_data_table_n; + reg [`ISSUEQ_SIZE-1:0][`NW_BITS-1:0] wb_warp_num_table, wb_warp_num_table_n; + reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0] wb_thread_mask_table, wb_thread_mask_table_n; + reg [`ISSUEQ_SIZE-1:0][31:0] wb_curr_PC_table, wb_curr_PC_table_n; + reg [`ISSUEQ_SIZE-1:0][`NR_BITS-1:0] wb_rd_table, wb_rd_table_n; - reg [`NUM_THREADS-1:0][31:0] wb_data_table [`ISSUEQ_SIZE-1:0]; - reg [`NW_BITS-1:0] wb_warp_num_table [`ISSUEQ_SIZE-1:0]; - reg [`NUM_THREADS-1:0] wb_thread_mask_table [`ISSUEQ_SIZE-1:0]; - reg [31:0] wb_curr_PC_table [`ISSUEQ_SIZE-1:0]; - reg [`NR_BITS-1:0] wb_rd_table [`ISSUEQ_SIZE-1:0]; + reg [`NUM_THREADS-1:0][31:0] wb_data, wb_data_n; + reg [`NW_BITS-1:0] wb_warp_num, wb_warp_num_n; + reg [`NUM_THREADS-1:0] wb_thread_mask, wb_thread_mask_n; + reg [31:0] wb_curr_PC, wb_curr_PC_n; + reg [`NR_BITS-1:0] wb_rd, wb_rd_n; - reg 
[`ISSUEQ_SIZE-1:0] wb_valid_table; - reg [`ISSUEQ_SIZE-1:0] wb_valid_table_n; - - reg [`ISTAG_BITS-1:0] wb_index; - wire [`ISTAG_BITS-1:0] wb_index_n; + reg [`ISTAG_BITS-1:0] wb_index; + reg [`ISTAG_BITS-1:0] wb_index_n; reg wb_valid; - wire wb_valid_n; + reg wb_valid_n; always @(*) begin - wb_valid_table_n = wb_valid_table; + wb_valid_table_n = wb_valid_table; + wb_warp_num_table_n = wb_warp_num_table; + wb_thread_mask_table_n = wb_thread_mask_table; + wb_curr_PC_table_n = wb_curr_PC_table; + wb_rd_table_n = wb_rd_table; + wb_data_table_n = wb_data_table; if (wb_valid) begin wb_valid_table_n[wb_index] = 0; end if (alu_commit_if.valid) begin - wb_valid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb; + wb_valid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb; + wb_thread_mask_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.thread_mask; + wb_data_table_n [alu_commit_if.issue_tag] = alu_commit_if.data; + wb_warp_num_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.warp_num; + wb_curr_PC_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.curr_PC; + wb_rd_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.rd; end + if (lsu_commit_if.valid) begin - wb_valid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb; + wb_valid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb; + wb_thread_mask_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.thread_mask; + wb_data_table_n [lsu_commit_if.issue_tag] = lsu_commit_if.data; + wb_warp_num_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.warp_num; + wb_curr_PC_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.curr_PC; + wb_rd_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.rd; end + if (csr_commit_if.valid) begin - wb_valid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb; + wb_valid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb; + 
wb_thread_mask_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.thread_mask; + wb_data_table_n [csr_commit_if.issue_tag] = csr_commit_if.data; + wb_warp_num_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.warp_num; + wb_curr_PC_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.curr_PC; + wb_rd_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.rd; end + if (mul_commit_if.valid) begin - wb_valid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb; + wb_valid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb; + wb_thread_mask_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.thread_mask; + wb_data_table_n [mul_commit_if.issue_tag] = mul_commit_if.data; + wb_warp_num_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.warp_num; + wb_curr_PC_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.curr_PC; + wb_rd_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.rd; end + if (fpu_commit_if.valid) begin - wb_valid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb; + wb_valid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb; + wb_thread_mask_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.thread_mask; + wb_data_table_n [fpu_commit_if.issue_tag] = fpu_commit_if.data; + wb_warp_num_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.warp_num; + wb_curr_PC_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.curr_PC; + wb_rd_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.rd; end end - VX_priority_encoder #( - .N(`ISSUEQ_SIZE) - ) wb_select ( - .data_in (wb_valid_table_n), - .data_out (wb_index_n), - .valid_out (wb_valid_n) - ); + integer i; + + always @(*) begin + wb_index_n = 0; + wb_valid_n = 0; + for (i = `ISSUEQ_SIZE-1; i >= 0; i--) begin + if (wb_valid_table_n[i]) begin + wb_index_n = `ISTAG_BITS'(i); + wb_valid_n = 1; + wb_thread_mask_n= wb_thread_mask_table_n[i]; + wb_warp_num_n = 
wb_warp_num_table_n[i]; + wb_curr_PC_n = wb_curr_PC_table_n[i]; + wb_rd_n = wb_rd_table_n[i]; + wb_data_n = wb_data_table_n[i]; + end + end + end always @(posedge clk) begin if (reset) begin wb_valid_table <= 0; - wb_index <= 0; - wb_valid <= 0; + wb_index <= 0; + wb_valid <= 0; end else begin - if (alu_commit_if.valid) begin - wb_data_table [alu_commit_if.issue_tag] <= alu_commit_if.data; - wb_warp_num_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.warp_num; - wb_thread_mask_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.thread_mask; - wb_curr_PC_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.curr_PC; - wb_rd_table [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.rd; - end - - if (lsu_commit_if.valid) begin - wb_data_table [lsu_commit_if.issue_tag] <= lsu_commit_if.data; - wb_warp_num_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.warp_num; - wb_thread_mask_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.thread_mask; - wb_curr_PC_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.curr_PC; - wb_rd_table [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.rd; - end - - if (csr_commit_if.valid) begin - wb_data_table [csr_commit_if.issue_tag] <= csr_commit_if.data; - wb_warp_num_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.warp_num; - wb_thread_mask_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.thread_mask; - wb_curr_PC_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.curr_PC; - wb_rd_table [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.rd; - end - - if (mul_commit_if.valid) begin - wb_data_table [mul_commit_if.issue_tag] <= mul_commit_if.data; - wb_warp_num_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.warp_num; - wb_thread_mask_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.thread_mask; - wb_curr_PC_table [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.curr_PC; - wb_rd_table [mul_commit_if.issue_tag] <= 
cmt_to_issue_if.mul_data.rd; - end + wb_valid_table <= wb_valid_table_n; + wb_thread_mask_table <= wb_thread_mask_table_n; + wb_warp_num_table <= wb_warp_num_table_n; + wb_curr_PC_table <= wb_curr_PC_table_n; + wb_rd_table <= wb_rd_table_n; + wb_data_table <= wb_data_table_n; - if (fpu_commit_if.valid) begin - wb_data_table [fpu_commit_if.issue_tag] <= fpu_commit_if.data; - wb_warp_num_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.warp_num; - wb_thread_mask_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.thread_mask; - wb_curr_PC_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.curr_PC; - wb_rd_table [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.rd; - end - - wb_valid_table <= wb_valid_table_n; - wb_index <= wb_index_n; - wb_valid <= wb_valid_n && writeback_if.ready; + wb_index <= wb_index_n; + wb_valid <= wb_valid_n && writeback_if.ready; + wb_thread_mask <= wb_thread_mask_n; + wb_warp_num <= wb_warp_num_n; + wb_curr_PC <= wb_curr_PC_n; + wb_rd <= wb_rd_n; + wb_data <= wb_data_n; end end // writeback request assign writeback_if.valid = wb_valid; - assign writeback_if.warp_num = wb_warp_num_table [wb_index]; - assign writeback_if.thread_mask = wb_thread_mask_table [wb_index]; - assign writeback_if.curr_PC = wb_curr_PC_table [wb_index]; - assign writeback_if.rd = wb_rd_table [wb_index]; - assign writeback_if.data = wb_data_table [wb_index]; + assign writeback_if.thread_mask = wb_thread_mask; + assign writeback_if.warp_num = wb_warp_num; + assign writeback_if.curr_PC = wb_curr_PC; + assign writeback_if.rd = wb_rd; + assign writeback_if.data = wb_data; // commit back-pressure assign alu_commit_if.ready = 1'b1; diff --git a/hw/rtl/cache/VX_tag_data_access.v b/hw/rtl/cache/VX_tag_data_access.v index 13e52cb7..0b1748de 100644 --- a/hw/rtl/cache/VX_tag_data_access.v +++ b/hw/rtl/cache/VX_tag_data_access.v @@ -26,11 +26,13 @@ module VX_tag_data_access #( input wire reset, `ifdef DBG_CORE_REQ_INFO +`IGNORE_WARNINGS_BEGIN input 
wire[31:0] debug_pc_st1e, input wire debug_wb_st1e, input wire[`NR_BITS-1:0] debug_rd_st1e, input wire[`NW_BITS-1:0] debug_warp_num_st1e, input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e, +`IGNORE_WARNINGS_END `endif input wire stall, diff --git a/hw/rtl/fp_cores/VX_fp_fpga.v b/hw/rtl/fp_cores/VX_fp_fpga.v index fc35d9db..e2142417 100644 --- a/hw/rtl/fp_cores/VX_fp_fpga.v +++ b/hw/rtl/fp_cores/VX_fp_fpga.v @@ -5,8 +5,8 @@ module VX_fp_fpga ( input wire clk, input wire reset, - output wire in_ready, input wire in_valid, + output wire in_ready, input wire [`ISTAG_BITS-1:0] in_tag, @@ -19,7 +19,7 @@ module VX_fp_fpga ( output wire [`NUM_THREADS-1:0][31:0] result, output wire has_fflags, - output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags, + output fflags_t [`NUM_THREADS-1:0] fflags, output wire [`ISTAG_BITS-1:0] out_tag, @@ -29,31 +29,30 @@ module VX_fp_fpga ( localparam NUM_FPC = 12; localparam FPC_BITS = `LOG2UP(NUM_FPC); - reg [FPC_BITS-1:0] core_select; - wire [NUM_FPC-1:0] core_in_ready; wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] core_result; wire fpnew_has_fflags; - wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fpnew_fflags; + fflags_t [`NUM_THREADS-1:0] fpnew_fflags; /* one flag set per thread, same shape as the fflags output */ wire [NUM_FPC-1:0][`ISTAG_BITS-1:0] core_out_tag; wire [NUM_FPC-1:0] core_out_ready; wire [NUM_FPC-1:0] core_out_valid; - reg negate_output; + reg [FPC_BITS-1:0] core_select; + reg fmadd_negate; genvar i; always @(*) begin - core_select = 0; - negate_output = 0; + core_select = 0; + fmadd_negate = 0; case (op) `FPU_ADD: core_select = 1; `FPU_SUB: core_select = 2; `FPU_MUL: core_select = 3; `FPU_MADD: core_select = 4; `FPU_MSUB: core_select = 5; - `FPU_NMSUB: begin core_select = 4; negate_output = 1; end - `FPU_NMADD: begin core_select = 5; negate_output = 1; end + `FPU_NMSUB: begin core_select = 4; fmadd_negate = 1; end + `FPU_NMADD: begin core_select = 5; fmadd_negate = 1; end `FPU_DIV: core_select = 6; `FPU_SQRT: core_select = 7; `FPU_CVTWS: core_select = 8; @@ -130,7 +129,7 @@ module VX_fp_fpga ( .in_valid
(in_valid && (core_select == 4)), .in_ready (core_in_ready[4]), .in_tag (in_tag), - .negate (negate_output), + .negate (fmadd_negate), .dataa (dataa), .datab (datab), .datac (datac), @@ -146,7 +145,7 @@ module VX_fp_fpga ( .in_valid (in_valid && (core_select == 5)), .in_ready (core_in_ready[5]), .in_tag (in_tag), - .negate (negate_output), + .negate (fmadd_negate), .dataa (dataa), .datab (datab), .datac (datac), @@ -250,10 +249,21 @@ module VX_fp_fpga ( assign core_out_ready[i] = out_ready && (i == fp_index); end - assign has_fflags = fpnew_has_fflags && (fp_index == 0); - assign fflags = fpnew_fflags; - assign out_tag = core_out_tag[fp_index]; - assign result = core_result[fp_index]; - assign out_valid = fp_valid; + wire tmp_valid = fp_valid; + wire [`ISTAG_BITS-1:0] tmp_tag = core_out_tag[fp_index]; + wire [`NUM_THREADS-1:0][31:0] tmp_result = core_result[fp_index]; + wire tmp_has_fflags = fpnew_has_fflags && (fp_index == 0); + wire fflags_t [`NUM_THREADS-1:0] tmp_fflags = fpnew_fflags; /* declared as a net so this is a continuous assignment, not a one-time initializer; name matches the register input below */ + + VX_generic_register #( + .N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS)) /* valid + tag + per-thread result + has_fflags + per-thread fflags */ + ) nc_reg ( + .clk (clk), + .reset (reset), + .stall (stall), + .flush (1'b0), + .in ({tmp_valid, tmp_tag, tmp_result, tmp_has_fflags, tmp_fflags}), + .out ({out_valid, out_tag, result, has_fflags, fflags}) + ); endmodule \ No newline at end of file diff --git a/hw/rtl/fp_cores/VX_fp_noncomp.v b/hw/rtl/fp_cores/VX_fp_noncomp.v index b081e571..4bcde8c4 100644 --- a/hw/rtl/fp_cores/VX_fp_noncomp.v +++ b/hw/rtl/fp_cores/VX_fp_noncomp.v @@ -17,7 +17,7 @@ module VX_fp_noncomp ( output wire [`NUM_THREADS-1:0][31:0] result, output wire has_fflags, - output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags, + output fflags_t [`NUM_THREADS-1:0] fflags, output wire [`ISTAG_BITS-1:0] out_tag, @@ -178,7 +178,7 @@ module VX_fp_noncomp ( reg tmp_valid; reg tmp_has_fflags; - reg [`NUM_THREADS-1:0][`FFG_BITS-1:0] tmp_fflags; + fflags_t [`NUM_THREADS-1:0] tmp_fflags; reg [`NUM_THREADS-1:0][31:0] tmp_result; always @(*)
begin @@ -199,27 +199,27 @@ module VX_fp_noncomp ( case (op) `FPU_CLASS: begin tmp_result[i] = fclass_mask[i]; - {tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0; + {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0; end `FPU_MVXW,`FPU_MVWX: begin tmp_result[i] = dataa[i]; - {tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0; + {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0; end `FPU_MIN,`FPU_MAX: begin tmp_result[i] = fminmax_res[i]; - {tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = {a_type[i][0] | b_type[i][0], 4'h0}; + {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = {a_type[i][0] | b_type[i][0], 4'h0}; end `FPU_SGNJ,`FPU_SGNJN,`FPU_SGNJX: begin tmp_result[i] = fsgnj_res[i]; - {tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0; + {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0; end `FPU_CMP: begin tmp_result[i] = fcmp_res[i]; - {tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = fcmp_excp[i]; + {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = fcmp_excp[i]; end default: begin tmp_result[i] = 32'hdeadbeaf; - {tmp_fflags[i][`FFG_NV], tmp_fflags[i][`FFG_DZ], tmp_fflags[i][`FFG_OF], tmp_fflags[i][`FFG_UF], tmp_fflags[i][`FFG_NX]} = 5'h0; + {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0; tmp_valid = 1'b0; end endcase @@ -230,7 +230,7 @@ module VX_fp_noncomp ( assign in_ready = ~stall; VX_generic_register #( - .N(1 
+ `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + `FFG_BITS) + .N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS)) ) nc_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fp_cores/VX_fpnew.v b/hw/rtl/fp_cores/VX_fpnew.v index d56f8695..71668082 100644 --- a/hw/rtl/fp_cores/VX_fpnew.v +++ b/hw/rtl/fp_cores/VX_fpnew.v @@ -11,8 +11,8 @@ module VX_fpnew #( input wire clk, input wire reset, - output wire in_ready, input wire in_valid, + output wire in_ready, input wire [`ISTAG_BITS-1:0] in_tag, @@ -25,7 +25,7 @@ module VX_fpnew #( output wire [`NUM_THREADS-1:0][31:0] result, output wire has_fflags, - output wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags, + output fflags_t [`NUM_THREADS-1:0] fflags, output wire [`ISTAG_BITS-1:0] out_tag, @@ -75,7 +75,7 @@ module VX_fpnew #( wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32; wire [`NUM_THREADS-1:0][31:0] fpu_result; - fpnew_pkg::status_t fpu_status [0:`NUM_THREADS-1]; + fpnew_pkg::status_t [`NUM_THREADS-1:0] fpu_status; /* descending range so fpu_status[i] lines up with fflags[i] in the whole-array assign */ wire is_class_op_i, is_class_op_o; assign is_class_op_i = (op == `FPU_CLASS); @@ -194,7 +194,8 @@ module VX_fpnew #( `ENABLE_TRACING assign fpu_in_valid = in_valid; - assign in_ready = fpu_in_ready; + assign in_ready = fpu_in_ready + || ~in_valid; // work around fpnew's in_ready depending on in_valid assign fpu_in_tag = in_tag; assign out_tag = fpu_out_tag; @@ -202,14 +203,7 @@ module VX_fpnew #( assign result = fpu_result; assign has_fflags = fpu_has_fflags_o; - - for (i = 0; i < `NUM_THREADS; i++) begin - assign fflags[i][`FFG_NX] = fpu_status[i].NX; - assign fflags[i][`FFG_UF] = fpu_status[i].UF; - assign fflags[i][`FFG_OF] = fpu_status[i].OF; - assign fflags[i][`FFG_DZ] = fpu_status[i].DZ; - assign fflags[i][`FFG_NV] = fpu_status[i].NV; - end + assign fflags = fpu_status; assign out_valid = fpu_out_valid; assign fpu_out_ready = out_ready; diff --git a/hw/rtl/interfaces/VX_cmt_to_csr_if.v b/hw/rtl/interfaces/VX_cmt_to_csr_if.v index ee6165bf..c35e3ecd 100644 ---
a/hw/rtl/interfaces/VX_cmt_to_csr_if.v +++ b/hw/rtl/interfaces/VX_cmt_to_csr_if.v @@ -12,7 +12,7 @@ interface VX_cmt_to_csr_if (); wire [`NE_BITS:0] num_commits; wire has_fflags; - wire [`FFG_BITS-1:0] fflags; + fflags_t fflags; endinterface diff --git a/hw/rtl/interfaces/VX_fpu_to_cmt_if.v b/hw/rtl/interfaces/VX_fpu_to_cmt_if.v index ee14584c..beb44a5a 100644 --- a/hw/rtl/interfaces/VX_fpu_to_cmt_if.v +++ b/hw/rtl/interfaces/VX_fpu_to_cmt_if.v @@ -7,9 +7,9 @@ interface VX_fpu_to_cmt_if (); wire valid; wire [`ISTAG_BITS-1:0] issue_tag; - wire [`NUM_THREADS-1:0][31:0] data; + wire [`NUM_THREADS-1:0][31:0] data; wire has_fflags; - wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags; + fflags_t [`NUM_THREADS-1:0] fflags; wire ready; endinterface diff --git a/hw/rtl/libs/VX_index_queue.v b/hw/rtl/libs/VX_index_queue.v index 75252e56..bee8ccb9 100644 --- a/hw/rtl/libs/VX_index_queue.v +++ b/hw/rtl/libs/VX_index_queue.v @@ -15,7 +15,7 @@ module VX_index_queue #( input wire [`LOG2UP(SIZE)-1:0] read_addr, output wire [DATAW-1:0] read_data ); - `USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0]; + reg [DATAW-1:0] entries [SIZE-1:0]; reg [SIZE-1:0] valid; reg [`LOG2UP(SIZE):0] rd_ptr, wr_ptr; @@ -38,7 +38,7 @@ module VX_index_queue #( valid <= 0; end else begin if (enqueue) begin - data[wr_a] <= write_data; + entries[wr_a] <= write_data; valid[wr_a] <= 1; wr_ptr <= wr_ptr + 1; end @@ -52,6 +52,6 @@ module VX_index_queue #( end assign write_addr = wr_a; - assign read_data = data[read_addr]; + assign read_data = entries[read_addr]; endmodule \ No newline at end of file diff --git a/hw/simulate/Makefile b/hw/simulate/Makefile index 7788d0b6..ea6d87a1 100644 --- a/hw/simulate/Makefile +++ b/hw/simulate/Makefile @@ -14,7 +14,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE -#DBG_FLAGS += $(DBG_PRINT_FLAGS) +DBG_FLAGS += $(DBG_PRINT_FLAGS) DBG_FLAGS += -DDBG_CORE_REQ_INFO FPU_INCLUDE = 
-I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src diff --git a/hw/syn/quartus/project.sdc b/hw/syn/quartus/project.sdc index 61b8cba9..59686a41 100644 --- a/hw/syn/quartus/project.sdc +++ b/hw/syn/quartus/project.sdc @@ -1,6 +1,6 @@ set_time_format -unit ns -decimal_places 3 -create_clock -name {clk} -period "300 MHz" -waveform { 0.0 1.0 } [get_ports {clk}] +create_clock -name {clk} -period "200 MHz" -waveform { 0.0 1.0 } [get_ports {clk}] derive_pll_clocks -create_base_clocks derive_clock_uncertainty diff --git a/hw/syn/quartus/timing.tcl b/hw/syn/quartus/timing.tcl index 411379dc..5e9def4d 100644 --- a/hw/syn/quartus/timing.tcl +++ b/hw/syn/quartus/timing.tcl @@ -1,4 +1,4 @@ -project_open Vortex_Socket +project_open VX_pipeline set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL