diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index b0457aa6..afb8b402 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -133,9 +133,9 @@ module VX_ibuffer #( deq_wid_n = (!deq_fire || q_sizeMany[deq_wid]) ? deq_wid : ibuf_enq_if.wid; deq_instr_n = deq_fire ? (q_sizeMany[deq_wid] ? q_data_prev[deq_wid] : q_data_in) : q_data_out[deq_wid]; end else begin - deq_valid_n = (| schedule_table_n); + deq_valid_n = (| schedule_table); for (integer i = 0; i < `NUM_WARPS; i++) begin - if (schedule_table_n[i]) begin + if (schedule_table[i]) begin deq_wid_n = `NW_BITS'(i); deq_instr_n = q_data_out[i]; schedule_table_n[i] = 0; diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 5e711599..8b874fec 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -2,18 +2,19 @@ module VX_bank #( parameter CACHE_ID = 0, - parameter BANK_ID = 0, + parameter BANK_ID = 0, + + // Number of Word requests per cycle + parameter NUM_REQS = 1, // Size of cache in bytes parameter CACHE_SIZE = 1, // Size of line inside a bank in bytes parameter CACHE_LINE_SIZE = 1, // Number of bankS - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 1, // Size of a word in bytes parameter WORD_SIZE = 1, - // Number of Word requests per cycle - parameter NUM_REQS = 1, // Core Request Queue Size parameter CREQ_SIZE = 1, @@ -55,8 +56,9 @@ module VX_bank #( input wire core_req_valid, input wire [`REQS_BITS-1:0] core_req_tid, input wire core_req_rw, + input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr, + input wire [`WORD_SELECT_BITS-1:0] core_req_wsel, input wire [WORD_SIZE-1:0] core_req_byteen, - input wire [`WORD_ADDR_WIDTH-1:0] core_req_addr, input wire [`WORD_WIDTH-1:0] core_req_data, input wire [CORE_TAG_WIDTH-1:0] core_req_tag, output wire core_req_ready, @@ -94,41 +96,27 @@ module VX_bank #( wire creq_pop; wire creq_full, creq_empty; wire creq_rw; + wire [`LINE_ADDR_WIDTH-1:0] creq_addr; + wire [`WORD_SELECT_BITS-1:0] creq_wsel; wire [WORD_SIZE-1:0] creq_byteen; - wire [`REQS_BITS-1:0] creq_tid; -`IGNORE_WARNINGS_BEGIN - wire [`WORD_ADDR_WIDTH-1:0] creq_addr; -`IGNORE_WARNINGS_END - wire [`LINE_ADDR_WIDTH-1:0] creq_line_addr; - wire [`UP(`WORD_SELECT_BITS)-1:0] creq_wsel; wire [`WORD_WIDTH-1:0] creq_data; wire [CORE_TAG_WIDTH-1:0] creq_tag; + wire [`REQS_BITS-1:0] creq_tid; wire creq_push = core_req_valid && core_req_ready; assign core_req_ready = !creq_full; - if (BANK_ADDR_OFFSET == 0) begin - assign creq_line_addr = `LINE_SELECT_ADDR0(creq_addr); - end else begin - assign creq_line_addr = `LINE_SELECT_ADDRX(creq_addr); - end - - if (`WORD_SELECT_BITS != 0) begin - assign creq_wsel = creq_addr[`WORD_SELECT_BITS-1:0]; - end else begin - assign creq_wsel = 0; - end - VX_fifo_queue #( - .DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + `WORD_WIDTH), - .SIZE (CREQ_SIZE) + .DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + `LINE_ADDR_WIDTH + `WORD_SELECT_BITS + WORD_SIZE + `WORD_WIDTH), + .SIZE (CREQ_SIZE), + .BUFFERED (1) ) core_req_queue ( .clk (clk), .reset (reset), .push (creq_push), .pop (creq_pop), - .data_in ({core_req_tag, core_req_tid, core_req_rw, core_req_byteen, core_req_addr, core_req_data}), - .data_out ({creq_tag, creq_tid, creq_rw, creq_byteen, creq_addr, creq_data}), + .data_in ({core_req_tag, core_req_tid, core_req_rw, core_req_addr, core_req_wsel, core_req_byteen, core_req_data}), + .data_out ({creq_tag, creq_tid, creq_rw, creq_addr, creq_wsel, creq_byteen, creq_data}), .empty (creq_empty), .full (creq_full), `UNUSED_PIN (alm_empty), @@ -141,15 +129,14 @@ module VX_bank #( wire mshr_push; wire mshr_pending; wire mshr_valid; - wire [`REQS_BITS-1:0] mshr_tid; wire [`LINE_ADDR_WIDTH-1:0] mshr_addr; - wire [`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel; - wire [CORE_TAG_WIDTH-1:0] mshr_tag; - wire mshr_rw; + wire [`WORD_SELECT_BITS-1:0] mshr_wsel; wire [WORD_SIZE-1:0] mshr_byteen; + wire [CORE_TAG_WIDTH-1:0] mshr_tag; + wire [`REQS_BITS-1:0] mshr_tid; wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1; - wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1; + wire [`WORD_SELECT_BITS-1:0] wsel_st0, wsel_st1; wire mem_rw_st0, mem_rw_st1; wire [WORD_SIZE-1:0] byteen_st0, byteen_st1; wire [`CACHE_LINE_WIDTH-1:0] data_st0, data_st1; @@ -206,7 +193,7 @@ module VX_bank #( // we have a miss in mshr or entering it for the current address wire mshr_pending_sel = mshr_pending - || (is_miss_st1 && (creq_line_addr == addr_st1)); + || (is_miss_st1 && (creq_addr == addr_st1)); `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin @@ -217,7 +204,7 @@ module VX_bank #( `endif VX_pipe_register #( - .DATAW (1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + 1 + WORD_SIZE + `CACHE_LINE_WIDTH + `REQS_BITS + CORE_TAG_WIDTH + 1 + 1), + .DATAW (1 + 1 + 1 + `LINE_ADDR_WIDTH + `WORD_SELECT_BITS + 1 + WORD_SIZE + `CACHE_LINE_WIDTH + `REQS_BITS + CORE_TAG_WIDTH + 1 + 1), .RESETW (1) ) pipe_reg0 ( .clk (clk), @@ -227,9 +214,9 @@ module VX_bank #( mshr_pop || drsq_pop || creq_pop, mshr_pop_unqual, drsq_pop_unqual, - mshr_pop_unqual ? mshr_addr : (dram_rsp_valid ? dram_rsp_addr : creq_line_addr), + mshr_pop_unqual ? mshr_addr : (dram_rsp_valid ? dram_rsp_addr : creq_addr), mshr_pop_unqual ? mshr_wsel : creq_wsel, - mshr_pop_unqual ? mshr_rw : creq_rw, + mshr_pop_unqual ? 1'b0 : creq_rw, mshr_pop_unqual ? mshr_byteen : creq_byteen, dram_rsp_valid ? dram_rsp_data : {`WORDS_PER_LINE{creq_data}}, mshr_pop_unqual ? mshr_tid : creq_tid, @@ -291,7 +278,7 @@ module VX_bank #( assign incoming_fill_st0 = dram_rsp_valid && (addr_st0 == dram_rsp_addr); VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + CORE_TAG_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `WORD_SELECT_BITS + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + CORE_TAG_WIDTH), .RESETW (1) ) pipe_reg1 ( .clk (clk), @@ -366,7 +353,7 @@ module VX_bank #( wire mshr_init_ready_state = !miss_st1 || incoming_fill_qual_st1; // use dram rsp or core req address to lookup the mshr - wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = dram_rsp_valid ? dram_rsp_addr : creq_line_addr; + wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = dram_rsp_valid ? dram_rsp_addr : creq_addr; VX_miss_resrv #( .BANK_ID (BANK_ID), @@ -393,7 +380,7 @@ module VX_bank #( // enqueue .enqueue (mshr_push), .enqueue_addr (addr_st1), - .enqueue_data ({req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}), + .enqueue_data ({wsel_st1, byteen_st1, tag_st1, req_tid_st1}), .enqueue_is_mshr (is_mshr_st1), .enqueue_as_ready (mshr_init_ready_state), `UNUSED_PIN (enqueue_almfull), @@ -408,7 +395,7 @@ module VX_bank #( .schedule (mshr_pop), .schedule_valid (mshr_valid), .schedule_addr (mshr_addr), - .schedule_data ({mshr_tid, mshr_tag, mshr_rw, mshr_byteen, mshr_wsel}), + .schedule_data ({mshr_wsel, mshr_byteen, mshr_tag, mshr_tid}), // dequeue .dequeue (mshr_dequeue) @@ -421,14 +408,12 @@ module VX_bank #( assign crsq_push = valid_st1 && crsq_push_st1; assign crsq_pop = core_rsp_valid && core_rsp_ready; - wire [`REQS_BITS-1:0] crsq_tid_st1 = req_tid_st1; - wire [CORE_TAG_WIDTH-1:0] crsq_tag_st1 = CORE_TAG_WIDTH'(tag_st1); - wire [`WORD_WIDTH-1:0] crsq_data_st1; - - if (`WORD_SELECT_BITS != 0) begin - assign crsq_data_st1 = readdata_st1[wsel_st1 * `WORD_WIDTH +: `WORD_WIDTH]; + wire [`WORD_WIDTH-1:0] crsq_data; + + if (`WORD_SELECT_BITS != 0) begin + assign crsq_data = readdata_st1[wsel_st1 * `WORD_WIDTH +: `WORD_WIDTH]; end else begin - assign crsq_data_st1 = readdata_st1; + assign crsq_data = readdata_st1; end VX_fifo_queue #( @@ -441,7 +426,7 @@ module VX_bank #( .reset (reset), .push (crsq_push), .pop (crsq_pop), - .data_in ({crsq_tid_st1, crsq_tag_st1, crsq_data_st1}), + .data_in ({req_tid_st1, tag_st1, crsq_data}), .data_out ({core_rsp_tid, core_rsp_tag, core_rsp_data}), .empty (crsq_empty), .alm_full (crsq_alm_full), @@ -462,10 +447,7 @@ module VX_bank #( wire writeback = WRITE_ENABLE && do_writeback_st1; - wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = addr_st1; - - wire [`CACHE_LINE_WIDTH-1:0] dreq_data; - wire [CACHE_LINE_SIZE-1:0] dreq_byteen, dreq_byteen_unqual; + wire [CACHE_LINE_SIZE-1:0] dreq_byteen, dreq_byteen_unqual; if (`WORD_SELECT_BITS != 0) begin for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin @@ -474,7 +456,6 @@ module VX_bank #( end else begin assign dreq_byteen_unqual = byteen_st1; end - assign dreq_data = data_st1; assign dreq_byteen = writeback ? dreq_byteen_unqual : {CACHE_LINE_SIZE{1'b1}}; @@ -487,7 +468,7 @@ module VX_bank #( .reset (reset), .push (dreq_push), .pop (dreq_pop), - .data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}), + .data_in ({writeback, dreq_byteen, addr_st1, data_st1}), .data_out ({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), .empty (dreq_empty), .alm_full (dreq_alm_full), @@ -534,10 +515,7 @@ module VX_bank #( $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_rsp_addr, BANK_ID), dram_rsp_data); end if (mshr_pop) begin - if (mshr_rw) - $display("%t: cache%0d:%0d mshr-wr-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), mshr_tag, mshr_tid, mshr_byteen, mshr_data, debug_wid_sel, debug_pc_sel); - else - $display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), mshr_tag, mshr_tid, mshr_byteen, debug_wid_sel, debug_pc_sel); + $display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), mshr_tag, mshr_tid, mshr_byteen, debug_wid_sel, debug_pc_sel); end if (creq_pop) begin if (creq_rw) diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index bab3c83d..adbb6568 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -3,16 +3,17 @@ module VX_cache #( parameter CACHE_ID = 0, + // Number of Word requests per cycle + parameter NUM_REQS = 4, + // Size of cache in bytes parameter CACHE_SIZE = 16384, // Size of line inside a bank in bytes parameter CACHE_LINE_SIZE = 64, // Number of banks - parameter NUM_BANKS = 4, + parameter NUM_BANKS = NUM_REQS, // Size of a word in bytes parameter WORD_SIZE = 4, - // Number of Word requests per cycle - parameter NUM_REQS = NUM_BANKS, // Core Request Queue Size parameter CREQ_SIZE = 4, @@ -51,8 +52,8 @@ module VX_cache #( // Core request input wire [NUM_REQS-1:0] core_req_valid, input wire [NUM_REQS-1:0] core_req_rw, - input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, + input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, output wire [NUM_REQS-1:0] core_req_ready, @@ -87,18 +88,19 @@ module VX_cache #( `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) wire [NUM_BANKS-1:0] per_bank_core_req_valid; - wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; wire [NUM_BANKS-1:0] per_bank_core_req_rw; + wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr; + wire [NUM_BANKS-1:0][`WORD_SELECT_BITS-1:0] per_bank_core_req_wsel; wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen; - wire [NUM_BANKS-1:0][`WORD_ADDR_WIDTH-1:0] per_bank_core_req_addr; - wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag; wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data; + wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag; + wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; wire [NUM_BANKS-1:0] per_bank_core_req_ready; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; - wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid; wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data; wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag; + wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid; wire [NUM_BANKS-1:0] per_bank_core_rsp_ready; wire [NUM_BANKS-1:0] per_bank_dram_req_valid; @@ -131,8 +133,9 @@ module VX_cache #( assign dram_rsp_ready = !drsq_full; VX_fifo_queue #( - .DATAW (DRAM_TAG_WIDTH + `CACHE_LINE_WIDTH), - .SIZE (DRSQ_SIZE) + .DATAW (DRAM_TAG_WIDTH + `CACHE_LINE_WIDTH), + .SIZE (DRSQ_SIZE), + .BUFFERED (1) ) dram_rsp_queue ( .clk (clk), .reset (reset), @@ -184,23 +187,22 @@ module VX_cache #( .reset (reset), `ifdef PERF_ENABLE .bank_stalls(perf_cache_if.bank_stalls), - `else - `UNUSED_PIN (bank_stalls), `endif .core_req_valid (core_req_valid), .core_req_rw (core_req_rw), - .core_req_byteen(core_req_byteen), .core_req_addr (core_req_addr), + .core_req_byteen(core_req_byteen), .core_req_data (core_req_data), .core_req_tag (core_req_tag), .core_req_ready (core_req_ready), .per_bank_core_req_valid (per_bank_core_req_valid), - .per_bank_core_req_tid (per_bank_core_req_tid), .per_bank_core_req_rw (per_bank_core_req_rw), - .per_bank_core_req_byteen(per_bank_core_req_byteen), .per_bank_core_req_addr (per_bank_core_req_addr), - .per_bank_core_req_tag (per_bank_core_req_tag), + .per_bank_core_req_wsel (per_bank_core_req_wsel), + .per_bank_core_req_byteen(per_bank_core_req_byteen), .per_bank_core_req_data (per_bank_core_req_data), + .per_bank_core_req_tag (per_bank_core_req_tag), + .per_bank_core_req_tid (per_bank_core_req_tid), .per_bank_core_req_ready (per_bank_core_req_ready) ); @@ -208,12 +210,13 @@ module VX_cache #( for (genvar i = 0; i < NUM_BANKS; i++) begin wire curr_bank_core_req_valid; - wire [`REQS_BITS-1:0] curr_bank_core_req_tid; wire curr_bank_core_req_rw; + wire [`LINE_ADDR_WIDTH-1:0] curr_bank_core_req_addr; + wire [`WORD_SELECT_BITS-1:0] curr_bank_core_req_wsel; wire [WORD_SIZE-1:0] curr_bank_core_req_byteen; - wire [`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr; - wire [CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; wire [`WORD_WIDTH-1:0] curr_bank_core_req_data; + wire [CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; + wire [`REQS_BITS-1:0] curr_bank_core_req_tid; wire curr_bank_core_req_ready; wire curr_bank_core_rsp_valid; @@ -237,12 +240,13 @@ module VX_cache #( // Core Req assign curr_bank_core_req_valid = per_bank_core_req_valid[i]; - assign curr_bank_core_req_tid = per_bank_core_req_tid[i]; assign curr_bank_core_req_addr = per_bank_core_req_addr[i]; assign curr_bank_core_req_rw = per_bank_core_req_rw[i]; + assign curr_bank_core_req_wsel = per_bank_core_req_wsel[i]; assign curr_bank_core_req_byteen = per_bank_core_req_byteen[i]; assign curr_bank_core_req_data = per_bank_core_req_data[i]; assign curr_bank_core_req_tag = per_bank_core_req_tag[i]; + assign curr_bank_core_req_tid = per_bank_core_req_tid[i]; assign per_bank_core_req_ready[i] = curr_bank_core_req_ready; // Core WB @@ -308,12 +312,13 @@ module VX_cache #( // Core request .core_req_valid (curr_bank_core_req_valid), - .core_req_tid (curr_bank_core_req_tid), .core_req_rw (curr_bank_core_req_rw), .core_req_byteen (curr_bank_core_req_byteen), .core_req_addr (curr_bank_core_req_addr), + .core_req_wsel (curr_bank_core_req_wsel), .core_req_data (curr_bank_core_req_data), .core_req_tag (curr_bank_core_req_tag), + .core_req_tid (curr_bank_core_req_tid), .core_req_ready (curr_bank_core_req_ready), // Core response @@ -350,9 +355,9 @@ module VX_cache #( .clk (clk), .reset (reset), .per_bank_core_rsp_valid (per_bank_core_rsp_valid), + .per_bank_core_rsp_data (per_bank_core_rsp_data), .per_bank_core_rsp_tag (per_bank_core_rsp_tag), .per_bank_core_rsp_tid (per_bank_core_rsp_tid), - .per_bank_core_rsp_data (per_bank_core_rsp_data), .per_bank_core_rsp_ready (per_bank_core_rsp_ready), .core_rsp_valid (core_rsp_valid), .core_rsp_tag (core_rsp_tag), diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index 6c0f613f..cd3fdac6 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -9,11 +9,11 @@ `define REQS_BITS `LOG2UP(NUM_REQS) -// tag rw byteen tid -`define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS) +// tag byteen tid +`define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + WORD_SIZE + `REQS_BITS) // metadata word_sel -`define MSHR_DATA_WIDTH (`REQ_INST_META_WIDTH + `UP(`WORD_SELECT_BITS)) +`define MSHR_DATA_WIDTH (`REQ_INST_META_WIDTH + `WORD_SELECT_BITS) `define WORD_WIDTH (8 * WORD_SIZE) @@ -23,7 +23,6 @@ `define LINES_PER_BANK (`BANK_SIZE / CACHE_LINE_SIZE) `define WORDS_PER_LINE (CACHE_LINE_SIZE / WORD_SIZE) -`define WORD_SELECT_BITS `CLOG2(`WORDS_PER_LINE) `define WORD_ADDR_WIDTH (32-`CLOG2(WORD_SIZE)) `define DRAM_ADDR_WIDTH (32-`CLOG2(CACHE_LINE_SIZE)) `define LINE_ADDR_WIDTH (`DRAM_ADDR_WIDTH-`BANK_SELECT_BITS) diff --git a/hw/rtl/cache/VX_cache_core_req_bank_sel.v b/hw/rtl/cache/VX_cache_core_req_bank_sel.v index d9dc24cd..b0370bf9 100644 --- a/hw/rtl/cache/VX_cache_core_req_bank_sel.v +++ b/hw/rtl/cache/VX_cache_core_req_bank_sel.v @@ -6,75 +6,98 @@ module VX_cache_core_req_bank_sel #( // Size of a word in bytes parameter WORD_SIZE = 4, // Number of banks - parameter NUM_BANKS = 4, + parameter NUM_BANKS = 4, // Number of Word requests per cycle parameter NUM_REQS = 4, // core request tag size parameter CORE_TAG_WIDTH = 3, // bank offset from beginning of index range - parameter BANK_ADDR_OFFSET = 0, - // buffer the output - parameter BUFFERED = 0 + parameter BANK_ADDR_OFFSET = 0 ) ( input wire clk, input wire reset, +`ifdef PERF_ENABLE output wire [63:0] bank_stalls, +`endif input wire [NUM_REQS-1:0] core_req_valid, input wire [NUM_REQS-1:0] core_req_rw, - input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, + input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, output wire [NUM_REQS-1:0] core_req_ready, output wire [NUM_BANKS-1:0] per_bank_core_req_valid, - output wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid, output wire [NUM_BANKS-1:0] per_bank_core_req_rw, + output wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr, + output wire [NUM_BANKS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel, output wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen, - output wire [NUM_BANKS-1:0][`WORD_ADDR_WIDTH-1:0] per_bank_core_req_addr, - output wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag, output wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data, + output wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag, + output wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid, input wire [NUM_BANKS-1:0] per_bank_core_req_ready ); - if (NUM_BANKS > 1) begin + `STATIC_ASSERT (NUM_REQS >= NUM_BANKS, ("invalid number of banks")); + + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + + wire [NUM_REQS-1:0][`LINE_ADDR_WIDTH-1:0] core_req_line_addr; + wire [NUM_REQS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel; + wire [NUM_REQS-1:0][`UP(`BANK_SELECT_BITS)-1:0] core_req_bid; + + for (genvar i = 0; i < NUM_REQS; i++) begin + if (BANK_ADDR_OFFSET == 0) begin + assign core_req_line_addr[i] = `LINE_SELECT_ADDR0(core_req_addr[i]); + end else begin + assign core_req_line_addr[i] = `LINE_SELECT_ADDRX(core_req_addr[i]); + end + assign core_req_wsel[i] = core_req_addr[i][`UP(`WORD_SELECT_BITS)-1:0]; + end + + for (genvar i = 0; i < NUM_REQS; ++i) begin + if (NUM_BANKS > 1) begin + assign core_req_bid[i] = `BANK_SELECT_ADDR(core_req_addr[i]); + end else begin + assign core_req_bid[i] = 0; + end + end + + if (NUM_REQS > 1) begin reg [NUM_BANKS-1:0] per_bank_core_req_valid_r; - reg [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r; reg [NUM_BANKS-1:0] per_bank_core_req_rw_r; + reg [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_r; + reg [NUM_BANKS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel_r; reg [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_r; - reg [NUM_BANKS-1:0][`WORD_ADDR_WIDTH-1:0] per_bank_core_req_addr_r; - reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r; reg [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_r; - reg [NUM_BANKS-1:0] per_bank_core_req_stall; + reg [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_r; + reg [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_r; - reg [NUM_REQS-1:0] core_req_ready_r; - reg [NUM_REQS-1:0] core_req_sel_r; - wire [NUM_REQS-1:0][`BANK_SELECT_BITS-1:0] core_req_bid; - - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_req_bid[i] = `BANK_SELECT_ADDR(core_req_addr[i]); - end + reg [NUM_REQS-1:0] core_req_ready_r; always @(*) begin per_bank_core_req_valid_r = 0; - per_bank_core_req_tid_r = 'x; per_bank_core_req_rw_r = 'x; - per_bank_core_req_byteen_r= 'x; per_bank_core_req_addr_r = 'x; - per_bank_core_req_tag_r = 'x; + per_bank_core_req_wsel_r = 'x; + per_bank_core_req_byteen_r= 'x; per_bank_core_req_data_r = 'x; + per_bank_core_req_tag_r = 'x; + per_bank_core_req_tid_r = 'x; - for (integer i = NUM_REQS-1; i >= 0; --i) begin + for (integer i = NUM_REQS-1; i >= 0; --i) begin if (core_req_valid[i]) begin per_bank_core_req_valid_r[core_req_bid[i]] = 1; - per_bank_core_req_tid_r[core_req_bid[i]] = `REQS_BITS'(i); per_bank_core_req_rw_r[core_req_bid[i]] = core_req_rw[i]; + per_bank_core_req_addr_r[core_req_bid[i]] = core_req_line_addr[i]; + per_bank_core_req_wsel_r[core_req_bid[i]] = core_req_wsel[i]; per_bank_core_req_byteen_r[core_req_bid[i]]= core_req_byteen[i]; - per_bank_core_req_addr_r[core_req_bid[i]] = core_req_addr[i]; - per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i]; per_bank_core_req_data_r[core_req_bid[i]] = core_req_data[i]; + per_bank_core_req_tag_r[core_req_bid[i]] = core_req_tag[i]; + per_bank_core_req_tid_r[core_req_bid[i]] = `REQS_BITS'(i); end end end @@ -84,65 +107,61 @@ module VX_cache_core_req_bank_sel #( for (integer j = 0; j < NUM_BANKS; ++j) begin for (integer i = 0; i < NUM_REQS; ++i) begin if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin - core_req_ready_r[i] = ~per_bank_core_req_stall[j]; + core_req_ready_r[i] = per_bank_core_req_ready[j]; break; end end end end - always @(*) begin - core_req_sel_r = 0; - for (integer j = 0; j < NUM_BANKS; ++j) begin - for (integer i = 0; i < NUM_REQS; ++i) begin - if (core_req_valid[i] && (core_req_bid[i] == `BANK_SELECT_BITS'(j))) begin - core_req_sel_r[i] = ~per_bank_core_req_stall[j]; - end - end - end - end - - reg [63:0] bank_stalls_r; - always @(posedge clk) begin - if (reset) begin - bank_stalls_r <= 0; - end else begin - bank_stalls_r <= bank_stalls_r + 64'($countones(core_req_sel_r & ~core_req_ready_r)); - end - end - - for (genvar i = 0; i < NUM_BANKS; ++i) begin - assign per_bank_core_req_stall[i] = ~per_bank_core_req_ready[i] && (!BUFFERED || per_bank_core_req_valid[i]); - VX_pipe_register #( - .DATAW (1 + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + CORE_TAG_WIDTH + `WORD_WIDTH), - .RESETW (1), - .DEPTH (BUFFERED) - ) pipe_reg ( - .clk (clk), - .reset (reset), - .enable (~per_bank_core_req_stall[i]), - .data_in ({per_bank_core_req_valid_r[i], per_bank_core_req_tid_r[i], per_bank_core_req_rw_r[i], per_bank_core_req_byteen_r[i], per_bank_core_req_addr_r[i], per_bank_core_req_tag_r[i], per_bank_core_req_data_r[i]}), - .data_out ({per_bank_core_req_valid[i], per_bank_core_req_tid[i], per_bank_core_req_rw[i], per_bank_core_req_byteen[i], per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_data[i]}) - ); - end - + assign per_bank_core_req_valid = per_bank_core_req_valid_r; + assign per_bank_core_req_rw = per_bank_core_req_rw_r; + assign per_bank_core_req_addr = per_bank_core_req_addr_r; + assign per_bank_core_req_wsel = per_bank_core_req_wsel_r; + assign per_bank_core_req_byteen = per_bank_core_req_byteen_r; + assign per_bank_core_req_data = per_bank_core_req_data_r; + assign per_bank_core_req_tag = per_bank_core_req_tag_r; + assign per_bank_core_req_tid = per_bank_core_req_tid_r; assign core_req_ready = core_req_ready_r; - assign bank_stalls = bank_stalls_r; end else begin - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - assign bank_stalls = 0; - assign per_bank_core_req_valid = core_req_valid; + assign per_bank_core_req_valid = core_req_valid[0]; + assign per_bank_core_req_rw[0] = core_req_rw[0]; + assign per_bank_core_req_addr[0] = core_req_line_addr[0]; + assign per_bank_core_req_wsel[0] = core_req_wsel[0]; + assign per_bank_core_req_byteen[0] = core_req_byteen[0]; + assign per_bank_core_req_data[0] = core_req_data[0]; + assign per_bank_core_req_tag[0] = core_req_tag[0]; assign per_bank_core_req_tid[0] = 0; - assign per_bank_core_req_rw[0] = core_req_rw; - assign per_bank_core_req_byteen[0] = core_req_byteen; - assign per_bank_core_req_addr[0] = core_req_addr; - assign per_bank_core_req_tag[0] = core_req_tag; - assign per_bank_core_req_data[0] = core_req_data; - assign core_req_ready[0] = per_bank_core_req_ready; + assign core_req_ready[0] = per_bank_core_req_ready; + end - end +`ifdef PERF_ENABLE + reg [NUM_REQS-1:0] core_req_sel_r; + + always @(*) begin + core_req_sel_r = 0; + for (integer j = 0; j < NUM_BANKS; ++j) begin + for (integer i = 0; i < NUM_REQS; ++i) begin + if (core_req_valid[i] && (core_req_bid[i] == `UP(`BANK_SELECT_BITS)'(j))) begin + core_req_sel_r[i] = per_bank_core_req_ready[j]; + end + end + end + end + + reg [63:0] bank_stalls_r; + + always @(posedge clk) begin + if (reset) begin + bank_stalls_r <= 0; + end else begin + bank_stalls_r <= bank_stalls_r + 64'($countones(core_req_sel_r & ~core_req_ready)); + end + end + + assign bank_stalls = bank_stalls_r; +`endif endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index d50b0e45..f44e007e 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -16,10 +16,10 @@ module VX_cache_core_rsp_merge #( input wire reset, // Per Bank WB - input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid, + input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid, + input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag, input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid, - input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data, output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready, // Core Response diff --git a/hw/rtl/cache/VX_data_access.v b/hw/rtl/cache/VX_data_access.v index e5f5f7a5..432ca72a 100644 --- a/hw/rtl/cache/VX_data_access.v +++ b/hw/rtl/cache/VX_data_access.v @@ -39,7 +39,7 @@ module VX_data_access #( // writing input wire writeen, input wire is_fill, - input wire [`UP(`WORD_SELECT_BITS)-1:0] wsel, + input wire [`WORD_SELECT_BITS-1:0] wsel, input wire [WORD_SIZE-1:0] byteen, input wire [`CACHE_LINE_WIDTH-1:0] wrdata ); diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index ebdecf5c..4f142d08 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -4,14 +4,15 @@ module VX_miss_resrv #( parameter CACHE_ID = 0, parameter BANK_ID = 0, + // Number of Word requests per cycle + parameter NUM_REQS = 1, + // Size of line inside a bank in bytes parameter CACHE_LINE_SIZE = 1, // Number of banks - parameter NUM_BANKS = 1, + parameter NUM_BANKS = 1, // Size of a word in bytes parameter WORD_SIZE = 1, - // Number of Word requests per cycle - parameter NUM_REQS = 1, // Miss Reserv Queue Knob parameter MSHR_SIZE = 1, parameter ALM_FULL = (MSHR_SIZE-1), diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index b79350f5..3cb5843f 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -38,8 +38,8 @@ module VX_shared_mem #( // Core request input wire [NUM_REQS-1:0] core_req_valid, input wire [NUM_REQS-1:0] core_req_rw, - input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, + input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, output wire [NUM_REQS-1:0] core_req_ready, @@ -63,12 +63,12 @@ module VX_shared_mem #( `endif wire [NUM_BANKS-1:0] per_bank_core_req_valid_unqual; - wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_unqual; wire [NUM_BANKS-1:0] per_bank_core_req_rw_unqual; + wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_unqual; wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen_unqual; - wire [NUM_BANKS-1:0][`WORD_ADDR_WIDTH-1:0] per_bank_core_req_addr_unqual; wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data_unqual; wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag_unqual; + wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid_unqual; wire [NUM_BANKS-1:0] per_bank_core_req_ready_unqual; VX_cache_core_req_bank_sel #( @@ -77,28 +77,26 @@ module VX_shared_mem #( .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET), - .BUFFERED (0) + .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET) ) core_req_bank_sel ( .clk (clk), .reset (reset), `ifdef PERF_ENABLE .bank_stalls(perf_cache_if.bank_stalls), - `else - `UNUSED_PIN (bank_stalls), `endif .core_req_valid (core_req_valid), - .core_req_rw (core_req_rw), - .core_req_byteen(core_req_byteen), + .core_req_rw (core_req_rw), .core_req_addr (core_req_addr), + .core_req_byteen(core_req_byteen), .core_req_data (core_req_data), .core_req_tag (core_req_tag), .core_req_ready (core_req_ready), - .per_bank_core_req_valid (per_bank_core_req_valid_unqual), + .per_bank_core_req_valid (per_bank_core_req_valid_unqual), .per_bank_core_req_tid (per_bank_core_req_tid_unqual), .per_bank_core_req_rw (per_bank_core_req_rw_unqual), - .per_bank_core_req_byteen(per_bank_core_req_byteen_unqual), .per_bank_core_req_addr (per_bank_core_req_addr_unqual), + `UNUSED_PIN (per_bank_core_req_wsel), + .per_bank_core_req_byteen(per_bank_core_req_byteen_unqual), .per_bank_core_req_tag (per_bank_core_req_tag_unqual), .per_bank_core_req_data (per_bank_core_req_data_unqual), .per_bank_core_req_ready (per_bank_core_req_ready_unqual) @@ -108,12 +106,12 @@ module VX_shared_mem #( `UNUSED_VAR (per_bank_core_req_rw_unqual) wire [NUM_BANKS-1:0] per_bank_core_req_valid; - wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; - wire [NUM_REQS-1:0] per_bank_core_req_rw; + wire [NUM_BANKS-1:0] per_bank_core_req_rw; + wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr; wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen; - wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr; wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data; wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag; + wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; wire creq_push, creq_pop, creq_empty, creq_full; wire crsq_full; @@ -121,18 +119,16 @@ module VX_shared_mem #( assign creq_push = (| core_req_valid) && !creq_full; assign creq_pop = ~creq_empty && ~crsq_full; - assign per_bank_core_req_ready_unqual = {NUM_BANKS{~creq_full}}; + assign per_bank_core_req_ready_unqual = {NUM_BANKS{~creq_full}}; wire [NUM_REQS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual; `UNUSED_VAR (per_bank_core_req_addr_unqual) - for (genvar i = 0; i < NUM_REQS; i++) begin - wire [`LINE_ADDR_WIDTH-1:0] tmp = `LINE_SELECT_ADDRX(per_bank_core_req_addr_unqual[i]); - assign per_bank_core_req_addr_qual[i] = tmp[`LINE_SELECT_BITS-1:0]; - `UNUSED_VAR (tmp) + for (genvar i = 0; i < NUM_REQS; i++) begin + assign per_bank_core_req_addr_qual[i] = per_bank_core_req_addr_unqual[i][`LINE_SELECT_BITS-1:0]; end VX_fifo_queue #( - .DATAW (NUM_BANKS * (1 + `REQS_BITS + 1 + WORD_SIZE + `LINE_SELECT_BITS + `WORD_WIDTH + CORE_TAG_WIDTH)), + .DATAW (NUM_BANKS * (1 + 1 + `LINE_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS)), .SIZE (CREQ_SIZE), .BUFFERED (1) ) core_req_queue ( @@ -140,20 +136,20 @@ module VX_shared_mem #( .reset (reset), .push (creq_push), .pop (creq_pop), - .data_in ({per_bank_core_req_valid_unqual, - per_bank_core_req_tid_unqual, + .data_in ({per_bank_core_req_valid_unqual, per_bank_core_req_rw_unqual, - per_bank_core_req_byteen_unqual, per_bank_core_req_addr_qual, + per_bank_core_req_byteen_unqual, per_bank_core_req_data_unqual, - per_bank_core_req_tag_unqual}), - .data_out({per_bank_core_req_valid, - per_bank_core_req_tid, + per_bank_core_req_tag_unqual, + per_bank_core_req_tid_unqual}), + .data_out({per_bank_core_req_valid, per_bank_core_req_rw, - per_bank_core_req_byteen, per_bank_core_req_addr, + per_bank_core_req_byteen, per_bank_core_req_data, - per_bank_core_req_tag}), + per_bank_core_req_tag, + per_bank_core_req_tid}), .empty (creq_empty), .full (creq_full), `UNUSED_PIN (alm_empty), @@ -248,13 +244,41 @@ module VX_shared_mem #( `endif `ifdef PERF_ENABLE - assign perf_cache_if.reads = '0; - assign perf_cache_if.writes = '0; + // per cycle: core_reads, core_writes + reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle; + reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle; + + assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw); + assign perf_core_writes_per_cycle = $countones(core_req_valid & core_req_ready & core_req_rw); + + if (CORE_TAG_ID_BITS != 0) begin + assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & {NUM_REQS{!core_rsp_ready}}); + end else begin + assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready); + end + + reg [63:0] perf_core_reads; + reg [63:0] perf_core_writes; + reg [63:0] perf_crsp_stalls; + + always @(posedge clk) begin + if (reset) begin + perf_core_reads <= 0; + perf_core_writes <= 0; + perf_crsp_stalls <= 0; + end else begin + perf_core_reads <= perf_core_reads + 64'(perf_core_reads_per_cycle); + perf_core_writes <= perf_core_writes + 64'(perf_core_writes_per_cycle); + perf_crsp_stalls <= perf_crsp_stalls + 64'(perf_crsp_stall_per_cycle); + end + end + + assign perf_cache_if.reads = perf_core_reads; + assign perf_cache_if.writes = perf_core_writes; assign perf_cache_if.read_misses = '0; assign perf_cache_if.write_misses = '0; - assign perf_cache_if.mshr_stalls = '0; assign perf_cache_if.pipe_stalls = '0; - assign perf_cache_if.crsp_stalls = '0; + assign perf_cache_if.crsp_stalls = perf_crsp_stalls; `endif endmodule