fixed OPAE crash, added custom bram module to control rw collision, dogfood testcase argument, optimized buffered fifo, quartus build optimization flags
This commit is contained in:
@@ -7,7 +7,7 @@ CXXFLAGS += -I../include -I$(OPAE_HOME)/include -I../../hw
|
||||
|
||||
LDFLAGS += -L$(OPAE_HOME)/lib
|
||||
|
||||
SCOPE=1
|
||||
#SCOPE=1
|
||||
|
||||
# stack execution protection
|
||||
LDFLAGS +=-z noexecstack
|
||||
@@ -32,8 +32,6 @@ ASE_LIBS += -luuid -lopae-c-ase
|
||||
|
||||
VLSIM_LIBS += -lopae-c-vlsim
|
||||
|
||||
LIB_DIR=../lib
|
||||
|
||||
ASE_DIR = ase
|
||||
|
||||
VLSIM_DIR = vlsim
|
||||
@@ -67,10 +65,10 @@ fpga: $(SRCS)
|
||||
asesim: $(SRCS) $(ASE_DIR)
|
||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
|
||||
|
||||
vlsim: $(SRCS) opae-vlsim
|
||||
$(CXX) $(CXXFLAGS) -L./vlsim -DUSE_VLSIM $(SRCS) $(LDFLAGS) $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
|
||||
vlsim: $(SRCS) vlsim-hw
|
||||
$(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -L./vlsim $(VLSIM_LIBS) -o $(PROJECT_VLSIM)
|
||||
|
||||
opae-vlsim:
|
||||
vlsim-hw:
|
||||
$(SET_SCOPE) $(MAKE) -C vlsim
|
||||
|
||||
vortex.o: vortex.cpp
|
||||
|
||||
@@ -15,8 +15,8 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||
DBG_PRINT_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||
|
||||
DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
#DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
||||
#DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
@@ -72,9 +72,13 @@ ifdef SCOPE
|
||||
SCOPE_VH = $(RTL_DIR)/scope-defs.vh
|
||||
endif
|
||||
|
||||
# use our OPAE shim
|
||||
VL_FLAGS += -DNOPAE
|
||||
CFLAGS += -DNOPAE
|
||||
|
||||
# use DPI FPU
|
||||
VL_FLAGS += -DFPU_FAST
|
||||
|
||||
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
||||
|
||||
PROJECT = libopae-c-vlsim.so
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
fpga_result res = _expr; \
|
||||
if (res == FPGA_OK) \
|
||||
break; \
|
||||
printf("OPAE Error: '%s' returned %d, %s!\n", \
|
||||
printf("[VXDRV] Error: '%s' returned %d, %s!\n", \
|
||||
#_expr, (int)res, fpgaErrStr(res)); \
|
||||
return -1; \
|
||||
} while (false)
|
||||
@@ -118,7 +118,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
*value = STARTUP_ADDR;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "invalid caps id: %d\n", caps_id);
|
||||
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
|
||||
std::abort();
|
||||
return -1;
|
||||
}
|
||||
@@ -156,7 +156,7 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
||||
fpgaDestroyProperties(&filter);
|
||||
|
||||
if (num_matches < 1) {
|
||||
fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID);
|
||||
fprintf(stderr, "[VXDRV] Error: accelerator %s not found!\n", AFU_ACCEL_UUID);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -197,9 +197,10 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
||||
fpgaClose(accel_handle);
|
||||
return ret;
|
||||
}
|
||||
|
||||
fprintf(stdout, "DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
|
||||
#ifndef NDEBUG
|
||||
fprintf(stdout, "[VXDRV] DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
|
||||
device->implementation_id, device->num_cores, device->num_warps, device->num_threads);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef SCOPE
|
||||
@@ -236,18 +237,18 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
||||
int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles);
|
||||
assert(ret == 0);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
|
||||
fprintf(stdout, "[VXDRV] PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC);
|
||||
total_instrs += instrs;
|
||||
total_cycles = std::max<uint64_t>(total_cycles, cycles);
|
||||
}
|
||||
float IPC = (float)(double(total_instrs) / double(total_cycles));
|
||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
|
||||
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC);
|
||||
} else {
|
||||
uint64_t instrs, cycles;
|
||||
int ret = vx_get_perf(hdevice, 0, &instrs, &cycles);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
assert(ret == 0);
|
||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||
fprintf(stdout, "[VXDRV] PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -373,7 +374,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_STATUS, &data));
|
||||
if (0 == data || 0 == timeout) {
|
||||
if (data != 0) {
|
||||
fprintf(stdout, "ready-wait timed out: status=%ld\n", data);
|
||||
fprintf(stdout, "[VXDRV] ready-wait timed out: status=%ld\n", data);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -90,16 +90,20 @@ vx_buffer_h dst_buf = nullptr;
|
||||
|
||||
static void show_usage() {
|
||||
std::cout << "Vortex Driver Test." << std::endl;
|
||||
std::cout << "Usage: [-s:testid] [-e:testid] [-k: kernel] [-n words] [-c] [-h: help]" << std::endl;
|
||||
std::cout << "Usage: [-t:testid] [-s:testid] [-e:testid] [-k: kernel] [-n words] [-c] [-h: help]" << std::endl;
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:s:e:k:ch?")) != -1) {
|
||||
while ((c = getopt(argc, argv, "n:t:s:e:k:ch?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
count = atoi(optarg);
|
||||
break;
|
||||
case 't':
|
||||
testid_s = atoi(optarg);
|
||||
testid_e = atoi(optarg);
|
||||
break;
|
||||
case 's':
|
||||
testid_s = atoi(optarg);
|
||||
break;
|
||||
|
||||
@@ -60,9 +60,9 @@ qsub-sim
|
||||
make ase
|
||||
|
||||
# tests
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -t1 -n1
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n16
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n1 -s4 -e4
|
||||
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n16
|
||||
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
||||
|
||||
# modify "vsim_run.tcl" to dump VCD trace
|
||||
@@ -97,7 +97,7 @@ kill -9 <pid>
|
||||
# fixing device resource busy issue when deleting /build_ase_1c/
|
||||
lsof +D build_ase_1c
|
||||
|
||||
# quick off cache synthesis
|
||||
# kick off synthesis
|
||||
make -C pipeline clean && make -C pipeline > pipeline/build.log 2>&1 &
|
||||
make -C cache clean && make -C cache > cache/build.log 2>&1 &
|
||||
make -C core clean && make -C core > core/build.log 2>&1 &
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
+define+FPU_FAST
|
||||
+define+SCOPE
|
||||
#+define+SCOPE
|
||||
|
||||
#+define+DBG_PRINT_CORE_ICACHE
|
||||
#+define+DBG_PRINT_CORE_DCACHE
|
||||
|
||||
@@ -7,3 +7,20 @@ set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
||||
set_global_assignment -name VERILOG_MACRO NDEBUG
|
||||
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||
set_global_assignment -name VERILOG_MACRO FPU_FAST
|
||||
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
|
||||
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
|
||||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
set_global_assignment -name POWER_USE_TA_VALUE 65
|
||||
set_global_assignment -name SEED 1
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
@@ -175,8 +175,9 @@ logic [31:0] cmd_csr_wdata;
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
t_ccip_c0_ReqMmioHdr mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
|
||||
t_ccip_c0_ReqMmioHdr mmio_hdr;
|
||||
`IGNORE_WARNINGS_END
|
||||
assign mmio_hdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
|
||||
|
||||
`STATIC_ASSERT(($bits(t_ccip_c0_ReqMmioHdr)-$bits(mmio_hdr.address)) == 12, ("Oops!"))
|
||||
|
||||
@@ -204,9 +205,20 @@ wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hd
|
||||
reg scope_start;
|
||||
`endif
|
||||
|
||||
// disable assertions until reset
|
||||
`ifndef VERILATOR
|
||||
initial begin
|
||||
$assertoff;
|
||||
end
|
||||
`endif
|
||||
|
||||
always_ff @(posedge clk)
|
||||
begin
|
||||
if (reset) begin
|
||||
`ifndef VERILATOR
|
||||
$asserton; // enable assertions
|
||||
`endif
|
||||
|
||||
mmio_tx.hdr <= 0;
|
||||
mmio_tx.data <= 0;
|
||||
mmio_tx.mmioRdValid <= 0;
|
||||
@@ -324,6 +336,7 @@ begin
|
||||
end
|
||||
`endif
|
||||
default: begin
|
||||
mmio_tx.data <= 64'h0;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address);
|
||||
`endif
|
||||
|
||||
@@ -59,8 +59,6 @@
|
||||
`define EXT_F_ENABLE
|
||||
`endif
|
||||
|
||||
//`define FPU_FAST
|
||||
|
||||
// Device identification
|
||||
`define VENDOR_ID 0
|
||||
`define ARCHITECTURE_ID 0
|
||||
|
||||
@@ -347,7 +347,7 @@ module VX_decode #(
|
||||
assign decode_if.rd = rd;
|
||||
assign decode_if.rs1 = rs1_qual;
|
||||
assign decode_if.rs2 = rs2;
|
||||
assign decode_if.rs3 = rs3;
|
||||
assign decode_if.rs3 = 0;
|
||||
`endif
|
||||
|
||||
assign decode_if.use_rs3 = use_rs3;
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
// control module to support multi-cycle read for fp register
|
||||
|
||||
module VX_gpr_fp_ctrl (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] rs1_data,
|
||||
input wire [`NUM_THREADS-1:0][31:0] rs2_data,
|
||||
VX_gpr_req_if gpr_req_if,
|
||||
|
||||
// outputs
|
||||
output wire [`NW_BITS+`NR_BITS-1:0] raddr1,
|
||||
VX_gpr_rsp_if gpr_rsp_if
|
||||
);
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data, rsp_rs3_data;
|
||||
reg rsp_valid;
|
||||
reg [31:0] rsp_pc;
|
||||
reg [`NW_BITS-1:0] rsp_wid;
|
||||
reg read_rs1;
|
||||
|
||||
wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && read_rs1;
|
||||
wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rsp_valid <= 0;
|
||||
rsp_pc <= 0;
|
||||
rsp_rs1_data <= 0;
|
||||
rsp_rs2_data <= 0;
|
||||
rsp_rs3_data <= 0;
|
||||
rsp_wid <= 0;
|
||||
read_rs1 <= 1;
|
||||
end else begin
|
||||
if (rs3_delay) begin
|
||||
read_rs1 <= 0;
|
||||
rsp_wid <= gpr_req_if.wid;
|
||||
end else if (read_fire) begin
|
||||
read_rs1 <= 1;
|
||||
end
|
||||
|
||||
rsp_valid <= gpr_req_if.valid;
|
||||
rsp_wid <= gpr_req_if.wid;
|
||||
rsp_pc <= gpr_req_if.PC;
|
||||
|
||||
if (read_rs1) begin
|
||||
rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data;
|
||||
end
|
||||
rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data;
|
||||
rsp_rs3_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data;
|
||||
|
||||
assert(read_rs1 || rsp_wid == gpr_req_if.wid);
|
||||
end
|
||||
end
|
||||
|
||||
// outputs
|
||||
wire [`NR_BITS-1:0] rs1 = read_rs1 ? gpr_req_if.rs1 : gpr_req_if.rs3;
|
||||
assign raddr1 = {gpr_req_if.wid, rs1};
|
||||
assign gpr_req_if.ready = ~rs3_delay;
|
||||
|
||||
assign gpr_rsp_if.valid = rsp_valid;
|
||||
assign gpr_rsp_if.wid = rsp_wid;
|
||||
assign gpr_rsp_if.PC = rsp_pc;
|
||||
assign gpr_rsp_if.rs1_data = rsp_rs1_data;
|
||||
assign gpr_rsp_if.rs2_data = rsp_rs2_data;
|
||||
assign gpr_rsp_if.rs3_data = rsp_rs3_data;
|
||||
|
||||
endmodule
|
||||
@@ -12,21 +12,24 @@ module VX_gpr_ram (
|
||||
);
|
||||
`ifndef ASIC
|
||||
|
||||
reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_THREADS-1:0][31:0] q1, q2;
|
||||
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (we[i]) begin
|
||||
ram[waddr][i][0] <= wdata[i][07:00];
|
||||
ram[waddr][i][1] <= wdata[i][15:08];
|
||||
ram[waddr][i][2] <= wdata[i][23:16];
|
||||
ram[waddr][i][3] <= wdata[i][31:24];
|
||||
mem[waddr][i][0] <= wdata[i][07:00];
|
||||
mem[waddr][i][1] <= wdata[i][15:08];
|
||||
mem[waddr][i][2] <= wdata[i][23:16];
|
||||
mem[waddr][i][3] <= wdata[i][31:24];
|
||||
end
|
||||
end
|
||||
q1 <= mem[rs1];
|
||||
q2 <= mem[rs2];
|
||||
end
|
||||
|
||||
assign rs1_data = ram[rs1];
|
||||
assign rs2_data = ram[rs2];
|
||||
assign rs1_data = q1;
|
||||
assign rs2_data = q2;
|
||||
|
||||
`else
|
||||
|
||||
|
||||
@@ -15,8 +15,15 @@ module VX_gpr_stage #(
|
||||
);
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg rsp_valid;
|
||||
reg [`NW_BITS-1:0] rsp_wid;
|
||||
reg [31:0] rsp_pc;
|
||||
reg rs1_is_zero, rs2_is_zero;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data;
|
||||
wire [`NW_BITS+`NR_BITS-1:0] raddr1;
|
||||
wire [`NW_BITS+`NR_BITS-1:0] raddr1, raddr2;
|
||||
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
|
||||
VX_gpr_ram gpr_ram (
|
||||
.clk (clk),
|
||||
@@ -24,60 +31,77 @@ module VX_gpr_stage #(
|
||||
.waddr ({writeback_if.wid, writeback_if.rd}),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (raddr1),
|
||||
.rs2 ({gpr_req_if.wid, gpr_req_if.rs2}),
|
||||
.rs2 (raddr2),
|
||||
.rs1_data (rs1_data),
|
||||
.rs2_data (rs2_data)
|
||||
);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.rs1_data (rs1_data),
|
||||
.rs2_data (rs2_data),
|
||||
.raddr1 (raddr1),
|
||||
.gpr_req_if (gpr_req_if),
|
||||
.gpr_rsp_if (gpr_rsp_if)
|
||||
);
|
||||
`else
|
||||
reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data;
|
||||
reg rsp_valid;
|
||||
reg [`NW_BITS-1:0] rsp_wid;
|
||||
reg [31:0] rsp_pc;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rsp_valid <= 0;
|
||||
rsp_wid <= 0;
|
||||
rsp_pc <= 0;
|
||||
rsp_rs1_data <= 0;
|
||||
rsp_rs2_data <= 0;
|
||||
rsp_valid <= 0;
|
||||
rsp_wid <= 0;
|
||||
rsp_pc <= 0;
|
||||
rs1_is_zero <= 0;
|
||||
rs2_is_zero <= 0;
|
||||
end else begin
|
||||
rsp_valid <= gpr_req_if.valid;
|
||||
rsp_wid <= gpr_req_if.wid;
|
||||
rsp_pc <= gpr_req_if.PC;
|
||||
rsp_rs1_data <= (gpr_req_if.rs1 == 0) ? (`NUM_THREADS*32)'(0) : rs1_data;
|
||||
rsp_rs2_data <= (gpr_req_if.rs2 == 0) ? (`NUM_THREADS*32)'(0) : rs2_data;
|
||||
rsp_valid <= gpr_req_if.valid;
|
||||
rsp_wid <= gpr_req_if.wid;
|
||||
rsp_pc <= gpr_req_if.PC;
|
||||
rs1_is_zero <= (0 == gpr_req_if.rs1);
|
||||
rs2_is_zero <= (0 == gpr_req_if.rs2);
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
reg read_rs3, save_rs3;
|
||||
|
||||
wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && !read_rs3;
|
||||
wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rs3_data <= 0;
|
||||
read_rs3 <= 0;
|
||||
end else begin
|
||||
if (rs3_delay) begin
|
||||
read_rs3 <= 1;
|
||||
save_rs3 <= 1;
|
||||
end else if (read_fire) begin
|
||||
read_rs3 <= 0;
|
||||
end
|
||||
if (save_rs3) begin
|
||||
rs3_data <= rs1_data;
|
||||
save_rs3 <= 0;
|
||||
end
|
||||
assert(!read_rs3 || rsp_wid == gpr_req_if.wid);
|
||||
end
|
||||
end
|
||||
|
||||
assign raddr1 = {gpr_req_if.wid, (rs3_delay ? gpr_req_if.rs3 : gpr_req_if.rs1)};
|
||||
assign gpr_req_if.ready = ~rs3_delay;
|
||||
assign gpr_rsp_if.rs3_data = rs3_data;
|
||||
|
||||
`else
|
||||
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
|
||||
assign gpr_req_if.ready = 1;
|
||||
|
||||
assign gpr_rsp_if.valid = rsp_valid;
|
||||
assign gpr_rsp_if.wid = rsp_wid;
|
||||
assign gpr_rsp_if.PC = rsp_pc;
|
||||
assign gpr_rsp_if.rs1_data = rsp_rs1_data;
|
||||
assign gpr_rsp_if.rs2_data = rsp_rs2_data;
|
||||
assign gpr_rsp_if.rs3_data = 0;
|
||||
|
||||
`UNUSED_VAR (gpr_req_if.valid);
|
||||
`UNUSED_VAR (gpr_req_if.rs3);
|
||||
`UNUSED_VAR (gpr_req_if.use_rs3);
|
||||
`UNUSED_VAR (gpr_rsp_if.ready);
|
||||
|
||||
`endif
|
||||
|
||||
assign gpr_rsp_if.rs1_data = rs1_is_zero ? (`NUM_THREADS*32)'(0) : rs1_data;
|
||||
assign gpr_rsp_if.rs2_data = rs2_is_zero ? (`NUM_THREADS*32)'(0) : rs2_data;
|
||||
assign gpr_rsp_if.valid = rsp_valid;
|
||||
assign gpr_rsp_if.wid = rsp_wid;
|
||||
assign gpr_rsp_if.PC = rsp_pc;
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
||||
@@ -20,13 +20,13 @@ module VX_ibuffer #(
|
||||
localparam ADDRW = $clog2(SIZE);
|
||||
localparam NWARPSW = $clog2(`NUM_WARPS+1);
|
||||
|
||||
reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0];
|
||||
|
||||
wire [`NUM_WARPS-1:0] q_full;
|
||||
wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size;
|
||||
wire [DATAW-1:0] q_data_in;
|
||||
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
|
||||
|
||||
reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
|
||||
reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0];
|
||||
|
||||
wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready;
|
||||
wire deq_fire = ibuf_deq_if.valid && ibuf_deq_if.ready;
|
||||
@@ -36,7 +36,7 @@ module VX_ibuffer #(
|
||||
wire writing = enq_fire && (i == ibuf_enq_if.wid);
|
||||
wire reading = deq_fire && (i == ibuf_deq_if.wid);
|
||||
|
||||
wire is_slot0 = ((0 == size_r[i]) || ((1 == size_r[i]) && reading));
|
||||
wire is_slot0 = (0 == size_r[i]) || ((1 == size_r[i]) && reading);
|
||||
|
||||
wire push = writing && !is_slot0;
|
||||
wire pop = reading && (size_r[i] != 1);
|
||||
@@ -48,32 +48,33 @@ module VX_ibuffer #(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (push),
|
||||
.data_in (q_data_in),
|
||||
.pop (pop),
|
||||
.data_in (q_data_in),
|
||||
.data_out (q_data_prev[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (writing && is_slot0) begin
|
||||
q_data_out[i] <= q_data_in;
|
||||
end
|
||||
if (pop) begin
|
||||
q_data_out[i] <= q_data_prev[i];
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
size_r[i] <= 0;
|
||||
end else begin
|
||||
if (writing && !reading) begin
|
||||
size_r[i] <= size_r[i] + SIZEW'(1);
|
||||
if (writing) begin
|
||||
if (is_slot0) begin
|
||||
q_data_out[i] <= q_data_in;
|
||||
end
|
||||
if (!reading) begin
|
||||
size_r[i] <= size_r[i] + SIZEW'(1);
|
||||
end
|
||||
end
|
||||
if (reading && !writing) begin
|
||||
size_r[i] <= size_r[i] - SIZEW'(1);
|
||||
if (reading) begin
|
||||
if (size_r[i] != 1) begin
|
||||
q_data_out[i] <= q_data_prev[i];
|
||||
end
|
||||
if (!writing) begin
|
||||
size_r[i] <= size_r[i] - SIZEW'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -29,7 +29,7 @@ module VX_icache_stage #(
|
||||
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (icache_req_fire) begin
|
||||
if (icache_req_fire) begin
|
||||
rsp_PC_buf[req_tag] <= ifetch_req_if.PC;
|
||||
rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask;
|
||||
end
|
||||
|
||||
@@ -41,9 +41,9 @@
|
||||
|
||||
`define STRINGIFY(x) `"x`"
|
||||
|
||||
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
if (!(cond)) $error msg; \
|
||||
`define STATIC_ASSERT(cond, msg) \
|
||||
generate \
|
||||
if (!(cond)) $error msg; \
|
||||
endgenerate
|
||||
|
||||
`define ENABLE_TRACING /* verilator tracing_on */
|
||||
@@ -51,8 +51,8 @@
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *)
|
||||
`define RELAXED_RW_BRAM (* syn_ramstyle = "no_rw_check" *)
|
||||
`define USE_FAST_BRAM (* ramstyle="mlab" *)
|
||||
`define NO_RW_RAM_CHECK (* ramstyle="no_rw_check" *)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
2
hw/rtl/cache/VX_bank.v
vendored
2
hw/rtl/cache/VX_bank.v
vendored
@@ -447,6 +447,8 @@ module VX_bank #(
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
|
||||
end else begin
|
||||
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0;
|
||||
end
|
||||
`endif
|
||||
|
||||
|
||||
1
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
1
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
@@ -58,6 +58,7 @@ module VX_cache_miss_resrv #(
|
||||
);
|
||||
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
|
||||
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
|
||||
|
||||
reg [MRVQ_SIZE-1:0] valid_table;
|
||||
reg [MRVQ_SIZE-1:0] ready_table;
|
||||
reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr;
|
||||
|
||||
29
hw/rtl/cache/VX_tag_data_store.v
vendored
29
hw/rtl/cache/VX_tag_data_store.v
vendored
@@ -30,7 +30,6 @@ module VX_tag_data_store #(
|
||||
input wire fill_sent
|
||||
);
|
||||
|
||||
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0][7:0] data [`BANK_LINE_COUNT-1:0];
|
||||
reg [`TAG_SELECT_BITS-1:0] tag [`BANK_LINE_COUNT-1:0];
|
||||
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0];
|
||||
reg [`BANK_LINE_COUNT-1:0] dirty;
|
||||
@@ -40,7 +39,6 @@ module VX_tag_data_store #(
|
||||
assign read_dirty = dirty [read_addr];
|
||||
assign read_dirtyb = dirtyb [read_addr];
|
||||
assign read_tag = tag [read_addr];
|
||||
assign read_data = data [read_addr];
|
||||
|
||||
wire do_write = (| write_enable);
|
||||
|
||||
@@ -69,15 +67,26 @@ module VX_tag_data_store #(
|
||||
if (invalidate) begin
|
||||
valid[write_addr] <= 0;
|
||||
end
|
||||
|
||||
for (integer j = 0; j < `BANK_LINE_WORDS; j++) begin
|
||||
for (integer i = 0; i < WORD_SIZE; i++) begin
|
||||
if (write_enable[j][i]) begin
|
||||
data[write_addr][j][i] <= write_data[j * `WORD_WIDTH + i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [(`BANK_LINE_WORDS * WORD_SIZE)-1:0] ram_wren;
|
||||
assign ram_wren = write_enable & {(`BANK_LINE_WORDS * WORD_SIZE){!stall_bank_pipe}};
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(`BANK_LINE_WORDS * WORD_SIZE * 8),
|
||||
.SIZE(`BANK_LINE_COUNT),
|
||||
.BYTEENW(`BANK_LINE_WORDS * WORD_SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(1)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(write_addr),
|
||||
.raddr(read_addr),
|
||||
.wren(ram_wren),
|
||||
.rden(1'b1),
|
||||
.din(write_data),
|
||||
.dout(read_data)
|
||||
);
|
||||
|
||||
endmodule
|
||||
117
hw/rtl/libs/VX_dp_ram.v
Normal file
117
hw/rtl/libs/VX_dp_ram.v
Normal file
@@ -0,0 +1,117 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// VX_dp_ram: memory array with one write port (waddr/wren/din) and one read
// port (raddr/rden/dout), both driven from the same clock.
//
// Parameters:
//   DATAW    - width of din/dout and of each memory word.
//   SIZE     - number of words in the array.
//   BYTEENW  - width of wren. When > 1, wren[i] enables writing bits
//              [i*8 +: 8] of the word; otherwise wren enables a full-word write.
//              NOTE(review): the byte slices index i*8 +: 8, so the BYTEENW > 1
//              paths appear to assume DATAW == 8*BYTEENW - confirm with callers.
//   BUFFERED - non-zero: dout comes from a register loaded on the clock edge
//              when rden is set. Zero: dout is read combinationally from
//              mem[raddr] and rden is unused.
//   RWCHECK  - only consulted when BUFFERED is zero. Non-zero adds, under
//              `SYNTHESIS only, a registered bypass of the written data when a
//              write was requested to the address being read.
//   ADDRW    - address width, derived from SIZE.
//   SIZEW    - derived from SIZE; not referenced inside this module body.
module VX_dp_ram #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter BYTEENW = 1,
|
||||
parameter BUFFERED = 1,
|
||||
parameter RWCHECK = 1,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter SIZEW = $clog2(SIZE+1)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [ADDRW-1:0] raddr,
|
||||
input wire [BYTEENW-1:0] wren,
|
||||
input wire rden,
|
||||
input wire [DATAW-1:0] din,
|
||||
output wire [DATAW-1:0] dout
|
||||
);
|
||||
|
||||
// Registered-read variant: dout is driven by dout_r.
if (BUFFERED) begin
|
||||
|
||||
reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
reg [DATAW-1:0] dout_r;
|
||||
|
||||
// Per-byte write enables; the read register is loaded in the same always block.
if (BYTEENW > 1) begin
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8];
|
||||
end
|
||||
if (rden)
|
||||
dout_r <= mem[raddr];
|
||||
end
|
||||
// Single write enable: the whole word is written at once.
end else begin
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[waddr] <= din;
|
||||
if (rden)
|
||||
dout_r <= mem[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
assign dout = dout_r;
|
||||
|
||||
// Unbuffered variants: dout is a combinational read of mem[raddr].
end else begin
|
||||
|
||||
`UNUSED_VAR(rden)
|
||||
|
||||
if (RWCHECK) begin
|
||||
|
||||
reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[waddr] <= din;
|
||||
end
|
||||
end
|
||||
|
||||
// Synthesis-only bypass: din_r and bypass_r are registered on the clock edge,
// and dout selects din_r instead of mem[raddr] whenever bypass_r is set.
`ifdef SYNTHESIS
|
||||
reg [DATAW-1:0] din_r;
|
||||
wire writing;
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
assign writing = (| wren);
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
// Byte i takes din when it is being written, else the byte stored at mem[waddr].
din_r[i * 8 +: 8] <= wren[i] ? din[i * 8 +: 8] : mem[waddr][i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
assign writing = wren;
|
||||
always @(posedge clk) begin
|
||||
din_r <= din;
|
||||
end
|
||||
end
|
||||
|
||||
reg bypass_r;
|
||||
always @(posedge clk) begin
|
||||
// Records that a write was requested while raddr equalled waddr.
bypass_r <= writing && (raddr == waddr);
|
||||
end
|
||||
|
||||
assign dout = bypass_r ? din_r : mem[raddr];
|
||||
`else
|
||||
assign dout = mem[raddr];
|
||||
`endif
|
||||
|
||||
// RWCHECK == 0: plain array with combinational read and no bypass logic.
end else begin
|
||||
|
||||
reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < BYTEENW; i++) begin
|
||||
if (wren[i])
|
||||
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
mem[waddr] <= din;
|
||||
end
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -19,15 +19,10 @@ module VX_generic_queue #(
|
||||
);
|
||||
`STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!"))
|
||||
|
||||
always @(*) begin
|
||||
assert(!pop || !empty);
|
||||
assert(!push || !full);
|
||||
end
|
||||
if (SIZE == 1) begin
|
||||
|
||||
if (SIZE == 1) begin // (SIZE == 1)
|
||||
|
||||
reg [SIZEW-1:0] size_r;
|
||||
reg [DATAW-1:0] head_r;
|
||||
reg size_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
@@ -35,8 +30,10 @@ module VX_generic_queue #(
|
||||
size_r <= 0;
|
||||
end else begin
|
||||
if (push && !pop) begin
|
||||
assert(!full);
|
||||
size_r <= 1;
|
||||
end else if (pop && !push) begin
|
||||
assert(!empty);
|
||||
size_r <= 0;
|
||||
end
|
||||
if (push) begin
|
||||
@@ -50,62 +47,13 @@ module VX_generic_queue #(
|
||||
assign full = (size_r != 0);
|
||||
assign size = size_r;
|
||||
|
||||
end else begin // (SIZE > 1)
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
scfifo scfifo_component (
|
||||
.clock (clk),
|
||||
.data (data_in),
|
||||
.rdreq (pop),
|
||||
.wrreq (push),
|
||||
.empty (empty),
|
||||
.full (full),
|
||||
.q (data_out),
|
||||
.sclr (reset),
|
||||
.usedw (),
|
||||
.aclr (),
|
||||
.almost_empty (),
|
||||
.almost_full (),
|
||||
.eccstatus ()
|
||||
);
|
||||
|
||||
defparam
|
||||
scfifo_component.lpm_type = "scfifo",
|
||||
scfifo_component.intended_device_family = "Arria 10",
|
||||
scfifo_component.lpm_numwords = SIZE,
|
||||
scfifo_component.lpm_width = DATAW,
|
||||
scfifo_component.lpm_widthu = $clog2(SIZE),
|
||||
scfifo_component.lpm_showahead = "ON",
|
||||
scfifo_component.add_ram_output_register = (BUFFERED ? "ON" : "ON"),
|
||||
scfifo_component.use_eab = "ON";
|
||||
|
||||
reg [SIZEW-1:0] size_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
size_r <= 0;
|
||||
end else begin
|
||||
if (push && !pop) begin
|
||||
size_r <= size_r + SIZEW'(1);
|
||||
end
|
||||
if (pop && !push) begin
|
||||
size_r <= size_r - SIZEW'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign size = size_r;
|
||||
|
||||
`else
|
||||
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0];
|
||||
end else begin
|
||||
|
||||
if (0 == BUFFERED) begin
|
||||
|
||||
reg [SIZEW-1:0] size_r;
|
||||
reg [ADDRW:0] rd_ptr_r;
|
||||
reg [ADDRW:0] wr_ptr_r;
|
||||
reg [ADDRW-1:0] used_r;
|
||||
|
||||
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[ADDRW-1:0];
|
||||
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[ADDRW-1:0];
|
||||
@@ -114,111 +62,127 @@ module VX_generic_queue #(
|
||||
if (reset) begin
|
||||
rd_ptr_r <= 0;
|
||||
wr_ptr_r <= 0;
|
||||
size_r <= 0;
|
||||
used_r <= 0;
|
||||
end else begin
|
||||
if (push) begin
|
||||
assert(!full);
|
||||
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1);
|
||||
if (!pop) begin
|
||||
size_r <= size_r + SIZEW'(1);
|
||||
used_r <= used_r + ADDRW'(1);
|
||||
end
|
||||
end
|
||||
if (pop) begin
|
||||
assert(!empty);
|
||||
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1);
|
||||
if (!push) begin
|
||||
size_r <= size_r - SIZEW'(1);
|
||||
used_r <= used_r - ADDRW'(1);
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
data[wr_ptr_a] <= data_in;
|
||||
end
|
||||
end
|
||||
VX_dp_ram #(
|
||||
.DATAW(DATAW),
|
||||
.SIZE(SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(1)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_a),
|
||||
.raddr(rd_ptr_a),
|
||||
.wren(push),
|
||||
.rden(pop),
|
||||
.din(data_in),
|
||||
.dout(data_out)
|
||||
);
|
||||
|
||||
assign data_out = data[rd_ptr_a];
|
||||
assign empty = (wr_ptr_r == rd_ptr_r);
|
||||
assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]);
|
||||
assign size = size_r;
|
||||
assign empty = (wr_ptr_r == rd_ptr_r);
|
||||
assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]);
|
||||
assign size = {full, used_r};
|
||||
|
||||
end else begin
|
||||
|
||||
reg [SIZEW-1:0] size_r;
|
||||
reg [DATAW-1:0] head_r;
|
||||
reg [DATAW-1:0] curr_r;
|
||||
wire [DATAW-1:0] dout;
|
||||
|
||||
reg [DATAW-1:0] din_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_next_r;
|
||||
reg [ADDRW-1:0] rd_ptr_n_r;
|
||||
reg [ADDRW-1:0] used_r;
|
||||
reg empty_r;
|
||||
reg full_r;
|
||||
reg bypass_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
size_r <= 0;
|
||||
curr_r <= 0;
|
||||
wr_ptr_r <= 0;
|
||||
rd_ptr_r <= 0;
|
||||
rd_ptr_next_r <= 1;
|
||||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
wr_ptr_r <= 0;
|
||||
rd_ptr_r <= 0;
|
||||
rd_ptr_n_r <= 1;
|
||||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
used_r <= 0;
|
||||
end else begin
|
||||
if (push) begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(1);
|
||||
|
||||
if (!pop) begin
|
||||
empty_r <= 0;
|
||||
if (size_r == SIZEW'(SIZE-1)) begin
|
||||
if (used_r == ADDRW'(SIZE-1)) begin
|
||||
full_r <= 1;
|
||||
end
|
||||
size_r <= size_r + SIZEW'(1);
|
||||
used_r <= used_r + ADDRW'(1);
|
||||
end
|
||||
end
|
||||
|
||||
if (pop) begin
|
||||
rd_ptr_r <= rd_ptr_next_r;
|
||||
rd_ptr_r <= rd_ptr_n_r;
|
||||
|
||||
if (SIZE > 2) begin
|
||||
rd_ptr_next_r <= rd_ptr_r + ADDRW'(2);
|
||||
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
|
||||
end else begin // (SIZE == 2);
|
||||
rd_ptr_next_r <= ~rd_ptr_next_r;
|
||||
rd_ptr_n_r <= ~rd_ptr_n_r;
|
||||
end
|
||||
|
||||
if (!push) begin
|
||||
if (size_r == SIZEW'(1)) begin
|
||||
assert(rd_ptr_next_r == wr_ptr_r);
|
||||
full_r <= 0;
|
||||
if (used_r == ADDRW'(1)) begin
|
||||
assert(rd_ptr_n_r == wr_ptr_r);
|
||||
empty_r <= 1;
|
||||
end;
|
||||
full_r <= 0;
|
||||
size_r <= size_r - SIZEW'(1);
|
||||
used_r <= used_r - ADDRW'(1);
|
||||
end
|
||||
end
|
||||
|
||||
bypass_r <= push && (empty_r || ((size_r == SIZEW'(1)) && pop));
|
||||
curr_r <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
head_r <= 0;
|
||||
end else begin
|
||||
if (push) begin
|
||||
data[wr_ptr_r] <= data_in;
|
||||
end
|
||||
head_r <= data[pop ? rd_ptr_next_r : rd_ptr_r];
|
||||
end
|
||||
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
|
||||
bypass_r <= 1;
|
||||
din_r <= data_in;
|
||||
end else if (pop)
|
||||
bypass_r <= 0;
|
||||
end
|
||||
|
||||
assign data_out = bypass_r ? curr_r : head_r;
|
||||
VX_dp_ram #(
|
||||
.DATAW(DATAW),
|
||||
.SIZE(SIZE),
|
||||
.BUFFERED(1),
|
||||
.RWCHECK(0)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_r),
|
||||
.raddr(rd_ptr_n_r),
|
||||
.wren(push),
|
||||
.rden(pop),
|
||||
.din(data_in),
|
||||
.dout(dout)
|
||||
);
|
||||
|
||||
assign data_out = bypass_r ? din_r : dout;
|
||||
assign empty = empty_r;
|
||||
assign full = full_r;
|
||||
assign size = size_r;
|
||||
assign size = {full_r, used_r};
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -43,17 +43,19 @@ set_global_assignment -name VERILOG_MACRO FPU_FAST
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
set_global_assignment -name POWER_USE_TA_VALUE 65
|
||||
set_global_assignment -name SEED 1
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
|
||||
set idx 0
|
||||
foreach arg $q_args_orig {
|
||||
|
||||
Reference in New Issue
Block a user