Merge remote-tracking branch 'origin/main' into asplos-ae
This commit is contained in:
6
.gitmodules
vendored
6
.gitmodules
vendored
@@ -1,6 +1,8 @@
|
|||||||
[submodule "src/main/resources/vsrc/vortex"]
|
[submodule "src/main/resources/vsrc/vortex"]
|
||||||
path = src/main/resources/vsrc/vortex
|
path = src/main/resources/vsrc/vortex
|
||||||
url = https://github.com/hansungk/vortex.git
|
url = https://github.com/hansungk/vortex.git
|
||||||
[submodule "radpie"]
|
[submodule "cyclotron-main"]
|
||||||
path = radpie
|
path = cyclotron-main
|
||||||
|
url = https://github.com/hansungk/cyclotron-main.git
|
||||||
|
[submodule "cyclotron"]
|
||||||
url = https://github.com/hansungk/cyclotron.git
|
url = https://github.com/hansungk/cyclotron.git
|
||||||
|
|||||||
1
cyclotron
Submodule
1
cyclotron
Submodule
Submodule cyclotron added at 073584b083
1
cyclotron-main
Submodule
1
cyclotron-main
Submodule
Submodule cyclotron-main added at 06081eb052
39
radiance.mk
39
radiance.mk
@@ -3,15 +3,18 @@
|
|||||||
##############################################################
|
##############################################################
|
||||||
|
|
||||||
VORTEX_SRC_DIR = $(base_dir)/generators/radiance/src/main/resources/vsrc/vortex
|
VORTEX_SRC_DIR = $(base_dir)/generators/radiance/src/main/resources/vsrc/vortex
|
||||||
RADPIE_SRC_DIR = $(base_dir)/generators/radiance/radpie
|
CYCLOTRON_SRC_DIR = $(base_dir)/generators/radiance/cyclotron
|
||||||
RADPIE_BUILD_DIR = $(RADPIE_SRC_DIR)/target/release
|
CYCLOTRON_BUILD_DIR = $(CYCLOTRON_SRC_DIR)/target/debug
|
||||||
|
# CYCLOTRON_BUILD_DIR = $(CYCLOTRON_SRC_DIR)/target/release
|
||||||
|
RADIANCE_CSRC_DIR = $(base_dir)/generators/radiance/src/main/resources/csrc
|
||||||
|
RADIANCE_VSRC_DIR = $(base_dir)/generators/radiance/src/main/resources/vsrc
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
# THE FOLLOWING MUST BE += operators
|
# THE FOLLOWING MUST BE += operators
|
||||||
##################################################################
|
##################################################################
|
||||||
|
|
||||||
# EXTRA_SIM_REQS += radpie
|
EXTRA_SIM_REQS += cyclotron
|
||||||
# EXTRA_SIM_LDFLAGS += -L$(RADPIE_BUILD_DIR) -Wl,-rpath,$(RADPIE_BUILD_DIR) -lradpie
|
EXTRA_SIM_LDFLAGS += -L$(CYCLOTRON_BUILD_DIR) -Wl,-rpath,$(CYCLOTRON_BUILD_DIR) -lcyclotron
|
||||||
ifeq ($(shell echo $(CONFIG) | grep -E "SynConfig$$"),$(CONFIG))
|
ifeq ($(shell echo $(CONFIG) | grep -E "SynConfig$$"),$(CONFIG))
|
||||||
EXTRA_SIM_PREPROC_DEFINES += +define+SYNTHESIS +define+NDEBUG +define+DPI_DISABLE
|
EXTRA_SIM_PREPROC_DEFINES += +define+SYNTHESIS +define+NDEBUG +define+DPI_DISABLE
|
||||||
endif
|
endif
|
||||||
@@ -31,21 +34,25 @@ VCS_NONCC_OPTS += +vcs+initreg+random
|
|||||||
|
|
||||||
# cargo handles building of Rust files all on its own, so make this a PHONY
|
# cargo handles building of Rust files all on its own, so make this a PHONY
|
||||||
# target to run cargo unconditionally
|
# target to run cargo unconditionally
|
||||||
.PHONY: radpie
|
.PHONY: cyclotron
|
||||||
radpie:
|
cyclotron:
|
||||||
cd $(RADPIE_SRC_DIR) && cargo build --release
|
cd $(CYCLOTRON_SRC_DIR) && cargo build # --release
|
||||||
|
|
||||||
EXTRA_SIM_REQS += vortex_vsrc.$(CONFIG)
|
EXTRA_SIM_REQS += vortex_vsrc.$(CONFIG)
|
||||||
# below manipulation of VORTEX_VLOG_SOURCES doesn't work if we try to reuse
|
# below manipulation of RADIANCE_EXTERNAL_SRCS doesn't work if we try to reuse
|
||||||
# $(call lookup_srcs) from common.mk, the variable doesn't expand somehow
|
# $(call lookup_srcs) from common.mk, the variable doesn't expand somehow
|
||||||
ifeq ($(shell which fd 2> /dev/null),)
|
ifeq ($(shell which fdfd 2> /dev/null),)
|
||||||
VORTEX_VLOG_SOURCES := $(shell find -L $(VORTEX_SRC_DIR) -type f -iname "*.sv" -o -iname "*.vh" -o -iname "*.v")
|
# RADIANCE_EXTERNAL_SRCS := $(shell find -L $(VORTEX_SRC_DIR) -type f -iname "*.sv" -o -iname "*.vh" -o -iname "*.v")
|
||||||
|
RADIANCE_EXTERNAL_SRCS := $(shell find -L $(RADIANCE_VSRC_DIR) -type f -iname "*.sv" -o -iname "*.vh" -o -iname "*.v")
|
||||||
|
RADIANCE_EXTERNAL_SRCS += $(shell find -L $(RADIANCE_CSRC_DIR) -type f)
|
||||||
else
|
else
|
||||||
VORTEX_VLOG_SOURCES := $(shell fd -L -t f -e "sv" -e "vh" -e "v" . $(VORTEX_SRC_DIR))
|
# RADIANCE_EXTERNAL_SRCS := $(shell fdfind -L -t f -e "sv" -e "vh" -e "v" . $(VORTEX_SRC_DIR))
|
||||||
|
RADIANCE_EXTERNAL_SRCS := $(shell fdfind -L -t f -e "sv" -e "vh" -e "v" . $(RADIANCE_VSRC_DIR))
|
||||||
|
RADIANCE_EXTERNAL_SRCS += $(shell fdfind -L -t f . $(RADIANCE_CSRC_DIR))
|
||||||
endif
|
endif
|
||||||
# VORTEX_COLLATERAL := $(patsubst $(VORTEX_SRC_DIR)%,$(GEN_COLLATERAL_DIR)%,$(VORTEX_VLOG_SOURCES))
|
|
||||||
# check if expanded
|
# for debug; check if expanded
|
||||||
# $(info VORTEX_VLOG_SOURCES: $(VORTEX_VLOG_SOURCES))
|
# $(info RADIANCE_EXTERNAL_SRCS: $(RADIANCE_EXTERNAL_SRCS))
|
||||||
|
|
||||||
# For every Vortex verilog source file, if there's a matching file in
|
# For every Vortex verilog source file, if there's a matching file in
|
||||||
# gen-collateral/, copy them over. This is a hacky way to ensure the changes
|
# gen-collateral/, copy them over. This is a hacky way to ensure the changes
|
||||||
@@ -53,8 +60,8 @@ endif
|
|||||||
# necessary when common.mk does not trigger chipyard jar rebuild upon verilog
|
# necessary when common.mk does not trigger chipyard jar rebuild upon verilog
|
||||||
# source updates, in which case we need to manually ensure the up-to-date-ness
|
# source updates, in which case we need to manually ensure the up-to-date-ness
|
||||||
# of gen-collateral/.
|
# of gen-collateral/.
|
||||||
vortex_vsrc.$(CONFIG): $(VORTEX_VLOG_SOURCES)
|
vortex_vsrc.$(CONFIG): $(RADIANCE_EXTERNAL_SRCS)
|
||||||
@for file in $(VORTEX_VLOG_SOURCES); do \
|
@for file in $(RADIANCE_EXTERNAL_SRCS); do \
|
||||||
filename=$$(basename "$$file"); \
|
filename=$$(basename "$$file"); \
|
||||||
if [ -f $(GEN_COLLATERAL_DIR)/$$filename ]; then \
|
if [ -f $(GEN_COLLATERAL_DIR)/$$filename ]; then \
|
||||||
if ! diff $$file $(GEN_COLLATERAL_DIR)/$$filename &>/dev/null ; then \
|
if ! diff $$file $(GEN_COLLATERAL_DIR)/$$filename &>/dev/null ; then \
|
||||||
|
|||||||
1
radpie
1
radpie
Submodule radpie deleted from 493b8e10a5
43
src/main/resources/csrc/SimEmulator.cc
Normal file
43
src/main/resources/csrc/SimEmulator.cc
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
#ifndef NO_VPI
|
||||||
|
#include <vpi_user.h>
|
||||||
|
#include <svdpi.h>
|
||||||
|
#endif
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
// Back-end entry points that the DPI wrappers in this file forward to.
// They are only declared here; the definitions are expected to be supplied at
// link time (presumably by the Rust `cyclotron` library -- confirm against the
// build flags).  C linkage keeps the symbol names unmangled.
extern "C" {

// One-time setup; receives the lane count unchanged from emulator_init().
void emulator_init_rs(int num_lanes);

// Per-cycle step; receives every argument unchanged from emulator_tick().
// The vec_* pointers refer to per-lane arrays owned by the caller.
void emulator_tick_rs(uint8_t *vec_a_ready,
                      uint8_t *vec_a_valid,
                      long long *vec_a_address,
                      uint8_t *vec_a_is_store,
                      int *vec_a_size,
                      long long *vec_a_data,
                      uint8_t *vec_d_ready,
                      uint8_t *vec_d_valid,
                      uint8_t *vec_d_is_store,
                      int *vec_d_size,
                      long long *vec_d_data,
                      uint8_t inflight,
                      uint8_t *finished);

}  // extern "C"
|
||||||
|
// extern "C" void emulator_generate_rs(uint8_t *vec_a_ready, uint8_t *vec_a_valid,
|
||||||
|
// long long *vec_a_address,
|
||||||
|
// uint8_t *vec_a_is_store, int *vec_a_size,
|
||||||
|
// long long *vec_a_data,
|
||||||
|
// uint8_t *vec_d_ready, uint8_t inflight,
|
||||||
|
// uint8_t *finished);
|
||||||
|
|
||||||
|
// DPI-C entry point imported by SimEmulator.v.  It is a thin trampoline that
// hands the lane count to emulator_init_rs() without modification.
extern "C" void emulator_init(int num_lanes) {
  emulator_init_rs(num_lanes);
}
|
||||||
|
|
||||||
|
// DPI-C entry point imported by SimEmulator.v.  Every argument is forwarded,
// in the same order and without modification, to emulator_tick_rs(); this
// function adds no logic of its own.
//
// The argument order must stay in sync with the `import "DPI-C"` declaration
// and the call site in SimEmulator.v.
extern "C" void emulator_tick(uint8_t *vec_a_ready,
                              uint8_t *vec_a_valid,
                              long long *vec_a_address,
                              uint8_t *vec_a_is_store,
                              int *vec_a_size,
                              long long *vec_a_data,
                              uint8_t *vec_d_ready,
                              uint8_t *vec_d_valid,
                              uint8_t *vec_d_is_store,
                              int *vec_d_size,
                              long long *vec_d_data,
                              uint8_t inflight,
                              uint8_t *finished) {
  emulator_tick_rs(vec_a_ready,
                   vec_a_valid,
                   vec_a_address,
                   vec_a_is_store,
                   vec_a_size,
                   vec_a_data,
                   vec_d_ready,
                   vec_d_valid,
                   vec_d_is_store,
                   vec_d_size,
                   vec_d_data,
                   inflight,
                   finished);
}
|
||||||
|
|
||||||
|
// extern "C" void emulator_generate(uint8_t *vec_a_ready, uint8_t *vec_a_valid,
|
||||||
|
// long long *vec_a_address,
|
||||||
|
// uint8_t *vec_a_is_store, int *vec_a_size,
|
||||||
|
// long long *vec_a_data, uint8_t *vec_d_ready,
|
||||||
|
// uint8_t inflight, uint8_t *finished) {
|
||||||
|
// emulator_generate_rs(vec_a_ready, vec_a_valid, vec_a_address, vec_a_is_store,
|
||||||
|
// vec_a_size, vec_a_data, vec_d_ready, inflight, finished);
|
||||||
|
// }
|
||||||
@@ -2,7 +2,6 @@
|
|||||||
#include <vpi_user.h>
|
#include <vpi_user.h>
|
||||||
#include <svdpi.h>
|
#include <svdpi.h>
|
||||||
#endif
|
#endif
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
extern "C" void memfuzz_init_rs(int num_lanes);
|
extern "C" void memfuzz_init_rs(int num_lanes);
|
||||||
|
|||||||
131
src/main/resources/vsrc/SimEmulator.v
Normal file
131
src/main/resources/vsrc/SimEmulator.v
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
`include "SimDefaults.vh"
|
||||||
|
|
||||||
|
// One-time initialisation hook of the emulator back-end, called from the
// `initial` block of SimEmulator with the module's NUM_LANES parameter.
//
// The argument is declared `int` (32-bit) so that it matches the C prototype
// `void emulator_init(int num_lanes)` in SimEmulator.cc.  Declaring it as
// `longint` would map to a 64-bit `long long` on the C side and leave the two
// declarations out of sync.
import "DPI-C" function void emulator_init(
  input int num_lanes
);
|
||||||
|
|
||||||
|
// Make sure to sync the parameters for:
|
||||||
|
// (1) import "DPI-C" declaration
|
||||||
|
// (2) C function declaration
|
||||||
|
// (3) DPI function calls inside initial/always blocks
|
||||||
|
// Per-cycle step of the emulator back-end.
//
// Directions are from the Verilog side: `input` arguments are passed to the C
// function, `output` arguments are written by it.  All per-lane arguments are
// unpacked arrays of `MAX_NUM_LANES entries, regardless of how many lanes the
// instantiating module actually uses.
import "DPI-C" function void emulator_tick (
  // Request (A) channel: ready goes to C, everything else comes back from C.
  input  bit     vec_a_ready    [`MAX_NUM_LANES],
  output bit     vec_a_valid    [`MAX_NUM_LANES],
  output longint vec_a_address  [`MAX_NUM_LANES],
  output bit     vec_a_is_store [`MAX_NUM_LANES],
  output int     vec_a_size     [`MAX_NUM_LANES],
  output longint vec_a_data     [`MAX_NUM_LANES],

  // Response (D) channel: ready comes back from C, everything else goes to C.
  output bit     vec_d_ready    [`MAX_NUM_LANES],
  input  bit     vec_d_valid    [`MAX_NUM_LANES],
  input  bit     vec_d_is_store [`MAX_NUM_LANES],
  input  int     vec_d_size     [`MAX_NUM_LANES],
  input  longint vec_d_data     [`MAX_NUM_LANES],

  // Scalar status flags.
  input  bit     inflight,
  output bit     finished
);
|
||||||
|
|
||||||
|
// Simulation-only wrapper that bridges flattened per-lane request/response
// buses to the DPI functions emulator_init() and emulator_tick().
//
// Each multi-bit per-lane signal is packed into a single wide vector, with
// lane `k` occupying the slice starting at bit WIDTH*k.
module SimEmulator #(parameter NUM_LANES = 4) (
  input clock,
  input reset,

  // Request (A) channel
  input  [NUM_LANES-1:0]                       a_ready,
  output [NUM_LANES-1:0]                       a_valid,
  output [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0]    a_address,
  output [NUM_LANES-1:0]                       a_is_store,
  output [`SIMMEM_LOGSIZE_WIDTH*NUM_LANES-1:0] a_size,
  output [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0]    a_data,

  // Response (D) channel
  output [NUM_LANES-1:0]                       d_ready,
  input  [NUM_LANES-1:0]                       d_valid,
  input  [NUM_LANES-1:0]                       d_is_store,
  input  [`SIMMEM_LOGSIZE_WIDTH*NUM_LANES-1:0] d_size,
  input  [`SIMMEM_DATA_WIDTH*NUM_LANES-1:0]    d_data,
  // TODO: d_mask

  input  inflight,
  output finished
);
  // Staging variables handed to the DPI call.
  //   __in_*  : written by C, read by Verilog  (C -> Verilog)
  //   __out_* : written by Verilog, read by C  (Verilog -> C)
  // The arrays are indexed in ascending order so that element k here is
  // element k of the C array, and they are sized by MAX_NUM_LANES because the
  // array sizes on the C side are static.
  bit     __out_a_ready    [0:`MAX_NUM_LANES-1];
  bit     __in_a_valid     [0:`MAX_NUM_LANES-1];
  longint __in_a_address   [0:`MAX_NUM_LANES-1];
  bit     __in_a_is_store  [0:`MAX_NUM_LANES-1];
  int     __in_a_size      [0:`MAX_NUM_LANES-1];
  longint __in_a_data      [0:`MAX_NUM_LANES-1];
  bit     __in_d_ready     [0:`MAX_NUM_LANES-1];
  bit     __out_d_valid    [0:`MAX_NUM_LANES-1];
  bit     __out_d_is_store [0:`MAX_NUM_LANES-1];
  int     __out_d_size     [0:`MAX_NUM_LANES-1];
  longint __out_d_data     [0:`MAX_NUM_LANES-1];
  bit     __out_inflight;
  bit     __in_finished;

  // Per-lane wiring between the flattened ports and the staging arrays.
  // Only the first NUM_LANES entries of each array are connected.
  genvar lane;
  generate
    for (lane = 0; lane < NUM_LANES; lane = lane + 1) begin
      // A channel
      assign __out_a_ready[lane] = a_ready[lane];
      assign a_valid[lane]       = __in_a_valid[lane];
      assign a_address[`SIMMEM_DATA_WIDTH*lane +: `SIMMEM_DATA_WIDTH]
        = __in_a_address[lane][`SIMMEM_DATA_WIDTH-1:0];
      assign a_is_store[lane]    = __in_a_is_store[lane];
      assign a_size[`SIMMEM_LOGSIZE_WIDTH*lane +: `SIMMEM_LOGSIZE_WIDTH]
        = __in_a_size[lane][`SIMMEM_LOGSIZE_WIDTH-1:0];
      assign a_data[`SIMMEM_DATA_WIDTH*lane +: `SIMMEM_DATA_WIDTH]
        = __in_a_data[lane][`SIMMEM_DATA_WIDTH-1:0];

      // D channel
      assign d_ready[lane]          = __in_d_ready[lane];
      assign __out_d_valid[lane]    = d_valid[lane];
      assign __out_d_is_store[lane] = d_is_store[lane];
      assign __out_d_size[lane]     = d_size[`SIMMEM_LOGSIZE_WIDTH*lane +: `SIMMEM_LOGSIZE_WIDTH];
      assign __out_d_data[lane]     = d_data[`SIMMEM_DATA_WIDTH*lane +: `SIMMEM_DATA_WIDTH];
    end
  endgenerate

  // Scalar status signals.
  assign __out_inflight = inflight;
  assign finished       = __in_finished;

  // Tell the back-end how many lanes this instance drives.
  initial begin
    emulator_init(NUM_LANES);
  end

  // The back-end is stepped once per rising clock edge.
  // negedge might make it easier to view waveform since DPI changes are
  // instant and make it look like they happen before the clockedge
  always @(posedge clock) begin
    if (reset) begin
      // While in reset, hold everything the C side would normally drive at
      // zero for the lanes in use; the DPI function is not called.
      for (integer i = 0; i < NUM_LANES; i = i + 1) begin
        __in_a_valid[i]    = 1'b0;
        __in_a_address[i]  = `SIMMEM_DATA_WIDTH'b0;
        __in_a_is_store[i] = 1'b0;
        __in_a_size[i]     = 32'b0;
        __in_a_data[i]     = `SIMMEM_DATA_WIDTH'b0;
        __in_d_ready[i]    = 1'b0;
      end
      __in_finished = 1'b0;
    end else begin
      // Argument order must match the DPI import and the C prototype.
      emulator_tick(
        __out_a_ready,
        __in_a_valid,
        __in_a_address,
        __in_a_is_store,
        __in_a_size,
        __in_a_data,

        __in_d_ready,
        __out_d_valid,
        __out_d_is_store,
        __out_d_size,
        __out_d_data,

        __out_inflight,
        __in_finished
      );
      // Debug aid, disabled:
      // for (integer tid = 0; tid < NUM_LANES; tid = tid + 1) begin
      //   $display("verilog: %04d a_valid[%d]=%d, a_address[%d]=0x%x, d_ready[%d]=%d",
      //     $time, tid, __in_a_valid[tid], tid, __in_a_address[tid], tid, __in_d_ready[tid]);
      // end
    end
  end
endmodule
|
||||||
@@ -47,7 +47,7 @@ module SimMemFuzzer #(parameter NUM_LANES = 4) (
|
|||||||
input inflight,
|
input inflight,
|
||||||
output finished
|
output finished
|
||||||
);
|
);
|
||||||
// "in": verilog->C, "out": C->verilog
|
// "in": C->verilog, "out": verilog->C
|
||||||
// need to be in ascending order to match with C indexing
|
// need to be in ascending order to match with C indexing
|
||||||
// C array sizes are static, so need to use MAX_NUM_LANES
|
// C array sizes are static, so need to use MAX_NUM_LANES
|
||||||
bit __out_a_ready [0:`MAX_NUM_LANES-1];
|
bit __out_a_ready [0:`MAX_NUM_LANES-1];
|
||||||
|
|||||||
246
src/main/scala/radiance/core/Emulator.scala
Normal file
246
src/main/scala/radiance/core/Emulator.scala
Normal file
@@ -0,0 +1,246 @@
|
|||||||
|
package radiance.core
|
||||||
|
|
||||||
|
import chisel3._
|
||||||
|
import chisel3.util._
|
||||||
|
import org.chipsalliance.cde.config.{Field, Parameters}
|
||||||
|
import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
|
||||||
|
import freechips.rocketchip.tilelink._
|
||||||
|
import freechips.rocketchip.diplomacy.{IdRange, AddressSet, BufferParams}
|
||||||
|
import radiance.memory.{SourceGenerator, TraceLine, TLPrintf}
|
||||||
|
|
||||||
|
/** Sizing parameters of a SIMT core.
  *
  * @param nWarps     number of warps in the core
  * @param nCoreLanes number of SIMT threads in the core
  * @param nMemLanes  number of memory lanes in the memory interface to the
  *                   cache; relates to the LSU lanes
  * @param nSrcIds    number of source IDs allocated to each of the nMemLanes
  */
case class SIMTCoreParams(
    nWarps: Int = 4,
    nCoreLanes: Int = 4,
    nMemLanes: Int = 4,
    nSrcIds: Int = 8
)
|
||||||
|
/** Parameters of a memory-trace-driven core.
  *
  * @param tracefilename  name of the trace file; the default is the
  *                       placeholder string "undefined"
  * @param traceHasSource flag stored alongside the file name; presumably tells
  *                       the trace reader whether each trace line carries a
  *                       source id -- confirm against the consumer
  */
case class MemtraceCoreParams(
    tracefilename: String = "undefined",
    traceHasSource: Boolean = false
)
|
||||||
|
|
||||||
|
/** Config key holding the optional [[SIMTCoreParams]]; defaults to `None`. */
case object SIMTCoreKey
    extends Field[Option[SIMTCoreParams]](None)

/** Config key holding the optional [[MemtraceCoreParams]]; defaults to `None`. */
case object MemtraceCoreKey
    extends Field[Option[MemtraceCoreParams]](None)
|
||||||
|
|
||||||
|
// #############################################################################
|
||||||
|
// FIXME: copy-paste from MemFuzzer
|
||||||
|
// #############################################################################
|
||||||
|
|
||||||
|
/** Diplomatic wrapper around the DPI-backed emulator.
  *
  * Creates one TileLink client node per lane and funnels all of them through
  * a single identity node, which is what the rest of the design attaches to.
  *
  * @param numLanes        number of lanes, i.e. number of client nodes created
  * @param numSrcIds       size of the source-id range `[0, numSrcIds)` given
  *                        to every lane
  * @param wordSizeInBytes word size forwarded to [[EmulatorImp]]
  */
class Emulator(
    numLanes: Int,
    numSrcIds: Int,
    wordSizeInBytes: Int
)(implicit p: Parameters)
    extends LazyModule {

  // One single-master client port per lane, named "Emulator<lane index>".
  val laneNodes = Seq.tabulate(numLanes) { laneIdx =>
    val master = TLMasterParameters.v1(
      name = "Emulator" + laneIdx.toString,
      sourceId = IdRange(0, numSrcIds)
      // visibility = Seq(AddressSet(0x0000, 0xffffff))
    )
    val port = TLMasterPortParameters.v1(Seq(master))
    TLClientNode(Seq(port))
  }

  // Aggregation point: every lane node is bound into this identity node.
  val node = TLIdentityNode()
  laneNodes.foreach { lane => node := lane }

  lazy val module = new EmulatorImp(this, numLanes, numSrcIds, wordSizeInBytes)
}
|
||||||
|
|
||||||
|
/** Hardware implementation of [[Emulator]].
  *
  * Instantiates the [[SimEmulator]] black box and, for every lane, converts
  * the requests it produces into TileLink Get/Put messages on that lane's
  * client node, and feeds TileLink D-channel responses back to the black box.
  *
  * @param outer           the enclosing [[Emulator]] lazy module
  * @param numLanes        number of lanes (must match `outer.laneNodes`)
  * @param numSrcIds       number of source ids available per lane
  * @param wordSizeInBytes smallest access size issued on TileLink; narrower
  *                        requests are widened to one word
  */
class EmulatorImp(
    outer: Emulator,
    numLanes: Int,
    numSrcIds: Int,
    wordSizeInBytes: Int
) extends LazyModuleImp(outer) {
  val io = IO(new Bundle {
    val finished = Output(Bool())
  })
  val sim = Module(new SimEmulator(numLanes))

  sim.io.clock := clock
  sim.io.reset := reset.asBool

  // sim.io.a.ready is driven exactly once, below, from the per-lane
  // `laneReqs(i).ready`.  An additional direct connection from
  // tlOut.a.ready used to sit here; it never had any effect because the
  // later connection overrode it (last connect wins), so it has been removed.

  io.finished := sim.io.finished

  // connect Verilog <-> Chisel IO
  // Verilog IO flattened across all lanes
  val laneReqs = Wire(Vec(numLanes, Decoupled(new TraceLine)))
  val addrW = laneReqs(0).bits.address.getWidth
  val sizeW = laneReqs(0).bits.size.getWidth
  val dataW = laneReqs(0).bits.data.getWidth
  laneReqs.zipWithIndex.foreach { case (req, i) =>
    req.valid := sim.io.a.valid(i)
    req.bits.source := 0.U // DPI doesn't generate a source id
    // lane i occupies bits [W*i, W*(i+1)) of each flattened bus
    req.bits.address := sim.io.a.address(addrW * (i + 1) - 1, addrW * i)
    req.bits.is_store := sim.io.a.is_store(i)
    req.bits.size := sim.io.a.size(sizeW * (i + 1) - 1, sizeW * i)
    req.bits.data := sim.io.a.data(dataW * (i + 1) - 1, dataW * i)
  }
  sim.io.a.ready := VecInit(laneReqs.map(_.ready)).asUInt

  val laneResps = Wire(Vec(numLanes, Flipped(Decoupled(new TraceLine))))
  laneResps.zipWithIndex.foreach { case (resp, i) =>
    resp.ready := sim.io.d.ready(i)
    // TODO: not handled in DPI
    resp.bits.source := DontCare
    resp.bits.address := DontCare
    resp.bits.data := DontCare
  }
  sim.io.d.valid := VecInit(laneResps.map(_.valid)).asUInt
  sim.io.d.is_store := VecInit(laneResps.map(_.bits.is_store)).asUInt
  sim.io.d.size := VecInit(laneResps.map(_.bits.size)).asUInt
  sim.io.d.data := VecInit(laneResps.map(_.bits.data)).asUInt

  // One source-id allocator per lane.
  val sourceGens = Seq.fill(numLanes)(
    Module(
      new SourceGenerator(
        log2Ceil(numSrcIds),
        ignoreInUse = false
      )
    )
  )
  // The black box is told whether any lane still has an outstanding request.
  val anyInflight = sourceGens.map(_.io.inflight).reduce(_ || _)
  sim.io.inflight := anyInflight

  // Take requests off of the queue and generate TL requests
  (outer.laneNodes zip (laneReqs zip laneResps)).zipWithIndex.foreach {
    case ((node, (req, resp)), lane) =>
      val (tlOut, edge) = node.out(0)

      // Requests --------------------------------------------------------------
      //
      // Core only makes accesses of granularity larger than a word, so we want
      // the trace driver to act so as well.
      // That means if req.size is smaller than word size, we need to pad data
      // with zeros to generate a word-size request, and set mask accordingly.
      val lgWordSize = log2Ceil(wordSizeInBytes)
      val offsetInWord = req.bits.address % wordSizeInBytes.U
      val subword = req.bits.size < lgWordSize.U

      // `mask` is currently unused
      // val mask = Wire(UInt(wordSizeInBytes.W))
      val wordData = Wire(UInt((wordSizeInBytes * 8 * 2).W))
      val sizeInBytes = Wire(UInt((sizeW + 1).W))
      sizeInBytes := (1.U) << req.bits.size
      // mask := Mux(subword, (~((~0.U(64.W)) << sizeInBytes)) << offsetInWord, ~0.U)
      // Sub-word data is moved to its byte position inside the word.
      wordData := Mux(subword, req.bits.data << (offsetInWord * 8.U), req.bits.data)
      val wordAlignedAddress =
        req.bits.address & ~((1 << lgWordSize) - 1).U(addrW.W)
      // Sub-word accesses are promoted to exactly one word.  The promoted size
      // is derived from wordSizeInBytes so that it stays consistent with the
      // `subword` test above (it evaluates to 2 for 4-byte words).
      val wordAlignedSize = Mux(subword, lgWordSize.U, req.bits.size)

      // A source id is allocated when the request fires and reclaimed when
      // its response fires.
      val sourceGen = sourceGens(lane)
      sourceGen.io.gen := tlOut.a.fire
      sourceGen.io.reclaim.valid := tlOut.d.fire
      sourceGen.io.reclaim.bits := tlOut.d.bits.source
      sourceGen.io.meta := DontCare

      val (plegal, pbits) = edge.Put(
        fromSource = sourceGen.io.id.bits,
        toAddress = wordAlignedAddress,
        lgSize = wordAlignedSize, // trace line already holds log2(size)
        // data should be aligned to beatBytes
        data =
          (wordData << (8.U * (wordAlignedAddress % edge.manager.beatBytes.U))).asUInt
      )
      val (glegal, gbits) = edge.Get(
        fromSource = sourceGen.io.id.bits,
        toAddress = wordAlignedAddress,
        lgSize = wordAlignedSize
      )
      val legal = Mux(req.bits.is_store, plegal, glegal)
      val bits = Mux(req.bits.is_store, pbits, gbits)

      // A request is only issued, and only accepted from the black box, while
      // a free source id is available.
      tlOut.a.valid := req.valid && sourceGen.io.id.valid
      req.ready := tlOut.a.ready && sourceGen.io.id.valid

      when(tlOut.a.fire) {
        assert(legal, "illegal TL req gen")
      }
      tlOut.a.bits := bits

      // Responses -------------------------------------------------------------
      //
      tlOut.d.ready := resp.ready
      resp.valid := tlOut.d.valid
      // A response without data is reported as a store acknowledgement.
      resp.bits.is_store := !edge.hasData(tlOut.d.bits)
      resp.bits.size := tlOut.d.bits.size

      // Channels B, C and E are not used by this client.
      tlOut.b.ready := true.B
      tlOut.c.valid := false.B
      tlOut.e.valid := false.B

      // debug
      dontTouch(req)
      when(tlOut.a.valid) {
        printf(s"Lane ${lane}: ");
        TLPrintf(
          "Emulator",
          tlOut.a.bits.source,
          tlOut.a.bits.address,
          tlOut.a.bits.size,
          tlOut.a.bits.mask,
          req.bits.is_store,
          tlOut.a.bits.data,
          req.bits.data
        )
      }
      dontTouch(tlOut.a)
      dontTouch(tlOut.d)
  }

  // when(traceFinished && allReqReclaimed && noValidReqs) {
  //   assert(
  //     false.B,
  //     "\n\n\nsimulation Successfully finished\n\n\n (this assertion intentional fail upon MemTracer termination)"
  //   )
  // }
}
|
||||||
|
|
||||||
|
/** Chisel black box for the Verilog module `SimEmulator`.
  *
  * The Verilog parameter `NUM_LANES` is set from `numLanes`.  Per-lane signals
  * are flattened into single wide `UInt`s because Chisel can't interface with
  * a Verilog 2D port; the per-lane widths are taken from [[TraceLine]].
  *
  * @param numLanes number of lanes the black box is instantiated with
  */
class SimEmulator(numLanes: Int)
    extends BlackBox(Map("NUM_LANES" -> numLanes))
    with HasBlackBoxResource {
  // Template instance used only to look up the per-lane field widths.
  val traceLineT = new TraceLine
  val addrW = traceLineT.address.getWidth
  val sizeW = traceLineT.size.getWidth
  val dataW = traceLineT.data.getWidth

  val io = IO(new Bundle {
    val clock = Input(Clock())
    val reset = Input(Bool())
    val inflight = Input(Bool())
    val finished = Output(Bool())

    // Request channel: the black box produces requests and samples `ready`.
    val a = new Bundle {
      val ready = Input(UInt(numLanes.W))
      val valid = Output(UInt(numLanes.W))
      // Multi-bit fields hold all lanes side by side in one 1D vector.
      val address = Output(UInt((addrW * numLanes).W))
      val is_store = Output(UInt(numLanes.W))
      val size = Output(UInt((sizeW * numLanes).W))
      val data = Output(UInt((dataW * numLanes).W))
    }

    // Response channel: the black box consumes responses and drives `ready`.
    val d = new Bundle {
      val ready = Output(UInt(numLanes.W))
      val valid = Input(UInt(numLanes.W))
      val is_store = Input(UInt(numLanes.W))
      val size = Input(UInt((sizeW * numLanes).W))
      val data = Input(UInt((dataW * numLanes).W))
    }
  })

  // Sources that make up the black box: shared defines, the Verilog wrapper,
  // and the C++ DPI glue.
  addResource("/vsrc/SimDefaults.vh")
  addResource("/vsrc/SimEmulator.v")
  addResource("/csrc/SimEmulator.cc")
}
|
||||||
|
|
||||||
@@ -4,6 +4,7 @@ import freechips.rocketchip.diplomacy.LazyModule
|
|||||||
import freechips.rocketchip.subsystem._
|
import freechips.rocketchip.subsystem._
|
||||||
import org.chipsalliance.cde.config.Parameters
|
import org.chipsalliance.cde.config.Parameters
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
|
import radiance.core.{SIMTCoreKey, MemtraceCoreKey}
|
||||||
|
|
||||||
// TODO: possibly move to somewhere closer to CoalescingUnit
|
// TODO: possibly move to somewhere closer to CoalescingUnit
|
||||||
// TODO: separate coalescer config from CanHaveMemtraceCore
|
// TODO: separate coalescer config from CanHaveMemtraceCore
|
||||||
|
|||||||
@@ -10,25 +10,10 @@ import org.chipsalliance.diplomacy.lazymodule.{LazyModule, LazyModuleImp}
|
|||||||
import freechips.rocketchip.util.{Code, MultiPortQueue, OnePortLanePositionedQueue}
|
import freechips.rocketchip.util.{Code, MultiPortQueue, OnePortLanePositionedQueue}
|
||||||
import freechips.rocketchip.unittest._
|
import freechips.rocketchip.unittest._
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
|
import radiance.core.{SIMTCoreParams, SIMTCoreKey}
|
||||||
|
|
||||||
// TODO: find better place for these
|
|
||||||
|
|
||||||
case class SIMTCoreParams(
|
|
||||||
nWarps: Int = 4, // # of warps in the core
|
|
||||||
nCoreLanes: Int = 4, // # of SIMT threads in the core
|
|
||||||
nMemLanes: Int = 4, // # of memory lanes in the memory interface to the
|
|
||||||
// cache; relates to the LSU lanes
|
|
||||||
nSrcIds: Int = 8 // # of source IDs allocated to each of the nMemLanes
|
|
||||||
)
|
|
||||||
case class MemtraceCoreParams(
|
|
||||||
tracefilename: String = "undefined",
|
|
||||||
traceHasSource: Boolean = false
|
|
||||||
)
|
|
||||||
case class CoalXbarParam()
|
case class CoalXbarParam()
|
||||||
|
|
||||||
case object SIMTCoreKey extends Field[Option[SIMTCoreParams]](None /*default*/ )
|
|
||||||
case object MemtraceCoreKey
|
|
||||||
extends Field[Option[MemtraceCoreParams]](None /*default*/ )
|
|
||||||
case object CoalescerKey
|
case object CoalescerKey
|
||||||
extends Field[Option[CoalescerConfig]](None /*default*/ )
|
extends Field[Option[CoalescerConfig]](None /*default*/ )
|
||||||
case object CoalXbarKey extends Field[Option[CoalXbarParam]](None /*default*/ )
|
case object CoalXbarKey extends Field[Option[CoalXbarParam]](None /*default*/ )
|
||||||
@@ -2055,7 +2040,7 @@ class MemFuzzer(
|
|||||||
val laneNodes = Seq.tabulate(numLanes) { i =>
|
val laneNodes = Seq.tabulate(numLanes) { i =>
|
||||||
val clientParam = Seq(
|
val clientParam = Seq(
|
||||||
TLMasterParameters.v1(
|
TLMasterParameters.v1(
|
||||||
name = "MemTraceDriver" + i.toString,
|
name = "MemFuzzer" + i.toString,
|
||||||
sourceId = IdRange(0, numSrcIds)
|
sourceId = IdRange(0, numSrcIds)
|
||||||
// visibility = Seq(AddressSet(0x0000, 0xffffff))
|
// visibility = Seq(AddressSet(0x0000, 0xffffff))
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import freechips.rocketchip.subsystem._
|
|||||||
import gemmini._
|
import gemmini._
|
||||||
import gemmini.Arithmetic.FloatArithmetic._
|
import gemmini.Arithmetic.FloatArithmetic._
|
||||||
import radiance.tile._
|
import radiance.tile._
|
||||||
|
import radiance.core._
|
||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
import radiance.subsystem.RadianceGemminiDataType.{BF16, FP16, FP32, Int8}
|
||||||
|
|
||||||
@@ -106,6 +107,44 @@ class WithRadianceCores(
|
|||||||
), tensorCoreFP16, tensorCoreDecoupled, useVxCache)
|
), tensorCoreFP16, tensorCoreDecoupled, useVxCache)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Config fragment that prepends `n` emulator tiles to the tiles already
  * located in the subsystem.
  *
  * Tile ids are assigned consecutively, starting from the `NumTiles` value
  * seen before this fragment is applied.
  *
  * NOTE(review): `NumTiles` and `NumRadianceCores` are each advanced by 1, not
  * by `n`.  That matches the number of tiles added only when `n == 1` --
  * confirm whether larger `n` is meant to be supported.
  *
  * @param n          number of emulator tiles to add
  * @param useVxCache forwarded to `EmulatorTileParams`
  */
class WithEmulatorCores(
    n: Int,
    useVxCache: Boolean
) extends Config((site, _, up) => {
  case TilesLocated(InSubsystem) =>
    val existingTiles = up(TilesLocated(InSubsystem))
    val firstTileId = up(NumTiles)
    // Template shared by all added tiles; only the tile id differs.
    val template = EmulatorTileParams(
      core = VortexCoreParams(),
      useVxCache = useVxCache
    )
    val addedTiles = List.tabulate(n) { i =>
      EmulatorTileAttachParams(
        template.copy(tileId = i + firstTileId),
        RocketCrossingParams()
      )
    }
    addedTiles ++ existingTiles
  case NumTiles => up(NumTiles) + 1
  case NumRadianceCores => up(NumRadianceCores) + 1
})
|
||||||
|
|
||||||
|
/** Config fragment that prepends `n` fuzzer tiles to the tiles already
  * located in the subsystem.
  *
  * Tile ids are assigned consecutively, starting from the `NumTiles` value
  * seen before this fragment is applied.
  *
  * NOTE(review): `NumTiles` and `NumRadianceCores` are each advanced by 1, not
  * by `n`.  That matches the number of tiles added only when `n == 1` --
  * confirm whether larger `n` is meant to be supported.
  *
  * @param n          number of fuzzer tiles to add
  * @param useVxCache forwarded to `FuzzerTileParams`
  */
class WithFuzzerCores(
    n: Int,
    useVxCache: Boolean
) extends Config((site, _, up) => {
  case TilesLocated(InSubsystem) =>
    val existingTiles = up(TilesLocated(InSubsystem))
    val firstTileId = up(NumTiles)
    // Template shared by all added tiles; only the tile id differs.
    val template = FuzzerTileParams(
      core = VortexCoreParams(),
      useVxCache = useVxCache
    )
    val addedTiles = List.tabulate(n) { i =>
      FuzzerTileAttachParams(
        template.copy(tileId = i + firstTileId),
        RocketCrossingParams()
      )
    }
    addedTiles ++ existingTiles
  case NumTiles => up(NumTiles) + 1
  case NumRadianceCores => up(NumRadianceCores) + 1
})
|
||||||
|
|
||||||
object RadianceGemminiDataType extends Enumeration {
|
object RadianceGemminiDataType extends Enumeration {
|
||||||
type Type = Value
|
type Type = Value
|
||||||
val FP32, FP16, BF16, Int8 = Value
|
val FP32, FP16, BF16, Int8 = Value
|
||||||
@@ -136,7 +175,7 @@ class WithRadianceGemmini(location: HierarchicalLocation, crossing: RocketCrossi
|
|||||||
case FP16 => GemminiFPConfigs.FP16DefaultConfig.copy(
|
case FP16 => GemminiFPConfigs.FP16DefaultConfig.copy(
|
||||||
acc_scale_args = Some(ScaleArguments(
|
acc_scale_args = Some(ScaleArguments(
|
||||||
(t: Float, u: Float) => {t},
|
(t: Float, u: Float) => {t},
|
||||||
1, Float(5, 11), -1, identity = "1.0", c_str = "((x))"
|
1, Float(8, 24), -1, identity = "1.0", c_str = "((x))"
|
||||||
)),
|
)),
|
||||||
mvin_scale_args = Some(ScaleArguments(
|
mvin_scale_args = Some(ScaleArguments(
|
||||||
(t: Float, u: Float) => t * u,
|
(t: Float, u: Float) => t * u,
|
||||||
@@ -148,8 +187,8 @@ class WithRadianceGemmini(location: HierarchicalLocation, crossing: RocketCrossi
|
|||||||
// from sirius
|
// from sirius
|
||||||
spatialArrayInputType = Float(5, 11, isRecoded = skipRecoding),
|
spatialArrayInputType = Float(5, 11, isRecoded = skipRecoding),
|
||||||
spatialArrayWeightType = Float(5, 11, isRecoded = skipRecoding),
|
spatialArrayWeightType = Float(5, 11, isRecoded = skipRecoding),
|
||||||
spatialArrayOutputType = Float(5, 11, isRecoded = skipRecoding),
|
spatialArrayOutputType = Float(8, 24, isRecoded = skipRecoding),
|
||||||
accType = Float(5, 11),
|
accType = Float(8, 24),
|
||||||
// hardcode_d_to_garbage_addr = true,
|
// hardcode_d_to_garbage_addr = true,
|
||||||
acc_read_full_width = false, // set to true to output fp32
|
acc_read_full_width = false, // set to true to output fp32
|
||||||
|
|
||||||
@@ -244,25 +283,6 @@ class WithRadianceFrameBuffer(baseAddress: BigInt,
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
class WithFuzzerCores(
|
|
||||||
n: Int,
|
|
||||||
useVxCache: Boolean
|
|
||||||
) extends Config((site, _, up) => {
|
|
||||||
case TilesLocated(InSubsystem) => {
|
|
||||||
val prev = up(TilesLocated(InSubsystem))
|
|
||||||
val idOffset = up(NumTiles)
|
|
||||||
val fuzzer = FuzzerTileParams(
|
|
||||||
core = VortexCoreParams(),
|
|
||||||
useVxCache = useVxCache)
|
|
||||||
List.tabulate(n)(i => FuzzerTileAttachParams(
|
|
||||||
fuzzer.copy(tileId = i + idOffset),
|
|
||||||
RocketCrossingParams()
|
|
||||||
)) ++ prev
|
|
||||||
}
|
|
||||||
case NumTiles => up(NumTiles) + 1
|
|
||||||
case NumRadianceCores => up(NumRadianceCores) + 1
|
|
||||||
})
|
|
||||||
|
|
||||||
class WithRadianceCluster(
|
class WithRadianceCluster(
|
||||||
clusterId: Int,
|
clusterId: Int,
|
||||||
location: HierarchicalLocation = InSubsystem,
|
location: HierarchicalLocation = InSubsystem,
|
||||||
|
|||||||
96
src/main/scala/radiance/tile/EmulatorTile.scala
Normal file
96
src/main/scala/radiance/tile/EmulatorTile.scala
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
// See LICENSE.SiFive for license details.
|
||||||
|
// See LICENSE.Berkeley for license details.
|
||||||
|
|
||||||
|
package radiance.tile
|
||||||
|
|
||||||
|
import chisel3._
|
||||||
|
import org.chipsalliance.cde.config.Parameters
|
||||||
|
import org.chipsalliance.diplomacy.lazymodule.LazyModule
|
||||||
|
import freechips.rocketchip.resources.SimpleDevice
|
||||||
|
import freechips.rocketchip.prci.ClockCrossingType
|
||||||
|
import freechips.rocketchip.rocket._
|
||||||
|
import freechips.rocketchip.tile._
|
||||||
|
import freechips.rocketchip.tilelink._
|
||||||
|
import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
|
||||||
|
import freechips.rocketchip.prci.{ClockSinkParameters}
|
||||||
|
import radiance.core._
|
||||||
|
import radiance.memory.{CoalescingUnit, CoalescerKey}
|
||||||
|
|
||||||
|
// TODO: De-duplicate between this and FuzzerTile
|
||||||
|
|
||||||
|
case class EmulatorTileParams(
|
||||||
|
core: VortexCoreParams = VortexCoreParams(), // TODO: remove this
|
||||||
|
useVxCache: Boolean = false,
|
||||||
|
tileId: Int = 0,
|
||||||
|
) extends InstantiableTileParams[EmulatorTile] {
|
||||||
|
def instantiate(crossing: HierarchicalElementCrossingParamsLike, lookup: LookupByHartIdImpl)(
|
||||||
|
implicit p: Parameters
|
||||||
|
): EmulatorTile = {
|
||||||
|
new EmulatorTile(this, crossing, lookup)
|
||||||
|
}
|
||||||
|
val clockSinkParams = ClockSinkParameters()
|
||||||
|
val blockerCtrlAddr = None
|
||||||
|
val icache = None
|
||||||
|
val dcache = None
|
||||||
|
val btb = None
|
||||||
|
val baseName = "radiance_emulator_tile"
|
||||||
|
val uniqueName = s"${baseName}_$tileId"
|
||||||
|
}
|
||||||
|
|
||||||
|
case class EmulatorTileAttachParams(
|
||||||
|
tileParams: EmulatorTileParams,
|
||||||
|
crossingParams: HierarchicalElementCrossingParamsLike
|
||||||
|
) extends CanAttachTile { type TileType = EmulatorTile }
|
||||||
|
|
||||||
|
class EmulatorTile private (
|
||||||
|
val EmulatorParams: EmulatorTileParams,
|
||||||
|
crossing: ClockCrossingType,
|
||||||
|
lookup: LookupByHartIdImpl,
|
||||||
|
q: Parameters
|
||||||
|
) extends BaseTile(EmulatorParams, crossing, lookup, q)
|
||||||
|
with SinksExternalInterrupts
|
||||||
|
with SourcesExternalNotifications {
|
||||||
|
def this(
|
||||||
|
params: EmulatorTileParams,
|
||||||
|
crossing: HierarchicalElementCrossingParamsLike,
|
||||||
|
lookup: LookupByHartIdImpl
|
||||||
|
)(implicit p: Parameters) =
|
||||||
|
this(params, crossing.crossingType, lookup, p)
|
||||||
|
|
||||||
|
val cpuDevice: SimpleDevice = new SimpleDevice("emulator", Nil)
|
||||||
|
|
||||||
|
val intOutwardNode = None
|
||||||
|
val slaveNode: TLInwardNode = TLIdentityNode()
|
||||||
|
val masterNode = visibilityNode
|
||||||
|
// val statusNode = BundleBridgeSource(() => new GroundTestStatus)
|
||||||
|
|
||||||
|
val (numLanes, numSrcIds) = p(SIMTCoreKey) match {
|
||||||
|
case Some(param) => (param.nMemLanes, param.nSrcIds)
|
||||||
|
case None => {
|
||||||
|
require(false, "emulator requires SIMTCoreKey to be defined")
|
||||||
|
(0, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// FIXME: parameterize
|
||||||
|
val wordSizeInBytes = 4
|
||||||
|
|
||||||
|
val emulator = LazyModule(new Emulator(numLanes, numSrcIds, wordSizeInBytes))
|
||||||
|
|
||||||
|
// Conditionally instantiate memory coalescer
|
||||||
|
val coalescerNode = p(CoalescerKey) match {
|
||||||
|
case Some(coalParam) => {
|
||||||
|
val coal = LazyModule(new CoalescingUnit(coalParam))
|
||||||
|
coal.cpuNode :=* TLWidthWidget(4) :=* emulator.node
|
||||||
|
coal.aggregateNode
|
||||||
|
}
|
||||||
|
case None => emulator.node
|
||||||
|
}
|
||||||
|
|
||||||
|
masterNode :=* coalescerNode
|
||||||
|
|
||||||
|
override lazy val module = new EmulatorTileModuleImp(this)
|
||||||
|
}
|
||||||
|
|
||||||
|
class EmulatorTileModuleImp(outer: EmulatorTile) extends BaseTileModuleImp(outer) {
|
||||||
|
outer.reportCease(Some(outer.emulator.module.io.finished))
|
||||||
|
}
|
||||||
@@ -4,14 +4,16 @@
|
|||||||
package radiance.tile
|
package radiance.tile
|
||||||
|
|
||||||
import chisel3._
|
import chisel3._
|
||||||
import org.chipsalliance.cde.config.{Parameters}
|
import org.chipsalliance.cde.config.Parameters
|
||||||
import freechips.rocketchip.diplomacy.{SimpleDevice, LazyModule}
|
import org.chipsalliance.diplomacy.lazymodule.LazyModule
|
||||||
|
import freechips.rocketchip.resources.SimpleDevice
|
||||||
import freechips.rocketchip.prci.ClockCrossingType
|
import freechips.rocketchip.prci.ClockCrossingType
|
||||||
import freechips.rocketchip.rocket._
|
import freechips.rocketchip.rocket._
|
||||||
import freechips.rocketchip.tile._
|
import freechips.rocketchip.tile._
|
||||||
import freechips.rocketchip.tilelink._
|
import freechips.rocketchip.tilelink._
|
||||||
import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
|
import freechips.rocketchip.subsystem.{HierarchicalElementCrossingParamsLike, CanAttachTile}
|
||||||
import freechips.rocketchip.prci.{ClockSinkParameters}
|
import freechips.rocketchip.prci.{ClockSinkParameters}
|
||||||
|
import radiance.core.{SIMTCoreKey}
|
||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
|
|
||||||
case class FuzzerTileParams(
|
case class FuzzerTileParams(
|
||||||
|
|||||||
@@ -168,6 +168,8 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
|||||||
val rs2 = UInt(64.W)
|
val rs2 = UInt(64.W)
|
||||||
}
|
}
|
||||||
val ciscInst = Wire(ciscInstT)
|
val ciscInst = Wire(ciscInstT)
|
||||||
|
val startsLoop = WireInit(false.B)
|
||||||
|
val runningLoops = RegInit(0.U(4.W))
|
||||||
|
|
||||||
val accCommandQueue = Module(new Queue(UInt(32.W), 4, false, true))
|
val accCommandQueue = Module(new Queue(UInt(32.W), 4, false, true))
|
||||||
accCommandQueue.io.enq.bits := accSlave.cmd.bits
|
accCommandQueue.io.enq.bits := accSlave.cmd.bits
|
||||||
@@ -175,10 +177,15 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
|||||||
accCommandQueue.io.deq.ready := !ciscValid
|
accCommandQueue.io.deq.ready := !ciscValid
|
||||||
assert(!accSlave.cmd.valid || accCommandQueue.io.enq.ready, "cisc command queue full")
|
assert(!accSlave.cmd.valid || accCommandQueue.io.enq.ready, "cisc command queue full")
|
||||||
|
|
||||||
|
when (accCommandQueue.io.enq.fire) {
|
||||||
|
val enqId = accSlave.cmd.bits(6, 0)
|
||||||
|
startsLoop := VecInit(Seq(0, 1, 2, 9, 10, 12).map { x => enqId === x.U }).asUInt.orR
|
||||||
|
}
|
||||||
|
|
||||||
when (accCommandQueue.io.deq.fire) {
|
when (accCommandQueue.io.deq.fire) {
|
||||||
ciscValid := true.B
|
ciscValid := true.B
|
||||||
ciscId := accSlave.cmd.bits(7, 0)
|
ciscId := accCommandQueue.io.deq.bits(7, 0)
|
||||||
ciscArgs := accSlave.cmd.bits(31, 8)
|
ciscArgs := accCommandQueue.io.deq.bits(31, 8)
|
||||||
instCounter.reset()
|
instCounter.reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -228,6 +235,7 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
|||||||
|
|
||||||
println(s"gemmini cisc initialized with DIM=${config.DIM}, tileSize=${tileSizeM},${tileSizeN},${tileSizeK}")
|
println(s"gemmini cisc initialized with DIM=${config.DIM}, tileSize=${tileSizeM},${tileSizeN},${tileSizeK}")
|
||||||
println(f"boundsInst=${rectBoundsInst.litValue}%x, hexadecile=${spadHexadecile}")
|
println(f"boundsInst=${rectBoundsInst.litValue}%x, hexadecile=${spadHexadecile}")
|
||||||
|
|
||||||
when (ciscValid) {
|
when (ciscValid) {
|
||||||
switch (ciscId(6, 0)) {
|
switch (ciscId(6, 0)) {
|
||||||
is (0.U) { // compute on given hexadeciles
|
is (0.U) { // compute on given hexadeciles
|
||||||
@@ -241,6 +249,7 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
|||||||
val accSkipInst = genAccSkipInst(0.U, ((ciscArgs(23, 16) * spadHexadecile.U) << 32).asUInt | 0x238.U)
|
val accSkipInst = genAccSkipInst(0.U, ((ciscArgs(23, 16) * spadHexadecile.U) << 32).asUInt | 0x238.U)
|
||||||
ciscInst := microcodeEntry(Seq(boundsInst, strideInst, accSkipInst))
|
ciscInst := microcodeEntry(Seq(boundsInst, strideInst, accSkipInst))
|
||||||
}
|
}
|
||||||
|
is (2.U) {} // no actual invocation, fake job placeholder
|
||||||
is (8.U) { // set a, b stride
|
is (8.U) { // set a, b stride
|
||||||
val inst = Wire(ciscInstT)
|
val inst = Wire(ciscInstT)
|
||||||
inst.inst := 0x1820b07b.U
|
inst.inst := 0x1820b07b.U
|
||||||
@@ -279,6 +288,11 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
val completionCount = PopCount(outer.gemmini.module.completion_io.completed)
|
||||||
|
val loopStarted = Mux(startsLoop, 1.U, 0.U)
|
||||||
|
runningLoops := runningLoops + loopStarted - completionCount
|
||||||
|
assert(runningLoops + loopStarted >= completionCount)
|
||||||
|
|
||||||
val gemminiIO = outer.gemmini.module.io.cmd
|
val gemminiIO = outer.gemmini.module.io.cmd
|
||||||
|
|
||||||
val regValid = Wire(Bool())
|
val regValid = Wire(Bool())
|
||||||
@@ -299,6 +313,11 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
|||||||
// (!outer.gemmini.module.io.busy, outer.gemmini.module.io.busy.asUInt)
|
// (!outer.gemmini.module.io.busy, outer.gemmini.module.io.busy.asUInt)
|
||||||
(true.B, outer.gemmini.module.io.busy.asUInt)
|
(true.B, outer.gemmini.module.io.busy.asUInt)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def gemminiRunningLoopsReg(_dReady: Bool): (Bool, UInt) = {
|
||||||
|
(true.B, runningLoops)
|
||||||
|
}
|
||||||
|
|
||||||
outer.regNode.regmap(
|
outer.regNode.regmap(
|
||||||
0x00 -> Seq(RegField.w(32, gemminiCommandReg(_, _))),
|
0x00 -> Seq(RegField.w(32, gemminiCommandReg(_, _))),
|
||||||
0x10 -> Seq(
|
0x10 -> Seq(
|
||||||
@@ -307,7 +326,8 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
|||||||
0x18 -> Seq(
|
0x18 -> Seq(
|
||||||
RegField.w(32, gemminiRs2RegLSB),
|
RegField.w(32, gemminiRs2RegLSB),
|
||||||
RegField.w(32, gemminiRs2RegMSB)),
|
RegField.w(32, gemminiRs2RegMSB)),
|
||||||
0x20 -> Seq(RegField.r(32, gemminiBusyReg(_)))
|
0x20 -> Seq(RegField.r(32, gemminiBusyReg(_))),
|
||||||
|
0x28 -> Seq(RegField.r(32, gemminiRunningLoopsReg(_)))
|
||||||
)
|
)
|
||||||
|
|
||||||
assert(!regValid || gemminiIO.ready)
|
assert(!regValid || gemminiIO.ready)
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import freechips.rocketchip.tilelink._
|
|||||||
import freechips.rocketchip.util._
|
import freechips.rocketchip.util._
|
||||||
import midas.targetutils.SynthesizePrintf
|
import midas.targetutils.SynthesizePrintf
|
||||||
import org.chipsalliance.cde.config._
|
import org.chipsalliance.cde.config._
|
||||||
|
import radiance.core._
|
||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceSimArgs}
|
import radiance.subsystem.{GPUMemParams, GPUMemory, RadianceSimArgs}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user