Add wide DRAM support and enlarge queues
This commit is contained in:
@@ -14,7 +14,6 @@ EXTRA_SIM_LDFLAGS += -L$(RADPIE_BUILD_DIR) -Wl,-rpath,$(RADPIE_BUILD_DIR) -lradp
|
||||
EXTRA_SIM_PREPROC_DEFINES += \
|
||||
+define+SIMULATION \
|
||||
+define+GPR_RESET \
|
||||
+define+GPR_DUPLICATED \
|
||||
+define+LSU_DUP_DISABLE \
|
||||
+define+DBG_TRACE_CORE_PIPELINE_VCS \
|
||||
+define+PERF_ENABLE \
|
||||
|
||||
@@ -10,18 +10,18 @@ import org.chipsalliance.cde.config.{Parameters, Field}
|
||||
case object VortexL1Key extends Field[Option[VortexL1Config]](None /*default*/ )
|
||||
|
||||
case class VortexL1Config(
|
||||
cacheSize: Int, // total cache size in bytes
|
||||
numBanks: Int,
|
||||
wordSize: Int, // This is the read/write granularity of the L1 cache
|
||||
cacheLineSize: Int,
|
||||
coreTagWidth: Int,
|
||||
writeInfoReqQSize: Int,
|
||||
mshrSize: Int,
|
||||
memSideSourceIds: Int,
|
||||
uncachedAddrSets: Seq[AddressSet]
|
||||
cacheSize: Int, // total cache size in bytes
|
||||
numBanks: Int,
|
||||
inputSize: Int, // This is the read/write granularity of the L1 cache
|
||||
cacheLineSize: Int,
|
||||
coreTagWidth: Int,
|
||||
writeInfoReqQSize: Int,
|
||||
mshrSize: Int,
|
||||
memSideSourceIds: Int,
|
||||
uncachedAddrSets: Seq[AddressSet]
|
||||
) {
|
||||
def coreTagPlusSizeWidth: Int = {
|
||||
log2Ceil(wordSize) + coreTagWidth
|
||||
log2Ceil(inputSize) + coreTagWidth
|
||||
}
|
||||
// NOTE: This assertion depends on the fact that the Vortex cache is
|
||||
// configured to have 1 bank, and that it uses MSHR id as the tag of
|
||||
@@ -37,7 +37,7 @@ object defaultVortexL1Config
|
||||
extends VortexL1Config(
|
||||
cacheSize = 16384,
|
||||
numBanks = 4,
|
||||
wordSize = 16,
|
||||
inputSize = 16,
|
||||
cacheLineSize = 16,
|
||||
coreTagWidth = 8,
|
||||
writeInfoReqQSize = 16,
|
||||
@@ -80,15 +80,15 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters)
|
||||
// Slave node to upstream
|
||||
val managerParam = Seq(
|
||||
TLSlavePortParameters.v1(
|
||||
beatBytes = config.wordSize,
|
||||
beatBytes = config.inputSize,
|
||||
managers = Seq(
|
||||
TLSlaveParameters.v1(
|
||||
address = config.uncachedAddrSets,
|
||||
regionType = RegionType.IDEMPOTENT,
|
||||
executable = false,
|
||||
supportsGet = TransferSizes(1, config.wordSize),
|
||||
supportsPutPartial = TransferSizes(1, config.wordSize),
|
||||
supportsPutFull = TransferSizes(1, config.wordSize),
|
||||
supportsGet = TransferSizes(1, config.inputSize),
|
||||
supportsPutPartial = TransferSizes(1, config.inputSize),
|
||||
supportsPutFull = TransferSizes(1, config.inputSize),
|
||||
fifoId = Some(0)
|
||||
)
|
||||
)
|
||||
@@ -107,10 +107,10 @@ class VortexBankPassThrough(config: VortexL1Config)(implicit p: Parameters)
|
||||
config.memSideSourceIds
|
||||
) + 5 /*FIXME: give more sourceId so that passthrough doesn't block; hacky*/ )
|
||||
),
|
||||
supportsProbe = TransferSizes(1, config.wordSize),
|
||||
supportsGet = TransferSizes(1, config.wordSize),
|
||||
supportsPutFull = TransferSizes(1, config.wordSize),
|
||||
supportsPutPartial = TransferSizes(1, config.wordSize)
|
||||
supportsProbe = TransferSizes(1, config.cacheLineSize),
|
||||
supportsGet = TransferSizes(1, config.cacheLineSize),
|
||||
supportsPutFull = TransferSizes(1, config.cacheLineSize),
|
||||
supportsPutPartial = TransferSizes(1, config.cacheLineSize)
|
||||
)
|
||||
)
|
||||
)
|
||||
@@ -141,8 +141,8 @@ class VortexBank(
|
||||
// suppose there are 4 banks
|
||||
// base for bank 1: ...000000|01|0000
|
||||
// mask for bank 1; 111111|00|1111
|
||||
val base = 0x00000000L | (bankId * config.wordSize)
|
||||
val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.wordSize)
|
||||
val base = 0x00000000L | (bankId * config.inputSize)
|
||||
val mask = 0xffffffffL ^ ((config.numBanks - 1) * config.inputSize)
|
||||
|
||||
val excludeSets = config.uncachedAddrSets
|
||||
var remainingSets: Seq[AddressSet] = Seq(AddressSet(base, mask))
|
||||
@@ -155,15 +155,15 @@ class VortexBank(
|
||||
// Slave node to upstream
|
||||
val managerParam = Seq(
|
||||
TLSlavePortParameters.v1(
|
||||
beatBytes = config.wordSize,
|
||||
beatBytes = config.inputSize,
|
||||
managers = Seq(
|
||||
TLSlaveParameters.v1(
|
||||
address = generateAddressSets(),
|
||||
regionType = RegionType.IDEMPOTENT, // idk what this does
|
||||
executable = false,
|
||||
supportsGet = TransferSizes(1, config.wordSize),
|
||||
supportsPutPartial = TransferSizes(1, config.wordSize),
|
||||
supportsPutFull = TransferSizes(1, config.wordSize),
|
||||
supportsGet = TransferSizes(1, config.inputSize),
|
||||
supportsPutPartial = TransferSizes(1, config.inputSize),
|
||||
supportsPutFull = TransferSizes(1, config.inputSize),
|
||||
fifoId = Some(0)
|
||||
)
|
||||
)
|
||||
@@ -177,10 +177,10 @@ class VortexBank(
|
||||
TLMasterParameters.v1(
|
||||
name = s"VortexBank${bankId}",
|
||||
sourceId = IdRange(0, config.memSideSourceIds),
|
||||
supportsProbe = TransferSizes(1, config.wordSize),
|
||||
supportsGet = TransferSizes(1, config.wordSize),
|
||||
supportsPutFull = TransferSizes(1, config.wordSize),
|
||||
supportsPutPartial = TransferSizes(1, config.wordSize)
|
||||
supportsProbe = TransferSizes(1, config.inputSize),
|
||||
supportsGet = TransferSizes(1, config.inputSize),
|
||||
supportsPutFull = TransferSizes(1, config.inputSize),
|
||||
supportsPutPartial = TransferSizes(1, config.inputSize)
|
||||
)
|
||||
)
|
||||
)
|
||||
@@ -204,7 +204,7 @@ class VortexBankImp(
|
||||
) extends LazyModuleImp(outer) {
|
||||
val vxCache = Module(
|
||||
new VX_cache_top(
|
||||
WORD_SIZE = config.wordSize,
|
||||
WORD_SIZE = config.inputSize,
|
||||
// distribute total size across numBanks
|
||||
CACHE_SIZE = config.cacheSize / config.numBanks,
|
||||
CACHE_LINE_SIZE = config.cacheLineSize,
|
||||
@@ -236,7 +236,7 @@ class VortexBankImp(
|
||||
}
|
||||
|
||||
class ReadReqInfo(config: VortexL1Config) extends Bundle {
|
||||
val size = UInt(log2Ceil(config.wordSize).W)
|
||||
val size = UInt(log2Ceil(config.inputSize + 1).W)
|
||||
val id = UInt(config.coreTagWidth.W)
|
||||
}
|
||||
|
||||
@@ -264,7 +264,7 @@ class VortexBankImp(
|
||||
// strip the low log2(word size) address bits; the width is derived from config, not hardcoded to 4
|
||||
vxCache.io.core_req_addr := tlInFromCoal.a.bits.address(
|
||||
31,
|
||||
log2Ceil(config.wordSize)
|
||||
log2Ceil(config.inputSize)
|
||||
)
|
||||
vxCache.io.core_req_byteen := tlInFromCoal.a.bits.mask
|
||||
vxCache.io.core_req_data := tlInFromCoal.a.bits.data
|
||||
@@ -362,17 +362,17 @@ class VortexBankImp(
|
||||
TLMessages.Get
|
||||
)
|
||||
|
||||
tlOutToL2.a.bits.address := Cat(vxCache.io.mem_req_addr, 0.U(4.W))
|
||||
tlOutToL2.a.bits.address := Cat(vxCache.io.mem_req_addr, 0.U(log2Ceil(config.cacheLineSize).W))
|
||||
tlOutToL2.a.bits.mask := Mux(
|
||||
vxCache.io.mem_req_rw,
|
||||
vxCache.io.mem_req_byteen,
|
||||
0xffff.U
|
||||
~(0.U(config.cacheLineSize.W))
|
||||
)
|
||||
tlOutToL2.a.bits.data := vxCache.io.mem_req_data
|
||||
tlOutToL2.a.bits.source := sourceGen.io.id.bits
|
||||
// param and corrupt are tied off; size is the log2 of the transfer size in bytes
|
||||
tlOutToL2.a.bits.param := 0.U
|
||||
tlOutToL2.a.bits.size := 4.U // FIXME: hardcoded
|
||||
tlOutToL2.a.bits.size := log2Ceil(config.cacheLineSize).U
|
||||
tlOutToL2.a.bits.corrupt := false.B
|
||||
// downstream L2 -> vxCache response
|
||||
tlOutToL2.d.ready := vxCache.io.mem_rsp_ready
|
||||
|
||||
@@ -126,7 +126,7 @@ class WithFuzzerCores(
|
||||
class WithRadianceCluster(
|
||||
clusterId: Int,
|
||||
location: HierarchicalLocation = InSubsystem,
|
||||
crossing: RocketCrossingParams = RocketCrossingParams() // TODO make this not rocket
|
||||
crossing: RocketCrossingParams = RocketCrossingParams()
|
||||
) extends Config((site, here, up) => {
|
||||
case ClustersLocated(`location`) => up(ClustersLocated(location)) :+ RadianceClusterAttachParams(
|
||||
RadianceClusterParams(clusterId = clusterId),
|
||||
@@ -174,7 +174,17 @@ class WithPriorityCoalXbar extends Config((site, _, up) => {
|
||||
|
||||
class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, _, up) => {
|
||||
case VortexL1Key => {
|
||||
Some(defaultVortexL1Config.copy(numBanks = nBanks))
|
||||
Some(defaultVortexL1Config.copy(
|
||||
numBanks = nBanks,
|
||||
inputSize = up(SIMTCoreKey).get.nMemLanes * 4,
|
||||
cacheLineSize = up(SIMTCoreKey).get.nMemLanes * 4,
|
||||
memSideSourceIds = 64,
|
||||
mshrSize = 64,
|
||||
coreTagWidth = log2Ceil(up(SIMTCoreKey).get.nSrcIds.max(up(CoalescerKey) match {
|
||||
case Some(key) => key.numNewSrcIds
|
||||
case None => 0
|
||||
})) + log2Ceil(up(SIMTCoreKey).get.nMemLanes) + 1
|
||||
))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -197,8 +207,7 @@ class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config
|
||||
// If instantiating L1 cache, the maximum coalescing size should match the
|
||||
// read/write granularity of the L1 cache (its word size), which may differ from the line size
|
||||
val maxCoalSizeInBytes = up(VortexL1Key, site) match {
|
||||
case Some(param) =>
|
||||
(param.wordSize)
|
||||
case Some(param) => param.inputSize
|
||||
case None => sbusWidthInBytes
|
||||
}
|
||||
|
||||
|
||||
@@ -140,6 +140,9 @@ class GemminiTileModuleImp(outer: GemminiTile) extends BaseTileModuleImp(outer)
|
||||
|
||||
tieOffGemminiRocc
|
||||
|
||||
outer.traceSourceNode.bundle := DontCare
|
||||
outer.traceSourceNode.bundle.insns foreach (_.valid := false.B)
|
||||
|
||||
// hacky, but cluster will AND the cease signals from all tiles, and we want
|
||||
// the core tiles to determine cluster cease not Gemmini
|
||||
outer.reportCease(Some(true.B))
|
||||
|
||||
@@ -8,6 +8,7 @@ import chisel3.util._
|
||||
import freechips.rocketchip.diplomacy._
|
||||
import freechips.rocketchip.prci.ClockSinkParameters
|
||||
import freechips.rocketchip.subsystem._
|
||||
import freechips.rocketchip.tile.TraceBundle
|
||||
import freechips.rocketchip.tilelink._
|
||||
import gemmini._
|
||||
import org.chipsalliance.cde.config.Parameters
|
||||
@@ -91,7 +92,7 @@ class RadianceCluster (
|
||||
callback(p)
|
||||
}
|
||||
}
|
||||
def connect_one[T <: BaseNode with TLNode](from: TLNode, to: () => T): T = {
|
||||
def connect_one[T <: TLNode](from: TLNode, to: () => T): T = {
|
||||
val t = to()
|
||||
guard_monitors { implicit p => t := from }
|
||||
t
|
||||
@@ -183,13 +184,18 @@ class RadianceCluster (
|
||||
|
||||
val spad_read_nodes = Seq.fill(smem_banks) {
|
||||
val r_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||
guard_monitors { implicit p => r_dist := gemmini.spad_read_nodes }
|
||||
guard_monitors { implicit p => r_dist := TLBuffer(BufferParams(1, false, true), BufferParams(0)) := gemmini.spad_read_nodes }
|
||||
Seq.fill(smem_subbanks) { connect_one(r_dist, TLIdentityNode.apply) }
|
||||
}
|
||||
val spad_write_nodes = Seq.fill(smem_banks) {
|
||||
val w_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||
guard_monitors { implicit p => w_dist := gemmini.spad_write_nodes }
|
||||
guard_monitors { implicit p => w_dist := TLBuffer(BufferParams(1, false, true), BufferParams(0)) := gemmini.spad_write_nodes }
|
||||
Seq.fill(smem_subbanks) { connect_one(w_dist, TLIdentityNode.apply) }
|
||||
/* Seq.fill(smem_subbanks) {
|
||||
val buf = TLBuffer(BufferParams(1, false, true), BufferParams(0))
|
||||
buf := w_dist
|
||||
buf
|
||||
} */
|
||||
}
|
||||
val ws_dist = DistributorNode(from = smem_width, to = wordSize)
|
||||
guard_monitors { implicit p => ws_dist := gemmini.spad.spad_writer.node } // this is the dma write node
|
||||
|
||||
@@ -165,10 +165,14 @@ class RadianceTile private (
|
||||
// to a stall in the backend pipeline and resulting in a deadlock.
|
||||
val imemSourceWidth = 4 // 1 << imemSourceWidth == IBUF_SIZE
|
||||
|
||||
val dmemSourceWidth = p(SIMTCoreKey) match {
|
||||
// TODO: respect coalescer newSrcIds
|
||||
val smemSourceWidth = p(SIMTCoreKey) match {
|
||||
case Some(simtParam) => log2Ceil(simtParam.nSrcIds)
|
||||
case None => 4
|
||||
case None => 4
|
||||
}
|
||||
|
||||
val dmemSourceWidth = p(CoalescerKey) match {
|
||||
case Some(coalParam) => log2Ceil(coalParam.numOldSrcIds)
|
||||
case None => smemSourceWidth
|
||||
}
|
||||
// require(
|
||||
// dmemSourceWidth >= 4,
|
||||
@@ -177,8 +181,6 @@ class RadianceTile private (
|
||||
// "We recommend setting nSrcIds to at least 16."
|
||||
// )
|
||||
|
||||
val smemSourceWidth = 4 // FIXME: hardcoded
|
||||
|
||||
// Replicates some of the logic of how Vortex determines the tag width of
|
||||
// memory requests so that Chisel and Verilog are in agreement on bitwidths.
|
||||
// See VX_gpu_pkg.sv
|
||||
@@ -190,7 +192,8 @@ class RadianceTile private (
|
||||
}
|
||||
val imemTagWidth = UUID_WIDTH + NW_WIDTH
|
||||
|
||||
val LSUQ_SIZE = 2 * numWarps * (numCoreLanes / numLsuLanes)
|
||||
val LSUQ_SIZE = 8 * numWarps * (numCoreLanes / numLsuLanes)
|
||||
assert(LSUQ_SIZE == p(SIMTCoreKey).get.nSrcIds)
|
||||
val LSUQ_TAG_BITS = log2Ceil(LSUQ_SIZE) + 1 /*DCACHE_BATCH_SEL_BITS*/
|
||||
val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS
|
||||
// dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH
|
||||
|
||||
Reference in New Issue
Block a user