shared memory config and sane smem write delays
This commit is contained in:
@@ -13,6 +13,17 @@ import gemmini.{CapacityInKilobytes, GemminiFPConfigs}
|
|||||||
import radiance.tile._
|
import radiance.tile._
|
||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
|
|
||||||
|
case class RadianceSharedMemKey(address: BigInt,
|
||||||
|
size: Int,
|
||||||
|
numBanks: Int,
|
||||||
|
numWords: Int,
|
||||||
|
wordSize: Int = 4,
|
||||||
|
strideByWord: Boolean = true,
|
||||||
|
filterAligned: Boolean = true,
|
||||||
|
disableMonitors: Boolean = true,
|
||||||
|
serializeUnaligned: Boolean = true)
|
||||||
|
case object RadianceSharedMemKey extends Field[Option[RadianceSharedMemKey]](None)
|
||||||
|
|
||||||
class WithRadianceCores(
|
class WithRadianceCores(
|
||||||
n: Int,
|
n: Int,
|
||||||
location: HierarchicalLocation,
|
location: HierarchicalLocation,
|
||||||
@@ -63,19 +74,18 @@ class WithRadianceCores(
|
|||||||
|
|
||||||
class WithRadianceGemmini(location: HierarchicalLocation,
|
class WithRadianceGemmini(location: HierarchicalLocation,
|
||||||
crossing: RocketCrossingParams,
|
crossing: RocketCrossingParams,
|
||||||
dim: Int, extMemBase: BigInt,
|
dim: Int, accSizeInKB: Int) extends Config((site, _, up) => {
|
||||||
spSizeInKB: Int, accSizeInKB: Int) extends Config((site, _, up) => {
|
|
||||||
case TilesLocated(`location`) => {
|
case TilesLocated(`location`) => {
|
||||||
val prev = up(TilesLocated(`location`), site)
|
val prev = up(TilesLocated(`location`), site)
|
||||||
val idOffset = prev.size
|
val idOffset = prev.size
|
||||||
if (idOffset == 0) {
|
if (idOffset == 0) {
|
||||||
println("******WARNING****** gemmini tile id is 0! radiance tiles in the same cluster needs to be before gemmini")
|
println("******WARNING****** gemmini tile id is 0! radiance tiles in the same cluster needs to be before gemmini")
|
||||||
}
|
}
|
||||||
|
val smKey = site(RadianceSharedMemKey).get
|
||||||
val gemmini = GemminiTileParams(gemminiConfig = GemminiFPConfigs.FP32DefaultConfig.copy(
|
val gemmini = GemminiTileParams(gemminiConfig = GemminiFPConfigs.FP32DefaultConfig.copy(
|
||||||
has_training_convs = false,
|
has_training_convs = false,
|
||||||
has_max_pool = false,
|
has_max_pool = false,
|
||||||
use_tl_ext_mem = true,
|
use_tl_ext_mem = true,
|
||||||
tl_ext_mem_base = extMemBase,
|
|
||||||
sp_singleported = false,
|
sp_singleported = false,
|
||||||
spad_read_delay = 4,
|
spad_read_delay = 4,
|
||||||
use_shared_ext_mem = true,
|
use_shared_ext_mem = true,
|
||||||
@@ -83,9 +93,12 @@ class WithRadianceGemmini(location: HierarchicalLocation,
|
|||||||
has_normalizations = false,
|
has_normalizations = false,
|
||||||
meshRows = dim,
|
meshRows = dim,
|
||||||
meshColumns = dim,
|
meshColumns = dim,
|
||||||
dma_buswidth = dim * 32,
|
|
||||||
tile_latency = 0,
|
tile_latency = 0,
|
||||||
sp_capacity = CapacityInKilobytes(spSizeInKB),
|
dma_maxbytes = site(CacheBlockBytes),
|
||||||
|
dma_buswidth = dim * 32,
|
||||||
|
tl_ext_mem_base = smKey.address,
|
||||||
|
sp_banks = smKey.numBanks,
|
||||||
|
sp_capacity = CapacityInKilobytes(smKey.size >> 10),
|
||||||
acc_capacity = CapacityInKilobytes(accSizeInKB),
|
acc_capacity = CapacityInKilobytes(accSizeInKB),
|
||||||
))
|
))
|
||||||
List.tabulate(1)(i => GemminiTileAttachParams(
|
List.tabulate(1)(i => GemminiTileAttachParams(
|
||||||
@@ -94,8 +107,7 @@ class WithRadianceGemmini(location: HierarchicalLocation,
|
|||||||
)) ++ prev
|
)) ++ prev
|
||||||
}
|
}
|
||||||
}) {
|
}) {
|
||||||
def this(location: HierarchicalLocation = InSubsystem,
|
def this(location: HierarchicalLocation = InSubsystem, dim: Int, accSizeInKB: Int) =
|
||||||
dim: Int, extMemBase: BigInt, spSizeInKB: Int, accSizeInKB: Int) =
|
|
||||||
this(location, RocketCrossingParams(
|
this(location, RocketCrossingParams(
|
||||||
master = HierarchicalElementMasterPortParams.locationDefault(location),
|
master = HierarchicalElementMasterPortParams.locationDefault(location),
|
||||||
slave = HierarchicalElementSlavePortParams.locationDefault(location),
|
slave = HierarchicalElementSlavePortParams.locationDefault(location),
|
||||||
@@ -103,9 +115,27 @@ class WithRadianceGemmini(location: HierarchicalLocation,
|
|||||||
case InSubsystem => CBUS
|
case InSubsystem => CBUS
|
||||||
case InCluster(clusterId) => CCBUS(clusterId)
|
case InCluster(clusterId) => CCBUS(clusterId)
|
||||||
}
|
}
|
||||||
), dim, extMemBase, spSizeInKB, accSizeInKB)
|
), dim, accSizeInKB)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class WithRadianceSharedMem(address: BigInt,
|
||||||
|
size: Int,
|
||||||
|
numBanks: Int,
|
||||||
|
numWords: Int,
|
||||||
|
strideByWord: Boolean = true,
|
||||||
|
filterAligned: Boolean = true,
|
||||||
|
disableMonitors: Boolean = true,
|
||||||
|
serializeUnaligned: Boolean = true
|
||||||
|
) extends Config((site, _, _) => {
|
||||||
|
case RadianceSharedMemKey => {
|
||||||
|
require(isPow2(size) && size >= 1024)
|
||||||
|
Some(RadianceSharedMemKey(
|
||||||
|
address, size, numBanks, numWords, 4, strideByWord,
|
||||||
|
filterAligned, disableMonitors, serializeUnaligned
|
||||||
|
))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
class WithFuzzerCores(
|
class WithFuzzerCores(
|
||||||
n: Int,
|
n: Int,
|
||||||
useVxCache: Boolean
|
useVxCache: Boolean
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import gemmini._
|
|||||||
import midas.targetutils.SynthesizePrintf
|
import midas.targetutils.SynthesizePrintf
|
||||||
import org.chipsalliance.cde.config.Parameters
|
import org.chipsalliance.cde.config.Parameters
|
||||||
import radiance.memory._
|
import radiance.memory._
|
||||||
|
import radiance.subsystem.RadianceSharedMemKey
|
||||||
|
|
||||||
case class RadianceClusterParams(
|
case class RadianceClusterParams(
|
||||||
val clusterId: Int,
|
val clusterId: Int,
|
||||||
@@ -42,7 +43,6 @@ class RadianceCluster (
|
|||||||
//
|
//
|
||||||
// Instantiate the same number of banks as there are lanes.
|
// Instantiate the same number of banks as there are lanes.
|
||||||
// val numLsuLanes = 4 // FIXME: hardcoded
|
// val numLsuLanes = 4 // FIXME: hardcoded
|
||||||
val wordSize = 4
|
|
||||||
|
|
||||||
// must toSeq here, otherwise Iterable is lazy and will break diplomacy
|
// must toSeq here, otherwise Iterable is lazy and will break diplomacy
|
||||||
val gemminis = leafTiles.values.filter(_.isInstanceOf[GemminiTile]).toSeq.asInstanceOf[Seq[GemminiTile]]
|
val gemminis = leafTiles.values.filter(_.isInstanceOf[GemminiTile]).toSeq.asInstanceOf[Seq[GemminiTile]]
|
||||||
@@ -72,15 +72,17 @@ class RadianceCluster (
|
|||||||
val unified_mem_read_node = TLIdentityNode()
|
val unified_mem_read_node = TLIdentityNode()
|
||||||
val unified_mem_write_node = TLIdentityNode()
|
val unified_mem_write_node = TLIdentityNode()
|
||||||
|
|
||||||
val spad_data_len = gemminiConfig.sp_width / 8
|
val smem_key = p(RadianceSharedMemKey).get
|
||||||
val acc_data_len = gemminiConfig.sp_width / gemminiConfig.inputType.getWidth * gemminiConfig.accType.getWidth / 8
|
val wordSize = smem_key.wordSize
|
||||||
|
val smem_base = smem_key.address
|
||||||
val smem_base = gemminiConfig.tl_ext_mem_base
|
val smem_banks = smem_key.numBanks
|
||||||
val smem_width = spad_data_len
|
val smem_width = smem_key.numWords * smem_key.wordSize
|
||||||
val smem_depth = gemminiConfig.sp_bank_entries * spad_data_len / smem_width
|
val smem_depth = smem_key.size / smem_width / smem_banks
|
||||||
val smem_banks = gemminiConfig.sp_banks
|
|
||||||
val smem_subbanks = smem_width / wordSize
|
val smem_subbanks = smem_width / wordSize
|
||||||
val smem_size = smem_width * smem_depth * smem_banks
|
val smem_size = smem_width * smem_depth * smem_banks
|
||||||
|
assert(gemminiConfig.sp_banks == smem_banks)
|
||||||
|
assert(gemminiConfig.sp_width / 8 == smem_width)
|
||||||
|
assert(gemminiConfig.sp_bank_entries == smem_depth)
|
||||||
|
|
||||||
val stride_by_word = true
|
val stride_by_word = true
|
||||||
val filter_aligned = true
|
val filter_aligned = true
|
||||||
@@ -298,13 +300,13 @@ class RadianceCluster (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
unified_mem_read_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_read_nodes
|
unified_mem_read_node :=* TLWidthWidget(smem_width) :=* gemmini.spad_read_nodes
|
||||||
unified_mem_write_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_write_nodes
|
unified_mem_write_node :=* TLWidthWidget(smem_width) :=* gemmini.spad_write_nodes
|
||||||
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node
|
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node
|
||||||
|
|
||||||
val splitter_node = RWSplitterNode()
|
val splitter_node = RWSplitterNode()
|
||||||
unified_mem_read_node := TLWidthWidget(spad_data_len) := splitter_node
|
unified_mem_read_node := TLWidthWidget(smem_width) := splitter_node
|
||||||
unified_mem_write_node := TLWidthWidget(spad_data_len) := splitter_node
|
unified_mem_write_node := TLWidthWidget(smem_width) := splitter_node
|
||||||
|
|
||||||
radiance_smem_fanout.foreach(clbus.inwardNode := _)
|
radiance_smem_fanout.foreach(clbus.inwardNode := _)
|
||||||
splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode
|
splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode
|
||||||
@@ -334,7 +336,8 @@ class RadianceCluster (
|
|||||||
|
|
||||||
val traceTLNode = TLAdapterNode(clientFn = c => c, managerFn = m => m)
|
val traceTLNode = TLAdapterNode(clientFn = c => c, managerFn = m => m)
|
||||||
// printf and perf counter buffer
|
// printf and perf counter buffer
|
||||||
TLRAM(AddressSet(x"ff000000" + smem_size, numCores * 0x200 - 1)) := traceTLNode := TLFragmenter(4, 4) := clbus.outwardNode
|
TLRAM(AddressSet(x"ff000000" + smem_size, numCores * 0x200 - 1)) := traceTLNode :=
|
||||||
|
TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode
|
||||||
|
|
||||||
|
|
||||||
// Diplomacy sink nodes for cluster-wide barrier sync signal
|
// Diplomacy sink nodes for cluster-wide barrier sync signal
|
||||||
@@ -455,12 +458,15 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
|||||||
metadata_pipe.ready := r_node.d.ready
|
metadata_pipe.ready := r_node.d.ready
|
||||||
|
|
||||||
// WRITE
|
// WRITE
|
||||||
mem.io.wen := w_node.a.fire
|
mem.io.wen := RegNext(w_node.a.fire)
|
||||||
mem.io.wdata := w_node.a.bits.data
|
mem.io.wdata := RegNext(w_node.a.bits.data)
|
||||||
mem.io.mask := w_node.a.bits.mask.asBools
|
mem.io.mask := RegNext(VecInit(w_node.a.bits.mask.asBools))
|
||||||
w_node.a.ready := w_node.d.ready// && (mem.io.waddr =/= mem.io.raddr)
|
|
||||||
w_node.d.valid := w_node.a.valid
|
val write_resp = Wire(Flipped(w_node.d.cloneType))
|
||||||
w_node.d.bits := w_edge.AccessAck(w_node.a.bits)
|
write_resp.bits := w_edge.AccessAck(w_node.a.bits)
|
||||||
|
write_resp.valid := w_node.a.valid
|
||||||
|
w_node.a.ready := write_resp.ready
|
||||||
|
w_node.d <> Queue(write_resp, 2)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (outer.stride_by_word) {
|
if (outer.stride_by_word) {
|
||||||
|
|||||||
Reference in New Issue
Block a user