camelCase
This commit is contained in:
@@ -64,74 +64,73 @@ class RadianceCluster (
|
||||
// /___/_/ /_/___/_/ /_/
|
||||
//
|
||||
// **************************************
|
||||
val unifiedMemReadNode = TLIdentityNode()
|
||||
val unifiedMemWriteNode = TLIdentityNode()
|
||||
|
||||
val unified_mem_read_node = TLIdentityNode()
|
||||
val unified_mem_write_node = TLIdentityNode()
|
||||
|
||||
val smem_key = p(RadianceSharedMemKey).get
|
||||
val wordSize = smem_key.wordSize
|
||||
val smem_base = smem_key.address
|
||||
val smem_banks = smem_key.numBanks
|
||||
val smem_width = smem_key.numWords * smem_key.wordSize
|
||||
val smem_depth = smem_key.size / smem_width / smem_banks
|
||||
val smem_subbanks = smem_width / wordSize
|
||||
val smem_size = smem_width * smem_depth * smem_banks
|
||||
val smemKey = p(RadianceSharedMemKey).get
|
||||
val wordSize = smemKey.wordSize
|
||||
val smemBase = smemKey.address
|
||||
val smemBanks = smemKey.numBanks
|
||||
val smemWidth = smemKey.numWords * smemKey.wordSize
|
||||
val smemDepth = smemKey.size / smemWidth / smemBanks
|
||||
val smemSubbanks = smemWidth / wordSize
|
||||
val smemSize = smemWidth * smemDepth * smemBanks
|
||||
|
||||
gemminiConfigs.foreach { config =>
|
||||
assert(smem_banks == config.sp_banks && isPow2(smem_banks / config.sp_banks)) // TODO: should allow >=
|
||||
assert(smem_width >= (config.sp_width / 8) && isPow2(smem_width / (config.sp_width / 8)))
|
||||
assert(smem_size == config.sp_capacity.asInstanceOf[CapacityInKilobytes].kilobytes * 1024)
|
||||
assert(smemBanks == config.sp_banks && isPow2(smemBanks / config.sp_banks)) // TODO: should allow >=
|
||||
assert(smemWidth >= (config.sp_width / 8) && isPow2(smemWidth / (config.sp_width / 8)))
|
||||
assert(smemSize == config.sp_capacity.asInstanceOf[CapacityInKilobytes].kilobytes * 1024)
|
||||
}
|
||||
|
||||
val stride_by_word = true
|
||||
val filter_aligned = true
|
||||
val disable_monitors = true // otherwise it generates 1k+ different TL monitors
|
||||
val serialize_unaligned = true
|
||||
val strideByWord = true
|
||||
val filterAligned = true
|
||||
val disableMonitors = true // otherwise it generates 1k+ different TL monitors
|
||||
val serializeUnaligned = true
|
||||
|
||||
def guard_monitors[T](callback: Parameters => T)(implicit p: Parameters): Unit = {
|
||||
if (disable_monitors) {
|
||||
def guardMonitors[T](callback: Parameters => T)(implicit p: Parameters): Unit = {
|
||||
if (disableMonitors) {
|
||||
DisableMonitors { callback }
|
||||
} else {
|
||||
callback(p)
|
||||
}
|
||||
}
|
||||
def connect_one[T <: TLNode](from: TLNode, to: () => T): T = {
|
||||
def connectOne[T <: TLNode](from: TLNode, to: () => T): T = {
|
||||
val t = to()
|
||||
guard_monitors { implicit p => t := from }
|
||||
guardMonitors { implicit p => t := from }
|
||||
t
|
||||
}
|
||||
def connect_xbar_name(from: TLNode, name: Option[String] = None,
|
||||
def connectXbarName(from: TLNode, name: Option[String] = None,
|
||||
policy: TLArbiter.Policy = TLArbiter.roundRobin): TLNexusNode = {
|
||||
val t = LazyModule(new TLXbar(policy))
|
||||
name.map(t.suggestName)
|
||||
guard_monitors { implicit p => t.node := from }
|
||||
guardMonitors { implicit p => t.node := from }
|
||||
t.node
|
||||
}
|
||||
def connect_xbar(from: TLNode): TLNexusNode = {
|
||||
connect_xbar_name(from, None)
|
||||
def connectXbar(from: TLNode): TLNexusNode = {
|
||||
connectXbarName(from, None)
|
||||
}
|
||||
|
||||
val radiance_smem_fanout = radianceTiles.zipWithIndex.flatMap { case (tile, cid) =>
|
||||
val radianceSmemFanout = radianceTiles.zipWithIndex.flatMap { case (tile, cid) =>
|
||||
tile.smemNodes.zipWithIndex.map { case (m, lid) =>
|
||||
val smem_fanout_xbar = LazyModule(new TLXbar())
|
||||
smem_fanout_xbar.suggestName(f"rad_smem_fanout_cl${thisClusterParams.clusterId}_c${cid}_l${lid}_xbar")
|
||||
smem_fanout_xbar.node :=* m
|
||||
smem_fanout_xbar.node
|
||||
val smemFanoutXbar = LazyModule(new TLXbar())
|
||||
smemFanoutXbar.suggestName(f"rad_smem_fanout_cl${thisClusterParams.clusterId}_c${cid}_l${lid}_xbar")
|
||||
smemFanoutXbar.node :=* m
|
||||
smemFanoutXbar.node
|
||||
}
|
||||
}
|
||||
|
||||
require(isPow2(smem_banks))
|
||||
require(isPow2(smemBanks))
|
||||
// collection of read and write managers for each sram (sub)bank
|
||||
val smem_bank_mgrs : Seq[Seq[TLManagerNode]] = if (stride_by_word) {
|
||||
require(isPow2(smem_subbanks))
|
||||
(0 until smem_banks).flatMap { bid =>
|
||||
(0 until smem_subbanks).map { wid =>
|
||||
val smemBankMgrs : Seq[Seq[TLManagerNode]] = if (strideByWord) {
|
||||
require(isPow2(smemSubbanks))
|
||||
(0 until smemBanks).flatMap { bid =>
|
||||
(0 until smemSubbanks).map { wid =>
|
||||
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||
managers = Seq(TLSlaveParameters.v2(
|
||||
name = Some(f"sp_bank${bid}_word${wid}_read_mgr"),
|
||||
address = Seq(AddressSet(
|
||||
smem_base + (smem_depth * smem_width * bid) + wordSize * wid,
|
||||
smem_depth * smem_width - smem_width + wordSize - 1
|
||||
smemBase + (smemDepth * smemWidth * bid) + wordSize * wid,
|
||||
smemDepth * smemWidth - smemWidth + wordSize - 1
|
||||
)),
|
||||
supports = TLMasterToSlaveTransferSizes(
|
||||
get = TransferSizes(wordSize, wordSize)),
|
||||
@@ -143,8 +142,8 @@ class RadianceCluster (
|
||||
managers = Seq(TLSlaveParameters.v2(
|
||||
name = Some(f"sp_bank${bid}_word${wid}_write_mgr"),
|
||||
address = Seq(AddressSet(
|
||||
smem_base + (smem_depth * smem_width * bid) + wordSize * wid,
|
||||
smem_depth * smem_width - smem_width + wordSize - 1
|
||||
smemBase + (smemDepth * smemWidth * bid) + wordSize * wid,
|
||||
smemDepth * smemWidth - smemWidth + wordSize - 1
|
||||
)),
|
||||
supports = TLMasterToSlaveTransferSizes(
|
||||
putFull = TransferSizes(wordSize, wordSize),
|
||||
@@ -156,94 +155,94 @@ class RadianceCluster (
|
||||
}
|
||||
}
|
||||
} else {
|
||||
(0 until smem_banks).map { bank =>
|
||||
(0 until smemBanks).map { bank =>
|
||||
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||
managers = Seq(TLSlaveParameters.v2(
|
||||
name = Some(f"sp_bank${bank}_read_mgr"),
|
||||
address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank),
|
||||
smem_depth * smem_width - 1)),
|
||||
address = Seq(AddressSet(smemBase + (smemDepth * smemWidth * bank),
|
||||
smemDepth * smemWidth - 1)),
|
||||
supports = TLMasterToSlaveTransferSizes(
|
||||
get = TransferSizes(1, smem_width)),
|
||||
get = TransferSizes(1, smemWidth)),
|
||||
fifoId = Some(0)
|
||||
)),
|
||||
beatBytes = smem_width
|
||||
beatBytes = smemWidth
|
||||
))
|
||||
), TLManagerNode(Seq(TLSlavePortParameters.v1(
|
||||
managers = Seq(TLSlaveParameters.v2(
|
||||
name = Some(f"sp_bank${bank}_write_mgr"),
|
||||
address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank),
|
||||
smem_depth * smem_width - 1)),
|
||||
address = Seq(AddressSet(smemBase + (smemDepth * smemWidth * bank),
|
||||
smemDepth * smemWidth - 1)),
|
||||
supports = TLMasterToSlaveTransferSizes(
|
||||
putFull = TransferSizes(1, smem_width),
|
||||
putPartial = TransferSizes(1, smem_width)),
|
||||
putFull = TransferSizes(1, smemWidth),
|
||||
putPartial = TransferSizes(1, smemWidth)),
|
||||
fifoId = Some(0)
|
||||
)),
|
||||
beatBytes = smem_width
|
||||
beatBytes = smemWidth
|
||||
))))
|
||||
}
|
||||
}
|
||||
|
||||
val uniform_policy_nodes: Seq[ArrayBuffer[ArrayBuffer[ExtPolicyMasterNode]]] = // mutable
|
||||
Seq.fill(2)(ArrayBuffer.fill(smem_banks)(ArrayBuffer.fill(smem_subbanks)(null)))
|
||||
val uniform_nodes_in: Seq[ArrayBuffer[ArrayBuffer[Seq[TLIdentityNode]]]] =
|
||||
Seq.fill(2)(ArrayBuffer.fill(smem_banks)(ArrayBuffer.fill(smem_subbanks)(Seq())))
|
||||
val uniform_nodes_out: Seq[ArrayBuffer[ArrayBuffer[TLIdentityNode]]] =
|
||||
Seq.fill(2)(ArrayBuffer.fill(smem_banks)(ArrayBuffer.fill(smem_subbanks)(null)))
|
||||
val uniformPolicyNodes: Seq[ArrayBuffer[ArrayBuffer[ExtPolicyMasterNode]]] = // mutable
|
||||
Seq.fill(2)(ArrayBuffer.fill(smemBanks)(ArrayBuffer.fill(smemSubbanks)(null)))
|
||||
val uniformNodesIn: Seq[ArrayBuffer[ArrayBuffer[Seq[TLIdentityNode]]]] =
|
||||
Seq.fill(2)(ArrayBuffer.fill(smemBanks)(ArrayBuffer.fill(smemSubbanks)(Seq())))
|
||||
val uniformNodesOut: Seq[ArrayBuffer[ArrayBuffer[TLIdentityNode]]] =
|
||||
Seq.fill(2)(ArrayBuffer.fill(smemBanks)(ArrayBuffer.fill(smemSubbanks)(null)))
|
||||
|
||||
val (uniform_r_nodes, uniform_w_nodes, _, _) =
|
||||
val (uniformRNodes, uniformWNodes, _, _) =
|
||||
|
||||
if (stride_by_word) {
|
||||
def dist_and_duplicate(nodes: Seq[TLNode], suffix: String): Seq[Seq[TLNexusNode]] = {
|
||||
val word_fanout_nodes = gemminis.zip(nodes).zipWithIndex.map { case ((gemmini, node), gemmini_idx) =>
|
||||
val sp_width_bytes = gemmini.config.sp_width / 8
|
||||
val sp_subbanks = sp_width_bytes / wordSize
|
||||
val dist = DistributorNode(from = sp_width_bytes, to = wordSize)
|
||||
guard_monitors { implicit p =>
|
||||
if (strideByWord) {
|
||||
def distAndDuplicate(nodes: Seq[TLNode], suffix: String): Seq[Seq[TLNexusNode]] = {
|
||||
val wordFanoutNodes = gemminis.zip(nodes).zipWithIndex.map { case ((gemmini, node), gemminiIdx) =>
|
||||
val spWidthBytes = gemmini.config.sp_width / 8
|
||||
val spSubbanks = spWidthBytes / wordSize
|
||||
val dist = DistributorNode(from = spWidthBytes, to = wordSize)
|
||||
guardMonitors { implicit p =>
|
||||
dist := node
|
||||
}
|
||||
val fanout = Seq.tabulate(sp_subbanks) { w =>
|
||||
val fanout = Seq.tabulate(spSubbanks) { w =>
|
||||
val buf = TLBuffer(BufferParams(1, false, true), BufferParams(0))
|
||||
buf := dist
|
||||
connect_xbar_name(buf, Some(s"spad_g${gemmini_idx}w${w}_fanout_$suffix"))
|
||||
connectXbarName(buf, Some(s"spad_g${gemminiIdx}w${w}_fanout_$suffix"))
|
||||
}
|
||||
Seq.fill(smem_width / sp_width_bytes)(fanout).flatten // smem wider than spad, duplicate masters
|
||||
Seq.fill(smemWidth / spWidthBytes)(fanout).flatten // smem wider than spad, duplicate masters
|
||||
}
|
||||
// (gemmini, word) => (word, gemmini)
|
||||
word_fanout_nodes.transpose
|
||||
wordFanoutNodes.transpose
|
||||
}
|
||||
|
||||
// (banks, subbanks, gemminis)
|
||||
val spad_read_nodes = Seq.fill(smem_banks)(dist_and_duplicate(gemminis.map(_.spad_read_nodes), "r"))
|
||||
val spad_write_nodes = Seq.fill(smem_banks)(dist_and_duplicate(gemminis.map(_.spad_write_nodes), "w"))
|
||||
val spad_sp_write_nodes_single_bank = dist_and_duplicate(gemminis.map(_.spad.spad_writer.node), "ws")
|
||||
val spad_sp_write_nodes = Seq.fill(smem_banks)(spad_sp_write_nodes_single_bank) // executed only once
|
||||
val spadReadNodes = Seq.fill(smemBanks)(distAndDuplicate(gemminis.map(_.spad_read_nodes), "r"))
|
||||
val spadWriteNodes = Seq.fill(smemBanks)(distAndDuplicate(gemminis.map(_.spad_write_nodes), "w"))
|
||||
val spadSpWriteNodesSingleBank = distAndDuplicate(gemminis.map(_.spad.spad_writer.node), "ws")
|
||||
val spadSpWriteNodes = Seq.fill(smemBanks)(spadSpWriteNodesSingleBank) // executed only once
|
||||
|
||||
val (uniform_r_nodes, uniform_w_nodes, nonuniform_r_nodes, nonuniform_w_nodes):
|
||||
(Seq[Seq[Seq[TLNexusNode]]], Seq[Seq[Seq[TLNexusNode]]], Seq[TLNode], Seq[TLNode]) = if (filter_aligned) {
|
||||
val (uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes):
|
||||
(Seq[Seq[Seq[TLNexusNode]]], Seq[Seq[Seq[TLNexusNode]]], Seq[TLNode], Seq[TLNode]) = if (filterAligned) {
|
||||
|
||||
val num_lsu_lanes = radianceTiles.head.numLsuLanes
|
||||
val num_lane_dupes = Math.max(1, smem_subbanks / num_lsu_lanes)
|
||||
val filter_range = Math.min(smem_subbanks, num_lsu_lanes)
|
||||
println(s"num_lsu_lanes ${num_lsu_lanes} num_lane_dupes ${num_lane_dupes} filter_range ${filter_range}")
|
||||
val numLsuLanes = radianceTiles.head.numLsuLanes
|
||||
val numLaneDupes = Math.max(1, smemSubbanks / numLsuLanes)
|
||||
val filterRange = Math.min(smemSubbanks, numLsuLanes)
|
||||
println(s"num_lsu_lanes ${numLsuLanes} num_lane_dupes ${numLaneDupes} filter_range ${filterRange}")
|
||||
|
||||
// (subbank, sources, aligned) = rw node
|
||||
val (f_aligned, f_unaligned) = if (num_lsu_lanes >= smem_subbanks) {
|
||||
val filter_nodes: Seq[Seq[(TLNode, TLNode)]] = Seq.tabulate(num_lane_dupes) { did =>
|
||||
Seq.tabulate(filter_range) { wid =>
|
||||
val true_wid = did * filter_range + wid
|
||||
val address = AddressSet(smem_base + wordSize * true_wid, (smem_size - 1) - (smem_subbanks - 1) * wordSize)
|
||||
val (fAligned, fUnaligned) = if (numLsuLanes >= smemSubbanks) {
|
||||
val filterNodes: Seq[Seq[(TLNode, TLNode)]] = Seq.tabulate(numLaneDupes) { did =>
|
||||
Seq.tabulate(filterRange) { wid =>
|
||||
val trueWid = did * filterRange + wid
|
||||
val address = AddressSet(smemBase + wordSize * trueWid, (smemSize - 1) - (smemSubbanks - 1) * wordSize)
|
||||
|
||||
radiance_smem_fanout.grouped(num_lsu_lanes).toList.zipWithIndex.flatMap { case (lanes, cid) =>
|
||||
radianceSmemFanout.grouped(numLsuLanes).toList.zipWithIndex.flatMap { case (lanes, cid) =>
|
||||
lanes.zipWithIndex.flatMap { case (lane, lid) =>
|
||||
if ((lid % filter_range) == wid) {
|
||||
if ((lid % filterRange) == wid) {
|
||||
println(f"c${cid}_l${lid} connected to d${did}w${wid}")
|
||||
val filter_node = AlignFilterNode(Seq(address))(p, ValName(s"filter_l${lid}_w${true_wid}"), info)
|
||||
DisableMonitors { implicit p => filter_node := lane }
|
||||
val filterNode = AlignFilterNode(Seq(address))(p, ValName(s"filter_l${lid}_w${trueWid}"), info)
|
||||
DisableMonitors { implicit p => filterNode := lane }
|
||||
// Seq((aligned splitter, unaligned splitter))
|
||||
Seq((
|
||||
connect_one(filter_node, () =>
|
||||
RWSplitterNode(address, s"aligned_splitter_c${cid}_l${lid}_w${true_wid}")),
|
||||
connect_one(filter_node, () =>
|
||||
connectOne(filterNode, () =>
|
||||
RWSplitterNode(address, s"aligned_splitter_c${cid}_l${lid}_w${trueWid}")),
|
||||
connectOne(filterNode, () =>
|
||||
RWSplitterNode(AddressSet.everything, s"unaligned_splitter_c${cid}_l${lid}"))
|
||||
))
|
||||
} else Seq()
|
||||
@@ -252,174 +251,174 @@ class RadianceCluster (
|
||||
}
|
||||
}.flatten
|
||||
|
||||
val f_aligned = Seq.fill(2)(filter_nodes.map(_.map(_._1).map(connect_xbar_name(_, Some("rad_aligned")))))
|
||||
val f_unaligned = if (serialize_unaligned) {
|
||||
val fAligned = Seq.fill(2)(filterNodes.map(_.map(_._1).map(connectXbarName(_, Some("rad_aligned")))))
|
||||
val fUnaligned = if (serializeUnaligned) {
|
||||
Seq.fill(2) {
|
||||
val serialized_node = TLEphemeralNode()
|
||||
val serialized_in_xbar = LazyModule(new TLXbar())
|
||||
val serialized_out_xbar = LazyModule(new TLXbar())
|
||||
serialized_in_xbar.suggestName("unaligned_serialized_in_xbar")
|
||||
serialized_out_xbar.suggestName("unaligned_serialized_out_xbar")
|
||||
guard_monitors { implicit p =>
|
||||
filter_nodes.foreach(_.map(_._2).foreach(serialized_in_xbar.node := _))
|
||||
serialized_node := serialized_in_xbar.node
|
||||
serialized_out_xbar.node := serialized_node
|
||||
val serializedNode = TLEphemeralNode()
|
||||
val serializedInXbar = LazyModule(new TLXbar())
|
||||
val serializedOutXbar = LazyModule(new TLXbar())
|
||||
serializedInXbar.suggestName("unaligned_serialized_in_xbar")
|
||||
serializedOutXbar.suggestName("unaligned_serialized_out_xbar")
|
||||
guardMonitors { implicit p =>
|
||||
filterNodes.foreach(_.map(_._2).foreach(serializedInXbar.node := _))
|
||||
serializedNode := serializedInXbar.node
|
||||
serializedOutXbar.node := serializedNode
|
||||
}
|
||||
Seq(serialized_out_xbar.node)
|
||||
Seq(serializedOutXbar.node)
|
||||
}
|
||||
} else {
|
||||
Seq.fill(2)(filter_nodes.flatMap(_.map(_._2).map(connect_xbar)))
|
||||
Seq.fill(2)(filterNodes.flatMap(_.map(_._2).map(connectXbar)))
|
||||
}
|
||||
(f_aligned, f_unaligned)
|
||||
(fAligned, fUnaligned)
|
||||
} else { // aligned: (subbanks, cores) = rw node
|
||||
// (lanes, cores) = filter_node
|
||||
val filter_nodes = Seq.tabulate(filter_range) { wid =>
|
||||
val addresses = Seq.tabulate(num_lane_dupes) { did =>
|
||||
AddressSet(smem_base + (did * filter_range + wid) * wordSize,
|
||||
(smem_size - 1) - (smem_subbanks - 1) * wordSize)
|
||||
val filterNodes = Seq.tabulate(filterRange) { wid =>
|
||||
val addresses = Seq.tabulate(numLaneDupes) { did =>
|
||||
AddressSet(smemBase + (did * filterRange + wid) * wordSize,
|
||||
(smemSize - 1) - (smemSubbanks - 1) * wordSize)
|
||||
}
|
||||
radiance_smem_fanout.grouped(num_lsu_lanes).toSeq.zipWithIndex.map { case (lanes, cid) =>
|
||||
radianceSmemFanout.grouped(numLsuLanes).toSeq.zipWithIndex.map { case (lanes, cid) =>
|
||||
val lane = lanes(wid)
|
||||
val filter_node = AlignFilterNode(addresses)(p, ValName(s"filter_c${cid}_w${wid}"), info)
|
||||
guard_monitors { implicit p =>
|
||||
filter_node := lane
|
||||
val filterNode = AlignFilterNode(addresses)(p, ValName(s"filter_c${cid}_w${wid}"), info)
|
||||
guardMonitors { implicit p =>
|
||||
filterNode := lane
|
||||
}
|
||||
filter_node
|
||||
filterNode
|
||||
}
|
||||
}
|
||||
val f_aligned_rw = Seq.tabulate(num_lane_dupes) { did =>
|
||||
filter_nodes.zipWithIndex.map { case (cores, lid) =>
|
||||
val fAlignedRw = Seq.tabulate(numLaneDupes) { did =>
|
||||
filterNodes.zipWithIndex.map { case (cores, lid) =>
|
||||
cores.zipWithIndex.map { case (fn, cid) =>
|
||||
val address = AddressSet(smem_base + (did * filter_range + lid) * wordSize,
|
||||
(smem_size - 1) - (smem_subbanks - 1) * wordSize)
|
||||
connect_one(fn, () => RWSplitterNode(address, s"aligned_split_c${cid}_l${lid}_d${did}"))
|
||||
val address = AddressSet(smemBase + (did * filterRange + lid) * wordSize,
|
||||
(smemSize - 1) - (smemSubbanks - 1) * wordSize)
|
||||
connectOne(fn, () => RWSplitterNode(address, s"aligned_split_c${cid}_l${lid}_d${did}"))
|
||||
}
|
||||
}
|
||||
}.flatten
|
||||
val f_unaligned_rw = filter_nodes.zipWithIndex.flatMap { case (cores, lid) =>
|
||||
val fUnalignedRw = filterNodes.zipWithIndex.flatMap { case (cores, lid) =>
|
||||
cores.zipWithIndex.map { case (fn, cid) =>
|
||||
connect_one(fn, () => RWSplitterNode(AddressSet.everything, s"unaligned_split_c${cid}_l${lid}"))
|
||||
connectOne(fn, () => RWSplitterNode(AddressSet.everything, s"unaligned_split_c${cid}_l${lid}"))
|
||||
}
|
||||
}
|
||||
val f_aligned = Seq.fill(2)(f_aligned_rw.map(_.map(connect_xbar_name(_, Some("rad_aligned")))))
|
||||
val fAligned = Seq.fill(2)(fAlignedRw.map(_.map(connectXbarName(_, Some("rad_aligned")))))
|
||||
|
||||
val f_unaligned = if (serialize_unaligned) {
|
||||
val fUnaligned = if (serializeUnaligned) {
|
||||
Seq.fill(2) {
|
||||
val serialized_node = TLEphemeralNode()
|
||||
val serialized_in_xbar = TLXbar(nameSuffix = Some("unaligned_ser_in"))
|
||||
val serialized_out_xbar = TLXbar(nameSuffix = Some("unaligned_ser_out"))
|
||||
guard_monitors { implicit p =>
|
||||
f_unaligned_rw.foreach(serialized_in_xbar := _)
|
||||
serialized_node := serialized_in_xbar
|
||||
serialized_out_xbar := serialized_node
|
||||
val serializedNode = TLEphemeralNode()
|
||||
val serializedInXbar = TLXbar(nameSuffix = Some("unaligned_ser_in"))
|
||||
val serializedOutXbar = TLXbar(nameSuffix = Some("unaligned_ser_out"))
|
||||
guardMonitors { implicit p =>
|
||||
fUnalignedRw.foreach(serializedInXbar := _)
|
||||
serializedNode := serializedInXbar
|
||||
serializedOutXbar := serializedNode
|
||||
}
|
||||
Seq(serialized_out_xbar)
|
||||
Seq(serializedOutXbar)
|
||||
}
|
||||
} else {
|
||||
Seq.fill(2)(f_unaligned_rw.map(connect_xbar))
|
||||
Seq.fill(2)(fUnalignedRw.map(connectXbar))
|
||||
}
|
||||
(f_aligned, f_unaligned)
|
||||
(fAligned, fUnaligned)
|
||||
}
|
||||
|
||||
|
||||
val uniform_r_nodes: Seq[Seq[Seq[TLNexusNode]]] = spad_read_nodes.map { rb =>
|
||||
(rb zip f_aligned.head).map { case (rw, fa) => rw ++ fa }
|
||||
val uniformRNodes: Seq[Seq[Seq[TLNexusNode]]] = spadReadNodes.map { rb =>
|
||||
(rb zip fAligned.head).map { case (rw, fa) => rw ++ fa }
|
||||
}
|
||||
val uniform_w_nodes: Seq[Seq[Seq[TLNexusNode]]] = (spad_write_nodes zip spad_sp_write_nodes).map { case (wb, wsb) =>
|
||||
(wb lazyZip wsb lazyZip f_aligned.last).map {
|
||||
val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]] = (spadWriteNodes zip spadSpWriteNodes).map { case (wb, wsb) =>
|
||||
(wb lazyZip wsb lazyZip fAligned.last).map {
|
||||
case (ww, wsw, fa) => ww ++ wsw ++ fa
|
||||
}
|
||||
}
|
||||
|
||||
// all to all xbar
|
||||
val Seq(nonuniform_r_nodes, nonuniform_w_nodes) = f_unaligned
|
||||
val Seq(nonuniformRNodes, nonuniformWNodes) = fUnaligned
|
||||
|
||||
(uniform_r_nodes, uniform_w_nodes, nonuniform_r_nodes, nonuniform_w_nodes)
|
||||
(uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes)
|
||||
} else {
|
||||
val splitter_nodes = radiance_smem_fanout.map { connect_one(_, RWSplitterNode.apply) }
|
||||
val splitterNodes = radianceSmemFanout.map { connectOne(_, RWSplitterNode.apply) }
|
||||
// these nodes access an entire line simultaneously
|
||||
val uniform_r_nodes: Seq[Seq[Seq[TLNexusNode]]] = spad_read_nodes
|
||||
val uniform_w_nodes: Seq[Seq[Seq[TLNexusNode]]] = (spad_write_nodes zip spad_sp_write_nodes).map { case (wb, wsb) =>
|
||||
val uniformRNodes: Seq[Seq[Seq[TLNexusNode]]] = spadReadNodes
|
||||
val uniformWNodes: Seq[Seq[Seq[TLNexusNode]]] = (spadWriteNodes zip spadSpWriteNodes).map { case (wb, wsb) =>
|
||||
(wb zip wsb).map { case (ww, wsw) => ww ++ wsw }
|
||||
}
|
||||
// these nodes are random access
|
||||
val nonuniform_r_nodes: Seq[TLNode] = splitter_nodes.map(connect_xbar_name(_, Some("rad_unaligned_r")))
|
||||
val nonuniform_w_nodes: Seq[TLNode] = splitter_nodes.map(connect_xbar_name(_, Some("rad_unaligned_w")))
|
||||
val nonuniformRNodes: Seq[TLNode] = splitterNodes.map(connectXbarName(_, Some("rad_unaligned_r")))
|
||||
val nonuniformWNodes: Seq[TLNode] = splitterNodes.map(connectXbarName(_, Some("rad_unaligned_w")))
|
||||
|
||||
(uniform_r_nodes, uniform_w_nodes, nonuniform_r_nodes, nonuniform_w_nodes)
|
||||
(uniformRNodes, uniformWNodes, nonuniformRNodes, nonuniformWNodes)
|
||||
}
|
||||
|
||||
guard_monitors { implicit p => radiance_smem_fanout.foreach(clbus.inwardNode := _) }
|
||||
guardMonitors { implicit p => radianceSmemFanout.foreach(clbus.inwardNode := _) }
|
||||
|
||||
smem_bank_mgrs.grouped(smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) =>
|
||||
bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
|
||||
smemBankMgrs.grouped(smemSubbanks).zipWithIndex.foreach { case (bankMgrs, bid) =>
|
||||
bankMgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
|
||||
// TODO: this should be a coordinated round robin
|
||||
val subbank_r_xbar = LazyModule(new TLXbar(TLArbiter.lowestIndexFirst))
|
||||
val subbank_w_xbar = LazyModule(new TLXbar(TLArbiter.lowestIndexFirst))
|
||||
subbank_r_xbar.suggestName(s"smem_b${bid}_w${wid}_r_xbar")
|
||||
subbank_w_xbar.suggestName(s"smem_b${bid}_w${wid}_w_xbar")
|
||||
val subbankRXbar = LazyModule(new TLXbar(TLArbiter.lowestIndexFirst))
|
||||
val subbankWXbar = LazyModule(new TLXbar(TLArbiter.lowestIndexFirst))
|
||||
subbankRXbar.suggestName(s"smem_b${bid}_w${wid}_r_xbar")
|
||||
subbankWXbar.suggestName(s"smem_b${bid}_w${wid}_w_xbar")
|
||||
|
||||
guard_monitors { implicit p =>
|
||||
r := subbank_r_xbar.node
|
||||
w := subbank_w_xbar.node
|
||||
guardMonitors { implicit p =>
|
||||
r := subbankRXbar.node
|
||||
w := subbankWXbar.node
|
||||
|
||||
val ur_xbar = XbarWithExtPolicy(Some(s"ur_b${bid}_w${wid}"))
|
||||
val uw_xbar = XbarWithExtPolicy(Some(s"uw_b${bid}_w${wid}"))
|
||||
val r_policy_node = ExtPolicyMasterNode(uniform_r_nodes(bid)(wid).length)
|
||||
val w_policy_node = ExtPolicyMasterNode(uniform_w_nodes(bid)(wid).length)
|
||||
ur_xbar.policySlaveNode := r_policy_node
|
||||
uw_xbar.policySlaveNode := w_policy_node
|
||||
uniform_policy_nodes.head(bid)(wid) = r_policy_node
|
||||
uniform_policy_nodes.last(bid)(wid) = w_policy_node
|
||||
val urXbar = XbarWithExtPolicy(Some(s"ur_b${bid}_w${wid}"))
|
||||
val uwXbar = XbarWithExtPolicy(Some(s"uw_b${bid}_w${wid}"))
|
||||
val rPolicyNode = ExtPolicyMasterNode(uniformRNodes(bid)(wid).length)
|
||||
val wPolicyNode = ExtPolicyMasterNode(uniformWNodes(bid)(wid).length)
|
||||
urXbar.policySlaveNode := rPolicyNode
|
||||
uwXbar.policySlaveNode := wPolicyNode
|
||||
uniformPolicyNodes.head(bid)(wid) = rPolicyNode
|
||||
uniformPolicyNodes.last(bid)(wid) = wPolicyNode
|
||||
|
||||
(Seq(ur_xbar, uw_xbar) lazyZip uniform_nodes_in lazyZip Seq(uniform_r_nodes, uniform_w_nodes))
|
||||
.foreach { case (xbar, id_buf, u_nodes) =>
|
||||
(Seq(urXbar, uwXbar) lazyZip uniformNodesIn lazyZip Seq(uniformRNodes, uniformWNodes))
|
||||
.foreach { case (xbar, idBuf, uNodes) =>
|
||||
|
||||
id_buf(bid)(wid) = u_nodes(bid)(wid).map { u =>
|
||||
idBuf(bid)(wid) = uNodes(bid)(wid).map { u =>
|
||||
val id = TLIdentityNode()
|
||||
xbar.node := id := u
|
||||
id
|
||||
}
|
||||
}
|
||||
|
||||
// uniform_w_nodes(bid)(wid).foreach( uw_xbar.node := _ )
|
||||
uniform_nodes_out.head(bid)(wid) = TLIdentityNode()
|
||||
uniform_nodes_out.last(bid)(wid) = TLIdentityNode()
|
||||
subbank_r_xbar.node := uniform_nodes_out.head(bid)(wid) := ur_xbar.node
|
||||
subbank_w_xbar.node := uniform_nodes_out.last(bid)(wid) := uw_xbar.node
|
||||
// uniformWNodes(bid)(wid).foreach( uwXbar.node := _ )
|
||||
uniformNodesOut.head(bid)(wid) = TLIdentityNode()
|
||||
uniformNodesOut.last(bid)(wid) = TLIdentityNode()
|
||||
subbankRXbar.node := uniformNodesOut.head(bid)(wid) := urXbar.node
|
||||
subbankWXbar.node := uniformNodesOut.last(bid)(wid) := uwXbar.node
|
||||
|
||||
nonuniform_r_nodes.foreach( subbank_r_xbar.node := _ )
|
||||
nonuniform_w_nodes.foreach( subbank_w_xbar.node := _ )
|
||||
nonuniformRNodes.foreach( subbankRXbar.node := _ )
|
||||
nonuniformWNodes.foreach( subbankWXbar.node := _ )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(Some(uniform_r_nodes), Some(uniform_w_nodes), Some(nonuniform_r_nodes), Some(nonuniform_w_nodes))
|
||||
(Some(uniformRNodes), Some(uniformWNodes), Some(nonuniformRNodes), Some(nonuniformWNodes))
|
||||
} else {
|
||||
gemminis.foreach { gemmini =>
|
||||
unified_mem_read_node :=* TLWidthWidget(smem_width) :=* gemmini.spad_read_nodes
|
||||
unified_mem_write_node :=* TLWidthWidget(smem_width) :=* gemmini.spad_write_nodes
|
||||
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node
|
||||
unifiedMemReadNode :=* TLWidthWidget(smemWidth) :=* gemmini.spad_read_nodes
|
||||
unifiedMemWriteNode :=* TLWidthWidget(smemWidth) :=* gemmini.spad_write_nodes
|
||||
unifiedMemWriteNode := gemmini.spad.spad_writer.node // this is the dma write node
|
||||
}
|
||||
|
||||
val splitter_node = RWSplitterNode()
|
||||
unified_mem_read_node := TLWidthWidget(smem_width) := splitter_node
|
||||
unified_mem_write_node := TLWidthWidget(smem_width) := splitter_node
|
||||
val splitterNode = RWSplitterNode()
|
||||
unifiedMemReadNode := TLWidthWidget(smemWidth) := splitterNode
|
||||
unifiedMemWriteNode := TLWidthWidget(smemWidth) := splitterNode
|
||||
|
||||
radiance_smem_fanout.foreach(clbus.inwardNode := _)
|
||||
splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode
|
||||
radianceSmemFanout.foreach(clbus.inwardNode := _)
|
||||
splitterNode :=* TLWidthWidget(4) :=* clbus.outwardNode
|
||||
|
||||
val smem_r_xbar = TLXbar()
|
||||
val smem_w_xbar = TLXbar()
|
||||
val smemRXbar = TLXbar()
|
||||
val smemWXbar = TLXbar()
|
||||
DisableMonitors { implicit p =>
|
||||
smem_r_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_read_node
|
||||
smem_w_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_write_node
|
||||
smemRXbar :=* TLWidthWidget(wordSize) :=* unifiedMemReadNode
|
||||
smemWXbar :=* TLWidthWidget(wordSize) :=* unifiedMemWriteNode
|
||||
}
|
||||
|
||||
smem_bank_mgrs.foreach { mem =>
|
||||
smemBankMgrs.foreach { mem =>
|
||||
require(mem.length == 2)
|
||||
mem.head := smem_r_xbar
|
||||
mem.last := smem_w_xbar
|
||||
mem.head := smemRXbar
|
||||
mem.last := smemWXbar
|
||||
}
|
||||
|
||||
(None, None, None, None)
|
||||
@@ -444,7 +443,7 @@ class RadianceCluster (
|
||||
|
||||
val traceTLNode = TLAdapterNode(clientFn = c => c, managerFn = m => m)
|
||||
// printf and perf counter buffer
|
||||
TLRAM(AddressSet(smem_key.address + smem_size, numCoresInCluster * 0x200 - 1)) := traceTLNode :=
|
||||
TLRAM(AddressSet(smemKey.address + smemSize, numCoresInCluster * 0x200 - 1)) := traceTLNode :=
|
||||
TLBuffer() := TLFragmenter(4, 4) := clbus.outwardNode
|
||||
|
||||
p(RadianceFrameBufferKey).foreach { key =>
|
||||
@@ -517,82 +516,81 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: remove Pipeline dependency of gemmini
|
||||
def makeSmemBanks(): Unit = {
|
||||
def make_buffer[T <: Data](mem: TwoPortSyncMem[T], r_node: TLBundle, r_edge: TLEdgeIn,
|
||||
w_node: TLBundle, w_edge: TLEdgeIn): Unit = {
|
||||
mem.io.ren := r_node.a.fire
|
||||
def makeBuffer[T <: Data](mem: TwoPortSyncMem[T], rNode: TLBundle, rEdge: TLEdgeIn,
|
||||
wNode: TLBundle, wEdge: TLEdgeIn): Unit = {
|
||||
mem.io.ren := rNode.a.fire
|
||||
|
||||
val data_pipe_in = Wire(DecoupledIO(mem.io.rdata.cloneType))
|
||||
data_pipe_in.valid := RegNext(mem.io.ren)
|
||||
data_pipe_in.bits := mem.io.rdata
|
||||
val dataPipeIn = Wire(DecoupledIO(mem.io.rdata.cloneType))
|
||||
dataPipeIn.valid := RegNext(mem.io.ren)
|
||||
dataPipeIn.bits := mem.io.rdata
|
||||
|
||||
val metadata_pipe_in = Wire(DecoupledIO(new Bundle {
|
||||
val source = r_node.a.bits.source.cloneType
|
||||
val size = r_node.a.bits.size.cloneType
|
||||
val metadataPipeIn = Wire(DecoupledIO(new Bundle {
|
||||
val source = rNode.a.bits.source.cloneType
|
||||
val size = rNode.a.bits.size.cloneType
|
||||
}))
|
||||
metadata_pipe_in.valid := mem.io.ren
|
||||
metadata_pipe_in.bits.source := r_node.a.bits.source
|
||||
metadata_pipe_in.bits.size := r_node.a.bits.size
|
||||
metadataPipeIn.valid := mem.io.ren
|
||||
metadataPipeIn.bits.source := rNode.a.bits.source
|
||||
metadataPipeIn.bits.size := rNode.a.bits.size
|
||||
|
||||
val sram_read_backup_reg = RegInit(0.U.asTypeOf(Valid(mem.io.rdata.cloneType)))
|
||||
val sramReadBackupReg = RegInit(0.U.asTypeOf(Valid(mem.io.rdata.cloneType)))
|
||||
|
||||
val data_pipe_inst = Module(new Pipeline(data_pipe_in.bits.cloneType, 1)())
|
||||
data_pipe_inst.io.in <> data_pipe_in
|
||||
val data_pipe = data_pipe_inst.io.out
|
||||
val metadata_pipe = Pipeline(metadata_pipe_in, 2)
|
||||
assert((data_pipe.valid || sram_read_backup_reg.valid) === metadata_pipe.valid)
|
||||
val dataPipeInst = Module(new Pipeline(dataPipeIn.bits.cloneType, 1)())
|
||||
dataPipeInst.io.in <> dataPipeIn
|
||||
val dataPipe = dataPipeInst.io.out
|
||||
val metadataPipe = Pipeline(metadataPipeIn, 2)
|
||||
assert((dataPipe.valid || sramReadBackupReg.valid) === metadataPipe.valid)
|
||||
|
||||
// data pipe is filled, but D is not ready and SRAM read came back
|
||||
when (data_pipe.valid && !r_node.d.ready && data_pipe_in.valid) {
|
||||
assert(!data_pipe_in.ready) // we should fill backup reg only if data pipe is not enqueueing
|
||||
assert(!sram_read_backup_reg.valid) // backup reg should be empty
|
||||
assert(!metadata_pipe_in.ready) // metadata should have been filled in the previous cycle
|
||||
sram_read_backup_reg.valid := true.B
|
||||
sram_read_backup_reg.bits := mem.io.rdata
|
||||
when (dataPipe.valid && !rNode.d.ready && dataPipeIn.valid) {
|
||||
assert(!dataPipeIn.ready) // we should fill backup reg only if data pipe is not enqueueing
|
||||
assert(!sramReadBackupReg.valid) // backup reg should be empty
|
||||
assert(!metadataPipeIn.ready) // metadata should have been filled in the previous cycle
|
||||
sramReadBackupReg.valid := true.B
|
||||
sramReadBackupReg.bits := mem.io.rdata
|
||||
}.otherwise {
|
||||
assert(data_pipe_in.ready || !data_pipe_in.valid) // do not skip any response
|
||||
assert(dataPipeIn.ready || !dataPipeIn.valid) // do not skip any response
|
||||
}
|
||||
|
||||
assert(metadata_pipe_in.fire || !mem.io.ren) // when requesting sram, metadata needs to be ready
|
||||
assert(r_node.d.fire === metadata_pipe.fire) // metadata dequeues iff D fires
|
||||
assert(metadataPipeIn.fire || !mem.io.ren) // when requesting sram, metadata needs to be ready
|
||||
assert(rNode.d.fire === metadataPipe.fire) // metadata dequeues iff D fires
|
||||
|
||||
// when D becomes ready, and data pipe has emptied, time for backup to empty
|
||||
when (r_node.d.ready && sram_read_backup_reg.valid && !data_pipe.valid) {
|
||||
sram_read_backup_reg.valid := false.B
|
||||
when (rNode.d.ready && sramReadBackupReg.valid && !dataPipe.valid) {
|
||||
sramReadBackupReg.valid := false.B
|
||||
}
|
||||
// must empty backup before filling data pipe
|
||||
assert(!(sram_read_backup_reg.valid && data_pipe.valid && data_pipe_in.fire))
|
||||
assert(!(sramReadBackupReg.valid && dataPipe.valid && dataPipeIn.fire))
|
||||
|
||||
r_node.d.bits := r_edge.AccessAck(
|
||||
Mux(r_node.d.valid, metadata_pipe.bits.source, 0.U),
|
||||
Mux(r_node.d.valid, metadata_pipe.bits.size, 0.U),
|
||||
Mux(!data_pipe.valid, sram_read_backup_reg.bits, data_pipe.bits).asUInt)
|
||||
r_node.d.valid := data_pipe.valid || sram_read_backup_reg.valid
|
||||
rNode.d.bits := rEdge.AccessAck(
|
||||
Mux(rNode.d.valid, metadataPipe.bits.source, 0.U),
|
||||
Mux(rNode.d.valid, metadataPipe.bits.size, 0.U),
|
||||
Mux(!dataPipe.valid, sramReadBackupReg.bits, dataPipe.bits).asUInt)
|
||||
rNode.d.valid := dataPipe.valid || sramReadBackupReg.valid
|
||||
// r node A is not ready only if D is not ready and both slots filled
|
||||
r_node.a.ready := r_node.d.ready && !(data_pipe.valid && sram_read_backup_reg.valid)
|
||||
data_pipe.ready := r_node.d.ready
|
||||
metadata_pipe.ready := r_node.d.ready
|
||||
rNode.a.ready := rNode.d.ready && !(dataPipe.valid && sramReadBackupReg.valid)
|
||||
dataPipe.ready := rNode.d.ready
|
||||
metadataPipe.ready := rNode.d.ready
|
||||
|
||||
// WRITE
|
||||
mem.io.wen := RegNext(w_node.a.fire)
|
||||
mem.io.wdata := RegNext(w_node.a.bits.data)
|
||||
mem.io.mask := RegNext(VecInit(w_node.a.bits.mask.asBools))
|
||||
mem.io.wen := RegNext(wNode.a.fire)
|
||||
mem.io.wdata := RegNext(wNode.a.bits.data)
|
||||
mem.io.mask := RegNext(VecInit(wNode.a.bits.mask.asBools))
|
||||
|
||||
val write_resp = Wire(Flipped(w_node.d.cloneType))
|
||||
write_resp.bits := w_edge.AccessAck(w_node.a.bits)
|
||||
write_resp.valid := w_node.a.valid
|
||||
w_node.a.ready := write_resp.ready
|
||||
w_node.d <> Queue(write_resp, 2)
|
||||
val writeResp = Wire(Flipped(wNode.d.cloneType))
|
||||
writeResp.bits := wEdge.AccessAck(wNode.a.bits)
|
||||
writeResp.valid := wNode.a.valid
|
||||
wNode.a.ready := writeResp.ready
|
||||
wNode.d <> Queue(writeResp, 2)
|
||||
}
|
||||
|
||||
// read OR write access counter for smem banks
|
||||
val smem_bank_mgrs_grouped = outer.smem_bank_mgrs.grouped(outer.smem_subbanks)
|
||||
val numBanks = smem_bank_mgrs_grouped.length
|
||||
val smemBankMgrsGrouped = outer.smemBankMgrs.grouped(outer.smemSubbanks)
|
||||
val numBanks = smemBankMgrsGrouped.length
|
||||
val counterWidth = 32
|
||||
val smemReadsPerBankPerCycle = Seq.fill(numBanks)(Seq.fill(outer.smem_subbanks)
|
||||
val smemReadsPerBankPerCycle = Seq.fill(numBanks)(Seq.fill(outer.smemSubbanks)
|
||||
(Wire(UInt(counterWidth.W))))
|
||||
val smemWritesPerBankPerCycle = Seq.fill(numBanks)(Seq.fill(outer.smem_subbanks)
|
||||
val smemWritesPerBankPerCycle = Seq.fill(numBanks)(Seq.fill(outer.smemSubbanks)
|
||||
(Wire(UInt(counterWidth.W))))
|
||||
val smemReadsPerCycle = smemReadsPerBankPerCycle.map(_.reduce(_ + _)).reduce(_ + _)
|
||||
val smemWritesPerCycle = smemWritesPerBankPerCycle.map(_.reduce(_ + _)).reduce(_ + _)
|
||||
@@ -604,123 +602,122 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
|
||||
dontTouch(smemReadCounter)
|
||||
dontTouch(smemWriteCounter)
|
||||
|
||||
if (outer.stride_by_word) {
|
||||
val uniform_fires = Seq.fill(2)(VecInit.fill(outer.smem_banks)(VecInit.fill(outer.smem_subbanks)(false.B)))
|
||||
if (outer.strideByWord) {
|
||||
val uniformFires = Seq.fill(2)(VecInit.fill(outer.smemBanks)(VecInit.fill(outer.smemSubbanks)(false.B)))
|
||||
|
||||
outer.smem_bank_mgrs.grouped(outer.smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) =>
|
||||
outer.smemBankMgrs.grouped(outer.smemSubbanks).zipWithIndex.foreach { case (bankMgrs, bid) =>
|
||||
// TODO move this loop out
|
||||
// val Seq(valid_r_sources, valid_w_sources) = uniform_xbar_nodes.map(_(bid)).map { words =>
|
||||
// VecInit(words.map(_.out.map(_._1.a.valid)).transpose.map { words_with_same_idx =>
|
||||
// VecInit(words_with_same_idx.toSeq).asUInt.orR
|
||||
// }.toSeq).asUInt
|
||||
// }
|
||||
val word_selects_1h = Seq(
|
||||
Wire(UInt(outer.uniform_nodes_in.head(bid).head.length.W)).suggestName(s"ws_r_b${bid}"),
|
||||
Wire(UInt(outer.uniform_nodes_in.last(bid).head.length.W)).suggestName(s"ws_w_b${bid}"))
|
||||
val Seq(valid_r_sources, valid_w_sources) = outer.uniform_nodes_in.zipWithIndex.map { case (banks, rw) =>
|
||||
VecInit(banks(bid).map(_.map(_.in.head._1.a.valid)).transpose.map { words_in_idx =>
|
||||
VecInit(words_in_idx.toSeq).asUInt.orR
|
||||
val wordSelects1h = Seq(
|
||||
Wire(UInt(outer.uniformNodesIn.head(bid).head.length.W)).suggestName(s"ws_r_b${bid}"),
|
||||
Wire(UInt(outer.uniformNodesIn.last(bid).head.length.W)).suggestName(s"ws_w_b${bid}"))
|
||||
val Seq(validRSources, validWSources) = outer.uniformNodesIn.zipWithIndex.map { case (banks, rw) =>
|
||||
VecInit(banks(bid).map(_.map(_.in.head._1.a.valid)).transpose.map { wordsInIdx =>
|
||||
VecInit(wordsInIdx.toSeq).asUInt.orR
|
||||
}.toSeq).asUInt.suggestName(s"valid_sources_rw${rw}_b${bid}")
|
||||
}
|
||||
|
||||
assert(bank_mgrs.flatten.size == 2/* read and write */ * outer.smem_subbanks)
|
||||
bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
|
||||
assert(bankMgrs.flatten.size == 2/* read and write */ * outer.smemSubbanks)
|
||||
bankMgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
|
||||
assert(!r.portParams.map(_.anySupportPutFull).reduce(_ || _))
|
||||
assert(!w.portParams.map(_.anySupportGet).reduce(_ || _))
|
||||
|
||||
val mem_depth = outer.smem_depth
|
||||
val mem_width = outer.smem_width
|
||||
val word_width = outer.wordSize
|
||||
val memDepth = outer.smemDepth
|
||||
val memWidth = outer.smemWidth
|
||||
val wordWidth = outer.wordSize
|
||||
|
||||
val mem = TwoPortSyncMem(
|
||||
n = mem_depth,
|
||||
t = UInt((word_width * 8).W),
|
||||
mask_len = word_width // byte level mask
|
||||
n = memDepth,
|
||||
t = UInt((wordWidth * 8).W),
|
||||
mask_len = wordWidth // byte level mask
|
||||
)
|
||||
mem.suggestName(s"rad_smem_c${outer.thisClusterParams.clusterId}_b${bid}_w${wid}")
|
||||
|
||||
val (r_node, r_edge) = r.in.head
|
||||
val (w_node, w_edge) = w.in.head
|
||||
val (rNode, rEdge) = r.in.head
|
||||
val (wNode, wEdge) = w.in.head
|
||||
|
||||
// address format is
|
||||
// [ smem_base | bank_id | line_id | word_id | byte_offset ]
|
||||
// line_id is used to index into the SRAMs
|
||||
mem.io.raddr := (r_node.a.bits.address & (mem_depth * mem_width - 1).U) >> log2Ceil(mem_width).U
|
||||
mem.io.waddr := RegNext((w_node.a.bits.address & (mem_depth * mem_width - 1).U) >> log2Ceil(mem_width).U)
|
||||
mem.io.raddr := (rNode.a.bits.address & (memDepth * memWidth - 1).U) >> log2Ceil(memWidth).U
|
||||
mem.io.waddr := RegNext((wNode.a.bits.address & (memDepth * memWidth - 1).U) >> log2Ceil(memWidth).U)
|
||||
|
||||
assert((bid.U === ((r_node.a.bits.address & (mem_depth * mem_width * outer.smem_banks - 1).U) >>
|
||||
log2Ceil(mem_depth * mem_width).U).asUInt) || !r_node.a.valid, "bank id mismatch with request")
|
||||
assert((wid.U === ((r_node.a.bits.address & (mem_width - 1).U) >>
|
||||
log2Ceil(word_width).U).asUInt) || !r_node.a.valid, "word id mismatch with request")
|
||||
assert((bid.U === ((rNode.a.bits.address & (memDepth * memWidth * outer.smemBanks - 1).U) >>
|
||||
log2Ceil(memDepth * memWidth).U).asUInt) || !rNode.a.valid, "bank id mismatch with request")
|
||||
assert((wid.U === ((rNode.a.bits.address & (memWidth - 1).U) >>
|
||||
log2Ceil(wordWidth).U).asUInt) || !rNode.a.valid, "word id mismatch with request")
|
||||
|
||||
make_buffer(mem, r_node, r_edge, w_node, w_edge)
|
||||
makeBuffer(mem, rNode, rEdge, wNode, wEdge)
|
||||
|
||||
// add access counters to banks
|
||||
smemReadsPerBankPerCycle(bid)(wid) := (r_node.a.fire === true.B)
|
||||
smemWritesPerBankPerCycle(bid)(wid) := (w_node.a.fire === true.B)
|
||||
smemReadsPerBankPerCycle(bid)(wid) := (rNode.a.fire === true.B)
|
||||
smemWritesPerBankPerCycle(bid)(wid) := (wNode.a.fire === true.B)
|
||||
|
||||
// (uniform_fires zip Seq(uniform_r_nodes, uniform_w_nodes)).foreach { case (uf, n) =>
|
||||
// uf(bid)(wid) := VecInit(n(bid)(wid).map(_.out.head._1.a.fire)).asUInt.orR
|
||||
// }
|
||||
(uniform_fires zip outer.uniform_nodes_out).foreach { case (uf, n) =>
|
||||
(uniformFires zip outer.uniformNodesOut).foreach { case (uf, n) =>
|
||||
uf(bid)(wid) := n(bid)(wid).in.head._1.a.fire
|
||||
}
|
||||
}
|
||||
// use round robin to decide uniform select
|
||||
(word_selects_1h zip Seq(valid_r_sources, valid_w_sources)).zipWithIndex.foreach { case ((ws, vs), rw) =>
|
||||
ws := TLArbiter.roundRobin(vs.getWidth, vs, uniform_fires(rw)(bid).asUInt.orR)
|
||||
(wordSelects1h zip Seq(validRSources, validWSources)).zipWithIndex.foreach { case ((ws, vs), rw) =>
|
||||
ws := TLArbiter.roundRobin(vs.getWidth, vs, uniformFires(rw)(bid).asUInt.orR)
|
||||
}
|
||||
// mask valid into xbar to prevent triggering assertion
|
||||
// (word_selects_1h zip outer.uniform_nodes_in).foreach { case (ws, ui) =>
|
||||
// (wordSelects1h zip outer.uniformNodesIn).foreach { case (ws, ui) =>
|
||||
// ui(bid).foreach { sources =>
|
||||
// val in_valid = sources.map(_.in.head._1.a.valid)
|
||||
// val out_valid = sources.map(_.out.head._1.a.valid)
|
||||
// val ws_actual = Mux((ws & VecInit(in_valid).asUInt).orR,
|
||||
// val inValid = sources.map(_.in.head._1.a.valid)
|
||||
// val outValid = sources.map(_.out.head._1.a.valid)
|
||||
// val wsActual = Mux((ws & VecInit(inValid).asUInt).orR,
|
||||
// ws, TLArbiter.roundRobin(
|
||||
// in_valid.length, VecInit(in_valid).asUInt, VecInit(sources.map(_.in.head._1.a.fire)).asUInt.orR))
|
||||
// (in_valid lazyZip out_valid lazyZip ws_actual.asBools).foreach { case (iv, ov, sel) =>
|
||||
// inValid.length, VecInit(inValid).asUInt, VecInit(sources.map(_.in.head._1.a.fire)).asUInt.orR))
|
||||
// (inValid lazyZip outValid lazyZip wsActual.asBools).foreach { case (iv, ov, sel) =>
|
||||
// ov := iv && sel // only present output valid if input is selected
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
(word_selects_1h lazyZip outer.uniform_policy_nodes lazyZip outer.uniform_nodes_in).foreach { case (ws, pn, ui) =>
|
||||
(wordSelects1h lazyZip outer.uniformPolicyNodes lazyZip outer.uniformNodesIn).foreach { case (ws, pn, ui) =>
|
||||
(pn(bid) zip ui(bid)).foreach { case (policies, sources) =>
|
||||
val in_valid = sources.map(_.in.head._1.a.valid)
|
||||
val out_valid = sources.map(_.out.head._1.a.valid)
|
||||
val hint_hit = (ws & VecInit(in_valid).asUInt).orR
|
||||
val ws_actual = Mux(hint_hit, ws, TLArbiter.lowestIndexFirst(
|
||||
in_valid.length, VecInit(in_valid).asUInt, hint_hit && policies.out.head._1.actual(0)))
|
||||
(in_valid lazyZip out_valid lazyZip ws_actual.asBools).foreach { case (iv, ov, sel) =>
|
||||
val inValid = sources.map(_.in.head._1.a.valid)
|
||||
val outValid = sources.map(_.out.head._1.a.valid)
|
||||
val hintHit = (ws & VecInit(inValid).asUInt).orR
|
||||
val wsActual = Mux(hintHit, ws, TLArbiter.lowestIndexFirst(
|
||||
inValid.length, VecInit(inValid).asUInt, hintHit && policies.out.head._1.actual(0)))
|
||||
(inValid lazyZip outValid lazyZip wsActual.asBools).foreach { case (iv, ov, sel) =>
|
||||
ov := iv && sel // only present output valid if input is selected
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(outer.uniform_policy_nodes zip word_selects_1h).zipWithIndex.foreach { case ((nodes_bw, ws), rw) =>
|
||||
nodes_bw(bid).foreach { policy =>
|
||||
(outer.uniformPolicyNodes zip wordSelects1h).zipWithIndex.foreach { case ((nodesBw, ws), rw) =>
|
||||
nodesBw(bid).foreach { policy =>
|
||||
policy.out.head._1.hint := ws
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
outer.smem_bank_mgrs.foreach { case Seq(r, w) =>
|
||||
val mem_depth = outer.smem_depth
|
||||
val mem_width = outer.smem_width
|
||||
outer.smemBankMgrs.foreach { case Seq(r, w) =>
|
||||
val memDepth = outer.smemDepth
|
||||
val memWidth = outer.smemWidth
|
||||
|
||||
val mem = TwoPortSyncMem(
|
||||
n = mem_depth,
|
||||
t = UInt((mem_width * 8).W),
|
||||
mask_len = mem_width // byte level mask
|
||||
n = memDepth,
|
||||
t = UInt((memWidth * 8).W),
|
||||
mask_len = memWidth // byte level mask
|
||||
)
|
||||
|
||||
val (r_node, r_edge) = r.in.head
|
||||
val (w_node, w_edge) = w.in.head
|
||||
val (rNode, rEdge) = r.in.head
|
||||
val (wNode, wEdge) = w.in.head
|
||||
|
||||
mem.io.raddr := (r_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
|
||||
mem.io.waddr := RegNext((w_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U)
|
||||
mem.io.raddr := (rNode.a.bits.address ^ outer.smemBase.U) >> log2Ceil(memWidth).U
|
||||
mem.io.waddr := RegNext((wNode.a.bits.address ^ outer.smemBase.U) >> log2Ceil(memWidth).U)
|
||||
|
||||
make_buffer(mem, r_node, r_edge, w_node, w_edge)
|
||||
makeBuffer(mem, rNode, rEdge, wNode, wEdge)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user