diff --git a/src/main/scala/radiance/memory/DistributorNode.scala b/src/main/scala/radiance/memory/DistributorNode.scala new file mode 100644 index 0000000..1576b16 --- /dev/null +++ b/src/main/scala/radiance/memory/DistributorNode.scala @@ -0,0 +1,144 @@ +package radiance.memory + +import chisel3._ +import chisel3.experimental.SourceInfo +import chisel3.util._ +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.util.BundleField +import org.chipsalliance.cde.config.Parameters + + +class DistributorNode(from: Int, to: Int)(implicit p: Parameters) extends LazyModule { + require(isPow2(from) && isPow2(to) && (from >= to), "invalid distributor node parameters") + println(s"distributor node to segment from $from into $to") + val num_clients = from / to + + val node = TLNexusNode(clientFn = seq => { + require(seq.map(_.masters.size).sum == 1, s"there should only be one client to a distributor node, found ${seq.map(_.masters.size).sum}") + val master = seq.head.masters.head + require(isPow2(master.sourceId.size)) + seq.head.v1copy( + clients = Seq.tabulate(num_clients)(i => master.v2copy( + name = s"${name}_dist_client_$i", + emits = TLMasterToSlaveTransferSizes( + get = TransferSizes(to, to), + putFull = TransferSizes(to, to), + putPartial = TransferSizes(to, to) + ), + sourceId = master.sourceId.shift(master.sourceId.size * i) + )) + ) + }, managerFn = seq => { + seq.head.v1copy( + responseFields = BundleField.union(seq.flatMap(_.responseFields)), + requestKeys = seq.flatMap(_.requestKeys).distinct, + minLatency = seq.map(_.minLatency).min, + endSinkId = TLXbar.mapOutputIds(seq).map(_.end).max, + managers = Seq(TLSlaveParameters.v2( + name = Some(s"${name}_manager"), + address = AddressSet.unify(seq.flatMap(_.slaves.flatMap(_.address))), + supports = TLMasterToSlaveTransferSizes( + get = TransferSizes(from, from), + putFull = TransferSizes(from, from), + putPartial = TransferSizes(from, from) + ), + fifoId = Some(0), + )), + beatBytes = from + ) + }) + + lazy val module = new LazyModuleImp(this) { + val cn = node.in.head._1 + val mn = node.out.map(_._1) + println(f"$name node in size ${node.in.size}, out size ${node.out.size}") + assert(node.out.size == num_clients, s"got ${node.out.size} clients instead of $num_clients") + + // A channel + val ca = cn.a.bits + mn.map(_.a.bits).zipWithIndex.foreach { case (m, i) => + println(s"$i master source id width ${m.source.getWidth}, client source id width ${ca.source.getWidth}") + m.opcode := ca.opcode + m.param := ca.param + m.user := ca.user + m.source := Cat(i.U(log2Ceil(num_clients).W), ca.source) + m.address := ca.address + (to * i).U + m.mask := ca.mask((i + 1) * to - 1, i * to) + m.data := ca.data((i + 1) * to * 8 - 1, i * to * 8) + m.size := log2Ceil(to).U + } + mn.map(_.a.valid).foreach(_ := cn.a.valid) + cn.a.ready := mn.map(_.a.ready).reduce(_ && _) + + // D channel + val cd = cn.d.bits + cd.size := log2Ceil(from).U + val partialWait = RegInit(false.B) + val arrived = RegInit(0.U(num_clients.W)) + val cdReg = RegInit(0.U.asTypeOf(cd.cloneType)) + + def setMetadata(to: TLBundleD, from: TLBundleD): Unit = { + to.opcode := from.opcode + to.user := from.user + to.param := from.param + to.sink := from.sink + to.denied := from.denied + to.corrupt := from.corrupt + to.source := from.source(to.source.getWidth - 1, 0) + } + + def partialData: UInt = VecInit(mn.map(_.d).map(d => Mux(d.fire, d.bits.data, 0.U(d.bits.data.getWidth.W)))).asUInt + def partialValid: UInt = VecInit(mn.map(_.d.fire)).asUInt + + mn.map(_.d.ready).zip(arrived.asBools).foreach { case (r, a) => + r := cn.d.ready && (!partialWait || !a) // if waiting for partial response, ready only if not arrived yet + } + + // TODO: might need coverage test for this + when (!partialWait) { + cn.d.valid := false.B + partialWait := false.B + when (partialValid.asBools.reduce(_ && _)) { + // all valids, immediately return both metadata and data + cn.d.valid := true.B + cd.data := Cat(mn.map(_.d.bits.data).reverse) + setMetadata(cd, mn.head.d.bits) + assert(cd.data === partialData, "sanity check") + }.elsewhen (partialValid.asBools.reduce(_ || _)) { + // at least 1 valid: enter partial valid state, store partial data into regs + partialWait := true.B + arrived := partialValid + cdReg.data := partialData + when (mn.head.d.valid) { setMetadata(cdReg, mn.head.d.bits) } + } + }.otherwise { + cn.d.valid := false.B + partialWait := true.B + when ((arrived | partialValid).asBools.reduce(_ && _)) { + // all valids received now + when (mn.head.d.valid) { + setMetadata(cd, mn.head.d.bits) + }.otherwise { + cd := cdReg + } + cn.d.valid := true.B + cd.data := cdReg.data | partialData + partialWait := false.B + cdReg := 0.U.asTypeOf(cdReg.cloneType) + arrived := 0.U + }.elsewhen (partialValid.asBools.reduce(_ || _)) { + // update partial data + arrived := arrived | partialValid + cdReg.data := cdReg.data | partialData + when (mn.head.d.valid) { setMetadata(cdReg, mn.head.d.bits) } + } + } + } +} + +object DistributorNode { + def apply(from: Int, to: Int)(implicit p: Parameters, valName: ValName, sourceInfo: SourceInfo): TLNexusNode = { + LazyModule(new DistributorNode(from, to)).node + } +} diff --git a/src/main/scala/radiance/memory/RWSplitterNode.scala b/src/main/scala/radiance/memory/RWSplitterNode.scala index b47c641..618eb2d 100644 --- a/src/main/scala/radiance/memory/RWSplitterNode.scala +++ b/src/main/scala/radiance/memory/RWSplitterNode.scala @@ -27,7 +27,7 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten require(isPow2(vis_mask + 1) || vis_mask == -1) println(f"combined visibilities of splitter memory node clients: ${vis_min}, ${vis_mask}") - seq(0).v1copy( + seq.head.v1copy( echoFields = BundleField.union(seq.flatMap(_.echoFields)), requestFields = BundleField.union(seq.flatMap(_.requestFields)), responseKeys = seq.flatMap(_.responseKeys).distinct, @@ -56,9 +56,8 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten ) }, managerFn = { seq => - println(seq.flatMap(_.slaves.map(_.supports))) // val fifoIdFactory = TLXbar.relabeler() - seq(0).v1copy( + seq.head.v1copy( responseFields = BundleField.union(seq.flatMap(_.responseFields)), requestKeys = seq.flatMap(_.requestKeys).distinct, minLatency = seq.map(_.minLatency).min, @@ -81,7 +80,7 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten val u_out = node.out val u_in = node.in assert(u_out.length == 2) - println(f"gemmini unified memory node has ${u_in.length} incoming client(s)") + println(f"${name} has ${u_in.length} incoming client(s)") val r_out = u_out.head val w_out = u_out.last diff --git a/src/main/scala/radiance/tile/RadianceCluster.scala b/src/main/scala/radiance/tile/RadianceCluster.scala index 502ae87..4ce2412 100644 --- a/src/main/scala/radiance/tile/RadianceCluster.scala +++ b/src/main/scala/radiance/tile/RadianceCluster.scala @@ -7,13 +7,11 @@ import chisel3._ import chisel3.util._ import freechips.rocketchip.diplomacy._ import freechips.rocketchip.prci.ClockSinkParameters -import freechips.rocketchip.regmapper.RegField import freechips.rocketchip.subsystem._ import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util.BundleField import gemmini._ import org.chipsalliance.cde.config.Parameters -import radiance.memory.RWSplitterNode +import radiance.memory._ case class RadianceClusterParams( val clusterId: Int, @@ -69,50 +67,63 @@ class RadianceCluster ( // TODO: stride by word val unified_mem_read_node = TLIdentityNode() val unified_mem_write_node = TLIdentityNode() + val spad_data_len = gemminiConfig.sp_width / 8 val acc_data_len = gemminiConfig.sp_width / gemminiConfig.inputType.getWidth * gemminiConfig.accType.getWidth / 8 - val max_data_len = spad_data_len // max acc_data_len + val smem_base = gemminiConfig.tl_ext_mem_base - val smem_depth = gemminiConfig.sp_bank_entries * spad_data_len / max_data_len - val smem_width = max_data_len + val smem_width = spad_data_len + val smem_depth = gemminiConfig.sp_bank_entries * spad_data_len / smem_width val smem_banks = gemminiConfig.sp_banks - val smem_subbanks = 1 + val smem_subbanks = smem_width / wordSize + val smem_size = smem_width * smem_depth * smem_banks - val splitter_node = RWSplitterNode() + val stride_by_word = true - unified_mem_read_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_read_nodes - unified_mem_write_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_write_nodes - unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node - // unified_mem_read_node :=* TLWidthWidget(acc_data_len) :=* acc_read_nodes - // unified_mem_write_node :=* TLWidthWidget(acc_data_len) :=* acc_write_nodes + val radiance_smem_fanout = radianceTiles.flatMap { + _.smemNodes.map { m => + val smem_fanout_xbar = TLXbar() + smem_fanout_xbar :=* m + smem_fanout_xbar + } + } - // assert(splitter_node.in.map(_._2.slave.slaves.flatMap(_.supports.get))) - - /* address = Seq(AddressSet(gemmini.spad_base, smem_depth * smem_width * smem_banks - 1)), - supports = TLMasterToSlaveTransferSizes( - get = TransferSizes(1, smem_width), - putFull = TransferSizes(1, smem_width), - putPartial = TransferSizes(1, smem_width)),*/ - - unified_mem_read_node := TLWidthWidget(spad_data_len) := splitter_node - unified_mem_write_node := TLWidthWidget(spad_data_len) := splitter_node - - val stride_by_word = false + require(isPow2(smem_banks)) // collection of read and write managers for each sram (sub)bank val smem_bank_mgrs : Seq[Seq[TLManagerNode]] = if (stride_by_word) { - assert(false, "TODO under construction") - // assert((config.sp_capacity match { case CapacityInKilobytes(kb) => kb * 1024}) == - // gemmini.config.sp_bank_entries * spad_data_len / max_data_len * gemmini.config.sp_banks * max_data_len) - (0 until gemminiConfig.sp_banks).map { bank => - LazyModule(new TLRAM( - address = AddressSet(max_data_len * bank, - ((gemminiConfig.sp_bank_entries * spad_data_len / max_data_len - 1) * gemminiConfig.sp_banks + bank) - * max_data_len + (max_data_len - 1)), - beatBytes = max_data_len - )) - }.map(x => Seq(x.node)) + require(isPow2(smem_subbanks)) + (0 until smem_banks).flatMap { bid => + (0 until smem_subbanks).map { wid => + Seq(TLManagerNode(Seq(TLSlavePortParameters.v1( + managers = Seq(TLSlaveParameters.v2( + name = Some(f"sp_bank${bid}_word${wid}_read_mgr"), + address = Seq(AddressSet( + smem_base + (smem_depth * smem_width * bid) + wordSize * wid, + smem_depth * smem_width - smem_width + wordSize - 1 + )), + supports = TLMasterToSlaveTransferSizes( + get = TransferSizes(wordSize, wordSize)), + fifoId = Some(0) + )), + beatBytes = wordSize + )) + ), TLManagerNode(Seq(TLSlavePortParameters.v1( + managers = Seq(TLSlaveParameters.v2( + name = Some(f"sp_bank${bid}_word${wid}_write_mgr"), + address = Seq(AddressSet( + smem_base + (smem_depth * smem_width * bid) + wordSize * wid, + smem_depth * smem_width - smem_width + wordSize - 1 + )), + supports = TLMasterToSlaveTransferSizes( + putFull = TransferSizes(wordSize, wordSize), + putPartial = TransferSizes(wordSize, wordSize)), + fifoId = Some(0) + )), + beatBytes = wordSize + )))) + } + } } else { - require(isPow2(smem_banks)) (0 until smem_banks).map { bank => Seq(TLManagerNode(Seq(TLSlavePortParameters.v1( managers = Seq(TLSlaveParameters.v2( @@ -124,39 +135,131 @@ class RadianceCluster ( fifoId = Some(0) )), beatBytes = smem_width - ))), - TLManagerNode(Seq(TLSlavePortParameters.v1( - managers = Seq(TLSlaveParameters.v2( - name = Some(f"sp_bank${bank}_write_mgr"), - address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank), - smem_depth * smem_width - 1)), - supports = TLMasterToSlaveTransferSizes( - putFull = TransferSizes(1, smem_width), - putPartial = TransferSizes(1, smem_width)), - fifoId = Some(0) - )), - beatBytes = smem_width - )))) + )) + ), TLManagerNode(Seq(TLSlavePortParameters.v1( + managers = Seq(TLSlaveParameters.v2( + name = Some(f"sp_bank${bank}_write_mgr"), + address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank), + smem_depth * smem_width - 1)), + supports = TLMasterToSlaveTransferSizes( + putFull = TransferSizes(1, smem_width), + putPartial = TransferSizes(1, smem_width)), + fifoId = Some(0) + )), + beatBytes = smem_width + )))) } } - val smem_r_xbar = TLXbar() - val smem_w_xbar = TLXbar() - smem_r_xbar :=* unified_mem_read_node - smem_w_xbar :=* unified_mem_write_node + if (stride_by_word) { + val spad_read_nodes = Seq.fill(smem_banks) { + val r_dist = DistributorNode(from = smem_width, to = wordSize) + r_dist := gemmini.spad_read_nodes + Seq.fill(smem_subbanks) { + val id_node = TLIdentityNode() + id_node := r_dist + id_node + } + } + val spad_write_nodes = Seq.fill(smem_banks) { + val w_dist = DistributorNode(from = smem_width, to = wordSize) + w_dist := gemmini.spad_write_nodes + Seq.fill(smem_subbanks) { + val id_node = TLIdentityNode() + id_node := w_dist + id_node + } + } + val ws_dist = DistributorNode(from = smem_width, to = wordSize) + ws_dist := gemmini.spad.spad_writer.node // this is the dma write node + val spad_sp_write_nodes = Seq.fill(smem_subbanks) { + val ws_xbar = TLXbar() // fanout to 4 banks + ws_xbar := ws_dist + ws_xbar + } - smem_bank_mgrs.foreach { mem => - require(mem.length == 2) - mem.head := smem_r_xbar - mem.last := TLFragmenter(spad_data_len, max_write_width_bytes) := smem_w_xbar + // spad_read_nodes.flatten.foreach(node => unified_mem_read_node :=* node) + // spad_write_nodes.flatten.foreach(node => unified_mem_write_node :=* node) + // spad_sp_write_nodes.foreach(node => unified_mem_write_node :=* node) + // unified_mem_write_node :=* DistributorNode(from = smem_width, to = wordSize) :=* gemmini.spad.spad_writer.node // this is the dma write node + // unified_mem_read_node :=* TLWidthWidget(acc_data_len) :=* acc_read_nodes + // unified_mem_write_node :=* TLWidthWidget(acc_data_len) :=* acc_write_nodes + + // these nodes access an entire line simultaneously + val uniform_r_nodes: Seq[Seq[Seq[TLNode]]] = spad_read_nodes.map { rb => + rb.map { rw => Seq(rw) } + } + val uniform_w_nodes: Seq[Seq[Seq[TLNode]]] = spad_write_nodes.map { wb => + (wb zip spad_sp_write_nodes).map { case (ww, sw) => Seq(ww, sw) } + } + + val splitter_nodes = radiance_smem_fanout.map { m => + val splitter_node = RWSplitterNode() + splitter_node := m + splitter_node + } + + radiance_smem_fanout.foreach(clbus.inwardNode := _) + + // these nodes are random access + val nonuniform_r_nodes: Seq[TLNode] = splitter_nodes.map { s => + val nu_r_xbar = TLXbar() + nu_r_xbar := s + nu_r_xbar + }.toSeq + val nonuniform_w_nodes: Seq[TLNode] = splitter_nodes.map { s => + val nu_w_xbar = TLXbar() + nu_w_xbar := s + nu_w_xbar + }.toSeq + + smem_bank_mgrs.grouped(smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) => + bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) => + // TODO: this should be a coordinated round robin + val subbank_r_xbar = TLXbar(TLArbiter.lowestIndexFirst) + val subbank_w_xbar = TLXbar(TLArbiter.lowestIndexFirst) + r := subbank_r_xbar + w := subbank_w_xbar + uniform_r_nodes(bid)(wid).foreach( subbank_r_xbar := _ ) + uniform_w_nodes(bid)(wid).foreach( subbank_w_xbar := _ ) + + nonuniform_r_nodes.foreach( subbank_r_xbar := _ ) + nonuniform_w_nodes.foreach( subbank_w_xbar := _ ) + } + } + } else { + unified_mem_read_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_read_nodes + unified_mem_write_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_write_nodes + unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node + + val splitter_node = RWSplitterNode() + unified_mem_read_node := TLWidthWidget(spad_data_len) := splitter_node + unified_mem_write_node := TLWidthWidget(spad_data_len) := splitter_node + + radiance_smem_fanout.foreach(clbus.inwardNode := _) + splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode + + val smem_r_xbar = TLXbar() + val smem_w_xbar = TLXbar() + DisableMonitors { implicit p => + smem_r_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_read_node + smem_w_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_write_node + } + + smem_bank_mgrs.foreach { mem => + require(mem.length == 2) + mem.head := smem_r_xbar + mem.last := smem_w_xbar + } } // connect tile smem nodes to xbar, and xbar to banks // val smem_xbar = TLXbar() - splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode gemminiTile.slaveNode :=* TLWidthWidget(4) :=* clbus.outwardNode - // printf and perf counter buffer FIXME: make configurable - TLRAM(AddressSet(x"ff004000", numCores * 0x200 - 1)) := TLFragmenter(4, 4) := clbus.outwardNode + + assert(smem_size == 0x4000, "fix me") + // printf and perf counter buffer + TLRAM(AddressSet(x"ff000000" + smem_size, numCores * 0x200 - 1)) := TLFragmenter(4, 4) := clbus.outwardNode // Diplomacy sink nodes for cluster-wide barrier sync signal val barrierSlaveNode = BarrierSlaveNode(numCores) @@ -174,7 +277,6 @@ class RadianceCluster ( // (perSmemPortXbars zip tile.smemNodes).foreach { // case (xbar, node) => xbar.node := node // } - tile.smemNodes.foreach(clbus.inwardNode := _) barrierSlaveNode := tile.barrierMasterNode } // perSmemPortXbars.foreach { clbus.inwardNode := _.node } @@ -212,23 +314,10 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp( } // TODO: remove Pipeline dependency of gemmini - def makeSmemBanks: Unit = { - outer.smem_bank_mgrs.foreach { case Seq(r, w) => - val mem_depth = outer.smem_depth - val mem_width = outer.smem_width - - val mem = TwoPortSyncMem( - n = mem_depth, - t = UInt((mem_width * 8).W), - mask_len = mem_width // byte level mask - ) - - val (r_node, r_edge) = r.in.head - val (w_node, w_edge) = w.in.head - - // READ + def makeSmemBanks(): Unit = { + def make_buffer[T <: Data](mem: TwoPortSyncMem[T], r_node: TLBundle, r_edge: TLEdgeIn, + w_node: TLBundle, w_edge: TLEdgeIn): Unit = { mem.io.ren := r_node.a.fire - mem.io.raddr := (r_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U val data_pipe_in = Wire(DecoupledIO(mem.io.rdata.cloneType)) data_pipe_in.valid := RegNext(mem.io.ren) @@ -274,7 +363,7 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp( r_node.d.bits := r_edge.AccessAck( Mux(r_node.d.valid, metadata_pipe.bits.source, 0.U), Mux(r_node.d.valid, metadata_pipe.bits.size, 0.U), - Mux(!data_pipe.valid, sram_read_backup_reg.bits, data_pipe.bits)) + Mux(!data_pipe.valid, sram_read_backup_reg.bits, data_pipe.bits).asUInt) r_node.d.valid := data_pipe.valid || sram_read_backup_reg.valid // r node A is not ready only if D is not ready and both slots filled r_node.a.ready := r_node.d.ready && !(data_pipe.valid && sram_read_backup_reg.valid) @@ -283,16 +372,71 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp( // WRITE mem.io.wen := w_node.a.fire - mem.io.waddr := (w_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U mem.io.wdata := w_node.a.bits.data mem.io.mask := w_node.a.bits.mask.asBools w_node.a.ready := w_node.d.ready// && (mem.io.waddr =/= mem.io.raddr) w_node.d.valid := w_node.a.valid w_node.d.bits := w_edge.AccessAck(w_node.a.bits) } + + if (outer.stride_by_word) { + outer.smem_bank_mgrs.grouped(outer.smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) => + assert(bank_mgrs.flatten.size == 2 * outer.smem_subbanks) + bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) => + assert(!r.portParams.map(_.anySupportPutFull).reduce(_ || _)) + assert(!w.portParams.map(_.anySupportGet).reduce(_ || _)) + + val mem_depth = outer.smem_depth + val mem_width = outer.smem_width + val word_width = outer.wordSize + + val mem = TwoPortSyncMem( + n = mem_depth, + t = UInt((word_width * 8).W), + mask_len = word_width // byte level mask + ) + mem.suggestName(s"rad_smem_c${outer.thisClusterParams.clusterId}_b${bid}_w${wid}") + + val (r_node, r_edge) = r.in.head + val (w_node, w_edge) = w.in.head + + // address format is + // [ smem_base | bank_id | line_id | word_id | byte_offset ] + // line_id is used to index into the SRAMs + mem.io.raddr := (r_node.a.bits.address & (mem_depth * mem_width - 1).U) >> log2Ceil(mem_width).U + mem.io.waddr := (w_node.a.bits.address & (mem_depth * mem_width - 1).U) >> log2Ceil(mem_width).U + + assert((bid.U === ((r_node.a.bits.address & (mem_depth * mem_width * outer.smem_banks - 1).U) >> + log2Ceil(mem_depth * mem_width).U).asUInt) || !r_node.a.valid, "bank id mismatch with request") + assert((wid.U === ((r_node.a.bits.address & (mem_width - 1).U) >> + log2Ceil(word_width).U).asUInt) || !r_node.a.valid, "word id mismatch with request") + + make_buffer(mem, r_node, r_edge, w_node, w_edge) + } + } + } else { + outer.smem_bank_mgrs.foreach { case Seq(r, w) => + val mem_depth = outer.smem_depth + val mem_width = outer.smem_width + + val mem = TwoPortSyncMem( + n = mem_depth, + t = UInt((mem_width * 8).W), + mask_len = mem_width // byte level mask + ) + + val (r_node, r_edge) = r.in.head + val (w_node, w_edge) = w.in.head + + mem.io.raddr := (r_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U + mem.io.waddr := (w_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U + + make_buffer(mem, r_node, r_edge, w_node, w_edge) + } + } } - makeSmemBanks + makeSmemBanks() println(s"======== barrierSlaveNode: ${outer.barrierSlaveNode.in(0)._2.barrierIdBits}") }