word strided subbanks, parallel subbank access for gemmini and all-to-all xbar parallel access for radiance smem

This commit is contained in:
Richard Yan
2024-04-01 10:54:17 -07:00
parent 9fb861a873
commit f60a318edb
3 changed files with 372 additions and 85 deletions

View File

@@ -0,0 +1,144 @@
package radiance.memory
import chisel3._
import chisel3.experimental.SourceInfo
import chisel3.util._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util.BundleField
import org.chipsalliance.cde.config.Parameters
/**
  * Splits one wide TileLink client into `from / to` narrow clients that are accessed in parallel.
  *
  * Diplomatically, the single inward master (which must issue exactly `from`-byte accesses) is
  * presented downstream as `num_clients` masters that each issue exactly `to`-byte accesses. Outward
  * port `i` carries bytes `[i * to, (i + 1) * to)` of every inward beat, at address `base + i * to`,
  * with `i` prepended to the source ID.
  *
  * In hardware, every inward A beat is forked to all outward ports, and the outward D beats are
  * joined back into a single `from`-byte inward D beat once every slice has responded.
  * The join assumes each outward port returns its responses in request order.
  *
  * @param from width in bytes of the inward (wide) access; must be a power of two
  * @param to   width in bytes of each outward (narrow) access; must be a power of two, `<= from`
  */
class DistributorNode(from: Int, to: Int)(implicit p: Parameters) extends LazyModule {
  require(isPow2(from) && isPow2(to) && (from >= to), "invalid distributor node parameters")
  println(s"distributor node to segment from $from into $to")

  /** Number of narrow outward ports the wide access is split across. */
  val num_clients = from / to

  val node = TLNexusNode(clientFn = seq => {
    require(seq.map(_.masters.size).sum == 1, s"there should only be one client to a distributor node, found ${seq.map(_.masters.size).sum}")
    val master = seq.head.masters.head
    // A power-of-two source range lets the slice index simply be concatenated above the source ID.
    require(isPow2(master.sourceId.size))
    seq.head.v1copy(
      clients = Seq.tabulate(num_clients)(i => master.v2copy(
        name = s"${name}_dist_client_$i",
        emits = TLMasterToSlaveTransferSizes(
          get = TransferSizes(to, to),
          putFull = TransferSizes(to, to),
          putPartial = TransferSizes(to, to)
        ),
        // Give every slice its own, non-overlapping copy of the original source ID range.
        sourceId = master.sourceId.shift(master.sourceId.size * i)
      ))
    )
  }, managerFn = seq => {
    seq.head.v1copy(
      responseFields = BundleField.union(seq.flatMap(_.responseFields)),
      requestKeys = seq.flatMap(_.requestKeys).distinct,
      minLatency = seq.map(_.minLatency).min,
      endSinkId = TLXbar.mapOutputIds(seq).map(_.end).max,
      // Upstream sees one manager covering the union of all downstream addresses,
      // accepting only full-width (`from`-byte) accesses.
      managers = Seq(TLSlaveParameters.v2(
        name = Some(s"${name}_manager"),
        address = AddressSet.unify(seq.flatMap(_.slaves.flatMap(_.address))),
        supports = TLMasterToSlaveTransferSizes(
          get = TransferSizes(from, from),
          putFull = TransferSizes(from, from),
          putPartial = TransferSizes(from, from)
        ),
        fifoId = Some(0),
      )),
      beatBytes = from
    )
  })

  lazy val module = new LazyModuleImp(this) {
    val cn = node.in.head._1
    val mn = node.out.map(_._1)
    println(f"$name node in size ${node.in.size}, out size ${node.out.size}")
    assert(node.out.size == num_clients, s"got ${node.out.size} clients instead of $num_clients")

    // ---------------------------------------------------------------- A channel
    val ca = cn.a.bits
    mn.map(_.a.bits).zipWithIndex.foreach { case (m, i) =>
      println(s"$i master source id width ${m.source.getWidth}, client source id width ${ca.source.getWidth}")
      m.opcode := ca.opcode
      m.param := ca.param
      m.user := ca.user
      // Slice index goes above the original source ID, matching the shifted sourceId ranges.
      m.source := Cat(i.U(log2Ceil(num_clients).W), ca.source)
      m.address := ca.address + (to * i).U
      m.mask := ca.mask((i + 1) * to - 1, i * to)
      m.data := ca.data((i + 1) * to * 8 - 1, i * to * 8)
      m.size := log2Ceil(to).U
    }

    // Fork the inward request to every outward port. A port that accepts its slice while a sibling
    // is still stalled must not be offered the same request again, otherwise it would execute the
    // access (and respond) more than once. `aSent(i)` remembers that port i already took the
    // current request; the inward beat completes once every port has taken it.
    val aSent = RegInit(VecInit(Seq.fill(num_clients)(false.B)))
    mn.zip(aSent).foreach { case (m, sent) => m.a.valid := cn.a.valid && !sent }
    cn.a.ready := mn.zip(aSent).map { case (m, sent) => sent || m.a.ready }.reduce(_ && _)
    when (cn.a.fire) {
      aSent.foreach(_ := false.B)
    }.otherwise {
      mn.zip(aSent).foreach { case (m, sent) => when (m.a.fire) { sent := true.B } }
    }

    // ---------------------------------------------------------------- D channel
    val cd = cn.d.bits
    cd.size := log2Ceil(from).U

    // partialWait: some, but not all, slices of the current response have been captured.
    // arrived:     one bit per outward port, set once that port's slice has been captured.
    // cdReg:       holds the captured data slices and the head port's metadata.
    val partialWait = RegInit(false.B)
    val arrived = RegInit(0.U(num_clients.W))
    val cdReg = RegInit(0.U.asTypeOf(cd.cloneType))

    // Copies every D field except data and size; the source is truncated to the destination
    // width, which strips the slice index added on the A channel.
    def setMetadata(dst: TLBundleD, src: TLBundleD): Unit = {
      dst.opcode := src.opcode
      dst.user := src.user
      dst.param := src.param
      dst.sink := src.sink
      dst.denied := src.denied
      dst.corrupt := src.corrupt
      dst.source := src.source(dst.source.getWidth - 1, 0)
    }

    // Data and valid bits of the slices being accepted this cycle (zero where nothing fires).
    val partialData = VecInit(mn.map(_.d).map(d => Mux(d.fire, d.bits.data, 0.U(d.bits.data.getWidth.W)))).asUInt
    val partialValid = VecInit(mn.map(_.d.fire)).asUInt
    // Must be `fire`, not `valid`: once the head slice has arrived its ready is low, and a valid
    // beat on that port then belongs to the *next* transaction.
    val headFire = mn.head.d.fire

    // NOTE(review): outward ready, and therefore inward valid, depends combinationally on
    // cn.d.ready. This is kept from the original design - confirm upstream tolerates it.
    mn.map(_.d.ready).zip(arrived.asBools).foreach { case (r, a) =>
      r := cn.d.ready && (!partialWait || !a) // if waiting for partial response, ready only if not arrived yet
    }

    // TODO: might need coverage test for this
    when (!partialWait) {
      cn.d.valid := false.B
      partialWait := false.B
      when (partialValid.andR) {
        // all valids, immediately return both metadata and data
        cn.d.valid := true.B
        cd.data := Cat(mn.map(_.d.bits.data).reverse)
        setMetadata(cd, mn.head.d.bits)
        assert(cd.data === partialData, "sanity check")
      }.elsewhen (partialValid.orR) {
        // at least 1 valid: enter partial valid state, store partial data into regs
        partialWait := true.B
        arrived := partialValid
        cdReg.data := partialData
        when (headFire) { setMetadata(cdReg, mn.head.d.bits) }
      }
    }.otherwise {
      cn.d.valid := false.B
      partialWait := true.B
      when ((arrived | partialValid).andR) {
        // all valids received now
        when (headFire) {
          setMetadata(cd, mn.head.d.bits)
        }.otherwise {
          // Copy only the metadata: a whole-bundle connect would overwrite cd.size with
          // cdReg.size, which is never written and stays at its reset value of 0.
          setMetadata(cd, cdReg)
        }
        cn.d.valid := true.B
        cd.data := cdReg.data | partialData
        partialWait := false.B
        cdReg := 0.U.asTypeOf(cdReg.cloneType)
        arrived := 0.U
      }.elsewhen (partialValid.orR) {
        // update partial data
        arrived := arrived | partialValid
        cdReg.data := cdReg.data | partialData
        when (headFire) { setMetadata(cdReg, mn.head.d.bits) }
      }
    }
  }
}
/** Factory that instantiates a [[DistributorNode]] and exposes only its diplomatic node. */
object DistributorNode {
  /**
    * Builds a distributor and returns its nexus node, ready to be wired with `:=`.
    *
    * @param from width in bytes of the wide, inward-facing access
    * @param to   width in bytes of each narrow, outward-facing access
    * @return the `TLNexusNode` of the freshly created distributor
    */
  def apply(from: Int, to: Int)(implicit p: Parameters, valName: ValName, sourceInfo: SourceInfo): TLNexusNode = {
    val distributor = LazyModule(new DistributorNode(from, to))
    distributor.node
  }
}

View File

@@ -27,7 +27,7 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten
require(isPow2(vis_mask + 1) || vis_mask == -1)
println(f"combined visibilities of splitter memory node clients: ${vis_min}, ${vis_mask}")
seq(0).v1copy(
seq.head.v1copy(
echoFields = BundleField.union(seq.flatMap(_.echoFields)),
requestFields = BundleField.union(seq.flatMap(_.requestFields)),
responseKeys = seq.flatMap(_.responseKeys).distinct,
@@ -56,9 +56,8 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten
)
},
managerFn = { seq =>
println(seq.flatMap(_.slaves.map(_.supports)))
// val fifoIdFactory = TLXbar.relabeler()
seq(0).v1copy(
seq.head.v1copy(
responseFields = BundleField.union(seq.flatMap(_.responseFields)),
requestKeys = seq.flatMap(_.requestKeys).distinct,
minLatency = seq.map(_.minLatency).min,
@@ -81,7 +80,7 @@ class RWSplitterNode(name: String = "rw_splitter")(implicit p: Parameters) exten
val u_out = node.out
val u_in = node.in
assert(u_out.length == 2)
println(f"gemmini unified memory node has ${u_in.length} incoming client(s)")
println(f"${name} has ${u_in.length} incoming client(s)")
val r_out = u_out.head
val w_out = u_out.last

View File

@@ -7,13 +7,11 @@ import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.prci.ClockSinkParameters
import freechips.rocketchip.regmapper.RegField
import freechips.rocketchip.subsystem._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util.BundleField
import gemmini._
import org.chipsalliance.cde.config.Parameters
import radiance.memory.RWSplitterNode
import radiance.memory._
case class RadianceClusterParams(
val clusterId: Int,
@@ -69,50 +67,63 @@ class RadianceCluster (
// TODO: stride by word
val unified_mem_read_node = TLIdentityNode()
val unified_mem_write_node = TLIdentityNode()
val spad_data_len = gemminiConfig.sp_width / 8
val acc_data_len = gemminiConfig.sp_width / gemminiConfig.inputType.getWidth * gemminiConfig.accType.getWidth / 8
val max_data_len = spad_data_len // max acc_data_len
val smem_base = gemminiConfig.tl_ext_mem_base
val smem_depth = gemminiConfig.sp_bank_entries * spad_data_len / max_data_len
val smem_width = max_data_len
val smem_width = spad_data_len
val smem_depth = gemminiConfig.sp_bank_entries * spad_data_len / smem_width
val smem_banks = gemminiConfig.sp_banks
val smem_subbanks = 1
val smem_subbanks = smem_width / wordSize
val smem_size = smem_width * smem_depth * smem_banks
val splitter_node = RWSplitterNode()
val stride_by_word = true
unified_mem_read_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_read_nodes
unified_mem_write_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_write_nodes
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node
// unified_mem_read_node :=* TLWidthWidget(acc_data_len) :=* acc_read_nodes
// unified_mem_write_node :=* TLWidthWidget(acc_data_len) :=* acc_write_nodes
val radiance_smem_fanout = radianceTiles.flatMap {
_.smemNodes.map { m =>
val smem_fanout_xbar = TLXbar()
smem_fanout_xbar :=* m
smem_fanout_xbar
}
}
// assert(splitter_node.in.map(_._2.slave.slaves.flatMap(_.supports.get)))
/* address = Seq(AddressSet(gemmini.spad_base, smem_depth * smem_width * smem_banks - 1)),
supports = TLMasterToSlaveTransferSizes(
get = TransferSizes(1, smem_width),
putFull = TransferSizes(1, smem_width),
putPartial = TransferSizes(1, smem_width)),*/
unified_mem_read_node := TLWidthWidget(spad_data_len) := splitter_node
unified_mem_write_node := TLWidthWidget(spad_data_len) := splitter_node
val stride_by_word = false
require(isPow2(smem_banks))
// collection of read and write managers for each sram (sub)bank
val smem_bank_mgrs : Seq[Seq[TLManagerNode]] = if (stride_by_word) {
assert(false, "TODO under construction")
// assert((config.sp_capacity match { case CapacityInKilobytes(kb) => kb * 1024}) ==
// gemmini.config.sp_bank_entries * spad_data_len / max_data_len * gemmini.config.sp_banks * max_data_len)
(0 until gemminiConfig.sp_banks).map { bank =>
LazyModule(new TLRAM(
address = AddressSet(max_data_len * bank,
((gemminiConfig.sp_bank_entries * spad_data_len / max_data_len - 1) * gemminiConfig.sp_banks + bank)
* max_data_len + (max_data_len - 1)),
beatBytes = max_data_len
))
}.map(x => Seq(x.node))
require(isPow2(smem_subbanks))
(0 until smem_banks).flatMap { bid =>
(0 until smem_subbanks).map { wid =>
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
managers = Seq(TLSlaveParameters.v2(
name = Some(f"sp_bank${bid}_word${wid}_read_mgr"),
address = Seq(AddressSet(
smem_base + (smem_depth * smem_width * bid) + wordSize * wid,
smem_depth * smem_width - smem_width + wordSize - 1
)),
supports = TLMasterToSlaveTransferSizes(
get = TransferSizes(wordSize, wordSize)),
fifoId = Some(0)
)),
beatBytes = wordSize
))
), TLManagerNode(Seq(TLSlavePortParameters.v1(
managers = Seq(TLSlaveParameters.v2(
name = Some(f"sp_bank${bid}_word${wid}_write_mgr"),
address = Seq(AddressSet(
smem_base + (smem_depth * smem_width * bid) + wordSize * wid,
smem_depth * smem_width - smem_width + wordSize - 1
)),
supports = TLMasterToSlaveTransferSizes(
putFull = TransferSizes(wordSize, wordSize),
putPartial = TransferSizes(wordSize, wordSize)),
fifoId = Some(0)
)),
beatBytes = wordSize
))))
}
}
} else {
require(isPow2(smem_banks))
(0 until smem_banks).map { bank =>
Seq(TLManagerNode(Seq(TLSlavePortParameters.v1(
managers = Seq(TLSlaveParameters.v2(
@@ -124,39 +135,131 @@ class RadianceCluster (
fifoId = Some(0)
)),
beatBytes = smem_width
))),
TLManagerNode(Seq(TLSlavePortParameters.v1(
managers = Seq(TLSlaveParameters.v2(
name = Some(f"sp_bank${bank}_write_mgr"),
address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank),
smem_depth * smem_width - 1)),
supports = TLMasterToSlaveTransferSizes(
putFull = TransferSizes(1, smem_width),
putPartial = TransferSizes(1, smem_width)),
fifoId = Some(0)
)),
beatBytes = smem_width
))))
))
), TLManagerNode(Seq(TLSlavePortParameters.v1(
managers = Seq(TLSlaveParameters.v2(
name = Some(f"sp_bank${bank}_write_mgr"),
address = Seq(AddressSet(smem_base + (smem_depth * smem_width * bank),
smem_depth * smem_width - 1)),
supports = TLMasterToSlaveTransferSizes(
putFull = TransferSizes(1, smem_width),
putPartial = TransferSizes(1, smem_width)),
fifoId = Some(0)
)),
beatBytes = smem_width
))))
}
}
val smem_r_xbar = TLXbar()
val smem_w_xbar = TLXbar()
smem_r_xbar :=* unified_mem_read_node
smem_w_xbar :=* unified_mem_write_node
if (stride_by_word) {
val spad_read_nodes = Seq.fill(smem_banks) {
val r_dist = DistributorNode(from = smem_width, to = wordSize)
r_dist := gemmini.spad_read_nodes
Seq.fill(smem_subbanks) {
val id_node = TLIdentityNode()
id_node := r_dist
id_node
}
}
val spad_write_nodes = Seq.fill(smem_banks) {
val w_dist = DistributorNode(from = smem_width, to = wordSize)
w_dist := gemmini.spad_write_nodes
Seq.fill(smem_subbanks) {
val id_node = TLIdentityNode()
id_node := w_dist
id_node
}
}
val ws_dist = DistributorNode(from = smem_width, to = wordSize)
ws_dist := gemmini.spad.spad_writer.node // this is the dma write node
val spad_sp_write_nodes = Seq.fill(smem_subbanks) {
val ws_xbar = TLXbar() // fanout to 4 banks
ws_xbar := ws_dist
ws_xbar
}
smem_bank_mgrs.foreach { mem =>
require(mem.length == 2)
mem.head := smem_r_xbar
mem.last := TLFragmenter(spad_data_len, max_write_width_bytes) := smem_w_xbar
// spad_read_nodes.flatten.foreach(node => unified_mem_read_node :=* node)
// spad_write_nodes.flatten.foreach(node => unified_mem_write_node :=* node)
// spad_sp_write_nodes.foreach(node => unified_mem_write_node :=* node)
// unified_mem_write_node :=* DistributorNode(from = smem_width, to = wordSize) :=* gemmini.spad.spad_writer.node // this is the dma write node
// unified_mem_read_node :=* TLWidthWidget(acc_data_len) :=* acc_read_nodes
// unified_mem_write_node :=* TLWidthWidget(acc_data_len) :=* acc_write_nodes
// these nodes access an entire line simultaneously
val uniform_r_nodes: Seq[Seq[Seq[TLNode]]] = spad_read_nodes.map { rb =>
rb.map { rw => Seq(rw) }
}
val uniform_w_nodes: Seq[Seq[Seq[TLNode]]] = spad_write_nodes.map { wb =>
(wb zip spad_sp_write_nodes).map { case (ww, sw) => Seq(ww, sw) }
}
val splitter_nodes = radiance_smem_fanout.map { m =>
val splitter_node = RWSplitterNode()
splitter_node := m
splitter_node
}
radiance_smem_fanout.foreach(clbus.inwardNode := _)
// these nodes are random access
val nonuniform_r_nodes: Seq[TLNode] = splitter_nodes.map { s =>
val nu_r_xbar = TLXbar()
nu_r_xbar := s
nu_r_xbar
}.toSeq
val nonuniform_w_nodes: Seq[TLNode] = splitter_nodes.map { s =>
val nu_w_xbar = TLXbar()
nu_w_xbar := s
nu_w_xbar
}.toSeq
smem_bank_mgrs.grouped(smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) =>
bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
// TODO: this should be a coordinated round robin
val subbank_r_xbar = TLXbar(TLArbiter.lowestIndexFirst)
val subbank_w_xbar = TLXbar(TLArbiter.lowestIndexFirst)
r := subbank_r_xbar
w := subbank_w_xbar
uniform_r_nodes(bid)(wid).foreach( subbank_r_xbar := _ )
uniform_w_nodes(bid)(wid).foreach( subbank_w_xbar := _ )
nonuniform_r_nodes.foreach( subbank_r_xbar := _ )
nonuniform_w_nodes.foreach( subbank_w_xbar := _ )
}
}
} else {
unified_mem_read_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_read_nodes
unified_mem_write_node :=* TLWidthWidget(spad_data_len) :=* gemmini.spad_write_nodes
unified_mem_write_node := gemmini.spad.spad_writer.node // this is the dma write node
val splitter_node = RWSplitterNode()
unified_mem_read_node := TLWidthWidget(spad_data_len) := splitter_node
unified_mem_write_node := TLWidthWidget(spad_data_len) := splitter_node
radiance_smem_fanout.foreach(clbus.inwardNode := _)
splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode
val smem_r_xbar = TLXbar()
val smem_w_xbar = TLXbar()
DisableMonitors { implicit p =>
smem_r_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_read_node
smem_w_xbar :=* TLWidthWidget(wordSize) :=* unified_mem_write_node
}
smem_bank_mgrs.foreach { mem =>
require(mem.length == 2)
mem.head := smem_r_xbar
mem.last := smem_w_xbar
}
}
// connect tile smem nodes to xbar, and xbar to banks
// val smem_xbar = TLXbar()
splitter_node :=* TLWidthWidget(4) :=* clbus.outwardNode
gemminiTile.slaveNode :=* TLWidthWidget(4) :=* clbus.outwardNode
// printf and perf counter buffer FIXME: make configurable
TLRAM(AddressSet(x"ff004000", numCores * 0x200 - 1)) := TLFragmenter(4, 4) := clbus.outwardNode
assert(smem_size == 0x4000, "fix me")
// printf and perf counter buffer
TLRAM(AddressSet(x"ff000000" + smem_size, numCores * 0x200 - 1)) := TLFragmenter(4, 4) := clbus.outwardNode
// Diplomacy sink nodes for cluster-wide barrier sync signal
val barrierSlaveNode = BarrierSlaveNode(numCores)
@@ -174,7 +277,6 @@ class RadianceCluster (
// (perSmemPortXbars zip tile.smemNodes).foreach {
// case (xbar, node) => xbar.node := node
// }
tile.smemNodes.foreach(clbus.inwardNode := _)
barrierSlaveNode := tile.barrierMasterNode
}
// perSmemPortXbars.foreach { clbus.inwardNode := _.node }
@@ -212,23 +314,10 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
}
// TODO: remove Pipeline dependency of gemmini
def makeSmemBanks: Unit = {
outer.smem_bank_mgrs.foreach { case Seq(r, w) =>
val mem_depth = outer.smem_depth
val mem_width = outer.smem_width
val mem = TwoPortSyncMem(
n = mem_depth,
t = UInt((mem_width * 8).W),
mask_len = mem_width // byte level mask
)
val (r_node, r_edge) = r.in.head
val (w_node, w_edge) = w.in.head
// READ
def makeSmemBanks(): Unit = {
def make_buffer[T <: Data](mem: TwoPortSyncMem[T], r_node: TLBundle, r_edge: TLEdgeIn,
w_node: TLBundle, w_edge: TLEdgeIn): Unit = {
mem.io.ren := r_node.a.fire
mem.io.raddr := (r_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
val data_pipe_in = Wire(DecoupledIO(mem.io.rdata.cloneType))
data_pipe_in.valid := RegNext(mem.io.ren)
@@ -274,7 +363,7 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
r_node.d.bits := r_edge.AccessAck(
Mux(r_node.d.valid, metadata_pipe.bits.source, 0.U),
Mux(r_node.d.valid, metadata_pipe.bits.size, 0.U),
Mux(!data_pipe.valid, sram_read_backup_reg.bits, data_pipe.bits))
Mux(!data_pipe.valid, sram_read_backup_reg.bits, data_pipe.bits).asUInt)
r_node.d.valid := data_pipe.valid || sram_read_backup_reg.valid
// r node A is not ready only if D is not ready and both slots filled
r_node.a.ready := r_node.d.ready && !(data_pipe.valid && sram_read_backup_reg.valid)
@@ -283,16 +372,71 @@ class RadianceClusterModuleImp(outer: RadianceCluster) extends ClusterModuleImp(
// WRITE
mem.io.wen := w_node.a.fire
mem.io.waddr := (w_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
mem.io.wdata := w_node.a.bits.data
mem.io.mask := w_node.a.bits.mask.asBools
w_node.a.ready := w_node.d.ready// && (mem.io.waddr =/= mem.io.raddr)
w_node.d.valid := w_node.a.valid
w_node.d.bits := w_edge.AccessAck(w_node.a.bits)
}
if (outer.stride_by_word) {
outer.smem_bank_mgrs.grouped(outer.smem_subbanks).zipWithIndex.foreach { case (bank_mgrs, bid) =>
assert(bank_mgrs.flatten.size == 2 * outer.smem_subbanks)
bank_mgrs.zipWithIndex.foreach { case (Seq(r, w), wid) =>
assert(!r.portParams.map(_.anySupportPutFull).reduce(_ || _))
assert(!w.portParams.map(_.anySupportGet).reduce(_ || _))
val mem_depth = outer.smem_depth
val mem_width = outer.smem_width
val word_width = outer.wordSize
val mem = TwoPortSyncMem(
n = mem_depth,
t = UInt((word_width * 8).W),
mask_len = word_width // byte level mask
)
mem.suggestName(s"rad_smem_c${outer.thisClusterParams.clusterId}_b${bid}_w${wid}")
val (r_node, r_edge) = r.in.head
val (w_node, w_edge) = w.in.head
// address format is
// [ smem_base | bank_id | line_id | word_id | byte_offset ]
// line_id is used to index into the SRAMs
mem.io.raddr := (r_node.a.bits.address & (mem_depth * mem_width - 1).U) >> log2Ceil(mem_width).U
mem.io.waddr := (w_node.a.bits.address & (mem_depth * mem_width - 1).U) >> log2Ceil(mem_width).U
assert((bid.U === ((r_node.a.bits.address & (mem_depth * mem_width * outer.smem_banks - 1).U) >>
log2Ceil(mem_depth * mem_width).U).asUInt) || !r_node.a.valid, "bank id mismatch with request")
assert((wid.U === ((r_node.a.bits.address & (mem_width - 1).U) >>
log2Ceil(word_width).U).asUInt) || !r_node.a.valid, "word id mismatch with request")
make_buffer(mem, r_node, r_edge, w_node, w_edge)
}
}
} else {
outer.smem_bank_mgrs.foreach { case Seq(r, w) =>
val mem_depth = outer.smem_depth
val mem_width = outer.smem_width
val mem = TwoPortSyncMem(
n = mem_depth,
t = UInt((mem_width * 8).W),
mask_len = mem_width // byte level mask
)
val (r_node, r_edge) = r.in.head
val (w_node, w_edge) = w.in.head
mem.io.raddr := (r_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
mem.io.waddr := (w_node.a.bits.address ^ outer.smem_base.U) >> log2Ceil(mem_width).U
make_buffer(mem, r_node, r_edge, w_node, w_edge)
}
}
}
makeSmemBanks
makeSmemBanks()
println(s"======== barrierSlaveNode: ${outer.barrierSlaveNode.in(0)._2.barrierIdBits}")
}