diff --git a/src/main/scala/radiance/memory/CanHaveMemtraceCore.scala b/src/main/scala/radiance/memory/CanHaveMemtraceCore.scala index ae803d5..701a274 100644 --- a/src/main/scala/radiance/memory/CanHaveMemtraceCore.scala +++ b/src/main/scala/radiance/memory/CanHaveMemtraceCore.scala @@ -17,10 +17,10 @@ trait CanHaveMemtraceCore { this: BaseSubsystem => // Safe to use get as WithMemtraceCore requires WithNLanes to be defined val simtParam = p(SIMTCoreKey).get val config = DefaultCoalescerConfig.copy( - numLanes = simtParam.nLanes, + numLanes = simtParam.nMemLanes, numOldSrcIds = simtParam.nSrcIds ) - val numLanes = simtParam.nLanes + val numLanes = simtParam.nMemLanes val filename = param.tracefilename // Need to explicitly generate clock domain; see rocket-chip 8881ccd diff --git a/src/main/scala/radiance/memory/Coalescing.scala b/src/main/scala/radiance/memory/Coalescing.scala index cc5c40e..338c36c 100644 --- a/src/main/scala/radiance/memory/Coalescing.scala +++ b/src/main/scala/radiance/memory/Coalescing.scala @@ -12,8 +12,13 @@ import freechips.rocketchip.tilelink._ // TODO: find better place for these -// Note: numNewSrcId is not a part of CoreParam, because the SIMT core should be agnostic to how inflight coalesced request can be genertated -case class SIMTCoreParams(nLanes: Int = 4, nSrcIds: Int = 8) +case class SIMTCoreParams( + nWarps: Int = 4, // # of warps in the core + nCoreLanes: Int = 4, // # of SIMT threads in the core + nMemLanes: Int = 4, // # of memory lanes in the memory interface to the + // cache; relates to the LSU lanes + nSrcIds: Int = 8 // # of source IDs allocated to each of the nMemLanes +) case class MemtraceCoreParams( tracefilename: String = "undefined", traceHasSource: Boolean = false @@ -2325,7 +2330,7 @@ class DummyDriverImp(outer: DummyDriver, config: CoalescerConfig) // A dummy harness around the coalescer for use in VLSI flow. // Should not instantiate any memtrace modules. 
class DummyCoalescer(implicit p: Parameters) extends LazyModule { - val numLanes = p(SIMTCoreKey).get.nLanes + val numLanes = p(SIMTCoreKey).get.nMemLanes val config = DefaultCoalescerConfig.copy(numLanes = numLanes) val driver = LazyModule(new DummyDriver(config)) @@ -2362,7 +2367,7 @@ class DummyCoalescerTest(timeout: Int = 500000)(implicit p: Parameters) // tracedriver --> coalescer --> tracelogger --> tlram class TLRAMCoalescerLogger(filename: String)(implicit p: Parameters) extends LazyModule { - val numLanes = p(SIMTCoreKey).get.nLanes + val numLanes = p(SIMTCoreKey).get.nMemLanes val config = DefaultCoalescerConfig.copy(numLanes = numLanes) val driver = LazyModule(new MemTraceDriver(config, filename)) @@ -2454,7 +2459,7 @@ class TLRAMCoalescerLoggerTest(filename: String, timeout: Int = 500000)(implicit // tracedriver --> coalescer --> tlram class TLRAMCoalescer(implicit p: Parameters) extends LazyModule { - val numLanes = p(SIMTCoreKey).get.nLanes + val numLanes = p(SIMTCoreKey).get.nMemLanes val config = DefaultCoalescerConfig.copy(numLanes = numLanes) val filename = "vecadd.core1.thread4.trace" diff --git a/src/main/scala/radiance/memory/UnitTest.scala b/src/main/scala/radiance/memory/UnitTest.scala index 24ea69d..c070ef4 100644 --- a/src/main/scala/radiance/memory/UnitTest.scala +++ b/src/main/scala/radiance/memory/UnitTest.scala @@ -8,7 +8,7 @@ import freechips.rocketchip.subsystem.{BaseSubsystemConfig} import freechips.rocketchip.devices.tilelink._ import freechips.rocketchip.tilelink._ import freechips.rocketchip.util._ -import radiance.subsystem.WithSimtLanes +import radiance.subsystem.WithSimtConfig import freechips.rocketchip.unittest._ //import rocket.VortexFatBankTest @@ -27,7 +27,7 @@ class WithCoalescingUnitTests extends Config((site, _, _) => { // Module(new TLRAMCoalescerLoggerTest(filename="sfilter.core1.thread4.trace", timeout=timeout)), // Module(new TLRAMCoalescerLoggerTest(filename="nearn.core1.thread4.trace", timeout=50000000 * 
site(TestDurationMultiplier))), // Module(new TLRAMCoalescerLoggerTest(filename="psort.core1.thread4.trace", timeout=timeout)), - // Module(new TLRAMCoalescerLoggerTest(filename="nvbit.vecadd.n100000.filter_sm0.trace", timeout=timeout)(new WithSimtLanes(32))), + // Module(new TLRAMCoalescerLoggerTest(filename="nvbit.vecadd.n100000.filter_sm0.trace", timeout=timeout)(new WithSimtConfig(nMemLanes=32))), // Module(new TLRAMCoalescerLoggerTest(filename="nvbit.vecadd.n100000.filter_sm0.lane4.trace", timeout=timeout)), ) } }) @@ -48,12 +48,12 @@ class WithCoalescingUnitSynthesisDummy(nLanes: Int) extends Config((site, _, _) implicit val p = q val timeout = 50000 * site(TestDurationMultiplier) Seq( - Module(new DummyCoalescerTest(timeout=timeout)(new WithSimtLanes(nLanes=4))), + Module(new DummyCoalescerTest(timeout=timeout)(new WithSimtConfig(nMemLanes=4))), ) } }) -class CoalescingUnitTestConfig extends Config(new WithCoalescingUnitTests ++ new WithTestDuration(10) ++ new WithSimtLanes(nLanes=4) ++ new BaseSubsystemConfig) -//class VortexFatBankUnitTestConfig extends Config(new WithVortexFatBankUnitTests ++ new WithTestDuration(10) ++ new WithSimtLanes(nLanes=4) ++ new BaseSubsystemConfig) +class CoalescingUnitTestConfig extends Config(new WithCoalescingUnitTests ++ new WithTestDuration(10) ++ new WithSimtConfig(nMemLanes=4) ++ new BaseSubsystemConfig) +//class VortexFatBankUnitTestConfig extends Config(new WithVortexFatBankUnitTests ++ new WithTestDuration(10) ++ new WithSimtConfig(nMemLanes=4) ++ new BaseSubsystemConfig) // Dummy configs of various sizes for synthesis class CoalescingSynthesisDummyLane4Config extends Config(new WithCoalescingUnitSynthesisDummy(4) ++ new WithTestDuration(10) ++ new BaseSubsystemConfig) diff --git a/src/main/scala/radiance/subsystem/Configs.scala b/src/main/scala/radiance/subsystem/Configs.scala index 77b0711..daaa9f3 100644 --- a/src/main/scala/radiance/subsystem/Configs.scala +++ b/src/main/scala/radiance/subsystem/Configs.scala @@ -66,10 +66,13 
@@ class WithFuzzerCores( }) // `nSrcIds`: number of source IDs for dmem requests on each SIMT lane -class WithSimtLanes(nLanes: Int, nSrcIds: Int = 8) extends Config((site, _, up) => { +class WithSimtConfig(nWarps: Int = 4, nCoreLanes: Int = 4, nMemLanes: Int = 4, nSrcIds: Int = 8) +extends Config((site, _, up) => { case SIMTCoreKey => { Some(up(SIMTCoreKey, site).getOrElse(SIMTCoreParams()).copy( - nLanes = nLanes, + nWarps = nWarps, + nCoreLanes = nCoreLanes, + nMemLanes = nMemLanes, nSrcIds = nSrcIds )) } @@ -105,7 +108,7 @@ class WithVortexL1Banks(nBanks: Int = 4) extends Config ((site, _, up) => { class WithCoalescer(nNewSrcIds: Int = 8, enable : Boolean = true) extends Config((site, _, up) => { case CoalescerKey => { val (nLanes, numOldSrcIds) = up(SIMTCoreKey, site) match { - case Some(param) => (param.nLanes, param.nSrcIds) + case Some(param) => (param.nMemLanes, param.nSrcIds) case None => (1,1) } @@ -182,4 +185,4 @@ class WithExtGPUMem(address: BigInt = BigInt("0x100000000", 16), }) }) case class GPUMemParams(address: BigInt = BigInt("0x100000000", 16), size: BigInt = 0x80000000) -case class GPUMemory() extends Field[Option[GPUMemParams]](None) \ No newline at end of file +case class GPUMemory() extends Field[Option[GPUMemParams]](None) diff --git a/src/main/scala/radiance/tile/FuzzerTile.scala b/src/main/scala/radiance/tile/FuzzerTile.scala index e76342e..c139744 100644 --- a/src/main/scala/radiance/tile/FuzzerTile.scala +++ b/src/main/scala/radiance/tile/FuzzerTile.scala @@ -60,7 +60,7 @@ class FuzzerTile private ( // val statusNode = BundleBridgeSource(() => new GroundTestStatus) val (numLanes, numSrcIds) = p(SIMTCoreKey) match { - case Some(param) => (param.nLanes, param.nSrcIds) + case Some(param) => (param.nMemLanes, param.nSrcIds) case None => { require(false, "fuzzer requires SIMTCoreKey to be defined") (0, 0) diff --git a/src/main/scala/radiance/tile/RadianceTile.scala b/src/main/scala/radiance/tile/RadianceTile.scala index f069aaf..4b6a9be 
100644 --- a/src/main/scala/radiance/tile/RadianceTile.scala +++ b/src/main/scala/radiance/tile/RadianceTile.scala @@ -140,10 +140,21 @@ class RadianceTile private ( require( p(SIMTCoreKey).isDefined, - "SIMTCoreKey not defined; make sure to use WithSimtLanes when using RadianceTile" + "SIMTCoreKey not defined; make sure to use WithSimtConfig when using RadianceTile" ) - val numLanes = p(SIMTCoreKey) match { - case Some(simtParam) => simtParam.nLanes + + // NOTE: when changing these, remember to change +define+NUM_THREADS/WARPS in + // EXTRA_SIM_PREPROC_DEFINES as well! + val numWarps = p(SIMTCoreKey) match { + case Some(simtParam) => simtParam.nWarps + case None => 4 + } + val numCoreLanes = p(SIMTCoreKey) match { + case Some(simtParam) => simtParam.nCoreLanes + case None => 4 + } + val numLsuLanes = p(SIMTCoreKey) match { + case Some(simtParam) => simtParam.nMemLanes case None => 4 } @@ -170,13 +181,14 @@ class RadianceTile private ( val smemSourceWidth = 4 // FIXME: hardcoded - val numWarps = 4 // TODO: parametrize + // Replicates some of the logic of how Vortex determines the tag width of + // memory requests so that Chisel and Verilog are in agreement on bitwidths. + // See VX_gpu_pkg.sv val NW_WIDTH = (if (numWarps == 1) 1 else log2Ceil(numWarps)) val UUID_WIDTH = 44 val imemTagWidth = UUID_WIDTH + NW_WIDTH - val numLsuLanes = 4 - // see VX_gpu_pkg.sv - val LSUQ_SIZE = 8 * (numLanes / numLsuLanes) + + val LSUQ_SIZE = 8 * (numCoreLanes / numLsuLanes) val LSUQ_TAG_BITS = log2Ceil(LSUQ_SIZE) + 1 /*DCACHE_BATCH_SEL_BITS*/ val dmemTagWidth = UUID_WIDTH + LSUQ_TAG_BITS // dmem and smem shares the same tag width, DCACHE_NOSM_TAG_WIDTH @@ -764,7 +776,6 @@ class VortexTLAdapter( io.outReq.bits.corrupt := 0.U io.inReq.ready := io.outReq.ready // VortexBundleD <> TLBundleD - // Filtering out write requests is handled inside the wrapper Verilog io.inResp.valid := io.outResp.valid io.inResp.bits.opcode := io.outResp.bits.opcode io.inResp.bits.size := io.outResp.bits.size