diff --git a/generators/chipyard/src/main/scala/DigitalTop.scala b/generators/chipyard/src/main/scala/DigitalTop.scala index 0097b21e..879ede57 100644 --- a/generators/chipyard/src/main/scala/DigitalTop.scala +++ b/generators/chipyard/src/main/scala/DigitalTop.scala @@ -33,6 +33,7 @@ class DigitalTop(implicit p: Parameters) extends ChipyardSystem with chipyard.example.CanHavePeripheryStreamingPassthrough // Enables optionally adding the DSPTools streaming-passthrough example widget with nvidia.blocks.dla.CanHavePeripheryNVDLA // Enables optionally having an NVDLA with chipyard.clocking.HasChipyardPRCI // Use Chipyard reset/clock distribution + with chipyard.clocking.CanHaveClockTap // Enables optionally adding a clock tap output port with fftgenerator.CanHavePeripheryFFT // Enables optionally having an MMIO-based FFT block with constellation.soc.CanHaveGlobalNoC // Support instantiating a global NoC interconnect { diff --git a/generators/chipyard/src/main/scala/clocking/CanHaveClockTap.scala b/generators/chipyard/src/main/scala/clocking/CanHaveClockTap.scala new file mode 100644 index 00000000..5d0ce538 --- /dev/null +++ b/generators/chipyard/src/main/scala/clocking/CanHaveClockTap.scala @@ -0,0 +1,27 @@ +package chipyard.clocking + +import chisel3._ + +import org.chipsalliance.cde.config.{Parameters, Field, Config} +import freechips.rocketchip.diplomacy._ +import freechips.rocketchip.tilelink._ +import freechips.rocketchip.subsystem._ +import freechips.rocketchip.util._ +import freechips.rocketchip.tile._ +import freechips.rocketchip.prci._ + +case object ClockTapKey extends Field[Boolean](true) + +trait CanHaveClockTap { this: BaseSubsystem => + require(p(SubsystemDriveAsyncClockGroupsKey).isEmpty, "Subsystem asyncClockGroups must be undriven") + val clockTapNode = Option.when(p(ClockTapKey)) { + val clockTap = ClockSinkNode(Seq(ClockSinkParameters(name=Some("clock_tap")))) + clockTap := ClockGroup() := asyncClockGroupsNode + clockTap + } + val clockTapIO = clockTapNode.map { node => InModuleBody { + val clock_tap = IO(Output(Clock())) + clock_tap := node.in.head._1.clock + clock_tap + }} +} diff --git a/generators/chipyard/src/main/scala/clocking/ClockBinders.scala b/generators/chipyard/src/main/scala/clocking/ClockBinders.scala index 95db85d4..fdb2ec9e 100644 --- a/generators/chipyard/src/main/scala/clocking/ClockBinders.scala +++ b/generators/chipyard/src/main/scala/clocking/ClockBinders.scala @@ -2,7 +2,7 @@ package chipyard.clocking import chisel3._ import chisel3.util._ -import chipyard.iobinders.{OverrideLazyIOBinder, GetSystemParameters, IOCellKey, ClockPort, ResetPort} +import chipyard.iobinders._ import freechips.rocketchip.prci._ import freechips.rocketchip.diplomacy._ import freechips.rocketchip.subsystem._ @@ -14,14 +14,17 @@ import barstools.iocell.chisel._ // blocks, which allow memory-mapped control of clock division, and clock muxing // between the FakePLL and the slow off-chip clock // Note: This will not simulate properly with firesim -class WithPLLSelectorDividerClockGenerator extends OverrideLazyIOBinder({ +// Unsetting enable will prevent the divider/selector from actually modifying the clock, +// while preserving the address map. Unsetting enable should only be done for RTL +// simulators (Verilator) which do not model reset properly +class WithPLLSelectorDividerClockGenerator(enable: Boolean = true) extends OverrideLazyIOBinder({ (system: HasChipyardPRCI) => { // Connect the implicit clock implicit val p = GetSystemParameters(system) val tlbus = system.asInstanceOf[BaseSubsystem].locateTLBusWrapper(system.prciParams.slaveWhere) val baseAddress = system.prciParams.baseAddress - val clockDivider = system.prci_ctrl_domain { LazyModule(new TLClockDivider (baseAddress + 0x20000, tlbus.beatBytes)) } - val clockSelector = system.prci_ctrl_domain { LazyModule(new TLClockSelector(baseAddress + 0x30000, tlbus.beatBytes)) } + val clockDivider = system.prci_ctrl_domain { LazyModule(new TLClockDivider (baseAddress + 0x20000, tlbus.beatBytes, enable=enable)) } + val clockSelector = system.prci_ctrl_domain { LazyModule(new TLClockSelector(baseAddress + 0x30000, tlbus.beatBytes, enable=enable)) } val pllCtrl = system.prci_ctrl_domain { LazyModule(new FakePLLCtrl (baseAddress + 0x40000, tlbus.beatBytes)) } clockDivider.tlNode := system.prci_ctrl_domain { TLFragmenter(tlbus.beatBytes, tlbus.blockBytes) := system.prci_ctrl_bus.get } @@ -98,3 +101,12 @@ class WithPassthroughClockGenerator extends OverrideLazyIOBinder({ } } }) + +class WithClockTapIOCells extends OverrideIOBinder({ + (system: CanHaveClockTap) => { + system.clockTapIO.map { tap => + val (clock_tap_io, clock_tap_cell) = IOCell.generateIOFromSignal(tap.getWrappedValue, "clock_tap") + (Seq(ClockTapPort(() => clock_tap_io)), clock_tap_cell) + }.getOrElse((Nil, Nil)) + } +}) diff --git a/generators/chipyard/src/main/scala/clocking/TLClockDivider.scala b/generators/chipyard/src/main/scala/clocking/TLClockDivider.scala index a3c33561..958c9194 100644 --- a/generators/chipyard/src/main/scala/clocking/TLClockDivider.scala +++ b/generators/chipyard/src/main/scala/clocking/TLClockDivider.scala @@ -15,11 +15,27 @@ import testchipip.clocking._ // This module adds a TileLink memory-mapped clock divider to the clock graph // The output clock/reset pairs from this module should be synchronized later -class TLClockDivider(address: BigInt, beatBytes: Int, divBits: Int = 8)(implicit p: Parameters) extends LazyModule { +// If enable is unset, this will not divide the clock +// DO NOT unset enable for VLSI, or prototyping flows. The disable feature is a work around for +// some RTL simulators which do not simulate the reset synchronization properly +class TLClockDivider(address: BigInt, beatBytes: Int, divBits: Int = 8, enable: Boolean = true)(implicit p: Parameters) extends LazyModule { val device = new SimpleDevice(s"clk-div-ctrl", Nil) val clockNode = ClockGroupIdentityNode() val tlNode = TLRegisterNode(Seq(AddressSet(address, 4096-1)), device, "reg/control", beatBytes=beatBytes) + if (!enable) println(Console.RED + s""" + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +WARNING: + +YOU ARE USING THE TLCLOCKDIVIDER IN +"DISABLED" MODE. THIS SHOULD ONLY BE DONE +FOR RTL SIMULATION + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +""" + Console.RESET) + lazy val module = new LazyModuleImp(this) { require (clockNode.out.size == 1) val sources = clockNode.in.head._1.member.data.toSeq @@ -45,13 +61,21 @@ class TLClockDivider(address: BigInt, beatBytes: Int, divBits: Int = 8)(implicit // by setting divisor=0. The divisor signal into the ClockDividerOrPass is synchronized internally divider.io.divisor := Mux(busReset.asBool, 0.U, reg.io.q) divider.io.resetAsync := ResetStretcher(sources(i).clock, asyncReset, 20).asAsyncReset - sinks(i)._2.clock := divider.io.clockOut - // Note this is not synchronized to the output clock, which takes time to appear - // so this is still asyncreset - // Stretch the reset for 40 cycles, to give enough time to reset any downstream - // digital logic - sinks(i)._2.reset := ResetStretcher(sources(i).clock, asyncReset, 40).asAsyncReset + if (enable) { + sinks(i)._2.clock := divider.io.clockOut + + // Note this is not synchronized to the output clock, which takes time to appear + // so this is still asyncreset + // Stretch the reset for 40 cycles, to give enough time to reset any downstream + // digital logic + sinks(i)._2.reset := ResetStretcher(sources(i).clock, asyncReset, 40).asAsyncReset + } else { + // WARNING: THIS IS FOR RTL SIMULATION ONLY + sinks(i)._2.clock := sources(i).clock + sinks(i)._2.reset := sources(i).reset + } + reg } diff --git a/generators/chipyard/src/main/scala/clocking/TLClockSelector.scala b/generators/chipyard/src/main/scala/clocking/TLClockSelector.scala index 1d0a149d..8c6371f1 100644 --- a/generators/chipyard/src/main/scala/clocking/TLClockSelector.scala +++ b/generators/chipyard/src/main/scala/clocking/TLClockSelector.scala @@ -21,12 +21,30 @@ case class ClockSelNode()(implicit valName: ValName) // This module adds a TileLink memory-mapped clock mux for each downstream clock domain // in the clock graph. The output clock/reset should be synchronized downstream -class TLClockSelector(address: BigInt, beatBytes: Int)(implicit p: Parameters) extends LazyModule { +// If enable is unset, this will always pass through the 0'th clock +// DO NOT unset enable for VLSI, or prototyping flows. The disable feature is a work around for +// some RTL simulators which do not simulate the reset synchronization properly +class TLClockSelector(address: BigInt, beatBytes: Int, enable: Boolean = true)(implicit p: Parameters) extends LazyModule { val device = new SimpleDevice("clk-sel-ctrl", Nil) val tlNode = TLRegisterNode(Seq(AddressSet(address, 4096-1)), device, "reg/control", beatBytes=beatBytes) val clockNode = ClockSelNode() + if (!enable) println(Console.RED + s""" + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +WARNING: + +YOU ARE USING THE TLCLOCKSELECTOR IN +"DISABLED" MODE. THIS SHOULD ONLY BE DONE +FOR RTL SIMULATION + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +""" + Console.RESET) + + + lazy val module = new LazyModuleImp(this) { val asyncReset = clockNode.in.map(_._1).map(_.reset).toSeq(0) val clocks = clockNode.in.map(_._1).map(_.clock) @@ -43,10 +61,15 @@ class TLClockSelector(address: BigInt, beatBytes: Int)(implicit p: Parameters) e val mux = ClockMutexMux(clocks).suggestName(s"${sinkName}_clkmux") mux.io.sel := sel mux.io.resetAsync := asyncReset.asAsyncReset - sinks(i).clock := mux.io.clockOut - // Stretch the reset for 20 cycles, to give time to reset any downstream digital logic - sinks(i).reset := ResetStretcher(clocks(0), asyncReset, 20).asAsyncReset - + if (enable) { + sinks(i).clock := mux.io.clockOut + // Stretch the reset for 20 cycles, to give time to reset any downstream digital logic + sinks(i).reset := ResetStretcher(clocks(0), asyncReset, 20).asAsyncReset + } else { + // WARNING: THIS IS FOR RTL SIMULATION ONLY + sinks(i).clock := clocks(0) + sinks(i).reset := asyncReset + } reg } tlNode.regmap((0 until sinks.size).map { i => diff --git a/generators/chipyard/src/main/scala/config/AbstractConfig.scala b/generators/chipyard/src/main/scala/config/AbstractConfig.scala index 643a0de6..4f8b96bc 100644 --- a/generators/chipyard/src/main/scala/config/AbstractConfig.scala +++ b/generators/chipyard/src/main/scala/config/AbstractConfig.scala @@ -51,9 +51,10 @@ class AbstractConfig extends Config( new chipyard.iobinders.WithUARTTSIPunchthrough ++ new chipyard.iobinders.WithNMITiedOff ++ - // By default, punch out IOs to the Harness - new chipyard.clocking.WithPassthroughClockGenerator ++ - new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", Seq("sbus", "mbus", "pbus", "fbus", "cbus", "obus", "implicit"), Seq("tile"))) ++ + new chipyard.clocking.WithClockTapIOCells ++ // Default generate a clock tapio + new chipyard.clocking.WithPassthroughClockGenerator ++ // Default punch out IOs to the Harness + new chipyard.clocking.WithClockGroupsCombinedByName(("uncore", // Default merge all the bus clocks + Seq("sbus", "mbus", "pbus", "fbus", "cbus", "obus", "implicit", "clock_tap"), Seq("tile"))) ++ new chipyard.config.WithPeripheryBusFrequency(500.0) ++ // Default 500 MHz pbus new chipyard.config.WithControlBusFrequency(500.0) ++ // Default 500 MHz cbus new chipyard.config.WithMemoryBusFrequency(500.0) ++ // Default 500 MHz mbus diff --git a/generators/chipyard/src/main/scala/config/ChipConfigs.scala b/generators/chipyard/src/main/scala/config/ChipConfigs.scala index ffcb3f77..13b03b21 100644 --- a/generators/chipyard/src/main/scala/config/ChipConfigs.scala +++ b/generators/chipyard/src/main/scala/config/ChipConfigs.scala @@ -110,5 +110,9 @@ class TetheredChipLikeRocketConfig extends Config( class VerilatorCITetheredChipLikeRocketConfig extends Config( new chipyard.harness.WithAbsoluteFreqHarnessClockInstantiator ++ // use absolute freqs for sims in the harness new chipyard.harness.WithMultiChipSerialTL(0, 1) ++ // connect the serial-tl ports of the chips together - new chipyard.harness.WithMultiChip(0, new chipyard.config.WithNoResetSynchronizers ++ new ChipLikeRocketConfig) ++ + new chipyard.harness.WithMultiChip(0, // These fragments remove all troublesome + new chipyard.clocking.WithPLLSelectorDividerClockGenerator(enable=false) ++ // clocking features from the design + new chipyard.iobinders.WithDebugIOCells(syncReset = false) ++ + new chipyard.config.WithNoResetSynchronizers ++ + new ChipLikeRocketConfig) ++ new chipyard.harness.WithMultiChip(1, new ChipBringupHostConfig)) diff --git a/generators/chipyard/src/main/scala/config/RocketConfigs.scala b/generators/chipyard/src/main/scala/config/RocketConfigs.scala index 584ff740..0ddb3737 100644 --- a/generators/chipyard/src/main/scala/config/RocketConfigs.scala +++ b/generators/chipyard/src/main/scala/config/RocketConfigs.scala @@ -63,7 +63,7 @@ class MulticlockRocketConfig extends Config( new freechips.rocketchip.subsystem.WithNBigCores(1) ++ // Frequency specifications new chipyard.config.WithTileFrequency(1000.0) ++ // Matches the maximum frequency of U540 - new chipyard.clocking.WithClockGroupsCombinedByName(("uncore" , Seq("sbus", "cbus", "implicit"), Nil), + new chipyard.clocking.WithClockGroupsCombinedByName(("uncore" , Seq("sbus", "cbus", "implicit", "clock_tap"), Nil), ("periphery", Seq("pbus", "fbus"), Nil)) ++ new chipyard.config.WithSystemBusFrequency(500.0) ++ // Matches the maximum frequency of U540 new chipyard.config.WithMemoryBusFrequency(500.0) ++ // Matches the maximum frequency of U540 diff --git a/generators/chipyard/src/main/scala/config/fragments/ClockingFragments.scala b/generators/chipyard/src/main/scala/config/fragments/ClockingFragments.scala index f0a80357..8cf6ae67 100644 --- a/generators/chipyard/src/main/scala/config/fragments/ClockingFragments.scala +++ b/generators/chipyard/src/main/scala/config/fragments/ClockingFragments.scala @@ -111,14 +111,22 @@ class WithOffchipBusFrequency(freqMHz: Double) extends Config((site, here, up) = class WithRationalMemoryBusCrossing extends WithSbusToMbusCrossingType(RationalCrossing(Symmetric)) class WithAsynchrousMemoryBusCrossing extends WithSbusToMbusCrossingType(AsynchronousCrossing()) +// Remove the tile clock gaters in this system class WithNoTileClockGaters extends Config((site, here, up) => { case ChipyardPRCIControlKey => up(ChipyardPRCIControlKey).copy(enableTileClockGating = false) }) +// Remove the tile reset control blocks in this system class WithNoTileResetSetters extends Config((site, here, up) => { case ChipyardPRCIControlKey => up(ChipyardPRCIControlKey).copy(enableTileResetSetting = false) }) +// Remove the global reset synchronizers in this system class WithNoResetSynchronizers extends Config((site, here, up) => { case ChipyardPRCIControlKey => up(ChipyardPRCIControlKey).copy(enableResetSynchronizers = false) }) + +// Remove any ClockTap ports in this system +class WithNoClockTap extends Config((site, here, up) => { + case ClockTapKey => false +}) diff --git a/generators/chipyard/src/main/scala/iobinders/IOBinders.scala b/generators/chipyard/src/main/scala/iobinders/IOBinders.scala index 738c7bde..309ec004 100644 --- a/generators/chipyard/src/main/scala/iobinders/IOBinders.scala +++ b/generators/chipyard/src/main/scala/iobinders/IOBinders.scala @@ -285,7 +285,9 @@ class JTAGChipIO extends Bundle { val TDO = Output(Bool()) } -class WithDebugIOCells extends OverrideLazyIOBinder({ +// WARNING: Don't disable syncReset unless you are trying to +// get around bugs in RTL simulators +class WithDebugIOCells(syncReset: Boolean = true) extends OverrideLazyIOBinder({ (system: HasPeripheryDebug) => { implicit val p = GetSystemParameters(system) val tlbus = system.asInstanceOf[BaseSubsystem].locateTLBusWrapper(p(ExportDebug).slaveWhere) @@ -309,7 +311,7 @@ class WithDebugIOCells extends OverrideLazyIOBinder({ d.disableDebug.foreach { d => d := false.B } // Drive JTAG on-chip IOs d.systemjtag.map { j => - j.reset := ResetCatchAndSync(j.jtag.TCK, clockBundle.reset.asBool) + j.reset := (if (syncReset) ResetCatchAndSync(j.jtag.TCK, clockBundle.reset.asBool) else clockBundle.reset.asBool) j.mfr_id := p(JtagDTMKey).idcodeManufId.U(11.W) j.part_number := p(JtagDTMKey).idcodePartNum.U(16.W) j.version := p(JtagDTMKey).idcodeVersion.U(4.W) diff --git a/generators/chipyard/src/main/scala/iobinders/Ports.scala b/generators/chipyard/src/main/scala/iobinders/Ports.scala index ba14cc39..eb23be47 100644 --- a/generators/chipyard/src/main/scala/iobinders/Ports.scala +++ b/generators/chipyard/src/main/scala/iobinders/Ports.scala @@ -90,6 +90,9 @@ case class CustomBootPort (val getIO: () => Bool) case class ClockPort (val getIO: () => Clock, val freqMHz: Double) extends Port[Clock] +case class ClockTapPort (val getIO: () => Clock) + extends Port[Clock] + case class ResetPort (val getIO: () => AsyncReset) extends Port[Reset] diff --git a/generators/firechip/src/main/scala/TargetConfigs.scala b/generators/firechip/src/main/scala/TargetConfigs.scala index d86ecb50..8a99abec 100644 --- a/generators/firechip/src/main/scala/TargetConfigs.scala +++ b/generators/firechip/src/main/scala/TargetConfigs.scala @@ -85,6 +85,7 @@ class WithMinimalFireSimDesignTweaks extends Config( new chipyard.harness.WithHarnessBinderClockFreqMHz(1000.0) ++ new chipyard.harness.WithClockFromHarness ++ new chipyard.harness.WithResetFromHarness ++ + new chipyard.config.WithNoClockTap ++ new chipyard.clocking.WithPassthroughClockGenerator ++ // Required*: When using FireSim-as-top to provide a correct path to the target bootrom source new WithBootROM ++ @@ -99,6 +100,8 @@ class WithMinimalFireSimDesignTweaks extends Config( // Non-frequency tweaks that are generally applied to all firesim configs class WithFireSimDesignTweaks extends Config( new WithMinimalFireSimDesignTweaks ++ + // Required: Remove the debug clock tap, this breaks compilation of target-level sim in FireSim + new chipyard.config.WithNoClockTap ++ // Required: Bake in the default FASED memory model new WithDefaultMemModel ++ // Optional: reduce the width of the Serial TL interface