NVDLA Integration + Cleanup Ariane Preprocessing (#505)

* [nvdla] initial nvdla integration

* [nvdla] add firesim configs

* [nvdla] re-add accidentally deleted line

* [nvdla] works on master with small

* [nvdla] use master branch of nvdla

* [nvdla] remove extra sources

* [nvdla] bump

* [nvdla + ariane] bump and use insert-includes for pre-processing

* [nvdla] add ci | remove target configs in FireChip | update naming

* [nvdla] bump nvdla | fix ci run-tests error

* [nvdla] re-enable PCWM-L error | fix/update makefile(s)

* [nvdla] bump nvdla fragments in FireChip

* [misc] bump tutorial patches

* [chipyard] remove extra import

* [nvdla] bump nvdla for pbus [ci skip]

* [nvdla] update firemarshal and add nvdla workload

* [nvdla] bump nvdla-workload

* [nvdla] bump hw

* [docs] add basic documentation

* [docs] adjustments to documentation

* [misc] update docs | bump firesim with recipe

* [misc] disable error on warnings in verilator | bump number width to match RC

* [docs] fix doc build error

* [verilator] move no fail on warning to be global

* [ci skip] [nvdla] bump submodule urls

* [misc] move firesim specific configs into nvdla dir [ci skip]

* [nvdla] fix run-tests in ci

* update RC configs | bump marshal | bump nvdla-workload

* [nvdla] bump nvdla-workload [ci skip]

* add topology mixin to nvdla configs

* update tutorial patches
This commit is contained in:
Abraham Gonzalez
2020-05-16 12:22:30 -07:00
committed by GitHub
parent 3f5a204fd0
commit 85b555dbce
25 changed files with 7063 additions and 27 deletions

View File

@@ -262,6 +262,11 @@ jobs:
steps: steps:
- prepare-rtl: - prepare-rtl:
project-key: "testchipip" project-key: "testchipip"
prepare-chipyard-nvdla:
executor: main-env
steps:
- prepare-rtl:
project-key: "chipyard-nvdla"
prepare-chipyard-spiflashwrite: prepare-chipyard-spiflashwrite:
executor: main-env executor: main-env
steps: steps:
@@ -368,6 +373,11 @@ jobs:
- run-tests: - run-tests:
project-key: "chipyard-ariane" project-key: "chipyard-ariane"
timeout: "30m" timeout: "30m"
chipyard-nvdla-run-tests:
executor: main-env
steps:
- run-tests:
project-key: "chipyard-nvdla"
icenet-run-tests: icenet-run-tests:
executor: main-env executor: main-env
steps: steps:
@@ -484,6 +494,11 @@ workflows:
- install-riscv-toolchain - install-riscv-toolchain
- install-verilator - install-verilator
- prepare-chipyard-nvdla:
requires:
- install-riscv-toolchain
- install-verilator
- prepare-chipyard-spiflashwrite: - prepare-chipyard-spiflashwrite:
requires: requires:
- install-riscv-toolchain - install-riscv-toolchain
@@ -567,6 +582,9 @@ workflows:
requires: requires:
- prepare-chipyard-ariane - prepare-chipyard-ariane
- chipyard-nvdla-run-tests:
requires:
- prepare-chipyard-nvdla
- icenet-run-tests: - icenet-run-tests:
requires: requires:
- prepare-icenet - prepare-icenet

View File

@@ -56,6 +56,7 @@ mapping["chipyard-spiflashread"]="SUB_PROJECT=chipyard CONFIG=LargeSPIFlashROMRo
mapping["chipyard-spiflashwrite"]="SUB_PROJECT=chipyard CONFIG=SmallSPIFlashRocketConfig" mapping["chipyard-spiflashwrite"]="SUB_PROJECT=chipyard CONFIG=SmallSPIFlashRocketConfig"
mapping["tracegen"]="SUB_PROJECT=chipyard CONFIG=NonBlockingTraceGenL2Config TOP=TraceGenSystem" mapping["tracegen"]="SUB_PROJECT=chipyard CONFIG=NonBlockingTraceGenL2Config TOP=TraceGenSystem"
mapping["tracegen-boom"]="SUB_PROJECT=chipyard CONFIG=BoomTraceGenConfig TOP=TraceGenSystem" mapping["tracegen-boom"]="SUB_PROJECT=chipyard CONFIG=BoomTraceGenConfig TOP=TraceGenSystem"
mapping["chipyard-nvdla"]="SUB_PROJECT=chipyard CONFIG=SmallNVDLARocketConfig"
mapping["firesim"]="SCALA_TEST=firesim.firesim.RocketNICF1Tests" mapping["firesim"]="SCALA_TEST=firesim.firesim.RocketNICF1Tests"
mapping["firesim-multiclock"]="SCALA_TEST=firesim.firesim.RocketMulticlockF1Tests" mapping["firesim-multiclock"]="SCALA_TEST=firesim.firesim.RocketMulticlockF1Tests"
mapping["fireboom"]="SCALA_TEST=firesim.firesim.BoomF1Tests" mapping["fireboom"]="SCALA_TEST=firesim.firesim.BoomF1Tests"

View File

@@ -80,6 +80,10 @@ case $1 in
chipyard-ariane) chipyard-ariane)
make run-binary-fast -C $LOCAL_SIM_DIR ${mapping[$1]} BINARY=$RISCV/riscv64-unknown-elf/share/riscv-tests/benchmarks/dhrystone.riscv make run-binary-fast -C $LOCAL_SIM_DIR ${mapping[$1]} BINARY=$RISCV/riscv64-unknown-elf/share/riscv-tests/benchmarks/dhrystone.riscv
;; ;;
chipyard-nvdla)
make -C $LOCAL_CHIPYARD_DIR/tests
make -C $LOCAL_SIM_DIR ${mapping[$1]} BINARY=$LOCAL_CHIPYARD_DIR/tests/nvdla.riscv run-binary
;;
icenet) icenet)
make run-none-fast -C $LOCAL_SIM_DIR ${mapping[$1]} make run-none-fast -C $LOCAL_SIM_DIR ${mapping[$1]}
;; ;;

6
.gitmodules vendored
View File

@@ -119,6 +119,12 @@
[submodule "tools/DRAMSim2"] [submodule "tools/DRAMSim2"]
path = tools/DRAMSim2 path = tools/DRAMSim2
url = https://github.com/firesim/DRAMSim2.git url = https://github.com/firesim/DRAMSim2.git
[submodule "generators/nvdla"]
path = generators/nvdla
url = https://github.com/ucb-bar/nvdla-wrapper.git
[submodule "software/nvdla-workload"]
path = software/nvdla-workload
url = https://github.com/ucb-bar/nvdla-workload.git
[submodule "tools/dromajo/dromajo-src"] [submodule "tools/dromajo/dromajo-src"]
path = tools/dromajo/dromajo-src path = tools/dromajo/dromajo-src
url = https://github.com/abejgonzalez/dromajo.git url = https://github.com/abejgonzalez/dromajo.git

View File

@@ -10,7 +10,7 @@ To get started using Chipyard, see the documentation on the Chipyard documentati
Chipyard is an open source framework for agile development of Chisel-based systems-on-chip. Chipyard is an open source framework for agile development of Chisel-based systems-on-chip.
It will allow you to leverage the Chisel HDL, Rocket Chip SoC generator, and other [Berkeley][berkeley] projects to produce a [RISC-V][riscv] SoC with everything from MMIO-mapped peripherals to custom accelerators. It will allow you to leverage the Chisel HDL, Rocket Chip SoC generator, and other [Berkeley][berkeley] projects to produce a [RISC-V][riscv] SoC with everything from MMIO-mapped peripherals to custom accelerators.
Chipyard contains processor cores ([Rocket][rocket-chip], [BOOM][boom], [Ariane][ariane]), accelerators ([Hwacha][hwacha], [Gemmini][gemmini]), memory systems, and additional peripherals and tooling to help create a full featured SoC. Chipyard contains processor cores ([Rocket][rocket-chip], [BOOM][boom], [Ariane][ariane]), accelerators ([Hwacha][hwacha], [Gemmini][gemmini], [NVDLA][nvdla]), memory systems, and additional peripherals and tooling to help create a full featured SoC.
Chipyard supports multiple concurrent flows of agile hardware development, including software RTL simulation, FPGA-accelerated simulation ([FireSim][firesim]), automated VLSI flows ([Hammer][hammer]), and software workload generation for bare-metal and Linux-based systems ([FireMarshal][firemarshal]). Chipyard supports multiple concurrent flows of agile hardware development, including software RTL simulation, FPGA-accelerated simulation ([FireSim][firesim]), automated VLSI flows ([Hammer][hammer]), and software workload generation for bare-metal and Linux-based systems ([FireMarshal][firemarshal]).
Chipyard is actively developed in the [Berkeley Architecture Research Group][ucb-bar] in the [Electrical Engineering and Computer Sciences Department][eecs] at the [University of California, Berkeley][berkeley]. Chipyard is actively developed in the [Berkeley Architecture Research Group][ucb-bar] in the [Electrical Engineering and Computer Sciences Department][eecs] at the [University of California, Berkeley][berkeley].
@@ -65,3 +65,4 @@ These publications cover many of the internal components used in Chipyard. Howev
[firemarshal]: https://github.com/firesim/FireMarshal/ [firemarshal]: https://github.com/firesim/FireMarshal/
[ariane]: https://github.com/pulp-platform/ariane/ [ariane]: https://github.com/pulp-platform/ariane/
[gemmini]: https://github.com/ucb-bar/gemmini [gemmini]: https://github.com/ucb-bar/gemmini
[nvdla]: http://nvdla.org/

View File

@@ -129,7 +129,7 @@ lazy val iocell = (project in file("./tools/barstools/iocell/"))
lazy val chipyard = conditionalDependsOn(project in file("generators/chipyard")) lazy val chipyard = conditionalDependsOn(project in file("generators/chipyard"))
.dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, iocell, .dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, iocell,
sha3, // On separate line to allow for cleaner tutorial-setup patches sha3, // On separate line to allow for cleaner tutorial-setup patches
gemmini, icenet, tracegen, ariane) gemmini, icenet, tracegen, ariane, nvdla)
.settings(commonSettings) .settings(commonSettings)
lazy val tracegen = conditionalDependsOn(project in file("generators/tracegen")) lazy val tracegen = conditionalDependsOn(project in file("generators/tracegen"))
@@ -163,6 +163,10 @@ lazy val gemmini = (project in file("generators/gemmini"))
.dependsOn(rocketchip, chisel_testers, testchipip) .dependsOn(rocketchip, chisel_testers, testchipip)
.settings(commonSettings) .settings(commonSettings)
// sbt project for the NVDLA generator sources under generators/nvdla.
// It depends only on rocketchip and uses the shared commonSettings.
lazy val nvdla = (project in file("generators/nvdla"))
.dependsOn(rocketchip)
.settings(commonSettings)
lazy val tapeout = conditionalDependsOn(project in file("./tools/barstools/tapeout/")) lazy val tapeout = conditionalDependsOn(project in file("./tools/barstools/tapeout/"))
.dependsOn(chisel_testers, chipyard) .dependsOn(chisel_testers, chipyard)
.settings(commonSettings) .settings(commonSettings)

View File

@@ -14,6 +14,7 @@ SHELL=/bin/bash
######################################################################################### #########################################################################################
include $(base_dir)/generators/ariane/ariane.mk include $(base_dir)/generators/ariane/ariane.mk
include $(base_dir)/generators/tracegen/tracegen.mk include $(base_dir)/generators/tracegen/tracegen.mk
include $(base_dir)/generators/nvdla/nvdla.mk
include $(base_dir)/tools/dromajo/dromajo.mk include $(base_dir)/tools/dromajo/dromajo.mk
######################################################################################### #########################################################################################

16
docs/Generators/NVDLA.rst Normal file
View File

@@ -0,0 +1,16 @@
NVDLA
====================================
`NVDLA <http://nvdla.org/>`_ is an open-source deep learning accelerator developed by NVIDIA.
The `NVDLA` is attached as a TileLink peripheral so it can be used as a component within the `Rocket Chip SoC generator`.
The accelerator by itself exposes an AXI memory interface (or two if you use the "Large" configuration), a control interface, and an interrupt line.
The main way to use the accelerator in Chipyard is to use the `NVDLA SW repository <https://github.com/ucb-bar/nvdla-sw>`_ that was ported to work on FireSim Linux.
However, you can also use the accelerator in baremetal simulations (refer to ``tests/nvdla.c``).
For more information on both the HW architecture and the SW, please visit their `website <http://nvdla.org/>`_.
NVDLA Software with FireMarshal
-------------------------------
Located at ``software/nvdla-workload`` is a FireMarshal-based workload to boot Linux with the proper NVDLA drivers.
Refer to the ``README.md`` in that directory for more information on how to run a simulation.

View File

@@ -28,4 +28,5 @@ so changes to the generators themselves will automatically be used when building
SiFive-Generators SiFive-Generators
SHA3 SHA3
Ariane Ariane
NVDLA

View File

@@ -72,7 +72,6 @@ class WithTracegenSystem extends Config((site, here, up) => {
case BuildSystem => (p: Parameters) => LazyModule(new tracegen.TraceGenSystem()(p)) case BuildSystem => (p: Parameters) => LazyModule(new tracegen.TraceGenSystem()(p))
}) })
class WithRenumberHarts(rocketFirst: Boolean = false) extends Config((site, here, up) => { class WithRenumberHarts(rocketFirst: Boolean = false) extends Config((site, here, up) => {
case RocketTilesKey => up(RocketTilesKey, site).zipWithIndex map { case (r, i) => case RocketTilesKey => up(RocketTilesKey, site).zipWithIndex map { case (r, i) =>
r.copy(hartId = i + (if(rocketFirst) 0 else up(BoomTilesKey, site).length)) r.copy(hartId = i + (if(rocketFirst) 0 else up(BoomTilesKey, site).length))
@@ -83,12 +82,6 @@ class WithRenumberHarts(rocketFirst: Boolean = false) extends Config((site, here
case MaxHartIdBits => log2Up(up(BoomTilesKey, site).size + up(RocketTilesKey, site).size) case MaxHartIdBits => log2Up(up(BoomTilesKey, site).size + up(RocketTilesKey, site).size)
}) })
// ------------------
// Multi-RoCC Support
// ------------------
/** /**
* Map from a hartId to a particular RoCC accelerator * Map from a hartId to a particular RoCC accelerator
*/ */

View File

@@ -23,6 +23,7 @@ class DigitalTop(implicit p: Parameters) extends System
with icenet.CanHavePeripheryIceNIC // Enables optionally adding the IceNIC for FireSim with icenet.CanHavePeripheryIceNIC // Enables optionally adding the IceNIC for FireSim
with chipyard.example.CanHavePeripheryInitZero // Enables optionally adding the initzero example widget with chipyard.example.CanHavePeripheryInitZero // Enables optionally adding the initzero example widget
with chipyard.example.CanHavePeripheryGCD // Enables optionally adding the GCD example widget with chipyard.example.CanHavePeripheryGCD // Enables optionally adding the GCD example widget
with nvidia.blocks.dla.CanHavePeripheryNVDLA // Enables optionally having an NVDLA
{ {
override lazy val module = new DigitalTopModule(this) override lazy val module = new DigitalTopModule(this)
} }

View File

@@ -425,3 +425,41 @@ class RingSystemBusRocketConfig extends Config(
new freechips.rocketchip.subsystem.WithCoherentBusTopology ++ new freechips.rocketchip.subsystem.WithCoherentBusTopology ++
new freechips.rocketchip.system.BaseConfig) new freechips.rocketchip.system.BaseConfig)
// DOC include end: RingSystemBusRocket // DOC include end: RingSystemBusRocket
/**
 * Rocket configuration with one big core (WithNBigCores(1)) plus a "small"
 * NVDLA added through nvidia.blocks.dla.WithNVDLA("small").
 *
 * NOTE(review): the fragments are combined with ++, so their order is
 * presumably significant (earlier fragments taking precedence) -- confirm
 * before reordering.
 */
class SmallNVDLARocketConfig extends Config(
new chipyard.iobinders.WithUARTAdapter ++
new chipyard.iobinders.WithTieOffInterrupts ++
new chipyard.iobinders.WithBlackBoxSimMem ++
new chipyard.iobinders.WithTiedOffDebug ++
new chipyard.iobinders.WithSimSerial ++
new testchipip.WithTSI ++
new chipyard.config.WithBootROM ++
new chipyard.config.WithUART ++
new chipyard.config.WithL2TLBs(1024) ++
new nvidia.blocks.dla.WithNVDLA("small") ++ // add a small NVDLA
new freechips.rocketchip.subsystem.WithNoMMIOPort ++
new freechips.rocketchip.subsystem.WithNoSlavePort ++
new freechips.rocketchip.subsystem.WithInclusiveCache ++
new freechips.rocketchip.subsystem.WithNExtTopInterrupts(0) ++
new freechips.rocketchip.subsystem.WithNBigCores(1) ++
new freechips.rocketchip.subsystem.WithCoherentBusTopology ++
new freechips.rocketchip.system.BaseConfig)
/**
 * Rocket configuration with one big core (WithNBigCores(1)) plus a "large"
 * NVDLA added through nvidia.blocks.dla.WithNVDLA("large", true).
 *
 * NOTE(review): per the inline comment the second WithNVDLA argument (true)
 * selects synthesizable RAMs -- confirm against the WithNVDLA definition.
 * NOTE(review): the fragments are combined with ++, so their order is
 * presumably significant -- confirm before reordering.
 */
class LargeNVDLARocketConfig extends Config(
new chipyard.iobinders.WithUARTAdapter ++
new chipyard.iobinders.WithTieOffInterrupts ++
new chipyard.iobinders.WithBlackBoxSimMem ++
new chipyard.iobinders.WithTiedOffDebug ++
new chipyard.iobinders.WithSimSerial ++
new testchipip.WithTSI ++
new chipyard.config.WithBootROM ++
new chipyard.config.WithUART ++
new chipyard.config.WithL2TLBs(1024) ++
new nvidia.blocks.dla.WithNVDLA("large", true) ++ // add a large NVDLA with synth. rams
new freechips.rocketchip.subsystem.WithNoMMIOPort ++
new freechips.rocketchip.subsystem.WithNoSlavePort ++
new freechips.rocketchip.subsystem.WithInclusiveCache ++
new freechips.rocketchip.subsystem.WithNExtTopInterrupts(0) ++
new freechips.rocketchip.subsystem.WithNBigCores(1) ++
new freechips.rocketchip.subsystem.WithCoherentBusTopology ++
new freechips.rocketchip.system.BaseConfig)

View File

@@ -44,14 +44,12 @@ class WithPeripheryBusFrequency(freq: BigInt) extends Config((site, here, up) =>
case PeripheryBusKey => up(PeripheryBusKey).copy(dtsFrequency = Some(freq)) case PeripheryBusKey => up(PeripheryBusKey).copy(dtsFrequency = Some(freq))
}) })
class WithPerfCounters extends Config((site, here, up) => { class WithPerfCounters extends Config((site, here, up) => {
case RocketTilesKey => up(RocketTilesKey) map (tile => tile.copy( case RocketTilesKey => up(RocketTilesKey) map (tile => tile.copy(
core = tile.core.copy(nPerfCounters = 29) core = tile.core.copy(nPerfCounters = 29)
)) ))
}) })
// Disables clock-gating; doesn't play nice with our FAME-1 pass // Disables clock-gating; doesn't play nice with our FAME-1 pass
class WithoutClockGating extends Config((site, here, up) => { class WithoutClockGating extends Config((site, here, up) => {
case DebugModuleKey => up(DebugModuleKey, site).map(_.copy(clockGate = false)) case DebugModuleKey => up(DebugModuleKey, site).map(_.copy(clockGate = false))
@@ -63,7 +61,6 @@ class WithScalaTestFeatures extends Config((site, here, up) => {
case TracePortKey => up(TracePortKey, site).map(_.copy(print = true)) case TracePortKey => up(TracePortKey, site).map(_.copy(print = true))
}) })
// FASED Config Aliases. This to enable config generation via "_" concatenation // FASED Config Aliases. This to enable config generation via "_" concatenation
// which requires that all config classes be defined in the same package // which requires that all config classes be defined in the same package
class DDR3FRFCFS extends FRFCFS16GBQuadRank class DDR3FRFCFS extends FRFCFS16GBQuadRank
@@ -71,7 +68,9 @@ class DDR3FRFCFSLLC4MB extends FRFCFS16GBQuadRankLLC4MB
class WithNIC extends icenet.WithIceNIC(inBufFlits = 8192, ctrlQueueDepth = 64) class WithNIC extends icenet.WithIceNIC(inBufFlits = 8192, ctrlQueueDepth = 64)
// Config aliases that add an NVDLA to the system by forwarding the size
// string to nvidia.blocks.dla.WithNVDLA.
// Adds a large NVDLA to the system
class WithNVDLALarge extends nvidia.blocks.dla.WithNVDLA("large")
// Adds a small NVDLA to the system
class WithNVDLASmall extends nvidia.blocks.dla.WithNVDLA("small")
// Tweaks that are generally applied to all firesim configs // Tweaks that are generally applied to all firesim configs

1
generators/nvdla Submodule

Submodule generators/nvdla added at b2b78c9f89

51
scripts/insert-includes.py Executable file
View File

@@ -0,0 +1,51 @@
#!/usr/bin/python

# replaces a `include with the full include file
#
# args
# $1 - file to remove includes from
# $2 - file to write output to
# $3 - list of directories to search for includes in (note: NON-RECURSIVE must specify all dirs)
#      includes are found relative to this path
#      this is equivalent to something like +incdir+

import sys
import re
import os

# Matches a line that starts with optional spaces followed by `include "<name>".
# The name is captured only up to the FIRST closing quote, so a trailing
# comment that itself contains quotes does not become part of the file name.
INCLUDE_RE = re.compile(r"^ *`include +\"([^\"]*)\"")


def find_include(name, incDirs):
    """Return the path of `name` in the first directory of incDirs that holds it.

    Directories are searched in order and non-recursively. Only regular files
    count as a hit. Returns None when no directory contains the file.
    """
    for d in incDirs:
        candidate = d + "/" + name
        if os.path.isfile(candidate):
            return candidate
    return None


def insert_includes(inVlog, outVlog, incDirs):
    """Copy inVlog to outVlog, replacing each `include line with the file's text.

    inVlog  -- path of the file to read
    outVlog -- path of the file to write (created or truncated)
    incDirs -- list of directories searched, in order, for each included file

    Lines that are not `include directives are copied unchanged. Anything that
    follows the directive on the same line is dropped together with it.
    Exits through sys.exit() with an error message when an included file
    cannot be found in any of incDirs.
    """
    with open(inVlog, 'r') as inFile:
        with open(outVlog, 'w') as outFile:
            for num, line in enumerate(inFile, 1):
                match = INCLUDE_RE.match(line)
                if not match:
                    outFile.write(line)
                    continue

                # must find something to include with
                incFileName = find_include(match.group(1), incDirs)
                if incFileName is None:
                    sys.exit("[ERROR] Couldn't replace include \"" + str(match.group(1)) + "\" found on line " + str(num))

                lastLine = "\n"
                with open(incFileName, 'r') as incFile:
                    for lastLine in incFile:
                        outFile.write(lastLine)
                # An include file without a final newline would otherwise be
                # glued to the next line of the input file.
                if not lastLine.endswith("\n"):
                    outFile.write("\n")


def main(argv):
    """Command-line entry point; argv has the same layout as sys.argv."""
    if len(argv) < 3:
        sys.exit("[ERROR] Usage: " + str(argv[0]) + " <input file> <output file> [include dirs...]")

    inVlog = argv[1]
    outVlog = argv[2]
    print("[INFO] Replaces includes from: " + str(inVlog))

    # Opening the output truncates it, so refuse to run when both arguments
    # name the same file, even through different spellings of the path.
    if inVlog == outVlog or os.path.realpath(inVlog) == os.path.realpath(outVlog):
        sys.exit("[ERROR] The input and output file cannot be the same.")

    # add directories to search list
    incDirs = argv[3:]
    print("[INFO] Searching following dirs for includes: " + str(incDirs))

    insert_includes(inVlog, outVlog, incDirs)

    print("[INFO] Success. Writing output to: " + str(outVlog))


if __name__ == "__main__":
    main(sys.argv)

View File

@@ -1,5 +1,5 @@
diff --git a/generators/chipyard/src/main/scala/config/RocketConfigs.scala b/generators/chipyard/src/main/scala/config/RocketConfigs.scala diff --git a/generators/chipyard/src/main/scala/config/RocketConfigs.scala b/generators/chipyard/src/main/scala/config/RocketConfigs.scala
index 49d2238..afaa36d 100644 index f29c580..0bd36ca 100644
--- a/generators/chipyard/src/main/scala/config/RocketConfigs.scala --- a/generators/chipyard/src/main/scala/config/RocketConfigs.scala
+++ b/generators/chipyard/src/main/scala/config/RocketConfigs.scala +++ b/generators/chipyard/src/main/scala/config/RocketConfigs.scala
@@ -333,7 +333,7 @@ class Sha3RocketConfig extends Config( @@ -333,7 +333,7 @@ class Sha3RocketConfig extends Config(

View File

@@ -1,17 +1,17 @@
diff --git a/build.sbt b/build.sbt diff --git a/build.sbt b/build.sbt
index a633066..3df8b74 100644 index 0c4581f..ff0597c 100644
--- a/build.sbt --- a/build.sbt
+++ b/build.sbt +++ b/build.sbt
@@ -124,7 +124,7 @@ lazy val testchipip = (project in file("generators/testchipip")) @@ -128,7 +128,7 @@ lazy val iocell = (project in file("./tools/barstools/iocell/"))
lazy val chipyard = conditionalDependsOn(project in file("generators/chipyard")) lazy val chipyard = conditionalDependsOn(project in file("generators/chipyard"))
.dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, iocell, .dependsOn(boom, hwacha, sifive_blocks, sifive_cache, utilities, iocell,
- sha3, // On separate line to allow for cleaner tutorial-setup patches - sha3, // On separate line to allow for cleaner tutorial-setup patches
+// sha3, // On separate line to allow for cleaner tutorial-setup patches +// sha3, // On separate line to allow for cleaner tutorial-setup patches
gemmini, icenet, tracegen, ariane) gemmini, icenet, tracegen, ariane, nvdla)
.settings(commonSettings) .settings(commonSettings)
@@ -151,9 +151,9 @@ lazy val ariane = (project in file("generators/ariane")) @@ -155,9 +155,9 @@ lazy val ariane = (project in file("generators/ariane"))
.dependsOn(rocketchip) .dependsOn(rocketchip)
.settings(commonSettings) .settings(commonSettings)

View File

@@ -55,18 +55,16 @@ VCS_CC_OPTS = \
VCS_NONCC_OPTS = \ VCS_NONCC_OPTS = \
+lint=all,noVCDE,noONGS,noUI \ +lint=all,noVCDE,noONGS,noUI \
-error=PCWM-L \
-timescale=1ns/1ps \ -timescale=1ns/1ps \
-quiet \ -quiet \
-q \ -q \
+rad \ +rad \
+v2k \
+vcs+lic+wait \ +vcs+lic+wait \
+vc+list \ +vc+list \
-error=noZMMCM \ -error=noZMMCM \
-assert svaext \ -error=PCWM-L \
-sverilog \ -sverilog +systemverilogext+.sv+.svi+.svh+.svt -assert svaext +libext+.sv \
+libext+.v \ +v2k +verilog2001ext+.v95+.vt+.vp +libext+.v \
+incdir+$(build_dir) \ +incdir+$(build_dir) \
-f $(sim_common_files) \ -f $(sim_common_files) \
$(sim_vsrcs) $(sim_vsrcs)

View File

@@ -72,7 +72,6 @@ ARIANE_VERILATOR_FLAGS = \
--unroll-count 256 \ --unroll-count 256 \
-Werror-PINMISSING \ -Werror-PINMISSING \
-Werror-IMPLICIT \ -Werror-IMPLICIT \
-Wno-fatal \
-Wno-PINCONNECTEMPTY \ -Wno-PINCONNECTEMPTY \
-Wno-ASSIGNDLY \ -Wno-ASSIGNDLY \
-Wno-DECLFILENAME \ -Wno-DECLFILENAME \
@@ -91,9 +90,11 @@ TIMESCALE_OPTS := $(shell verilator --version | perl -lne 'if (/(\d.\d+)/ && $$1
VERILATOR_NONCC_OPTS = \ VERILATOR_NONCC_OPTS = \
$(TIMESCALE_OPTS) \ $(TIMESCALE_OPTS) \
--top-module $(VLOG_MODEL) \ --top-module $(VLOG_MODEL) \
-Wno-fatal \
$(shell if ! grep -iq "module.*ariane" $(build_dir)/*.*v; then echo "$(CHIPYARD_VERILATOR_FLAGS)"; else echo "$(ARIANE_VERILATOR_FLAGS)"; fi) \ $(shell if ! grep -iq "module.*ariane" $(build_dir)/*.*v; then echo "$(CHIPYARD_VERILATOR_FLAGS)"; else echo "$(ARIANE_VERILATOR_FLAGS)"; fi) \
--output-split 10000 \ --output-split 10000 \
--output-split-cfuncs 100 \ --output-split-cfuncs 100 \
--max-num-width 1048576 \
-f $(sim_common_files) \ -f $(sim_common_files) \
$(sim_vsrcs) $(sim_vsrcs)

View File

@@ -5,7 +5,7 @@ LDFLAGS= -static
include libgloss.mk include libgloss.mk
PROGRAMS = pwm blkdev accum charcount nic-loopback big-blkdev pingd spiflashread spiflashwrite PROGRAMS = pwm blkdev accum charcount nic-loopback big-blkdev pingd nvdla spiflashread spiflashwrite
spiflash.img: spiflash.py spiflash.img: spiflash.py
python3 $< python3 $<

468
tests/nvdla.c Normal file
View File

@@ -0,0 +1,468 @@
#include <stdint.h>
#include "nvdla.h"
#include "mmio.h"
#include <riscv-pk/encoding.h>
/* Address that every register offset in this file is added to. */
#define NVDLA_BASE 0x10040000
/*
 * 32-bit register accessors taking an offset relative to NVDLA_BASE.
 * reg_write32/reg_read32 are not defined in this file (presumably they come
 * from mmio.h -- confirm). The arguments are parenthesized so that an
 * expression such as (1 << 4) or (a | b) is evaluated before being added to
 * the base address.
 */
#define reg_write(addr, val) reg_write32(NVDLA_BASE + (addr), (val))
#define reg_read(addr) reg_read32(NVDLA_BASE + (addr))
int main(void)
{
//----------## Layer:CDP_0: cross layer dependency, begin----------
//----------## Layer:CDP_0: cross layer dependency, end----------
//----------## Layer:CDP_0: set producer pointer, begin----------
reg_write(CDP_S_POINTER_0, 0);
reg_write(CDP_RDMA_S_POINTER_0, 0);
//----------## Layer:CDP_0: set producer pointer, end----------
//----------## Layer:CDP_0: LUT programming, begin----------
reg_write(CDP_S_LUT_ACCESS_CFG_0, 0x30000);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x0);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x4);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x5);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x6);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x7);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x8);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x9);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x10);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x11);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x12);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x13);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x14);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x15);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x16);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x17);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x18);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x19);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x20);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x21);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x22);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x23);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x24);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x25);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x26);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x27);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x28);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x29);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x30);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x31);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x32);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x33);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x34);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x35);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x36);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x37);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x38);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x39);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x40);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x41);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x42);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x43);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x44);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x45);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x46);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x47);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x48);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x49);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x4a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x4b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x4c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x4d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x4e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x4f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x50);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x51);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x52);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x53);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x54);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x55);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x56);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x57);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x58);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x59);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x5a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x5b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x5c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x5d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x5e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x5f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x60);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x61);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x62);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x63);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x64);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x65);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x66);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x67);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x68);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x69);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x6a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x6b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x6c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x6d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x6e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x6f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x70);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x71);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x72);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x73);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x74);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x75);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x76);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x77);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x78);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x79);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x7a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x7b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x7c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x7d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x7e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x7f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x80);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x81);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x82);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x83);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x84);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x85);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x86);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x87);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x88);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x89);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x8a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x8b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x8c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x8d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x8e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x8f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x90);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x91);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x92);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x93);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x94);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x95);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x96);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x97);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x98);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x99);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x9a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x9b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x9c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x9d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x9e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x9f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa0);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa1);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa2);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa3);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa4);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa5);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa6);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa7);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa8);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa9);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xaa);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xab);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xac);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xad);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xae);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xaf);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb0);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb1);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb2);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb3);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb4);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb5);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb6);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb7);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb8);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb9);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xba);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xbb);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xbc);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xbd);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xbe);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xbf);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc0);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc1);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc2);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc3);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc4);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc5);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc6);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc7);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc8);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc9);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xca);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xcb);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xcc);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xcd);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xce);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xcf);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd0);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd1);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd2);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd3);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd4);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd5);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd6);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd7);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd8);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd9);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xda);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xdb);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xdc);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xdd);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xde);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xdf);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe0);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe1);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe2);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe3);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe4);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe5);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe6);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe7);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe8);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe9);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xea);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xeb);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xec);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xed);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xee);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xef);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf0);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf1);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf2);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf3);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf4);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf5);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf6);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf7);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf8);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf9);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xfa);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xfb);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xfc);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xfd);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xfe);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xff);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x100);
reg_write(CDP_S_LUT_ACCESS_CFG_0, 0x20000);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x0);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x4);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x5);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x6);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x7);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x8);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x9);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xa);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xb);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xc);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xd);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xe);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0xf);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x10);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x11);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x12);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x13);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x14);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x15);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x16);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x17);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x18);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x19);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x1f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x20);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x21);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x22);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x23);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x24);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x25);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x26);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x27);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x28);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x29);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x2f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x30);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x31);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x32);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x33);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x34);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x35);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x36);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x37);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x38);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x39);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3a);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3b);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3c);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3d);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3e);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x3f);
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x40);
reg_write(CDP_S_LUT_LE_START_LOW_0, 0x0);
// CDP_S_LUT_LE_START_LOW_0.LUT_LE_START_LOW:0x0
reg_write(CDP_S_LUT_LO_END_LOW_0, 0x100);
// CDP_S_LUT_LO_END_LOW_0.LUT_LO_END_LOW:0x100
reg_write(CDP_S_LUT_ACCESS_CFG_0, 0x0);
// CDP_S_LUT_ACCESS_CFG_0.LUT_ACCESS_TYPE:READ : 0x0
// CDP_S_LUT_ACCESS_CFG_0.LUT_TABLE_ID:LE : 0x0
// CDP_S_LUT_ACCESS_CFG_0.LUT_ADDR:0x0
reg_write(CDP_S_LUT_ACCESS_DATA_0, 0x0);
// CDP_S_LUT_ACCESS_DATA_0.LUT_DATA:0x0
reg_write(CDP_S_LUT_LE_START_HIGH_0, 0x0);
// CDP_S_LUT_LE_START_HIGH_0.LUT_LE_START_HIGH:0x0
reg_write(CDP_S_LUT_LO_END_HIGH_0, 0x0);
// CDP_S_LUT_LO_END_HIGH_0.LUT_LO_END_HIGH:0x0
reg_write(CDP_S_LUT_CFG_0, 0x1);
// CDP_S_LUT_CFG_0.LUT_UFLOW_PRIORITY:LE : 0x0
// CDP_S_LUT_CFG_0.LUT_OFLOW_PRIORITY:LE : 0x0
// CDP_S_LUT_CFG_0.LUT_HYBRID_PRIORITY:LE : 0x0
// CDP_S_LUT_CFG_0.LUT_LE_FUNCTION:LINEAR : 0x1
reg_write(CDP_S_LUT_LE_SLOPE_SHIFT_0, 0x0);
// CDP_S_LUT_LE_SLOPE_SHIFT_0.LUT_LE_SLOPE_OFLOW_SHIFT:0x0
// CDP_S_LUT_LE_SLOPE_SHIFT_0.LUT_LE_SLOPE_UFLOW_SHIFT:0x0
reg_write(CDP_S_LUT_LE_SLOPE_SCALE_0, 0x0);
// CDP_S_LUT_LE_SLOPE_SCALE_0.LUT_LE_SLOPE_UFLOW_SCALE:0x0
// CDP_S_LUT_LE_SLOPE_SCALE_0.LUT_LE_SLOPE_OFLOW_SCALE:0x0
reg_write(CDP_S_LUT_INFO_0, 0x0);
// CDP_S_LUT_INFO_0.LUT_LE_INDEX_SELECT:0x0
// CDP_S_LUT_INFO_0.LUT_LE_INDEX_OFFSET:0x0
// CDP_S_LUT_INFO_0.LUT_LO_INDEX_SELECT:0x0
reg_write(CDP_S_LUT_LE_END_LOW_0, 0x40);
// CDP_S_LUT_LE_END_LOW_0.LUT_LE_END_LOW:0x40
reg_write(CDP_S_LUT_LO_SLOPE_SCALE_0, 0x0);
// CDP_S_LUT_LO_SLOPE_SCALE_0.LUT_LO_SLOPE_OFLOW_SCALE:0x0
// CDP_S_LUT_LO_SLOPE_SCALE_0.LUT_LO_SLOPE_UFLOW_SCALE:0x0
reg_write(CDP_S_LUT_LE_END_HIGH_0, 0x0);
// CDP_S_LUT_LE_END_HIGH_0.LUT_LE_END_HIGH:0x0
reg_write(CDP_S_LUT_LO_START_HIGH_0, 0x0);
// CDP_S_LUT_LO_START_HIGH_0.LUT_LO_START_HIGH:0x0
reg_write(CDP_S_LUT_LO_START_LOW_0, 0x0);
// CDP_S_LUT_LO_START_LOW_0.LUT_LO_START_LOW:0x0
reg_write(CDP_S_LUT_LO_SLOPE_SHIFT_0, 0x0);
// CDP_S_LUT_LO_SLOPE_SHIFT_0.LUT_LO_SLOPE_UFLOW_SHIFT:0x0
// CDP_S_LUT_LO_SLOPE_SHIFT_0.LUT_LO_SLOPE_OFLOW_SHIFT:0x0
//----------## Layer:CDP_0: LUT programming, end----------
//----------## Layer:CDP_0: configuration, begin----------
reg_write(CDP_D_DATOUT_OFFSET_0, 0x80);
// CDP_D_DATOUT_OFFSET_0.DATOUT_OFFSET:0x80
reg_write(CDP_D_DST_SURFACE_STRIDE_0, 0x800);
// CDP_D_DST_SURFACE_STRIDE_0.DST_SURFACE_STRIDE:0x40
reg_write(CDP_RDMA_D_SRC_BASE_ADDR_LOW_0, 0x90000000);
// CDP_RDMA_D_SRC_BASE_ADDR_LOW_0.SRC_BASE_ADDR_LOW:0x4800000
reg_write(CDP_D_DST_DMA_CFG_0, 0x1);
// CDP_D_DST_DMA_CFG_0.DST_RAM_TYPE:MC : 0x1
reg_write(CDP_RDMA_D_DATA_CUBE_WIDTH_0, 0x7);
// CDP_RDMA_D_DATA_CUBE_WIDTH_0.WIDTH:0x7
reg_write(CDP_RDMA_D_DATA_FORMAT_0, 0x0);
// CDP_RDMA_D_DATA_FORMAT_0.INPUT_DATA:INT8 : 0x0
reg_write(CDP_D_DATIN_SCALE_0, 0x1);
// CDP_D_DATIN_SCALE_0.DATIN_SCALE:0x1
reg_write(CDP_D_DATOUT_SHIFTER_0, 0x0);
// CDP_D_DATOUT_SHIFTER_0.DATOUT_SHIFTER:0x0
reg_write(CDP_D_CYA_0, 0x0);
// CDP_D_CYA_0.CYA:0x0
reg_write(CDP_RDMA_D_PERF_ENABLE_0, 0x0);
// CDP_RDMA_D_PERF_ENABLE_0.DMA_EN:DISABLE : 0x0
reg_write(CDP_D_LRN_CFG_0, 0x0);
// CDP_D_LRN_CFG_0.NORMALZ_LEN:LEN3 : 0x0
reg_write(CDP_RDMA_D_DATA_CUBE_CHANNEL_0, 0x1f);
// CDP_RDMA_D_DATA_CUBE_CHANNEL_0.CHANNEL:0x1f
reg_write(CDP_D_DATA_FORMAT_0, 0x0);
// CDP_D_DATA_FORMAT_0.INPUT_DATA_TYPE:INT8 : 0x0
reg_write(CDP_D_DATIN_SHIFTER_0, 0x0);
// CDP_D_DATIN_SHIFTER_0.DATIN_SHIFTER:0x0
reg_write(CDP_D_PERF_ENABLE_0, 0x0);
// CDP_D_PERF_ENABLE_0.LUT_EN:DISABLE : 0x0
// CDP_D_PERF_ENABLE_0.DMA_EN:DISABLE : 0x0
reg_write(CDP_RDMA_D_SRC_BASE_ADDR_HIGH_0, 0x0);
// CDP_RDMA_D_SRC_BASE_ADDR_HIGH_0.SRC_BASE_ADDR_HIGH:0x0
reg_write(CDP_D_DST_BASE_ADDR_HIGH_0, 0x0);
// CDP_D_DST_BASE_ADDR_HIGH_0.DST_BASE_ADDR_HIGH:0x0
reg_write(CDP_RDMA_D_SRC_DMA_CFG_0, 0x1);
// CDP_RDMA_D_SRC_DMA_CFG_0.SRC_RAM_TYPE:MC : 0x1
reg_write(CDP_D_DATOUT_SCALE_0, 0x1);
// CDP_D_DATOUT_SCALE_0.DATOUT_SCALE:0x1
reg_write(CDP_D_DATIN_OFFSET_0, 0x80);
// CDP_D_DATIN_OFFSET_0.DATIN_OFFSET:0x80
reg_write(CDP_D_NAN_FLUSH_TO_ZERO_0, 0x0);
// CDP_D_NAN_FLUSH_TO_ZERO_0.NAN_TO_ZERO:DISABLE : 0x0
reg_write(CDP_D_FUNC_BYPASS_0, 0x3);
// CDP_D_FUNC_BYPASS_0.SQSUM_BYPASS:ENABLE : 0x1
// CDP_D_FUNC_BYPASS_0.MUL_BYPASS:ENABLE : 0x1
reg_write(CDP_D_DST_BASE_ADDR_LOW_0, 0x90080000);
// CDP_D_DST_BASE_ADDR_LOW_0.DST_BASE_ADDR_LOW:0x4804000
reg_write(CDP_RDMA_D_CYA_0, 0x0);
// CDP_RDMA_D_CYA_0.CYA:0x0
reg_write(CDP_RDMA_D_SRC_SURFACE_STRIDE_0, 0x800);
// CDP_RDMA_D_SRC_SURFACE_STRIDE_0.SRC_SURFACE_STRIDE:0x40
reg_write(CDP_D_DST_LINE_STRIDE_0, 0x100);
// CDP_D_DST_LINE_STRIDE_0.DST_LINE_STRIDE:0x8
reg_write(CDP_RDMA_D_SRC_LINE_STRIDE_0, 0x100);
// CDP_RDMA_D_SRC_LINE_STRIDE_0.SRC_LINE_STRIDE:0x8
reg_write(CDP_RDMA_D_DATA_CUBE_HEIGHT_0, 0x7);
// CDP_RDMA_D_DATA_CUBE_HEIGHT_0.HEIGHT:0x7
//----------## Layer:CDP_0: configuration, end----------
//----------## Layer:CDP_0: operation enable, begin----------
//----------#### Layer:CDP_0: operation enable, block:NVDLA_CDP_RDMA, begin --
reg_write(CDP_RDMA_D_OP_ENABLE_0,0x1);
//----------#### Layer:CDP_0: operation enable, block:NVDLA_CDP_RDMA, end --
//----------#### Layer:CDP_0: operation enable, block:NVDLA_CDP, begin --
reg_write(CDP_D_OP_ENABLE_0,0x1);
//----------#### Layer:CDP_0: operation enable, block:NVDLA_CDP, end --
//----------## Layer:CDP_0: operation enable, end----------
register uint64_t cycle1 = rdcycle();
for (register int idx = 0; idx < 32767; idx++) {
if (reg_read(GLB_S_INTR_STATUS_0) != 0)
break;
}
uint64_t cycle2 = rdcycle();
printf("cycle1: %lu, cycle2: %lu, diff: %lu\n", cycle1, cycle2, cycle2 - cycle1 );
return 0;
}

6433
tests/nvdla.h Normal file

File diff suppressed because it is too large Load Diff