From 9a9ebea207f6539c651c2f2f4b344904036dcb0f Mon Sep 17 00:00:00 2001 From: Howard Mao Date: Thu, 22 Jun 2017 16:43:14 -0700 Subject: [PATCH] add new (Tilelink2) RoCC accelerator interface Includes configuration, test programs, and documentation updates. --- README.md | 29 +++++-- rocket-chip | 2 +- src/main/scala/example/Configs.scala | 4 + tests/Makefile | 2 +- tests/accum.c | 47 +++++++++++ tests/charcount.c | 19 +++++ tests/rocc.h | 122 +++++++++++++++++++++++++++ 7 files changed, 214 insertions(+), 11 deletions(-) create mode 100644 tests/accum.c create mode 100644 tests/charcount.c create mode 100644 tests/rocc.h diff --git a/README.md b/README.md index d6a5be1e..d3b7ee42 100644 --- a/README.md +++ b/README.md @@ -331,9 +331,14 @@ the accelerator can use to distinguish different instructions from each other. ### Creating an accelerator -RoCC accelerators should extends the RoCC class. +RoCC accelerators are lazy modules that extend the LazyRoCC class. +Their implementation should extend the LazyRoCCModule class. - class CustomAccelerator(implicit p: Parameters) extends RoCC()(p) { + class CustomAccelerator(implicit p: Parameters) extends LazyRoCC { + override lazy val module = new CustomAcceleratorModule(this) + } + + class CustomAcceleratorModule(outer: CustomAccelerator) extends LazyRoCCModule(outer) { val cmd = Queue(io.cmd) // The parts of the command are as follows // inst - the parts of the instruction itself @@ -350,13 +355,19 @@ RoCC accelerators should extends the RoCC class. ... } -The other interfaces available to the accelerator are `mem`, which provides -access to the L1 cache, `ptw` which provides access to the page-table walker, -`autl` which provides shared access to the L2 alongside the ICache refill, -and `utl` which provides dedicated access to the L2. +The LazyRoCC class contains two TLOutputNode instances, `atlNode` and `tlNode`. +The former connects into a tile-local arbiter along with the backside of the +L1 instruction cache. 
+The latter connects directly to the L1-L2 crossbar. +The corresponding TileLink ports in the module implementation's IO bundle +are `atl` and `tl`, respectively. -Look at the examples in rocket-chip/src/main/scala/tile/LegacyRocc.scala for -detailed information on the different IOs +The other interfaces available to the accelerator are `mem`, which provides +access to the L1 cache; `ptw`, which provides access to the page-table walker; +the `busy` signal, which indicates when the accelerator is still handling an +instruction; and the `interrupt` signal, which can be used to interrupt the CPU. + +Look at the examples in rocket-chip/src/main/scala/tile/LazyRocc.scala for +detailed information on the different IOs. ### Adding RoCC accelerator to Config @@ -373,7 +384,7 @@ route custom0 and custom1 instructions to it, we could do the following. r.copy(rocc = Seq( RoCCParams( opcodes = OpcodeSet.custom0 | OpcodeSet.custom1, - generator = (p: Parameters) => Module(new CustomAccelerator()(p))))) + generator = (p: Parameters) => LazyModule(new CustomAccelerator()(p))))) } }) diff --git a/rocket-chip b/rocket-chip index 0fdaa286..1f18a37f 160000 --- a/rocket-chip +++ b/rocket-chip @@ -1 +1 @@ -Subproject commit 0fdaa286942a96220e19658a0a8a80f8ce8fee5f +Subproject commit 1f18a37f01f1034b501a7f4c2edaaffb292d7186 diff --git a/src/main/scala/example/Configs.scala b/src/main/scala/example/Configs.scala index d04cdeda..8a0ea8bc 100644 --- a/src/main/scala/example/Configs.scala +++ b/src/main/scala/example/Configs.scala @@ -2,6 +2,10 @@ package example import config.{Parameters, Config} import testchipip.WithSerialAdapter +import coreplex.WithRoccExample class DefaultExampleConfig extends Config( new WithSerialAdapter ++ new rocketchip.DefaultConfig) + +class RoccExampleConfig extends Config( + new WithRoccExample ++ new DefaultExampleConfig) diff --git a/tests/Makefile b/tests/Makefile index 02237f2c..91a0da0d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -3,7 +3,7 @@ 
OBJDUMP=riscv64-unknown-elf-objdump CFLAGS=-mcmodel=medany -std=gnu99 -O2 -fno-common -fno-builtin-printf LDFLAGS=-static -nostdlib -nostartfiles -lgcc -PROGRAMS = pwm blkdev +PROGRAMS = pwm blkdev accum charcount default: $(addsuffix .riscv,$(PROGRAMS)) diff --git a/tests/accum.c b/tests/accum.c new file mode 100644 index 00000000..b82b174c --- /dev/null +++ b/tests/accum.c @@ -0,0 +1,47 @@ +#include "rocc.h" + +static inline void accum_write(int idx, unsigned long data) +{ + ROCC_INSTRUCTION_SS(0, data, idx, 0); +} + +static inline unsigned long accum_read(int idx) +{ + unsigned long value; + ROCC_INSTRUCTION_DSS(0, value, 0, idx, 1); + return value; +} + +static inline void accum_load(int idx, void *ptr) +{ + asm volatile ("fence"); + ROCC_INSTRUCTION_SS(0, (uintptr_t) ptr, idx, 2); +} + +static inline void accum_add(int idx, unsigned long addend) +{ + ROCC_INSTRUCTION_SS(0, addend, idx, 3); +} + +unsigned long data = 0x3421L; + +int main(void) +{ + unsigned long result; + + accum_load(0, &data); + accum_add(0, 2); + result = accum_read(0); + + if (result != data + 2) + return 1; + + accum_write(0, 3); + accum_add(0, 1); + result = accum_read(0); + + if (result != 4) + return 2; + + return 0; +} diff --git a/tests/charcount.c b/tests/charcount.c new file mode 100644 index 00000000..f8b3641b --- /dev/null +++ b/tests/charcount.c @@ -0,0 +1,19 @@ +#include "rocc.h" + +char string[64] = "The quick brown fox jumped over the lazy dog"; + +static inline unsigned long count_chars(char *start, char needle) +{ + unsigned long count; + asm volatile ("fence"); + ROCC_INSTRUCTION_DSS(2, count, start, needle, 0); + return count; +} + +int main(void) +{ + unsigned long count = count_chars(string + 14, 'o'); + if (count != 3) + return count + 1; + return 0; +} diff --git a/tests/rocc.h b/tests/rocc.h new file mode 100644 index 00000000..c76a2827 --- /dev/null +++ b/tests/rocc.h @@ -0,0 +1,122 @@ +// Copyright (c) 2016, Boston University (BU). All Rights Reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// 3. Neither the name of BU nor the names of its contributors may be +// used to endorse or promote products derived from this software +// without specific prior written permission. +// +// IN NO EVENT SHALL BU BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, +// SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, +// ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF +// BU HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// BU SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF +// ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". BU HAS NO OBLIGATION TO +// PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
+ +#ifndef SRC_MAIN_C_ROCC_H +#define SRC_MAIN_C_ROCC_H + +#include <stdint.h> + +#define STR1(x) #x +#define STR(x) STR1(x) +#define EXTRACT(a, size, offset) (((~(~0 << size) << offset) & a) >> offset) + +#define CUSTOMX_OPCODE(x) CUSTOM_ ## x +#define CUSTOM_0 0b0001011 +#define CUSTOM_1 0b0101011 +#define CUSTOM_2 0b1011011 +#define CUSTOM_3 0b1111011 + +#define CUSTOMX(X, xd, xs1, xs2, rd, rs1, rs2, funct) \ + CUSTOMX_OPCODE(X) | \ + (rd << (7)) | \ + (xs2 << (7+5)) | \ + (xs1 << (7+5+1)) | \ + (xd << (7+5+2)) | \ + (rs1 << (7+5+3)) | \ + (rs2 << (7+5+3+5)) | \ + (EXTRACT(funct, 7, 0) << (7+5+3+5+5)) + +// Standard macro that passes rd, rs1, and rs2 via registers +#define ROCC_INSTRUCTION_DSS(X, rd, rs1, rs2, funct) \ + ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, 10, 11, 12) + +#define ROCC_INSTRUCTION_DS(X, rd, rs1, funct) \ + ROCC_INSTRUCTION_R_R_I(X, rd, rs1, 0, funct, 10, 11) + +#define ROCC_INSTRUCTION_D(X, rd, funct) \ + ROCC_INSTRUCTION_R_I_I(X, rd, 0, 0, funct, 10) + +#define ROCC_INSTRUCTION_SS(X, rs1, rs2, funct) \ + ROCC_INSTRUCTION_I_R_R(X, 0, rs1, rs2, funct, 11, 12) + +#define ROCC_INSTRUCTION_S(X, rs1, funct) \ + ROCC_INSTRUCTION_I_R_I(X, 0, rs1, 0, funct, 11) + +#define ROCC_INSTRUCTION(X, funct) \ + ROCC_INSTRUCTION_I_I_I(X, 0, 0, 0, funct) + +// rd, rs1, and rs2 are data +// rd_n, rs1_n, and rs2_n are the register numbers to use +#define ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, rd_n, rs1_n, rs2_n) { \ + register uint64_t rd_ asm ("x" # rd_n); \ + register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \ + register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \ + asm volatile ( \ + ".word " STR(CUSTOMX(X, 1, 1, 1, rd_n, rs1_n, rs2_n, funct)) "\n\t" \ + : "=r" (rd_) \ + : [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \ + rd = rd_; \ + } + +#define ROCC_INSTRUCTION_R_R_I(X, rd, rs1, rs2, funct, rd_n, rs1_n) { \ + register uint64_t rd_ asm ("x" # rd_n); \ + register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \ + asm volatile ( \ + ".word " 
STR(CUSTOMX(X, 1, 1, 0, rd_n, rs1_n, rs2, funct)) "\n\t" \ + : "=r" (rd_) : [_rs1] "r" (rs1_)); \ + rd = rd_; \ + } + +#define ROCC_INSTRUCTION_R_I_I(X, rd, rs1, rs2, funct, rd_n) { \ + register uint64_t rd_ asm ("x" # rd_n); \ + asm volatile ( \ + ".word " STR(CUSTOMX(X, 1, 0, 0, rd_n, rs1, rs2, funct)) "\n\t" \ + : "=r" (rd_)); \ + rd = rd_; \ + } + +#define ROCC_INSTRUCTION_I_R_R(X, rd, rs1, rs2, funct, rs1_n, rs2_n) { \ + register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \ + register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \ + asm volatile ( \ + ".word " STR(CUSTOMX(X, 0, 1, 1, rd, rs1_n, rs2_n, funct)) "\n\t" \ + :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \ + } + +#define ROCC_INSTRUCTION_I_R_I(X, rd, rs1, rs2, funct, rs1_n) { \ + register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \ + asm volatile ( \ + ".word " STR(CUSTOMX(X, 0, 1, 0, rd, rs1_n, rs2, funct)) "\n\t" \ + :: [_rs1] "r" (rs1_)); \ + } + +#define ROCC_INSTRUCTION_I_I_I(X, rd, rs1, rs2, funct) { \ + asm volatile ( \ + ".word " STR(CUSTOMX(X, 0, 0, 0, rd, rs1, rs2, funct)) "\n\t" ); \ + } + +#endif // SRC_MAIN_C_ROCC_H