add new (Tilelink2) RoCC accelerator interface
Includes configuration, test programs, and documentation updates.
This commit is contained in:
29
README.md
29
README.md
@@ -331,9 +331,14 @@ the accelerator can use to distinguish different instructions from each other.
|
||||
|
||||
### Creating an accelerator
|
||||
|
||||
RoCC accelerators should extends the RoCC class.
|
||||
RoCC accelerators are lazy modules that extend the LazyRoCC class.
|
||||
Their implementation should extend the LazyRoCCModule class.
|
||||
|
||||
class CustomAccelerator(implicit p: Parameters) extends RoCC()(p) {
|
||||
class CustomAccelerator(implicit p: Parameters) extends LazyRoCC {
|
||||
override lazy val module = new CustomAcceleratorModule(this)
|
||||
}
|
||||
|
||||
class CustomAcceleratorModule(outer: CustomAccelerator) extends LazyRoCCModule(outer) {
|
||||
val cmd = Queue(io.cmd)
|
||||
// The parts of the command are as follows
|
||||
// inst - the parts of the instruction itself
|
||||
@@ -350,13 +355,19 @@ RoCC accelerators should extends the RoCC class.
|
||||
...
|
||||
}
|
||||
|
||||
The other interfaces available to the accelerator are `mem`, which provides
|
||||
access to the L1 cache, `ptw` which provides access to the page-table walker,
|
||||
`autl` which provides shared access to the L2 alongside the ICache refill,
|
||||
and `utl` which provides dedicated access to the L2.
|
||||
The LazyRoCC class contains two TLOutputNode instances, `atlNode` and `tlNode`.
|
||||
The former connects into a tile-local arbiter along with the backside of the
|
||||
L1 instruction cache. The latter connects directly to the L1-L2 crossbar.
|
||||
The corresponding TileLink ports in the module implementation's IO bundle
|
||||
are `atl` and `tl`, respectively.
|
||||
|
||||
Look at the examples in rocket-chip/src/main/scala/tile/LegacyRocc.scala for
|
||||
detailed information on the different IOs
|
||||
The other interfaces available to the accelerator are `mem`, which provides
|
||||
access to the L1 cache; `ptw`, which provides access to the page-table walker;
|
||||
the `busy` signal, which indicates when the accelerator is still handling an
|
||||
instruction; and the `interrupt` signal, which can be used to interrupt the CPU.
|
||||
|
||||
Look at the examples in rocket-chip/src/main/scala/tile/LazyRocc.scala for
|
||||
detailed information on the different IOs.
|
||||
|
||||
### Adding RoCC accelerator to Config
|
||||
|
||||
@@ -373,7 +384,7 @@ route custom0 and custom1 instructions to it, we could do the following.
|
||||
r.copy(rocc = Seq(
|
||||
RoCCParams(
|
||||
opcodes = OpcodeSet.custom0 | OpcodeSet.custom1,
|
||||
generator = (p: Parameters) => Module(new CustomAccelerator()(p)))))
|
||||
generator = (p: Parameters) => LazyModule(new CustomAccelerator()(p)))))
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
Submodule rocket-chip updated: 0fdaa28694...1f18a37f01
@@ -2,6 +2,10 @@ package example
|
||||
|
||||
import config.{Parameters, Config}
|
||||
import testchipip.WithSerialAdapter
|
||||
import coreplex.WithRoccExample
|
||||
|
||||
class DefaultExampleConfig extends Config(
|
||||
new WithSerialAdapter ++ new rocketchip.DefaultConfig)
|
||||
|
||||
class RoccExampleConfig extends Config(
|
||||
new WithRoccExample ++ new DefaultExampleConfig)
|
||||
|
||||
@@ -3,7 +3,7 @@ OBJDUMP=riscv64-unknown-elf-objdump
|
||||
CFLAGS=-mcmodel=medany -std=gnu99 -O2 -fno-common -fno-builtin-printf
|
||||
LDFLAGS=-static -nostdlib -nostartfiles -lgcc
|
||||
|
||||
PROGRAMS = pwm blkdev
|
||||
PROGRAMS = pwm blkdev accum charcount
|
||||
|
||||
default: $(addsuffix .riscv,$(PROGRAMS))
|
||||
|
||||
|
||||
47
tests/accum.c
Normal file
47
tests/accum.c
Normal file
@@ -0,0 +1,47 @@
|
||||
#include "rocc.h"
|
||||
|
||||
/*
 * Issue a custom-0 RoCC instruction with funct 0, passing `data` in rs1 and
 * `idx` in rs2. No destination register is written.
 *
 * NOTE(review): presumably this stores `data` into accumulator slot `idx`;
 * confirm against the accelerator implementation.
 */
static inline void accum_write(int idx, unsigned long data)
{
	/*
	 * The opcode selector and funct are stringized into a .word directive,
	 * so they must be literals or preprocessor constants.
	 */
	ROCC_INSTRUCTION_SS(0, data, idx, 0);
}
|
||||
|
||||
/*
 * Issue a custom-0 RoCC instruction with funct 1 and return the value the
 * accelerator writes to the destination register. rs1 is passed as 0 and
 * rs2 carries `idx`.
 *
 * NOTE(review): presumably this reads accumulator slot `idx`; confirm
 * against the accelerator implementation.
 */
static inline unsigned long accum_read(int idx)
{
	unsigned long result;

	ROCC_INSTRUCTION_DSS(0, result, 0, idx, 1);

	return result;
}
|
||||
|
||||
static inline void accum_load(int idx, void *ptr)
|
||||
{
|
||||
asm volatile ("fence");
|
||||
ROCC_INSTRUCTION_SS(0, (uintptr_t) ptr, idx, 2);
|
||||
}
|
||||
|
||||
/*
 * Issue a custom-0 RoCC instruction with funct 3, passing `addend` in rs1
 * and `idx` in rs2. No destination register is written.
 *
 * NOTE(review): presumably this adds `addend` to accumulator slot `idx`;
 * confirm against the accelerator implementation.
 */
static inline void accum_add(int idx, unsigned long addend)
{
	ROCC_INSTRUCTION_SS(0, addend, idx, 3);
}
|
||||
|
||||
/* Word in memory whose address is handed to accum_load() below. */
unsigned long data = 0x3421L;

/*
 * Exercise the four accumulator helpers on slot 0.
 *
 * Returns 0 when both checks pass, 1 when the load-then-add check fails,
 * and 2 when the write-then-add check fails.
 */
int main(void)
{
	/* Load `data` through its address, add 2, and read the slot back. */
	accum_load(0, &data);
	accum_add(0, 2);
	if (accum_read(0) != data + 2)
		return 1;

	/* Write an immediate value, add 1, and read the slot back. */
	accum_write(0, 3);
	accum_add(0, 1);

	return (accum_read(0) != 4) ? 2 : 0;
}
|
||||
19
tests/charcount.c
Normal file
19
tests/charcount.c
Normal file
@@ -0,0 +1,19 @@
|
||||
#include "rocc.h"
|
||||
|
||||
char string[64] = "The quick brown fox jumped over the lazy dog";
|
||||
|
||||
static inline unsigned long count_chars(char *start, char needle)
|
||||
{
|
||||
unsigned long count;
|
||||
asm volatile ("fence");
|
||||
ROCC_INSTRUCTION_DSS(2, count, start, needle, 0);
|
||||
return count;
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
unsigned long count = count_chars(string + 14, 'o');
|
||||
if (count != 3)
|
||||
return count + 1;
|
||||
return 0;
|
||||
}
|
||||
122
tests/rocc.h
Normal file
122
tests/rocc.h
Normal file
@@ -0,0 +1,122 @@
|
||||
// Copyright (c) 2016, Boston University (BU). All Rights Reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// 3. Neither the name of BU nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software
|
||||
// without specific prior written permission.
|
||||
//
|
||||
// IN NO EVENT SHALL BU BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
|
||||
// SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
|
||||
// ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
|
||||
// BU HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// BU SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
// PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF
|
||||
// ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". BU HAS NO OBLIGATION TO
|
||||
// PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
|
||||
#ifndef SRC_MAIN_C_ROCC_H
|
||||
#define SRC_MAIN_C_ROCC_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Two-level stringification: STR(x) macro-expands x, then quotes the result. */
#define STR1(x) #x
#define STR(x) STR1(x)

/*
 * Extract the `size`-bit field of `a` that starts at bit `offset`.
 *
 * The ROCC_INSTRUCTION_* macros stringize this expansion into a `.word`
 * directive, so it must be a valid expression for the assembler as well as
 * for C: no integer suffixes, no casts, and every operand parenthesized so
 * the value does not depend on either language's operator precedence.
 *
 * The mask is written as (1 << size) - 1 rather than ~(~0 << size) because
 * left-shifting a negative value is undefined behaviour in C. When evaluated
 * as C, `size` must be smaller than the width of int.
 */
#define EXTRACT(a, size, offset) \
	(((((1 << (size)) - 1) << (offset)) & (a)) >> (offset))

/* Map a custom-opcode index (0-3) to its 7-bit opcode value. */
#define CUSTOMX_OPCODE(x) CUSTOM_ ## x
#define CUSTOM_0 0b0001011
#define CUSTOM_1 0b0101011
#define CUSTOM_2 0b1011011
#define CUSTOM_3 0b1111011

/*
 * Build the 32-bit encoding of a RoCC instruction.
 *
 *   bits  0-6   opcode (selected by X)
 *   bits  7-11  rd
 *   bit   12    xs2
 *   bit   13    xs1
 *   bit   14    xd
 *   bits 15-19  rs1
 *   bits 20-24  rs2
 *   bits 25-31  funct (truncated to 7 bits)
 *
 * Arguments and the whole expansion are parenthesized so that compound
 * arguments and surrounding operators cannot regroup the expression.
 */
#define CUSTOMX(X, xd, xs1, xs2, rd, rs1, rs2, funct) \
	((CUSTOMX_OPCODE(X)) | \
	 ((rd) << (7)) | \
	 ((xs2) << (7+5)) | \
	 ((xs1) << (7+5+1)) | \
	 ((xd) << (7+5+2)) | \
	 ((rs1) << (7+5+3)) | \
	 ((rs2) << (7+5+3+5)) | \
	 (EXTRACT(funct, 7, 0) << (7+5+3+5+5)))
|
||||
|
||||
// Convenience wrappers. The suffix names the register operands in use:
// D = destination (rd), S = source (rs1, then rs2). Register operands are
// pinned to x10 (rd), x11 (rs1) and x12 (rs2); unused fields are encoded as 0.

// Destination and two sources.
#define ROCC_INSTRUCTION_DSS(X, rd, rs1, rs2, funct) \
	ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, 10, 11, 12)

// Destination and one source.
#define ROCC_INSTRUCTION_DS(X, rd, rs1, funct) \
	ROCC_INSTRUCTION_R_R_I(X, rd, rs1, 0, funct, 10, 11)

// Destination only.
#define ROCC_INSTRUCTION_D(X, rd, funct) \
	ROCC_INSTRUCTION_R_I_I(X, rd, 0, 0, funct, 10)

// Two sources, no destination.
#define ROCC_INSTRUCTION_SS(X, rs1, rs2, funct) \
	ROCC_INSTRUCTION_I_R_R(X, 0, rs1, rs2, funct, 11, 12)

// One source, no destination.
#define ROCC_INSTRUCTION_S(X, rs1, funct) \
	ROCC_INSTRUCTION_I_R_I(X, 0, rs1, 0, funct, 11)

// No register operands at all.
#define ROCC_INSTRUCTION(X, funct) \
	ROCC_INSTRUCTION_I_I_I(X, 0, 0, 0, funct)
|
||||
|
||||
// rd, rs1, and rs2 are data
|
||||
// rd_n, rs1_n, and rs2_n are the register numbers to use
|
||||
/*
 * Emit a RoCC instruction with xd = xs1 = xs2 = 1: rs1 and rs2 are read from
 * registers x<rs1_n> and x<rs2_n>, and the result in x<rd_n> is assigned to
 * the lvalue `rd`.
 *
 * The asm template is a raw .word and never references its operands; the
 * operands exist only so the compiler places the values in the pinned
 * registers before the instruction and reads the result from x<rd_n> after.
 *
 * Wrapped in do { } while (0) so that `MACRO(...);` is a single statement
 * and stays correct inside an unbraced if/else.
 */
#define ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, rd_n, rs1_n, rs2_n) \
	do { \
		register uint64_t rd_ asm ("x" # rd_n); \
		register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
		register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
		asm volatile ( \
			".word " STR(CUSTOMX(X, 1, 1, 1, rd_n, rs1_n, rs2_n, funct)) "\n\t" \
			: "=r" (rd_) \
			: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
		rd = rd_; \
	} while (0)
|
||||
|
||||
/*
 * Emit a RoCC instruction with xd = xs1 = 1 and xs2 = 0: rs1 is read from
 * register x<rs1_n>, `rs2` is encoded directly into the rs2 field, and the
 * result in x<rd_n> is assigned to the lvalue `rd`.
 *
 * The asm template is a raw .word; the operands only pin values to the
 * named registers.
 *
 * Wrapped in do { } while (0) so that `MACRO(...);` is a single statement
 * and stays correct inside an unbraced if/else.
 */
#define ROCC_INSTRUCTION_R_R_I(X, rd, rs1, rs2, funct, rd_n, rs1_n) \
	do { \
		register uint64_t rd_ asm ("x" # rd_n); \
		register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
		asm volatile ( \
			".word " STR(CUSTOMX(X, 1, 1, 0, rd_n, rs1_n, rs2, funct)) "\n\t" \
			: "=r" (rd_) : [_rs1] "r" (rs1_)); \
		rd = rd_; \
	} while (0)
|
||||
|
||||
/*
 * Emit a RoCC instruction with xd = 1 and xs1 = xs2 = 0: `rs1` and `rs2` are
 * encoded directly into their instruction fields, and the result in x<rd_n>
 * is assigned to the lvalue `rd`.
 *
 * The asm template is a raw .word; the output operand only tells the
 * compiler that x<rd_n> holds the result.
 *
 * Wrapped in do { } while (0) so that `MACRO(...);` is a single statement
 * and stays correct inside an unbraced if/else.
 */
#define ROCC_INSTRUCTION_R_I_I(X, rd, rs1, rs2, funct, rd_n) \
	do { \
		register uint64_t rd_ asm ("x" # rd_n); \
		asm volatile ( \
			".word " STR(CUSTOMX(X, 1, 0, 0, rd_n, rs1, rs2, funct)) "\n\t" \
			: "=r" (rd_)); \
		rd = rd_; \
	} while (0)
|
||||
|
||||
/*
 * Emit a RoCC instruction with xd = 0 and xs1 = xs2 = 1: rs1 and rs2 are read
 * from registers x<rs1_n> and x<rs2_n>, and `rd` is encoded directly into the
 * rd field. No result is returned.
 *
 * The asm template is a raw .word; the operands only pin values to the
 * named registers.
 *
 * Wrapped in do { } while (0) so that `MACRO(...);` is a single statement
 * and stays correct inside an unbraced if/else.
 */
#define ROCC_INSTRUCTION_I_R_R(X, rd, rs1, rs2, funct, rs1_n, rs2_n) \
	do { \
		register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
		register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
		asm volatile ( \
			".word " STR(CUSTOMX(X, 0, 1, 1, rd, rs1_n, rs2_n, funct)) "\n\t" \
			:: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
	} while (0)
|
||||
|
||||
/*
 * Emit a RoCC instruction with xs1 = 1 and xd = xs2 = 0: rs1 is read from
 * register x<rs1_n>, while `rd` and `rs2` are encoded directly into their
 * instruction fields. No result is returned.
 *
 * The asm template is a raw .word; the operand only pins the value to the
 * named register.
 *
 * Wrapped in do { } while (0) so that `MACRO(...);` is a single statement
 * and stays correct inside an unbraced if/else.
 */
#define ROCC_INSTRUCTION_I_R_I(X, rd, rs1, rs2, funct, rs1_n) \
	do { \
		register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
		asm volatile ( \
			".word " STR(CUSTOMX(X, 0, 1, 0, rd, rs1_n, rs2, funct)) "\n\t" \
			:: [_rs1] "r" (rs1_)); \
	} while (0)
|
||||
|
||||
/*
 * Emit a RoCC instruction with xd = xs1 = xs2 = 0: `rd`, `rs1` and `rs2` are
 * all encoded directly into their instruction fields and no register values
 * are exchanged with the compiler.
 *
 * Wrapped in do { } while (0) so that `MACRO(...);` is a single statement
 * and stays correct inside an unbraced if/else.
 */
#define ROCC_INSTRUCTION_I_I_I(X, rd, rs1, rs2, funct) \
	do { \
		asm volatile ( \
			".word " STR(CUSTOMX(X, 0, 0, 0, rd, rs1, rs2, funct)) "\n\t" ); \
	} while (0)
|
||||
|
||||
#endif // SRC_MAIN_C_ROCC_H
|
||||
Reference in New Issue
Block a user