add new (Tilelink2) RoCC accelerator interface

Includes configuration, test programs, and documentation updates.
This commit is contained in:
Howard Mao
2017-06-22 16:43:14 -07:00
parent 634cad9e78
commit 9a9ebea207
7 changed files with 214 additions and 11 deletions

View File

@@ -331,9 +331,14 @@ the accelerator can use to distinguish different instructions from each other.
### Creating an accelerator
RoCC accelerators should extends the RoCC class.
RoCC accelerators are lazy modules that extend the LazyRoCC class.
Their implementation should extend the LazyRoCCModule class.
class CustomAccelerator(implicit p: Parameters) extends RoCC()(p) {
class CustomAccelerator(implicit p: Parameters) extends LazyRoCC {
override lazy val module = new CustomAcceleratorModule(this)
}
class CustomAcceleratorModule(outer: CustomAccelerator) extends LazyRoCCModule(outer) {
val cmd = Queue(io.cmd)
// The parts of the command are as follows
// inst - the parts of the instruction itself
@@ -350,13 +355,19 @@ RoCC accelerators should extends the RoCC class.
...
}
The other interfaces available to the accelerator are `mem`, which provides
access to the L1 cache, `ptw` which provides access to the page-table walker,
`autl` which provides shared access to the L2 alongside the ICache refill,
and `utl` which provides dedicated access to the L2.
The LazyRoCC class contains two TLOutputNode instances, `atlNode` and `tlNode`.
The former connects into a tile-local arbiter along with the backside of the
L1 instruction cache. The latter connects directly to the L1-L2 crossbar.
The corresponding TileLink ports in the module implementation's IO bundle
are `atl` and `tl`, respectively.
Look at the examples in rocket-chip/src/main/scala/tile/LegacyRocc.scala for
detailed information on the different IOs
The other interfaces available to the accelerator are `mem`, which provides
access to the L1 cache; `ptw`, which provides access to the page-table walker;
the `busy` signal, which indicates when the accelerator is still handling an
instruction; and the `interrupt` signal, which can be used to interrupt the CPU.
Look at the examples in rocket-chip/src/main/scala/tile/LazyRoCC.scala for
detailed information on the different IOs.
### Adding RoCC accelerator to Config
@@ -373,7 +384,7 @@ route custom0 and custom1 instructions to it, we could do the following.
r.copy(rocc = Seq(
RoCCParams(
opcodes = OpcodeSet.custom0 | OpcodeSet.custom1,
generator = (p: Parameters) => Module(new CustomAccelerator()(p)))))
generator = (p: Parameters) => LazyModule(new CustomAccelerator()(p)))))
}
})

View File

@@ -2,6 +2,10 @@ package example
import config.{Parameters, Config}
import testchipip.WithSerialAdapter
import coreplex.WithRoccExample
// Baseline example configuration: rocketchip.DefaultConfig combined (via `++`)
// with the WithSerialAdapter fragment imported from testchipip.
class DefaultExampleConfig extends Config(
new WithSerialAdapter ++ new rocketchip.DefaultConfig)
// DefaultExampleConfig combined (via `++`) with the WithRoccExample fragment
// imported from coreplex.
class RoccExampleConfig extends Config(
new WithRoccExample ++ new DefaultExampleConfig)

View File

@@ -3,7 +3,7 @@ OBJDUMP=riscv64-unknown-elf-objdump
CFLAGS=-mcmodel=medany -std=gnu99 -O2 -fno-common -fno-builtin-printf
LDFLAGS=-static -nostdlib -nostartfiles -lgcc
PROGRAMS = pwm blkdev
PROGRAMS = pwm blkdev accum charcount
default: $(addsuffix .riscv,$(PROGRAMS))

47
tests/accum.c Normal file
View File

@@ -0,0 +1,47 @@
#include "rocc.h"
/*
 * Write `data` to the accumulator entry selected by `idx`.
 *
 * Issues a custom0 instruction with funct 0, passing `data` as rs1 and `idx`
 * as rs2; no destination register is requested.
 */
static inline void accum_write(int idx, unsigned long data)
{
ROCC_INSTRUCTION_SS(0, data, idx, 0);
}
/*
 * Read back the accumulator entry selected by `idx`.
 *
 * Issues a custom0 instruction with funct 1; rs1 is passed as 0, rs2 carries
 * `idx`, and the value returned in the destination register is the result.
 */
static inline unsigned long accum_read(int idx)
{
	unsigned long readback;

	ROCC_INSTRUCTION_DSS(0, readback, 0, idx, 1);

	return readback;
}
/*
 * Ask the accelerator to load the accumulator entry `idx` from the memory
 * location `ptr`.
 *
 * Issues a custom0 instruction with funct 2, passing the address as rs1 and
 * `idx` as rs2.
 */
static inline void accum_load(int idx, void *ptr)
{
	/*
	 * The fence orders earlier memory accesses in hardware. The "memory"
	 * clobber additionally makes it a compiler barrier, so pending stores
	 * to *ptr are emitted before the accelerator is told to read it.
	 */
	asm volatile ("fence" : : : "memory");
	ROCC_INSTRUCTION_SS(0, (uintptr_t) ptr, idx, 2);
}
/*
 * Add `addend` to the accumulator entry selected by `idx`.
 *
 * Issues a custom0 instruction with funct 3, passing `addend` as rs1 and
 * `idx` as rs2; no destination register is requested.
 */
static inline void accum_add(int idx, unsigned long addend)
{
ROCC_INSTRUCTION_SS(0, addend, idx, 3);
}
/* In-memory operand whose address main() hands to accum_load(). */
unsigned long data = 0x3421L;
/*
 * Exercise the accumulator through custom0.
 *
 * Returns 0 on success, 1 if the load+add sequence reads back the wrong
 * value, 2 if the write+add sequence does.
 */
int main(void)
{
	/* Load entry 0 from memory, bump it by 2, and check the read-back. */
	accum_load(0, &data);
	accum_add(0, 2);
	if (accum_read(0) != data + 2)
		return 1;

	/* Overwrite entry 0 with 3, bump it by 1, and expect 4. */
	accum_write(0, 3);
	accum_add(0, 1);
	if (accum_read(0) != 4)
		return 2;

	return 0;
}

19
tests/charcount.c Normal file
View File

@@ -0,0 +1,19 @@
#include "rocc.h"
/* Text searched by main(); the search starts at offset 14 of this buffer. */
char string[64] = "The quick brown fox jumped over the lazy dog";
/*
 * Count occurrences of `needle` in the string beginning at `start` using the
 * accelerator behind custom2.
 *
 * Issues a custom2 instruction with funct 0, passing `start` as rs1 and
 * `needle` as rs2, and returns the value delivered in the destination
 * register. NOTE(review): the end-of-string condition is decided by the
 * accelerator and is not visible here; presumably the NUL terminator.
 */
static inline unsigned long count_chars(char *start, char needle)
{
	unsigned long count;

	/*
	 * The "memory" clobber turns the fence into a compiler barrier as
	 * well, so any pending stores to the string are emitted before the
	 * accelerator is started.
	 */
	asm volatile ("fence" : : : "memory");
	ROCC_INSTRUCTION_DSS(2, count, start, needle, 0);
	return count;
}
/*
 * Count the letter 'o' in `string` from offset 14 onward and expect 3.
 *
 * Returns 0 on success; otherwise the observed count plus one, so that a
 * count of zero still yields a non-zero exit status.
 */
int main(void)
{
	char *haystack = string + 14;
	unsigned long found = count_chars(haystack, 'o');

	return (found == 3) ? 0 : found + 1;
}

122
tests/rocc.h Normal file
View File

@@ -0,0 +1,122 @@
// Copyright (c) 2016, Boston University (BU). All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the
// distribution.
// 3. Neither the name of BU nor the names of its contributors may be
// used to endorse or promote products derived from this software
// without specific prior written permission.
//
// IN NO EVENT SHALL BU BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
// SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
// ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
// BU HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// BU SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF
// ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS". BU HAS NO OBLIGATION TO
// PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#ifndef SRC_MAIN_C_ROCC_H
#define SRC_MAIN_C_ROCC_H
#include <stdint.h>
// Two-level stringification: STR() expands its argument first, then STR1()
// turns the expanded tokens into a string literal.
#define STR1(x) #x
#define STR(x) STR1(x)
// Yield the `size`-bit field of `a` that starts at bit `offset`.
// Every argument is parenthesized so compound expressions keep their meaning,
// and the mask is built as (1 << size) - 1 so that no negative value is ever
// left-shifted. Only integer literals and operators are used because the
// expansion is stringified (via STR) and evaluated by the assembler.
#define EXTRACT(a, size, offset) (((a) >> (offset)) & ((1 << (size)) - 1))
// Map a custom opcode index (0-3) to the CUSTOM_<n> major opcode below.
#define CUSTOMX_OPCODE(x) CUSTOM_ ## x
#define CUSTOM_0 0b0001011
#define CUSTOM_1 0b0101011
#define CUSTOM_2 0b1011011
#define CUSTOM_3 0b1111011
// Assemble the 32-bit encoding of a RoCC instruction:
//   bits  6:0   opcode (CUSTOM_<X>)
//   bits 11:7   rd register number
//   bit  12     xs2   bit 13  xs1   bit 14  xd
//   bits 19:15  rs1 register number
//   bits 24:20  rs2 register number
//   bits 31:25  funct (low 7 bits only)
// Arguments and the full expansion are parenthesized so the macro composes
// safely with surrounding operators. The result is meant to be stringified
// with STR() and handed to the assembler, so it uses only integer syntax.
#define CUSTOMX(X, xd, xs1, xs2, rd, rs1, rs2, funct) \
    (CUSTOMX_OPCODE(X) | \
    ((rd) << (7)) | \
    ((xs2) << (7+5)) | \
    ((xs1) << (7+5+1)) | \
    ((xd) << (7+5+2)) | \
    ((rs1) << (7+5+3)) | \
    ((rs2) << (7+5+3+5)) | \
    (EXTRACT(funct, 7, 0) << (7+5+3+5+5)))
// Convenience front-ends for the ROCC_INSTRUCTION_<rd>_<rs1>_<rs2> macros.
// In the suffix, D means a destination register is written and each S means
// a source register is read. Operands that are present are pinned to x10
// (rd), x11 (rs1) and x12 (rs2); absent operands are encoded as field value 0.

// No register operands at all.
#define ROCC_INSTRUCTION(X, funct) \
    ROCC_INSTRUCTION_I_I_I(X, 0, 0, 0, funct)

// One source register (rs1).
#define ROCC_INSTRUCTION_S(X, rs1, funct) \
    ROCC_INSTRUCTION_I_R_I(X, 0, rs1, 0, funct, 11)

// Two source registers (rs1, rs2).
#define ROCC_INSTRUCTION_SS(X, rs1, rs2, funct) \
    ROCC_INSTRUCTION_I_R_R(X, 0, rs1, rs2, funct, 11, 12)

// Destination register only.
#define ROCC_INSTRUCTION_D(X, rd, funct) \
    ROCC_INSTRUCTION_R_I_I(X, rd, 0, 0, funct, 10)

// Destination register plus one source register (rs1).
#define ROCC_INSTRUCTION_DS(X, rd, rs1, funct) \
    ROCC_INSTRUCTION_R_R_I(X, rd, rs1, 0, funct, 10, 11)

// Destination register plus two source registers (rs1, rs2).
#define ROCC_INSTRUCTION_DSS(X, rd, rs1, rs2, funct) \
    ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, 10, 11, 12)
// rd, rs1, and rs2 are data
// rd_n, rs1_n, and rs2_n are the register numbers to use
//
// Emit a RoCC instruction with xd = xs1 = xs2 = 1: both sources are read from
// registers and the result is written to a register.
//   X      - custom opcode index, pasted onto CUSTOM_
//   rd     - lvalue that receives the result
//   rs1/rs2 - source values; each is evaluated once and cast to uint64_t.
//             The arguments are parenthesized before the cast so compound
//             expressions such as `ptr + 1` are evaluated as written.
//   funct  - function code (low 7 bits are encoded)
//   rd_n, rs1_n, rs2_n - literal x-register numbers the operands are pinned to
#define ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, rd_n, rs1_n, rs2_n) { \
    register uint64_t rd_ asm ("x" # rd_n); \
    register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) (rs1); \
    register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) (rs2); \
    asm volatile ( \
        ".word " STR(CUSTOMX(X, 1, 1, 1, rd_n, rs1_n, rs2_n, funct)) "\n\t" \
        : "=r" (rd_) \
        : [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
    (rd) = rd_; \
}
// Emit a RoCC instruction with xd = xs1 = 1 and xs2 = 0: rs1 is read from a
// register, the result is written to a register, and `rs2` is placed directly
// in the rs2 field of the encoding.
//   rd    - lvalue that receives the result
//   rs1   - source value; parenthesized before the uint64_t cast so compound
//           expressions are evaluated as written
//   rd_n, rs1_n - literal x-register numbers the operands are pinned to
#define ROCC_INSTRUCTION_R_R_I(X, rd, rs1, rs2, funct, rd_n, rs1_n) { \
    register uint64_t rd_ asm ("x" # rd_n); \
    register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) (rs1); \
    asm volatile ( \
        ".word " STR(CUSTOMX(X, 1, 1, 0, rd_n, rs1_n, rs2, funct)) "\n\t" \
        : "=r" (rd_) : [_rs1] "r" (rs1_)); \
    (rd) = rd_; \
}
// Emit a RoCC instruction with xd = 1 and xs1 = xs2 = 0: only a result
// register is used; `rs1` and `rs2` go straight into the corresponding
// fields of the encoding. `rd` receives the result, `rd_n` is the literal
// x-register number the result is pinned to.
#define ROCC_INSTRUCTION_R_I_I(X, rd, rs1, rs2, funct, rd_n)            \
    {                                                                   \
        register uint64_t rd_ asm ("x" # rd_n);                         \
        asm volatile (                                                  \
            ".word " STR(CUSTOMX(X, 1, 0, 0, rd_n, rs1, rs2, funct)) "\n\t" \
            : "=r" (rd_));                                              \
        rd = rd_;                                                       \
    }
// Emit a RoCC instruction with xd = 0 and xs1 = xs2 = 1: both sources are
// read from registers, no result register is written, and `rd` is placed
// directly in the rd field of the encoding.
//   rs1/rs2 - source values; parenthesized before the uint64_t cast so
//             compound expressions are evaluated as written
//   rs1_n, rs2_n - literal x-register numbers the operands are pinned to
#define ROCC_INSTRUCTION_I_R_R(X, rd, rs1, rs2, funct, rs1_n, rs2_n) { \
    register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) (rs1); \
    register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) (rs2); \
    asm volatile ( \
        ".word " STR(CUSTOMX(X, 0, 1, 1, rd, rs1_n, rs2_n, funct)) "\n\t" \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
// Emit a RoCC instruction with xd = 0, xs1 = 1 and xs2 = 0: only rs1 is read
// from a register; `rd` and `rs2` are placed directly in the corresponding
// fields of the encoding.
//   rs1   - source value; parenthesized before the uint64_t cast so compound
//           expressions are evaluated as written
//   rs1_n - literal x-register number the operand is pinned to
#define ROCC_INSTRUCTION_I_R_I(X, rd, rs1, rs2, funct, rs1_n) { \
    register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) (rs1); \
    asm volatile ( \
        ".word " STR(CUSTOMX(X, 0, 1, 0, rd, rs1_n, rs2, funct)) "\n\t" \
        :: [_rs1] "r" (rs1_)); \
}
// Emit a RoCC instruction with xd = xs1 = xs2 = 0: no register operands are
// used; `rd`, `rs1` and `rs2` go straight into the corresponding fields of
// the encoding.
#define ROCC_INSTRUCTION_I_I_I(X, rd, rs1, rs2, funct)                  \
    {                                                                   \
        asm volatile (                                                  \
            ".word " STR(CUSTOMX(X, 0, 0, 0, rd, rs1, rs2, funct)) "\n\t" ); \
    }
#endif // SRC_MAIN_C_ROCC_H