Check in Gemmini headers instead of using a submodule

This commit is contained in:
Hansung Kim
2025-01-29 17:08:32 -08:00
parent e86aac3a6f
commit 3de51577ef
19 changed files with 5234 additions and 5 deletions

3
.gitmodules vendored
View File

@@ -7,6 +7,3 @@
[submodule "third_party/ramulator"]
path = third_party/ramulator
url = https://github.com/CMU-SAFARI/ramulator.git
[submodule "third_party/gemmini-rocc-tests"]
path = third_party/gemmini-rocc-tests
url = https://github.com/ucb-bar/gemmini-rocc-tests

View File

@@ -0,0 +1,24 @@
// See LICENSE for license details.
#ifndef SRC_MAIN_C_ACCUMULATOR_H
#define SRC_MAIN_C_ACCUMULATOR_H
#include "rocc-software/src/xcustom.h"
// Function codes passed as the last ROCC_INSTRUCTION argument by the do*
// wrappers below.
#define k_DO_WRITE 0
#define k_DO_READ 1
#define k_DO_LOAD 2
#define k_DO_ACCUM 3
// Custom opcode slot passed as the first ROCC_INSTRUCTION argument.
// NOTE(review): gemmini_params.h defines XCUSTOM_ACC as 3; including both
// headers in one translation unit would redefine the macro -- confirm they
// are never combined.
#define XCUSTOM_ACC 0
// Each wrapper expands to a single ROCC_INSTRUCTION(...) invocation followed
// by a semicolon that is part of the macro itself.
// NOTE(review): because of that built-in ';', writing `if (c) doWrite(...);
// else ...` does not compile -- wrap calls in braces or drop the ';' here
// after auditing callers.
// doWrite: forwards y, data, rocc_rd with function code k_DO_WRITE.
#define doWrite(y, rocc_rd, data) \
ROCC_INSTRUCTION(XCUSTOM_ACC, y, data, rocc_rd, k_DO_WRITE);
// doRead: forwards y, a literal 0 and rocc_rd with function code k_DO_READ.
#define doRead(y, rocc_rd) \
ROCC_INSTRUCTION(XCUSTOM_ACC, y, 0, rocc_rd, k_DO_READ);
// doLoad: forwards y, mem_addr and rocc_rd with function code k_DO_LOAD.
#define doLoad(y, rocc_rd, mem_addr) \
ROCC_INSTRUCTION(XCUSTOM_ACC, y, mem_addr, rocc_rd, k_DO_LOAD);
// doAccum: forwards y, data and rocc_rd with function code k_DO_ACCUM.
#define doAccum(y, rocc_rd, data) \
ROCC_INSTRUCTION(XCUSTOM_ACC, y, data, rocc_rd, k_DO_ACCUM);
#endif // SRC_MAIN_C_ACCUMULATOR_H

View File

@@ -0,0 +1,10 @@
// See LICENSE for license details.
#ifndef SRC_MAIN_C_CHARACTER_H
#define SRC_MAIN_C_CHARACTER_H
#include "rocc-software/src/xcustom.h"
// Custom opcode slot number exported by this header.
// NOTE(review): nothing in this header uses it; presumably callers pass it as
// the first ROCC_INSTRUCTION argument -- confirm.
#define XCUSTOM_CHAR 2
#endif // SRC_MAIN_C_CHARACTER_H

3611
gemmini/include/gemmini.h Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,79 @@
// See LICENSE for license details.
#ifndef COUNTER_H_
#define COUNTER_H_
// ID 0.
// NOTE(review): presumably selects "no event" for a counter slot -- confirm
// against the counter configuration code.
#define DISABLE 0
// Highest ID of the contiguous 1..44 range below; the IDs that follow that
// range are written as offsets from this value.
#define INCREMENTAL_COUNTERS 44
// All existing Gemmini performance counters (IDs 1 through 44)
#define MAIN_LD_CYCLES 1
#define MAIN_ST_CYCLES 2
#define MAIN_EX_CYCLES 3
#define MAIN_LD_ST_CYCLES 4
#define MAIN_LD_EX_CYCLES 5
#define MAIN_ST_EX_CYCLES 6
#define MAIN_LD_ST_EX_CYCLES 7
#define LOAD_DMA_WAIT_CYCLE 8
#define LOAD_ACTIVE_CYCLE 9
#define LOAD_SCRATCHPAD_WAIT_CYCLE 10
#define STORE_DMA_WAIT_CYCLE 11
#define STORE_ACTIVE_CYCLE 12
#define STORE_POOLING_CYCLE 13
#define STORE_SCRATCHPAD_WAIT_CYCLE 14
#define DMA_TLB_MISS_CYCLE 15
#define DMA_TLB_HIT_REQ 16
#define DMA_TLB_TOTAL_REQ 17
#define RDMA_ACTIVE_CYCLE 18
#define RDMA_TLB_WAIT_CYCLES 19
#define RDMA_TL_WAIT_CYCLES 20
#define WDMA_ACTIVE_CYCLE 21
#define WDMA_TLB_WAIT_CYCLES 22
#define WDMA_TL_WAIT_CYCLES 23
#define EXE_ACTIVE_CYCLE 24
#define EXE_FLUSH_CYCLE 25
#define EXE_CONTROL_Q_BLOCK_CYCLE 26
#define EXE_PRELOAD_HAZ_CYCLE 27
#define EXE_OVERLAP_HAZ_CYCLE 28
#define SCRATCHPAD_A_WAIT_CYCLE 29
#define SCRATCHPAD_B_WAIT_CYCLE 30
#define SCRATCHPAD_D_WAIT_CYCLE 31
#define ACC_A_WAIT_CYCLE 32
#define ACC_B_WAIT_CYCLE 33
#define ACC_D_WAIT_CYCLE 34
#define A_GARBAGE_CYCLES 35
#define B_GARBAGE_CYCLES 36
#define D_GARBAGE_CYCLES 37
#define IM2COL_MEM_CYCLES 38
#define IM2COL_ACTIVE_CYCLES 39
#define IM2COL_TRANSPOSER_WAIT_CYCLE 40
#define RESERVATION_STATION_FULL_CYCLES 41
#define RESERVATION_STATION_ACTIVE_CYCLES 42
#define LOOP_MATMUL_ACTIVE_CYCLES 43
#define TRANSPOSE_PRELOAD_UNROLLER_ACTIVE_CYCLES 44
// IDs 45 through 51, expressed relative to INCREMENTAL_COUNTERS.
// NOTE(review): the names suggest these report counts, byte totals and
// latencies rather than per-cycle increments -- confirm with the hardware
// counter documentation.
#define RESERVATION_STATION_LD_COUNT (INCREMENTAL_COUNTERS + 1)
#define RESERVATION_STATION_ST_COUNT (INCREMENTAL_COUNTERS + 2)
#define RESERVATION_STATION_EX_COUNT (INCREMENTAL_COUNTERS + 3)
#define RDMA_BYTES_REC (INCREMENTAL_COUNTERS + 4)
#define WDMA_BYTES_SENT (INCREMENTAL_COUNTERS + 5)
#define RDMA_TOTAL_LATENCY (INCREMENTAL_COUNTERS + 6)
#define WDMA_TOTAL_LATENCY (INCREMENTAL_COUNTERS + 7)
#endif

View File

@@ -0,0 +1,576 @@
#ifndef GEMMINI_NN_H
#define GEMMINI_NN_H
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#ifndef BAREMETAL
#include <sys/mman.h>
#endif
#include "include/gemmini.h"
#include "include/gemmini_testutils.h"
// Per-layer description consumed by the conv / im2col / resadd / pool helpers
// in this header. Field order is part of the layout; do not reorder.
struct ConvParams {
    // Batch loop bound in im2col* and resadd*.
    int batch_size;
    // Input extents: used for the padding bounds checks and for flattening
    // (batch, row, col) into a row index.
    int in_row_dim;
    int in_col_dim;
    // Output extents: used to flatten the output row index in conv_dw* and as
    // the input extents of pool_with_col2im.
    int out_row_dim;
    int out_col_dim;
    // Kernel loop bound (square kernel_size x kernel_size window).
    int kernel_size;
    // Channel loop bound in im2col*.
    int in_channels;
    // Channel loop bound in resadd*.
    int out_channels;
    // Not read by any helper in this header.
    int in_stride;
    int weight_stride;
    int out_stride;
    // Window step and border width of the convolution.
    int stride;
    int padding;
    // When true, conv_dw* seeds each accumulator from bias[channel].
    bool bias;
    // Not read by any helper in this header.
    bool depthwise;
    int n_patches;
    int patch_size;
    // Passed to ACC_SCALE on every conv_dw* result.
    acc_scale_t output_scale;
    // Passed to MVIN_SCALE on operand A in resadd*.
    scale_t res_scale;
    // Pooling window size, step and border as read by pool*.
    int pool_size;
    int pool_stride;
    int pool_padding;
    // Row and column loop bound in resadd*.
    int out_dim_pooled;
    // Not read in this header.
    // NOTE(review): presumably the matmul dimensions of the layer -- confirm.
    int I;
    int J;
    int K;
};
// Per-layer description of a fully connected layer. None of the helpers in
// this header read it; the field comments below are therefore hedged.
struct FcParams {
    int batch_size;
    int in_features;
    int out_features;
    // NOTE(review): presumably handed to the matmul as its output scale --
    // confirm against the callers.
    acc_scale_t output_scale;
    bool bias;
    // NOTE(review): presumably the matmul dimensions of the layer -- confirm.
    int I;
    int J;
    int K;
};
// Prints a histogram of a 4-D array: for every value from -128 to 127 that
// occurs at least once, one "<value>: <count> times" line is written with
// printf.
//
// IMAGES must be a real 4-D array (not a pointer), because every extent is
// derived with sizeof. The argument is parenthesised at each use, and the
// indices are size_t so they compare against the sizeof quotients without a
// signed/unsigned mismatch.
#define HIST_IMAGES(IMAGES) \
    for (int num = -128; num <= 127; num++) { \
        int count = 0; \
        for (size_t i = 0; i < sizeof(IMAGES)/sizeof((IMAGES)[0]); i++) { \
            for (size_t j = 0; j < sizeof((IMAGES)[0])/sizeof((IMAGES)[0][0]); j++) { \
                for (size_t k = 0; k < sizeof((IMAGES)[0][0])/sizeof((IMAGES)[0][0][0]); k++) { \
                    for (size_t l = 0; l < sizeof((IMAGES)[0][0][0])/sizeof((IMAGES)[0][0][0][0]); l++) { \
                        if ((IMAGES)[i][j][k][l] == num) { \
                            count++; \
                        } \
                    } \
                } \
            } \
        } \
        if (count > 0) { \
            printf("%d: %d times\n", num, count); \
        } \
    }
// Prints a histogram of a 2-D array: for every value from -128 to 127 that
// occurs at least once, one "<value>: <count> times" line is written with
// printf.
//
// MATRIX must be a real 2-D array (not a pointer), because both extents are
// derived with sizeof. The argument is parenthesised at each use, and the
// indices are size_t so they compare against the sizeof quotients without a
// signed/unsigned mismatch.
#define HIST_MATRIX(MATRIX) \
    for (int num = -128; num <= 127; num++) { \
        int count = 0; \
        for (size_t i = 0; i < sizeof(MATRIX)/sizeof((MATRIX)[0]); i++) { \
            for (size_t j = 0; j < sizeof((MATRIX)[0])/sizeof((MATRIX)[0][0]); j++) { \
                if ((MATRIX)[i][j] == num) { \
                    count++; \
                } \
            } \
        } \
        if (count > 0) { \
            printf("%d: %d times\n", num, count); \
        } \
    }
// Runs a tiled matrix multiplication with caller-supplied tiling factors
// (tile_I, tile_J, tile_K) through tiled_matmul.
//
// A is dim_I x dim_K, B is dim_K x dim_J, C is the dim_I x dim_J result and D
// is forwarded untouched as the bias argument. The stride arguments passed on
// are dim_K for A and dim_J for the others, and all three mvin scales are
// MVIN_SCALE_IDENTITY.
//
// When check is true, progress lines tagged with layer_name are printed, the
// product is recomputed with tiled_matmul_auto in CPU mode into a stack VLA,
// and the process exits with status 1 if the two results differ.
static void tiled_matmul_nn(size_t dim_I, size_t dim_J, size_t dim_K,
        const elem_t A[dim_I][dim_K], const elem_t B[dim_K][dim_J],
        const void * D, elem_t C[dim_I][dim_J],
        int act, acc_scale_t scale, bool repeating_bias,
        size_t tile_I, size_t tile_J, size_t tile_K,
        enum tiled_matmul_type_t tiled_matmul_type,
        bool check, char * layer_name)
{
    if (check) {
        printf("%s: gemmini\n", layer_name);
    }

    tiled_matmul(dim_I, dim_J, dim_K,
        (elem_t*)A, (elem_t*)B, D, (elem_t*)C,
        dim_K, dim_J, dim_J, dim_J,
        MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY,
        act, scale, 0, repeating_bias,
        tile_I, tile_J, tile_K,
        false, false,
        false, false,
        0,
        tiled_matmul_type);

    if (!check) {
        return;
    }

    // Reference run: same operands, CPU implementation, separate output.
    printf("%s: CPU\n", layer_name);
    elem_t gold[dim_I][dim_J];
    tiled_matmul_auto(dim_I, dim_J, dim_K,
        (elem_t*)A, (elem_t*)B, D, (elem_t*)gold,
        dim_K, dim_J, dim_J, dim_J,
        MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY,
        act, scale, 0, repeating_bias,
        false, false,
        false, false,
        0,
        CPU);

    if (MAT_IS_EQUAL(dim_I, dim_J, C, gold)) {
        return;
    }

    printf("Layer calculated incorrectly: %s\n", layer_name);
    exit(1);
}
// Runs a tiled matrix multiplication through tiled_matmul_auto, i.e. without
// caller-supplied tiling factors.
// Uses the default strides: A_stride = dim_K, B_stride/C_stride/D_stride = dim_J.
//
// When check is true, progress lines tagged with layer_name are printed, the
// product is recomputed with tiled_matmul_auto in CPU mode into a stack VLA,
// and the process exits with status 1 if the two results differ.
static void tiled_matmul_nn_auto(size_t dim_I, size_t dim_J, size_t dim_K,
        const elem_t A[dim_I][dim_K], const elem_t B[dim_K][dim_J],
        const void * D, elem_t C[dim_I][dim_J],
        int act, acc_scale_t scale, bool repeating_bias,
        enum tiled_matmul_type_t tiled_matmul_type,
        bool check, char * layer_name)
{
    if (check) {
        printf("%s: gemmini\n", layer_name);
    }

    tiled_matmul_auto(dim_I, dim_J, dim_K,
        (elem_t*)A, (elem_t*)B, D, (elem_t*)C,
        dim_K, dim_J, dim_J, dim_J,
        MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY,
        act, scale, 0, repeating_bias,
        false, false,
        false, false,
        0,
        tiled_matmul_type);

    if (!check) {
        return;
    }

    // Reference run: same operands, CPU implementation, separate output.
    printf("%s: CPU\n", layer_name);
    elem_t gold[dim_I][dim_J];
    tiled_matmul_auto(dim_I, dim_J, dim_K,
        (elem_t*)A, (elem_t*)B, D, (elem_t*)gold,
        dim_K, dim_J, dim_J, dim_J,
        MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY,
        act, scale, 0, repeating_bias,
        false, false,
        false, false,
        0,
        CPU);

    if (MAT_IS_EQUAL(dim_I, dim_J, C, gold)) {
        return;
    }

    printf("Layer calculated incorrectly: %s\n", layer_name);
    exit(1);
}
// Tiled matrix multiplication with caller-supplied strides and tiling chosen
// by tiled_matmul_auto.
//
// C_stride is passed for both of the last two stride arguments, and all three
// mvin scales are MVIN_SCALE_IDENTITY.
// NOTE(review): C is declared const here but sits in the argument position
// the sibling wrappers use for their writable result, and its const is cast
// away -- confirm callers pass writable storage.
static void tiled_matmul_nn_stride_auto(size_t dim_I, size_t dim_J, size_t dim_K,
        const size_t A_stride, const size_t B_stride, const size_t C_stride,
        const elem_t * A, const elem_t * B, const void * D, const elem_t * C,
        int act, acc_scale_t scale, bool repeating_bias,
        enum tiled_matmul_type_t tiled_matmul_type)
{
    elem_t * const a_ptr = (elem_t*)A;
    elem_t * const b_ptr = (elem_t*)B;
    elem_t * const c_ptr = (elem_t*)C;

    tiled_matmul_auto(dim_I, dim_J, dim_K,
        a_ptr, b_ptr, D, c_ptr,
        A_stride, B_stride, C_stride, C_stride,
        MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY,
        act, scale, 0, repeating_bias,
        false, false,
        false, false,
        0,
        tiled_matmul_type);
}
// CPU per-channel convolution: each channel of the 4-D input is convolved
// with its own kernel_size x kernel_size weight slice.
//
// The loop bounds for batch, channel and output position come from the
// explicit arguments; kernel size, stride, padding and the input/output
// extents used for bounds checks and indexing come from params. Positions
// that fall outside the input contribute nothing. Negative sums are replaced
// by 0, the sum is passed through ACC_SCALE with params->output_scale, then
// clamped to [elem_t_min, elem_t_max].
//
// The result is stored in the 2-D output[I][J]: the row is the flattened
// (batch, out_row, out_col) index and the column is the channel.
static void conv_dw(size_t I, size_t J,
        const size_t batch_size, const size_t channels,
        const size_t in_row_dim, const size_t in_col_dim,
        const size_t out_row_dim, const size_t out_col_dim,
        const size_t kernel_size,
        const elem_t input[batch_size][in_row_dim][in_col_dim][channels],
        const elem_t weight[channels][kernel_size][kernel_size],
        const acc_t * bias,
        elem_t output [I][J],
        const struct ConvParams * params)
{
    for (int b = 0; b < batch_size; b++) {
        for (int ch = 0; ch < channels; ch++) {
            for (int orow = 0; orow < out_row_dim; orow++) {
                for (int ocol = 0; ocol < out_col_dim; ocol++) {
                    // Top-left corner of the window in input coordinates.
                    const int row0 = orow * params->stride - params->padding;
                    const int col0 = ocol * params->stride - params->padding;

                    acc_t result = 0;
                    if (params->bias) {
                        result = bias[ch];
                    }

                    for (int krow = 0; krow < params->kernel_size; krow++) {
                        const int in_row = row0 + krow;
                        for (int kcol = 0; kcol < params->kernel_size; kcol++) {
                            const int in_col = col0 + kcol;
                            const bool inside = in_row >= 0 && in_row < params->in_row_dim
                                && in_col >= 0 && in_col < params->in_col_dim;
                            if (!inside) {
                                continue;
                            }
                            result += input[b][in_row][in_col][ch] * weight[ch][krow][kcol];
                        }
                    }

                    if (result < 0) {
                        result = 0;
                    }

                    acc_t scaled = ACC_SCALE(result, params->output_scale);
                    if (scaled > elem_t_max) {
                        scaled = elem_t_max;
                    } else if (scaled < elem_t_min) {
                        scaled = elem_t_min;
                    }

                    const size_t flat_row = b * params->out_row_dim * params->out_col_dim + orow * params->out_col_dim + ocol;
                    output[flat_row][ch] = scaled;
                }
            }
        }
    }
}
// Same per-channel convolution as conv_dw, but the input is the 2-D
// input[prev_I][prev_J]: the row is the flattened (batch, in_row, in_col)
// index built from params->in_row_dim / params->in_col_dim, the column is the
// channel.
//
// Positions outside the input contribute nothing. Negative sums are replaced
// by 0, the sum is passed through ACC_SCALE with params->output_scale, then
// clamped to [elem_t_min, elem_t_max] and stored in output[I][J] with the
// flattened (batch, out_row, out_col) index as row and the channel as column.
static void conv_dw_with_col2im(size_t prev_I, size_t prev_J, size_t I, size_t J,
        const size_t batch_size, const size_t channels,
        const size_t out_row_dim, const size_t out_col_dim, const size_t kernel_size,
        const elem_t input[prev_I][prev_J],
        const elem_t weight[channels][kernel_size][kernel_size],
        const acc_t * bias,
        elem_t output [I][J],
        const struct ConvParams * params)
{
    for (int b = 0; b < batch_size; b++) {
        for (int ch = 0; ch < channels; ch++) {
            for (int orow = 0; orow < out_row_dim; orow++) {
                for (int ocol = 0; ocol < out_col_dim; ocol++) {
                    // Top-left corner of the window in input coordinates.
                    const int row0 = orow * params->stride - params->padding;
                    const int col0 = ocol * params->stride - params->padding;

                    acc_t result = 0;
                    if (params->bias) {
                        result = bias[ch];
                    }

                    for (int krow = 0; krow < params->kernel_size; krow++) {
                        const int in_row = row0 + krow;
                        for (int kcol = 0; kcol < params->kernel_size; kcol++) {
                            const int in_col = col0 + kcol;
                            const bool inside = in_row >= 0 && in_row < params->in_row_dim
                                && in_col >= 0 && in_col < params->in_col_dim;
                            if (!inside) {
                                continue;
                            }
                            const size_t src_row = b * params->in_row_dim * params->in_col_dim + in_row * params->in_col_dim + in_col;
                            result += input[src_row][ch] * weight[ch][krow][kcol];
                        }
                    }

                    if (result < 0) {
                        result = 0;
                    }

                    acc_t scaled = ACC_SCALE(result, params->output_scale);
                    if (scaled > elem_t_max) {
                        scaled = elem_t_max;
                    } else if (scaled < elem_t_min) {
                        scaled = elem_t_min;
                    }

                    const size_t dst_row = b * params->out_row_dim * params->out_col_dim + orow * params->out_col_dim + ocol;
                    output[dst_row][ch] = scaled;
                }
            }
        }
    }
}
// Unfolds a 4-D input into the 2-D patch matrix output[I][K].
//
// One output row is produced per (batch, window position); within a row the
// columns run over (filter_row, filter_col, channel). All loop bounds come
// from params; the explicit size arguments only shape the array parameters.
//
// Window cells that fall outside the input are skipped without writing.
// NOTE(review): this leaves padded cells at whatever value output already
// holds -- presumably the caller zero-initialises it; confirm.
static void im2col(size_t batch_size, size_t channels, size_t im_row_dim, size_t im_col_dim,
        size_t I, size_t K,
        const elem_t input[batch_size][im_row_dim][im_col_dim][channels],
        elem_t output[I][K],
        const struct ConvParams * params)
{
    int patch_row = 0;

    for (int n_batch = 0; n_batch < params->batch_size; n_batch++) {
        for (int im_row = -params->padding; im_row < params->in_row_dim - params->kernel_size + params->padding + 1; im_row += params->stride) {
            for (int im_col = -params->padding; im_col < params->in_col_dim - params->kernel_size + params->padding + 1; im_col += params->stride) {
                int patch_col = 0;

                for (int filter_row = 0; filter_row < params->kernel_size; filter_row++) {
                    const int pixel_row = im_row + filter_row;
                    for (int filter_col = 0; filter_col < params->kernel_size; filter_col++) {
                        const int pixel_col = im_col + filter_col;
                        const bool inside = pixel_row >= 0 && pixel_row < params->in_row_dim
                            && pixel_col >= 0 && pixel_col < params->in_col_dim;

                        // The column advances for every channel, written or not.
                        for (int im_channel = 0; im_channel < params->in_channels; im_channel++) {
                            if (inside) {
                                output[patch_row][patch_col] = input[n_batch][pixel_row][pixel_col][im_channel];
                            }
                            patch_col++;
                        }
                    }
                }

                patch_row++;
            }
        }
    }
}
// Same unfolding as im2col, but reading from the 2-D input[prev_I][prev_J]:
// the source row is the flattened (batch, pixel_row, pixel_col) index built
// from params->in_row_dim / params->in_col_dim, the source column is the
// channel.
//
// One output row is produced per (batch, window position); within a row the
// columns run over (filter_row, filter_col, channel). Window cells outside
// the input are skipped without writing.
// NOTE(review): padded cells keep whatever value output already holds --
// presumably the caller zero-initialises it; confirm.
static void im2col_with_col2im(size_t prev_I, size_t prev_J,
        size_t next_I, size_t next_K,
        const elem_t input[prev_I][prev_J],
        elem_t output[next_I][next_K],
        const struct ConvParams * params)
{
    int out_row = 0;

    for (int n_batch = 0; n_batch < params->batch_size; n_batch++) {
        for (int im_row = -params->padding; im_row < params->in_row_dim - params->kernel_size + params->padding + 1; im_row += params->stride) {
            for (int im_col = -params->padding; im_col < params->in_col_dim - params->kernel_size + params->padding + 1; im_col += params->stride) {
                int out_col = 0;

                for (int filter_row = 0; filter_row < params->kernel_size; filter_row++) {
                    const int pixel_row = im_row + filter_row;
                    for (int filter_col = 0; filter_col < params->kernel_size; filter_col++) {
                        const int pixel_col = im_col + filter_col;
                        const bool inside = pixel_row >= 0 && pixel_row < params->in_row_dim
                            && pixel_col >= 0 && pixel_col < params->in_col_dim;

                        // The column advances for every channel, written or not.
                        for (int im_channel = 0; im_channel < params->in_channels; im_channel++) {
                            if (inside) {
                                const int in_row = n_batch * params->in_row_dim * params->in_col_dim + pixel_row * params->in_col_dim + pixel_col;
                                output[out_row][out_col] = input[in_row][im_channel];
                            }
                            out_col++;
                        }
                    }
                }

                out_row++;
            }
        }
    }
}
// Element-wise C[i] = MVIN_SCALE(A[i], A_shift) + B[i] over len elements,
// with the sum clamped to [elem_t_min, elem_t_max] before it is stored.
void vecadd(size_t len, const elem_t * A, const elem_t * B, elem_t * C, scale_t A_shift) {
    size_t idx = 0;
    while (idx < len) {
        acc_t sum = MVIN_SCALE(A[idx], A_shift) + B[idx];

        if (sum > elem_t_max) {
            sum = elem_t_max;
        } else if (sum < elem_t_min) {
            sum = elem_t_min;
        }

        C[idx] = sum;
        idx++;
    }
}
// Residual add on 4-D tensors:
//   C[b][r][c][ch] = clamp(MVIN_SCALE(A[b][r][c][ch], params->res_scale) + B[b][r][c][ch])
//
// Loop bounds come from params (batch_size, out_dim_pooled for both rows and
// columns, out_channels); the explicit size arguments only shape the array
// parameters. The upper clamp is elem_t_max; the lower clamp is 0 when relu
// is true and elem_t_min otherwise.
void resadd1(const size_t batch_size, const size_t channels, const size_t im_dim,
        const elem_t A[batch_size][im_dim][im_dim][channels],
        const elem_t B[batch_size][im_dim][im_dim][channels],
        elem_t C[batch_size][im_dim][im_dim][channels],
        bool relu,
        const struct ConvParams * params) {
    // Kept in full_t rather than int: with a floating-point elem_t the value
    // of elem_t_min does not fit in an int, and that conversion is undefined.
    const full_t minimum = relu ? 0 : elem_t_min;

    for (size_t batch = 0; batch < params->batch_size; batch++) {
        for (size_t row = 0; row < params->out_dim_pooled; row++) {
            for (size_t col = 0; col < params->out_dim_pooled; col++) {
                for (size_t channel = 0; channel < params->out_channels; channel++) {
                    acc_t result = MVIN_SCALE(A[batch][row][col][channel], params->res_scale) + B[batch][row][col][channel];

                    if (result > elem_t_max) {
                        result = elem_t_max;
                    } else if (result < minimum) {
                        result = minimum;
                    }

                    C[batch][row][col][channel] = result;
                }
            }
        }
    }
}
// Residual add where A is the 2-D A[I][J] and B, C are 4-D:
//   C[b][r][c][ch] = clamp(MVIN_SCALE(A[flat][ch], params->res_scale) + B[b][r][c][ch])
// with flat = b * out_dim_pooled^2 + r * out_dim_pooled + c.
//
// Loop bounds come from params (batch_size, out_dim_pooled for both rows and
// columns, out_channels); the explicit size arguments only shape the array
// parameters. The upper clamp is elem_t_max; the lower clamp is 0 when relu
// is true and elem_t_min otherwise.
void resadd2(const size_t I, const size_t J,
        const size_t batch_size, const size_t channels, const size_t im_dim,
        const elem_t A[I][J],
        const elem_t B[batch_size][im_dim][im_dim][channels],
        elem_t C[batch_size][im_dim][im_dim][channels],
        bool relu,
        const struct ConvParams * params) {
    // Kept in full_t rather than int: with a floating-point elem_t the value
    // of elem_t_min does not fit in an int, and that conversion is undefined.
    const full_t minimum = relu ? 0 : elem_t_min;

    for (size_t batch = 0; batch < params->batch_size; batch++) {
        for (size_t row = 0; row < params->out_dim_pooled; row++) {
            for (size_t col = 0; col < params->out_dim_pooled; col++) {
                for (size_t channel = 0; channel < params->out_channels; channel++) {
                    // Row of A holding this (batch, row, col) position.
                    size_t r = batch * params->out_dim_pooled * params->out_dim_pooled + row * params->out_dim_pooled + col;
                    acc_t result = MVIN_SCALE(A[r][channel], params->res_scale) + B[batch][row][col][channel];

                    if (result > elem_t_max) {
                        result = elem_t_max;
                    } else if (result < minimum) {
                        result = minimum;
                    }

                    C[batch][row][col][channel] = result;
                }
            }
        }
    }
}
// Residual add where A, B and C are all 2-D [I][J]:
//   C[flat][ch] = clamp(MVIN_SCALE(A[flat][ch], params->res_scale) + B[flat][ch])
// with flat = b * out_dim_pooled^2 + r * out_dim_pooled + c.
//
// Loop bounds come from params (batch_size, out_dim_pooled for both rows and
// columns, out_channels); I and J only shape the array parameters. The upper
// clamp is elem_t_max; the lower clamp is 0 when relu is true and elem_t_min
// otherwise.
void resadd3(const size_t I, const size_t J,
        const elem_t A[I][J],
        const elem_t B[I][J],
        elem_t C[I][J],
        bool relu,
        const struct ConvParams * params) {
    // Kept in full_t rather than int: with a floating-point elem_t the value
    // of elem_t_min does not fit in an int, and that conversion is undefined.
    const full_t minimum = relu ? 0 : elem_t_min;

    for (size_t batch = 0; batch < params->batch_size; batch++) {
        for (size_t row = 0; row < params->out_dim_pooled; row++) {
            for (size_t col = 0; col < params->out_dim_pooled; col++) {
                for (size_t channel = 0; channel < params->out_channels; channel++) {
                    // Shared row index of this (batch, row, col) position.
                    size_t r = batch * params->out_dim_pooled * params->out_dim_pooled + row * params->out_dim_pooled + col;
                    acc_t result = MVIN_SCALE(A[r][channel], params->res_scale) + B[r][channel];

                    if (result > elem_t_max) {
                        result = elem_t_max;
                    } else if (result < minimum) {
                        result = minimum;
                    }

                    C[r][channel] = result;
                }
            }
        }
    }
}
// Max pooling over a 4-D input.
//
// The window size, step and border come from params (pool_size, pool_stride,
// pool_padding); all extents come from the explicit arguments. Every output
// cell starts at elem_t_min and takes the largest value seen in its window,
// where a window cell outside the input counts as 0.
void pool(size_t batch_size, size_t channels, size_t in_row_dim, size_t in_col_dim,
        size_t out_row_dim, size_t out_col_dim,
        elem_t input[batch_size][in_row_dim][in_col_dim][channels],
        elem_t output[batch_size][out_row_dim][out_col_dim][channels],
        const struct ConvParams * params)
{
    size_t window = params->pool_size;
    size_t step = params->pool_stride;
    size_t border = params->pool_padding;

    for (int b = 0; b < batch_size; b++) {
        for (int ch = 0; ch < channels; ch++) {
            for (int orow = 0; orow < out_row_dim; orow++) {
                for (int ocol = 0; ocol < out_col_dim; ocol++) {
                    // Top-left corner of the window in input coordinates.
                    const int row0 = orow * step - border;
                    const int col0 = ocol * step - border;
                    elem_t best = elem_t_min;

                    for (int krow = 0; krow < window; krow++) {
                        const int in_row = row0 + krow;
                        for (int kcol = 0; kcol < window; kcol++) {
                            const int in_col = col0 + kcol;
                            const bool inside = in_row >= 0 && in_row < in_row_dim
                                && in_col >= 0 && in_col < in_col_dim;

                            // Out-of-range cells compete with the value 0.
                            const elem_t candidate = inside ? input[b][in_row][in_col][ch] : 0;
                            if (candidate > best) {
                                best = candidate;
                            }
                        }
                    }

                    output[b][orow][ocol][ch] = best;
                }
            }
        }
    }
}
// Max pooling that reads from the 2-D input[I][J] and writes a 4-D output.
//
// The input row is the flattened (batch, in_row, in_col) index, where the
// input extents are params->out_row_dim and params->out_col_dim; the input
// column is the channel. Window size, step and border come from params
// (pool_size, pool_stride, pool_padding). Every output cell starts at
// elem_t_min and takes the largest value seen in its window, where a window
// cell outside the input counts as 0.
void pool_with_col2im(size_t I, size_t J,
        size_t batch_size, size_t channels, size_t out_row_dim, size_t out_col_dim,
        elem_t input[I][J],
        elem_t output[batch_size][out_row_dim][out_col_dim][channels],
        const struct ConvParams * params)
{
    size_t window = params->pool_size;
    size_t step = params->pool_stride;
    size_t in_rows = params->out_row_dim;
    size_t in_cols = params->out_col_dim;
    size_t border = params->pool_padding;

    for (int b = 0; b < batch_size; b++) {
        for (int ch = 0; ch < channels; ch++) {
            for (int orow = 0; orow < out_row_dim; orow++) {
                for (int ocol = 0; ocol < out_col_dim; ocol++) {
                    // Top-left corner of the window in input coordinates.
                    const int row0 = orow * step - border;
                    const int col0 = ocol * step - border;
                    elem_t best = elem_t_min;

                    for (int krow = 0; krow < window; krow++) {
                        const int in_row = row0 + krow;
                        for (int kcol = 0; kcol < window; kcol++) {
                            const int in_col = col0 + kcol;
                            const bool inside = in_row >= 0 && in_row < in_rows
                                && in_col >= 0 && in_col < in_cols;

                            // Out-of-range cells compete with the value 0.
                            const elem_t candidate = inside
                                ? input[b * in_rows * in_cols + in_row * in_cols + in_col][ch]
                                : 0;
                            if (candidate > best) {
                                best = candidate;
                            }
                        }
                    }

                    output[b][orow][ocol][ch] = best;
                }
            }
        }
    }
}
#endif // GEMMINI_NN_H

View File

@@ -0,0 +1,90 @@
#ifndef GEMMINI_PARAMS_H
#define GEMMINI_PARAMS_H
#include <stdint.h>
#include <limits.h>
// Configuration constants for the DIM = 16 build with 16-bit elements.
// Custom opcode slot.
// NOTE(review): accumulator.h defines XCUSTOM_ACC as 0 -- confirm the two
// headers are never included together.
#define XCUSTOM_ACC 3
// Matrix dimension: the test utilities operate on DIM x DIM matrices.
#define DIM 16
#define ADDR_LEN 32
#define BANK_NUM 4
#define BANK_ROWS 1024
#define ACC_ROWS 1024
#define MAX_BYTES 64
// MAX_BYTES divided by the bytes in one DIM-wide row of 2-byte values;
// evaluates to 2 with the numbers above.
#define MAX_BLOCK_LEN (MAX_BYTES/(DIM*2))
#define MAX_BLOCK_LEN_ACC (MAX_BYTES/(DIM*2))
// Elements are carried as raw 16-bit patterns; the EXP/SIG macros below give
// a 5-bit exponent and 11-bit significand.
typedef uint16_t elem_t;
#define ELEM_T_IS_LOWPREC_FLOAT
// Clamp limits, stored as float values rather than as 16-bit patterns.
static const float elem_t_max = 65504.0;
static const float elem_t_min = -65504.0;
typedef uint16_t acc_t;
// Wide type used by the reference matmul helpers for their results.
typedef double full_t;
#define ELEM_T_IS_FLOAT
#define ELEM_T_EXP_BITS 5
#define ELEM_T_SIG_BITS 11
#define ACC_T_EXP_BITS 5
#define ACC_T_SIG_BITS 11
// Unsigned integer types wide enough to hold the bit patterns of the types
// they are named after.
typedef uint16_t elem_t_bits;
typedef uint16_t acc_t_bits;
#define HAS_MVIN_SCALE
typedef uint16_t scale_t;
typedef uint16_t scale_t_bits;
typedef int32_t scale_acc_t;
typedef uint32_t scale_acc_t_bits;
typedef uint16_t acc_scale_t;
typedef uint16_t acc_scale_t_bits;
// Alignment attributes: `blocks` rows of DIM elements of the given type.
// NOTE(review): `blocks` is not parenthesised in the expansion, so pass a
// single token or a parenthesised expression.
#define row_align(blocks) __attribute__((aligned(blocks*DIM*sizeof(elem_t))))
#define row_align_acc(blocks) __attribute__((aligned(blocks*DIM*sizeof(acc_t))))
// 0x3c00 is the IEEE half-precision bit pattern of 1.0.
#define MVIN_SCALE_IDENTITY 0x3c00
// NOTE(review): acc_scale_t is uint16_t here, so 1.0 converts to the integer
// 1, not to the half-precision pattern used for MVIN_SCALE_IDENTITY --
// confirm this is intended.
#define ACC_SCALE_IDENTITY 1.0
// In this configuration: plain division by 2^shift.
#define ROUNDING_RIGHT_SHIFT(x, shift) \
((x) / (1 << (shift)))
// Type-of-expression helper: decltype in C++, the typeof extension in C.
#ifdef __cplusplus
#define SAME_TYPE(x) decltype(x)
#else
#define SAME_TYPE(x) typeof(x)
#endif
// Rounds x to the nearest integer, with exact halves going to the even
// neighbour. Implemented as a GNU statement expression; the result has the
// type of x.
#define ROUND_NEAR_EVEN(x) \
({ const SAME_TYPE(x) x_ = (x); \
const long long i = x_; \
const long long next = x_ < 0 ? x_ - 1 : x_ + 1; \
SAME_TYPE(x) rem = x_ - i; \
rem = rem < 0 ? -rem : rem; \
SAME_TYPE(x) result = rem < 0.5 ? i : (rem > 0.5 ? next : ( \
i % 2 == 0 ? i : next)); \
result; })
// Rounding right shift equation: https://riscv.github.io/documents/riscv-v-spec/#_vector_fixed_point_rounding_mode_register_vxrm
// For shift > 0: x >> shift, plus 1 when the highest dropped bit is set and
// either a lower dropped bit is set or the kept result is odd.
// For shift <= 0: x << -shift.
#define ROUNDING_RIGHT_SHIFT_BITS(x, shift) \
((shift) > 0 ? (((x) >> (shift)) + \
(((shift) == 0 ? 0 : (((x) >> ((shift)-1)) & 1)) & \
((((shift) <= 1 ? 0 : ((x) & ((1 << ((shift)-1)) - 1))) != 0) | (((x) >> (shift)) & 1)))) : ((x) << (-(shift))))
// In this configuration the scale argument is ignored and x passes through.
#define ACC_SCALE(x, scale) \
((x))
// Plain C multiplication of the two operands.
// NOTE(review): elem_t and scale_t are uint16_t here, so this multiplies the
// raw integers rather than performing a half-precision multiply -- confirm
// that callers expect that.
#define MVIN_SCALE(x, scale) \
((x) * (scale))
// In this configuration the scale argument is ignored and x passes through.
#define MVIN_SCALE_ACC(x, scale) (x)
#define ACC_SCALE_T_IS_FLOAT
#define ACC_SCALE_EXP_BITS 5
#define ACC_SCALE_SIG_BITS 11
#define ACC_READ_SMALL_WIDTH
#define HAS_FIRST_LAYER_OPTIMIZATIONS
#endif // GEMMINI_PARAMS_H

View File

@@ -0,0 +1,92 @@
#ifndef GEMMINI_PARAMS_H
#define GEMMINI_PARAMS_H
#include <stdint.h>
#include <limits.h>
// Configuration constants for the DIM = 8 build with float elements.
// Custom opcode slot.
// NOTE(review): accumulator.h defines XCUSTOM_ACC as 0 -- confirm the two
// headers are never included together.
#define XCUSTOM_ACC 3
// Matrix dimension: the test utilities operate on DIM x DIM matrices.
#define DIM 8
#define ADDR_LEN 32
#define BANK_NUM 8
#define BANK_ROWS 1024
#define ACC_ROWS 512
#define MAX_BYTES 64
// MAX_BYTES divided by the bytes in one DIM-wide row of 4-byte values;
// evaluates to 2 with the numbers above.
#define MAX_BLOCK_LEN (MAX_BYTES/(DIM*4))
#define MAX_BLOCK_LEN_ACC (MAX_BYTES/(DIM*4))
typedef float elem_t;
// Clamp limits used by the saturating helpers.
static const elem_t elem_t_max = 3.4028235E38;
static const elem_t elem_t_min = -3.4028235E38;
typedef float acc_t;
// Wide type used by the reference matmul helpers for their results.
typedef double full_t;
#define ELEM_T_IS_FLOAT
#define ELEM_T_EXP_BITS 8
#define ELEM_T_SIG_BITS 24
#define ACC_T_EXP_BITS 8
#define ACC_T_SIG_BITS 24
// Unsigned integer types wide enough to hold the bit patterns of the types
// they are named after.
typedef uint32_t elem_t_bits;
typedef uint32_t acc_t_bits;
#define HAS_MVIN_SCALE
typedef float scale_t;
typedef uint32_t scale_t_bits;
#define HAS_MVIN_ACC_SCALE
typedef float scale_acc_t;
typedef uint32_t scale_acc_t_bits;
typedef float acc_scale_t;
typedef uint32_t acc_scale_t_bits;
// Alignment attributes: `blocks` rows of DIM elements of the given type.
// NOTE(review): `blocks` is not parenthesised in the expansion, so pass a
// single token or a parenthesised expression.
#define row_align(blocks) __attribute__((aligned(blocks*DIM*sizeof(elem_t))))
#define row_align_acc(blocks) __attribute__((aligned(blocks*DIM*sizeof(acc_t))))
#define MVIN_SCALE_IDENTITY 1.0
#define ACC_SCALE_IDENTITY 1.0
// In this configuration: plain division by 2^shift.
#define ROUNDING_RIGHT_SHIFT(x, shift) \
((x) / (1 << (shift)))
// Type-of-expression helper: decltype in C++, the typeof extension in C.
#ifdef __cplusplus
#define SAME_TYPE(x) decltype(x)
#else
#define SAME_TYPE(x) typeof(x)
#endif
// Rounds x to the nearest integer, with exact halves going to the even
// neighbour. Implemented as a GNU statement expression; the result has the
// type of x.
#define ROUND_NEAR_EVEN(x) \
({ const SAME_TYPE(x) x_ = (x); \
const long long i = x_; \
const long long next = x_ < 0 ? x_ - 1 : x_ + 1; \
SAME_TYPE(x) rem = x_ - i; \
rem = rem < 0 ? -rem : rem; \
SAME_TYPE(x) result = rem < 0.5 ? i : (rem > 0.5 ? next : ( \
i % 2 == 0 ? i : next)); \
result; })
// Rounding right shift equation: https://riscv.github.io/documents/riscv-v-spec/#_vector_fixed_point_rounding_mode_register_vxrm
// For shift > 0: x >> shift, plus 1 when the highest dropped bit is set and
// either a lower dropped bit is set or the kept result is odd.
// For shift <= 0: x << -shift.
#define ROUNDING_RIGHT_SHIFT_BITS(x, shift) \
((shift) > 0 ? (((x) >> (shift)) + \
(((shift) == 0 ? 0 : (((x) >> ((shift)-1)) & 1)) & \
((((shift) <= 1 ? 0 : ((x) & ((1 << ((shift)-1)) - 1))) != 0) | (((x) >> (shift)) & 1)))) : ((x) << (-(shift))))
// All three scale helpers are a plain multiplication in this configuration.
#define ACC_SCALE(x, scale) \
((x) * (scale))
#define MVIN_SCALE(x, scale) \
((x) * (scale))
#define MVIN_SCALE_ACC(x, scale) \
((x) * (scale))
#define ACC_SCALE_T_IS_FLOAT
#define ACC_SCALE_EXP_BITS 8
#define ACC_SCALE_SIG_BITS 24
// NOTE(review): both the SMALL and FULL accumulator-read width flags are
// defined in this configuration -- confirm that is intended.
#define ACC_READ_SMALL_WIDTH
#define ACC_READ_FULL_WIDTH
#define HAS_FIRST_LAYER_OPTIMIZATIONS
#endif // GEMMINI_PARAMS_H

View File

@@ -0,0 +1 @@
gemmini_params.dim16fp16.h

View File

@@ -0,0 +1,285 @@
// See LICENSE for license details.
#ifndef SRC_MAIN_C_GEMMINI_TESTUTILS_H
#define SRC_MAIN_C_GEMMINI_TESTUTILS_H
#undef abs
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <limits.h>
#include <stdbool.h>
#include "include/gemmini_params.h"
#include "include/gemmini.h"
#ifdef BAREMETAL
// Bare-metal replacement for assert(): when expr is false, prints the failed
// expression with its file and line through printf and terminates with
// exit(1).
//
// The body is wrapped in do { } while (0) so that the macro behaves as one
// statement; a bare `if` would silently capture an `else` that follows the
// call. __LINE__ is cast to match the %u conversion.
#undef assert
#define assert(expr) \
    do { \
        if (!(expr)) { \
            printf("Failed assertion: " #expr "\n  " __FILE__ ":%u\n", (unsigned)__LINE__); \
            exit(1); \
        } \
    } while (0)
#endif
// #define GEMMINI_ASSERTIONS
// Matmul utility functions
// Reference product C_full = D + A * B on DIM x DIM matrices. Each output
// cell is seeded from D and accumulated in place in full_t.
static void matmul(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            full_t * const cell = &C_full[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[row][inner]*B[inner][col];
            }
        }
    }
}
// Reference product C = D + A * B on DIM x DIM matrices, with every partial
// sum stored back into the elem_t output cell.
static void matmul_short(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], elem_t C[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            elem_t * const cell = &C[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[row][inner]*B[inner][col];
            }
        }
    }
}
// Reference product C_full = D + A * B on DIM x DIM matrices. Same loop as
// matmul, except that the bias D is already given in the wide type full_t.
static void matmul_full(elem_t A[DIM][DIM], elem_t B[DIM][DIM], full_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            full_t * const cell = &C_full[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[row][inner]*B[inner][col];
            }
        }
    }
}
// Reference product C_full = D + A^T * B on DIM x DIM matrices: A is read
// as A[inner][row]. Accumulates in place in full_t.
static void matmul_A_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            full_t * const cell = &C_full[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[inner][row]*B[inner][col];
            }
        }
    }
}
// Reference product C = D + A^T * B on DIM x DIM matrices: A is read as
// A[inner][row]. Every partial sum is stored back into the elem_t output.
static void matmul_short_A_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], elem_t C[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            elem_t * const cell = &C[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[inner][row]*B[inner][col];
            }
        }
    }
}
// Reference product C_full = D + A^T * B on DIM x DIM matrices: A is read
// as A[inner][row], and the bias D is given in full_t.
static void matmul_full_A_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], full_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            full_t * const cell = &C_full[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[inner][row]*B[inner][col];
            }
        }
    }
}
// Reference product C_full = D + A * B^T on DIM x DIM matrices: B is read
// as B[col][inner]. Accumulates in place in full_t.
static void matmul_B_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            full_t * const cell = &C_full[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[row][inner]*B[col][inner];
            }
        }
    }
}
// Reference product C = D + A * B^T on DIM x DIM matrices: B is read as
// B[col][inner]. Every partial sum is stored back into the elem_t output.
static void matmul_short_B_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], elem_t C[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            elem_t * const cell = &C[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[row][inner]*B[col][inner];
            }
        }
    }
}
// Reference product C_full = D + A * B^T on DIM x DIM matrices: B is read
// as B[col][inner], and the bias D is given in full_t.
static void matmul_full_B_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], full_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            full_t * const cell = &C_full[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[row][inner]*B[col][inner];
            }
        }
    }
}
// Reference product C_full = D + A^T * B^T on DIM x DIM matrices: A is read
// as A[inner][row] and B as B[col][inner]. Accumulates in place in full_t.
static void matmul_AB_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            full_t * const cell = &C_full[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[inner][row]*B[col][inner];
            }
        }
    }
}
// Reference product C = D + A^T * B^T on DIM x DIM matrices: A is read as
// A[inner][row] and B as B[col][inner]. Every partial sum is stored back into
// the elem_t output.
static void matmul_short_AB_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], elem_t C[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            elem_t * const cell = &C[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[inner][row]*B[col][inner];
            }
        }
    }
}
// Reference product C_full = D + A^T * B^T on DIM x DIM matrices: A is read
// as A[inner][row] and B as B[col][inner]; the bias D is given in full_t.
static void matmul_full_AB_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], full_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            full_t * const cell = &C_full[row][col];
            *cell = D[row][col];
            for (size_t inner = 0; inner < DIM; inner++) {
                *cell += A[inner][row]*B[col][inner];
            }
        }
    }
}
// Element-wise sum = m1 + m2 on DIM x DIM matrices of full_t.
static void matadd(full_t sum[DIM][DIM], full_t m1[DIM][DIM], full_t m2[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            sum[row][col] = m1[row][col] + m2[row][col];
        }
    }
}
// Applies ROUNDING_RIGHT_SHIFT(value, shift) to every element of full and
// stores the result in out.
// Without ELEM_T_IS_FLOAT the shifted value is first clamped to
// [elem_t_min, elem_t_max]; with it, the value is stored unclamped.
static void matshift(full_t full[DIM][DIM], elem_t out[DIM][DIM], int shift) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            const full_t shifted = ROUNDING_RIGHT_SHIFT(full[row][col], shift);
#ifndef ELEM_T_IS_FLOAT
            // Saturating cast down to elem_t.
            full_t clamped = shifted;
            if (shifted > elem_t_max) {
                clamped = elem_t_max;
            } else if (shifted < elem_t_min) {
                clamped = elem_t_min;
            }
            out[row][col] = clamped;
#else
            out[row][col] = shifted; // TODO should we also saturate when using floats?
#endif
        }
    }
}
// Applies ACC_SCALE(value, scale) to every element of full and stores the
// result in out.
// Without ELEM_T_IS_FLOAT the scaled value is first clamped to
// [elem_t_min, elem_t_max]; with it, the value is stored unclamped.
static void matscale(full_t full[DIM][DIM], elem_t out[DIM][DIM], acc_scale_t scale) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            const full_t scaled = ACC_SCALE(full[row][col], scale);
#ifndef ELEM_T_IS_FLOAT
            // Saturating cast down to elem_t.
            full_t clamped = scaled;
            if (scaled > elem_t_max) {
                clamped = elem_t_max;
            } else if (scaled < elem_t_min) {
                clamped = elem_t_min;
            }
            out[row][col] = clamped;
#else
            out[row][col] = scaled; // TODO should we also saturate when using floats?
#endif
        }
    }
}
// Element-wise ReLU on a DIM x DIM matrix: values greater than 0 are copied,
// everything else becomes 0.
static void matrelu(elem_t in[DIM][DIM], elem_t out[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            if (in[row][col] > 0) {
                out[row][col] = in[row][col];
            } else {
                out[row][col] = 0;
            }
        }
    }
}
// Writes the transpose of the DIM x DIM matrix in to out:
// out[col][row] = in[row][col].
static void transpose(elem_t in[DIM][DIM], elem_t out[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            out[col][row] = in[row][col];
        }
    }
}
// Deterministic replacement for the C library rand(): a 32-bit linear
// congruential generator seeded with 777. Each call advances the state and
// returns its top 8 bits, so results lie in 0..255 and the sequence is the
// same on every run.
int rand() {
    static uint32_t state = 777;
    const uint32_t multiplier = 1664525;
    const uint32_t increment = 1013904223;

    state = state * multiplier + increment;
    return state >> 24;
}
#ifdef ELEM_T_IS_FLOAT
// Returns the difference of two pseudo-random ratios. Each ratio is
// (rand() % 128) / (1 + rand() % 64), so it lies in [0, 127] and the result
// lies in [-127, 127].
//
// The four rand() calls are made in separate statements, numerator before
// denominator. Written as operands of a single '/', their order would be
// unspecified and the returned sequence could differ between compilers.
double rand_double() {
    const double a_num = (double)(rand() % 128);
    const double a_den = (double)(1 + (rand() % 64));
    const double b_num = (double)(rand() % 128);
    const double b_den = (double)(1 + (rand() % 64));

    double a = a_num / a_den;
    double b = b_num / b_den;
    return a - b;
}
#endif
// Prints a DIM x DIM elem_t matrix, one row per line with a space after each
// value. Without ELEM_T_IS_FLOAT the values are printed with %d; with it,
// the result of elem_t_to_elem_t_bits is printed with %x.
static void printMatrix(elem_t m[DIM][DIM]) {
    for (size_t row = 0; row < DIM; ++row) {
        for (size_t col = 0; col < DIM; ++col) {
#ifndef ELEM_T_IS_FLOAT
            printf("%d ", m[row][col]);
#else
            printf("%x ", elem_t_to_elem_t_bits(m[row][col]));
#endif
        }
        printf("\n");
    }
}
// Prints a DIM x DIM acc_t matrix, one row per line with a space after each
// value. Without ELEM_T_IS_FLOAT the values are printed with %d; with it,
// the result of acc_t_to_acc_t_bits is printed with %x.
static void printMatrixAcc(acc_t m[DIM][DIM]) {
    for (size_t row = 0; row < DIM; ++row) {
        for (size_t col = 0; col < DIM; ++col) {
#ifndef ELEM_T_IS_FLOAT
            printf("%d ", m[row][col]);
#else
            printf("%x ", acc_t_to_acc_t_bits(m[row][col]));
#endif
        }
        printf("\n");
    }
}
// Returns 1 when the DIM x DIM matrices x and y match element by element,
// otherwise 0. With ELEM_T_IS_FLOAT, a pair of elements also counts as a
// match when elem_t_isnan reports both as NaN.
static int is_equal(elem_t x[DIM][DIM], elem_t y[DIM][DIM]) {
    for (size_t row = 0; row < DIM; ++row) {
        for (size_t col = 0; col < DIM; ++col) {
#ifdef ELEM_T_IS_FLOAT
            const bool x_nan = elem_t_isnan(x[row][col]);
            const bool y_nan = elem_t_isnan(y[row][col]);
            if (x[row][col] != y[row][col] && !(x_nan && y_nan)) {
                return 0;
            }
#else
            if (x[row][col] != y[row][col]) {
                return 0;
            }
#endif
        }
    }
    return 1;
}
// Returns 1 when x equals the transpose of y, i.e. x[row][col] matches
// y[col][row] for every cell, otherwise 0. With ELEM_T_IS_FLOAT, a pair of
// elements also counts as a match when elem_t_isnan reports both as NaN.
static int is_equal_transposed(elem_t x[DIM][DIM], elem_t y[DIM][DIM]) {
    for (size_t row = 0; row < DIM; ++row) {
        for (size_t col = 0; col < DIM; ++col) {
#ifdef ELEM_T_IS_FLOAT
            const bool x_nan = elem_t_isnan(x[row][col]);
            const bool y_nan = elem_t_isnan(y[col][row]);
            if (x[row][col] != y[col][row] && !(x_nan && y_nan)) {
                return 0;
            }
#else
            if (x[row][col] != y[col][row]) {
                return 0;
            }
#endif
        }
    }
    return 1;
}
// This is a GNU extension known as statement expressions.
//
// Evaluates to 1 when the first dim_i x dim_j elements of the 2-D arrays x
// and y all compare equal with !=, otherwise to 0.
//
// - dim_i and dim_j are evaluated once and copied into locals.
// - All internal names carry a mat_..._ prefix/suffix so that caller
//   arguments named i, j or result are not captured by the expansion.
// - x and y are parenthesised before indexing.
// - The outer loop stops as soon as a mismatch has been found.
#define MAT_IS_EQUAL(dim_i, dim_j, x, y) \
    ({ \
        const size_t mat_rows_ = (dim_i); \
        const size_t mat_cols_ = (dim_j); \
        int mat_equal_ = 1; \
        for (size_t mat_r_ = 0; mat_equal_ && mat_r_ < mat_rows_; mat_r_++) { \
            for (size_t mat_c_ = 0; mat_c_ < mat_cols_; mat_c_++) { \
                if ((x)[mat_r_][mat_c_] != (y)[mat_r_][mat_c_]) { \
                    mat_equal_ = 0; \
                    break; \
                } \
            } \
        } \
        mat_equal_; \
    })
// Returns the value produced by the RISC-V `rdcycle` instruction.
// NOTE(review): the result is read through a single register operand;
// presumably only the low 32 bits are valid on a 32-bit target -- confirm.
static uint64_t read_cycles() {
    uint64_t cycle_count;

    asm volatile ("rdcycle %0" : "=r" (cycle_count));

    return cycle_count;
}
#undef abs
#endif // SRC_MAIN_C_GEMMINI_TESTUTILS_H

View File

@@ -0,0 +1,13 @@
// See LICENSE for license details.
#ifndef SRC_MAIN_C_TRANSLATOR_H
#define SRC_MAIN_C_TRANSLATOR_H
#include "rocc-software/src/xcustom.h"
// Custom opcode slot passed as the first ROCC_INSTRUCTION argument by
// doTranslate.
#define XCUSTOM_TRANS 1
// Expands to one ROCC_INSTRUCTION on XCUSTOM_TRANS that forwards y and vaddr
// and passes 0 for the last two arguments. The expansion already ends in a
// semicolon.
// NOTE(review): because of that built-in ';', `if (c) doTranslate(...);
// else ...` does not compile -- wrap calls in braces or drop the ';' here
// after auditing callers.
#define doTranslate(y, vaddr) \
ROCC_INSTRUCTION(XCUSTOM_TRANS, y, vaddr, 0, 0);
#endif // SRC_MAIN_C_TRANSLATOR_H

3
gemmini/rocc-software/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
*~
*#
*.#*

View File

@@ -0,0 +1,46 @@
All contributors must agree to the Developer Certificate of Origin Version 1.1. (DCO 1.1) by signing their commits with:
```
DCO 1.1 Signed-off-by: [NAME] <[EMAIL]>
```
The full text of the DCO 1.1 is as follows:
```
Developer Certificate of Origin
Version 1.1
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
```

View File

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -0,0 +1,4 @@
Rocket Custom Coprocessor (RoCC) Software
========================================
This is a set of C and RISC-V Assembly macros that help with emitting custom RISC-V instructions for talking with Rocket Custom Coprocessors (RoCCs).

View File

@@ -0,0 +1,28 @@
// Copyright 2018 IBM
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ROCC_SOFTWARE_SRC_RISCV_TEST_ROCC_H_
#define ROCC_SOFTWARE_SRC_RISCV_TEST_ROCC_H_
// Assembly snippet: loads a bit mask into a0 and sets those bits in the
// mstatus CSR with `csrs`. Overwrites a0.
// The mask is MSTATUS_XS ANDed with itself shifted right by one bit.
// NOTE(review): assuming MSTATUS_XS (defined elsewhere) is a contiguous
// two-bit field mask, this keeps only the field's low bit - confirm.
#define RVTEST_XS_ENABLE \
li a0, MSTATUS_XS & (MSTATUS_XS >> 1); \
csrs mstatus, a0;
// Defines an assembler macro named `init` whose body is RVTEST_XS_ENABLE.
// NOTE(review): presumably the surrounding test framework invokes `init`
// during test start-up - confirm against the test environment headers.
#define RVTEST_WITH_ROCC \
.macro init; \
RVTEST_XS_ENABLE \
.endm
#endif // ROCC_SOFTWARE_SRC_RISCV_TEST_ROCC_H_

View File

@@ -0,0 +1,170 @@
// Copyright 2018--2020 IBM
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ROCC_SOFTWARE_SRC_XCUSTOM_H_
#define ROCC_SOFTWARE_SRC_XCUSTOM_H_
// Stringification helpers. STR1 turns its argument into a string literal
// exactly as written; STR goes through STR1 so that macros in the argument
// are expanded first. STR is only defined here when no definition exists
// yet; STR1 is defined unconditionally.
#define STR1(x) #x
#ifndef STR
#define STR(x) STR1(x)
#endif
// Token-pasting helpers. CAT_ pastes its two arguments exactly as written;
// CAT goes through CAT_ so that macros in the arguments are expanded first
// (e.g. a macro that expands to 0 yields CUSTOM_0 when pasted to CUSTOM_).
#define CAT_(A, B) A##B
#define CAT(A, B) CAT_(A, B)
/** Assembly macro for creating "raw" Rocket Custom Coprocessor (RoCC)
 * assembly language instructions that will return data in rd. These
 * are to be used only in assembly language programs (not C/C++).
 *
 * Example:
 *
 * Consider the following macro consisting of a CUSTOM_0 instruction
 * with func7 "42" that is doing some operation of "a0 = op(a1, a2)":
 *
 *   ROCC_INSTRUCTION_RAW_R_R_R(0, a0, a1, a2, 42)
 *
 * This will produce the following pseudo assembly language
 * instruction:
 *
 *   .insn r CUSTOM_0, 7, 42, a0, a1, a2
 *
 * @param x the custom instruction number: 0, 1, 2, or 3
 * @param rd the destination register, e.g., a0 or x10
 * @param rs1 the first source register, e.g., a0 or x10
 * @param rs2 the second source register, e.g., a0 or x10
 * @param func7 the value of the func7 field
 * @return a raw .insn RoCC instruction
 */
#define ROCC_INSTRUCTION_RAW_R_R_R(x, rd, rs1, rs2, func7) \
.insn r CAT(CUSTOM_, x), 7, func7, rd, rs1, rs2
/** Assembly macro for creating "raw" Rocket Custom Coprocessor (RoCC)
 * assembly language instructions that will *NOT* return data in rd.
 * These are to be used only in assembly language programs (not
 * C/C++).
 *
 * Example:
 *
 * Consider the following macro consisting of a CUSTOM_1 instruction
 * with func7 "42" that is doing some operation of "op(a1, a2)". *NO*
 * data is returned:
 *
 *   ROCC_INSTRUCTION_RAW_0_R_R(1, a1, a2, 42)
 *
 * This will produce the following pseudo assembly language
 * instruction (the destination is always x0):
 *
 *   .insn r CUSTOM_1, 3, 42, x0, a1, a2
 *
 * @param x the custom instruction number: 0, 1, 2, or 3
 * @param rs1 the first source register, e.g., a0 or x10
 * @param rs2 the second source register, e.g., a0 or x10
 * @param func7 the value of the func7 field
 * @return a raw .insn RoCC instruction
 */
#define ROCC_INSTRUCTION_RAW_0_R_R(x, rs1, rs2, func7) \
.insn r CAT(CUSTOM_, x), 3, func7, x0, rs1, rs2
/** C/C++ inline assembly macro for creating Rocket Custom Coprocessor
 * (RoCC) instructions that return data in rd. These are to be used
 * only in C/C++ programs (not bare assembly).
 *
 * This is an alias: it forwards all five arguments unchanged to
 * ROCC_INSTRUCTION_R_R_R. See its documentation.
 */
#define ROCC_INSTRUCTION(x, rd, rs1, rs2, func7) \
ROCC_INSTRUCTION_R_R_R(x, rd, rs1, rs2, func7)
/** C/C++ inline assembly macro for creating Rocket Custom Coprocessor
 * (RoCC) instructions that return data in C variable rd.
 * These are to be used only in C/C++ programs (not bare assembly).
 *
 * Example:
 *
 * Consider the following macro consisting of a CUSTOM_2 instruction
 * with func7 "42" that is doing some operation of "a0 = op(a1, a2)"
 * (where a0, a1, and a2 are variables defined in C):
 *
 *   ROCC_INSTRUCTION_R_R_R(2, a0, a1, a2, 42)
 *
 * This will produce the following inline assembly:
 *
 *   __asm__ __volatile__(
 *       ".insn r CUSTOM_2, 0x7, 42, %0, %1, %2"
 *       : "=r"(rd)
 *       : "r"(rs1), "r"(rs2));
 *
 * The x and func7 arguments are macro-expanded before being turned into
 * text, so named constants may be passed for them.
 *
 * The expansion is a brace-enclosed block, so it is a complete statement
 * with or without a trailing semicolon at the call site.
 *
 * The alternate keywords __asm__/__volatile__ are used instead of
 * asm/volatile so that the macro also compiles in strict ISO modes
 * (e.g. -std=c11), where the plain asm keyword is not recognised.
 *
 * @param x the custom instruction number: 0, 1, 2, or 3
 * @param rd the C variable to capture as destination operand
 * @param rs1 the C variable to capture for first source register
 * @param rs2 the C variable to capture for second source register
 * @param func7 the value of the func7 field
 * @return an inline assembly RoCC instruction
 */
#define ROCC_INSTRUCTION_R_R_R(x, rd, rs1, rs2, func7) \
  { \
    __asm__ __volatile__( \
        ".insn r " STR(CAT(CUSTOM_, x)) ", " STR(0x7) ", " STR(func7) ", %0, %1, %2" \
        : "=r"(rd) \
        : "r"(rs1), "r"(rs2)); \
  }
/** C/C++ inline assembly macro for creating Rocket Custom Coprocessor
 * (RoCC) instructions that do *NOT* return data: the destination
 * register is hard-wired to x0.
 * These are to be used only in C/C++ programs (not bare assembly).
 *
 * Example:
 *
 * Consider the following macro consisting of a CUSTOM_3 instruction
 * with func7 "42" that is doing some operation of "op(a1, a2)"
 * (where a1 and a2 are variables defined in C):
 *
 *   ROCC_INSTRUCTION_0_R_R(3, a1, a2, 42)
 *
 * This will produce the following inline assembly:
 *
 *   __asm__ __volatile__(
 *       ".insn r CUSTOM_3, 0x3, 42, x0, %0, %1"
 *       :
 *       : "r"(rs1), "r"(rs2));
 *
 * The x and func7 arguments are macro-expanded before being turned into
 * text, so named constants may be passed for them.
 *
 * The expansion is a brace-enclosed block, so it is a complete statement
 * with or without a trailing semicolon at the call site.
 *
 * The alternate keywords __asm__/__volatile__ are used instead of
 * asm/volatile so that the macro also compiles in strict ISO modes
 * (e.g. -std=c11), where the plain asm keyword is not recognised.
 *
 * @param x the custom instruction number: 0, 1, 2, or 3
 * @param rs1 the C variable to capture for first source register
 * @param rs2 the C variable to capture for second source register
 * @param func7 the value of the func7 field
 * @return an inline assembly RoCC instruction
 */
#define ROCC_INSTRUCTION_0_R_R(x, rs1, rs2, func7) \
  { \
    __asm__ __volatile__( \
        ".insn r " STR(CAT(CUSTOM_, x)) ", " STR(0x3) ", " STR(func7) ", x0, %0, %1" \
        : \
        : "r"(rs1), "r"(rs2)); \
  }
// [TODO] fix these to align with the above approach
// Macro to pass rs2_ as an immediate
/*
#define ROCC_INSTRUCTION_R_R_I(XCUSTOM_, rd_, rs1_, rs2_, funct_) \
asm volatile (XCUSTOM_" %[rd], %[rs1], %[rs2], %[funct]" \
: [rd] "=r" (rd_) \
: [rs1] "r" (rs1_), [rs2] "i" (rs2_), [funct] "i" (funct_))
// Macro to pass rs1_ and rs2_ as immediates
#define ROCC_INSTRUCTION_R_I_I(XCUSTOM_, rd_, rs1_, rs2_, funct_) \
asm volatile (XCUSTOM_" %[rd], %[rs1], %[rs2], %[funct]" \
: [rd] "=r" (rd_) \
: [rs1] "i" (rs1_), [rs2] "i" (rs2_), [funct] "i" (funct_))
*/
#endif // ROCC_SOFTWARE_SRC_XCUSTOM_H_

View File

@@ -22,7 +22,7 @@ RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
VORTEX_KN_PATH ?= $(realpath ../../../kernel)
GEMMINI_SW_PATH ?= $(realpath ../../../third_party/gemmini-rocc-tests)
GEMMINI_SW_PATH ?= $(realpath ../../../gemmini)
FPGA_BIN_DIR ?= $(VORTEX_RT_PATH)/opae