Check-in gemmini headers instead of submodule
This commit is contained in:
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -7,6 +7,3 @@
|
||||
[submodule "third_party/ramulator"]
|
||||
path = third_party/ramulator
|
||||
url = https://github.com/CMU-SAFARI/ramulator.git
|
||||
[submodule "third_party/gemmini-rocc-tests"]
|
||||
path = third_party/gemmini-rocc-tests
|
||||
url = https://github.com/ucb-bar/gemmini-rocc-tests
|
||||
|
||||
24
gemmini/include/accumulator.h
Normal file
24
gemmini/include/accumulator.h
Normal file
@@ -0,0 +1,24 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#ifndef SRC_MAIN_C_ACCUMULATOR_H
|
||||
#define SRC_MAIN_C_ACCUMULATOR_H
|
||||
|
||||
#include "rocc-software/src/xcustom.h"
|
||||
|
||||
// Operation selectors: each do* macro below passes exactly one of these as
// the final argument of ROCC_INSTRUCTION.
#define k_DO_WRITE 0
#define k_DO_READ 1
#define k_DO_LOAD 2
#define k_DO_ACCUM 3

// First argument handed to ROCC_INSTRUCTION by every macro in this header.
#define XCUSTOM_ACC 0

// Each wrapper expands to one ROCC_INSTRUCTION invocation and already ends in
// ';', so the ';' written at a call site becomes a harmless empty statement.
// NOTE(review): the meaning of each argument position is defined by
// ROCC_INSTRUCTION in xcustom.h, which is not visible here -- confirm there.

// Issues k_DO_WRITE with `data` as the third ROCC_INSTRUCTION argument.
#define doWrite(y, rocc_rd, data) ROCC_INSTRUCTION(XCUSTOM_ACC, y, data, rocc_rd, k_DO_WRITE);

// Issues k_DO_READ; the third ROCC_INSTRUCTION argument is the constant 0.
#define doRead(y, rocc_rd) ROCC_INSTRUCTION(XCUSTOM_ACC, y, 0, rocc_rd, k_DO_READ);

// Issues k_DO_LOAD with `mem_addr` as the third ROCC_INSTRUCTION argument.
#define doLoad(y, rocc_rd, mem_addr) ROCC_INSTRUCTION(XCUSTOM_ACC, y, mem_addr, rocc_rd, k_DO_LOAD);

// Issues k_DO_ACCUM with `data` as the third ROCC_INSTRUCTION argument.
#define doAccum(y, rocc_rd, data) ROCC_INSTRUCTION(XCUSTOM_ACC, y, data, rocc_rd, k_DO_ACCUM);
|
||||
|
||||
#endif // SRC_MAIN_C_ACCUMULATOR_H
|
||||
10
gemmini/include/character.h
Normal file
10
gemmini/include/character.h
Normal file
@@ -0,0 +1,10 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#ifndef SRC_MAIN_C_CHARACTER_H
|
||||
#define SRC_MAIN_C_CHARACTER_H
|
||||
|
||||
#include "rocc-software/src/xcustom.h"
|
||||
|
||||
#define XCUSTOM_CHAR 2
|
||||
|
||||
#endif // SRC_MAIN_C_CHARACTER_H
|
||||
3611
gemmini/include/gemmini.h
Normal file
3611
gemmini/include/gemmini.h
Normal file
File diff suppressed because it is too large
Load Diff
79
gemmini/include/gemmini_counter.h
Normal file
79
gemmini/include/gemmini_counter.h
Normal file
@@ -0,0 +1,79 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#ifndef COUNTER_H_
|
||||
#define COUNTER_H_
|
||||
|
||||
// Identifier 0 is named DISABLE; real counter identifiers start at 1.
#define DISABLE                                   0

// Highest identifier in the first group of counters (1..44). The identifiers
// after that group are expressed as offsets from this value.
#define INCREMENTAL_COUNTERS                      44

// All existing Gemmini performance counters

// --- main load / store / execute cycle counters ---
#define MAIN_LD_CYCLES                            1
#define MAIN_ST_CYCLES                            2
#define MAIN_EX_CYCLES                            3
#define MAIN_LD_ST_CYCLES                         4
#define MAIN_LD_EX_CYCLES                         5
#define MAIN_ST_EX_CYCLES                         6
#define MAIN_LD_ST_EX_CYCLES                      7

// --- load controller ---
#define LOAD_DMA_WAIT_CYCLE                       8
#define LOAD_ACTIVE_CYCLE                         9
#define LOAD_SCRATCHPAD_WAIT_CYCLE                10

// --- store controller ---
#define STORE_DMA_WAIT_CYCLE                      11
#define STORE_ACTIVE_CYCLE                        12
#define STORE_POOLING_CYCLE                       13
#define STORE_SCRATCHPAD_WAIT_CYCLE               14

// --- DMA TLB ---
#define DMA_TLB_MISS_CYCLE                        15
#define DMA_TLB_HIT_REQ                           16
#define DMA_TLB_TOTAL_REQ                         17

// --- read DMA ---
#define RDMA_ACTIVE_CYCLE                         18
#define RDMA_TLB_WAIT_CYCLES                      19
#define RDMA_TL_WAIT_CYCLES                       20

// --- write DMA ---
#define WDMA_ACTIVE_CYCLE                         21
#define WDMA_TLB_WAIT_CYCLES                      22
#define WDMA_TL_WAIT_CYCLES                       23

// --- execute controller ---
#define EXE_ACTIVE_CYCLE                          24
#define EXE_FLUSH_CYCLE                           25
#define EXE_CONTROL_Q_BLOCK_CYCLE                 26
#define EXE_PRELOAD_HAZ_CYCLE                     27
#define EXE_OVERLAP_HAZ_CYCLE                     28

// --- scratchpad operand waits ---
#define SCRATCHPAD_A_WAIT_CYCLE                   29
#define SCRATCHPAD_B_WAIT_CYCLE                   30
#define SCRATCHPAD_D_WAIT_CYCLE                   31

// --- accumulator operand waits ---
#define ACC_A_WAIT_CYCLE                          32
#define ACC_B_WAIT_CYCLE                          33
#define ACC_D_WAIT_CYCLE                          34

// --- garbage cycles per operand ---
#define A_GARBAGE_CYCLES                          35
#define B_GARBAGE_CYCLES                          36
#define D_GARBAGE_CYCLES                          37

// --- im2col ---
#define IM2COL_MEM_CYCLES                         38
#define IM2COL_ACTIVE_CYCLES                      39
#define IM2COL_TRANSPOSER_WAIT_CYCLE              40

// --- reservation station ---
#define RESERVATION_STATION_FULL_CYCLES           41
#define RESERVATION_STATION_ACTIVE_CYCLES         42

// --- loop unrollers ---
#define LOOP_MATMUL_ACTIVE_CYCLES                 43
#define TRANSPOSE_PRELOAD_UNROLLER_ACTIVE_CYCLES  44

// --- identifiers numbered relative to INCREMENTAL_COUNTERS ---
#define RESERVATION_STATION_LD_COUNT              (INCREMENTAL_COUNTERS + 1)
#define RESERVATION_STATION_ST_COUNT              (INCREMENTAL_COUNTERS + 2)
#define RESERVATION_STATION_EX_COUNT              (INCREMENTAL_COUNTERS + 3)

#define RDMA_BYTES_REC                            (INCREMENTAL_COUNTERS + 4)
#define WDMA_BYTES_SENT                           (INCREMENTAL_COUNTERS + 5)

#define RDMA_TOTAL_LATENCY                        (INCREMENTAL_COUNTERS + 6)
#define WDMA_TOTAL_LATENCY                        (INCREMENTAL_COUNTERS + 7)
|
||||
|
||||
#endif
|
||||
576
gemmini/include/gemmini_nn.h
Normal file
576
gemmini/include/gemmini_nn.h
Normal file
@@ -0,0 +1,576 @@
|
||||
#ifndef GEMMINI_NN_H
|
||||
#define GEMMINI_NN_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#ifndef BAREMETAL
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
#include "include/gemmini.h"
|
||||
#include "include/gemmini_testutils.h"
|
||||
|
||||
struct ConvParams {
|
||||
int batch_size;
|
||||
int in_row_dim;
|
||||
int in_col_dim;
|
||||
int out_row_dim;
|
||||
int out_col_dim;
|
||||
int kernel_size;
|
||||
int in_channels;
|
||||
int out_channels;
|
||||
int in_stride;
|
||||
int weight_stride;
|
||||
int out_stride;
|
||||
int stride;
|
||||
int padding;
|
||||
bool bias;
|
||||
bool depthwise;
|
||||
int n_patches;
|
||||
int patch_size;
|
||||
acc_scale_t output_scale;
|
||||
scale_t res_scale;
|
||||
int pool_size, pool_stride, pool_padding, out_dim_pooled;
|
||||
|
||||
int I, J, K;
|
||||
};
|
||||
|
||||
struct FcParams {
|
||||
int batch_size;
|
||||
int in_features;
|
||||
int out_features;
|
||||
acc_scale_t output_scale;
|
||||
bool bias;
|
||||
|
||||
int I, J, K;
|
||||
};
|
||||
|
||||
// Prints, for every value in [-128, 127] that occurs at least once in the
// 4-D array IMAGES, a line "<value>: <count> times".
//
// IMAGES must be an expression of true array type in all four dimensions: the
// extents come from sizeof, so a pointer (e.g. an array function parameter)
// gives wrong bounds.
//
// The argument is parenthesized wherever it is indexed so that expressions
// such as `*p` bind correctly, and the macro's locals carry a hist_ prefix so
// they cannot capture identifiers (i, j, num, ...) used inside IMAGES.
#define HIST_IMAGES(IMAGES) \
    for (int hist_num_ = -128; hist_num_ <= 127; hist_num_++) { \
        int hist_count_ = 0; \
        for (size_t hist_i_ = 0; hist_i_ < sizeof(IMAGES)/sizeof((IMAGES)[0]); hist_i_++) { \
            for (size_t hist_j_ = 0; hist_j_ < sizeof((IMAGES)[0])/sizeof((IMAGES)[0][0]); hist_j_++) { \
                for (size_t hist_k_ = 0; hist_k_ < sizeof((IMAGES)[0][0])/sizeof((IMAGES)[0][0][0]); hist_k_++) { \
                    for (size_t hist_l_ = 0; hist_l_ < sizeof((IMAGES)[0][0][0])/sizeof((IMAGES)[0][0][0][0]); hist_l_++) { \
                        if ((IMAGES)[hist_i_][hist_j_][hist_k_][hist_l_] == hist_num_) { \
                            hist_count_++; \
                        } \
                    } \
                } \
            } \
        } \
        if (hist_count_ > 0) { \
            printf("%d: %d times\n", hist_num_, hist_count_); \
        } \
    }
|
||||
|
||||
// Prints, for every value in [-128, 127] that occurs at least once in the
// 2-D array MATRIX, a line "<value>: <count> times".
//
// MATRIX must be an expression of true array type in both dimensions: the
// extents come from sizeof, so a pointer gives wrong bounds.
//
// The argument is parenthesized wherever it is indexed so that expressions
// such as `*p` bind correctly, and the macro's locals carry a hist_ prefix so
// they cannot capture identifiers used inside MATRIX.
#define HIST_MATRIX(MATRIX) \
    for (int hist_num_ = -128; hist_num_ <= 127; hist_num_++) { \
        int hist_count_ = 0; \
        for (size_t hist_i_ = 0; hist_i_ < sizeof(MATRIX)/sizeof((MATRIX)[0]); hist_i_++) { \
            for (size_t hist_j_ = 0; hist_j_ < sizeof((MATRIX)[0])/sizeof((MATRIX)[0][0]); hist_j_++) { \
                if ((MATRIX)[hist_i_][hist_j_] == hist_num_) { \
                    hist_count_++; \
                } \
            } \
        } \
        if (hist_count_ > 0) { \
            printf("%d: %d times\n", hist_num_, hist_count_); \
        } \
    }
|
||||
|
||||
// This function runs a tiled matrix multiplication, with explicit tiling
|
||||
// factors
|
||||
static void tiled_matmul_nn(size_t dim_I, size_t dim_J, size_t dim_K,
|
||||
const elem_t A[dim_I][dim_K], const elem_t B[dim_K][dim_J],
|
||||
const void * D, elem_t C[dim_I][dim_J],
|
||||
int act, acc_scale_t scale, bool repeating_bias,
|
||||
size_t tile_I, size_t tile_J, size_t tile_K,
|
||||
enum tiled_matmul_type_t tiled_matmul_type,
|
||||
bool check, char * layer_name)
|
||||
{
|
||||
if (check)
|
||||
printf("%s: gemmini\n", layer_name);
|
||||
|
||||
tiled_matmul(dim_I, dim_J, dim_K,
|
||||
(elem_t*)A, (elem_t*)B, D, (elem_t*)C,
|
||||
dim_K, dim_J, dim_J, dim_J,
|
||||
MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY,
|
||||
act, scale, 0, repeating_bias,
|
||||
tile_I, tile_J, tile_K,
|
||||
false, false,
|
||||
false, false,
|
||||
0,
|
||||
tiled_matmul_type);
|
||||
|
||||
if (check) {
|
||||
printf("%s: CPU\n", layer_name);
|
||||
elem_t gold[dim_I][dim_J];
|
||||
tiled_matmul_auto(dim_I, dim_J, dim_K,
|
||||
(elem_t*)A, (elem_t*)B, D, (elem_t*)gold,
|
||||
dim_K, dim_J, dim_J, dim_J,
|
||||
MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY,
|
||||
act, scale, 0, repeating_bias,
|
||||
false, false,
|
||||
false, false,
|
||||
0,
|
||||
CPU);
|
||||
|
||||
if (!MAT_IS_EQUAL(dim_I, dim_J, C, gold)) {
|
||||
printf("Layer calculated incorrectly: %s\n", layer_name);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This function runs a tiled matrix multiplication, with automatically
|
||||
// calculated tiling factors
|
||||
// With default auto-stride calc (A_stride = dim_K, B_stride/C_stride/D_stride = dim_J)
|
||||
static void tiled_matmul_nn_auto(size_t dim_I, size_t dim_J, size_t dim_K,
|
||||
const elem_t A[dim_I][dim_K], const elem_t B[dim_K][dim_J],
|
||||
const void * D, elem_t C[dim_I][dim_J],
|
||||
int act, acc_scale_t scale, bool repeating_bias,
|
||||
enum tiled_matmul_type_t tiled_matmul_type,
|
||||
bool check, char * layer_name)
|
||||
{
|
||||
if (check)
|
||||
printf("%s: gemmini\n", layer_name);
|
||||
|
||||
tiled_matmul_auto(dim_I, dim_J, dim_K,
|
||||
(elem_t*)A, (elem_t*)B, D, (elem_t*)C,
|
||||
dim_K, dim_J, dim_J, dim_J,
|
||||
MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY,
|
||||
act, scale, 0, repeating_bias,
|
||||
false, false,
|
||||
false, false,
|
||||
0,
|
||||
tiled_matmul_type);
|
||||
|
||||
if (check) {
|
||||
printf("%s: CPU\n", layer_name);
|
||||
elem_t gold[dim_I][dim_J];
|
||||
tiled_matmul_auto(dim_I, dim_J, dim_K,
|
||||
(elem_t*)A, (elem_t*)B, D, (elem_t*)gold,
|
||||
dim_K, dim_J, dim_J, dim_J,
|
||||
MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY,
|
||||
act, scale, 0, repeating_bias,
|
||||
false, false,
|
||||
false, false,
|
||||
0,
|
||||
CPU);
|
||||
|
||||
if (!MAT_IS_EQUAL(dim_I, dim_J, C, gold)) {
|
||||
printf("Layer calculated incorrectly: %s\n", layer_name);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// need to specify stride
|
||||
// auto tiling calc
|
||||
static void tiled_matmul_nn_stride_auto(size_t dim_I, size_t dim_J, size_t dim_K,
|
||||
const size_t A_stride, const size_t B_stride, const size_t C_stride,
|
||||
const elem_t * A, const elem_t * B, const void * D, const elem_t * C,
|
||||
int act, acc_scale_t scale, bool repeating_bias,
|
||||
enum tiled_matmul_type_t tiled_matmul_type)
|
||||
{
|
||||
|
||||
tiled_matmul_auto(dim_I, dim_J, dim_K,
|
||||
(elem_t*)A, (elem_t*)B, D, (elem_t*)C,
|
||||
A_stride, B_stride, C_stride, C_stride,
|
||||
MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY, MVIN_SCALE_IDENTITY,
|
||||
act, scale, 0, repeating_bias,
|
||||
false, false,
|
||||
false, false,
|
||||
0,
|
||||
tiled_matmul_type);
|
||||
}
|
||||
static void conv_dw(size_t I, size_t J,
|
||||
const size_t batch_size, const size_t channels,
|
||||
const size_t in_row_dim, const size_t in_col_dim,
|
||||
const size_t out_row_dim, const size_t out_col_dim,
|
||||
const size_t kernel_size,
|
||||
const elem_t input[batch_size][in_row_dim][in_col_dim][channels],
|
||||
const elem_t weight[channels][kernel_size][kernel_size],
|
||||
const acc_t * bias,
|
||||
// elem_t output [batch_size][out_row_dim][out_col_dim][channels],
|
||||
elem_t output [I][J],
|
||||
const struct ConvParams * params)
|
||||
{
|
||||
for (int batch = 0; batch < batch_size; batch++) {
|
||||
for (int channel = 0; channel < channels; channel++) {
|
||||
for (int out_row = 0; out_row < out_row_dim; out_row++) {
|
||||
for (int out_col = 0; out_col < out_col_dim; out_col++) {
|
||||
int in_row = out_row * params->stride - params->padding;
|
||||
|
||||
acc_t result = 0;
|
||||
if (params->bias) {
|
||||
result = bias[channel];
|
||||
}
|
||||
|
||||
for (int kernel_row = 0; kernel_row < params->kernel_size; kernel_row++) {
|
||||
int in_col = out_col * params->stride - params->padding;
|
||||
|
||||
for (int kernel_col = 0; kernel_col < params->kernel_size; kernel_col++) {
|
||||
if (in_row >= 0 && in_row < params->in_row_dim && in_col >= 0 && in_col < params->in_col_dim) {
|
||||
result += input[batch][in_row][in_col][channel] * weight[channel][kernel_row][kernel_col];
|
||||
}
|
||||
|
||||
in_col++;
|
||||
}
|
||||
|
||||
in_row++;
|
||||
}
|
||||
|
||||
if (result < 0) {
|
||||
result = 0;
|
||||
}
|
||||
|
||||
acc_t scaled = ACC_SCALE(result, params->output_scale);
|
||||
|
||||
if (scaled > elem_t_max) {
|
||||
scaled = elem_t_max;
|
||||
} else if (scaled < elem_t_min) {
|
||||
scaled = elem_t_min;
|
||||
}
|
||||
|
||||
size_t r = batch * params->out_row_dim * params->out_col_dim + out_row * params->out_col_dim + out_col;
|
||||
output[r][channel] = scaled;
|
||||
// output[batch][out_row][out_col][channel] = scaled;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void conv_dw_with_col2im(size_t prev_I, size_t prev_J, size_t I, size_t J,
|
||||
const size_t batch_size, const size_t channels,
|
||||
const size_t out_row_dim, const size_t out_col_dim, const size_t kernel_size,
|
||||
const elem_t input[prev_I][prev_J],
|
||||
const elem_t weight[channels][kernel_size][kernel_size],
|
||||
const acc_t * bias,
|
||||
// elem_t output [batch_size][out_dim][out_dim][channels],
|
||||
elem_t output [I][J],
|
||||
const struct ConvParams * params)
|
||||
{
|
||||
for (int batch = 0; batch < batch_size; batch++) {
|
||||
for (int channel = 0; channel < channels; channel++) {
|
||||
for (int out_row = 0; out_row < out_row_dim; out_row++) {
|
||||
for (int out_col = 0; out_col < out_col_dim; out_col++) {
|
||||
int in_row = out_row * params->stride - params->padding;
|
||||
|
||||
acc_t result = 0;
|
||||
if (params->bias) {
|
||||
result = bias[channel];
|
||||
}
|
||||
|
||||
for (int kernel_row = 0; kernel_row < params->kernel_size; kernel_row++) {
|
||||
int in_col = out_col * params->stride - params->padding;
|
||||
|
||||
for (int kernel_col = 0; kernel_col < params->kernel_size; kernel_col++) {
|
||||
if (in_row >= 0 && in_row < params->in_row_dim && in_col >= 0 && in_col < params->in_col_dim) {
|
||||
// result += input[batch][in_row][in_col][channel] * weight[channel][kernel_row][kernel_col];
|
||||
|
||||
size_t r = batch * params->in_row_dim * params->in_col_dim + in_row * params->in_col_dim + in_col;
|
||||
|
||||
result += input[r][channel] * weight[channel][kernel_row][kernel_col];
|
||||
}
|
||||
|
||||
in_col++;
|
||||
}
|
||||
|
||||
in_row++;
|
||||
}
|
||||
|
||||
if (result < 0) {
|
||||
result = 0;
|
||||
}
|
||||
|
||||
acc_t scaled = ACC_SCALE(result, params->output_scale);
|
||||
|
||||
if (scaled > elem_t_max) {
|
||||
scaled = elem_t_max;
|
||||
} else if (scaled < elem_t_min) {
|
||||
scaled = elem_t_min;
|
||||
}
|
||||
|
||||
size_t r = batch * params->out_row_dim * params->out_col_dim + out_row * params->out_col_dim + out_col;
|
||||
output[r][channel] = scaled;
|
||||
// output[batch][out_row][out_col][channel] = scaled;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void im2col(size_t batch_size, size_t channels, size_t im_row_dim, size_t im_col_dim,
|
||||
size_t I, size_t K,
|
||||
const elem_t input[batch_size][im_row_dim][im_col_dim][channels],
|
||||
elem_t output[I][K],
|
||||
const struct ConvParams * params)
|
||||
{
|
||||
int patch_row = 0;
|
||||
|
||||
for (int n_batch = 0; n_batch < params->batch_size; n_batch++) {
|
||||
for (int im_row = -params->padding; im_row < params->in_row_dim - params->kernel_size + params->padding + 1; im_row += params->stride) {
|
||||
for (int im_col = -params->padding; im_col < params->in_col_dim - params->kernel_size + params->padding + 1; im_col += params->stride) {
|
||||
int patch_col = 0;
|
||||
|
||||
for (int filter_row = 0; filter_row < params->kernel_size; filter_row++) {
|
||||
for (int filter_col = 0; filter_col < params->kernel_size; filter_col++) {
|
||||
for (int im_channel = 0; im_channel < params->in_channels; im_channel++) {
|
||||
int pixel_row = im_row + filter_row;
|
||||
int pixel_col = im_col + filter_col;
|
||||
|
||||
if (pixel_row < 0 || pixel_row >= params->in_row_dim
|
||||
|| pixel_col < 0 || pixel_col >= params->in_col_dim) {
|
||||
// output[patch_row][patch_col] = 0;
|
||||
} else {
|
||||
output[patch_row][patch_col] = input[n_batch][pixel_row][pixel_col][im_channel];
|
||||
}
|
||||
|
||||
patch_col++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
patch_row++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void im2col_with_col2im(size_t prev_I, size_t prev_J,
|
||||
size_t next_I, size_t next_K,
|
||||
const elem_t input[prev_I][prev_J],
|
||||
elem_t output[next_I][next_K],
|
||||
const struct ConvParams * params)
|
||||
{
|
||||
int out_row = 0;
|
||||
|
||||
for (int n_batch = 0; n_batch < params->batch_size; n_batch++) {
|
||||
for (int im_row = -params->padding; im_row < params->in_row_dim - params->kernel_size + params->padding + 1; im_row += params->stride) {
|
||||
for (int im_col = -params->padding; im_col < params->in_col_dim - params->kernel_size + params->padding + 1; im_col += params->stride) {
|
||||
int out_col = 0;
|
||||
|
||||
for (int filter_row = 0; filter_row < params->kernel_size; filter_row++) {
|
||||
for (int filter_col = 0; filter_col < params->kernel_size; filter_col++) {
|
||||
for (int im_channel = 0; im_channel < params->in_channels; im_channel++) {
|
||||
int pixel_row = im_row + filter_row;
|
||||
int pixel_col = im_col + filter_col;
|
||||
|
||||
if (pixel_row < 0 || pixel_row >= params->in_row_dim
|
||||
|| pixel_col < 0 || pixel_col >= params->in_col_dim) {
|
||||
// output[out_row][out_col] = 0;
|
||||
} else {
|
||||
int in_row = n_batch * params->in_row_dim * params->in_col_dim + pixel_row * params->in_col_dim + pixel_col;
|
||||
int in_col = im_channel;
|
||||
|
||||
output[out_row][out_col] = input[in_row][in_col];
|
||||
}
|
||||
|
||||
out_col++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out_row++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute C = A + B with saturating add
|
||||
void vecadd(size_t len, const elem_t * A, const elem_t * B, elem_t * C, scale_t A_shift) {
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
acc_t result = MVIN_SCALE(A[i], A_shift) + B[i];
|
||||
|
||||
if (result > elem_t_max) {
|
||||
result = elem_t_max;
|
||||
} else if (result < elem_t_min) {
|
||||
result = elem_t_min;
|
||||
}
|
||||
|
||||
C[i] = result;
|
||||
}
|
||||
}
|
||||
|
||||
void resadd1(const size_t batch_size, const size_t channels, const size_t im_dim,
|
||||
const elem_t A[batch_size][im_dim][im_dim][channels],
|
||||
const elem_t B[batch_size][im_dim][im_dim][channels],
|
||||
elem_t C[batch_size][im_dim][im_dim][channels],
|
||||
bool relu,
|
||||
const struct ConvParams * params) {
|
||||
|
||||
const int minimum = relu ? 0 : elem_t_min;
|
||||
|
||||
for (size_t batch = 0; batch < params->batch_size; batch++) {
|
||||
for (size_t row = 0; row < params->out_dim_pooled; row++) {
|
||||
for (size_t col = 0; col < params->out_dim_pooled; col++) {
|
||||
for (size_t channel = 0; channel < params->out_channels; channel++) {
|
||||
acc_t result = MVIN_SCALE(A[batch][row][col][channel], params->res_scale) + B[batch][row][col][channel];
|
||||
|
||||
if (result > elem_t_max) {
|
||||
result = elem_t_max;
|
||||
} else if (result < minimum) {
|
||||
result = minimum;
|
||||
}
|
||||
|
||||
C[batch][row][col][channel] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void resadd2(const size_t I, const size_t J,
|
||||
const size_t batch_size, const size_t channels, const size_t im_dim,
|
||||
const elem_t A[I][J],
|
||||
const elem_t B[batch_size][im_dim][im_dim][channels],
|
||||
elem_t C[batch_size][im_dim][im_dim][channels],
|
||||
bool relu,
|
||||
const struct ConvParams * params) {
|
||||
|
||||
const int minimum = relu ? 0 : elem_t_min;
|
||||
|
||||
for (size_t batch = 0; batch < params->batch_size; batch++) {
|
||||
for (size_t row = 0; row < params->out_dim_pooled; row++) {
|
||||
for (size_t col = 0; col < params->out_dim_pooled; col++) {
|
||||
for (size_t channel = 0; channel < params->out_channels; channel++) {
|
||||
size_t r = batch * params->out_dim_pooled * params->out_dim_pooled + row * params->out_dim_pooled + col;
|
||||
|
||||
acc_t result = MVIN_SCALE(A[r][channel], params->res_scale) + B[batch][row][col][channel];
|
||||
|
||||
if (result > elem_t_max) {
|
||||
result = elem_t_max;
|
||||
} else if (result < minimum) {
|
||||
result = minimum;
|
||||
}
|
||||
|
||||
C[batch][row][col][channel] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void resadd3(const size_t I, const size_t J,
|
||||
const elem_t A[I][J],
|
||||
const elem_t B[I][J],
|
||||
elem_t C[I][J],
|
||||
bool relu,
|
||||
const struct ConvParams * params) {
|
||||
|
||||
const int minimum = relu ? 0 : elem_t_min;
|
||||
|
||||
for (size_t batch = 0; batch < params->batch_size; batch++) {
|
||||
for (size_t row = 0; row < params->out_dim_pooled; row++) {
|
||||
for (size_t col = 0; col < params->out_dim_pooled; col++) {
|
||||
for (size_t channel = 0; channel < params->out_channels; channel++) {
|
||||
size_t r = batch * params->out_dim_pooled * params->out_dim_pooled + row * params->out_dim_pooled + col;
|
||||
|
||||
acc_t result = MVIN_SCALE(A[r][channel], params->res_scale) + B[r][channel];
|
||||
|
||||
if (result > elem_t_max) {
|
||||
result = elem_t_max;
|
||||
} else if (result < minimum) {
|
||||
result = minimum;
|
||||
}
|
||||
|
||||
C[r][channel] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pooling
|
||||
void pool(size_t batch_size, size_t channels, size_t in_row_dim, size_t in_col_dim,
|
||||
size_t out_row_dim, size_t out_col_dim,
|
||||
elem_t input[batch_size][in_row_dim][in_col_dim][channels],
|
||||
elem_t output[batch_size][out_row_dim][out_col_dim][channels],
|
||||
const struct ConvParams * params)
|
||||
{
|
||||
size_t kernel_size = params->pool_size;
|
||||
size_t stride = params->pool_stride;
|
||||
// size_t in_dim = params->out_dim;
|
||||
size_t padding = params->pool_padding;
|
||||
|
||||
for (int batch = 0; batch < batch_size; batch++) {
|
||||
for (int channel = 0; channel < channels; channel++) {
|
||||
for (int out_row = 0; out_row < out_row_dim; out_row++) {
|
||||
for (int out_col = 0; out_col < out_col_dim; out_col++) {
|
||||
int in_row = out_row * stride - padding;
|
||||
|
||||
elem_t result = elem_t_min;
|
||||
|
||||
for (int kernel_row = 0; kernel_row < kernel_size; kernel_row++) {
|
||||
int in_col = out_col * stride - padding;
|
||||
|
||||
for (int kernel_col = 0; kernel_col < kernel_size; kernel_col++) {
|
||||
if (in_row >= 0 && in_row < in_row_dim && in_col >= 0 && in_col < in_col_dim) {
|
||||
if (input[batch][in_row][in_col][channel] > result) {
|
||||
result = input[batch][in_row][in_col][channel];
|
||||
}
|
||||
} else if (0 > result) {
|
||||
result = 0;
|
||||
}
|
||||
|
||||
in_col++;
|
||||
}
|
||||
|
||||
in_row++;
|
||||
}
|
||||
|
||||
output[batch][out_row][out_col][channel] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void pool_with_col2im(size_t I, size_t J,
|
||||
size_t batch_size, size_t channels, size_t out_row_dim, size_t out_col_dim,
|
||||
elem_t input[I][J],
|
||||
elem_t output[batch_size][out_row_dim][out_col_dim][channels],
|
||||
const struct ConvParams * params)
|
||||
{
|
||||
size_t kernel_size = params->pool_size;
|
||||
size_t stride = params->pool_stride;
|
||||
size_t in_row_dim = params->out_row_dim;
|
||||
size_t in_col_dim = params->out_col_dim;
|
||||
size_t padding = params->pool_padding;
|
||||
|
||||
for (int batch = 0; batch < batch_size; batch++) {
|
||||
for (int channel = 0; channel < channels; channel++) {
|
||||
for (int out_row = 0; out_row < out_row_dim; out_row++) {
|
||||
for (int out_col = 0; out_col < out_col_dim; out_col++) {
|
||||
int in_row = out_row * stride - padding;
|
||||
|
||||
elem_t result = elem_t_min;
|
||||
|
||||
for (int kernel_row = 0; kernel_row < kernel_size; kernel_row++) {
|
||||
int in_col = out_col * stride - padding;
|
||||
|
||||
for (int kernel_col = 0; kernel_col < kernel_size; kernel_col++) {
|
||||
if (in_row >= 0 && in_row < in_row_dim && in_col >= 0 && in_col < in_col_dim) {
|
||||
if (input[batch * in_row_dim * in_col_dim + in_row * in_col_dim + in_col][channel] > result) {
|
||||
result = input[batch * in_row_dim * in_col_dim + in_row * in_col_dim + in_col][channel];
|
||||
}
|
||||
} else if (0 > result) {
|
||||
result = 0;
|
||||
}
|
||||
|
||||
in_col++;
|
||||
}
|
||||
|
||||
in_row++;
|
||||
}
|
||||
|
||||
output[batch][out_row][out_col][channel] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // GEMMINI_NN_H
|
||||
|
||||
90
gemmini/include/gemmini_params.dim16fp16.h
Normal file
90
gemmini/include/gemmini_params.dim16fp16.h
Normal file
@@ -0,0 +1,90 @@
|
||||
#ifndef GEMMINI_PARAMS_H
|
||||
#define GEMMINI_PARAMS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define XCUSTOM_ACC 3
|
||||
#define DIM 16
|
||||
#define ADDR_LEN 32
|
||||
#define BANK_NUM 4
|
||||
#define BANK_ROWS 1024
|
||||
#define ACC_ROWS 1024
|
||||
#define MAX_BYTES 64
|
||||
#define MAX_BLOCK_LEN (MAX_BYTES/(DIM*2))
|
||||
#define MAX_BLOCK_LEN_ACC (MAX_BYTES/(DIM*2))
|
||||
|
||||
typedef uint16_t elem_t;
|
||||
#define ELEM_T_IS_LOWPREC_FLOAT
|
||||
static const float elem_t_max = 65504.0;
|
||||
static const float elem_t_min = -65504.0;
|
||||
typedef uint16_t acc_t;
|
||||
typedef double full_t;
|
||||
|
||||
#define ELEM_T_IS_FLOAT
|
||||
#define ELEM_T_EXP_BITS 5
|
||||
#define ELEM_T_SIG_BITS 11
|
||||
#define ACC_T_EXP_BITS 5
|
||||
#define ACC_T_SIG_BITS 11
|
||||
typedef uint16_t elem_t_bits;
|
||||
typedef uint16_t acc_t_bits;
|
||||
|
||||
#define HAS_MVIN_SCALE
|
||||
typedef uint16_t scale_t;
|
||||
typedef uint16_t scale_t_bits;
|
||||
|
||||
typedef int32_t scale_acc_t;
|
||||
typedef uint32_t scale_acc_t_bits;
|
||||
|
||||
typedef uint16_t acc_scale_t;
|
||||
typedef uint16_t acc_scale_t_bits;
|
||||
|
||||
// Alignment attributes for buffers spanning `blocks` rows of DIM elements
// (elem_t for row_align, acc_t for row_align_acc). `blocks` is parenthesized
// so that an expression argument such as `n + 1` scales as a whole.
#define row_align(blocks) __attribute__((aligned((blocks)*DIM*sizeof(elem_t))))
#define row_align_acc(blocks) __attribute__((aligned((blocks)*DIM*sizeof(acc_t))))
|
||||
|
||||
#define MVIN_SCALE_IDENTITY 0x3c00
|
||||
|
||||
#define ACC_SCALE_IDENTITY 1.0
|
||||
|
||||
#define ROUNDING_RIGHT_SHIFT(x, shift) \
|
||||
((x) / (1 << (shift)))
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define SAME_TYPE(x) decltype(x)
|
||||
#else
|
||||
#define SAME_TYPE(x) typeof(x)
|
||||
#endif
|
||||
|
||||
#define ROUND_NEAR_EVEN(x) \
|
||||
({ const SAME_TYPE(x) x_ = (x); \
|
||||
const long long i = x_; \
|
||||
const long long next = x_ < 0 ? x_ - 1 : x_ + 1; \
|
||||
SAME_TYPE(x) rem = x_ - i; \
|
||||
rem = rem < 0 ? -rem : rem; \
|
||||
SAME_TYPE(x) result = rem < 0.5 ? i : (rem > 0.5 ? next : ( \
|
||||
i % 2 == 0 ? i : next)); \
|
||||
result; })
|
||||
|
||||
// Rounding right shift equation: https://riscv.github.io/documents/riscv-v-spec/#_vector_fixed_point_rounding_mode_register_vxrm
|
||||
#define ROUNDING_RIGHT_SHIFT_BITS(x, shift) \
|
||||
((shift) > 0 ? (((x) >> (shift)) + \
|
||||
(((shift) == 0 ? 0 : (((x) >> ((shift)-1)) & 1)) & \
|
||||
((((shift) <= 1 ? 0 : ((x) & ((1 << ((shift)-1)) - 1))) != 0) | (((x) >> (shift)) & 1)))) : ((x) << (-(shift))))
|
||||
|
||||
#define ACC_SCALE(x, scale) \
|
||||
((x))
|
||||
|
||||
#define MVIN_SCALE(x, scale) \
|
||||
((x) * (scale))
|
||||
|
||||
#define MVIN_SCALE_ACC(x, scale) (x)
|
||||
|
||||
#define ACC_SCALE_T_IS_FLOAT
|
||||
#define ACC_SCALE_EXP_BITS 5
|
||||
#define ACC_SCALE_SIG_BITS 11
|
||||
|
||||
#define ACC_READ_SMALL_WIDTH
|
||||
|
||||
#define HAS_FIRST_LAYER_OPTIMIZATIONS
|
||||
|
||||
#endif // GEMMINI_PARAMS_H
|
||||
92
gemmini/include/gemmini_params.dim8fp32.h
Normal file
92
gemmini/include/gemmini_params.dim8fp32.h
Normal file
@@ -0,0 +1,92 @@
|
||||
#ifndef GEMMINI_PARAMS_H
|
||||
#define GEMMINI_PARAMS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define XCUSTOM_ACC 3
|
||||
#define DIM 8
|
||||
#define ADDR_LEN 32
|
||||
#define BANK_NUM 8
|
||||
#define BANK_ROWS 1024
|
||||
#define ACC_ROWS 512
|
||||
#define MAX_BYTES 64
|
||||
#define MAX_BLOCK_LEN (MAX_BYTES/(DIM*4))
|
||||
#define MAX_BLOCK_LEN_ACC (MAX_BYTES/(DIM*4))
|
||||
|
||||
typedef float elem_t;
|
||||
static const elem_t elem_t_max = 3.4028235E38;
|
||||
static const elem_t elem_t_min = -3.4028235E38;
|
||||
typedef float acc_t;
|
||||
typedef double full_t;
|
||||
|
||||
#define ELEM_T_IS_FLOAT
|
||||
#define ELEM_T_EXP_BITS 8
|
||||
#define ELEM_T_SIG_BITS 24
|
||||
#define ACC_T_EXP_BITS 8
|
||||
#define ACC_T_SIG_BITS 24
|
||||
typedef uint32_t elem_t_bits;
|
||||
typedef uint32_t acc_t_bits;
|
||||
|
||||
#define HAS_MVIN_SCALE
|
||||
typedef float scale_t;
|
||||
typedef uint32_t scale_t_bits;
|
||||
|
||||
#define HAS_MVIN_ACC_SCALE
|
||||
typedef float scale_acc_t;
|
||||
typedef uint32_t scale_acc_t_bits;
|
||||
|
||||
typedef float acc_scale_t;
|
||||
typedef uint32_t acc_scale_t_bits;
|
||||
|
||||
// Alignment attributes for buffers spanning `blocks` rows of DIM elements
// (elem_t for row_align, acc_t for row_align_acc). `blocks` is parenthesized
// so that an expression argument such as `n + 1` scales as a whole.
#define row_align(blocks) __attribute__((aligned((blocks)*DIM*sizeof(elem_t))))
#define row_align_acc(blocks) __attribute__((aligned((blocks)*DIM*sizeof(acc_t))))
|
||||
|
||||
#define MVIN_SCALE_IDENTITY 1.0
|
||||
|
||||
#define ACC_SCALE_IDENTITY 1.0
|
||||
|
||||
#define ROUNDING_RIGHT_SHIFT(x, shift) \
|
||||
((x) / (1 << (shift)))
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define SAME_TYPE(x) decltype(x)
|
||||
#else
|
||||
#define SAME_TYPE(x) typeof(x)
|
||||
#endif
|
||||
|
||||
#define ROUND_NEAR_EVEN(x) \
|
||||
({ const SAME_TYPE(x) x_ = (x); \
|
||||
const long long i = x_; \
|
||||
const long long next = x_ < 0 ? x_ - 1 : x_ + 1; \
|
||||
SAME_TYPE(x) rem = x_ - i; \
|
||||
rem = rem < 0 ? -rem : rem; \
|
||||
SAME_TYPE(x) result = rem < 0.5 ? i : (rem > 0.5 ? next : ( \
|
||||
i % 2 == 0 ? i : next)); \
|
||||
result; })
|
||||
|
||||
// Rounding right shift equation: https://riscv.github.io/documents/riscv-v-spec/#_vector_fixed_point_rounding_mode_register_vxrm
|
||||
#define ROUNDING_RIGHT_SHIFT_BITS(x, shift) \
|
||||
((shift) > 0 ? (((x) >> (shift)) + \
|
||||
(((shift) == 0 ? 0 : (((x) >> ((shift)-1)) & 1)) & \
|
||||
((((shift) <= 1 ? 0 : ((x) & ((1 << ((shift)-1)) - 1))) != 0) | (((x) >> (shift)) & 1)))) : ((x) << (-(shift))))
|
||||
|
||||
#define ACC_SCALE(x, scale) \
|
||||
((x) * (scale))
|
||||
|
||||
#define MVIN_SCALE(x, scale) \
|
||||
((x) * (scale))
|
||||
|
||||
#define MVIN_SCALE_ACC(x, scale) \
|
||||
((x) * (scale))
|
||||
|
||||
#define ACC_SCALE_T_IS_FLOAT
|
||||
#define ACC_SCALE_EXP_BITS 8
|
||||
#define ACC_SCALE_SIG_BITS 24
|
||||
|
||||
#define ACC_READ_SMALL_WIDTH
|
||||
#define ACC_READ_FULL_WIDTH
|
||||
|
||||
#define HAS_FIRST_LAYER_OPTIMIZATIONS
|
||||
|
||||
#endif // GEMMINI_PARAMS_H
|
||||
1
gemmini/include/gemmini_params.h
Symbolic link
1
gemmini/include/gemmini_params.h
Symbolic link
@@ -0,0 +1 @@
|
||||
gemmini_params.dim16fp16.h
|
||||
285
gemmini/include/gemmini_testutils.h
Normal file
285
gemmini/include/gemmini_testutils.h
Normal file
@@ -0,0 +1,285 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#ifndef SRC_MAIN_C_GEMMINI_TESTUTILS_H
|
||||
#define SRC_MAIN_C_GEMMINI_TESTUTILS_H
|
||||
|
||||
#undef abs
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "include/gemmini_params.h"
|
||||
#include "include/gemmini.h"
|
||||
|
||||
#ifdef BAREMETAL
// Replacement for assert() on bare-metal builds: prints the failed
// expression together with file and line, then terminates with exit(1).
//
// Wrapped in do { ... } while (0) so the macro behaves as one statement; the
// previous bare `if { }` form did not compile in
// `if (a) assert(b); else ...`.
#undef assert
#define assert(expr) \
    do { \
        if (!(expr)) { \
            printf("Failed assertion: " #expr "\n " __FILE__ ":%u\n", __LINE__); \
            exit(1); \
        } \
    } while (0)
#endif
|
||||
|
||||
// #define GEMMINI_ASSERTIONS
|
||||
|
||||
// Matmul utility functions
|
||||
// Reference DIM x DIM matmul with bias: C_full = D + A * B, accumulated in
// full_t.
static void matmul(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
    for (size_t row = 0; row < DIM; row++) {
        for (size_t col = 0; col < DIM; col++) {
            // Start from the bias, then add the dot product term by term.
            C_full[row][col] = D[row][col];

            for (size_t inner = 0; inner < DIM; inner++) {
                C_full[row][col] += A[row][inner]*B[inner][col];
            }
        }
    }
}
|
||||
|
||||
// Same computation as matmul (C = D + A * B), but the result is accumulated
// in an elem_t output matrix instead of a full_t one.
static void matmul_short(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], elem_t C[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      elem_t *cell = &C[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[row][inner]*B[inner][col];
      }
    }
  }
}
|
||||
|
||||
// Same computation as matmul (C_full = D + A * B), except that the bias
// matrix D is supplied as full_t rather than elem_t.
static void matmul_full(elem_t A[DIM][DIM], elem_t B[DIM][DIM], full_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      full_t *cell = &C_full[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[row][inner]*B[inner][col];
      }
    }
  }
}
|
||||
|
||||
// Reference matmul with A read transposed: C_full = D + A^T * B.
static void matmul_A_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      full_t *cell = &C_full[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[inner][row]*B[inner][col];
      }
    }
  }
}
|
||||
|
||||
// C = D + A^T * B, accumulated in an elem_t output matrix.
static void matmul_short_A_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], elem_t C[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      elem_t *cell = &C[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[inner][row]*B[inner][col];
      }
    }
  }
}
|
||||
|
||||
// C_full = D + A^T * B, with the bias matrix D supplied as full_t.
static void matmul_full_A_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], full_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      full_t *cell = &C_full[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[inner][row]*B[inner][col];
      }
    }
  }
}
|
||||
|
||||
// Reference matmul with B read transposed: C_full = D + A * B^T.
static void matmul_B_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      full_t *cell = &C_full[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[row][inner]*B[col][inner];
      }
    }
  }
}
|
||||
|
||||
// C = D + A * B^T, accumulated in an elem_t output matrix.
static void matmul_short_B_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], elem_t C[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      elem_t *cell = &C[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[row][inner]*B[col][inner];
      }
    }
  }
}
|
||||
|
||||
// C_full = D + A * B^T, with the bias matrix D supplied as full_t.
static void matmul_full_B_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], full_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      full_t *cell = &C_full[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[row][inner]*B[col][inner];
      }
    }
  }
}
|
||||
|
||||
// Reference matmul with both operands read transposed:
// C_full = D + A^T * B^T.
static void matmul_AB_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      full_t *cell = &C_full[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[inner][row]*B[col][inner];
      }
    }
  }
}
|
||||
|
||||
// C = D + A^T * B^T, accumulated in an elem_t output matrix.
static void matmul_short_AB_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], elem_t D[DIM][DIM], elem_t C[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      elem_t *cell = &C[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[inner][row]*B[col][inner];
      }
    }
  }
}
|
||||
|
||||
// C_full = D + A^T * B^T, with the bias matrix D supplied as full_t.
static void matmul_full_AB_transposed(elem_t A[DIM][DIM], elem_t B[DIM][DIM], full_t D[DIM][DIM], full_t C_full[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      full_t *cell = &C_full[row][col];

      *cell = D[row][col];  // start from the bias term
      for (size_t inner = 0; inner < DIM; inner++) {
        *cell += A[inner][row]*B[col][inner];
      }
    }
  }
}
|
||||
|
||||
// Element-wise addition of two DIM x DIM full_t matrices: sum = m1 + m2.
static void matadd(full_t sum[DIM][DIM], full_t m1[DIM][DIM], full_t m2[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      sum[row][col] = m1[row][col] + m2[row][col];
    }
  }
}
|
||||
|
||||
// THIS IS A ROUNDING SHIFT! It also performs a saturating cast
|
||||
static void matshift(full_t full[DIM][DIM], elem_t out[DIM][DIM], int shift) {
|
||||
for (size_t r = 0; r < DIM; r++)
|
||||
for (size_t c = 0; c < DIM; c++) {
|
||||
// Bitshift and round element
|
||||
full_t shifted = ROUNDING_RIGHT_SHIFT(full[r][c], shift);
|
||||
|
||||
// Saturate and cast element
|
||||
#ifndef ELEM_T_IS_FLOAT
|
||||
full_t elem = shifted > elem_t_max ? elem_t_max : (shifted < elem_t_min ? elem_t_min : shifted);
|
||||
out[r][c] = elem;
|
||||
#else
|
||||
out[r][c] = shifted; // TODO should we also saturate when using floats?
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Applies ACC_SCALE with the given scale to every element of `full` and
// stores the result in `out`. For non-float element types the value is also
// saturated to [elem_t_min, elem_t_max] before the narrowing store.
static void matscale(full_t full[DIM][DIM], elem_t out[DIM][DIM], acc_scale_t scale) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      // Apply the accumulator scaling macro
      full_t scaled = ACC_SCALE(full[row][col], scale);

#ifndef ELEM_T_IS_FLOAT
      // Clamp into the representable elem_t range
      if (scaled > elem_t_max) {
        scaled = elem_t_max;
      } else if (scaled < elem_t_min) {
        scaled = elem_t_min;
      }
#endif
      // TODO should we also saturate when using floats?
      out[row][col] = scaled;
    }
  }
}
|
||||
|
||||
// Element-wise ReLU: positive inputs are copied through, everything else
// becomes 0.
static void matrelu(elem_t in[DIM][DIM], elem_t out[DIM][DIM]) {
  for (size_t row = 0; row < DIM; row++) {
    for (size_t col = 0; col < DIM; col++) {
      if (in[row][col] > 0) {
        out[row][col] = in[row][col];
      } else {
        out[row][col] = 0;
      }
    }
  }
}
|
||||
|
||||
// Writes the transpose of `in` into `out` (out[c][r] = in[r][c]).
static void transpose(elem_t in[DIM][DIM], elem_t out[DIM][DIM]) {
  for (size_t src_row = 0; src_row < DIM; src_row++) {
    for (size_t src_col = 0; src_col < DIM; src_col++) {
      out[src_col][src_row] = in[src_row][src_col];
    }
  }
}
|
||||
|
||||
// Deterministic pseudo-random generator: a 32-bit linear congruential
// generator with a fixed seed (777), so every run produces the same
// sequence. Only the top 8 bits of the state are returned, i.e. values
// in [0, 255].
int rand() {
  static uint32_t state = 777;

  const uint32_t next = state * 1664525 + 1013904223;
  state = next;
  return next >> 24;
}
|
||||
|
||||
|
||||
#ifdef ELEM_T_IS_FLOAT
// Returns the difference of two pseudo-random ratios, each of the form
// (rand() % 128) / (1 + rand() % 64), so the result may be negative.
//
// Each rand() call is made in its own statement: C leaves the evaluation
// order of the two operands of `/` unspecified, so calling rand() twice
// inside one expression could yield different values on different compilers.
// Drawing numerator first, then denominator, makes the sequence reproducible.
double rand_double() {
  const int a_num = rand() % 128;
  const int a_den = 1 + (rand() % 64);  // 1..64, never zero
  const int b_num = rand() % 128;
  const int b_den = 1 + (rand() % 64);  // 1..64, never zero

  const double a = (double)a_num / (double)a_den;
  const double b = (double)b_num / (double)b_den;
  return a - b;
}
#endif
|
||||
|
||||
// Prints a DIM x DIM elem_t matrix, one row per line. Non-float builds print
// each element with "%d"; float builds print the value returned by
// elem_t_to_elem_t_bits() in hex.
static void printMatrix(elem_t m[DIM][DIM]) {
  for (size_t row = 0; row < DIM; ++row) {
    for (size_t col = 0; col < DIM; ++col) {
#ifndef ELEM_T_IS_FLOAT
      printf("%d ", m[row][col]);
#else
      printf("%x ", elem_t_to_elem_t_bits(m[row][col]));
#endif
    }
    printf("\n");
  }
}
|
||||
|
||||
// Prints a DIM x DIM acc_t matrix, one row per line. Non-float builds print
// each element with "%d"; float builds print the value returned by
// acc_t_to_acc_t_bits() in hex.
static void printMatrixAcc(acc_t m[DIM][DIM]) {
  for (size_t row = 0; row < DIM; ++row) {
    for (size_t col = 0; col < DIM; ++col) {
#ifndef ELEM_T_IS_FLOAT
      printf("%d ", m[row][col]);
#else
      printf("%x ", acc_t_to_acc_t_bits(m[row][col]));
#endif
    }
    printf("\n");
  }
}
|
||||
|
||||
// Returns 1 when x and y match element for element, 0 otherwise. In float
// builds two elements also count as matching when both are NaN.
static int is_equal(elem_t x[DIM][DIM], elem_t y[DIM][DIM]) {
  for (size_t row = 0; row < DIM; ++row) {
    for (size_t col = 0; col < DIM; ++col) {
#ifndef ELEM_T_IS_FLOAT
      const bool differs = x[row][col] != y[row][col];
#else
      const bool x_is_nan = elem_t_isnan(x[row][col]);
      const bool y_is_nan = elem_t_isnan(y[row][col]);
      const bool differs = x[row][col] != y[row][col] && !(x_is_nan && y_is_nan);
#endif
      if (differs) {
        return 0;
      }
    }
  }
  return 1;
}
|
||||
|
||||
// Returns 1 when x equals the transpose of y (x[i][j] == y[j][i] for every
// element), 0 otherwise. In float builds two elements also count as matching
// when both are NaN.
static int is_equal_transposed(elem_t x[DIM][DIM], elem_t y[DIM][DIM]) {
  for (size_t row = 0; row < DIM; ++row) {
    for (size_t col = 0; col < DIM; ++col) {
#ifndef ELEM_T_IS_FLOAT
      const bool differs = x[row][col] != y[col][row];
#else
      const bool x_is_nan = elem_t_isnan(x[row][col]);
      const bool y_is_nan = elem_t_isnan(y[col][row]);
      const bool differs = x[row][col] != y[col][row] && !(x_is_nan && y_is_nan);
#endif
      if (differs) {
        return 0;
      }
    }
  }
  return 1;
}
|
||||
|
||||
// Evaluates to 1 when the dim_i x dim_j matrices x and y are equal element
// for element (compared with !=), and to 0 otherwise.
//
// This relies on a GNU extension known as statement expressions.
//
// All macro arguments are parenthesized, and the internal variables carry a
// mat_is_equal_ prefix so they cannot capture caller identifiers such as
// `result`, `i` or `j`. Both loops stop as soon as a mismatch is found.
// Note that the arguments are evaluated repeatedly, so they should be free
// of side effects.
#define MAT_IS_EQUAL(dim_i, dim_j, x, y) \
    ({int mat_is_equal_result_ = 1; \
      for (size_t mat_is_equal_i_ = 0; \
           mat_is_equal_result_ && mat_is_equal_i_ < (dim_i); \
           mat_is_equal_i_++) \
        for (size_t mat_is_equal_j_ = 0; mat_is_equal_j_ < (dim_j); ++mat_is_equal_j_) { \
          if ((x)[mat_is_equal_i_][mat_is_equal_j_] != (y)[mat_is_equal_i_][mat_is_equal_j_]) { \
            mat_is_equal_result_ = 0; \
            break; \
          } \
        } \
      mat_is_equal_result_;})
|
||||
|
||||
static uint64_t read_cycles() {
|
||||
uint64_t cycles;
|
||||
asm volatile ("rdcycle %0" : "=r" (cycles));
|
||||
return cycles;
|
||||
|
||||
// const uint32_t * mtime = (uint32_t *)(33554432 + 0xbff8);
|
||||
// const uint32_t * mtime = (uint32_t *)(33554432 + 0xbffc);
|
||||
// return *mtime;
|
||||
}
|
||||
|
||||
#undef abs
|
||||
|
||||
#endif // SRC_MAIN_C_GEMMINI_TESTUTILS_H
|
||||
13
gemmini/include/translator.h
Normal file
13
gemmini/include/translator.h
Normal file
@@ -0,0 +1,13 @@
|
||||
// See LICENSE for license details.
|
||||
|
||||
#ifndef SRC_MAIN_C_TRANSLATOR_H
|
||||
#define SRC_MAIN_C_TRANSLATOR_H
|
||||
|
||||
#include "rocc-software/src/xcustom.h"
|
||||
|
||||
#define XCUSTOM_TRANS 1
|
||||
|
||||
#define doTranslate(y, vaddr) \
|
||||
ROCC_INSTRUCTION(XCUSTOM_TRANS, y, vaddr, 0, 0);
|
||||
|
||||
#endif // SRC_MAIN_C_TRANSLATOR_H
|
||||
3
gemmini/rocc-software/.gitignore
vendored
Normal file
3
gemmini/rocc-software/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
*~
|
||||
*#
|
||||
*.#*
|
||||
46
gemmini/rocc-software/CONTRIBUTING.md
Normal file
46
gemmini/rocc-software/CONTRIBUTING.md
Normal file
@@ -0,0 +1,46 @@
|
||||
All contributors must agree to the Developer Certificate of Origin Version 1.1. (DCO 1.1) by signing their commits with:
|
||||
|
||||
```
|
||||
DCO 1.1 Signed-off-by: [NAME] <[EMAIL]>
|
||||
```
|
||||
|
||||
The full text of the DCO 1.1 is as follows:
|
||||
|
||||
```
|
||||
Developer Certificate of Origin
|
||||
Version 1.1
|
||||
|
||||
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
|
||||
660 York Street, Suite 102,
|
||||
San Francisco, CA 94110 USA
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies of this
|
||||
license document, but changing it is not allowed.
|
||||
|
||||
|
||||
Developer's Certificate of Origin 1.1
|
||||
|
||||
By making a contribution to this project, I certify that:
|
||||
|
||||
(a) The contribution was created in whole or in part by me and I
|
||||
have the right to submit it under the open source license
|
||||
indicated in the file; or
|
||||
|
||||
(b) The contribution is based upon previous work that, to the best
|
||||
of my knowledge, is covered under an appropriate open source
|
||||
license and I have the right under that license to submit that
|
||||
work with modifications, whether created in whole or in part
|
||||
by me, under the same open source license (unless I am
|
||||
permitted to submit under a different license), as indicated
|
||||
in the file; or
|
||||
|
||||
(c) The contribution was provided directly to me by some other
|
||||
person who certified (a), (b) or (c) and I have not modified
|
||||
it.
|
||||
|
||||
(d) I understand and agree that this project and the contribution
|
||||
are public and that a record of the contribution (including all
|
||||
personal information I submit with it, including my sign-off) is
|
||||
maintained indefinitely and may be redistributed consistent with
|
||||
this project or the open source license(s) involved.
|
||||
```
|
||||
201
gemmini/rocc-software/LICENSE
Normal file
201
gemmini/rocc-software/LICENSE
Normal file
@@ -0,0 +1,201 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
4
gemmini/rocc-software/README.md
Normal file
4
gemmini/rocc-software/README.md
Normal file
@@ -0,0 +1,4 @@
|
||||
Rocket Custom Coprocessor (RoCC) Software
|
||||
========================================
|
||||
|
||||
This is a set of C and RISC-V Assembly macros that help with emitting custom RISC-V instructions for talking with Rocket Custom Coprocessors (RoCCs).
|
||||
28
gemmini/rocc-software/src/riscv_test_rocc.h
Normal file
28
gemmini/rocc-software/src/riscv_test_rocc.h
Normal file
@@ -0,0 +1,28 @@
|
||||
// Copyright 2018 IBM
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef ROCC_SOFTWARE_SRC_RISCV_TEST_ROCC_H_
|
||||
#define ROCC_SOFTWARE_SRC_RISCV_TEST_ROCC_H_
|
||||
|
||||
|
||||
#define RVTEST_XS_ENABLE \
|
||||
li a0, MSTATUS_XS & (MSTATUS_XS >> 1); \
|
||||
csrs mstatus, a0;
|
||||
|
||||
#define RVTEST_WITH_ROCC \
|
||||
.macro init; \
|
||||
RVTEST_XS_ENABLE \
|
||||
.endm
|
||||
|
||||
#endif // ROCC_SOFTWARE_SRC_RISCV_TEST_ROCC_H_
|
||||
170
gemmini/rocc-software/src/xcustom.h
Normal file
170
gemmini/rocc-software/src/xcustom.h
Normal file
@@ -0,0 +1,170 @@
|
||||
// Copyright 2018--2020 IBM
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef ROCC_SOFTWARE_SRC_XCUSTOM_H_
|
||||
#define ROCC_SOFTWARE_SRC_XCUSTOM_H_
|
||||
|
||||
#define STR1(x) #x
|
||||
#ifndef STR
|
||||
#define STR(x) STR1(x)
|
||||
#endif
|
||||
|
||||
#define CAT_(A, B) A##B
|
||||
#define CAT(A, B) CAT_(A, B)
|
||||
|
||||
/** Assembly macro for creating "raw" Rocket Custom Coprocessor (RoCC)
|
||||
* assembly language instructions that will return data in rd. These
|
||||
* are to be used only in assembly language programs (not C/C++).
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* Consider the following macro consisting of a CUSTOM_0 instruction
|
||||
* with func7 "42" that is doing some operation of "a0 = op(a1, a2)":
|
||||
*
|
||||
* ROCC_INSTRUCTION_RAW_R_R_R(0, a0, a1, a2, 42)
|
||||
*
|
||||
* This will produce the following pseudo assembly language
|
||||
* instruction:
|
||||
*
|
||||
* .insn r CUSTOM_0, 7, 42, a0, a1, a2
|
||||
*
|
||||
* @param x the custom instruction number: 0, 1, 2, or 3
|
||||
* @param rd the destination register, e.g., a0 or x10
|
||||
* @param rs1 the first source register, e.g., a0 or x10
|
||||
* @param rs2 the second source register, e.g., a0 or x10
|
||||
* @param func7 the value of the func7 field
|
||||
* @return a raw .insn RoCC instruction
|
||||
*/
|
||||
#define ROCC_INSTRUCTION_RAW_R_R_R(x, rd, rs1, rs2, func7) \
|
||||
.insn r CAT(CUSTOM_, x), 7, func7, rd, rs1, rs2
|
||||
|
||||
/** Assembly macro for creating "raw" Rocket Custom Coprocessor (RoCC)
|
||||
* assembly language instructions that will *NOT* return data in rd.
|
||||
* These are to be used only in assembly language programs (not
|
||||
* C/C++).
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* Consider the following macro consisting of a CUSTOM_1 instruction
|
||||
* with func7 "42" that is doing some operation of "op(a1, a2)". *NO*
|
||||
* data is returned:
|
||||
*
|
||||
* ROCC_INSTRUCTION_RAW_0_R_R(1, a1, a2, 42)
|
||||
*
|
||||
* This will produce the following pseudo assembly language
|
||||
* instruction:
|
||||
*
|
||||
* .insn r CUSTOM_1, 3, 42, x0, a1, a2
|
||||
*
|
||||
* @param x the custom instruction number: 0, 1, 2, or 3
|
||||
* @param rs1 the first source register, e.g., a0 or x10
|
||||
* @param rs2 the second source register, e.g., a0 or x10
|
||||
* @param func7 the value of the func7 field
|
||||
* @return a raw .insn RoCC instruction
|
||||
*/
|
||||
#define ROCC_INSTRUCTION_RAW_0_R_R(x, rs1, rs2, func7) \
|
||||
.insn r CAT(CUSTOM_, x), 3, func7, x0, rs1, rs2
|
||||
|
||||
/** C/C++ inline assembly macro for creating Rocket Custom Coprocessor
|
||||
* (RoCC) instructions that return data in rd. These are to be used
|
||||
* only in C/C++ programs (not bare assembly).
|
||||
*
|
||||
* This is equivalent to ROCC_INSTRUCTION_R_R_R. See its
|
||||
* documentation.
|
||||
*/
|
||||
#define ROCC_INSTRUCTION(x, rd, rs1, rs2, func7) \
|
||||
ROCC_INSTRUCTION_R_R_R(x, rd, rs1, rs2, func7)
|
||||
|
||||
/** C/C++ inline assembly macro for creating Rocket Custom Coprocessor
|
||||
* (RoCC) instructions that return data in C variable rd.
|
||||
* These are to be used only in C/C++ programs (not bare assembly).
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* Consider the following macro consisting of a CUSTOM_2 instruction
|
||||
* with func7 "42" that is doing some operation of "a0 = op(a1, a2)"
|
||||
* (where a0, a1, and a2 are variables defined in C):
|
||||
*
|
||||
* ROCC_INSTRUCTION(2, a0, a1, a2, 42)
|
||||
*
|
||||
* This will produce the following inline assembly:
|
||||
*
|
||||
* asm volatile(
|
||||
* ".insn r CUSTOM_2, 0x7, 42, %0, %1, %2"
|
||||
* : "=r"(rd)
|
||||
* : "r"(rs1), "r"(rs2));
|
||||
*
|
||||
* @param x the custom instruction number: 0, 1, 2, or 3
|
||||
* @param rd the C variable to capture as destination operand
|
||||
* @param rs1 the C variable to capture for first source register
|
||||
* @param rs2 the C variable to capture for second source register
|
||||
* @param func7 the value of the func7 field
|
||||
* @return an inline assembly RoCC instruction
|
||||
*/
|
||||
#define ROCC_INSTRUCTION_R_R_R(x, rd, rs1, rs2, func7) \
|
||||
{ \
|
||||
asm volatile( \
|
||||
".insn r " STR(CAT(CUSTOM_, x)) ", " STR(0x7) ", " STR(func7) ", %0, %1, %2" \
|
||||
: "=r"(rd) \
|
||||
: "r"(rs1), "r"(rs2)); \
|
||||
}
|
||||
|
||||
/** C/C++ inline assembly macro for creating Rocket Custom Coprocessor
|
||||
* (RoCC) instructions that do *NOT* return data in rd (rd is fixed to x0).
|
||||
* These are to be used only in C/C++ programs (not bare assembly).
|
||||
*
|
||||
* Example:
|
||||
*
|
||||
* Consider the following macro consisting of a CUSTOM_3 instruction
|
||||
* with func7 "42" that is doing some operation of "op(a1, a2)"
|
||||
* (where a1 and a2 are variables defined in C; *NO* data is returned):
|
||||
*
|
||||
* ROCC_INSTRUCTION_0_R_R(3, a1, a2, 42)
|
||||
*
|
||||
* This will produce the following inline assembly:
|
||||
*
|
||||
* asm volatile(
|
||||
* ".insn r CUSTOM_3, 0x3, 42, x0, %0, %1"
|
||||
* :: "r"(rs1), "r"(rs2));
|
||||
*
|
||||
* @param x the custom instruction number: 0, 1, 2, or 3
|
||||
* @param rs1 the C variable to capture for first source register
|
||||
* @param rs2 the C variable to capture for second source register
|
||||
* @param func7 the value of the func7 field
|
||||
* @return an inline assembly RoCC instruction
|
||||
*/
|
||||
#define ROCC_INSTRUCTION_0_R_R(x, rs1, rs2, func7) \
|
||||
{ \
|
||||
asm volatile( \
|
||||
".insn r " STR(CAT(CUSTOM_, x)) ", " STR(0x3) ", " STR(func7) ", x0, %0, %1" \
|
||||
: \
|
||||
: "r"(rs1), "r"(rs2)); \
|
||||
}
|
||||
|
||||
// [TODO] fix these to align with the above approach
|
||||
// Macro to pass rs2_ as an immediate
|
||||
/*
|
||||
#define ROCC_INSTRUCTION_R_R_I(XCUSTOM_, rd_, rs1_, rs2_, funct_) \
|
||||
asm volatile (XCUSTOM_" %[rd], %[rs1], %[rs2], %[funct]" \
|
||||
: [rd] "=r" (rd_) \
|
||||
: [rs1] "r" (rs1_), [rs2] "i" (rs2_), [funct] "i" (funct_))
|
||||
|
||||
// Macro to pass rs1_ and rs2_ as immediates
|
||||
#define ROCC_INSTRUCTION_R_I_I(XCUSTOM_, rd_, rs1_, rs2_, funct_) \
|
||||
asm volatile (XCUSTOM_" %[rd], %[rs1], %[rs2], %[funct]" \
|
||||
: [rd] "=r" (rd_) \
|
||||
: [rs1] "i" (rs1_), [rs2] "i" (rs2_), [funct] "i" (funct_))
|
||||
*/
|
||||
|
||||
#endif // ROCC_SOFTWARE_SRC_XCUSTOM_H_
|
||||
@@ -22,7 +22,7 @@ RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
|
||||
|
||||
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
||||
VORTEX_KN_PATH ?= $(realpath ../../../kernel)
|
||||
GEMMINI_SW_PATH ?= $(realpath ../../../third_party/gemmini-rocc-tests)
|
||||
GEMMINI_SW_PATH ?= $(realpath ../../../gemmini)
|
||||
|
||||
FPGA_BIN_DIR ?= $(VORTEX_RT_PATH)/opae
|
||||
|
||||
|
||||
1
third_party/gemmini-rocc-tests
vendored
1
third_party/gemmini-rocc-tests
vendored
Submodule third_party/gemmini-rocc-tests deleted from 6148fc0d2c
Reference in New Issue
Block a user