353 lines
12 KiB
C
353 lines
12 KiB
C
/*
|
|
* NOTES:
|
|
*
|
|
* 1) Matrix Market files are always 1-based, i.e. the index of the first
|
|
* element of a matrix is (1,1), not (0,0) as in C. ADJUST THESE
|
|
* OFFSETS ACCORDINGLY when reading and writing
|
|
* to files.
|
|
*
|
|
* 2) ANSI C requires one to use the "l" format modifier when reading
|
|
* double precision floating point numbers in scanf() and
|
|
* its variants. For example, use "%lf", "%lg", or "%le"
|
|
* when reading doubles, otherwise errors will occur.
|
|
*/
|
|
|
|
#include "convert_dataset.h"
|
|
#include "mmio.h"
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
typedef struct _mat_entry {
|
|
int row, col; /* i,j */
|
|
float val;
|
|
} mat_entry;
|
|
|
|
typedef struct _row_stats { // stats on each row
|
|
int index;
|
|
int size;
|
|
int start;
|
|
int padding;
|
|
} row_stats;
|
|
|
|
int sort_rows(const void *a, const void *b) {
|
|
return (((mat_entry *)a)->row - ((mat_entry *)b)->row);
|
|
}
|
|
int sort_cols(const void *a, const void *b) {
|
|
return (((mat_entry *)a)->col - ((mat_entry *)b)->col);
|
|
}
|
|
/* sorts largest by size first */
|
|
int sort_stats(const void *a, const void *b) {
|
|
return (((row_stats *)b)->size - ((row_stats *)a)->size);
|
|
}
|
|
|
|
/*
|
|
* COO to JDS matrix conversion.
|
|
*
|
|
* Needs to output both column and row major JDS formats
|
|
* with the minor unit padded to a multiple of `pad_minor`
|
|
* and the major unit arranged into groups of `group_size`
|
|
*
|
|
* Major unit is col, minor is row. Each block is either a scalar or vec4
|
|
*
|
|
* Inputs:
|
|
* mtx_filename - the file in COO format
|
|
* pad_rows - multiple of packed groups to pad each row to
|
|
* warp_size - each group of `warp_size` cols is padded to the same amount
|
|
* pack_size - number of items to pack
|
|
* mirrored - is the input mtx file a symmetric matrix? The other half will be
|
|
* filled in if this is =1
|
|
* binary - does the sparse matrix file have values in the format "%d %d"
|
|
* or "%d %d %lg"?
|
|
* debug_level - 0 for no output, 1 for simple JDS data, 2 for visual grid
|
|
* Outputs:
|
|
* data - the raw data, padded and grouped as requested
|
|
* data_row_ptr - pointer offset into the `data` output, referenced
|
|
* by the current row loop index
|
|
* nz_count - number of non-zero entries in each row
|
|
* indexed by col / warp_size
|
|
* data_col_index - corresponds to the col that the same
|
|
* array index in `data` is at
|
|
* data_row_map - JDS row to real row
|
|
* data_cols - number of columns the output JDS matrix has
|
|
* dim - dimensions of the input matrix
|
|
* data_ptr_len - size of data_row_ptr (maps to original `depth` var)
|
|
*/
|
|
int coo_to_jds(char *mtx_filename, int pad_rows, int warp_size, int pack_size,
|
|
int mirrored, int binary, int debug_level, float **data,
|
|
int **data_row_ptr, int **nz_count, int **data_col_index,
|
|
int **data_row_map, int *data_cols, int *dim, int *len,
|
|
int *nz_count_len, int *data_ptr_len) {
|
|
int ret_code;
|
|
MM_typecode matcode;
|
|
FILE *f;
|
|
int nz;
|
|
int i;
|
|
float *val;
|
|
mat_entry *entries;
|
|
row_stats *stats;
|
|
int rows, cols;
|
|
|
|
if ((f = fopen(mtx_filename, "r")) == NULL)
|
|
exit(1);
|
|
|
|
if (mm_read_banner(f, &matcode) != 0) {
|
|
printf("Could not process Matrix Market banner.\n");
|
|
exit(1);
|
|
}
|
|
|
|
/* This is how one can screen matrix types if their application */
|
|
/* only supports a subset of the Matrix Market data types. */
|
|
|
|
if (mm_is_complex(matcode) && mm_is_matrix(matcode) &&
|
|
mm_is_sparse(matcode)) {
|
|
printf("Sorry, this application does not support ");
|
|
printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode));
|
|
exit(1);
|
|
}
|
|
|
|
/* find out size of sparse matrix .... */
|
|
|
|
if ((ret_code = mm_read_mtx_crd_size(f, &rows, &cols, &nz)) != 0)
|
|
exit(1);
|
|
*dim = rows;
|
|
|
|
if (mirrored) {
|
|
// max possible size, might be less because diagonal values aren't doubled
|
|
entries = (mat_entry *)malloc(2 * nz * sizeof(mat_entry));
|
|
} else {
|
|
entries = (mat_entry *)malloc(nz * sizeof(mat_entry));
|
|
}
|
|
|
|
/* NOTE: when reading in doubles, ANSI C requires the use of the "l" */
|
|
/* specifier as in "%lg", "%lf", "%le", otherwise errors will occur */
|
|
/* (ANSI C X3.159-1989, Sec. 4.9.6.2, p. 136 lines 13-15) */
|
|
int cur_i = 0; // to account for mirrored diagonal entries
|
|
|
|
for (i = 0; i < nz; i++, cur_i++) {
|
|
if (!binary) {
|
|
fscanf(f, "%d %d %f\n", &entries[cur_i].row, &entries[cur_i].col,
|
|
&entries[cur_i].val);
|
|
} else {
|
|
fscanf(f, "%d %d\n", &entries[cur_i].row, &entries[cur_i].col);
|
|
entries[cur_i].val = 1.0;
|
|
}
|
|
entries[cur_i].row--;
|
|
entries[cur_i].col--;
|
|
// printf("%d,%d = %f\n", entries[cur_i].row, entries[cur_i].col,
|
|
// entries[cur_i].val);
|
|
if (mirrored) {
|
|
// fill in mirrored diagonal
|
|
if (entries[cur_i].row != entries[cur_i].col) { // not a diagonal value
|
|
cur_i++;
|
|
entries[cur_i].val = entries[cur_i - 1].val;
|
|
entries[cur_i].col = entries[cur_i - 1].row;
|
|
entries[cur_i].row = entries[cur_i - 1].col;
|
|
// printf("%d,%d = %f\n", entries[cur_i].row, entries[cur_i].col,
|
|
// entries[cur_i].val);
|
|
}
|
|
}
|
|
}
|
|
// set new non-zero count
|
|
nz = cur_i;
|
|
if (debug_level >= 1) {
|
|
printf("Converting COO to JDS format (%dx%d)\n%d matrix entries, warp size "
|
|
"= %d, "
|
|
"row padding align = %d, pack size = %d\n\n",
|
|
rows, cols, nz, warp_size, pad_rows, pack_size);
|
|
}
|
|
if (f != stdin)
|
|
fclose(f);
|
|
|
|
/*
|
|
* Now we have an array of values in entries
|
|
* Transform to padded JDS format - sort by rows, then fubini
|
|
*/
|
|
|
|
int irow, icol = 0, istart = 0;
|
|
int total_size = 0;
|
|
|
|
/* Loop through each entry to figure out padding, grouping that determine
|
|
* final data array size
|
|
*
|
|
* First calculate stats for each row
|
|
*
|
|
* Collect stats using the major_stats typedef
|
|
*/
|
|
|
|
qsort(entries, nz, sizeof(mat_entry), sort_rows); // sort by row number
|
|
rows = entries[nz - 1].row + 1; // last item is greatest row (zero indexed)
|
|
if (rows % warp_size) { // pad group number to warp_size here
|
|
rows += warp_size - rows % warp_size;
|
|
}
|
|
stats = (row_stats *)calloc(rows, sizeof(row_stats)); // set to 0
|
|
*data_row_map = (int *)calloc(rows, sizeof(int));
|
|
irow = entries[0].row; // set first row
|
|
|
|
// printf("First row %d\n", irow);
|
|
for (i = 0; i < nz; i++) { // loop through each sorted entry
|
|
if (entries[i].row != irow || i == nz - 1) { // new row
|
|
// printf("%d != %d\n", entries[i].row, irow);
|
|
if (i == nz - 1) {
|
|
// last item, add it to current row
|
|
// printf("Last item i=%d, row=%d, irow=%d\n", i, entries[i].row, irow);
|
|
icol++;
|
|
}
|
|
// hit a new row, record stats for the last row (i-1)
|
|
stats[irow].size = icol; // record # cols in previous row
|
|
stats[irow].index = entries[i - 1].row; // row # for previous stat item
|
|
// printf("Row %d, i=%d, irow=%d\n", entries[i].row, i, irow);
|
|
stats[irow].start = istart; // starting location in entries array
|
|
// set stats for the next row until this break again
|
|
icol = 0; // reset row items
|
|
irow = entries[i].row;
|
|
istart = i;
|
|
}
|
|
icol++; // keep track of number of items in this row
|
|
}
|
|
|
|
*nz_count_len = rows / warp_size + rows % warp_size;
|
|
*nz_count =
|
|
(int *)malloc(*nz_count_len * sizeof(int)); // only one value per group
|
|
|
|
/* sort based upon row size, greatest first */
|
|
qsort(stats, rows, sizeof(row_stats), sort_stats);
|
|
/* figure out padding and grouping */
|
|
if (debug_level >= 1) {
|
|
printf("Padding data....%d rows, %d groups\n", rows, *nz_count_len);
|
|
}
|
|
int pad_to, total_padding = 0, pack_to;
|
|
pad_rows *= pack_size; // change padding to account for packed items
|
|
for (i = 0; i < rows; i++) {
|
|
// record JDS to real row number
|
|
(*data_row_map)[i] = stats[i].index;
|
|
if (i < rows - 1) {
|
|
// (*data_row_map)[i]--; // ???? no idea why this is off by 1
|
|
}
|
|
// each row is padded so the number of packed groups % pad_rows == 0
|
|
if (i % warp_size ==
|
|
0) { // on a group boundary with the largest number of items
|
|
// find padding in individual items
|
|
if (stats[i].size % pad_rows) {
|
|
stats[i].padding =
|
|
pad_rows - (stats[i].size % pad_rows); // find padding
|
|
} else {
|
|
stats[i].padding = 0; // no padding necessary, already at pad multiple
|
|
}
|
|
if (stats[i].size % pack_size) {
|
|
pack_to = ceil((float)stats[i].size / pack_size);
|
|
} else {
|
|
pack_to = stats[i].size / pack_size;
|
|
}
|
|
// pack_to = stats[i].size + (!stats[i].size%pack_size) ? 0 : (pack_size -
|
|
// stats[i].size%pack_size);
|
|
pad_to = stats[i].size +
|
|
stats[i].padding; // total size of this row, with padding
|
|
// TODO: change this to reflect the real number of nonzero packed items,
|
|
// not the padded
|
|
// value
|
|
(*nz_count)[i / warp_size] =
|
|
pack_to; // number of packed items in this group
|
|
total_size += pad_to * warp_size; // allocate size for this padded group
|
|
if (debug_level >= 2)
|
|
printf("Padding warp group %d to %d items, zn = %d\n", i / warp_size,
|
|
pad_to, pack_to);
|
|
} else {
|
|
stats[i].padding = pad_to - stats[i].size;
|
|
}
|
|
total_padding += stats[i].padding;
|
|
// if (debug_level >= 2)
|
|
// printf("Row %d, %d items, %d padding\n", stats[i].index,
|
|
// stats[i].size, stats[i].padding);
|
|
}
|
|
|
|
/* allocate data and data_row_index */
|
|
if (debug_level >= 1)
|
|
printf("Allocating data space: %d entries (%f%% padding)\n", total_size,
|
|
(float)100 * total_padding / total_size);
|
|
*data = (float *)calloc(total_size,
|
|
sizeof(float)); // set to 0 so padded values are set
|
|
*data_col_index =
|
|
(int *)calloc(total_size, sizeof(int)); // any unset indexes point to 0
|
|
*data_row_ptr = (int *)calloc(rows, sizeof(int));
|
|
*len = total_size;
|
|
i = 0; // data index, including padding
|
|
|
|
/*
|
|
* Keep looping through each row, writing data a group at a time
|
|
* to the output array. Increment `irow` each time, and use it as
|
|
* an index into entries along with stats.start to get the next
|
|
* data item
|
|
*/
|
|
irow = 0; // keep track of which row we are in inside the fubini-ed array
|
|
int idata = 0; // position within final data array
|
|
int entry_index, j;
|
|
int ipack; // used in internal loop for writing packed values
|
|
mat_entry entry;
|
|
while (1) {
|
|
/* record data_row_ptr */
|
|
(*data_row_ptr)[irow] = idata;
|
|
|
|
/* End condtion: the size of the greatest row is smaller than the current
|
|
Fubini-ed row */
|
|
if (stats[0].size + stats[0].padding <= irow * pack_size)
|
|
break;
|
|
|
|
// printf("Data row pointer for row %d is %d\n", irow, idata);
|
|
for (i = 0; i < rows; i++) {
|
|
/* take one packed group from each original row */
|
|
// printf("Output irow %d icol %d (real %d,%d size %d)\n", irow, i,
|
|
// entry.col, i, stats[i].size);
|
|
/* Watch out for little vs big endian, and how opencl interprets vector
|
|
* casting from pointers */
|
|
for (ipack = 0; ipack < pack_size; ipack++) {
|
|
if (stats[i].size > irow * pack_size + ipack) {
|
|
// copy value
|
|
entry_index = stats[i].start + irow * pack_size + ipack;
|
|
entry = entries[entry_index];
|
|
/* record index and value */
|
|
(*data)[idata] = entry.val;
|
|
/* each data item will get its row index from the thread, col from
|
|
* here */
|
|
(*data_col_index)[idata] = entry.col;
|
|
|
|
if (debug_level >= 2) {
|
|
if (i < 3) {
|
|
// first row debugging
|
|
printf("[%d row%d=%.3f]", ipack + 1, i, entry.val);
|
|
} else {
|
|
printf("%d", ipack + 1);
|
|
}
|
|
}
|
|
} else if (stats[i].size + stats[i].padding >
|
|
irow * pack_size + ipack) {
|
|
/* add padding to the end of each row here - this assumes padding is
|
|
* factored into allocated size */
|
|
if (debug_level >= 2)
|
|
printf("0");
|
|
(*data_col_index)[idata] = -1;
|
|
} else {
|
|
goto endwrite; // no data written this pass, so don't increment idata
|
|
}
|
|
idata += 1;
|
|
}
|
|
}
|
|
endwrite:
|
|
if (debug_level >= 2) {
|
|
printf("\n");
|
|
}
|
|
irow += 1;
|
|
}
|
|
|
|
if (debug_level >= 1)
|
|
printf("Finished converting.\nJDS format has %d columns, %d rows.\n", rows,
|
|
irow);
|
|
free(entries);
|
|
free(stats);
|
|
printf("nz_count_len = %d\n", *nz_count_len);
|
|
|
|
*data_cols = rows;
|
|
*data_ptr_len = irow + 1;
|
|
return 0;
|
|
}
|