cachelab started

This commit is contained in:
2025-04-21 23:52:27 +08:00
parent cc99d9b5d9
commit ace7a46fb9
58 changed files with 10071 additions and 0 deletions

302
cachelab/Cache.c Normal file
View File

@ -0,0 +1,302 @@
///////////////////////////////////////////////////////////////////////
//// Copyright 2022 by mars. //
///////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <stdlib.h>
#include "common.h"
#define DEBUG 0
/*
 * GET_POWER_OF_2(X): compile-time log2 for exact powers of two.
 * Yields k when X == 2^k (0 <= k <= 32) and 0 for every other value,
 * including 0 itself. X is evaluated several times, so pass only
 * side-effect-free constant expressions.
 * Every use of X is parenthesized so that arguments containing low-precedence
 * operators (e.g. `A | B`) are compared as a whole.
 */
#define GET_POWER_OF_2(X) ((X) == 0x00 ? 0 : \
    (X) == 0x01 ? 0 : \
    (X) == 0x02 ? 1 : \
    (X) == 0x04 ? 2 : \
    (X) == 0x08 ? 3 : \
    (X) == 0x10 ? 4 : \
    (X) == 0x20 ? 5 : \
    (X) == 0x40 ? 6 : \
    (X) == 0x80 ? 7 : \
    (X) == 0x100 ? 8 : \
    (X) == 0x200 ? 9 : \
    (X) == 0x400 ? 10 : \
    (X) == 0x800 ? 11 : \
    (X) == 0x1000 ? 12 : \
    (X) == 0x2000 ? 13 : \
    (X) == 0x4000 ? 14 : \
    (X) == 0x8000 ? 15 : \
    (X) == 0x10000 ? 16 : \
    (X) == 0x20000 ? 17 : \
    (X) == 0x40000 ? 18 : \
    (X) == 0x80000 ? 19 : \
    (X) == 0x100000 ? 20 : \
    (X) == 0x200000 ? 21 : \
    (X) == 0x400000 ? 22 : \
    (X) == 0x800000 ? 23 : \
    (X) == 0x1000000 ? 24 : \
    (X) == 0x2000000 ? 25 : \
    (X) == 0x4000000 ? 26 : \
    (X) == 0x8000000 ? 27 : \
    (X) == 0x10000000 ? 28 : \
    (X) == 0x20000000 ? 29 : \
    (X) == 0x40000000 ? 30 : \
    (X) == 0x80000000 ? 31 : \
    (X) == 0x100000000 ? 32 : 0)
/*
Direct-mapped data cache, 16 KB in total.
With the values below each line holds 16 bytes, so there are 16384 / 16 = 1024 lines.
The address-bit macros are derived with GET_POWER_OF_2, which returns 0 for
anything that is not an exact power of two, so both the line size and the
line count must be powers of two.
*/
#define DCACHE_SIZE 16384
#define DCACHE_DATA_PER_LINE 16 // must be a multiple of 8 bytes (lines are transferred 8 bytes at a time)
#define DCACHE_DATA_PER_LINE_ADDR_BITS GET_POWER_OF_2(DCACHE_DATA_PER_LINE) // block-offset bits, derived from the line size: 16 bytes -> 4 bits
#define DCACHE_SET (DCACHE_SIZE/DCACHE_DATA_PER_LINE)
#define DCACHE_SET_ADDR_BITS GET_POWER_OF_2(DCACHE_SET) // line-index bits, derived from the line count: 1024 lines -> 10 bits
// Layout of one cache line: Valid, Tag and Data.
// Lab rule: all state you keep must be recorded inside the cache line itself.
struct DCACHE_LineStruct
{
UINT8 Valid; // 0 = line is empty (set by InitDataCache), 1 = line holds data
UINT64 Tag; // address bits above the line-index and block-offset bits
UINT8 Data[DCACHE_DATA_PER_LINE]; // the cached bytes; Data[0] is the lowest address of the line
}DCache[DCACHE_SET]; // the cache itself: one line per set (direct mapped)
/*
DCache初始化代码一般需要把DCache的有效位Valid设置为0
模拟器启动时会调用此InitDataCache函数
*/
void InitDataCache()
{
UINT32 i;
printf("[%s] +-----------------------------------+\n", __func__);
printf("[%s] | 威震天的Data Cache初始化ing.... |\n", __func__);
printf("[%s] +-----------------------------------+\n", __func__);
for (i = 0; i < DCACHE_SET; i++)
DCache[i].Valid = 0;
}
/*
 * Fill one data-cache line from memory.
 *
 * Address:          any byte address inside the line to load; it is aligned
 *                   down to a DCACHE_DATA_PER_LINE boundary here.
 * CacheLineAddress: index of the destination line in DCache.
 *
 * Memory is read 8 bytes at a time through ReadMemory(). Each 64-bit word is
 * unpacked byte by byte, least-significant byte first, which is the byte order
 * AccessDataCache uses when it assembles values from Data[]. Writing the bytes
 * explicitly (instead of storing through a UINT64* that aliases the UINT8
 * array) avoids a strict-aliasing violation and keeps the layout independent
 * of the host byte order.
 */
void LoadDataCacheLineFromMemory(UINT64 Address, UINT32 CacheLineAddress)
{
    UINT8* Line = DCache[CacheLineAddress].Data;
    UINT64 AlignAddress = Address & ~(UINT64)(DCACHE_DATA_PER_LINE - 1);
    UINT32 Word;
    UINT32 Byte;

    for (Word = 0; Word < DCACHE_DATA_PER_LINE / 8; Word++)
    {
        UINT64 WordAddress = AlignAddress + 8ULL * Word;
        UINT64 ReadData = ReadMemory(WordAddress);

        if (DEBUG)
            printf("[%s] Address=%016llX ReadData=%016llX\n", __func__, (unsigned long long)WordAddress, (unsigned long long)ReadData);

        for (Byte = 0; Byte < 8; Byte++)
            Line[8 * Word + Byte] = (UINT8)(ReadData >> (8 * Byte));
    }
}
/*
 * Write one data-cache line back to memory.
 *
 * Address:          any byte address inside the destination memory line; it is
 *                   aligned down to a DCACHE_DATA_PER_LINE boundary here.
 * CacheLineAddress: index of the source line in DCache.
 *
 * Memory is written 8 bytes at a time through WriteMemory(). Each 64-bit word
 * is assembled from the line's bytes, least-significant byte first, matching
 * the byte order AccessDataCache uses. Reading the bytes explicitly (instead
 * of loading through a UINT64* that aliases the UINT8 array) avoids a
 * strict-aliasing violation and host byte-order dependence.
 */
void StoreDataCacheLineToMemory(UINT64 Address, UINT32 CacheLineAddress)
{
    const UINT8* Line = DCache[CacheLineAddress].Data;
    UINT64 AlignAddress = Address & ~(UINT64)(DCACHE_DATA_PER_LINE - 1);
    UINT32 Word;
    UINT32 Byte;

    for (Word = 0; Word < DCACHE_DATA_PER_LINE / 8; Word++)
    {
        UINT64 WordAddress = AlignAddress + 8ULL * Word;
        UINT64 WriteData = 0;

        for (Byte = 0; Byte < 8; Byte++)
            WriteData |= (UINT64)Line[8 * Word + Byte] << (8 * Byte);

        WriteMemory(WordAddress, WriteData);
        if (DEBUG)
            printf("[%s] Address=%016llX WriteData=%016llX\n", __func__, (unsigned long long)WordAddress, (unsigned long long)WriteData);
    }
}
/*
 * Read a DataSize-byte little-endian value out of a cache line.
 * Offset is aligned down to a multiple of DataSize first.
 * Sizes other than 1, 2, 4 and 8 read nothing and yield 0.
 */
static UINT64 DCacheLoadValue(const UINT8* Data, UINT8 Offset, UINT8 DataSize)
{
    UINT64 Value = 0;
    UINT8 i;

    if (DataSize != 1 && DataSize != 2 && DataSize != 4 && DataSize != 8)
        return 0;

    Offset = Offset & (UINT8)~(DataSize - 1); /* masks 0xFF / 0xFE / 0xFC / 0xF8 */
    for (i = DataSize; i > 0; i--)
        Value = (Value << 8) | Data[Offset + i - 1];
    return Value;
}

/*
 * Store the low DataSize bytes of Value into a cache line, little-endian.
 * Offset is aligned down to a multiple of DataSize first.
 * Sizes other than 1, 2, 4 and 8 store nothing.
 */
static void DCacheStoreValue(UINT8* Data, UINT8 Offset, UINT8 DataSize, UINT64 Value)
{
    UINT8 i;

    if (DataSize != 1 && DataSize != 2 && DataSize != 4 && DataSize != 8)
        return;

    Offset = Offset & (UINT8)~(DataSize - 1);
    for (i = 0; i < DataSize; i++)
    {
        Data[Offset + i] = (UINT8)(Value & 0xFF);
        Value >>= 8;
    }
}

/*
 * Data cache access interface; the simulator calls this for every data access.
 *
 * Address:    byte address of the access
 * Operation:  'L' = load, 'S' = store, 'M' = read-modify-write (handled as a store)
 * DataSize:   1, 2, 4 or 8 bytes
 * StoreValue: value to write for 'S' / 'M'
 * LoadResult: receives the value read on a load HIT; it is set to 0 in every
 *             other case (including a load miss). May be NULL.
 *
 * Returns 'H' on a hit and 'M' on a miss.
 *
 * The cache is direct mapped, so Address splits into
 * (AddressTag, CacheLineAddress, BlockOffset). On a miss the line currently
 * occupying the slot is written back to memory if it is valid, the new line is
 * loaded, and for 'S' / 'M' the store is then applied to the freshly loaded line.
 */
UINT8 AccessDataCache(UINT64 Address, UINT8 Operation, UINT8 DataSize, UINT64 StoreValue, UINT64* LoadResult)
{
    UINT32 CacheLineAddress;
    UINT8 BlockOffset;
    UINT64 AddressTag;
    UINT8 MissFlag = 'M';
    UINT64 ReadValue;
    struct DCACHE_LineStruct* Line;

    if (LoadResult != NULL)
        *LoadResult = 0;

    /* Line index: in a direct-mapped cache this is also the set number. */
    CacheLineAddress = (Address >> DCACHE_DATA_PER_LINE_ADDR_BITS) % DCACHE_SET;
    BlockOffset = Address % DCACHE_DATA_PER_LINE;
    /* The tag is what remains after dropping the offset and index bits;
       the full address must not be used as the tag. */
    AddressTag = (Address >> DCACHE_DATA_PER_LINE_ADDR_BITS) >> DCACHE_SET_ADDR_BITS;
    Line = &DCache[CacheLineAddress];

    if (Line->Valid == 1 && Line->Tag == AddressTag)
    {
        MissFlag = 'H'; /* hit */
        if (Operation == 'L')
        {
            ReadValue = DCacheLoadValue(Line->Data, BlockOffset, DataSize);
            if (LoadResult != NULL)
                *LoadResult = ReadValue;
            if (DEBUG)
                printf("[%s] Address=%016llX Operation=%c DataSize=%u StoreValue=%016llX ReadValue=%016llX\n", __func__, Address, Operation, DataSize, StoreValue, ReadValue);
        }
        else if (Operation == 'S' || Operation == 'M')
        {
            if (DEBUG)
                printf("[%s] Address=%016llX Operation=%c DataSize=%u StoreValue=%016llX\n", __func__, Address, Operation, DataSize, StoreValue);
            DCacheStoreValue(Line->Data, BlockOffset, DataSize, StoreValue);
        }
    }
    else
    {
        if (DEBUG)
            printf("[%s] Address=%016llX Operation=%c DataSize=%u StoreValue=%016llX\n", __func__, Address, Operation, DataSize, StoreValue);
        MissFlag = 'M'; /* miss */

        if (Line->Valid == 1)
        {
            /* Evict: rebuild the old line's address as (Tag, index, 0...0)
               and write the line back to memory. */
            UINT64 OldAddress;
            OldAddress = ((Line->Tag << DCACHE_SET_ADDR_BITS) << DCACHE_DATA_PER_LINE_ADDR_BITS) | ((UINT64)CacheLineAddress << DCACHE_DATA_PER_LINE_ADDR_BITS);
            StoreDataCacheLineToMemory(OldAddress, CacheLineAddress);
        }

        /* Bring the requested line in (on real hardware this is the slow part). */
        LoadDataCacheLineFromMemory(Address, CacheLineAddress);
        Line->Valid = 1;
        Line->Tag = AddressTag;

        /* A load that missed returns nothing further; a store / modify must
           still update the line that was just loaded. */
        if (Operation == 'S' || Operation == 'M')
            DCacheStoreValue(Line->Data, BlockOffset, DataSize, StoreValue);
    }
    return MissFlag;
}
/* Instruction cache implementation (optional part of the lab). */

/* Instruction cache initialisation: currently a placeholder that does nothing. */
void InitInstCache(void)
{
    /* intentionally empty */
}
/* Placeholder for loading an instruction-cache line from memory: does nothing. */
void LoadInstCacheLineFromMemory(UINT64 Address, UINT32 CacheLineAddress)
{
    /* Parameters are unused until the instruction cache is implemented. */
    (void)Address;
    (void)CacheLineAddress;
}
/*
 * Placeholder instruction-cache access.
 * Returns 'M' (miss) for every access ('H' would mean hit) and never
 * writes to InstResult.
 */
UINT8 AccessInstCache(UINT64 Address, UINT8 Operation, UINT8 InstSize, UINT64* InstResult)
{
    const UINT8 MissFlag = 'M';

    (void)Address;
    (void)Operation;
    (void)InstSize;
    (void)InstResult;
    return MissFlag;
}

792
cachelab/CacheHelper.c Normal file
View File

@ -0,0 +1,792 @@
///////////////////////////////////////////////////////////////////////
//// Copyright 2022 by mars. //
///////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include "getopt.h"
#include "cbsl.h"
/*
 * Evaluate X once and print its source text to stderr when it equals cbsl_error.
 * Wrapped in do { } while (0) so that `CBSL_ERROR_CHECK(x);` is a single
 * statement and stays valid as the body of an unbraced if / else.
 */
#define CBSL_ERROR_CHECK(X) do { if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); } } while (0)
/*
fcn ideal% #items #buckets dup% fl add_usec find_usec del-all usec
--- ------ ---------- ---------- ----- -- ---------- ---------- ------------
OAT 89.1% 597731 524288 0% ok 317292 179676 59726
FNV 88.6% 597731 262144 0% ok 226650 220626 61619
JEN 87.8% 597731 524288 0% ok 321989 175945 59956
BER 86.4% 597731 262144 0% ok 198477 179332 60901
SAX 70.4% 597731 524288 0% ok 270281 196427 64064
SFH 69.2% 597731 524288 0% ok 289843 165105 61860
FNV1A_Pippip_Yurii 更快??
*/
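// Hash function used by uthash: the default is replaced below by FNV1A_Pippip_Yurii (HASH_OAT is kept as a commented-out alternative)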
#include "uthash.h"
#undef HASH_FUNCTION
//#define HASH_FUNCTION HASH_OAT
#define HASH_FUNCTION(keyptr,keylen,hashv) (hashv) = FNV1A_Pippip_Yurii((UINT64 *)keyptr)
#define VERBOSE_MSG 0
#ifdef _WIN32
#define ZLIB_WINAPI
#endif
#include "common.h"
#ifdef _WIN64
#pragma comment(lib,"..\\zstd\\libzstd_static-win64.lib")
#elif _WIN32
#pragma comment(lib,"..\\zstd\\libzstd_static-win32.lib")
#endif
/*
Simulated memory, accessed through a hash table so that sparse, randomly
scattered memory contents can be stored compactly.
*/
struct MemoryDataStruct
{
UINT64 Address; // hash key: byte address of the word, aligned down to 8 bytes by ReadMemoryHash / WriteMemoryHash
UINT64 Data; // the 64-bit word stored at that address
UT_hash_handle memory; // makes this structure hashable
} *MemoryHash = NULL; // head of the uthash table; NULL while the table is empty
static void WriteMemoryHash(UINT64 Address, UINT64 WriteValue, UINT8 WriteSize);
static UINT64 ReadMemoryHash(UINT64 Address, UINT8 ReadSize);
#define MEMORY_TRACE_CHUNK (1<<20) // the MemoryTrace array grows by 1M records at a time
// One record of the memory trace.
// NOTE: main() writes and reads the MemoryTrace array to/from the .bin file as raw bytes,
// so the on-disk format follows this struct's in-memory layout.
struct MemoryTraceStruct
{
UINT8 Operation; // 'I' (instruction), 'L' (load), 'S' (store) or 'M' (modify) for records accepted by process_TRACE_line
UINT64 Address; // byte address of the access
UINT8 Size; // access size in bytes; process_TRACE_line accepts 1, 2, 4 or 8
} *MemoryTrace;
UINT64 MemoryTraceCounter, MemoryTraceCapacity; // number of records stored / number of records allocated
/*
 * Statistics counters
 */
UINT64 GlobalMemoryModifyCounter; // number of 'M' trace records
UINT64 GlobalMemoryInstCounter; // number of 'I' trace records
UINT64 GlobalMemoryWriteCounter; // number of 'S' trace records
UINT64 GlobalCacheWriteHitCounter; // 'S' accesses reported as hits by AccessDataCache
UINT64 GlobalMemoryReadCounter; // number of 'L' trace records
UINT64 GlobalCacheReadHitCounter; // 'L' hits whose data matched the reference memory
UINT64 GlobalCacheModifyHitCounter; // 'M' accesses reported as hits by AccessDataCache
UINT64 GlobalSimReadMemoryCounter; // calls to ReadMemory()
UINT64 GlobalSimWriteMemoryCounter; // calls to WriteMemory()
UINT64 GlobalCacheInstHitCounter; // 'I' hits whose data matched the reference memory
#define CHUNK 16384 // size in bytes of the trace line buffer and of the output file-name buffer
/*
 * Trim a string in place.
 * Leading spaces and tabs are removed; trailing spaces, tabs, '\n' and '\r'
 * are removed. Characters in between are kept unchanged.
 * A NULL pointer is ignored.
 *
 * The trailing scan is bounded by the start of the remaining text, so a
 * string that consists only of whitespace / line endings becomes "" without
 * ever reading before the buffer.
 */
static void Trim(char* src)
{
    const char* begin;
    size_t len;

    if (src == NULL)
        return;

    begin = src;
    while (*begin == ' ' || *begin == '\t')
        ++begin;

    len = strlen(begin);
    while (len > 0 && (begin[len - 1] == ' ' || begin[len - 1] == '\t' ||
                       begin[len - 1] == '\n' || begin[len - 1] == '\r'))
        --len;

    /* Source and destination overlap when leading blanks were skipped. */
    memmove(src, begin, len);
    src[len] = '\0';
}
/* A substring described by its first character and its length. */
typedef struct {
    const char* start;
    size_t len;
} token;

// https://stackoverflow.com/a/39286524
/*
 * Split str at every space, tab or comma.
 *
 * Each delimiter ends a field, so consecutive delimiters produce empty fields
 * and the result always has (number of delimiters + 1) entries.
 *
 * Returns a malloc'ed array of malloc'ed, NUL-terminated copies of the fields
 * and stores the number of fields in *field_count. The caller frees every
 * element and then the array.
 * If an allocation fails, everything allocated so far is released, NULL is
 * returned and *field_count is set to 0.
 */
static char** split_space(const char* str, int* field_count)
{
    char** array;
    token* tokens;
    size_t start = 0, stop, toks = 0, t;

    *field_count = 0;

    /* A string of length n has at most n + 1 fields. */
    tokens = malloc((strlen(str) + 1) * sizeof *tokens);
    if (tokens == NULL)
        return NULL;

    for (stop = 0; str[stop]; stop++) {
        if (str[stop] == ' ' || str[stop] == '\t' || str[stop] == ',')
        {
            tokens[toks].start = str + start;
            tokens[toks].len = stop - start;
            toks++;
            start = stop + 1;
        }
    }
    /* Mop up the last token */
    tokens[toks].start = str + start;
    tokens[toks].len = stop - start;
    toks++;

    array = malloc(toks * sizeof *array);
    if (array == NULL) {
        free(tokens);
        return NULL;
    }

    for (t = 0; t < toks; t++) {
        /* calloc zero-fills, so the copy is NUL-terminated */
        char* copy = calloc(tokens[t].len + 1, 1);
        if (copy == NULL) {
            while (t > 0)
                free(array[--t]);
            free(array);
            free(tokens);
            return NULL;
        }
        memcpy(copy, tokens[t].start, tokens[t].len);
        array[t] = copy;
    }
    free(tokens);
    *field_count = (int)toks;
    return array;
}
// Parameters of the linear congruential generator used by GetRand8/16/32:
// Xn <- (SR_A * Xn + SR_C) mod SR_M
#define SR_A ((UINT64)1103515245)
#define SR_C ((UINT64)12345)
#define SR_M ((UINT64)1<<32)
UINT64 Xn; // current generator state; set by SyncRand, advanced by every GetRand* call
/* Set the generator state Xn to Seed, so later GetRand* calls restart from it. */
static void SyncRand(UINT64 Seed)
{
    UINT64* const State = &Xn;

    *State = Seed;
}
static UINT8 GetRand8()
{
Xn = ((SR_A * Xn + SR_C) % SR_M);
return (UINT8)Xn;
}
static UINT16 GetRand16()
{
Xn = ((SR_A * Xn + SR_C) % SR_M);
return (UINT16)Xn;
}
static UINT32 GetRand32()
{
Xn = ((SR_A * Xn + SR_C) % SR_M);
return (UINT32)Xn;
}
/*
 * Build a 64-bit value from two consecutive GetRand32() results:
 * the first call supplies the upper 32 bits, the second the lower 32 bits.
 */
static UINT64 GetRand64()
{
    const UINT64 High = GetRand32();
    const UINT64 Low = GetRand32();

    return (High << 32) | Low;
}
//https://www.codeproject.com/articles/716530/fastest-hash-function-for-table-lookups-in-c
/*
 * Hash one 64-bit key to 32 bits: XOR the key into a fixed 64-bit start
 * value, multiply by PRIME, then fold the product down to 32 bits.
 * Installed as uthash's HASH_FUNCTION for the memory table.
 */
static inline UINT32 FNV1A_Pippip_Yurii(UINT64* Address)
{
    const UINT32 PRIME = 591798841;
    const UINT64 StartValue = 14695981039346656037ULL;
    UINT64 Mixed;
    UINT32 Folded;

    Mixed = (StartValue ^ *Address) * PRIME;
    Folded = (uint32_t)(Mixed ^ (Mixed >> 32));
    return Folded ^ (Folded >> 16);
}
/*
 * Initialise the memory hash table. Nothing to do: MemoryHash is defined as
 * NULL and entries are added on demand by WriteMemoryHash.
 */
static void InitMemoryHash(void)
{
    /* intentionally empty */
}
// Remove every entry from the MemoryHash table and free its storage.
static void FreeMemoryHash()
{
struct MemoryDataStruct* p, * tmp; // p: current entry; tmp: second cursor required by HASH_ITER
HASH_ITER(memory, MemoryHash, p, tmp)
{
// Unlink the entry from the table first, then release it.
HASH_DELETE(memory, MemoryHash, p);
free(p);
}
}
/*
 * Overwrite part of the 64-bit word at *Location.
 *
 * Offset:     byte offset of the field inside the word; for 2- and 4-byte
 *             writes it is aligned down to the field size
 * WriteValue: new value; only its low WriteSize bytes are used
 * WriteSize:  1, 2, 4 or 8 bytes; any other size leaves the word unchanged
 *
 * Byte offset 0 corresponds to the least-significant byte of the word.
 */
static void UpdateMemoryData(UINT64* Location, UINT8 Offset, UINT64 WriteValue, UINT8 WriteSize)
{
    UINT64 FieldMask;
    UINT8 BitShift;

    if (WriteSize == 8)
    {
        /* The whole word is replaced; Offset is irrelevant. */
        *Location = WriteValue;
        return;
    }

    if (WriteSize == 1)
    {
        FieldMask = (UINT64)0xFF;
        BitShift = (UINT8)(Offset << 3); /* bytes -> bits */
    }
    else if (WriteSize == 2)
    {
        FieldMask = (UINT64)0xFFFF;
        BitShift = (UINT8)((UINT8)(Offset & 0xFE) << 3); /* align to 2 bytes, then bytes -> bits */
    }
    else if (WriteSize == 4)
    {
        FieldMask = (UINT64)0xFFFFFFFF;
        BitShift = (UINT8)((UINT8)(Offset & 0xFC) << 3); /* align to 4 bytes, then bytes -> bits */
    }
    else
    {
        return; /* unsupported size: the stored word keeps its value */
    }

    /* Clear the field, then insert the new bits. */
    *Location = (*Location & ~(FieldMask << BitShift)) | ((WriteValue & FieldMask) << BitShift);
}
/*
 * Write WriteSize bytes of WriteValue to the simulated memory at Address.
 *
 * Memory is stored as 64-bit words keyed by the 8-byte-aligned address. When
 * the word does not exist yet it is created with the filler pattern
 * 0xDEADBEEFDEADC0DE before the requested bytes are patched in.
 * If the new word cannot be allocated the simulation cannot continue, so an
 * error is printed and the process exits.
 */
static void WriteMemoryHash(UINT64 Address, UINT64 WriteValue, UINT8 WriteSize)
{
    UINT64 AlignAddress;
    UINT8 Offset;
    struct MemoryDataStruct* s;

    AlignAddress = Address & 0xFFFFFFFFFFFFFFF8;
    Offset = Address & 0x7;
    HASH_FIND(memory, MemoryHash, &AlignAddress, sizeof(AlignAddress), s);
    if (s == NULL)
    {
        /* No word at this address yet: create one. */
        s = malloc(sizeof *s);
        if (s == NULL)
        {
            fprintf(stderr, "[%s] out of memory while adding a word to the simulated memory\n", __func__);
            exit(EXIT_FAILURE);
        }
        s->Address = AlignAddress;
        s->Data = 0xDEADBEEFDEADC0DE;
        /* The third argument names the key FIELD of the struct (s->Address),
           not the Address parameter of this function. */
        HASH_ADD(memory, MemoryHash, Address, sizeof(AlignAddress), s);
    }
    UpdateMemoryData(&(s->Data), Offset, WriteValue, WriteSize);
}
/*
 * Read ReadSize bytes from the simulated memory at Address.
 *
 * The containing 8-byte-aligned word is looked up in the hash table; when it
 * is missing, the filler pattern 0xDEADBEEFDEADC0DE is used instead. The
 * requested field is then extracted, with byte offset 0 being the
 * least-significant byte. For 2- and 4-byte reads the offset is aligned down
 * to the field size. A ReadSize of 8 (or any unsupported size) returns the
 * whole word.
 */
static UINT64 ReadMemoryHash(UINT64 Address, UINT8 ReadSize)
{
    UINT64 AlignAddress = Address & 0xFFFFFFFFFFFFFFF8;
    UINT8 Offset = Address & 0x7;
    struct MemoryDataStruct* s;
    UINT64 Word;

    HASH_FIND(memory, MemoryHash, &AlignAddress, sizeof(AlignAddress), s);
    if (s != NULL)
    {
        Word = s->Data;
    }
    else
    {
        /* The address was never written: report it (if enabled) and use the filler. */
        if (VERBOSE_MSG)
            printf("[%s] 试图从一个未初始化的内存读取数据Address=%016llX\n", __func__, Address);
        Word = 0xDEADBEEFDEADC0DE;
    }

    if (ReadSize == 1)
    {
        Offset = Offset << 3; /* bytes -> bits */
        return (Word >> Offset) & 0xFF;
    }
    if (ReadSize == 2)
    {
        Offset = Offset & 0xFE; /* align to 2 bytes */
        Offset = Offset << 3;
        return (Word >> Offset) & 0xFFFF;
    }
    if (ReadSize == 4)
    {
        Offset = Offset & 0xFC; /* align to 4 bytes */
        Offset = Offset << 3;
        return (Word >> Offset) & 0xFFFFFFFF;
    }
    return Word;
}
/*
 * Read the 64-bit word containing Address from the simulated memory and
 * count the access in GlobalSimReadMemoryCounter.
 */
UINT64 ReadMemory(UINT64 Address)
{
    const UINT64 Word = ReadMemoryHash(Address, 8);

    GlobalSimReadMemoryCounter += 1;
    return Word;
}
/*
 * Write the 64-bit word WriteData to the simulated memory at Address and
 * count the access in GlobalSimWriteMemoryCounter.
 */
void WriteMemory(UINT64 Address, UINT64 WriteData)
{
    WriteMemoryHash(Address, WriteData, 8);
    GlobalSimWriteMemoryCounter += 1;
}
/*
 * Count the trace records by operation type and store the totals in
 * GlobalMemoryInstCounter / ReadCounter / WriteCounter / ModifyCounter.
 * Records with any other operation code are not counted.
 */
static void MemoryTraceStat()
{
    UINT64 Inst = 0, Loads = 0, Stores = 0, Modifies = 0;
    UINT64 Index;

    for (Index = 0; Index < MemoryTraceCounter; Index++)
    {
        switch (MemoryTrace[Index].Operation)
        {
        case 'I': Inst++; break;
        case 'L': Loads++; break;
        case 'S': Stores++; break;
        case 'M': Modifies++; break;
        default: break;
        }
    }

    GlobalMemoryInstCounter = Inst;
    GlobalMemoryReadCounter = Loads;
    GlobalMemoryWriteCounter = Stores;
    GlobalMemoryModifyCounter = Modifies;
}
/*
 * Parse one line of a text trace and append it to MemoryTrace.
 *
 * linebuf: the line; it is trimmed in place
 * lineno:  line number, used only in messages
 *
 * Accepted layout: "Operation Address,Size" where Operation is I, L, S or M,
 * Address is hexadecimal and Size is 1, 2, 4 or 8. Fields are split at
 * spaces, tabs and commas; a line that splits into 4 fields is taken to have
 * one empty field after the operation (e.g. two spaces), so the address and
 * size are read from fields 2 and 3.
 * Empty lines, lines of 100 characters or more, and lines starting with
 * '#', '/', '=' or '-' are skipped.
 *
 * For every accepted record the addressed memory is first initialised with a
 * random value, then the record is stored. The trace array grows by
 * MEMORY_TRACE_CHUNK records when full; if growing fails the existing records
 * are kept and the current record is dropped.
 *
 * Returns 0 when the line was skipped or stored, -1 when it was rejected or
 * could not be stored.
 */
static int process_TRACE_line(char* linebuf, UINT64 lineno)
{
    int i;
    int ret = 0;
    int linelen;
    char** pt;
    int field_count;

    /* Strip leading / trailing blanks and line endings. */
    Trim(linebuf);
    linelen = (int)strlen(linebuf);
    /* Skip empty lines. */
    if (linelen == 0)
        return 0;
    /* Skip over-long lines. */
    if (linelen >= 100)
        return 0;
    /* Skip comment lines. */
    if (linebuf[0] == '#' || linebuf[0] == '/' || linebuf[0] == '=' || linebuf[0] == '-')
        return 0;

    pt = split_space(linebuf, &field_count);
    if (pt == NULL)
        return -1;

    if (field_count == 3 || field_count == 4)
    {
        /* Layout: Operation Address,Size */
        const int AddressField = (field_count == 3) ? 1 : 2;
        UINT8 Operation = pt[0][0];
        UINT64 Address = strtoull(pt[AddressField], NULL, 16);
        /* Parse into a wide type and validate BEFORE narrowing, so that a
           value such as 257 is rejected instead of wrapping to 1. */
        long ParsedSize = strtol(pt[AddressField + 1], NULL, 10);

        if (Operation != 'I' && Operation != 'L' && Operation != 'S' && Operation != 'M')
        {
            if (VERBOSE_MSG)
                printf("[%s] line %lld 操作类型错误不是I、L、S、M %s\n", __func__, lineno, linebuf);
            ret = -1;
        }
        else if (ParsedSize != 1 && ParsedSize != 2 && ParsedSize != 4 && ParsedSize != 8)
        {
            if (VERBOSE_MSG)
                printf("[%s] line %lld 数据大小错误不是1、2、4、8 %s\n", __func__, lineno, linebuf);
            ret = -1;
        }
        else
        {
            UINT8 Size = (UINT8)ParsedSize;
            UINT64 RandValue64 = GetRand64();

            /* Initialisation phase: every address in the trace gets random contents. */
            WriteMemoryHash(Address, RandValue64, Size);

            if (MemoryTraceCounter >= MemoryTraceCapacity)
            {
                struct MemoryTraceStruct* Grown;

                if (VERBOSE_MSG)
                    printf("[%s] line %lld 扩展内存到%llu\n", __func__, lineno, MemoryTraceCapacity + MEMORY_TRACE_CHUNK);
                /* Keep the old pointer until realloc has succeeded, so a
                   failure neither leaks nor loses the records read so far. */
                Grown = realloc(MemoryTrace, (MemoryTraceCapacity + MEMORY_TRACE_CHUNK) * sizeof(struct MemoryTraceStruct));
                if (Grown == NULL)
                {
                    printf("[%s] line %lld 分配内存失败!\n", __func__, lineno);
                    ret = -1;
                }
                else
                {
                    MemoryTrace = Grown;
                    MemoryTraceCapacity += MEMORY_TRACE_CHUNK;
                }
            }

            if (MemoryTrace != NULL && MemoryTraceCounter < MemoryTraceCapacity)
            {
                MemoryTrace[MemoryTraceCounter].Operation = Operation;
                MemoryTrace[MemoryTraceCounter].Address = Address;
                MemoryTrace[MemoryTraceCounter].Size = Size;
                MemoryTraceCounter++;
                ret = 0;
            }
            else
            {
                ret = -1; /* no room for the record */
            }
        }
    }
    else
    {
        printf("[%s] line %lld 格式错误![fields=%d] %s\n", __func__, lineno, field_count, linebuf);
        ret = -1;
    }

    for (i = 0; i < field_count; i++)
        free(pt[i]);
    free(pt);
    return ret;
}
/*
 * Read a compressed text trace line by line and build MemoryTrace from it.
 *
 * Every line is handed to process_TRACE_line(); lines it rejects are skipped
 * and reading continues. Reading stops at the end of the file, or as soon as
 * cbsl_readline() reports cbsl_error: the error is printed by
 * CBSL_ERROR_CHECK, the buffer of the failed read is not processed, and the
 * records read so far are kept. (Previously an error left the loop running
 * on a stale buffer with no way to terminate.)
 *
 * Prints a summary table afterwards.
 * Returns 0 when at least one record was read, -1 when the file could not be
 * opened or yielded no records.
 */
static int parse_TRACE_file(char* filename)
{
    UINT64 lineno;
    int ret_parse_line = 0;
    clock_t tick1, tick2;
    cbsl_errors cbsl_ret = cbsl_error;
    char linebuf[CHUNK];
    cbsl_ctx* ctx;

    tick1 = clock();
    ctx = cbsl_open(cbsl_load_mode, filename);
    if (ctx == NULL)
    {
        printf("[%s] 不能以读方式打开Trace文件 %s\n", __func__, filename);
        return -1;
    }

    lineno = 0;
    for (;;)
    {
        lineno++;
        cbsl_ret = cbsl_readline(ctx, linebuf, sizeof(linebuf)); /* read one line */
        CBSL_ERROR_CHECK(cbsl_ret);
        if (cbsl_ret == cbsl_error)
            break; /* linebuf contents are not trustworthy after a failed read */

        ret_parse_line = process_TRACE_line(linebuf, lineno); /* parse one line */
        if (ret_parse_line == 1 || cbsl_ret == cbsl_end)
            break;

        if (lineno % 10000 == 0)
        {
            printf("\33[?25l[%s] ====已处理%llu行====\r", __func__, lineno); /* hide the cursor, show progress */
        }
    }
    printf("\n\33[?25h"); /* show the cursor again */
    CBSL_ERROR_CHECK(cbsl_close(ctx));
    tick2 = clock();

    MemoryTraceStat();
    printf("[%s] +-----------------------------------------------------+\n", __func__);
    printf("[%s] | Memory Trace数量 \t : %10llu |\n", __func__, MemoryTraceCounter);
    printf("[%s] | Instruction操作数量 \t : %10llu |\n", __func__, GlobalMemoryInstCounter);
    printf("[%s] | Data Load操作数量 \t : %10llu |\n", __func__, GlobalMemoryReadCounter);
    printf("[%s] | Data Store操作数量 \t : %10llu |\n", __func__, GlobalMemoryWriteCounter);
    printf("[%s] | Data Modify操作数量 \t : %10llu |\n", __func__, GlobalMemoryModifyCounter);
    printf("[%s] | 时间耗费ms \t : %10.0f |\n", __func__, ((float)(tick2 - tick1) / CLOCKS_PER_SEC) * 1000.0);
    printf("[%s] +-----------------------------------------------------+\n", __func__);

    if (MemoryTraceCounter == 0)
        return -1;
    return 0;
}
/*
 * Replay the whole memory trace against the cache implementation.
 *
 * For every data access ('L', 'S', 'M') a fresh random value is drawn and
 * AccessDataCache() is called. Stores and modifies are also applied directly
 * to the reference memory, so that the value returned by a later load HIT can
 * be compared with it. Instruction accesses ('I') go through
 * AccessInstCache() and are checked the same way.
 * A load or instruction hit that returns different data than the reference
 * memory is a fatal error: it is reported and the replay stops.
 *
 * Prints the statistics table afterwards.
 * Returns 0 on success, -1 when a data mismatch was detected.
 */
int SimTrace()
{
    int ret = 0;
    UINT64 i; /* same width as MemoryTraceCounter, so the loop cannot wrap */
    UINT8 Operation;
    UINT64 Address;
    UINT8 Size;
    UINT64 RandValue64;
    UINT64 DataFromCache;
    UINT64 DataFromMemory;
    UINT8 MissFlag;
    UINT64 DataAccesses, DataHits;
    double DataHitRate, InstHitRate;
    clock_t tick1, tick2;

    tick1 = clock();
    GlobalMemoryInstCounter = 0;
    GlobalMemoryReadCounter = 0;
    GlobalMemoryWriteCounter = 0;
    GlobalMemoryModifyCounter = 0;
    GlobalCacheInstHitCounter = 0;
    GlobalCacheReadHitCounter = 0;
    GlobalCacheWriteHitCounter = 0;
    GlobalCacheModifyHitCounter = 0;
    GlobalSimReadMemoryCounter = 0;
    GlobalSimWriteMemoryCounter = 0;

    for (i = 0; i < MemoryTraceCounter; i++)
    {
        Operation = MemoryTrace[i].Operation;
        Address = MemoryTrace[i].Address;
        Size = MemoryTrace[i].Size;

        if (Operation == 'L' || Operation == 'S' || Operation == 'M')
        {
            /* Drawn for loads as well, which keeps the random sequence in step. */
            RandValue64 = GetRand64();
            MissFlag = AccessDataCache(Address, Operation, Size, RandValue64, &DataFromCache);

            if (Operation == 'S')
            {
                WriteMemoryHash(Address, RandValue64, Size);
                GlobalMemoryWriteCounter++;
                if (MissFlag == 'H')
                    GlobalCacheWriteHitCounter++;
            }
            else if (Operation == 'M')
            {
                WriteMemoryHash(Address, RandValue64, Size);
                GlobalMemoryModifyCounter++;
                if (MissFlag == 'H')
                    GlobalCacheModifyHitCounter++;
            }
            else /* 'L' */
            {
                GlobalMemoryReadCounter++;
                if (MissFlag == 'H')
                {
                    DataFromMemory = ReadMemoryHash(Address, Size);
                    if (DataFromMemory == DataFromCache)
                        GlobalCacheReadHitCounter++;
                    else
                    {
                        printf("[%s] 关键错误数据Cache读错误内存地址=%016llX 内存数据=%016llX Cache读数据=%016llX 大小%d字节\n", __func__, Address, DataFromMemory, DataFromCache, Size);
                        ret = -1;
                        break;
                    }
                }
            }
        }
        else if (Operation == 'I')
        {
            MissFlag = AccessInstCache(Address, Operation, Size, &DataFromCache);
            GlobalMemoryInstCounter++;
            if (MissFlag == 'H')
            {
                DataFromMemory = ReadMemoryHash(Address, Size);
                if (DataFromMemory == DataFromCache)
                    GlobalCacheInstHitCounter++;
                else
                {
                    printf("[%s] 关键错误指令Cache读错误内存地址=%016llX 内存数据=%016llX Cache读数据=%016llX 大小%d字节\n", __func__, Address, DataFromMemory, DataFromCache, Size);
                    ret = -1;
                    break;
                }
            }
        }
    }
    tick2 = clock();

    /* Both hit rates report 0 when there were no accesses of that kind,
       instead of dividing by zero. */
    DataAccesses = GlobalMemoryReadCounter + GlobalMemoryWriteCounter + GlobalMemoryModifyCounter;
    DataHits = GlobalCacheReadHitCounter + GlobalCacheWriteHitCounter + GlobalCacheModifyHitCounter;
    DataHitRate = (DataAccesses == 0) ? 0 : (double)(DataHits * 100) / (double)DataAccesses;
    InstHitRate = (GlobalMemoryInstCounter == 0) ? 0 : (double)(GlobalCacheInstHitCounter * 100) / (double)GlobalMemoryInstCounter;

    printf("[%s] +-----------------------------------------------------+\n", __func__);
    printf("[%s] | Memory Trace数量 \t : %10llu |\n", __func__, MemoryTraceCounter);
    printf("[%s] | Instruction操作数量 \t : %10llu |\n", __func__, GlobalMemoryInstCounter);
    printf("[%s] | Data Load操作数量 \t : %10llu |\n", __func__, GlobalMemoryReadCounter);
    printf("[%s] | Data Store操作数量 \t : %10llu |\n", __func__, GlobalMemoryWriteCounter);
    printf("[%s] | Data Modify操作数量 \t : %10llu |\n", __func__, GlobalMemoryModifyCounter);
    printf("[%s] | Instruction操作Cache命中数量 \t : %10llu |\n", __func__, GlobalCacheInstHitCounter);
    printf("[%s] | Data Load操作Cache命中数量 \t : %10llu |\n", __func__, GlobalCacheReadHitCounter);
    printf("[%s] | Data Store操作Cache命中数量 \t : %10llu |\n", __func__, GlobalCacheWriteHitCounter);
    printf("[%s] | Data Modify操作Cache命中数量 \t : %10llu |\n", __func__, GlobalCacheModifyHitCounter);
    printf("[%s] | Cache访存数量 \t : %10llu |\n", __func__, GlobalSimReadMemoryCounter + GlobalSimWriteMemoryCounter);
    printf("[%s] | Cache读存储器数量 \t : %10llu |\n", __func__, GlobalSimReadMemoryCounter);
    printf("[%s] | Cache写存储器数量 \t : %10llu |\n", __func__, GlobalSimWriteMemoryCounter);
    printf("[%s] | Data Cache命中率 \t : %9.2f%% |\n", __func__, DataHitRate);
    printf("[%s] | Inst Cache命中率 \t : %9.2f%% |\n", __func__, InstHitRate);
    printf("[%s] | 时间耗费ms \t : %10.0f |\n", __func__, ((float)(tick2 - tick1) / CLOCKS_PER_SEC) * 1000.0);
    printf("[%s] +-----------------------------------------------------+\n", __func__);
    return ret;
}
/*
 * Print the usage text. argv[0] is inserted as the program name in the
 * example command lines.
 */
void DisplayHelp(char* argv[])
{
    char* const Program = argv[0];

    printf("[%s] 请在Cache.c中实现你自己的Cache然后编译项目执行。\n", __func__);
    printf("[%s] 从文本格式压缩文件中读取Trace:\t%s <trace>.zst\n", __func__, Program);
    printf("[%s] 例如: %s ./traces/dave.trace.zst\n", __func__, Program);
    printf("[%s] 将文本格式的Trace转换到bin格式:\t%s -w <trace>.zst\n", __func__, Program);
    printf("[%s] 从bin格式文件中读取Trace:\t\t%s -r <trace>.bin.zst\n", __func__, Program);
    printf("[%s] 提示从bin中读取Trace速度要远远快于从文本格式中读取。\n", __func__);
}
/*
 * Entry point of the cache simulator.
 *
 * Modes (selected with getopt):
 *   <trace>.zst          read a compressed text trace, then simulate
 *   -w <trace>.zst       read a compressed text trace and save it as
 *                        <trace>.bin.zst; no simulation is run
 *   -r <trace>.bin.zst   read a binary trace, then simulate
 *   -h                   print the usage text
 *
 * Returns 0 on success, 1 when only the usage text was shown, and -1 on failure.
 */
int main(int argc, char* argv[])
{
    int ret_val = -1;
    UINT8 ReadBinFileFlag, WriteBinFileFlag, ReadTxtFileFlag;
    char* pfilename;
    char pfilename_bin[CHUNK];
    clock_t tick1, tick2;
    UINT64 i;

    printf("[%s] Cache模拟器框架 v3.0 by mars, 2022\n", __func__);
    pfilename = NULL;
    ReadBinFileFlag = 0;
    WriteBinFileFlag = 0;
    ReadTxtFileFlag = 1;

    /* check arguments; the leading '-' in the option string makes getopt
       report non-option arguments with the code 1 */
    while (1) {
        int c = getopt(argc, argv, "-hrw");
        if (c == -1) break;
        switch (c) {
        case 'h': DisplayHelp(argv); return 1;
        case 'r': ReadBinFileFlag = 1; ReadTxtFileFlag = 0; break;
        case 'w': WriteBinFileFlag = 1; ReadTxtFileFlag = 0; break;
        case 1: pfilename = optarg; break;
        default: break;
        }
    }
    if ((ReadBinFileFlag == 1 || WriteBinFileFlag == 1 || ReadTxtFileFlag == 1) && pfilename == NULL)
    {
        DisplayHelp(argv);
        return 1;
    }

    InitMemoryHash();
    printf("[%s] 初始化存储器读入Trace文件[%s],请稍后...\n", __func__, pfilename);

    if (ReadTxtFileFlag || WriteBinFileFlag)
    {
        MemoryTrace = malloc(MEMORY_TRACE_CHUNK * sizeof(struct MemoryTraceStruct));
        if (MemoryTrace == NULL)
        {
            printf("[%s] 分配内存失败\n", __func__);
            return -1;
        }
        MemoryTraceCounter = 0;
        MemoryTraceCapacity = MEMORY_TRACE_CHUNK;

        ret_val = parse_TRACE_file(pfilename);
        if (ret_val != 0)
        {
            FreeMemoryHash();
            free(MemoryTrace);
            /* Report the trace file itself; argv[1] may be an option such as -w. */
            printf("[%s] 解压缩文件失败 %s\n", __func__, pfilename);
            return -1;
        }

        if (WriteBinFileFlag)
        {
            /* Save the in-memory MemoryTrace to <name>.bin.zst */
            size_t filenamelen = strlen(pfilename);

            if (filenamelen + 5 > sizeof(pfilename_bin))
            {
                /* "<name>.bin.zst" plus its terminator needs filenamelen + 5 bytes */
                printf("[%s] 文件名过长不能转换\n", __func__);
                ret_val = -1;
            }
            else if (filenamelen > 5)
            {
                const char* ext = pfilename + filenamelen - 4;

                if (strcmp(ext, ".zst") == 0 || strcmp(ext, ".ZST") == 0)
                {
                    cbsl_ctx* ctx;

                    /* Copy everything up to and including the '.', then
                       replace the extension by "bin.zst" (with terminator). */
                    memcpy(pfilename_bin, pfilename, filenamelen - 3);
                    memcpy(pfilename_bin + filenamelen - 3, "bin.zst", sizeof("bin.zst"));

                    ctx = cbsl_open(cbsl_store_mode, pfilename_bin);
                    if (ctx == NULL)
                    {
                        printf("[%s] 不能以写方式打开文件 %s\n", __func__, pfilename_bin);
                        ret_val = -1;
                    }
                    else
                    {
                        CBSL_ERROR_CHECK(cbsl_write(ctx, &MemoryTraceCounter, sizeof(MemoryTraceCounter)));
                        CBSL_ERROR_CHECK(cbsl_write(ctx, MemoryTrace, MemoryTraceCounter * sizeof(struct MemoryTraceStruct)));
                        CBSL_ERROR_CHECK(cbsl_close(ctx));
                        printf("[%s] 已经将Trace保存到文件中 %s\n", __func__, pfilename_bin);
                        ret_val = 0;
                    }
                }
                else
                {
                    printf("[%s] 文件扩展名不是.zst或者.ZST不能转换\n", __func__);
                    ret_val = -1;
                }
            }
            else
            {
                printf("[%s] 文件名长度不足5字符不能转换\n", __func__);
                ret_val = -1;
            }
            FreeMemoryHash();
            free(MemoryTrace);
            return ret_val;
        }
    }
    else if (ReadBinFileFlag)
    {
        cbsl_ctx* ctx = cbsl_open(cbsl_load_mode, pfilename);
        if (ctx == NULL)
        {
            printf("[%s] 不能以读方式打开文件 %s\n", __func__, pfilename);
            ret_val = -1;
        }
        else
        {
            tick1 = clock();
            CBSL_ERROR_CHECK(cbsl_read(ctx, &MemoryTraceCounter, sizeof(MemoryTraceCounter)));

            /* The record count comes from the file: make sure the byte size
               does not overflow and the allocation succeeded before reading
               the records into it. */
            MemoryTrace = NULL;
            if (MemoryTraceCounter <= (size_t)-1 / sizeof(struct MemoryTraceStruct))
                MemoryTrace = malloc(MemoryTraceCounter * sizeof(struct MemoryTraceStruct));

            if (MemoryTrace == NULL && MemoryTraceCounter != 0)
            {
                printf("[%s] 分配内存失败\n", __func__);
                CBSL_ERROR_CHECK(cbsl_close(ctx));
                MemoryTraceCounter = 0;
                MemoryTraceCapacity = 0;
                ret_val = -1;
            }
            else
            {
                UINT64 RandValue64;

                MemoryTraceCapacity = MemoryTraceCounter;
                CBSL_ERROR_CHECK(cbsl_read(ctx, MemoryTrace, MemoryTraceCounter * sizeof(struct MemoryTraceStruct)));
                CBSL_ERROR_CHECK(cbsl_close(ctx));

                /* Initialisation phase: every address in the trace gets random contents. */
                for (i = 0; i < MemoryTraceCounter; i++)
                {
                    RandValue64 = GetRand64();
                    WriteMemoryHash(MemoryTrace[i].Address, RandValue64, MemoryTrace[i].Size);
                }
                MemoryTraceStat();
                tick2 = clock();
                printf("[%s] +-----------------------------------------------------+\n", __func__);
                printf("[%s] | Memory Trace数量 \t : %10llu |\n", __func__, MemoryTraceCounter);
                printf("[%s] | Instruction操作数量 \t : %10llu |\n", __func__, GlobalMemoryInstCounter);
                printf("[%s] | Data Load操作数量 \t : %10llu |\n", __func__, GlobalMemoryReadCounter);
                printf("[%s] | Data Store操作数量 \t : %10llu |\n", __func__, GlobalMemoryWriteCounter);
                printf("[%s] | Data Modify操作数量 \t : %10llu |\n", __func__, GlobalMemoryModifyCounter);
                printf("[%s] | 时间耗费ms \t : %10.0f |\n", __func__, ((float)(tick2 - tick1) / CLOCKS_PER_SEC) * 1000.0);
                printf("[%s] +-----------------------------------------------------+\n", __func__);
                printf("[%s] 已经将Trace从文件中读取 %s\n", __func__, pfilename);
                ret_val = 0;
            }
        }
        if (ret_val == -1)
        {
            free(MemoryTrace);
            FreeMemoryHash();
            return ret_val;
        }
    }

    printf("[%s] 处理Trace文件完毕\n", __func__);
    InitDataCache();
    InitInstCache();
    printf("[%s] 开始Cache模拟请稍后...\n", __func__);
    ret_val = SimTrace();
    if (ret_val != 0)
    {
        FreeMemoryHash();
        free(MemoryTrace);
        printf("[%s] Cache模拟失败\n", __func__);
        return -1;
    }
    else
        printf("[%s] Cache模拟成功完成\n", __func__);

    free(MemoryTrace);
    FreeMemoryHash();
    return 0;
}

26
cachelab/Makefile Normal file
View File

@ -0,0 +1,26 @@
# Copyright 2022 by mars
# Description: Makefile for building a Cache Simulator.
#
LDFLAGS +=
LDLIBS += -lzstd
CPPFLAGS := -O3 -Wall -Wextra -Winline -Winit-self -Wno-sequence-point\
-Wno-unused-function -Wno-inline -fPIC -W -Wcast-qual -Wpointer-arith -Icbsl/include
#CPPFLAGS := -g
PROGRAMS := Cache
objects = Cache.o CacheHelper.o getopt.o cbsl/src/buffer.o cbsl/src/file.o cbsl/src/flush.o cbsl/src/read.o cbsl/src/record.o cbsl/src/utils.o cbsl/src/write.o
# all and clean are commands, not files: keep them working even if a file
# with one of these names exists in the directory.
.PHONY: all clean
all: $(PROGRAMS)
# Recipe lines must start with a TAB character.
# NOTE: the object files are removed after linking, so every build recompiles
# all sources.
Cache : $(objects)
	gcc $(CPPFLAGS) $(LDFLAGS) -o $@ $^ $(LDLIBS)
	rm -f $(objects)
clean:
	rm -f $(PROGRAMS) $(objects)

View File

@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.31829.152
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MyCache", "MyCache.vcxproj", "{A5677060-D4C9-432A-A29F-858971B64066}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{A5677060-D4C9-432A-A29F-858971B64066}.Debug|x64.ActiveCfg = Debug|x64
{A5677060-D4C9-432A-A29F-858971B64066}.Debug|x64.Build.0 = Debug|x64
{A5677060-D4C9-432A-A29F-858971B64066}.Debug|x86.ActiveCfg = Debug|Win32
{A5677060-D4C9-432A-A29F-858971B64066}.Debug|x86.Build.0 = Debug|Win32
{A5677060-D4C9-432A-A29F-858971B64066}.Release|x64.ActiveCfg = Release|x64
{A5677060-D4C9-432A-A29F-858971B64066}.Release|x64.Build.0 = Release|x64
{A5677060-D4C9-432A-A29F-858971B64066}.Release|x86.ActiveCfg = Release|Win32
{A5677060-D4C9-432A-A29F-858971B64066}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {D4625C4E-654D-406B-A238-D6F64DAF03BB}
EndGlobalSection
EndGlobal

View File

@ -0,0 +1,158 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>16.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword>
<ProjectGuid>{a5677060-d4c9-432a-a29f-858971b64066}</ProjectGuid>
<RootNamespace>MyCache</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v142</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>..\zstd;..\cbsl\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>..\zstd;..\cbsl\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\Cache.c" />
<ClCompile Include="..\CacheHelper.c" />
<ClCompile Include="..\cbsl\src\buffer.c" />
<ClCompile Include="..\cbsl\src\file.c" />
<ClCompile Include="..\cbsl\src\flush.c" />
<ClCompile Include="..\cbsl\src\read.c" />
<ClCompile Include="..\cbsl\src\record.c" />
<ClCompile Include="..\cbsl\src\utils.c" />
<ClCompile Include="..\cbsl\src\write.c" />
<ClCompile Include="..\getopt.c" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

26
cachelab/Readme.txt Normal file
View File

@ -0,0 +1,26 @@
注意:你只能修改Cache.c文件,其他文件请勿修改!
Linux:
1. make
2. ./Cache traces/long.trace.zst
提醒:Linux下需要安装libzstd-dev软件包,例如:apt install libzstd-dev zstd
Windows VS 2019:
1. 进入MyCache目录,打开MyCache.sln
2. 编译即可生成可执行文件
3. 启动命令行窗口, 运行.\MyCache.exe traces/long.trace.zst
提示:在traces目录下有多个trace可以跑。
自行生成trace文件:
1、安装valgrind(Ubuntu下可以使用apt install valgrind完成)
2、运行valgrind生成某个应用的trace文件。比如,为了生成ls命令运行时的trace,可以输入下列命令:
valgrind --tool=lackey -v --trace-mem=yes --log-file=ls.trace ls -l
3、将生成的trace文件打包为zst压缩格式,例如:
zstd ls.trace -o ls.trace.zst
4、使用Cache跑这个trace
./Cache ./ls.trace.zst

View File

@ -0,0 +1,125 @@
#
# Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
cmake_minimum_required(VERSION 3.3)
enable_testing()
set(CMAKE_C_STANDARD 99)
project(cbsl
VERSION 2019.5.0
LANGUAGES C Fortran
)
if (CMAKE_BUILD_TYPE)
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
if (NOT uppercase_CMAKE_BUILD_TYPE MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$")
message(FATAL_ERROR "Invalid value for CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
endif()
if (${uppercase_CMAKE_BUILD_TYPE} MATCHES "DEBUG")
set(CBSL_DEBUG 1)
endif ()
endif ()
if (INSTALL_ZSTD)
message(STATUS "Enable installation Zstandard library version 1.4.0")
include(ExternalProject)
ExternalProject_Add(zstd
GIT_REPOSITORY "https://github.com/facebook/zstd.git"
GIT_TAG "v1.4.0"
PREFIX "${CMAKE_BINARY_DIR}/zstd"
SOURCE_SUBDIR "build/cmake"
CMAKE_ARGS -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -D CMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
STEP_TARGETS install
EXCLUDE_FROM_ALL on
)
include_directories(${CMAKE_INSTALL_PREFIX}/include)
link_directories(${CMAKE_INSTALL_PREFIX}/lib)
endif ()
find_library(ZSTD_LIB NAMES "zstd")
include(CheckCCompilerFlag)
CHECK_C_COMPILER_FLAG("-Wall" C_HAS_WALL)
CHECK_C_COMPILER_FLAG("-Wshadow" C_HAS_WSHADOW)
CHECK_C_COMPILER_FLAG("-Werror" C_HAS_WERROR)
CHECK_C_COMPILER_FLAG("-pedantic-errors" C_HAS_PEDANTIC_ERRORS)
set(ADD_C_EXTRA_FLAGS)
if (C_HAS_WALL)
set(ADD_C_EXTRA_FLAGS "${ADD_C_EXTRA_FLAGS} -Wall")
endif ()
if (C_HAS_WSHADOW)
set(ADD_C_EXTRA_FLAGS "${ADD_C_EXTRA_FLAGS} -Wshadow")
endif ()
if (C_HAS_WERROR)
set(ADD_C_EXTRA_FLAGS "${ADD_C_EXTRA_FLAGS} -Werror")
endif ()
if (C_HAS_PEDANTIC_ERRORS)
set(ADD_C_EXTRA_FLAGS "${ADD_C_EXTRA_FLAGS} -pedantic-errors")
endif ()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ADD_C_EXTRA_FLAGS}")
# Probe the Fortran compiler for optional diagnostic flags and append every
# supported one to CMAKE_Fortran_FLAGS.
include(CheckFortranCompilerFlag)
CHECK_Fortran_COMPILER_FLAG("-Wall -Wextra" Fortran_HAS_WALL_EXTRA)
# Probe the flag that is actually added below. The previous probe tested
# "-Wshadow" while the result gated "-Werror", so the two could disagree.
# NOTE: check results are cached; an existing build directory has to be
# reconfigured from scratch to pick up the corrected probe.
CHECK_Fortran_COMPILER_FLAG("-Werror" Fortran_HAS_WERROR)
CHECK_Fortran_COMPILER_FLAG("-pedantic-errors" Fortran_HAS_PEDANTIC_ERRORS)
set(ADD_Fortran_EXTRA_FLAGS)
if (Fortran_HAS_WALL_EXTRA)
  set(ADD_Fortran_EXTRA_FLAGS "${ADD_Fortran_EXTRA_FLAGS} -Wall -Wextra")
endif ()
if (Fortran_HAS_WERROR)
  set(ADD_Fortran_EXTRA_FLAGS "${ADD_Fortran_EXTRA_FLAGS} -Werror")
endif ()
if (Fortran_HAS_PEDANTIC_ERRORS)
  set(ADD_Fortran_EXTRA_FLAGS "${ADD_Fortran_EXTRA_FLAGS} -pedantic-errors")
endif ()
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${ADD_Fortran_EXTRA_FLAGS}")
include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_BINARY_DIR}/include)
set(CMAKE_Fortran_MODULE_DIRECTORY ${PROJECT_BINARY_DIR}/include)
set(CBSL_LIB "cbsl")
set(CBSL_FLIB "${CBSL_LIB}f")
add_subdirectory(include)
add_subdirectory(src)
add_subdirectory(examples)
add_subdirectory(tests)
add_subdirectory(benchmarks)
if (INSTALL_ZSTD)
# install zstd before building the library
add_dependencies(${CBSL_FLIB} ${CBSL_LIB})
add_dependencies(${CBSL_LIB} zstd-install)
endif ()

202
cachelab/cbsl/LICENSE Normal file
View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

93
cachelab/cbsl/README.md Normal file
View File

@ -0,0 +1,93 @@
# CBSL: Compressed Binary Serialization Library
## What's this?
The library provides the binary serialization with the compression by [Zstandard](https://facebook.github.io/zstd/).
A motivation of the library is to implement data-compressed checkpoint/restart, which is a well-known technique for recovering from computer failures in high-performance computing.
This library aims to provide simple and lightweight access for users.
The library supports C99 or later and Fortran 2008 or later; the language features it uses should be supported by all major compilers.
## How to build and test
The library uses [CMake](https://cmake.org/) version 3.3.x or later.
$ cmake --version
cmake version 3.14.3
CMake suite maintained and supported by Kitware (kitware.com/cmake).
$ mkdir build && cd build
$ cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/install/path ..
$ make
$ make test
$ make install
## Do you need help to install `zstd` package?
We can build and install `zstd` automatically in the build process.
Please pass `-D INSTALL_ZSTD=on` to cmake; the `zstd` package will then be installed into the `CMAKE_INSTALL_PREFIX` directory before the library is built.
$ cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/install/path -D INSTALL_ZSTD=on ..
$ make
$ make test
$ make install
## Test environments
1. CMake version 3.14.3
2. GCC version 4.8.5
3. Zstandard version 1.4.0
4. CentOS Linux release 7.5.1804 (Core)
## Benchmark
We provide the library performance benchmark with best and worst case.
Please be reminded that **the benchmark results do not indicate the performance of Zstandard**; they measure the overhead (usage cost) of this library.
1. Best case : all data is zero filled (A compression ratio achieves up to 99%)
2. Worst case : data is generated by rand() (A compression ratio is lower than 1%)
`benchmark` target executes the benchmarks.
...
$ make benchmark
min data size = 262144.00 [B]
max data size = 134217728.00 [B]
data is zero filled (maximum compression)
<write data [Byte]> <time [seconds]> <speed [MiB/sec]>
262144 0.002397 109.360615
524288 0.002887 181.572393
1048576 0.003440 304.827177
...
## Use `zstd` command
A compressed file by the library can be decompressed by `zstd` command.
## License
Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
## Future work
1. API error code
2. Refactoring tests
## NOTE
### Unsupported binary compatibility
The library does not support binary compatibility or endianness conversion, which are required for communication across machines.
If you want it, please consider using other serialization libraries such as [MessagePack](https://msgpack.org/).

View File

@ -0,0 +1,46 @@
#
# Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Timer detection: prefer clock_gettime (from librt); otherwise look for
# gettimeofday. The results end up in config.h as HAVE_CLOCK_GETTIME and
# HAVE_GETTIMEOFDAY.
include(CheckLibraryExists)
CHECK_LIBRARY_EXISTS(rt clock_gettime "time.h" HAVE_CLOCK_GETTIME)
if (HAVE_CLOCK_GETTIME)
  link_libraries(rt)
else ()
  include(CheckFunctionExists)
  CHECK_FUNCTION_EXISTS(gettimeofday HAVE_GETTIMEOFDAY)
  # if() keywords are case-sensitive: a lower-case "not" is rejected as an
  # unknown argument, so the operator must be spelled NOT.
  if (NOT HAVE_GETTIMEOFDAY)
    message(WARNING "Benchmark program uses low resolution timer...")
  endif ()
endif ()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
link_libraries(${CBSL_LIB} ${ZSTD_LIB} m)
include_directories(${CMAKE_CURRENT_BINARY_DIR})

# Benchmark executables.
add_executable(serialize serialize.c)
add_executable(deserialize deserialize.c)

# Best case (-f): each deserialize target depends on the matching serialize
# target, which is run first.
add_custom_target(benchmark_serialize_bestcase ./serialize 18 27 -f DEPENDS serialize)
add_custom_target(benchmark_deserialize_bestcase ./deserialize 18 27 -f DEPENDS deserialize benchmark_serialize_bestcase)
add_custom_target(benchmark_fast DEPENDS benchmark_serialize_bestcase benchmark_deserialize_bestcase)

# Worst case (-r).
add_custom_target(benchmark_serialize_worstcase ./serialize 18 27 -r DEPENDS serialize)
add_custom_target(benchmark_deserialize_worstcase ./deserialize 18 27 -r DEPENDS deserialize benchmark_serialize_worstcase)
add_custom_target(benchmark_slow DEPENDS benchmark_serialize_worstcase benchmark_deserialize_worstcase)

# Umbrella target running both cases.
add_custom_target(benchmark DEPENDS benchmark_fast benchmark_slow)

View File

@ -0,0 +1,22 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CBSL_BENCHMARK_CONFIG_INCLUDED
#define CBSL_BENCHMARK_CONFIG_INCLUDED
#cmakedefine01 HAVE_CLOCK_GETTIME
#cmakedefine01 HAVE_GETTIMEOFDAY
#endif /* CBSL_BENCHMARK_CONFIG_INCLUDED */

View File

@ -0,0 +1,99 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#ifdef HAVE_GETTIMEOFDAY
#include <sys/time.h>
#endif
#include <math.h>
#include <cbsl.h>
#include "config.h"
/*
 * Evaluates X once; if it equals cbsl_error, prints the failing expression to
 * stderr and terminates the process with status 1.
 * Wrapped in do { } while (0) so that "CBSL_ERROR_CHECK(x);" is a single
 * statement and stays valid inside an unbraced if/else.
 */
#define CBSL_ERROR_CHECK(X) do { if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); } } while (0)
typedef unsigned char byte_t;
extern void deserialize_bench(double ds);
extern double seconds();
/*
 * Deserialization benchmark driver.
 *
 * Usage: deserialize <min_data_size_exp2> <max_data_size_exp2>
 *
 * Calls deserialize_bench() once for every exponent i in
 * [min_data_size_exp2, max_data_size_exp2] with a data size of 2^i bytes.
 * Exits with status 1 when the arguments are missing, are not integers, or
 * are outside 0..63 (2^i must be representable as uint64_t).
 */
int main(int argc, char** argv)
{
  enum { max_supported_exp2 = 63 };

  if (argc <= 2)
  {
    fprintf(stderr, "usage: %s <min_data_size_exp2> <max_data_size_exp2>\n",
            (argc > 0 && argv[0] != NULL) ? argv[0] : "deserialize");
    exit(1);
  }

  int max_data_size_exp2 = 0;
  int min_data_size_exp2 = 0;
  /* sscanf leaves its target untouched on a failed conversion, so the result must be checked. */
  if (sscanf(argv[1], "%d", &min_data_size_exp2) != 1 ||
      sscanf(argv[2], "%d", &max_data_size_exp2) != 1)
  {
    fprintf(stderr, "error: data size exponents must be integers\n");
    exit(1);
  }
  if (min_data_size_exp2 < 0 || min_data_size_exp2 > max_supported_exp2 ||
      max_data_size_exp2 < 0 || max_data_size_exp2 > max_supported_exp2)
  {
    fprintf(stderr, "error: data size exponents must be within 0..%d\n", (int)max_supported_exp2);
    exit(1);
  }

  printf("min data size = %.2lf [B]\n", pow(2,min_data_size_exp2));
  printf("max data size = %.2lf [B]\n", pow(2,max_data_size_exp2));

  srand((unsigned int)(time(NULL)));

  printf("<read data [Byte]> <time [seconds]> <speed [MiB/sec]>\n");
  for (int i = min_data_size_exp2; i <= max_data_size_exp2; ++i)
  {
    deserialize_bench(pow(2,i));
  }
  return 0;
}
/*
 * Reads `ds` bytes (truncated to uint64_t) from the file
 * "serialize_<size>.zst" through cbsl and prints one result line:
 *   <size> <elapsed seconds> <speed>
 * Terminates the process with status 1 if the buffer cannot be allocated or
 * any cbsl call fails.
 */
void deserialize_bench(double ds)
{
  const uint64_t data_size = (uint64_t)(ds);
  /* uint64_t is not necessarily unsigned long; print it through a type with a portable specifier. */
  const unsigned long long printable_size = (unsigned long long)(data_size);

  char cname[128];
  snprintf(cname, sizeof cname, "serialize_%llu.zst", printable_size);

  byte_t* a = malloc(data_size);
  if (a == NULL && data_size != 0)
  {
    fprintf(stderr, "error: malloc\n");
    exit(1);
  }

  cbsl_ctx* ctx = cbsl_open(cbsl_load_mode, cname);
  if (ctx == NULL)
  {
    fprintf(stderr, "error: cbsl_open\n");
    exit(1);
  }

  /* Only the read itself is timed; open and close are excluded. */
  const double beg = seconds();
  {
    CBSL_ERROR_CHECK(cbsl_read(ctx, a, data_size));
  }
  const double end = seconds();
  const double rt = end - beg;

  CBSL_ERROR_CHECK(cbsl_close(ctx));

  /* NOTE(review): the divisor is 1.0e6 (10^6 bytes) although main labels the column MiB/sec. */
  printf("%llu %lf %lf\n", printable_size, rt, (data_size/rt)/1.0e6);

  free(a);
}
/*
 * config.h defines HAVE_CLOCK_GETTIME / HAVE_GETTIMEOFDAY with #cmakedefine01,
 * i.e. always to 0 or 1, so their VALUE has to be tested; defined() alone is
 * always true. <sys/time.h> is pulled in here because the include guard at the
 * top of the file is evaluated before config.h has been included.
 */
#if !(defined(HAVE_CLOCK_GETTIME) && HAVE_CLOCK_GETTIME) && defined(HAVE_GETTIMEOFDAY) && HAVE_GETTIMEOFDAY
#include <sys/time.h>
#endif

/*
 * Returns a timestamp in seconds as a double, using the first available
 * source: clock_gettime(CLOCK_REALTIME), gettimeofday(), or clock().
 * Only differences between two return values are meaningful.
 */
double seconds() {
#if defined(HAVE_CLOCK_GETTIME) && HAVE_CLOCK_GETTIME
  struct timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return (double)ts.tv_sec + ((double)ts.tv_nsec / 1.0e9);
#elif defined(HAVE_GETTIMEOFDAY) && HAVE_GETTIMEOFDAY
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return (double)tv.tv_sec + ((double)tv.tv_usec / 1.0e6);
#else
  /* Convert before dividing: an integer division would truncate to whole seconds. */
  return (double)clock() / (double)CLOCKS_PER_SEC;
#endif
}

View File

@ -0,0 +1,136 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#ifdef HAVE_GETTIMEOFDAY
#include <sys/time.h>
#endif
#include <math.h>
#include <cbsl.h>
#include "config.h"
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
typedef unsigned char byte_t;
extern void serialize_bench(double ds);
extern void rand_byte_t(uint64_t n, byte_t* a);
extern void zero_filled(uint64_t n, byte_t* a);
extern double seconds();
int rand_generate = 0;
/*
 * Serialization benchmark driver.
 *
 * argv[1] : exponent of the smallest data size (size = 2^exp bytes)
 * argv[2] : exponent of the largest data size
 * argv[3] : "-r" for random data, anything else for zero-filled data
 *
 * Runs serialize_bench() once for every exponent in [min, max].
 * Exits with status 1 on missing or invalid arguments.
 */
int main(int argc, char** argv)
{
  if (argc < 4)
  {
    fprintf(stderr, "usage: <program> <min_size_exp2> <max_size_exp2> <-r | other>\n");
    exit(1);
  }

  int max_data_size_exp2 = 0, min_data_size_exp2 = 0;
  /* sscanf leaves the target untouched on a failed conversion, so the
     result must be checked before the values are used. */
  if (sscanf(argv[1], "%d", &min_data_size_exp2) != 1 ||
      sscanf(argv[2], "%d", &max_data_size_exp2) != 1)
  {
    fprintf(stderr, "error: size exponents must be integers\n");
    exit(1);
  }
  /* 2^64 and above cannot be represented in the uint64_t byte count that
     serialize_bench() derives from its double argument. */
  if (min_data_size_exp2 < 0 || max_data_size_exp2 > 63)
  {
    fprintf(stderr, "error: size exponents must be within [0, 63]\n");
    exit(1);
  }

  printf("min data size = %.2lf [B]\n", pow(2,min_data_size_exp2));
  printf("max data size = %.2lf [B]\n", pow(2,max_data_size_exp2));

  rand_generate = (strcmp(argv[3],"-r") == 0);
  if (rand_generate)
  {
    printf("data is random generated (stressful compression)\n");
  }
  else
  {
    printf("data is zero filled (maximum compression)\n");
  }

  srand((unsigned int)(time(NULL)));

  printf("<write data [Byte]> <time [seconds]> <speed [MiB/sec]>\n");
  for (int i = min_data_size_exp2; i <= max_data_size_exp2; ++i)
  {
    serialize_bench(pow(2,i));
  }
  return 0;
}
/*
 * Benchmark one serialization pass.
 *
 * Allocates `ds` bytes (converted to an integer byte count), fills them with
 * random or zero data depending on the global `rand_generate`, writes them
 * to "serialize_<bytes>.zst" through cbsl_write() + cbsl_flush(), and prints
 * "<bytes> <seconds> <bytes / seconds / 1.0e6>" on stdout.
 * Only write + flush are timed. Any failure prints a message on stderr and
 * terminates with exit(1).
 */
void serialize_bench(double ds)
{
  const uint64_t data_size = (uint64_t)(ds);

  /* uint64_t is not `unsigned long` on every ABI, so "%lu" is not portable.
     Widen explicitly and use "%llu"; the produced text is unchanged. */
  char cname[128];
  snprintf(cname, sizeof(cname), "serialize_%llu.zst", (unsigned long long)(data_size));

  byte_t* a = (byte_t*)(malloc(data_size));
  if (a == NULL && data_size != 0)
  {
    /* the fill routines below would otherwise write through NULL */
    fprintf(stderr, "error: malloc\n");
    exit(1);
  }

  if (rand_generate)
  {
    rand_byte_t(data_size / sizeof(byte_t), a);
  }
  else
  {
    zero_filled(data_size / sizeof(byte_t), a);
  }

  cbsl_ctx* ctx = cbsl_open(cbsl_store_mode, cname);
  if (ctx == NULL)
  {
    fprintf(stderr, "error: cbsl_open\n");
    free(a);
    exit(1);
  }

  const double beg = seconds();
  {
    CBSL_ERROR_CHECK(cbsl_write(ctx, a, data_size));
    CBSL_ERROR_CHECK(cbsl_flush(ctx));
  }
  const double end = seconds();
  const double wt = end - beg;

  CBSL_ERROR_CHECK(cbsl_close(ctx));

  /* NOTE(review): the column header printed by main says MiB/sec, but the
     divisor here is 1.0e6 (decimal megabytes) - confirm which is intended. */
  printf("%llu %lf %lf\n", (unsigned long long)(data_size), wt, (data_size/wt)/1.0e6);
  free(a);
}
/*
 * Fill a[0 .. data_size-1] with pseudo-random bytes taken from rand().
 * The caller seeds the generator (main calls srand()).
 */
void rand_byte_t(uint64_t data_size, byte_t* a)
{
  for (uint64_t i = 0; i < data_size; ++i)
  {
    /* modulus 256 covers the full byte range; 255 could never yield 0xFF */
    a[i] = (byte_t)(rand() % 256);
  }
}
/* Set every one of the first n bytes of `a` to zero. */
void zero_filled(uint64_t n, byte_t* a)
{
  byte_t* cursor = a;
  uint64_t remaining = n;
  while (remaining > 0)
  {
    *cursor = 0; /* all data fills zero */
    ++cursor;
    --remaining;
  }
}
/*
 * Return a timestamp in seconds as a double.
 *
 * The time source is chosen at build time:
 *   HAVE_CLOCK_GETTIME : clock_gettime(CLOCK_REALTIME), nanosecond fields
 *   HAVE_GETTIMEOFDAY  : gettimeofday(), microsecond fields
 *   otherwise          : clock(), i.e. processor time used by the program
 * Callers only use the difference between two calls.
 */
double seconds() {
#if defined(HAVE_CLOCK_GETTIME)
  struct timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return ts.tv_sec + (ts.tv_nsec / 1.0e9);
#elif defined(HAVE_GETTIMEOFDAY)
  struct timeval tv;
  gettimeofday(&tv, NULL);
  /* was `ts.tv_usec`: `ts` does not exist in this branch */
  return tv.tv_sec + (tv.tv_usec / 1.0e6);
#else
  /* convert before dividing; integer division dropped the fractional part */
  return (double)(clock()) / (double)(CLOCKS_PER_SEC);
#endif
}

View File

@ -0,0 +1,22 @@
#
# Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Example executable built from simple_usage.c.
# It is ordered after the cbsl library target and linked against cbsl and zstd.
add_executable(simple_usage simple_usage.c)
add_dependencies(simple_usage ${CBSL_LIB})
target_link_libraries(simple_usage ${CBSL_LIB} ${ZSTD_LIB})
# Example executable built from recommend_usage.c, with the same link set.
add_executable(recommend_usage recommend_usage.c)
add_dependencies(recommend_usage ${CBSL_LIB})
target_link_libraries(recommend_usage ${CBSL_LIB} ${ZSTD_LIB})

View File

@ -0,0 +1,101 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cbsl.h>
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
#define CHECK_BINARY(X,Y) {if (memcmp(&(X),&(Y),sizeof((X))) != 0) { fprintf(stderr, "error: binary check %s == %s\n", (#X), (#Y)); exit(1); }}
#define CHECK(X) {if (!(X)) { fprintf(stderr, "error: %s\n", (#X)); }}
#define SAFE_FREE(X) {if ((X) != NULL) { free((X)); }}
static const int data0 = 43;
static const double data1 = 3.14159265;
static const int data2 = 14142;
extern void checkpoint_restart(cbsl_mode, int*, double*, int*, int**);
char cname[128];
/*
 * Checkpoint/restart example.
 *
 *   -c : store mode; the scalars take the reference values data0..data2 and
 *        a 42-element array filled with 42 is allocated
 *   -r : load mode; the scalars are zeroed and the array pointer is NULL so
 *        that checkpoint_restart() allocates it
 *
 * After checkpoint_restart() the values are compared with the reference
 * data in both modes. Returns 1 on a missing/unknown option or on an
 * allocation failure, 0 otherwise.
 */
int main(int argc, char** argv)
{
  int c0;
  double c1;
  int c2;
  int* a0;

  if (argc < 2)
    return 1;

  snprintf(cname, sizeof(cname), "checkpoint.zst");

  cbsl_mode mode = cbsl_unknown_mode;
  if (strcmp(argv[1],"-c") == 0)
  {
    mode = cbsl_store_mode;
    c0 = data0;
    c1 = data1;
    c2 = data2;
    a0 = (int*)(malloc(sizeof(int) * 42));
    if (a0 == NULL)
    {
      fprintf(stderr, "error: malloc\n");
      return 1;
    }
    for (int i = 0; i < 42; ++i)
      a0[i] = 42;
  }
  else if (strcmp(argv[1],"-r") == 0)
  {
    mode = cbsl_load_mode;
    c0 = c1 = c2 = 0;
    a0 = NULL;
  }
  else
  {
    return 1;
  }

  checkpoint_restart(mode, &c0, &c1, &c2, &a0);

  CHECK_BINARY(c0, data0);
  CHECK_BINARY(c1, data1);
  CHECK_BINARY(c2, data2);

  /* the element check below indexes a0 unconditionally */
  if (a0 == NULL)
  {
    fprintf(stderr, "error: array was not restored\n");
    return 1;
  }
  for (int i = 0; i < 42; ++i)
  {
    CHECK(a0[i] == 42);
  }

  SAFE_FREE(a0);
  return 0;
}
/*
 * Store or load the example state through one cbsl stream.
 *
 * mode : cbsl_store_mode writes the values, cbsl_load_mode reads them back
 * c0, c1, c2 : scalar values recorded in this order
 * a0 : address of the array pointer; in load mode the array is allocated
 *      here and ownership passes to the caller
 *
 * The record order is c0, c1, c2, element count, array data.
 * Any failure prints a message on stderr and terminates with exit(1).
 */
void checkpoint_restart(cbsl_mode mode, int* c0, double* c1, int* c2, int** a0)
{
  /* In store mode this value is written to the file and sizes the array
     record, so it must be defined: 42 is the element count main() allocates.
     In load mode the record call below overwrites it with the stored count. */
  int a0_size = 42;

  cbsl_ctx* ctx = cbsl_open(mode, cname);
  if (ctx == NULL)
  {
    fprintf(stderr, "error: cbsl_open\n");
    exit(1);
  }

  CBSL_ERROR_CHECK(cbsl_record(ctx, c0, sizeof(int)));
  CBSL_ERROR_CHECK(cbsl_record(ctx, c1, sizeof(double)));
  CBSL_ERROR_CHECK(cbsl_record(ctx, c2, sizeof(int)));
  CBSL_ERROR_CHECK(cbsl_record(ctx, &a0_size, sizeof(int)));

  if (mode == cbsl_load_mode)
  {
    /* the count comes from the file; reject values that cannot size an array */
    if (a0_size <= 0)
    {
      fprintf(stderr, "error: invalid array size %d\n", a0_size);
      exit(1);
    }
    (*a0) = (int*)(malloc(sizeof(int) * a0_size));
    if ((*a0) == NULL)
    {
      fprintf(stderr, "error: malloc\n");
      exit(1);
    }
  }

  CBSL_ERROR_CHECK(cbsl_record(ctx, (*a0), sizeof(int) * a0_size));
  CBSL_ERROR_CHECK(cbsl_close(ctx));
}

View File

@ -0,0 +1,99 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <cbsl.h>
#define ARRAY_SIZE(X) (sizeof((X))/sizeof(*(X)))
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); }}
int a[1024]; /* 4 KiB */
double b[1024]; /* 8 KiB */
extern void save(void);
extern void load(void);
extern void rand_int(int, int*);
extern void rand_double(int, double*);
char cname[128];
/*
 * Save/load example.
 *
 *   -c : fill the global arrays `a` and `b` with random values and save them
 *   -r : load the arrays back
 *
 * Returns 1 when the option is missing or unknown, 0 otherwise.
 */
int main(int argc, char** argv) {
  /* argv[1] is a null pointer when no option is given; do not pass it to strcmp */
  if (argc < 2)
  {
    return 1;
  }

  srand((unsigned int)(time(NULL)));

  snprintf(cname, sizeof(cname), "checkpoint.zst");

  if (strcmp(argv[1],"-c") == 0)
  {
    rand_int(ARRAY_SIZE(a), a);
    rand_double(ARRAY_SIZE(b), b);
    save();
  }
  else if (strcmp(argv[1],"-r") == 0)
  {
    load();
  }
  else
  {
    return 1;
  }

  return 0;
}
/*
 * Write the global arrays `a` and `b` to "./checkpoint.data" in store mode,
 * then print one sample element of each array.
 * Exits with status 1 if the stream cannot be opened. Write and close
 * failures are only reported: CBSL_ERROR_CHECK in this file prints a
 * message and does not exit.
 * NOTE(review): the global `cname` set in main is not used here; the path
 * is the literal below - confirm which file name is intended.
 */
void save(void) {
cbsl_ctx* ctx = cbsl_open(cbsl_store_mode, "./checkpoint.data");
if (ctx == NULL) {
fprintf(stderr, "error: cbsl_open save\n");
exit(1);
}
/* the record order here must match the read order in load() */
CBSL_ERROR_CHECK(cbsl_write(ctx, a, sizeof(a)));
CBSL_ERROR_CHECK(cbsl_write(ctx, b, sizeof(b)));
CBSL_ERROR_CHECK(cbsl_close(ctx));
printf("before compressed a[%d] = %d\n", 124, a[124]);
printf("before compressed b[%d] = %e\n", 514, b[514]);
}
/*
 * Read the global arrays `a` and `b` back from "./checkpoint.data" in load
 * mode, then print the same sample elements that save() prints.
 * Exits with status 1 if the stream cannot be opened. Read and close
 * failures are only reported: CBSL_ERROR_CHECK in this file prints a
 * message and does not exit.
 */
void load(void) {
cbsl_ctx* ctx = cbsl_open(cbsl_load_mode, "./checkpoint.data");
if (ctx == NULL) {
fprintf(stderr, "error: cbsl_open load\n");
exit(1);
}
/* the read order must match the write order in save() */
CBSL_ERROR_CHECK(cbsl_read(ctx, a, sizeof(a)));
CBSL_ERROR_CHECK(cbsl_read(ctx, b, sizeof(b)));
CBSL_ERROR_CHECK(cbsl_close(ctx));
printf("decompressed a[%d] = %d\n", 124, a[124]);
printf("decompressed b[%d] = %e\n", 514, b[514]);
}
/* Assign one rand() value to each of v[0] .. v[n-1], in index order. */
void rand_int(int n, int *v) {
  int k = 0;
  while (k < n) {
    v[k] = rand();
    k += 1;
  }
}
/*
 * Fill v[0] .. v[n-1] with pseudo-random values in the range (0, 1].
 * Each value is the reciprocal of (rand() + 1).
 */
void rand_double(int n, double *v) {
  for (int i = 0; i < n; ++i) {
    /* rand() may return 0; the +1.0 keeps the divisor non-zero so the
       result is always finite */
    v[i] = 1.0 / ((double)(rand()) + 1.0);
  }
}

View File

@ -0,0 +1,23 @@
#
# Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Generate cbsl_config.h in the build tree from the cbsl_config.h.in template.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cbsl_config.h.in
${CMAKE_CURRENT_BINARY_DIR}/cbsl_config.h)
# Install the public header from the source tree together with the generated
# configuration header.
install(FILES
${CMAKE_CURRENT_SOURCE_DIR}/cbsl.h
${CMAKE_CURRENT_BINARY_DIR}/cbsl_config.h
TYPE INCLUDE
)

View File

@ -0,0 +1,63 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CBSL_HEADER_INCLUDED
#define CBSL_HEADER_INCLUDED
#if defined(__cplusplus)
extern "C" {
#endif
#include <stdint.h>
#include "cbsl_config.h"
/* Context mode, chosen when a stream is opened with cbsl_open(). */
typedef enum
{
cbsl_load_mode = 1, /* deserialize: read from the file */
cbsl_store_mode = 2, /* serialize: write to the file */
cbsl_unknown_mode = -1 /* unknown mode; cbsl_open() returns NULL for it */
}
cbsl_mode;
/* Status codes returned by the library functions. */
typedef enum
{
cbsl_success = 0, /* no error */
cbsl_end = 1, /* NOTE(review): not described in references.md; presumably signals end of data - confirm */
cbsl_error = -1 /* the function failed */
}
cbsl_errors;
/* Opaque library context; the layout is private to the library sources. */
typedef struct cbsl_ctx_ cbsl_ctx;
/* Open the data file at `path` in the given mode. Returns NULL on failure. */
cbsl_ctx* cbsl_open(cbsl_mode open_mode, char* path);
/* Close the stream and release the context; in store mode pending data is flushed first. */
cbsl_errors cbsl_close(cbsl_ctx* ctx);
/* Compress the buffered data and write it to the file stream. */
cbsl_errors cbsl_flush(cbsl_ctx* ctx);
/* Store `size` bytes starting at `data` with compression. */
cbsl_errors cbsl_write(cbsl_ctx* ctx, const void* data, uint64_t size);
/* Load `size` bytes into `data` with decompression; `data` must already be allocated. */
cbsl_errors cbsl_read(cbsl_ctx* ctx, void* data, uint64_t size);
/* NOTE(review): not described in references.md; presumably reads one line into `linebuf` (at most `size` bytes) - confirm against the implementation. */
cbsl_errors cbsl_readline(cbsl_ctx* ctx, char* linebuf, uint64_t size);
/* Call cbsl_read() or cbsl_write() according to the context mode (checkpoint/restart helper). */
cbsl_errors cbsl_record(cbsl_ctx* ctx, void* data, uint64_t size);
/* Variant of cbsl_record() for heap arrays: records the byte size and the values;
   per references.md the array is allocated with malloc() when `data` is a null pointer. */
cbsl_errors cbsl_record_heap(cbsl_ctx* ctx, void** data, uint64_t* size);
/* Return the mode of the context. */
cbsl_mode cbsl_get_mode(cbsl_ctx* ctx);
/* Set the zstd compression level used for serialized data. */
cbsl_errors cbsl_set_compression_level(cbsl_ctx* ctx, int clevel);
/* Return the zstd compression level; references.md documents -1 on failure. */
int cbsl_get_compression_level(cbsl_ctx* ctx);
#if defined(__cplusplus)
} /* extern "C" */
#endif
#endif /* CBSL_HEADER_INCLUDED */

View File

@ -0,0 +1,27 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CBSL_CONFIG_HEADER_INCLUDED
#define CBSL_CONFIG_HEADER_INCLUDED
/* Library version constants: full version as an integer and as a string,
   followed by the major, minor and patch components. */
#define CBSL_VERSION 201950
#define CBSL_VERSION_STRING "2019.5.0"
#define CBSL_MAJOR_VERSION 2019
#define CBSL_MINOR_VERSION 5
#define CBSL_PATCH_VERSION 0
/* CBSL_DEBUG is not defined in this configuration, so the debug helpers in
   cbsl_internal.h expand to nothing. */
/* #undef CBSL_DEBUG */
#endif /* CBSL_CONFIG_HEADER_INCLUDED */

View File

@ -0,0 +1,27 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CBSL_CONFIG_HEADER_INCLUDED
#define CBSL_CONFIG_HEADER_INCLUDED
/* Template for cbsl_config.h: the placeholders below are replaced with the
   project version values when the build system configures this file.
   NOTE(review): the full version is the plain concatenation of major, minor
   and patch, so a two-digit minor produces a longer number - confirm that
   comparisons on CBSL_VERSION account for this. */
#define CBSL_VERSION @cbsl_VERSION_MAJOR@@cbsl_VERSION_MINOR@@cbsl_VERSION_PATCH@
#define CBSL_VERSION_STRING "@cbsl_VERSION@"
#define CBSL_MAJOR_VERSION @cbsl_VERSION_MAJOR@
#define CBSL_MINOR_VERSION @cbsl_VERSION_MINOR@
#define CBSL_PATCH_VERSION @cbsl_VERSION_PATCH@
/* Becomes a define or a commented-out undef depending on the build option. */
#cmakedefine CBSL_DEBUG
#endif /* CBSL_CONFIG_HEADER_INCLUDED */

443
cachelab/cbsl/references.md Normal file
View File

@ -0,0 +1,443 @@
# CBSL library references
# Build and link libraries
## C
```bash
export CBSL_INCLUDE_PATH=${CBSL_INSTALLED_PATH}/include
export CBSL_LIBRARY_PATH=${CBSL_INSTALLED_PATH}/lib
cc -c xcbsl_use_code.c -I${CBSL_INCLUDE_PATH}
cc -o xcbsl_use_program ${OBJS} -lcbsl -lzstd -L${CBSL_LIBRARY_PATH}
```
## Fortran
```bash
export CBSL_INCLUDE_PATH=${CBSL_INSTALLED_PATH}/include
export CBSL_LIBRARY_PATH=${CBSL_INSTALLED_PATH}/lib
f95 -c xcbslf_use_code.f95 -I${CBSL_INCLUDE_PATH}
f95 -o xcbslf_use_program ${OBJS} -lcbslf -lcbsl -lzstd -L${CBSL_LIBRARY_PATH}
```
# API
The `cbslf` module is provided for using the library from Fortran applications.
## Versioning
|Version|Value |
|-------|-----------------------|
|major |release year (ex. 2019)|
|minor |release month (ex. 5) |
|patch |patch version (0-9) |
### C
```c
#define CBSL_VERSION // full version (integer)
#define CBSL_VERSION_STRING // full version (string)
#define CBSL_MAJOR_VERSION // major
#define CBSL_MINOR_VERSION // minor
#define CBSL_PATCH_VERSION // patch
```
### Fortran
```fortran
integer(8), parameter :: CBSL_VERSION
character(*), parameter :: CBSL_VERSION_STRING
integer(4), parameter :: CBSL_MAJOR_VERSION
integer(4), parameter :: CBSL_MINOR_VERSION
integer(4), parameter :: CBSL_PATCH_VERSION
```
## Types
### C
```c
// library context
typedef struct cbsl_ctx_ cbsl_ctx;
```
### Fortran
```fortran
type(cbslf_context)
```
## Constant values
### C
```c
// Context mode
typedef enum
{
cbsl_load_mode = 1, // deserialize mode
cbsl_store_mode = 2, // serialize mode
cbsl_unknown_mode = -1 // unknown
}
cbsl_mode;
// Errors
typedef enum
{
cbsl_success = 0, // no error
cbsl_end = 1, // declared in cbsl.h; semantics not described in this reference
cbsl_error = -1 // an error occurred in the function
}
cbsl_errors;
```
### Fortran
```fortran
integer(4), parameter :: cbslf_load_mode
integer(4), parameter :: cbslf_store_mode
integer(4), parameter :: cbslf_unknown_mode
integer(4), parameter :: cbslf_success
integer(4), parameter :: cbslf_errors
```
## API
### C
```c
/*
[brief]
open serialized data file stream.
[arguments]
open_mode: specifies load/store mode
path : data file path
[return]
success : return context pointer
fail : NULL pointer
*/
cbsl_ctx* cbsl_open(cbsl_mode open_mode, char* path);
/*
[brief]
close serialized data file stream.
this function calls cbsl_flush() before closing stream.
[arguments]
ctx : context pointer
[return]
success : cbsl_success
fail : cbsl_error
*/
cbsl_errors cbsl_close(cbsl_ctx* ctx);
/*
[brief]
flush file stream.
this function executes data compression and write to file stream.
[arguments]
ctx : context pointer
[return]
success : cbsl_success
fail : cbsl_error
*/
cbsl_errors cbsl_flush(cbsl_ctx* ctx);
/*
[brief]
stores the data with compression.
this function copies the data into an internal buffer.
when the buffered data reaches a size large enough to compress, this function calls cbsl_flush().
[arguments]
ctx : context pointer
data : write data pointer (must be allocated)
size : byte size of data
[return]
success : cbsl_success
fail : cbsl_error
*/
cbsl_errors cbsl_write(cbsl_ctx* ctx, const void* data, uint64_t size);
/*
[brief]
loads the data with decompression.
this function decompresses data from the file stream and copies it out of the internal decompressed buffer.
[arguments]
ctx : context pointer
data : read data pointer (must be allocated)
size : byte size of data
[return]
success : cbsl_success
fail : cbsl_error
*/
cbsl_errors cbsl_read(cbsl_ctx* ctx, void* data, uint64_t size);
/*
[brief]
loads or stores the data with decompression/compression, depending on the context mode.
this function calls cbsl_read() in load mode and cbsl_write() in store mode, to help implement checkpoint/restart in an application.
[arguments]
ctx : context pointer
data : read/write data pointer (must be allocated)
size : byte size of data
[return]
success : cbsl_success
fail : cbsl_error
*/
cbsl_errors cbsl_record(cbsl_ctx* ctx, void* data, uint64_t size);
/*
[brief]
this is a specialized function of cbsl_record() for heap allocated array.
the function loads/stores array data size [bytes] and array values from/to a file stream.
the array is allocated on the heap with malloc() if `data` is a null pointer.
[arguments]
ctx : context pointer
data : read/write data pointer (it accepts NULL pointer)
size : byte size of data
[return]
success : cbsl_success
fail : cbsl_error
*/
cbsl_errors cbsl_record_heap(cbsl_ctx* ctx, void** data, uint64_t* size);
/*
[brief]
gets context mode.
[arguments]
ctx : context pointer
[return]
success : context mode
*/
cbsl_mode cbsl_get_mode(cbsl_ctx* ctx);
/*
[brief]
sets zstd compression level of serialized data.
[arguments]
ctx : context pointer
clevel : compression level 1-22 (zstd 1.4.0)
[return]
success : cbsl_success
fail : cbsl_error
*/
cbsl_errors cbsl_set_compression_level(cbsl_ctx* ctx, int clevel);
/*
[brief]
gets zstd compression level of serialized data.
[arguments]
ctx : context pointer
[return]
success : compression level 1-22 (zstd 1.4.0)
fail : -1
*/
int cbsl_get_compression_level(cbsl_ctx* ctx);
```
### Fortran
```fortran
!
! [brief]
! open serialized data file stream.
! [arguments]
! open_mode: specifies load/store mode
! path : data file path
! errcode : error code
! [return]
! success : return context pointer
! fail : NULL pointer
!
function cbslf_open(open_mode, path, errcode) result(ctx)
integer(4), intent(in) :: open_mode
character(*), intent(in) :: path
integer(4), intent(out), optional :: errcode
type(cbslf_context) :: ctx
end function
!
! [brief]
! close serialized data file stream.
! this subroutine calls cbslf_flush() before closing stream.
! [arguments]
! ctx : context pointer
! errcode : error code
!
subroutine cbslf_close(ctx, errcode)
type(cbslf_context), intent(in) :: ctx
integer(4), intent(out), optional :: errcode
end subroutine
!
! [brief]
! flush file stream.
! this subroutine executes data compression and write to file stream.
! [arguments]
! ctx : context pointer
! errcode : error code
!
subroutine cbslf_flush(ctx, errcode)
type(cbslf_context), intent(in) :: ctx
integer(4), intent(out), optional :: errcode
end subroutine
!
! [brief]
! this is a generic interface.
! the data stores with compression.
! this subroutine stores internal buffer to stored data.
! When the stored data fills enough size to compress, this subroutine calls cbslf_flush().
! [supported types]
! scalar : logical, character(*)
! +array : integer(4), integer(8), real(4), real(8), complex(4), complex(8) by up to 7-dimensional array
! [arguments]
! ctx : context pointer
! data : write data (array must be allocated)
! errcode : error code
!
interface cbslf_write(ctx, data, errcode)
type(cbslf_context), intent(in) :: ctx
GENERIC_TYPE, intent(in) :: data
integer(4), intent(out), optional :: errcode
end interface
!
! [brief]
! this is a generic interface.
! loads the data with decompression.
! this subroutine decompresses data from the file stream and copies it out of the internal decompressed buffer.
! [supported types]
! scalar : logical, character(*)
! +array : integer(4), integer(8), real(4), real(8), complex(4), complex(8) by up to 7-dimensional array
! [arguments]
! ctx : context pointer
! data : read data (array must be allocated)
! errcode : error code
!
interface cbslf_read(ctx, data, errcode)
type(cbslf_context), intent(in) :: ctx
GENERIC_TYPE, intent(out) :: data
integer(4), intent(out), optional :: errcode
end interface
!
! [brief]
! this is a generic interface.
! the data loads/stores with compression/decompression.
! this subroutine calls cbsl_read()/cbsl_write() by context mode to help the implementation of checkpoint/restart in the application.
! [supported types]
! scalar : logical, character(*)
! +array : integer(4), integer(8), real(4), real(8), complex(4), complex(8) by up to 7-dimensional array
! [arguments]
! ctx : context pointer
! data : read/write data (array must be allocated)
! size : byte size of data
! errcode : error code (return value)
!
interface cbslf_record(ctx, data, errcode)
type(cbslf_context), intent(in) :: ctx
GENERIC_TYPE, intent(inout) :: data
integer(4), intent(out), optional :: errcode
end interface
!
! [brief]
! this is a generic interface for allocatable array.
! the data loads/stores with compression/decompression.
! this subroutine calls cbsl_read()/cbsl_write() by context mode to help the implementation of checkpoint/restart in the application.
! this subroutine behaves like cbsl_record_heap() in the C API.
! an array will be allocated on the memory if it is not allocated.
! [supported types]
! array : integer(4), integer(8), real(4), real(8), complex(4), complex(8) by up to 7-dimensional array
! [arguments]
! ctx : context pointer
! data : read/write data (accepts not allocated array)
! size : byte size of data
! errcode : error code
!
interface cbslf_record_heap(ctx, data, errcode)
type(cbslf_context), intent(in) :: ctx
GENERIC_TYPE, intent(inout) :: data
integer(4), intent(out), optional :: errcode
end interface
!
! [brief]
! gets context mode.
! [arguments]
! ctx : context pointer
! errcode : error code (return value)
! [return]
! success : context mode
!
function cbslf_get_mode(ctx, errcode) result(mode)
type(cbslf_context), intent(in) :: ctx
integer(4), intent(out), optional :: errcode
integer(4) :: mode
end function
!
! [brief]
! sets zstd compression level of serialized data.
! [arguments]
! ctx : context pointer
! clevel : compression level 1-22 (zstd 1.4.0)
! errcode : error code (return value)
!
subroutine cbslf_set_compression_level(ctx, clevel, errcode)
type(cbslf_context), intent(in) :: ctx
integer(4), intent(in) :: clevel
integer(4), intent(out), optional :: errcode
end subroutine
!
! [brief]
! gets zstd compression level of serialized data.
! [arguments]
! ctx : context pointer
! [return]
! success : compression level 1-22 (zstd 1.4.0)
! fail : -1
!
function cbslf_get_compression_level(ctx, errcode) result(clevel)
type(cbslf_context), intent(in) :: ctx
integer(4), intent(out), optional :: errcode
integer(4) :: clevel
end function
```
## Data format
```
BDATA: binary data [1 byte]
BSIZE: total size of binary data [4 bytes]
RSIZE: rank size of a fortran array [4 bytes]
NRANK: number of rank size of a fortran array
sizeof(TYPE): gets byte size of TYPE
Array-value-type: element type of an array
```
### Header
```
[BDATA * 8]: cbsl library version (64-bit integer)
```
### C API
```
Scalar data: HEAD -> [BDATA * sizeof(Scalar)] -> TAIL
Array data : HEAD -> [BSIZE][BDATA * sizeof(Array-value-type) * BSIZE] -> TAIL
```
### Fortran API
```
Scalar data: HEAD -> [BDATA * sizeof(Scalar)] -> TAIL
Array data : HEAD -> [BSIZE][ARRAY_LOWER_BOUNDS][ARRAY_UPPER_BOUNDS][BDATA * sizeof(Array-value-type) * BSIZE] -> TAIL
ARRAY_LOWER_BOUNDS: lower bounds of a fortran array [RSIZE * NRANK]
ARRAY_UPPER_BOUNDS: upper bounds of a fortran array [RSIZE * NRANK]
```

View File

@ -0,0 +1,46 @@
#
# Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Sources of the C library.
set(LIB_SOURCES
  buffer.c
  file.c
  utils.c
  flush.c
  write.c
  read.c
  record.c
  )

# The Fortran binding is a generated file that lives in the build tree.
set(FLIB_SOURCES
  ${CMAKE_CURRENT_BINARY_DIR}/fortran_bindings.f90
  )

# Step 1: substitute configuration values into the binding template.
configure_file(fortran_bindings.f90.in ${CMAKE_CURRENT_BINARY_DIR}/fortran_bindings.f90.in.2)

# Step 2 needs the C preprocessor. find_program() stores "CPP-NOTFOUND" in
# CPP on failure, which evaluates as false; the previous test of the
# never-defined variable CPP_NOTFOUND could not trigger.
find_program(CPP cpp)
if (NOT CPP)
  message(FATAL_ERROR "C preprocessor (cpp) command not found.")
endif ()

# Step 2: preprocess the configured template, then post-process the result
# with gen_fortran_bindings.cmake.
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/fortran_bindings.f90
  COMMAND ${CPP} ${CMAKE_CURRENT_BINARY_DIR}/fortran_bindings.f90.in.2 ${CMAKE_CURRENT_BINARY_DIR}/fortran_bindings.f90
  COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/gen_fortran_bindings.cmake ${CMAKE_CURRENT_BINARY_DIR}/fortran_bindings.f90
  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/fortran_bindings.f90.in.2
  )
add_custom_target(generate_preprocessed_fortran_file SOURCES ${CMAKE_CURRENT_BINARY_DIR}/fortran_bindings.f90)

# Library targets: the C library and the Fortran binding library.
add_library(${CBSL_LIB} ${LIB_SOURCES})
add_library(${CBSL_FLIB} ${FLIB_SOURCES})

# Install both libraries and the Fortran module file.
install(TARGETS ${CBSL_LIB} ${CBSL_FLIB})
install(FILES "${CMAKE_Fortran_MODULE_DIRECTORY}/${CBSL_FLIB}.mod" DESTINATION include)

View File

@ -0,0 +1,85 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "./cbsl_internal.h"
/*
 * Create the zstd context and the working buffers for `ctx`.
 *
 * ctx->mode selects which side is built: a decompression context in load
 * mode, a compression context in store mode. The input and output buffers
 * are sized with the stream sizes reported by zstd for that side; the line
 * buffer is CBSL_LINEBUF_SIZE bytes.
 *
 * Returns cbsl_success after storing everything in `ctx` and resetting all
 * positions, counters and end flags to zero. Returns cbsl_error for any
 * other mode, or when a context or buffer cannot be created; in that case
 * everything created here is released and `ctx` is left unmodified.
 */
cbsl_errors create_streaming_buffers(cbsl_ctx* ctx)
{
  ZSTD_CCtx* compressor = NULL;
  ZSTD_DCtx* decompressor = NULL;
  uint64_t input_bytes;
  uint64_t output_bytes;

  if (ctx->mode == cbsl_load_mode)
  {
    decompressor = ZSTD_createDCtx();
    if (decompressor == NULL)
    {
      return cbsl_error;
    }
    input_bytes = ZSTD_DStreamInSize();
    output_bytes = ZSTD_DStreamOutSize();
  }
  else if (ctx->mode == cbsl_store_mode)
  {
    compressor = ZSTD_createCCtx();
    if (compressor == NULL)
    {
      return cbsl_error;
    }
    input_bytes = ZSTD_CStreamInSize();
    output_bytes = ZSTD_CStreamOutSize();
  }
  else
  {
    return cbsl_error;
  }

  void* input_mem = (byte_t*)(malloc(input_bytes));
  void* output_mem = (byte_t*)(malloc(output_bytes));
  void* line_mem = (byte_t*)(malloc(CBSL_LINEBUF_SIZE));

  const int all_allocated = (input_mem != NULL) && (output_mem != NULL) && (line_mem != NULL);
  if (!all_allocated)
  {
    /* undo whatever succeeded; the helpers skip NULL entries */
    CBSL_SAFE_FREE_ZSTD_CCTX(compressor);
    CBSL_SAFE_FREE_ZSTD_DCTX(decompressor);
    CBSL_SAFE_FREE(input_mem);
    CBSL_SAFE_FREE(output_mem);
    CBSL_SAFE_FREE(line_mem);
    return cbsl_error;
  }

  /* hand the resources over to the context */
  ctx->zstd_cctx = compressor;
  ctx->zstd_dctx = decompressor;
  ctx->in_buffer = input_mem;
  ctx->out_buffer = output_mem;
  ctx->line_buffer = line_mem;
  ctx->in_buffer_size = input_bytes;
  ctx->out_buffer_size = output_bytes;

  /* start with empty buffers and cleared end flags */
  ctx->in_buffer_pos = 0;
  ctx->out_buffer_pos = 0;
  ctx->in_buffer_used = 0;
  ctx->out_buffer_used = 0;
  ctx->line_buffer_used = 0;
  ctx->line_buffer_read_from_zst = 0;
  ctx->zstd_buf_end = 0;
  ctx->zstd_file_end = 0;

  return cbsl_success;
}

View File

@ -0,0 +1,78 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CBSL_INTERNAL_HEADER_INCLUDED
#define CBSL_INTERNAL_HEADER_INCLUDED
#include <stdio.h>
#include <stdint.h>
#include <zstd.h>
#include <cbsl.h>
/* Size in bytes of the line buffer allocated by create_streaming_buffers(). */
#define CBSL_LINEBUF_SIZE 8192
/* Raw byte type used for the internal buffers. */
typedef unsigned char byte_t;
/*
 * Library context behind the opaque `cbsl_ctx` handle of the public header.
 * The zstd contexts, the buffers and all counters are set up by
 * create_streaming_buffers(); `fp` and `mode` are set by cbsl_open().
 */
struct cbsl_ctx_
{
FILE* fp; /* file stream opened by cbsl_open() */
ZSTD_CCtx* zstd_cctx; /* compression context; created in store mode, NULL otherwise */
ZSTD_DCtx* zstd_dctx; /* decompression context; created in load mode, NULL otherwise */
byte_t* in_buffer; /* input-side buffer of in_buffer_size bytes */
byte_t* out_buffer; /* output-side buffer of out_buffer_size bytes */
uint64_t in_buffer_size; /* zstd stream input size for the active mode */
uint64_t out_buffer_size; /* zstd stream output size for the active mode */
uint64_t in_buffer_pos; /* starts at 0; presumably the current position in in_buffer - TODO confirm */
uint64_t in_buffer_used; /* starts at 0; number of in_buffer bytes handed to zstd when flushing */
uint64_t out_buffer_pos; /* starts at 0; presumably the current position in out_buffer - TODO confirm */
uint64_t out_buffer_used; /* starts at 0; presumably the number of valid bytes in out_buffer - TODO confirm */
cbsl_mode mode; /* load or store, as requested at open time */
/* state used to support readline */
byte_t zstd_buf_end; /* flag, starts at 0 */
byte_t zstd_file_end; /* flag, starts at 0 */
byte_t* line_buffer; /* buffer of CBSL_LINEBUF_SIZE bytes */
uint64_t line_buffer_used; /* starts at 0 */
uint64_t line_buffer_read_from_zst; /* starts at 0 */
};
/* Create the zstd context and buffers for ctx->mode; defined in buffer.c. */
cbsl_errors create_streaming_buffers(cbsl_ctx* ctx);
/* Smaller of x and y. The whole expansion is parenthesized so that it can be
   used inside a larger expression; each argument may be evaluated twice, so
   do not pass expressions with side effects. */
#define MIN(x, y) ((x) < (y) ? (x) : (y))
/* Debug helpers: active only when CBSL_DEBUG is defined, otherwise they
   expand to nothing. */
#ifdef CBSL_DEBUG
# include <assert.h>
# define CBSL_ASSERT(COND) { assert((COND)); }
# define CBSL_DEBUG_MESSAGE(...) { fprintf(stderr, __VA_ARGS__); }
#else
# define CBSL_ASSERT(COND) /* */
# define CBSL_DEBUG_MESSAGE(...) /* */
#endif
/* error check */
/* When X is false, print the function name, file, line, the text of X and
   MSG to stderr, then execute FINALIZE. The variants below differ only in
   the message and in whether they return RETCODE from the calling function. */
#define CBSL_CHECK_COND_MSG_IMPL(X,MSG,FINALIZE) { if (!(X)) { fprintf(stderr, "%s at %s l.%d: condition error %s; %s\n", __func__, __FILE__, __LINE__, (#X), (MSG)); FINALIZE; } }
#define CBSL_CHECK_COND(X) CBSL_CHECK_COND_MSG_IMPL((X), "", /* noreturn */)
#define CBSL_CHECK_COND_AND_RETURN(X,RETCODE) CBSL_CHECK_COND_MSG_IMPL((X), "", { return (RETCODE); })
#define CBSL_CHECK_COND_MSG_AND_RETURN(X,MSG,RETCODE) CBSL_CHECK_COND_MSG_IMPL((X), (MSG), { return (RETCODE); })
/* safe memory control */
/* Release X with FREEFUNC when it is not NULL, then set X to NULL so that a
   repeated release is a no-op. X must be an lvalue.
   NOTE(review): these macros expand to a bare brace block, not a
   do/while(0), and CBSL_SAFE_FCLOSE carries its own trailing semicolon -
   take care when using them as the body of an if/else. */
#define CBSL_SAFE_FREE_RESOURCE(X,FREEFUNC) { if ((X) != NULL) { (FREEFUNC)((X)); (X) = NULL; } }
#define CBSL_SAFE_FCLOSE(X) CBSL_SAFE_FREE_RESOURCE((X), fclose);
#define CBSL_SAFE_FREE(X) CBSL_SAFE_FREE_RESOURCE((X), free)
#define CBSL_SAFE_FREE_ZSTD_CCTX(X) CBSL_SAFE_FREE_RESOURCE((X), ZSTD_freeCCtx)
#define CBSL_SAFE_FREE_ZSTD_DCTX(X) CBSL_SAFE_FREE_RESOURCE((X), ZSTD_freeDCtx)
#endif /* CBSL_INTERNAL_HEADER_INCLUDED */

94
cachelab/cbsl/src/file.c Normal file
View File

@ -0,0 +1,94 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "./cbsl_internal.h"
/*
 * Release a context whose construction failed part-way through cbsl_open().
 * Always returns NULL so it can be used directly as the failure value.
 */
static
cbsl_ctx* cbsl_open_safe_finalize(cbsl_ctx** ctx)
{
  if (*ctx != NULL)
  {
    /* The context is incomplete: make sure cbsl_close() does not attempt a
       store-mode flush on buffers that were never created. */
    (*ctx)->mode = cbsl_unknown_mode;
    cbsl_close(*ctx);
    *ctx = NULL;
  }
  return NULL;
}

/*
 * Open the data file at `path` for loading or storing.
 *
 * mode : cbsl_load_mode opens the file with "rb", cbsl_store_mode with "wb"
 * path : data file path, must not be NULL
 *
 * Returns a new context, or NULL when `path` is NULL, the mode is unknown,
 * memory cannot be allocated, the file cannot be opened, or the streaming
 * buffers cannot be created. Release the context with cbsl_close().
 */
cbsl_ctx* cbsl_open(cbsl_mode mode, char* path)
{
  CBSL_CHECK_COND_AND_RETURN(path != NULL, NULL);

  /* Zero the context and null every pointer member explicitly: the failure
     paths below release the context through cbsl_close(), which inspects
     these members. With plain malloc() they held indeterminate values. */
  cbsl_ctx* ctx = (cbsl_ctx*)(calloc(1, sizeof(cbsl_ctx)));
  CBSL_CHECK_COND_AND_RETURN(ctx != NULL, NULL);
  ctx->fp = NULL;
  ctx->zstd_cctx = NULL;
  ctx->zstd_dctx = NULL;
  ctx->in_buffer = NULL;
  ctx->out_buffer = NULL;
  ctx->line_buffer = NULL;
  ctx->mode = cbsl_unknown_mode;

  char const* fopen_mode;
  switch (mode)
  {
    case cbsl_load_mode: fopen_mode = "rb"; break;
    case cbsl_store_mode: fopen_mode = "wb"; break;
    default: free(ctx); return NULL;
  }

#ifdef _WIN32
  errno_t err;
  FILE* fp;
  err = fopen_s(&fp, path, fopen_mode);
  CBSL_CHECK_COND_AND_RETURN(err == 0, cbsl_open_safe_finalize(&ctx));
#else
  FILE* fp = fopen(path, fopen_mode);
  CBSL_CHECK_COND_AND_RETURN(fp != NULL, cbsl_open_safe_finalize(&ctx));
#endif

  ctx->mode = mode;
  ctx->fp = fp;

  CBSL_CHECK_COND_AND_RETURN(create_streaming_buffers(ctx) == cbsl_success, cbsl_open_safe_finalize(&ctx));

  /* File-version header handling, currently disabled. */
  /*
  uint64_t file_version;
  if (mode == cbsl_load_mode)
  {
  file_version = 0;
  CBSL_CHECK_COND_AND_RETURN(cbsl_read(ctx, &file_version, sizeof(file_version)) == cbsl_success, cbsl_open_safe_finalize(&ctx));
  CBSL_CHECK_COND_AND_RETURN(file_version == CBSL_VERSION, cbsl_open_safe_finalize(&ctx));
  }
  else if (mode == cbsl_store_mode)
  {
  file_version = CBSL_VERSION;
  CBSL_CHECK_COND_AND_RETURN(cbsl_write(ctx, &file_version, sizeof(file_version)) == cbsl_success, cbsl_open_safe_finalize(&ctx));
  }
  */

  return ctx;
}
/*
 * Flushes pending data (store mode only), releases every resource owned by
 * the context and frees the context itself. A NULL context is a successful no-op.
 *
 * All resources are released even when something fails. The result is
 * cbsl_error when the final flush or fclose() failed (i.e. the file on disk
 * may be incomplete), otherwise cbsl_success.
 */
cbsl_errors cbsl_close(cbsl_ctx* ctx)
{
  CBSL_CHECK_COND_AND_RETURN(ctx != NULL, cbsl_success);

  cbsl_errors status = cbsl_success;
  if (ctx->mode == cbsl_store_mode)
  {
    if (cbsl_flush(ctx) != cbsl_success)
    {
      status = cbsl_error;
    }
  }

  CBSL_SAFE_FREE_ZSTD_CCTX(ctx->zstd_cctx);
  CBSL_SAFE_FREE_ZSTD_DCTX(ctx->zstd_dctx);
  CBSL_SAFE_FREE(ctx->in_buffer);
  CBSL_SAFE_FREE(ctx->out_buffer);
  CBSL_SAFE_FREE(ctx->line_buffer);

  if (ctx->fp != NULL)
  {
    /* fclose() performs the last write-out of buffered data, so its result matters. */
    if (fclose(ctx->fp) != 0)
    {
      status = cbsl_error;
    }
    ctx->fp = NULL;
  }

  CBSL_SAFE_FREE(ctx);
  return status;
}

55
cachelab/cbsl/src/flush.c Normal file
View File

@ -0,0 +1,55 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "./cbsl_internal.h"
/*
 * Compresses the bytes pending in ctx->in_buffer and writes the result to
 * ctx->fp, then flushes the stdio stream.
 *
 * A partially filled input buffer is compressed with ZSTD_e_end (closing the
 * frame); a completely full one with ZSTD_e_continue.
 *
 * Returns cbsl_error on a zstd error, a short fwrite(), or a failed fflush().
 */
static
cbsl_errors flush_serialize(cbsl_ctx* ctx)
{
  if (ctx->in_buffer_used > 0)
  {
    ZSTD_inBuffer input = { ctx->in_buffer, ctx->in_buffer_used, 0 };
    const ZSTD_EndDirective mode = (ctx->in_buffer_used < ctx->in_buffer_size) ? ZSTD_e_end : ZSTD_e_continue;
    int finished;
    do
    {
      ZSTD_outBuffer output = { ctx->out_buffer, ctx->out_buffer_size, 0 };
      const size_t remaining = ZSTD_compressStream2(ctx->zstd_cctx, &output, &input, mode);
      CBSL_CHECK_COND_MSG_AND_RETURN(!ZSTD_isError(remaining), ZSTD_getErrorName(remaining), cbsl_error);
      const size_t written = fwrite(ctx->out_buffer, 1, output.pos, ctx->fp);
      CBSL_CHECK_COND_AND_RETURN(written == output.pos, cbsl_error);
      /*
       * With ZSTD_e_end the frame is complete only once zstd reports that
       * nothing remains to be flushed; consuming all input is not sufficient.
       * With ZSTD_e_continue the chunk is done once the input is consumed.
       */
      finished = (mode == ZSTD_e_end) ? (remaining == 0) : (input.pos == input.size);
    } while (!finished);
    ctx->in_buffer_used = 0;
  }
  CBSL_CHECK_COND_AND_RETURN(fflush(ctx->fp) == 0, cbsl_error);
  return cbsl_success;
}
/*
 * Public flush entry point. Fails for a NULL context; for a store-mode
 * context it compresses and writes the pending input; for any other mode
 * there is nothing to do and the call succeeds.
 */
cbsl_errors cbsl_flush(cbsl_ctx* ctx)
{
  CBSL_CHECK_COND_AND_RETURN(ctx != NULL, cbsl_error);
  return (ctx->mode == cbsl_store_mode) ? flush_serialize(ctx) : cbsl_success;
}

View File

@ -0,0 +1,471 @@
!
! Copyright 2019 Yuta Hirokawa (University of Tsukuba)
!
! Licensed under the Apache License, Version 2.0 (the "License");
! you may not use this file except in compliance with the License.
! You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing, software
! distributed under the License is distributed on an "AS IS" BASIS,
! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
! See the License for the specific language governing permissions and
! limitations under the License.
!
module cbslf
use, intrinsic :: iso_c_binding
implicit none
type cbslf_context
integer(c_intptr_t) :: ctx
end type
public :: cbslf_context
public :: cbslf_store_mode
public :: cbslf_load_mode
public :: cbslf_unknown_mode
public :: cbslf_success
public :: cbslf_error
public :: cbslf_open
public :: cbslf_close
public :: cbslf_flush
public :: cbslf_write
public :: cbslf_read
public :: cbslf_record
public :: cbslf_record_heap
public :: cbslf_get_mode
public :: cbslf_set_compression_level
public :: cbslf_get_compression_level
! Token pasting helpers. The two-level form lets arguments that are themselves
! macros be expanded before they are pasted together.
#define PASTE(a,b) a##b
#define CONCAT(a,b) PASTE(a,b)
! Store CODE into the optional dummy argument errcode when the caller supplied it.
#define SET_ERRCODE(CODE) \
if (present(errcode)) errcode = CODE
! Store cbslf_success into errcode when COND holds and cbslf_error otherwise
! (only when errcode is present). The newline placeholder tokens in the body
! are turned into real line breaks by a separate text-replacement step.
#define SET_ERRCODE_COND(COND) \
if (present(errcode)) then \
__NL__ if (COND) then \
__NL__ errcode = cbslf_success \
__NL__ else \
__NL__ errcode = cbslf_error \
__NL__ end if \
__NL__ end if
! Assumed-shape dimension lists for ranks 1 to 7.
#define FORTRAN_ARRAY_1 :
#define FORTRAN_ARRAY_2 :,:
#define FORTRAN_ARRAY_3 :,:,:
#define FORTRAN_ARRAY_4 :,:,:,:
#define FORTRAN_ARRAY_5 :,:,:,:,:
#define FORTRAN_ARRAY_6 :,:,:,:,:,:
#define FORTRAN_ARRAY_7 :,:,:,:,:,:,:
! Subscript lists naming one element (all indices 1) for ranks 1 to 7;
! the array macros pass such an element to c_sizeof to get the element size.
#define FORTRAN_ARRAY_VALUE_1 1
#define FORTRAN_ARRAY_VALUE_2 1,1
#define FORTRAN_ARRAY_VALUE_3 1,1,1
#define FORTRAN_ARRAY_VALUE_4 1,1,1,1
#define FORTRAN_ARRAY_VALUE_5 1,1,1,1,1
#define FORTRAN_ARRAY_VALUE_6 1,1,1,1,1,1
#define FORTRAN_ARRAY_VALUE_7 1,1,1,1,1,1,1
! Explicit bounds lists built from the local arrays lb and ub, used in
! allocate statements for ranks 1 to 7.
#define FORTRAN_ARRAY_SHAPE_1 lb(1):ub(1)
#define FORTRAN_ARRAY_SHAPE_2 lb(1):ub(1),lb(2):ub(2)
#define FORTRAN_ARRAY_SHAPE_3 lb(1):ub(1),lb(2):ub(2),lb(3):ub(3)
#define FORTRAN_ARRAY_SHAPE_4 lb(1):ub(1),lb(2):ub(2),lb(3):ub(3),lb(4):ub(4)
#define FORTRAN_ARRAY_SHAPE_5 lb(1):ub(1),lb(2):ub(2),lb(3):ub(3),lb(4):ub(4),lb(5):ub(5)
#define FORTRAN_ARRAY_SHAPE_6 lb(1):ub(1),lb(2):ub(2),lb(3):ub(3),lb(4):ub(4),lb(5):ub(5),lb(6):ub(6)
#define FORTRAN_ARRAY_SHAPE_7 lb(1):ub(1),lb(2):ub(2),lb(3):ub(3),lb(4):ub(4),lb(5):ub(5),lb(6):ub(6),lb(7):ub(7)
! Each macro below emits module procedure lines for a generic interface.
! SAVE_LOAD is the operation prefix (write, read or record) and ROUTINE_NAME
! the type suffix; the generated names match the subroutines produced by the
! implementation macros further down.
! Scalar variant: <op>_scalar_<type>.
#define DECLARE_SCALAR_INTERFACE(SAVE_LOAD,ROUTINE_NAME) \
module procedure CONCAT(CONCAT(SAVE_LOAD,_scalar_),ROUTINE_NAME)
! Array variant for one rank: <op>_array_<rank>_<type>.
#define DECLARE_ARRAY_INTERFACE_N(SAVE_LOAD,ROUTINE_NAME,DIMS) \
module procedure CONCAT(CONCAT(SAVE_LOAD,CONCAT(_array_,DIMS)),CONCAT(_,ROUTINE_NAME))
! Scalar variant plus the array variants for ranks 1 to 7.
! The last body line must not end with a continuation backslash, otherwise
! the following directive is absorbed into this macro.
#define DECLARE_ARRAY_INTERFACE(SAVE_LOAD,ROUTINE_NAME) \
DECLARE_SCALAR_INTERFACE(SAVE_LOAD,ROUTINE_NAME) \
__NL__ DECLARE_ARRAY_INTERFACE_N(SAVE_LOAD,ROUTINE_NAME,1) \
__NL__ DECLARE_ARRAY_INTERFACE_N(SAVE_LOAD,ROUTINE_NAME,2) \
__NL__ DECLARE_ARRAY_INTERFACE_N(SAVE_LOAD,ROUTINE_NAME,3) \
__NL__ DECLARE_ARRAY_INTERFACE_N(SAVE_LOAD,ROUTINE_NAME,4) \
__NL__ DECLARE_ARRAY_INTERFACE_N(SAVE_LOAD,ROUTINE_NAME,5) \
__NL__ DECLARE_ARRAY_INTERFACE_N(SAVE_LOAD,ROUTINE_NAME,6) \
__NL__ DECLARE_ARRAY_INTERFACE_N(SAVE_LOAD,ROUTINE_NAME,7)
! Allocatable-array variant for one rank: <op>_array_heap_<rank>_<type>.
#define DECLARE_ARRAY_INTERFACE_ALLOCATABLE_N(SAVE_LOAD,ROUTINE_NAME,DIMS) \
module procedure CONCAT(CONCAT(SAVE_LOAD,CONCAT(_array_heap_,DIMS)),CONCAT(_,ROUTINE_NAME))
! Allocatable-array variants for ranks 1 to 7.
#define DECLARE_ARRAY_INTERFACE_ALLOCATABLE(SAVE_LOAD,ROUTINE_NAME) \
DECLARE_ARRAY_INTERFACE_ALLOCATABLE_N(SAVE_LOAD,ROUTINE_NAME,1) \
__NL__ DECLARE_ARRAY_INTERFACE_ALLOCATABLE_N(SAVE_LOAD,ROUTINE_NAME,2) \
__NL__ DECLARE_ARRAY_INTERFACE_ALLOCATABLE_N(SAVE_LOAD,ROUTINE_NAME,3) \
__NL__ DECLARE_ARRAY_INTERFACE_ALLOCATABLE_N(SAVE_LOAD,ROUTINE_NAME,4) \
__NL__ DECLARE_ARRAY_INTERFACE_ALLOCATABLE_N(SAVE_LOAD,ROUTINE_NAME,5) \
__NL__ DECLARE_ARRAY_INTERFACE_ALLOCATABLE_N(SAVE_LOAD,ROUTINE_NAME,6) \
__NL__ DECLARE_ARRAY_INTERFACE_ALLOCATABLE_N(SAVE_LOAD,ROUTINE_NAME,7)
! Generates three subroutines for one scalar type (DATA_TYPE), named
! write_scalar_<name>, read_scalar_<name> and record_scalar_<name>.
! Each one takes the byte size of val from c_sizeof, passes the address
! obtained with loc(val) to the matching C routine (cbsl_write, cbsl_read or
! cbsl_record), and stores the C return code in the optional errcode.
! NOTE(review): loc() is a compiler extension rather than standard Fortran.
#define IMPLEMENT_SCALAR_INTERFACE(ROUTINE_NAME,DATA_TYPE) \
subroutine CONCAT(write_scalar_,ROUTINE_NAME)(ctx, val, errcode) \
__NL__ use, intrinsic :: iso_c_binding, only: c_sizeof \
__NL__ implicit none \
__NL__ type(cbslf_context), intent(in) :: ctx \
__NL__ DATA_TYPE, target, intent(in) :: val \
__NL__ integer(4), intent(out), optional :: errcode \
__NL__ integer(c_int64_t) :: psize \
__NL__ integer(4) :: ret \
__NL__ psize = c_sizeof(val) \
__NL__ ret = cbsl_write(ctx%ctx, loc(val), psize) \
__NL__ SET_ERRCODE(ret) \
__NL__ end subroutine \
__NL__ subroutine CONCAT(read_scalar_,ROUTINE_NAME)(ctx, val, errcode) \
__NL__ use, intrinsic :: iso_c_binding, only: c_sizeof \
__NL__ implicit none \
__NL__ type(cbslf_context), intent(in) :: ctx \
__NL__ DATA_TYPE, target, intent(out) :: val \
__NL__ integer(4), intent(out), optional :: errcode \
__NL__ integer(c_int64_t) :: psize \
__NL__ integer(4) :: ret \
__NL__ psize = c_sizeof(val) \
__NL__ ret = cbsl_read(ctx%ctx, loc(val), psize) \
__NL__ SET_ERRCODE(ret) \
__NL__ end subroutine \
__NL__ subroutine CONCAT(record_scalar_,ROUTINE_NAME)(ctx, val, errcode) \
__NL__ use, intrinsic :: iso_c_binding, only: c_sizeof \
__NL__ implicit none \
__NL__ type(cbslf_context), intent(in) :: ctx \
__NL__ DATA_TYPE, target, intent(inout) :: val \
__NL__ integer(4), intent(out), optional :: errcode \
__NL__ integer(c_int64_t) :: psize \
__NL__ integer(4) :: ret \
__NL__ psize = c_sizeof(val) \
__NL__ ret = cbsl_record(ctx%ctx, loc(val), psize) \
__NL__ SET_ERRCODE(ret) \
__NL__ end subroutine
! Generates the rank-DIMS array routines for one element type (DATA_TYPE):
!   write_array_<rank>_<name>       element count, lower bounds, upper bounds, data
!   read_array_<rank>_<name>        reads the same layout into an existing array
!   record_array_<rank>_<name>      dispatches to write or read by context mode
!   record_array_heap_<rank>_<name> like record, but allocates val on load when
!                                   it is not yet allocated
! Error handling: return codes are merged with ior so that any failing step
! leaves a nonzero code (success is 0, so a bitwise and would erase failures).
! The bounds arrays lb and ub are integer(4); their byte count is taken from
! c_sizeof of lb(1), not from the element size of val, so larger element types
! no longer read or write past the end of lb and ub.
! NOTE(review): for element types that are not 4 bytes wide this changes the
! number of bytes stored for the bounds compared with files written earlier.
! On load, the stored element count must fit into val before data is read, and
! the allocating variant only allocates after the bounds were read successfully.
#define IMPLEMENT_ARRAY_INTERFACE_N(ROUTINE_NAME,DATA_TYPE,DIMS) \
__NL__ subroutine CONCAT(CONCAT(write_array_,CONCAT(DIMS,_)),ROUTINE_NAME)(ctx, val, errcode) \
__NL__ use, intrinsic :: iso_c_binding, only: c_sizeof, c_int64_t \
__NL__ implicit none \
__NL__ type(cbslf_context), intent(in) :: ctx \
__NL__ DATA_TYPE, intent(in) :: val(CONCAT(FORTRAN_ARRAY_,DIMS)) \
__NL__ integer(4), intent(out), optional :: errcode \
__NL__ integer(4) :: lb(DIMS),ub(DIMS),nsize,i \
__NL__ integer(c_int64_t) :: bsize \
__NL__ integer(4) :: ret \
__NL__ nsize = size(val, kind=kind(nsize)) \
__NL__ do i=1,size(lb) \
__NL__ lb(i) = lbound(val, i, kind(lb(i))) \
__NL__ ub(i) = ubound(val, i, kind(ub(i))) \
__NL__ end do \
__NL__ call cbslf_write(ctx, nsize, ret) \
__NL__ bsize = size(lb) * c_sizeof(lb(1)) \
__NL__ ret = ior(cbsl_write(ctx%ctx, loc(lb), bsize), ret) \
__NL__ ret = ior(cbsl_write(ctx%ctx, loc(ub), bsize), ret) \
__NL__ bsize = nsize * c_sizeof(val(CONCAT(FORTRAN_ARRAY_VALUE_,DIMS))) \
__NL__ ret = ior(cbsl_write(ctx%ctx, loc(val), bsize), ret) \
__NL__ SET_ERRCODE(ret) \
__NL__ end subroutine \
__NL__ subroutine CONCAT(CONCAT(read_array_,CONCAT(DIMS,_)),ROUTINE_NAME)(ctx, val, errcode) \
__NL__ use, intrinsic :: iso_c_binding, only: c_sizeof, c_int64_t \
__NL__ implicit none \
__NL__ type(cbslf_context), intent(in) :: ctx \
__NL__ DATA_TYPE, intent(out) :: val(CONCAT(FORTRAN_ARRAY_,DIMS)) \
__NL__ integer(4), intent(out), optional :: errcode \
__NL__ integer(4) :: lb(DIMS),ub(DIMS),nsize \
__NL__ integer(c_int64_t) :: bsize \
__NL__ integer(4) :: ret \
__NL__ ret = cbslf_error \
__NL__ if (size(val) > 0) then \
__NL__ call cbslf_read(ctx, nsize, ret) \
__NL__ if (ret == cbslf_success .and. nsize > 0 .and. nsize <= size(val, kind=kind(nsize))) then \
__NL__ bsize = size(lb) * c_sizeof(lb(1)) \
__NL__ ret = ior(cbsl_read(ctx%ctx, loc(lb), bsize), ret) \
__NL__ ret = ior(cbsl_read(ctx%ctx, loc(ub), bsize), ret) \
__NL__ bsize = nsize * c_sizeof(val(CONCAT(FORTRAN_ARRAY_VALUE_,DIMS))) \
__NL__ ret = ior(cbsl_read(ctx%ctx, loc(val), bsize), ret) \
__NL__ else \
__NL__ ret = cbslf_error \
__NL__ end if \
__NL__ end if \
__NL__ SET_ERRCODE(ret) \
__NL__ end subroutine \
__NL__ subroutine CONCAT(CONCAT(record_array_,CONCAT(DIMS,_)),ROUTINE_NAME)(ctx, val, errcode) \
__NL__ use, intrinsic :: iso_c_binding, only: c_sizeof, c_int64_t \
__NL__ implicit none \
__NL__ type(cbslf_context), intent(in) :: ctx \
__NL__ DATA_TYPE, allocatable, intent(inout) :: val(CONCAT(FORTRAN_ARRAY_,DIMS)) \
__NL__ integer(4), intent(out), optional :: errcode \
__NL__ integer(4) :: mode,ret \
__NL__ mode = cbslf_get_mode(ctx) \
__NL__ select case(mode) \
__NL__ case(cbslf_store_mode) \
__NL__ call cbslf_write(ctx, val, ret) \
__NL__ case(cbslf_load_mode) \
__NL__ call cbslf_read(ctx, val, ret) \
__NL__ case default \
__NL__ ret = cbslf_error \
__NL__ end select \
__NL__ SET_ERRCODE(ret) \
__NL__ end subroutine \
__NL__ subroutine CONCAT(CONCAT(record_array_heap_,CONCAT(DIMS,_)),ROUTINE_NAME)(ctx, val, errcode) \
__NL__ use, intrinsic :: iso_c_binding, only: c_sizeof, c_int64_t \
__NL__ implicit none \
__NL__ type(cbslf_context), intent(in) :: ctx \
__NL__ DATA_TYPE, allocatable, intent(inout) :: val(CONCAT(FORTRAN_ARRAY_,DIMS)) \
__NL__ integer(4), intent(out), optional :: errcode \
__NL__ integer(4) :: lb(DIMS),ub(DIMS),nsize,mode \
__NL__ integer(4) :: ret \
__NL__ integer(c_int64_t) :: bsize \
__NL__ mode = cbslf_get_mode(ctx) \
__NL__ select case(mode) \
__NL__ case(cbslf_store_mode) \
__NL__ call cbslf_write(ctx, val, ret) \
__NL__ case(cbslf_load_mode) \
__NL__ if (allocated(val)) then \
__NL__ call cbslf_read(ctx, val, ret) \
__NL__ else \
__NL__ call cbslf_read(ctx, nsize, ret) \
__NL__ if (ret == cbslf_success .and. nsize > 0) then \
__NL__ bsize = size(lb) * c_sizeof(lb(1)) \
__NL__ ret = ior(cbsl_read(ctx%ctx, loc(lb), bsize), ret) \
__NL__ ret = ior(cbsl_read(ctx%ctx, loc(ub), bsize), ret) \
__NL__ if (ret == cbslf_success) then \
__NL__ allocate(val(CONCAT(FORTRAN_ARRAY_SHAPE_,DIMS))) \
__NL__ if (nsize <= size(val, kind=kind(nsize))) then \
__NL__ bsize = nsize * c_sizeof(val(CONCAT(FORTRAN_ARRAY_VALUE_,DIMS))) \
__NL__ ret = ior(cbsl_read(ctx%ctx, loc(val), bsize), ret) \
__NL__ else \
__NL__ ret = cbslf_error \
__NL__ end if \
__NL__ end if \
__NL__ else \
__NL__ ret = cbslf_error \
__NL__ end if \
__NL__ end if \
__NL__ case default \
__NL__ ret = cbslf_error \
__NL__ end select \
__NL__ SET_ERRCODE(ret) \
__NL__ end subroutine
! Generates the scalar routines plus the array routines for ranks 1 to 7 for
! one element type. No explicit line-break marker is needed between the
! pieces because every array expansion begins with one.
#define IMPLEMENT_ARRAY_INTERFACE(ROUTINE_NAME,DATA_TYPE) \
IMPLEMENT_SCALAR_INTERFACE(ROUTINE_NAME,DATA_TYPE) \
IMPLEMENT_ARRAY_INTERFACE_N(ROUTINE_NAME,DATA_TYPE,1) \
IMPLEMENT_ARRAY_INTERFACE_N(ROUTINE_NAME,DATA_TYPE,2) \
IMPLEMENT_ARRAY_INTERFACE_N(ROUTINE_NAME,DATA_TYPE,3) \
IMPLEMENT_ARRAY_INTERFACE_N(ROUTINE_NAME,DATA_TYPE,4) \
IMPLEMENT_ARRAY_INTERFACE_N(ROUTINE_NAME,DATA_TYPE,5) \
IMPLEMENT_ARRAY_INTERFACE_N(ROUTINE_NAME,DATA_TYPE,6) \
IMPLEMENT_ARRAY_INTERFACE_N(ROUTINE_NAME,DATA_TYPE,7)
!======
! Everything not listed in a public statement above is private to the module.
private
! Version constants. The @...@ tokens are template placeholders, presumably
! substituted by the build system before compilation (TODO confirm).
! CBSL_VERSION is the three version numbers written back to back.
integer(8), parameter :: CBSL_VERSION = @cbsl_VERSION_MAJOR@@cbsl_VERSION_MINOR@@cbsl_VERSION_PATCH@
! NOTE(review): the name below is spelled VERSTION, probably meant VERSION.
character(*), parameter :: CBSL_VERSTION_STRING = '@cbsl_VERSION@'
integer(4), parameter :: CBSL_MAJOR_VERSION = @cbsl_VERSION_MAJOR@
integer(4), parameter :: CBSL_MINOR_VERSION = @cbsl_VERSION_MINOR@
integer(4), parameter :: CBSL_PATCH_VERSION = @cbsl_VERSION_PATCH@
! Open modes, passed to the C library as plain integers.
! NOTE(review): these values must match the C enumeration, which is not
! visible from this file.
integer(4), parameter :: cbslf_load_mode = 1
integer(4), parameter :: cbslf_store_mode = 2
integer(4), parameter :: cbslf_unknown_mode = -1
! Status codes stored in the optional errcode arguments.
integer(4), parameter :: cbslf_success = 0
integer(4), parameter :: cbslf_error = -1
! C function interfaces
! Explicit interfaces for the C entry points. The context handle and all data
! addresses travel as integers of kind c_intptr_t passed by value; byte counts
! are c_int64_t by value; results are c_int.
interface
! Opens a file; path is a character array (the wrapper cbslf_open appends the
! terminating null character). The result is the context address, where 0
! means failure.
function cbsl_open(open_mode, path) result(ctx) &
bind(C,name='cbsl_open')
use, intrinsic :: iso_c_binding
integer(c_int),value, intent(in) :: open_mode
character(kind=c_char),intent(in) :: path(*)
integer(c_intptr_t) :: ctx
end function
! Closes the context referred to by ctx.
function cbsl_close(ctx) result(retcode) &
bind(C,name='cbsl_close')
use, intrinsic :: iso_c_binding
integer(c_intptr_t),value,intent(in) :: ctx
integer(c_int) :: retcode
end function
! Flushes pending output of the context.
function cbsl_flush(ctx) result(retcode) &
bind(C,name='cbsl_flush')
use, intrinsic :: iso_c_binding
integer(c_intptr_t),value,intent(in) :: ctx
integer(c_int) :: retcode
end function
! Returns the open mode of the context as an integer.
function cbsl_get_mode(ctx) result(retcode) &
bind(C,name='cbsl_get_mode')
use, intrinsic :: iso_c_binding
integer(c_intptr_t),value,intent(in) :: ctx
integer(c_int) :: retcode
end function
! Writes psize bytes starting at address pdata.
function cbsl_write(ctx, pdata, psize) result(retcode) &
bind(C,name='cbsl_write')
use, intrinsic :: iso_c_binding
integer(c_intptr_t),value,intent(in) :: ctx
integer(c_intptr_t),value,intent(in) :: pdata
integer(c_int64_t),value,intent(in) :: psize
integer(c_int) :: retcode
end function
! Reads psize bytes into the memory at address pdata.
function cbsl_read(ctx, pdata, psize) result(retcode) &
bind(C,name='cbsl_read')
use, intrinsic :: iso_c_binding
integer(c_intptr_t),value,intent(in) :: ctx
integer(c_intptr_t),value,intent(in) :: pdata
integer(c_int64_t),value,intent(in) :: psize
integer(c_int) :: retcode
end function
! Reads or writes psize bytes at pdata depending on the mode of the context.
function cbsl_record(ctx, pdata, psize) result(retcode) &
bind(C,name='cbsl_record')
use, intrinsic :: iso_c_binding
integer(c_intptr_t),value,intent(in) :: ctx
integer(c_intptr_t),value,intent(in) :: pdata
integer(c_int64_t),value,intent(in) :: psize
integer(c_int) :: retcode
end function
! cbsl_record_heap() is not bound here because that function allocates with malloc()
! Sets the compression level of a context.
function cbsl_set_compression_level(ctx,clevel) result(retcode) &
bind(C,name='cbsl_set_compression_level')
use, intrinsic :: iso_c_binding
integer(c_intptr_t),value,intent(in) :: ctx
integer(c_int),value,intent(in) :: clevel
integer(c_int) :: retcode
end function
! Returns the compression level of a context.
function cbsl_get_compression_level(ctx) result(clevel) &
bind(C,name='cbsl_get_compression_level')
use, intrinsic :: iso_c_binding
integer(c_intptr_t),value,intent(in) :: ctx
integer(c_int) :: clevel
end function
end interface
! Generic routines
! Generic write: scalars of every supported type, plus arrays of ranks 1 to 7
! for the numeric types (character and logical are scalar only).
interface cbslf_write
DECLARE_SCALAR_INTERFACE(write,character)
DECLARE_SCALAR_INTERFACE(write,logical)
DECLARE_ARRAY_INTERFACE(write,integer4)
DECLARE_ARRAY_INTERFACE(write,integer8)
DECLARE_ARRAY_INTERFACE(write,sreal)
DECLARE_ARRAY_INTERFACE(write,dreal)
DECLARE_ARRAY_INTERFACE(write,ccomplex)
DECLARE_ARRAY_INTERFACE(write,zcomplex)
end interface
! Generic read, with the same type and rank coverage as the generic write.
interface cbslf_read
DECLARE_SCALAR_INTERFACE(read,character)
DECLARE_SCALAR_INTERFACE(read,logical)
DECLARE_ARRAY_INTERFACE(read,integer4)
DECLARE_ARRAY_INTERFACE(read,integer8)
DECLARE_ARRAY_INTERFACE(read,sreal)
DECLARE_ARRAY_INTERFACE(read,dreal)
DECLARE_ARRAY_INTERFACE(read,ccomplex)
DECLARE_ARRAY_INTERFACE(read,zcomplex)
end interface
! Generic record: reads or writes depending on the mode of the context.
interface cbslf_record
DECLARE_SCALAR_INTERFACE(record,character)
DECLARE_SCALAR_INTERFACE(record,logical)
DECLARE_ARRAY_INTERFACE(record,integer4)
DECLARE_ARRAY_INTERFACE(record,integer8)
DECLARE_ARRAY_INTERFACE(record,sreal)
DECLARE_ARRAY_INTERFACE(record,dreal)
DECLARE_ARRAY_INTERFACE(record,ccomplex)
DECLARE_ARRAY_INTERFACE(record,zcomplex)
end interface
! Generic record for allocatable arrays (ranks 1 to 7, numeric types only);
! on load the array is allocated when it is not allocated yet.
interface cbslf_record_heap
DECLARE_ARRAY_INTERFACE_ALLOCATABLE(record,integer4)
DECLARE_ARRAY_INTERFACE_ALLOCATABLE(record,integer8)
DECLARE_ARRAY_INTERFACE_ALLOCATABLE(record,sreal)
DECLARE_ARRAY_INTERFACE_ALLOCATABLE(record,dreal)
DECLARE_ARRAY_INTERFACE_ALLOCATABLE(record,ccomplex)
DECLARE_ARRAY_INTERFACE_ALLOCATABLE(record,zcomplex)
end interface
contains
! Macro instantiations that generate the specific procedures named in the
! generic interfaces above: the first argument is the name suffix and the
! second the Fortran type declaration it stands for.
IMPLEMENT_SCALAR_INTERFACE(character,character(*,kind=c_char))
IMPLEMENT_SCALAR_INTERFACE(logical,logical(c_bool))
IMPLEMENT_ARRAY_INTERFACE(integer4,integer(c_int32_t))
IMPLEMENT_ARRAY_INTERFACE(integer8,integer(c_int64_t))
IMPLEMENT_ARRAY_INTERFACE(sreal,real(c_float))
IMPLEMENT_ARRAY_INTERFACE(dreal,real(c_double))
IMPLEMENT_ARRAY_INTERFACE(ccomplex,complex(c_float_complex))
IMPLEMENT_ARRAY_INTERFACE(zcomplex,complex(c_double_complex))
! Opens a file through the C library. The path is copied, without trailing
! blanks, into a character array that ends with a null character. errcode, when
! present, is cbslf_success if the returned handle is nonzero and cbslf_error
! otherwise.
function cbslf_open(open_mode, path, errcode) result(ctx)
  implicit none
  integer(4), intent(in) :: open_mode
  character(*), intent(in) :: path
  integer(4), intent(out), optional :: errcode
  type(cbslf_context) :: ctx
  character(1,c_char) :: cpath(len_trim(path)+1)
  integer(4) :: path_len, k
  path_len = len_trim(path, kind=4)
  ! Terminator first, then the characters in front of it.
  cpath(path_len+1) = c_null_char
  do k = 1, path_len
    cpath(k) = path(k:k)
  end do
  ctx%ctx = cbsl_open(open_mode, cpath)
  SET_ERRCODE_COND(ctx%ctx /= 0) ! is not null pointer
end function
! Closes the context; the C return code is stored in errcode when present.
subroutine cbslf_close(ctx, errcode)
  implicit none
  type(cbslf_context), intent(in) :: ctx
  integer(4), intent(out), optional :: errcode
  integer(4) :: status
  status = cbsl_close(ctx%ctx)
  SET_ERRCODE(status)
end subroutine
! Flushes the context; the C return code is stored in errcode when present.
subroutine cbslf_flush(ctx, errcode)
  implicit none
  type(cbslf_context), intent(in) :: ctx
  integer(4), intent(out), optional :: errcode
  integer(4) :: status
  status = cbsl_flush(ctx%ctx)
  SET_ERRCODE(status)
end subroutine
! Returns the open mode of the context. errcode, when present, is
! cbslf_success if the library reported a known mode and cbslf_error if it
! reported cbslf_unknown_mode (the comparison used to be the other way round,
! so success was signalled exactly when the mode was unknown).
function cbslf_get_mode(ctx, errcode) result(open_mode)
  implicit none
  type(cbslf_context), intent(in) :: ctx
  integer(4), intent(out), optional :: errcode
  integer(4) :: open_mode
  open_mode = cbsl_get_mode(ctx%ctx)
  SET_ERRCODE_COND(open_mode /= cbslf_unknown_mode)
end function
! Sets the compression level of the context; the C return code is stored in
! errcode when present.
subroutine cbslf_set_compression_level(ctx, clevel, errcode)
  implicit none
  type(cbslf_context), intent(in) :: ctx
  integer(4), intent(in) :: clevel
  integer(4), intent(out), optional :: errcode
  integer(4) :: status
  status = cbsl_set_compression_level(ctx%ctx, clevel)
  SET_ERRCODE(status)
end subroutine
! Returns the compression level reported by the C library. errcode, when
! present, is cbslf_success for a positive level and cbslf_error otherwise.
! NOTE(review): a level of zero or below is therefore always reported as an
! error; confirm that no valid level falls in that range.
function cbslf_get_compression_level(ctx, errcode) result(clevel)
  implicit none
  type(cbslf_context), intent(in) :: ctx
  integer(4), intent(out), optional :: errcode
  integer(4) :: clevel
  ! The handle is passed by value to the C routine.
  clevel = cbsl_get_compression_level(ctx%ctx)
  SET_ERRCODE_COND(clevel > 0)
end function
end module

View File

@ -0,0 +1,31 @@
#
# Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
cmake_minimum_required(VERSION 3.3)

# Script-mode helper: cmake -P <this script> <input file> [<output file>]
# Replaces every "__NL__" marker in the input text with a real newline and
# writes the result to the output file (or back to the input file when no
# output file is given).
if (${CMAKE_ARGC} LESS 4)
  message(FATAL_ERROR "Usage ${CMAKE_ARGV2} <input file> <output file>")
endif ()

set(INPUT_FILE "${CMAKE_ARGV3}")
# Test for presence, not truthiness: a path such as "0" or "OFF" is still a path.
if (DEFINED CMAKE_ARGV4 AND NOT "${CMAKE_ARGV4}" STREQUAL "")
  set(OUTPUT_FILE "${CMAKE_ARGV4}")
else ()
  set(OUTPUT_FILE "${CMAKE_ARGV3}")
endif ()

file(READ "${INPUT_FILE}" FBIND_SOURCE_TEXT)
# The expansions are quoted so the file content is passed as ONE argument.
# Unquoted, CMake splits it at every ';' into a list and the commands then
# concatenate the pieces, silently dropping the semicolons from the text.
string(REPLACE "__NL__" "\n" FBIND_GEN_TEXT "${FBIND_SOURCE_TEXT}")
file(WRITE "${OUTPUT_FILE}" "${FBIND_GEN_TEXT}")

208
cachelab/cbsl/src/read.c Normal file
View File

@ -0,0 +1,208 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <zstd.h>
#include "./cbsl_internal.h"
/*
 * Decompresses exactly `size` bytes straight into `data`, bypassing
 * ctx->out_buffer. Requires that everything previously decompressed into
 * ctx->out_buffer has already been handed out.
 *
 * Returns cbsl_error when that precondition does not hold, on a zstd error,
 * or when the file ends before `size` bytes were produced (previously that
 * last case looped forever, because an empty input buffer skipped the
 * decompression loop without ever advancing the output).
 *
 * On success, and on the end-of-file failure, the number of bytes actually
 * stored in `data` is recorded in ctx->line_buffer_read_from_zst, the same
 * field the buffered reader fills in.
 */
static
cbsl_errors streaming_read_immediate(cbsl_ctx* ctx, void* data, uint64_t size)
{
  CBSL_DEBUG_MESSAGE("%s: call\n", __func__);
  /* previous all decompressed data loaded */
  CBSL_CHECK_COND_AND_RETURN(ctx->out_buffer_pos >= ctx->out_buffer_used, cbsl_error);

  ZSTD_outBuffer output = { data, size, 0 }; /* decompressed data stores directly */
  while (output.pos < output.size)
  {
    if (ctx->in_buffer_pos < ctx->in_buffer_used)
    {
      /* keep the compressed bytes the previous call did not consume */
      memmove(ctx->in_buffer, ctx->in_buffer + ctx->in_buffer_pos, ctx->in_buffer_used - ctx->in_buffer_pos);
      ctx->in_buffer_used -= ctx->in_buffer_pos;
    }
    else
    {
      ctx->in_buffer_used = 0ULL;
    }
    ctx->in_buffer_pos = 0ULL;

    /* top the input buffer up from the file */
    ctx->in_buffer_used += fread(ctx->in_buffer + ctx->in_buffer_used, 1, ctx->in_buffer_size - ctx->in_buffer_used, ctx->fp);
    if (ctx->in_buffer_used == 0)
    {
      /* no leftover input and nothing more in the file: the request cannot be met */
      ctx->line_buffer_read_from_zst = output.pos;
      return cbsl_error;
    }

    ZSTD_inBuffer input = { ctx->in_buffer, ctx->in_buffer_used, ctx->in_buffer_pos };
    while (input.pos < input.size && output.pos < output.size)
    {
      const size_t ret = ZSTD_decompressStream(ctx->zstd_dctx, &output, &input);
      CBSL_CHECK_COND_MSG_AND_RETURN(!ZSTD_isError(ret), ZSTD_getErrorName(ret), cbsl_error);
    }
    ctx->in_buffer_used = input.size;
    ctx->in_buffer_pos = input.pos;
  }
  CBSL_ASSERT(output.pos == output.size);

  /* pos == used marks the internal output buffer as holding nothing unread */
  ctx->out_buffer_pos = output.pos;
  ctx->out_buffer_used = output.size;
  ctx->line_buffer_read_from_zst = output.pos;
  return cbsl_success;
}
/*
 * Copies up to `size` already-decompressed bytes from ctx->out_buffer into
 * `data`, advances the buffer's read position, and returns the number of
 * bytes copied (0 when the buffer holds nothing unread).
 */
static
uint64_t read_from_buffer(cbsl_ctx* ctx, byte_t* data, uint64_t size)
{
  if (ctx->out_buffer_pos >= ctx->out_buffer_used)
  {
    return 0;
  }
  const uint64_t chunk = MIN(ctx->out_buffer_used - ctx->out_buffer_pos, size);
  memcpy(data, ctx->out_buffer + ctx->out_buffer_pos, chunk);
  ctx->out_buffer_pos += chunk;
  return chunk;
}
/*
 * Reads up to `size` decompressed bytes into `data`, going through
 * ctx->out_buffer: first hands out whatever is still unread there, then
 * repeatedly refills ctx->in_buffer from the file, decompresses into
 * ctx->out_buffer and copies from it.
 *
 * The loop stops early when the file yields no more bytes or when zstd
 * reports the end of a frame (return value 0). The number of bytes actually
 * delivered is stored in ctx->line_buffer_read_from_zst; the function still
 * returns cbsl_success on such a short read. cbsl_error is returned only for
 * a zstd error, and in that case line_buffer_read_from_zst is not updated.
 *
 * NOTE(review): after a frame end the loop is left even if the file holds
 * further frames; they are only picked up by a later call.
 */
static
cbsl_errors streaming_read_buffered(cbsl_ctx* ctx, void* data, uint64_t size)
{
CBSL_DEBUG_MESSAGE("%s: call\n", __func__);
/* (size_t)-1 is nonzero, so "frame ended" is not signalled before zstd has run */
size_t zstd_ret = -1;
/* memcpy from the previous decompression data */
uint64_t read = read_from_buffer(ctx, data, size);
byte_t* pdata = (byte_t*)(data);
/* streaming decompression and memcpy */
while (read < size)
{
if (ctx->in_buffer_pos < ctx->in_buffer_used)
{
/* input buffer has remained data from the previous decompression */
memmove(ctx->in_buffer, ctx->in_buffer + ctx->in_buffer_pos, ctx->in_buffer_used - ctx->in_buffer_pos);
ctx->in_buffer_used -= ctx->in_buffer_pos;
}
else
{
/* input buffer is already consumed by the previous decompression */
ctx->in_buffer_used = 0;
}
ctx->in_buffer_pos = 0ULL;
ctx->in_buffer_used += fread(ctx->in_buffer + ctx->in_buffer_used, 1, ctx->in_buffer_size - ctx->in_buffer_used, ctx->fp);
if (ctx->in_buffer_used == 0) // no leftover input and the file yielded nothing more, so nothing can be decompressed
break;
ZSTD_inBuffer input = { ctx->in_buffer, ctx->in_buffer_used, ctx->in_buffer_pos };
while (input.pos < input.size && read < size)
{
/* each pass decompresses into the start of out_buffer and copies from it right away */
ZSTD_outBuffer output = { ctx->out_buffer, ctx->out_buffer_size, 0 };
zstd_ret = ZSTD_decompressStream(ctx->zstd_dctx, &output, &input);
CBSL_CHECK_COND_MSG_AND_RETURN(!ZSTD_isError(zstd_ret), ZSTD_getErrorName(zstd_ret), cbsl_error);
ctx->out_buffer_pos = 0ULL;
ctx->out_buffer_used = output.pos;
read += read_from_buffer(ctx, pdata + read, size - read);
}
ctx->in_buffer_pos = input.pos;
ctx->in_buffer_used = input.size;
if (zstd_ret == 0)
{
// zstd stream end
ctx->zstd_file_end = 1;
break;
}
}
/* the stream counts as fully drained once the frame ended AND out_buffer has nothing unread */
if (ctx->zstd_file_end == 1 && ctx->out_buffer_pos == ctx->out_buffer_used)
ctx->zstd_buf_end = 1;
/* NOTE(review): this refers to ctx->zstd_end, while the code above uses zstd_file_end / zstd_buf_end -- confirm the intended field */
CBSL_ASSERT(read == size || ctx->zstd_end == 1);
ctx->line_buffer_read_from_zst = read;
return cbsl_success;
}
/*
 * Reads `size` decompressed bytes into `data` from a load-mode context.
 * Fails for a NULL context, a context not in load mode, or NULL `data`.
 * Requests that fit in the decompression buffer go through the buffered
 * reader; larger ones are decompressed directly into `data`.
 */
cbsl_errors cbsl_read(cbsl_ctx* ctx, void* data, uint64_t size)
{
  CBSL_CHECK_COND_AND_RETURN(ctx != NULL, cbsl_error);
  CBSL_CHECK_COND_AND_RETURN(ctx->mode == cbsl_load_mode, cbsl_error);
  CBSL_CHECK_COND_AND_RETURN(data != NULL, cbsl_error);
  CBSL_DEBUG_MESSAGE("read: %lu bytes\n", size);

  if (size <= ctx->out_buffer_size)
  {
    return streaming_read_buffered(ctx, data, size);
  }
  /* data size is larger than compression buffer */
  return streaming_read_immediate(ctx, data, size);
}
/*
 * Reads one line from the decompressed stream into `data`.
 *
 * The line buffer is topped up from the stream, then the bytes up to and
 * including the first '\n' or '\r' are copied to `data` and NUL-terminated.
 * `data` must have room for the line, its terminator character and the NUL
 * (`size` is the capacity of `data` in bytes). When no line is available,
 * `data` is set to the empty string.
 *
 * Returns:
 *   cbsl_end     - the line buffer is empty after this call (`data` may still
 *                  hold the last line);
 *   cbsl_error   - bad arguments, `data` too small for the line, a line that
 *                  does not fit in the line buffer, or a failed read while
 *                  buffered data remains;
 *   cbsl_success - otherwise.
 *
 * Fixes over the previous version:
 *   - the capacity check was `size >= i - 1`, which wrapped around for a
 *     terminator at index 0 (always failing) and otherwise allowed writing up
 *     to three bytes past `data`;
 *   - a stale byte count was added to the line buffer after a failed read;
 *   - trailing bytes without a terminator at the end of the stream, and lines
 *     longer than the line buffer, made every call return success with an
 *     empty `data` forever.
 */
cbsl_errors cbsl_readline(cbsl_ctx* ctx, char* data, uint64_t size)
{
  cbsl_errors ret = cbsl_success;
  CBSL_CHECK_COND_AND_RETURN(ctx != NULL, cbsl_error);
  CBSL_CHECK_COND_AND_RETURN(ctx->mode == cbsl_load_mode, cbsl_error);
  CBSL_CHECK_COND_AND_RETURN(data != NULL, cbsl_error);
  CBSL_CHECK_COND_AND_RETURN(size > 0, cbsl_error);
  CBSL_DEBUG_MESSAGE("%s: %lu bytes\n", __func__, size);

  if (ctx->zstd_buf_end == 0)
  {
    const uint64_t line_buffer_space = CBSL_LINEBUF_SIZE - ctx->line_buffer_used;
    uint64_t received;
    /* cleared first so that a reader which fails before recording its count contributes 0 bytes */
    ctx->line_buffer_read_from_zst = 0;
    if (line_buffer_space > ctx->out_buffer_size)
    {
      /* line_buffer size is larger than compression buffer */
      ret = streaming_read_immediate(ctx, ctx->line_buffer + ctx->line_buffer_used, line_buffer_space);
      /* the direct reader delivers the whole request on success */
      received = (ret == cbsl_success) ? line_buffer_space : ctx->line_buffer_read_from_zst;
    }
    else
    {
      ret = streaming_read_buffered(ctx, ctx->line_buffer + ctx->line_buffer_used, line_buffer_space);
      received = ctx->line_buffer_read_from_zst;
    }
    ctx->line_buffer_used += received;
  }

  data[0] = '\0';

  /* length of the next line including its terminator; 0 while none is found */
  const char* const pending = (const char*)ctx->line_buffer;
  uint64_t line_len = 0;
  for (uint64_t i = 0; i < ctx->line_buffer_used; i++)
  {
    if (pending[i] == '\n' || pending[i] == '\r')
    {
      line_len = i + 1;
      break;
    }
  }

  if (line_len == 0 && ctx->line_buffer_used > 0)
  {
    if (ctx->zstd_buf_end != 0)
    {
      /* stream exhausted: the remaining bytes are a final, unterminated line */
      line_len = ctx->line_buffer_used;
    }
    else
    {
      /* a full buffer without any terminator can never yield a line */
      CBSL_CHECK_COND_AND_RETURN(ctx->line_buffer_used < CBSL_LINEBUF_SIZE, cbsl_error);
    }
  }

  if (line_len > 0)
  {
    /* need line_len bytes plus the trailing NUL */
    CBSL_CHECK_COND_AND_RETURN(line_len < size, cbsl_error);
    memcpy(data, ctx->line_buffer, line_len);
    data[line_len] = '\0';
    memmove(ctx->line_buffer, ctx->line_buffer + line_len, ctx->line_buffer_used - line_len);
    ctx->line_buffer_used -= line_len;
  }

  return (ctx->line_buffer_used == 0) ? cbsl_end : ret;
}

View File

@ -0,0 +1,70 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdint.h>
#include <stdlib.h>
#include "./cbsl_internal.h"
/*
 * Mode-agnostic transfer of `size` bytes at `data`: reads into it when the
 * context is in load mode, writes from it in store mode, and fails for any
 * other mode or for NULL arguments.
 */
cbsl_errors cbsl_record(cbsl_ctx* ctx, void* data, uint64_t size)
{
  CBSL_CHECK_COND_AND_RETURN(ctx != NULL, cbsl_error);
  CBSL_CHECK_COND_AND_RETURN(data != NULL, cbsl_error);

  const cbsl_mode direction = ctx->mode;
  if (direction == cbsl_load_mode)
  {
    return cbsl_read(ctx, data, size);
  }
  if (direction == cbsl_store_mode)
  {
    return cbsl_write(ctx, data, size);
  }
  return cbsl_error;
}
/*
 * Size-prefixed transfer of a heap block.
 *
 * Store mode: writes *size (8 bytes) followed by *size bytes from *data.
 * Load mode:  reads the stored size, then the payload. When *data is NULL a
 *             buffer of the stored size is allocated with malloc() and handed
 *             to the caller (who must free() it); otherwise the stored size
 *             must equal *size and the caller's buffer is filled.
 *
 * Returns cbsl_error on NULL arguments, an unknown mode, a size mismatch,
 * allocation failure or any failed read/write. On failure *data and *size are
 * left unchanged, and a buffer allocated by this call is released again
 * (it used to leak when the payload read failed).
 */
cbsl_errors cbsl_record_heap(cbsl_ctx* ctx, void** data, uint64_t* size)
{
  CBSL_CHECK_COND_AND_RETURN(ctx != NULL, cbsl_error);
  CBSL_CHECK_COND_AND_RETURN(data != NULL, cbsl_error);
  CBSL_CHECK_COND_AND_RETURN(size != NULL, cbsl_error);

  switch (ctx->mode)
  {
    case cbsl_load_mode:
    {
      uint64_t rsize = 0;
      void* rdata = *data;
      const int allocated_here = (rdata == NULL);

      CBSL_CHECK_COND_AND_RETURN(cbsl_read(ctx, &rsize, sizeof(rsize)) == cbsl_success, cbsl_error);
      if (allocated_here)
      {
        CBSL_CHECK_COND_AND_RETURN((rdata = malloc(rsize)) != NULL, cbsl_error);
      }
      else
      {
        CBSL_CHECK_COND_AND_RETURN(rsize == *size, cbsl_error);
      }

      if (cbsl_read(ctx, rdata, rsize) != cbsl_success)
      {
        /* only release memory this call obtained; a caller-supplied buffer stays theirs */
        if (allocated_here)
        {
          free(rdata);
        }
        return cbsl_error;
      }
      *size = rsize;
      *data = rdata;
    }
    break;

    case cbsl_store_mode:
    {
      CBSL_CHECK_COND_AND_RETURN(cbsl_write(ctx, size, sizeof(*size)) == cbsl_success, cbsl_error);
      CBSL_CHECK_COND_AND_RETURN(cbsl_write(ctx, *data, *size) == cbsl_success, cbsl_error);
    }
    break;

    default:
      return cbsl_error;
  }
  return cbsl_success;
}

49
cachelab/cbsl/src/utils.c Normal file
View File

@ -0,0 +1,49 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "./cbsl_internal.h"
/* FIXME: implicit declaration??? */
extern ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
/* Returns the mode the context was opened with, or cbsl_unknown_mode for a NULL context. */
cbsl_mode cbsl_get_mode(cbsl_ctx* ctx)
{
  CBSL_CHECK_COND_AND_RETURN(ctx != NULL, cbsl_unknown_mode);
  const cbsl_mode opened_as = ctx->mode;
  return opened_as;
}
/*
 * Sets the zstd compression level on a store-mode context.
 * Fails for a NULL context, a context that is not in store mode, or when
 * zstd rejects the parameter.
 */
cbsl_errors cbsl_set_compression_level(cbsl_ctx* ctx, int clevel)
{
  CBSL_CHECK_COND_AND_RETURN(ctx != NULL, cbsl_error);
  CBSL_CHECK_COND_AND_RETURN(ctx->mode == cbsl_store_mode, cbsl_error);

  /* forward the level to the compression context */
  const size_t ret = ZSTD_CCtx_setParameter(ctx->zstd_cctx, ZSTD_c_compressionLevel, clevel);
  CBSL_CHECK_COND_MSG_AND_RETURN(!ZSTD_isError(ret), ZSTD_getErrorName(ret), cbsl_error);

  return cbsl_success;
}
/*
 * Returns the zstd compression level configured on a store-mode context.
 * For a NULL context, a context not in store mode, or a zstd failure the
 * value of cbsl_error is returned instead.
 */
int cbsl_get_compression_level(cbsl_ctx* ctx)
{
  CBSL_CHECK_COND_AND_RETURN(ctx != NULL, cbsl_error);
  CBSL_CHECK_COND_AND_RETURN(ctx->mode == cbsl_store_mode, cbsl_error);

  int level = 0; /* overwritten by zstd before it is returned */
  const size_t ret = ZSTD_CCtx_getParameter(ctx->zstd_cctx, ZSTD_c_compressionLevel, &level);
  CBSL_CHECK_COND_MSG_AND_RETURN(!ZSTD_isError(ret), ZSTD_getErrorName(ret), cbsl_error);

  return level;
}

101
cachelab/cbsl/src/write.c Normal file
View File

@ -0,0 +1,101 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include "./cbsl_internal.h"
/*
 * Makes room for an upcoming write of `size` bytes: when the input buffer
 * already holds data and the new bytes would not fit behind it, the pending
 * data is flushed first. Returns cbsl_error only if that flush fails.
 */
static
cbsl_errors streaming_write_flush(cbsl_ctx* ctx, uint64_t size)
{
  /* nothing pending, or the new data still fits: no flush required */
  if (ctx->in_buffer_used == 0 || ctx->in_buffer_used + size <= ctx->in_buffer_size)
  {
    return cbsl_success;
  }
  CBSL_CHECK_COND_AND_RETURN(cbsl_flush(ctx) == cbsl_success, cbsl_error);
  return cbsl_success;
}
/*
 * Compresses and writes `size` bytes from `data` in chunks of at most
 * ctx->in_buffer_size, after flushing any data still pending in the input
 * buffer. A chunk is compressed with ZSTD_e_end when less than one full
 * buffer remains after it, otherwise with ZSTD_e_continue.
 *
 * Returns cbsl_error when the initial flush fails, on a zstd error, or on a
 * short fwrite().
 */
static
cbsl_errors streaming_write_immediate(cbsl_ctx* ctx, const void* data, uint64_t size)
{
  CBSL_DEBUG_MESSAGE("%s: call\n", __func__);
  CBSL_CHECK_COND_AND_RETURN(streaming_write_flush(ctx, size) == cbsl_success, cbsl_error);

  uint64_t written = 0;
  const byte_t* pdata = (const byte_t*)(data);
  while (written < size)
  {
    /* compressing data copies to internal input buffer */
    const uint64_t write_size = MIN(size - written, ctx->in_buffer_size);
    memcpy(ctx->in_buffer, pdata + written, write_size);
    written += write_size;

    ZSTD_inBuffer input = { ctx->in_buffer, write_size, 0 };
    const ZSTD_EndDirective mode = (size - written < ctx->in_buffer_size) ? ZSTD_e_end : ZSTD_e_continue;
    int finished;
    do
    {
      ZSTD_outBuffer output = { ctx->out_buffer, ctx->out_buffer_size, 0 };
      const size_t remaining = ZSTD_compressStream2(ctx->zstd_cctx, &output, &input, mode);
      CBSL_CHECK_COND_MSG_AND_RETURN(!ZSTD_isError(remaining), ZSTD_getErrorName(remaining), cbsl_error);
      CBSL_CHECK_COND_AND_RETURN(fwrite(ctx->out_buffer, 1, output.pos, ctx->fp) == output.pos, cbsl_error);
      /*
       * ZSTD_e_end must be repeated until zstd reports nothing left to flush;
       * stopping as soon as the input is consumed can leave the frame unfinished.
       */
      finished = (mode == ZSTD_e_end) ? (remaining == 0) : (input.pos == input.size);
    } while (!finished);
    CBSL_ASSERT(input.pos == input.size);
  }
  CBSL_ASSERT(written == size);
  return cbsl_success;
}
/*
 * Write path used for data that fits into the compression input buffer.
 *
 * After streaming_write_flush() has had the chance to drain pending input,
 * the data is copied to the start of in_buffer in pieces of at most
 * in_buffer_size bytes and cbsl_flush() is called after every piece.  The
 * do/while form means one (possibly empty) piece is flushed even for size 0.
 *
 * NOTE(review): every piece is copied to offset 0 and in_buffer_used is
 * overwritten, so this relies on no unflushed bytes remaining in in_buffer
 * here -- confirm cbsl_flush() always resets in_buffer_used.
 */
static
cbsl_errors streaming_write_buffered(cbsl_ctx* ctx, const void* data, uint64_t size)
{
CBSL_DEBUG_MESSAGE("%s: call\n", __func__);
CBSL_CHECK_COND_AND_RETURN(streaming_write_flush(ctx, size) == cbsl_success, cbsl_error);
uint64_t written = 0;
const byte_t* pdata = (const byte_t*)(data);
do
{
/* never copy more than the input buffer can hold */
const uint64_t write_size = MIN(size - written, ctx->in_buffer_size);
memcpy(ctx->in_buffer, pdata + written, write_size);
ctx->in_buffer_used = write_size;
CBSL_CHECK_COND_AND_RETURN(cbsl_flush(ctx) == cbsl_success, cbsl_error);
written += write_size;
} while (written < size);
CBSL_ASSERT(written == size);
return cbsl_success;
}
/*
 * Public write entry point: append `size` bytes from `data` to a context
 * opened in cbsl_store_mode.
 *
 * ctx and data must be non-NULL and the context must be in store mode; each
 * condition is checked with cbsl_error as the failure result.  Writes larger
 * than in_buffer_size take the immediate streaming path, all others the
 * buffered path, and that helper's result is returned.
 *
 * NOTE(review): the debug message prints a uint64_t with "%lu", which only
 * matches on platforms where uint64_t is unsigned long.
 */
cbsl_errors cbsl_write(cbsl_ctx* ctx, const void* data, uint64_t size)
{
CBSL_CHECK_COND_AND_RETURN(ctx != NULL, cbsl_error);
CBSL_CHECK_COND_AND_RETURN(ctx->mode == cbsl_store_mode, cbsl_error);
CBSL_CHECK_COND_AND_RETURN(data != NULL, cbsl_error);
CBSL_DEBUG_MESSAGE("write: %lu bytes\n", size);
if (size > ctx->in_buffer_size)
{
/* data size is larger than compression buffer */
return streaming_write_immediate(ctx, data, size);
}
else
{
/* data fits into the compression buffer */
return streaming_write_buffered(ctx, data, size);
}
}

View File

@ -0,0 +1,106 @@
#
# Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Link every target defined after this point against the cbsl and zstd libraries.
link_libraries(${CBSL_LIB} ${ZSTD_LIB})
# One executable per C test program.
add_executable(scalar_data scalar_data.c)
add_executable(single_array single_array.c)
add_executable(multi_array multi_array.c)
add_executable(utility utility.c)
add_executable(split_compression split_compression.c)
add_executable(variable_size_array variable_size_array.c)
add_executable(various_size_array various_size_array.c)
add_executable(cbsl_record cbsl_record.c)
add_executable(cbsl_record_array cbsl_record_array.c)
# The Fortran test additionally links the library named by CBSL_FLIB.
add_executable(fortran_bindings fortran_bindings.f90)
target_link_libraries(fortran_bindings ${CBSL_FLIB} ${CBSL_LIB} ${ZSTD_LIB})
# utility exercises the mode and compression-level accessors of a context
add_test(NAME utility COMMAND utility)
#
# very short data compression/decompression
#
add_test(NAME scalar_data_compression COMMAND scalar_data -c)
add_test(NAME scalar_data_decompression COMMAND scalar_data -d)
# the -d run reads the files written by the -c run, so it is ordered after it
set_tests_properties(scalar_data_decompression PROPERTIES DEPENDS scalar_data_compression)
#
# boundary checking with single array
#
# Register three single_array tests around DATA_SIZE:
# DATA_SIZE - 1, DATA_SIZE and DATA_SIZE + 1 (the size is the only argument).
function(add_simple_test_size_boundary DATA_SIZE)
# DATA_SIZE_B is reused: first for the size just below, then just above
math(EXPR DATA_SIZE_B "${DATA_SIZE} - 1")
add_test(NAME single_array_${DATA_SIZE_B} COMMAND single_array ${DATA_SIZE_B})
add_test(NAME single_array_${DATA_SIZE} COMMAND single_array ${DATA_SIZE})
math(EXPR DATA_SIZE_B "${DATA_SIZE} + 1")
add_test(NAME single_array_${DATA_SIZE_B} COMMAND single_array ${DATA_SIZE_B})
endfunction()
# boundary triples for every power of two from 2^10 to 2^20 (RANGE is inclusive)
foreach(count RANGE 10 20)
math(EXPR DATA_SIZE "1 << ${count}")
add_simple_test_size_boundary(${DATA_SIZE})
endforeach()
#
# boundary checking with multiple arrays
#
# Register multi_array tests with NUM_VARS arrays for total sizes 2^10 .. 2^20.
# multi_array takes the number of arrays first and the total size second.
function(add_multiple_test_size NUM_VARS)
foreach(count RANGE 10 20)
math(EXPR DATA_SIZE "1 << ${count}")
add_test(NAME multi_array_${DATA_SIZE}_by_${NUM_VARS} COMMAND multi_array ${NUM_VARS} ${DATA_SIZE})
endforeach()
endfunction()
# 2 divides every tested size evenly; 3 and 11 do not
add_multiple_test_size(2)
add_multiple_test_size(3)
add_multiple_test_size(11)
#
# test compression/decompression form
#
# variable_size_array without an argument picks a random data size
add_test(NAME variable_size_array COMMAND variable_size_array)
# 2^17 bytes and one byte more; the test names suggest these sit on either
# side of the buffered/immediate switch in cbsl_write
# (presumably the input buffer is 128 KiB -- confirm against the library)
math(EXPR DATA_SIZE "1 << 17")
add_test(NAME buffered_read_write COMMAND variable_size_array ${DATA_SIZE})
math(EXPR DATA_SIZE "(1 << 17) + 1")
add_test(NAME immediate_read_write COMMAND variable_size_array ${DATA_SIZE})
add_test(NAME various_size_array COMMAND various_size_array)
add_test(NAME cbsl_record COMMAND cbsl_record)
add_test(NAME cbsl_record_array COMMAND cbsl_record_array)
# The three variable_size_array runs all write varsize_compressed.zst and
# varsize_raw.dat, and both record tests write check_record_scalar.zst.
# Serialize each group so that parallel runs (ctest -j) cannot clobber
# each other's files.
set_tests_properties(variable_size_array buffered_read_write immediate_read_write
  PROPERTIES RESOURCE_LOCK cbsl_varsize_files)
set_tests_properties(cbsl_record cbsl_record_array
  PROPERTIES RESOURCE_LOCK cbsl_check_record_file)
#
# large array compression/decompression by splitting small block
#
math(EXPR BLOCK_SIZE "1 << 18") # 256 KiB
math(EXPR TOTAL_SIZE "1 << 28") # 256 MiB
add_test(NAME split_compression_256KiB_256MiB COMMAND split_compression ${BLOCK_SIZE} ${TOTAL_SIZE})
math(EXPR BLOCK_SIZE "(1 << 18) - (1 << 16)") # 192 KiB
math(EXPR TOTAL_SIZE "1 << 28") # 256 MiB
add_test(NAME split_compression_192KiB_256MiB COMMAND split_compression ${BLOCK_SIZE} ${TOTAL_SIZE})
# both runs write and re-read the same file (split.zst); never run them concurrently
set_tests_properties(split_compression_256KiB_256MiB split_compression_192KiB_256MiB
  PROPERTIES RESOURCE_LOCK cbsl_split_file)
#
# Fortran bindings API
#
# runs the executable built from fortran_bindings.f90; it takes no arguments
add_test(NAME fortran_bindings COMMAND fortran_bindings)

View File

@ -0,0 +1,64 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cbsl.h>
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
#define CHECK_BINARY(X,Y) {if (memcmp(&(X),&(Y),sizeof((X))) != 0) { fprintf(stderr, "error: binary check %s == %s\n", (#X), (#Y)); exit(1); }}
static const int data0 = 43;
static const double data1 = 3.14159265;
static const int data2 = 14142;
extern void record(cbsl_mode, int*, double*, int*);
char cname[128];
/*
 * Round-trip test for cbsl_record() with scalar values: the three reference
 * constants are stored, the local copies are cleared, the file is loaded
 * again and every value is compared bit-for-bit with its reference.
 */
int main(int argc, char** argv)
{
  int    c0;
  double c1;
  int    c2;

  sprintf(cname, "check_record_scalar.zst");

  /* store pass: working copies carry the reference values */
  c0 = data0;
  c1 = data1;
  c2 = data2;
  record(cbsl_store_mode, &c0, &c1, &c2);

  /* wipe the copies so a successful check proves they were reloaded */
  c0 = 0;
  c1 = 0;
  c2 = 0;
  record(cbsl_load_mode, &c0, &c1, &c2);

  CHECK_BINARY(c0, data0);
  CHECK_BINARY(c1, data1);
  CHECK_BINARY(c2, data2);

  return 0;
}
/*
 * Open the file named by cname in the given mode, pass the three scalars
 * through cbsl_record() in a fixed order (int, double, int) and close the
 * context.  Any failure prints a message and terminates with exit(1).
 */
void record(cbsl_mode mode, int* c0, double* c1, int* c2)
{
  cbsl_ctx* ctx = cbsl_open(mode, cname);

  if (!ctx)
  {
    fprintf(stderr, "error: cbsl_open\n");
    exit(1);
  }

  CBSL_ERROR_CHECK(cbsl_record(ctx, c0, sizeof(int)));
  CBSL_ERROR_CHECK(cbsl_record(ctx, c1, sizeof(double)));
  CBSL_ERROR_CHECK(cbsl_record(ctx, c2, sizeof(int)));

  CBSL_ERROR_CHECK(cbsl_close(ctx));
}

View File

@ -0,0 +1,96 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cbsl.h>
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
#define CHECK(X) {if (!(X)) { fprintf(stderr, "error: check %s\n", (#X)); exit(1); }}
#define CHECK_BINARY(X,Y) {if (memcmp(&(X),&(Y),sizeof((X))) != 0) { fprintf(stderr, "error: binary check %s == %s\n", (#X), (#Y)); exit(1); }}
typedef unsigned char byte_t;
extern void rand_byte_t(uint64_t data_size, byte_t* a);
extern void record(cbsl_mode, uint64_t* size, void** data);
char cname[128];
/*
 * Round-trip test for cbsl_record_heap(): 1 MiB of random bytes is stored,
 * loaded into a NULL pointer, verified and zeroed, and then loaded a second
 * time into the already existing buffer and verified again.
 */
int main(int argc, char** argv)
{
  (void)argc;
  (void)argv;

  /* dedicated file name: the scalar record test already uses
     check_record_scalar.zst and may run at the same time */
  sprintf(cname, "check_record_array.zst");

  uint64_t size = 1024 * 1024 * sizeof(byte_t);
  byte_t* data = (byte_t*)(malloc(size));
  CHECK(data != NULL);
  rand_byte_t(size, data);

  record(cbsl_store_mode, &size, (void**) &data);

  /* first load: size and pointer start out empty */
  uint64_t rsize = 0;
  byte_t* rdata = NULL;
  record(cbsl_load_mode, &rsize, (void**) &rdata);
  CHECK(size == rsize);
  CHECK(rdata != NULL);

  for (uint64_t i = 0; i < (size/sizeof(byte_t)); ++i)
  {
    if (data[i] != rdata[i])
    {
      fprintf(stderr, "1: mismatch!\n");
      exit(1);
    }
    else
    {
      /* clear so the second load must really rewrite the buffer */
      rdata[i] = 0;
    }
  }

  /* second load: reuses the buffer obtained above */
  record(cbsl_load_mode, &rsize, (void**) &rdata);
  CHECK(size == rsize);
  CHECK(rdata != NULL);

  for (uint64_t i = 0; i < (size/sizeof(byte_t)); ++i)
  {
    if (data[i] != rdata[i])
    {
      fprintf(stderr, "2: mismatch!\n");
      exit(1);
    }
  }

  free(data);
  free(rdata);

  return 0;
}
/*
 * Open the file named by cname in the given mode, pass the (data, size) pair
 * through cbsl_record_heap() and close the context.  Any failure prints a
 * message and terminates with exit(1).
 */
void record(cbsl_mode mode, uint64_t* size, void** data)
{
  cbsl_ctx* ctx = cbsl_open(mode, cname);

  if (!ctx)
  {
    fprintf(stderr, "error: cbsl_open\n");
    exit(1);
  }

  CBSL_ERROR_CHECK(cbsl_record_heap(ctx, data, size));

  CBSL_ERROR_CHECK(cbsl_close(ctx));
}
/*
 * Fill a[0 .. data_size-1] with pseudo-random bytes taken from rand().
 * The caller is responsible for seeding (srand) and for the buffer size.
 */
void rand_byte_t(uint64_t data_size, byte_t* a)
{
  for (uint64_t i = 0; i < data_size; ++i)
  {
    /* modulo 256 (not 255) so that the byte value 0xFF can occur as well */
    a[i] = (byte_t)(rand() % 256);
  }
}

View File

@ -0,0 +1,159 @@
!
! Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
!
! Licensed under the Apache License, Version 2.0 (the "License");
! you may not use this file except in compliance with the License.
! You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing, software
! distributed under the License is distributed on an "AS IS" BASIS,
! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
! See the License for the specific language governing permissions and
! limitations under the License.
!
! Round-trip test for the Fortran bindings: two real arrays (one declared with
! fixed bounds, one allocatable) and a string are written to cname by
! serialize, read back by deserialize and compared with the sources.
! NOTE(review): the main program has no "implicit none".
program test_fortran_bindings
use cbslf
integer, parameter :: array_size = 100000
real :: source_stack_array(array_size), dest_stack_array(array_size)
real, allocatable :: source_heap_array(:), dest_heap_array(:), dest_heap_array2(:)
character(128) :: source_string, dest_string
character(*), parameter :: cname = 'data_fortran.zst'
! dest_heap_array2 is deliberately not allocated here; it is only handed to
! cbslf_record_heap in deserialize (presumably allocated there -- confirm)
allocate(source_heap_array(array_size))
allocate(dest_heap_array(array_size))
call random_number(source_stack_array)
call random_number(source_heap_array)
source_string = 'abcdefgABCDEFG%'
dest_string = ''
print *, 'serialize...'
call serialize
print *, 'deserialize...'
call deserialize
print *, 'compare...'
if (.not. compare_real_array(array_size, source_stack_array, dest_stack_array)) then
print *, 'fail: compare stack array'
stop 1
end if
if (.not. compare_real_array(array_size, source_heap_array, dest_heap_array)) then
print *, 'fail: compare heap array'
stop 1
end if
! source_heap_array is written twice by serialize; this checks the second copy
if (.not. compare_real_array(array_size, source_heap_array, dest_heap_array2)) then
print *, 'fail: compare heap array'
stop 1
end if
! trailing blanks of the fixed-length strings are ignored
if (trim(source_string) /= trim(dest_string)) then
print *, 'fail: compare string'
stop 1
end if
contains
! Write the test data to cname in store mode: the fixed-size array, the
! allocatable array twice, and the string, at compression level 10.
! Every call reports through errcode; any failure stops the program with
! status 1.
subroutine serialize
  implicit none
  type(cbslf_context) :: ctx
  integer(4) :: errcode

  ctx = cbslf_open(cbslf_store_mode, cname, errcode)
  if (errcode /= cbslf_success) then
    print *, 'fail: cbslf_open(store)'
    stop 1
  end if

  call cbslf_set_compression_level(ctx, 10, errcode)
  if (errcode /= cbslf_success) then
    print *, 'fail: cbslf_set_compression_level'
    stop 1
  end if

  call cbslf_write(ctx, source_stack_array, errcode)
  if (errcode /= cbslf_success) then
    print *, 'fail: cbslf_write(stack array)'
    stop 1
  end if

  call cbslf_write(ctx, source_heap_array, errcode)
  if (errcode /= cbslf_success) then
    print *, 'fail: cbslf_write(heap array)'
    stop 1
  end if

  ! second copy; deserialize reads it back through cbslf_record_heap
  call cbslf_write(ctx, source_heap_array, errcode)
  if (errcode /= cbslf_success) then
    print *, 'fail: cbslf_write(heap array2)'
    stop 1
  end if

  call cbslf_write(ctx, source_string, errcode)
  if (errcode /= cbslf_success) then
    print *, 'fail: cbslf_write(string)'
    stop 1
  end if

  ! pass errcode (as deserialize does) so the check below tests the result
  ! of the close and not the stale status of the previous write
  call cbslf_close(ctx, errcode)
  if (errcode /= cbslf_success) then
    print *, 'fail: cbslf_close'
    stop 1
  end if
end subroutine
! Read the data back from cname in load mode, in the order serialize wrote it:
! fixed-size array, allocatable array, a second array via cbslf_record_heap,
! and the string.  Any failing call stops the program with status 1.
subroutine deserialize
implicit none
type(cbslf_context) :: ctx
integer(4) :: errcode
ctx = cbslf_open(cbslf_load_mode, cname, errcode)
if (errcode /= cbslf_success) then
print *, 'fail: cbslf_open(load)'
stop 1
end if
call cbslf_read(ctx, dest_stack_array, errcode)
if (errcode /= cbslf_success) then
print *, 'fail: cbslf_read(stack array)'
stop 1
end if
call cbslf_read(ctx, dest_heap_array, errcode)
if (errcode /= cbslf_success) then
print *, 'fail: cbslf_read(heap array)'
stop 1
end if
! dest_heap_array2 was never allocated by the main program
! (presumably cbslf_record_heap allocates it in load mode -- confirm)
call cbslf_record_heap(ctx, dest_heap_array2, errcode)
if (errcode /= cbslf_success) then
print *, 'fail: cbslf_record_heap(heap array2)'
stop 1
end if
call cbslf_read(ctx, dest_string, errcode)
if (errcode /= cbslf_success) then
print *, 'fail: cbslf_read(string)'
stop 1
end if
call cbslf_close(ctx, errcode)
if (errcode /= cbslf_success) then
print *, 'fail: cbslf_close'
stop 1
end if
end subroutine
! Element-wise comparison of two real arrays of length n.
! Returns .true. only if every pair differs by at most epsilon(a(i));
! the first pair outside the tolerance (or involving a NaN) yields .false.
! An empty range (n <= 0) compares equal.
function compare_real_array(n, a, b) result(ret)
  implicit none
  integer, intent(in) :: n
  real, intent(in) :: a(n), b(n)
  logical :: ret
  integer :: i

  ret = .true.
  do i=1,n
    ! the result must not be overwritten by later elements: stop at the
    ! first mismatch instead of keeping only the last comparison
    if (.not. (abs(a(i) - b(i)) <= epsilon(a(i)))) then
      ret = .false.
      return
    end if
  end do
end function
end program

View File

@ -0,0 +1,150 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <cbsl.h>
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
typedef unsigned char byte_t;
extern void compress(uint64_t num_vars, uint64_t data_size, byte_t** a);
extern void raw_write(uint64_t num_vars, uint64_t data_size, byte_t** a);
extern void decompress(uint64_t num_vars, uint64_t data_size, byte_t** a);
extern void raw_read(uint64_t num_vars, uint64_t data_size, byte_t** a);
extern void rand_byte_t(uint64_t data_size, byte_t* a);
char cname[128], sname[128];
/*
 * multi_array <num_vars> <total_size>
 *
 * Splits total_size bytes evenly over num_vars random arrays, writes them
 * once through cbsl and once as a raw file, reads both back and verifies
 * that the original, the decompressed and the raw data are identical.
 * Exits with status 1 on bad arguments, allocation failure or mismatch.
 */
int main(int argc, char** argv)
{
  if (argc <= 2)
    exit(1);

  uint64_t data_size, num_vars;
  if (sscanf(argv[1], "%lu", &num_vars) != 1 || sscanf(argv[2], "%lu", &data_size) != 1)
  {
    fprintf(stderr, "error: invalid arguments\n");
    exit(1);
  }
  if (num_vars == 0)
  {
    /* would divide by zero below */
    fprintf(stderr, "error: number of variables must be positive\n");
    exit(1);
  }
  data_size = data_size / num_vars;

  printf("variables = %lu\n", num_vars);
  printf("data size = %lf [MiB] * %lu\n", (double)(data_size)/pow(2,20), num_vars);

  sprintf(cname, "multiple_compressed_%lu.zst", data_size * num_vars);
  sprintf(sname, "multiple_raw_%lu.dat", data_size);

  srand((unsigned int)(time(NULL)));

  byte_t** a = malloc(sizeof(byte_t*) * num_vars);
  byte_t** b = malloc(sizeof(byte_t*) * num_vars);
  byte_t** c = malloc(sizeof(byte_t*) * num_vars);
  if (a == NULL || b == NULL || c == NULL)
  {
    fprintf(stderr, "error: malloc\n");
    exit(1);
  }
  for(uint64_t i = 0; i < num_vars; ++i)
  {
    a[i] = (byte_t*)(malloc(data_size));
    b[i] = (byte_t*)(malloc(data_size));
    c[i] = (byte_t*)(malloc(data_size));
    /* malloc(0) may legitimately return NULL */
    if (data_size > 0 && (a[i] == NULL || b[i] == NULL || c[i] == NULL))
    {
      fprintf(stderr, "error: malloc\n");
      exit(1);
    }
    rand_byte_t(data_size / sizeof(byte_t), a[i]);
  }

  compress(num_vars, data_size, a);
  raw_write(num_vars, data_size, a);

  decompress(num_vars, data_size, b);
  raw_read(num_vars, data_size, c);

  for(uint64_t i = 0; i < num_vars; ++i)
  {
    for(uint64_t j = 0; j < data_size; ++j)
    {
      /* both copies must match each other and the original data */
      if (a[i][j] != b[i][j] || b[i][j] != c[i][j])
      {
        fprintf(stderr, "mismatch!\n");
        exit(1);
      }
    }
  }

  for(uint64_t i = 0; i < num_vars; ++i)
  {
    free(a[i]);
    free(b[i]);
    free(c[i]);
  }
  free(a);
  free(b);
  free(c);

  return 0;
}
/*
 * Write num_vars arrays of data_size bytes each, one after another, to the
 * cbsl file named by cname.  Any failure prints a message and exits with 1.
 */
void compress(uint64_t num_vars, uint64_t data_size, byte_t** a)
{
  cbsl_ctx* ctx = cbsl_open(cbsl_store_mode, cname);

  if (!ctx)
  {
    fprintf(stderr, "error: cbsl_open\n");
    exit(1);
  }

  uint64_t i = 0;
  while (i < num_vars)
  {
    CBSL_ERROR_CHECK(cbsl_write(ctx, a[i], data_size));
    ++i;
  }

  CBSL_ERROR_CHECK(cbsl_close(ctx));
}
/*
 * Write num_vars arrays of data_size bytes each, uncompressed, to the file
 * named by sname.  A failed open, short write or failed close prints a
 * message and exits with 1.
 */
void raw_write(uint64_t num_vars, uint64_t data_size, byte_t** a)
{
  FILE* fp = fopen(sname, "wb");
  if (fp == NULL)
  {
    fprintf(stderr, "error: fopen\n");
    exit(1);
  }
  /* index has the same type as the bound: no signed/unsigned comparison */
  for(uint64_t i = 0; i < num_vars; ++i)
  {
    if (fwrite(a[i], 1, data_size, fp) != data_size)
    {
      fprintf(stderr, "error: fwrite\n");
      exit(1);
    }
  }
  /* fclose flushes buffered data, so its result matters for a write stream */
  if (fclose(fp) != 0)
  {
    fprintf(stderr, "error: fclose\n");
    exit(1);
  }
}
/*
 * Read num_vars arrays of data_size bytes each, in order, from the cbsl file
 * named by cname into the caller-provided buffers a[0..num_vars-1].
 * Any failure prints a message and exits with 1.
 */
void decompress(uint64_t num_vars, uint64_t data_size, byte_t** a)
{
  cbsl_ctx* ctx = cbsl_open(cbsl_load_mode, cname);

  if (!ctx)
  {
    fprintf(stderr, "error: cbsl_open\n");
    exit(1);
  }

  uint64_t i = 0;
  while (i < num_vars)
  {
    CBSL_ERROR_CHECK(cbsl_read(ctx, a[i], data_size));
    ++i;
  }

  CBSL_ERROR_CHECK(cbsl_close(ctx));
}
/*
 * Read num_vars arrays of data_size bytes each from the raw file named by
 * sname into the caller-provided buffers.  A failed open or a short read
 * prints a message and exits with 1, so the caller never compares
 * uninitialised buffer contents.
 */
void raw_read(uint64_t num_vars, uint64_t data_size, byte_t** a)
{
  FILE* fp = fopen(sname, "rb");
  if (fp == NULL)
  {
    fprintf(stderr, "error: fopen\n");
    exit(1);
  }
  for(uint64_t i = 0; i < num_vars; ++i)
  {
    if (fread(a[i], 1, data_size, fp) != data_size)
    {
      fprintf(stderr, "error: fread\n");
      exit(1);
    }
  }
  fclose(fp);
}
/*
 * Fill a[0 .. data_size-1] with pseudo-random bytes taken from rand().
 * The caller is responsible for seeding (srand) and for the buffer size.
 */
void rand_byte_t(uint64_t data_size, byte_t* a)
{
  for (uint64_t i = 0; i < data_size; ++i)
  {
    /* modulo 256 (not 255) so that the byte value 0xFF can occur as well */
    a[i] = (byte_t)(rand() % 256);
  }
}

View File

@ -0,0 +1,111 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cbsl.h>
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
#define CHECK_BINARY(X,Y) {if (memcmp(&(X),&(Y),sizeof((X))) != 0) { fprintf(stderr, "error: binary check %s == %s\n", (#X), (#Y)); exit(1); }}
static const int data0 = 43;
static const double data1 = 3.14159265;
static const int data2 = 14142;
extern void compress();
extern void decompress();
char cname[128], sname[128];
/*
 * scalar_data -c | -d
 *
 * "-c" writes the reference scalars (compressed and raw), "-d" reads both
 * files back and verifies them.  Returns 1 when the option is missing or
 * unknown, 0 otherwise.
 */
int main(int argc, char** argv)
{
  if (argc < 2)
  {
    return 1;
  }

  sprintf(cname, "scalar_compressed.zst");
  sprintf(sname, "scalar_raw.dat");

  const char* option = argv[1];
  if (!strcmp(option, "-c"))
  {
    compress();
    return 0;
  }
  if (!strcmp(option, "-d"))
  {
    decompress();
    return 0;
  }

  return 1;
}
/*
 * Write the three reference scalars (int, double, int) to the cbsl file
 * named by cname and, uncompressed, to the file named by sname.
 * Any failing step prints a message and exits with 1.
 */
void compress()
{
  const int c0 = data0;
  const double c1 = data1;
  const int c2 = data2;

  cbsl_ctx* ctx = cbsl_open(cbsl_store_mode, cname);
  if (ctx == NULL)
  {
    fprintf(stderr, "error: cbsl_open\n");
    exit(1);
  }
  CBSL_ERROR_CHECK(cbsl_write(ctx, &c0, sizeof(int)));
  CBSL_ERROR_CHECK(cbsl_write(ctx, &c1, sizeof(double)));
  CBSL_ERROR_CHECK(cbsl_write(ctx, &c2, sizeof(int)));
  CBSL_ERROR_CHECK(cbsl_close(ctx));

  FILE* fp = fopen(sname, "wb");
  if (fp == NULL)
  {
    fprintf(stderr, "error: fopen\n");
    exit(1);
  }
  /* each call writes exactly one item; anything else is a failed write */
  if (fwrite(&c0, sizeof(int), 1, fp) != 1 ||
      fwrite(&c1, sizeof(double), 1, fp) != 1 ||
      fwrite(&c2, sizeof(int), 1, fp) != 1)
  {
    fprintf(stderr, "error: fwrite\n");
    exit(1);
  }
  /* fclose flushes buffered data, so its result matters for a write stream */
  if (fclose(fp) != 0)
  {
    fprintf(stderr, "error: fclose\n");
    exit(1);
  }
}
/*
 * Read the three scalars back from the cbsl file (d0..d2) and from the raw
 * file (r0..r2) and compare both sets bit-for-bit with each other and with
 * the reference constants.  Any failing step prints a message and exits
 * with 1.
 */
void decompress()
{
  int d0, r0;
  double d1, r1;
  int d2, r2;

  cbsl_ctx* ctx = cbsl_open(cbsl_load_mode, cname);
  if (ctx == NULL)
  {
    fprintf(stderr, "error: cbsl_open\n");
    exit(1);
  }
  CBSL_ERROR_CHECK(cbsl_read(ctx, &d0, sizeof(int)));
  CBSL_ERROR_CHECK(cbsl_read(ctx, &d1, sizeof(double)));
  CBSL_ERROR_CHECK(cbsl_read(ctx, &d2, sizeof(int)));
  CBSL_ERROR_CHECK(cbsl_close(ctx));

  FILE* fp = fopen(sname, "rb");
  if (fp == NULL)
  {
    fprintf(stderr, "error: fopen\n");
    exit(1);
  }
  /* a short read would leave r0..r2 uninitialised for the checks below */
  if (fread(&r0, sizeof(int), 1, fp) != 1 ||
      fread(&r1, sizeof(double), 1, fp) != 1 ||
      fread(&r2, sizeof(int), 1, fp) != 1)
  {
    fprintf(stderr, "error: fread\n");
    exit(1);
  }
  fclose(fp);

  CHECK_BINARY(d0, r0); CHECK_BINARY(d0, data0);
  CHECK_BINARY(d1, r1); CHECK_BINARY(d1, data1);
  CHECK_BINARY(d2, r2); CHECK_BINARY(d2, data2);
}

View File

@ -0,0 +1,129 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <cbsl.h>
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
typedef unsigned char byte_t;
extern void compress(uint64_t data_size, const byte_t* a);
extern void raw_write(uint64_t data_size, const byte_t* a);
extern void decompress(uint64_t data_size, byte_t* a);
extern void raw_read(uint64_t data_size, byte_t* a);
extern void rand_byte_t(uint64_t data_size, byte_t* a);
char cname[128], sname[128];
/*
 * single_array <data_size>
 *
 * Generates data_size random bytes, writes them once through cbsl and once
 * as a raw file, reads both back and verifies that the original, the
 * decompressed and the raw data are identical.  Exits with status 1 on bad
 * arguments, allocation failure or mismatch.
 */
int main(int argc, char** argv)
{
  if (argc < 2)
    exit(1);

  int data_size;
  if (sscanf(argv[1], "%d", &data_size) != 1 || data_size < 0)
  {
    fprintf(stderr, "error: invalid data size\n");
    exit(1);
  }
  printf("data size = %lf [MiB]\n", (double)(data_size)/pow(2,20));

  sprintf(cname, "simple_compressed_%d.zst", data_size);
  sprintf(sname, "simple_raw_%d.dat", data_size);

  srand((unsigned int)(time(NULL)));

  byte_t* a = (byte_t*)(malloc(data_size));
  byte_t* b = (byte_t*)(malloc(data_size));
  byte_t* c = (byte_t*)(malloc(data_size));
  /* malloc(0) may legitimately return NULL */
  if (data_size > 0 && (a == NULL || b == NULL || c == NULL))
  {
    fprintf(stderr, "error: malloc\n");
    exit(1);
  }
  rand_byte_t(data_size / sizeof(byte_t), a);

  compress(data_size, a);
  raw_write(data_size, a);

  decompress(data_size, b);
  raw_read(data_size, c);

  for (uint64_t i = 0; i < (uint64_t)data_size; ++i)
  {
    /* the raw copy was previously read but never looked at */
    if (a[i] != b[i] || b[i] != c[i])
    {
      fprintf(stderr, "mismatch!\n");
      exit(1);
    }
  }

  free(a);
  free(b);
  free(c);

  return 0;
}
/*
 * Write data_size bytes from a to the cbsl file named by cname.
 * Any failure prints a message and exits with 1.
 */
void compress(uint64_t data_size, const byte_t* a)
{
  cbsl_ctx* ctx = cbsl_open(cbsl_store_mode, cname);

  if (!ctx)
  {
    fprintf(stderr, "error: cbsl_open(cbsl_store_mode)\n");
    exit(1);
  }

  CBSL_ERROR_CHECK(cbsl_write(ctx, a, data_size));

  CBSL_ERROR_CHECK(cbsl_close(ctx));
}
/*
 * Write data_size bytes from a, uncompressed, to the file named by sname.
 * A failed open, short write or failed close prints a message and exits
 * with 1.
 */
void raw_write(uint64_t data_size, const byte_t* a)
{
  FILE* fp = fopen(sname, "wb");
  if (fp == NULL)
  {
    fprintf(stderr, "error: fopen(wb)\n");
    exit(1);
  }
  if (fwrite(a, 1, data_size, fp) != data_size)
  {
    fprintf(stderr, "error: fwrite\n");
    exit(1);
  }
  /* fclose flushes buffered data, so its result matters for a write stream */
  if (fclose(fp) != 0)
  {
    fprintf(stderr, "error: fclose\n");
    exit(1);
  }
}
/*
 * Read data_size bytes from the cbsl file named by cname into a.
 * Any failure prints a message and exits with 1.
 */
void decompress(uint64_t data_size, byte_t* a)
{
  cbsl_ctx* ctx = cbsl_open(cbsl_load_mode, cname);

  if (!ctx)
  {
    fprintf(stderr, "error: cbsl_open(cbsl_load_mode)\n");
    exit(1);
  }

  CBSL_ERROR_CHECK(cbsl_read(ctx, a, data_size));

  CBSL_ERROR_CHECK(cbsl_close(ctx));
}
/*
 * Read data_size bytes from the raw file named by sname into a.
 * A failed open or a short read prints a message and exits with 1, so the
 * buffer is never left partially uninitialised.
 */
void raw_read(uint64_t data_size, byte_t* a)
{
  FILE* fp = fopen(sname, "rb");
  if (fp == NULL)
  {
    fprintf(stderr, "error: fopen(rb)\n");
    exit(1);
  }
  if (fread(a, 1, data_size, fp) != data_size)
  {
    fprintf(stderr, "error: fread\n");
    exit(1);
  }
  fclose(fp);
}
/*
 * Fill a[0 .. data_size-1] with pseudo-random bytes taken from rand().
 * The caller is responsible for seeding (srand) and for the buffer size.
 */
void rand_byte_t(uint64_t data_size, byte_t* a)
{
  for (uint64_t i = 0; i < data_size; ++i)
  {
    /* modulo 256 (not 255) so that the byte value 0xFF can occur as well */
    a[i] = (byte_t)(rand() % 256);
  }
}

View File

@ -0,0 +1,93 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cbsl.h>
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
#define CHECK_BINARY(X,Y) {if (memcmp(&(X),&(Y),sizeof((X))) != 0) { fprintf(stderr, "error: binary check %s == %s\n", (#X), (#Y)); exit(1); }}
typedef unsigned char byte_t;
extern void record(cbsl_mode, int, byte_t*, int);
extern void rand_byte_t(int n, byte_t* a);
char cname[128];
/*
 * split_compression <block_size> <total_size>
 *
 * Stores total_size random bytes in pieces of block_size bytes, loads them
 * back the same way and compares the result with the original.
 * Returns 1 on missing/invalid arguments, 2 when total_size < block_size,
 * and exits with 1 on allocation failure or mismatch.
 */
int main(int argc, char** argv)
{
  if (argc < 3)
    return 1;

  sprintf(cname, "split.zst");

  int block_size, total_size;
  if (sscanf(argv[1], "%d\n", &block_size) != 1 ||
      sscanf(argv[2], "%d\n", &total_size) != 1)
    return 1;

  /* a non-positive block size would make record() loop forever */
  if (block_size <= 0)
    return 1;

  if (total_size < block_size)
    return 2;

  byte_t* a = malloc(sizeof(byte_t) * total_size);
  byte_t* b = malloc(sizeof(byte_t) * total_size);
  if (a == NULL || b == NULL)
  {
    fprintf(stderr, "error: malloc\n");
    exit(1);
  }
  rand_byte_t(total_size, a);

  record(cbsl_store_mode, total_size, a, block_size);
  record(cbsl_load_mode, total_size, b, block_size);

  for (int i = 0; i < total_size; ++i)
  {
    if (a[i] != b[i])
    {
      fprintf(stderr, "mismatch!\n");
      exit(1);
    }
  }

  free(a);
  free(b);

  return 0;
}
/*
 * Pass total_size bytes starting at a through cbsl_record() in consecutive
 * pieces of block_size bytes (the last piece may be shorter), using the file
 * named by cname opened in the given mode.  At least one cbsl_record() call
 * is always made.  Any failure prints a message and exits with 1.
 */
void record(cbsl_mode mode, int total_size, byte_t* a, int block_size)
{
  cbsl_ctx* ctx = cbsl_open(mode, cname);

  if (!ctx)
  {
    fprintf(stderr, "error: cbsl_open\n");
    exit(1);
  }

  int total = 0;
  do
  {
    /* clamp the piece to what is left */
    int size = block_size;
    if (total_size - total < block_size)
    {
      size = total_size - total;
    }

    CBSL_ERROR_CHECK(cbsl_record(ctx, a + total, size));
    total += size;
  }
  while (total < total_size);

  if (total != total_size)
  {
    fprintf(stderr, "total != total_size\n");
    exit(1);
  }

  CBSL_ERROR_CHECK(cbsl_close(ctx));
}
/*
 * Fill a[0 .. n-1] with pseudo-random bytes taken from rand().
 * Nothing is written when n <= 0.
 */
void rand_byte_t(int n, byte_t* a) {
  for (int i = 0; i < n; ++i)
  {
    /* modulo 256 (not 255) so that the byte value 0xFF can occur as well */
    a[i] = (byte_t)(rand() % 256);
  }
}

View File

@ -0,0 +1,43 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cbsl.h>
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
#define CHECK(X) {if (!(X)) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
char cname[128];
/*
 * Checks the context accessors: a context opened in store mode must report
 * cbsl_store_mode, and a compression level of 20 set on it must be read
 * back unchanged.
 */
int main(int argc, char** argv)
{
  cbsl_ctx* ctx;

  sprintf(cname, "data.zst");

  ctx = cbsl_open(cbsl_store_mode, cname);
  if (!ctx)
  {
    fprintf(stderr, "error: cbsl_open\n");
    exit(1);
  }

  CHECK(cbsl_get_mode(ctx) == cbsl_store_mode);

  CBSL_ERROR_CHECK(cbsl_set_compression_level(ctx, 20));
  CHECK(cbsl_get_compression_level(ctx) == 20);

  CBSL_ERROR_CHECK(cbsl_close(ctx));

  return 0;
}

View File

@ -0,0 +1,155 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <cbsl.h>
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
typedef unsigned char byte_t;
extern void compress(uint64_t data_size, const byte_t* a);
extern void raw_write(uint64_t data_size, const byte_t* a);
extern void decompress(uint64_t* data_size, byte_t** a);
extern void raw_read(uint64_t* data_size, byte_t** a);
extern uint64_t rand_size();
extern void rand_byte_t(uint64_t data_size, byte_t* a);
char cname[128], sname[128];
/*
 * variable_size_array [data_size]
 *
 * Writes a size-prefixed random array through cbsl and as a raw file, reads
 * both back (the readers learn the size from the file) and verifies sizes
 * and contents.  Without an argument a random size is used.  Exits with
 * status 1 on bad arguments, allocation failure or mismatch.
 */
int main(int argc, char** argv)
{
  sprintf(cname, "varsize_compressed.zst");
  sprintf(sname, "varsize_raw.dat");

  srand((unsigned int)(time(NULL)));

  uint64_t data_size;
  if (argc >= 2)
  {
    /* an unparsable argument would leave data_size uninitialised */
    if (sscanf(argv[1], "%lu\n", &data_size) != 1)
    {
      fprintf(stderr, "error: invalid data size\n");
      exit(1);
    }
    printf("specified data size: %lu byte\n", data_size);
  }
  else
  {
    data_size = rand_size();
    printf("random generate data size: %lu byte\n", data_size);
  }

  byte_t* a = (byte_t*)(malloc(data_size));
  /* malloc(0) may legitimately return NULL */
  if (a == NULL && data_size > 0)
  {
    fprintf(stderr, "error: malloc\n");
    exit(1);
  }
  rand_byte_t(data_size / sizeof(byte_t), a);

  compress(data_size, a);
  raw_write(data_size, a);

  uint64_t b_data_size = 0; byte_t* b = NULL;
  uint64_t c_data_size = 0; byte_t* c = NULL;
  decompress(&b_data_size, &b);
  raw_read(&c_data_size, &c);

  if (data_size != b_data_size || b_data_size != c_data_size)
  {
    fprintf(stderr, "data size is mismatch!\n");
    exit(1);
  }

  for (uint64_t i = 0; i < data_size; ++i)
  {
    if (a[i] != b[i] || b[i] != c[i])
    {
      fprintf(stderr, "data value is mismatch!\n");
      exit(1);
    }
  }

  free(a);
  free(b);
  free(c);

  return 0;
}
/*
 * Write the byte count followed by data_size bytes from a to the cbsl file
 * named by cname.  Any failure prints a message and exits with 1.
 */
void compress(uint64_t data_size, const byte_t* a)
{
  cbsl_ctx* ctx = cbsl_open(cbsl_store_mode, cname);

  if (!ctx)
  {
    fprintf(stderr, "error: cbsl_open(cbsl_store_mode)\n");
    exit(1);
  }

  /* size header first, payload second */
  CBSL_ERROR_CHECK(cbsl_write(ctx, &data_size, sizeof(data_size)));
  CBSL_ERROR_CHECK(cbsl_write(ctx, a, data_size));

  CBSL_ERROR_CHECK(cbsl_close(ctx));
}
/*
 * Write the byte count followed by data_size bytes from a, uncompressed, to
 * the file named by sname.  A failed open, short write or failed close
 * prints a message and exits with 1.
 */
void raw_write(uint64_t data_size, const byte_t* a)
{
  FILE* fp = fopen(sname, "wb");
  if (fp == NULL)
  {
    fprintf(stderr, "error: fopen(wb)\n");
    exit(1);
  }
  if (fwrite(&data_size, 1, sizeof(data_size), fp) != sizeof(data_size) ||
      fwrite(a, 1, data_size, fp) != data_size)
  {
    fprintf(stderr, "error: fwrite\n");
    exit(1);
  }
  /* fclose flushes buffered data, so its result matters for a write stream */
  if (fclose(fp) != 0)
  {
    fprintf(stderr, "error: fclose\n");
    exit(1);
  }
}
/*
 * Read a size-prefixed array from the cbsl file named by cname.
 * On return *data_size holds the stored byte count and *a points to a newly
 * malloc'ed buffer with the data; the caller frees it.
 * Any failure prints a message and exits with 1.
 */
void decompress(uint64_t* data_size, byte_t** a)
{
  cbsl_ctx* ctx = cbsl_open(cbsl_load_mode, cname);
  if (ctx == NULL)
  {
    fprintf(stderr, "error: cbsl_open(cbsl_load_mode)\n");
    exit(1);
  }
  /* size of the pointed-to uint64_t, not of the pointer: the writer stored
     sizeof(uint64_t) bytes, which differs from the pointer size on 32-bit */
  CBSL_ERROR_CHECK(cbsl_read(ctx, data_size, sizeof(*data_size)));
  *a = (byte_t*)(malloc(sizeof(byte_t) * *data_size));
  /* malloc(0) may legitimately return NULL */
  if (*a == NULL && *data_size > 0)
  {
    fprintf(stderr, "error: malloc\n");
    exit(1);
  }
  CBSL_ERROR_CHECK(cbsl_read(ctx, *a, *data_size));
  CBSL_ERROR_CHECK(cbsl_close(ctx));
}
/*
 * Read a size-prefixed array from the raw file named by sname.
 * On return *data_size holds the stored byte count and *a points to a newly
 * malloc'ed buffer with the data; the caller frees it.
 * A failed open, short read or failed allocation prints a message and exits
 * with 1.
 */
void raw_read(uint64_t* data_size, byte_t** a)
{
  FILE* fp = fopen(sname, "rb");
  if (fp == NULL)
  {
    fprintf(stderr, "error: fopen(rb)\n");
    exit(1);
  }
  /* size of the pointed-to uint64_t, not of the pointer: the writer stored
     sizeof(uint64_t) bytes, which differs from the pointer size on 32-bit */
  if (fread(data_size, 1, sizeof(*data_size), fp) != sizeof(*data_size))
  {
    fprintf(stderr, "error: fread\n");
    exit(1);
  }
  *a = (byte_t*)(malloc(sizeof(byte_t) * *data_size));
  /* malloc(0) may legitimately return NULL */
  if (*a == NULL && *data_size > 0)
  {
    fprintf(stderr, "error: malloc\n");
    exit(1);
  }
  if (fread(*a, 1, *data_size, fp) != *data_size)
  {
    fprintf(stderr, "error: fread\n");
    exit(1);
  }
  fclose(fp);
}
/*
 * Pick a pseudo-random array size: rand() reduced modulo 1 MiB, raised to
 * 1024 when the draw is smaller than that.
 */
uint64_t rand_size()
{
  const uint64_t lower_bound = 1024;
  const uint64_t upper_bound = 1048576; /* 1 MiB, exclusive */

  const uint64_t candidate = (uint64_t)rand() % upper_bound;
  if (candidate < lower_bound)
  {
    return lower_bound;
  }
  return candidate;
}
/*
 * Fill a[0 .. data_size-1] with pseudo-random bytes taken from rand().
 * The caller is responsible for seeding (srand) and for the buffer size.
 */
void rand_byte_t(uint64_t data_size, byte_t* a)
{
  for (uint64_t i = 0; i < data_size; ++i)
  {
    /* modulo 256 (not 255) so that the byte value 0xFF can occur as well */
    a[i] = (byte_t)(rand() % 256);
  }
}

View File

@ -0,0 +1,189 @@
/*
* Copyright 2019 Yuta Hirokawa (University of Tsukuba, Japan)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <cbsl.h>
#define CBSL_ERROR_CHECK(X) {if ((X) == cbsl_error) { fprintf(stderr, "error: %s\n", (#X)); exit(1); }}
typedef unsigned char byte_t;
extern void compress(uint64_t n, uint64_t* sizes, byte_t** a);
extern void raw_write(uint64_t n, uint64_t* sizes, byte_t** a);
extern void decompress(uint64_t n, uint64_t* sizes, byte_t** a);
extern void raw_read(uint64_t n, uint64_t* sizes, byte_t** a);
extern uint64_t rand_size();
extern void rand_byte_t(uint64_t data_size, byte_t* a);
char cname[128], sname[128];
/*
 * Allocates a zero-initialised array of `count` elements of `elem_size` bytes.
 * Terminates the program (exit status 1) if the count does not fit in size_t
 * or the allocation fails. A zero count is allowed and may yield NULL.
 */
static void* checked_alloc(uint64_t count, size_t elem_size)
{
  const size_t n = (size_t)count;
  void* p;

  if ((uint64_t)n != count)
  {
    fprintf(stderr, "error: allocation too large\n");
    exit(1);
  }
  /* calloc() also guards the n * elem_size multiplication against overflow. */
  p = calloc(n, elem_size);
  if (p == NULL && n != 0)
  {
    fprintf(stderr, "error: out of memory\n");
    exit(1);
  }
  return p;
}

/*
 * Round-trip test over several arrays of differing sizes.
 *
 * Generates num_vars random byte arrays, writes them once through cbsl
 * (compress) and once with plain stdio (raw_write), reads both files back and
 * verifies that sizes and contents match the originals.
 *
 * argv[1] (optional): number of arrays; when omitted a random count in
 *                     2..101 is used.
 * Returns 0 on success; every failure path exits with status 1.
 */
int main(int argc, char** argv)
{
  snprintf(cname, sizeof(cname), "various_compressed.zst");
  snprintf(sname, sizeof(sname), "various_raw.dat");
  srand((unsigned int)(time(NULL)));

  uint64_t num_vars;
  if (argc < 2)
  {
    num_vars = rand() % 100 + 2;
  }
  else
  {
    /* Reject input with no leading number instead of continuing with an
     * uninitialised count, as the unchecked sscanf() did. */
    char* end = NULL;
    const unsigned long long parsed = strtoull(argv[1], &end, 10);
    if (end == argv[1])
    {
      fprintf(stderr, "error: invalid number of arrays: %s\n", argv[1]);
      exit(1);
    }
    num_vars = parsed;
  }
  printf("number of array (data sets): %llu\n", (unsigned long long)num_vars);

  /* Element sizes are taken from the pointed-to types: the size arrays hold
   * uint64_t values (not pointers) and the payloads hold single bytes. */
  byte_t** a = (byte_t**)(checked_alloc(num_vars, sizeof(*a)));
  byte_t** b = (byte_t**)(checked_alloc(num_vars, sizeof(*b)));
  byte_t** c = (byte_t**)(checked_alloc(num_vars, sizeof(*c)));
  uint64_t* asizes = (uint64_t*)(checked_alloc(num_vars, sizeof(*asizes)));
  uint64_t* bsizes = (uint64_t*)(checked_alloc(num_vars, sizeof(*bsizes)));
  uint64_t* csizes = (uint64_t*)(checked_alloc(num_vars, sizeof(*csizes)));

  for (uint64_t i = 0; i < num_vars; ++i)
  {
    asizes[i] = rand_size();
    a[i] = (byte_t*)(checked_alloc(asizes[i], sizeof(byte_t)));
    rand_byte_t(asizes[i] / sizeof(byte_t), a[i]);
    printf("array[%llu] = %llu bytes\n",
           (unsigned long long)i, (unsigned long long)asizes[i]);
    /* The read-back buffers are sized like the original: a correct round trip
     * returns exactly asizes[i] bytes. */
    b[i] = (byte_t*)(checked_alloc(asizes[i], sizeof(byte_t)));
    c[i] = (byte_t*)(checked_alloc(asizes[i], sizeof(byte_t)));
  }

  compress(num_vars, asizes, a);
  raw_write(num_vars, asizes, a);
  decompress(num_vars, bsizes, b);
  raw_read(num_vars, csizes, c);

  for (uint64_t i = 0; i < num_vars; ++i)
  {
    if (asizes[i] != bsizes[i] || bsizes[i] != csizes[i])
    {
      fprintf(stderr, "array size is mismatch!\n");
      exit(1);
    }
  }

  for (uint64_t i = 0; i < num_vars; ++i)
    for (uint64_t j = 0; j < asizes[i]; ++j)
    {
      if (a[i][j] != b[i][j] || b[i][j] != c[i][j])
      {
        fprintf(stderr, "data value is mismatch! (a,b,c)[%llu][%llu]\n",
                (unsigned long long)i, (unsigned long long)j);
        exit(1);
      }
    }

  for (uint64_t i = 0; i < num_vars; ++i)
  {
    free(a[i]);
    free(b[i]);
    free(c[i]);
  }
  free(a);
  free(b);
  free(c);
  free(asizes);
  free(bsizes);
  free(csizes);
  return 0;
}
/*
 * Writes n arrays to the file named by cname through cbsl in store mode.
 *
 * n:     number of arrays.
 * sizes: sizes[i] is the byte count of a[i].
 * a:     the arrays to write.
 *
 * For each array the size value itself (sizeof(sizes[i]) bytes) is written
 * first, followed by sizes[i] bytes of payload. A failed open or any call
 * returning cbsl_error prints a message to stderr and exits with status 1.
 */
void compress(uint64_t n, uint64_t* sizes, byte_t** a)
{
cbsl_ctx* ctx = cbsl_open(cbsl_store_mode, cname);
if (ctx == NULL)
{
fprintf(stderr, "error: cbsl_open(cbsl_store_mode)\n");
exit(1);
}
for (uint64_t i = 0; i < n; ++i)
{
/* Length prefix first, then the payload it describes. */
CBSL_ERROR_CHECK(cbsl_write(ctx, &sizes[i], sizeof(sizes[i])));
CBSL_ERROR_CHECK(cbsl_write(ctx, a[i], sizes[i]));
}
CBSL_ERROR_CHECK(cbsl_close(ctx));
}
/*
 * Writes n length-prefixed arrays to the plain binary file named by `sname`.
 *
 * n:     number of arrays.
 * sizes: sizes[i] is the byte count of a[i].
 * a:     the arrays to write.
 *
 * The on-disk layout per array is the size value followed by the payload.
 * Any failure (open, short write, or close) prints a message to stderr and
 * exits with status 1, so a truncated reference file is never left unnoticed.
 */
void raw_write(uint64_t n, uint64_t* sizes, byte_t** a)
{
  FILE* fp = fopen(sname, "wb");
  if (fp == NULL)
  {
    fprintf(stderr, "error: fopen(wb)\n");
    exit(1);
  }

  for (uint64_t i = 0; i < n; ++i)
  {
    if (fwrite(&sizes[i], 1, sizeof(sizes[i]), fp) != sizeof(sizes[i])
        || fwrite(a[i], 1, sizes[i], fp) != sizes[i])
    {
      fprintf(stderr, "error: fwrite\n");
      exit(1);
    }
  }

  /* fclose() flushes buffered data, so a write error can surface here. */
  if (fclose(fp) != 0)
  {
    fprintf(stderr, "error: fclose\n");
    exit(1);
  }
}
/*
 * Reads n arrays back from the file named by cname through cbsl in load mode.
 *
 * n:     number of arrays to read.
 * sizes: output; sizes[i] receives the stored size of array i.
 * a:     a[i] must already point to a buffer; sizes[i] bytes are read into it.
 *
 * NOTE(review): the capacity of a[i] is not passed in, so the stored size is
 * trusted as-is — callers must provide buffers at least that large.
 * A failed open or any call returning cbsl_error prints a message to stderr
 * and exits with status 1.
 */
void decompress(uint64_t n, uint64_t* sizes, byte_t** a)
{
cbsl_ctx* ctx = cbsl_open(cbsl_load_mode, cname);
if (ctx == NULL)
{
fprintf(stderr, "error: cbsl_open(cbsl_load_mode)\n");
exit(1);
}
for (uint64_t i = 0; i < n; ++i)
{
/* Length prefix first; it determines how many payload bytes follow. */
CBSL_ERROR_CHECK(cbsl_read(ctx, &sizes[i], sizeof(sizes[i])));
CBSL_ERROR_CHECK(cbsl_read(ctx, a[i], sizes[i]));
}
CBSL_ERROR_CHECK(cbsl_close(ctx));
}
/*
 * Reads n length-prefixed arrays back from the plain binary file named by
 * `sname`.
 *
 * n:     number of arrays to read.
 * sizes: output; sizes[i] receives the stored size of array i.
 * a:     a[i] must already point to a buffer; sizes[i] bytes are read into it.
 *
 * NOTE(review): the capacity of a[i] is not passed in, so the stored size is
 * trusted as-is — callers must provide buffers at least that large.
 * A failed open or a short read prints a message to stderr and exits with
 * status 1 instead of leaving sizes/a partially filled.
 */
void raw_read(uint64_t n, uint64_t* sizes, byte_t** a)
{
  FILE* fp = fopen(sname, "rb");
  if (fp == NULL)
  {
    fprintf(stderr, "error: fopen(rb)\n");
    exit(1);
  }

  for (uint64_t i = 0; i < n; ++i)
  {
    if (fread(&sizes[i], 1, sizeof(sizes[i]), fp) != sizeof(sizes[i])
        || fread(a[i], 1, sizes[i], fp) != sizes[i])
    {
      fprintf(stderr, "error: fread\n");
      exit(1);
    }
  }

  fclose(fp);
}
/*
 * Picks a pseudo-random payload size: rand() reduced modulo 1 MiB, raised to
 * 1024 whenever the draw falls below that floor.
 */
uint64_t rand_size()
{
  const uint64_t floor_size = 1024;
  const uint64_t modulus    = 1048576; /* 1 MiB */
  const uint64_t drawn      = rand() % modulus;

  if (drawn < floor_size)
  {
    return floor_size;
  }
  return drawn;
}
/*
 * Fills the first data_size elements of `a` with pseudo-random values.
 * Each value is rand() % 255, so the stored bytes lie in 0..254.
 */
void rand_byte_t(uint64_t data_size, byte_t* a)
{
  byte_t*  out       = a;
  uint64_t remaining = data_size;

  while (remaining > 0)
  {
    *out = rand() % 255;
    ++out;
    --remaining;
  }
}

22
cachelab/common.h Normal file
View File

@ -0,0 +1,22 @@
///////////////////////////////////////////////////////////////////////
//// Copyright 2022 by mars. //
///////////////////////////////////////////////////////////////////////
/*
 * Shared integer typedefs and the cache/memory interface of the cache lab.
 * NOTE(review): identifiers containing a double underscore are reserved for
 * the implementation in C; the guard name is kept unchanged here.
 */
#ifndef __TRACE_COMMON_H__
#define __TRACE_COMMON_H__
/* Integer aliases used throughout the lab sources; the names suggest 8/16/32/64-bit
 * widths, which holds on common platforms but is not enforced here. */
typedef unsigned char UINT8;
typedef unsigned short UINT16;
typedef unsigned int UINT32;
typedef int INT32;
typedef unsigned long long UINT64;
/* Data cache: InitDataCache() is defined in Cache.c, whose comments say the
 * simulator calls it at start-up. */
void InitDataCache(void);
/* NOTE(review): parameter meanings are inferred from their names only
 * (Operation selects the access kind, DataSize its width, LoadResult receives
 * loaded data) — confirm against the implementation. */
UINT8 AccessDataCache(UINT64 Address, UINT8 Operation, UINT8 DataSize, UINT64 StoreValue, UINT64* LoadResult);
/* Instruction cache counterparts of the two functions above. */
void InitInstCache(void);
UINT8 AccessInstCache(UINT64 Address, UINT8 Operation, UINT8 InstSize, UINT64* InstResult);
/* Backing-memory accessors; presumably they transfer one UINT64 per call at
 * the given address — TODO confirm alignment requirements. */
UINT64 ReadMemory(UINT64 Address);
void WriteMemory(UINT64 Address, UINT64 WriteData);
#endif

250
cachelab/getopt.c Normal file
View File

@ -0,0 +1,250 @@
/*****************************************************************************
* getopt.c - competent and free getopt library.
* $Header: /cvsroot/freegetopt/freegetopt/getopt.c,v 1.2 2003/10/26 03:10:20 vindaci Exp $
*
* Copyright (c)2002-2003 Mark K. Kim
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* * Neither the original author of this software nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "getopt.h"
// static const char* ID = "$Id: getopt.c,v 1.2 2003/10/26 03:10:20 vindaci Exp $";
/* Public getopt state, declared in getopt.h. */
char* optarg = NULL; /* Argument of the option just returned, or NULL */
int optind = 0; /* Updated on every getopt() return; 0 until the first call */
int opterr = 1; /* Non-zero: getopt() prints diagnostics to stderr */
int optopt = '?'; /* Option character that caused the last '?' return */
/* Private parser state, reset whenever getopt() sees a different argv/argc. */
static char** prev_argv = NULL; /* Keep a copy of argv and argc to */
static int prev_argc = 0; /* tell if getopt params change */
static int argv_index = 0; /* Option we're checking */
static int argv_index2 = 0; /* Option argument we're checking */
static int opt_offset = 0; /* Index into compounded "-option" */
static int dashdash = 0; /* True if "--" option reached */
static int nonopt = 0; /* How many nonopts we've found */
/*
 * Advances argv_index to the next argv entry to examine and resets
 * opt_offset to the first character after the dash.
 *
 * When an option argument further ahead has already been consumed
 * (argv_index < argv_index2), entries are skipped until a NULL entry, an entry
 * starting with '-', or the slot just past argv_index2 is reached. Otherwise
 * the index simply moves forward by one.
 */
static void increment_index()
{
	if (argv_index >= argv_index2)
	{
		argv_index++;
	}
	else
	{
		do
		{
			++argv_index;
		} while (prev_argv[argv_index]
			&& prev_argv[argv_index][0] != '-'
			&& argv_index < argv_index2 + 1);
	}

	opt_offset = 1;
}
/*
 * Rotates the argv entry at argv_index to the last slot of prev_argv[],
 * shifting every later entry one position towards the front, and counts it
 * as a non-option.
 *
 * Returns 0 when an entry was moved, or 1 when argv_index already lies inside
 * the trailing run of previously moved non-options (nothing left to permute).
 */
static int permute_argv_once()
{
	char* moved;
	int shifted;

	/* Movability check */
	if (argv_index + nonopt >= prev_argc)
	{
		return 1;
	}

	moved = prev_argv[argv_index];
	shifted = prev_argc - argv_index - 1;

	/* Close the gap left by the current entry, then park it at the end */
	memmove(&prev_argv[argv_index], &prev_argv[argv_index + 1],
		sizeof(char**) * shifted);
	prev_argv[prev_argc - 1] = moved;

	nonopt++;
	return 0;
}
/*
 * Parses the next command-line option from argv.
 *
 * argc, argv: the argument vector; argv must be NULL-terminated because the
 *             end of input is detected by a NULL entry. argv may be permuted
 *             so that non-option arguments end up at the back.
 * optstr:     accepted option characters. "x:" marks an option with a
 *             required argument, "x::" one whose argument is only taken when
 *             attached (-xARG). A leading '-' makes non-option arguments come
 *             back as return value 1 with optarg set; a leading '+' stops
 *             parsing at the first non-option argument.
 *
 * Returns the option character, 1 for a non-option in '-' mode, '?' for an
 * unknown option or a missing required argument (optopt holds the offending
 * character), or -1 when parsing is finished. optind is updated on every
 * return; optarg is reset to NULL on every call before being filled in.
 *
 * Parser state is kept in file-level statics and is reinitialised whenever
 * argv or argc differs from the previous call.
 */
int getopt(int argc, char** argv, char* optstr)
{
int c = 0;
/* If we have new argv, reinitialize */
if (prev_argv != argv || prev_argc != argc)
{
/* Initialize variables */
prev_argv = argv;
prev_argc = argc;
argv_index = 1;
argv_index2 = 1;
opt_offset = 1;
dashdash = 0;
nonopt = 0;
}
/* Jump point in case we want to ignore the current argv_index */
getopt_top:
/* Misc. initializations */
optarg = NULL;
/* Dash-dash check */
if (argv[argv_index] && !strcmp(argv[argv_index], "--"))
{
dashdash = 1;
increment_index();
}
/* If we're at the end of argv, that's it. */
if (argv[argv_index] == NULL)
{
c = -1;
}
/* Are we looking at a string? Single dash is also a string */
else if (dashdash || argv[argv_index][0] != '-' || !strcmp(argv[argv_index], "-"))
{
/* If we want a string... */
if (optstr[0] == '-')
{
c = 1;
optarg = argv[argv_index];
increment_index();
}
/* If we really don't want it (we're in POSIX mode), we're done */
else if (optstr[0] == '+')// || getenv("POSIXLY_CORRECT"))
{
c = -1;
/* Everything else is a non-opt argument */
nonopt = argc - argv_index;
}
/* If we mildly don't want it, then move it back */
else
{
/* A successful permute puts a new entry at argv_index, so re-examine it */
if (!permute_argv_once()) goto getopt_top;
else c = -1;
}
}
/* Otherwise we're looking at an option */
else
{
char* opt_ptr = NULL;
/* Grab the option */
c = argv[argv_index][opt_offset++];
/* Is the option in the optstr? */
if (optstr[0] == '-') opt_ptr = strchr(optstr + 1, c);
else opt_ptr = strchr(optstr, c);
/* Invalid argument */
if (!opt_ptr)
{
if (opterr)
{
fprintf(stderr, "%s: invalid option -- %c\n", argv[0], c);
}
optopt = c;
c = '?';
/* Move onto the next option */
increment_index();
}
/* Option takes argument */
else if (opt_ptr[1] == ':')
{
/* ie, -oARGUMENT, -xxxoARGUMENT, etc. */
if (argv[argv_index][opt_offset] != '\0')
{
optarg = &argv[argv_index][opt_offset];
increment_index();
}
/* ie, -o ARGUMENT (only if it's a required argument) */
else if (opt_ptr[2] != ':')
{
/* Scan forward from the furthest argument consumed so far to the next
 * entry that does not start with '-'; that entry becomes the argument */
if (argv_index2 < argv_index) argv_index2 = argv_index;
while (argv[++argv_index2] && argv[argv_index2][0] == '-');
optarg = argv[argv_index2];
/* Don't cross into the non-option argument list */
if (argv_index2 + nonopt >= prev_argc) optarg = NULL;
/* Move onto the next option */
increment_index();
}
else
{
/* Move onto the next option */
increment_index();
}
/* In case we got no argument for an option with required argument */
if (optarg == NULL && opt_ptr[2] != ':')
{
optopt = c;
c = '?';
if (opterr)
{
fprintf(stderr, "%s: option requires an argument -- %c\n",
argv[0], optopt);
}
}
}
/* Option does not take argument */
else
{
/* Next argv_index */
if (argv[argv_index][opt_offset] == '\0')
{
increment_index();
}
}
}
/* Calculate optind */
if (c == -1)
{
/* Non-options were moved to the back, so the first one sits nonopt from the end */
optind = argc - nonopt;
}
else
{
optind = argv_index;
}
return c;
}
/* vim:ts=3
*/

63
cachelab/getopt.h Normal file
View File

@ -0,0 +1,63 @@
/*****************************************************************************
* getopt.h - competent and free getopt library.
* $Header: /cvsroot/freegetopt/freegetopt/getopt.h,v 1.2 2003/10/26 03:10:20 vindaci Exp $
*
* Copyright (c)2002-2003 Mark K. Kim
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* * Neither the original author of this software nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
* THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
#ifndef GETOPT_H_
#define GETOPT_H_
#ifdef __cplusplus
extern "C" {
#endif
/* State shared with the caller; all four are defined in getopt.c. */
extern char* optarg; /* Argument of the option just returned, or NULL */
extern int optind; /* Set by getopt() on every return */
extern int opterr; /* Non-zero: getopt() prints diagnostics to stderr */
extern int optopt; /* Option character that caused the last '?' return */
/*
 * Returns the next option character from argv, '?' on an unknown option or a
 * missing required argument, or -1 when there are no more options.
 * optstr lists the accepted option characters.
 */
int getopt(int argc, char** argv, char* optstr);
#ifdef __cplusplus
}
#endif
#endif /* GETOPT_H_ */
/* vim:ts=3
*/

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

1162
cachelab/uthash.h Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

452
cachelab/zstd/zdict.h Normal file
View File

@ -0,0 +1,452 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef DICTBUILDER_H_001
#define DICTBUILDER_H_001
#if defined (__cplusplus)
extern "C" {
#endif
/*====== Dependencies ======*/
#include <stddef.h> /* size_t */
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
#ifndef ZDICTLIB_VISIBILITY
# if defined(__GNUC__) && (__GNUC__ >= 4)
# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
# else
# define ZDICTLIB_VISIBILITY
# endif
#endif
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
#else
# define ZDICTLIB_API ZDICTLIB_VISIBILITY
#endif
/*******************************************************************************
* Zstd dictionary builder
*
* FAQ
* ===
* Why should I use a dictionary?
* ------------------------------
*
* Zstd can use dictionaries to improve compression ratio of small data.
* Traditionally small files don't compress well because there is very little
* repetition in a single sample, since it is small. But, if you are compressing
* many similar files, like a bunch of JSON records that share the same
* structure, you can train a dictionary ahead of time on some samples of
* these files. Then, zstd can use the dictionary to find repetitions that are
* present across samples. This can vastly improve compression ratio.
*
* When is a dictionary useful?
* ----------------------------
*
* Dictionaries are useful when compressing many small files that are similar.
* The larger a file is, the less benefit a dictionary will have. Generally,
* we don't expect dictionary compression to be effective past 100KB. And the
* smaller a file is, the more we would expect the dictionary to help.
*
* How do I use a dictionary?
* --------------------------
*
* Simply pass the dictionary to the zstd compressor with
* `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
* the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
* more advanced functions that allow selecting some options, see zstd.h for
* complete documentation.
*
* What is a zstd dictionary?
* --------------------------
*
* A zstd dictionary has two pieces: Its header, and its content. The header
* contains a magic number, the dictionary ID, and entropy tables. These
* entropy tables allow zstd to save on header costs in the compressed file,
* which really matters for small data. The content is just bytes, which are
* repeated content that is common across many samples.
*
* What is a raw content dictionary?
* ---------------------------------
*
* A raw content dictionary is just bytes. It doesn't have a zstd dictionary
* header, a dictionary ID, or entropy tables. Any buffer is a valid raw
* content dictionary.
*
* How do I train a dictionary?
* ----------------------------
*
* Gather samples from your use case. These samples should be similar to each
* other. If you have several use cases, you could try to train one dictionary
* per use case.
*
* Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
* dictionary. There are a few advanced versions of this function, but this
* is a great starting point. If you want to further tune your dictionary
* you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
* you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
*
* If the dictionary training function fails, that is likely because you
* either passed too few samples, or a dictionary would not be effective
* for your data. Look at the messages that the dictionary trainer printed,
* if it doesn't say too few samples, then a dictionary would not be effective.
*
* How large should my dictionary be?
* ----------------------------------
*
* A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
* The zstd CLI defaults to a 110KB dictionary. You likely don't need a
* dictionary larger than that. But, most use cases can get away with a
* smaller dictionary. The advanced dictionary builders can automatically
* shrink the dictionary for you, and select the smallest size that doesn't
* hurt compression ratio too much. See the `shrinkDict` parameter.
* A smaller dictionary can save memory, and potentially speed up
* compression.
*
* How many samples should I provide to the dictionary builder?
* ------------------------------------------------------------
*
* We generally recommend passing ~100x the size of the dictionary
* in samples. A few thousand should suffice. Having too few samples
* can hurt the dictionary's effectiveness. Having more samples will
* only improve the dictionary's effectiveness. But having too many
* samples can slow down the dictionary builder.
*
* How do I determine if a dictionary will be effective?
* -----------------------------------------------------
*
* Simply train a dictionary and try it out. You can use zstd's built in
* benchmarking tool to test the dictionary effectiveness.
*
* # Benchmark levels 1-3 without a dictionary
* zstd -b1e3 -r /path/to/my/files
* # Benchmark levels 1-3 with a dictionary
* zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
*
* When should I retrain a dictionary?
* -----------------------------------
*
* You should retrain a dictionary when its effectiveness drops. Dictionary
* effectiveness drops as the data you are compressing changes. Generally, we do
* expect dictionaries to "decay" over time, as your data changes, but the rate
* at which they decay depends on your use case. Internally, we regularly
* retrain dictionaries, and if the new dictionary performs significantly
* better than the old dictionary, we will ship the new dictionary.
*
* I have a raw content dictionary, how do I turn it into a zstd dictionary?
* -------------------------------------------------------------------------
*
* If you have a raw content dictionary, e.g. by manually constructing it, or
* using a third-party dictionary builder, you can turn it into a zstd
* dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to
* provide some samples of the data. It will add the zstd header to the
* raw content, which contains a dictionary ID and entropy tables, which
* will improve compression ratio, and allow zstd to write the dictionary ID
* into the frame, if you so choose.
*
* Do I have to use zstd's dictionary builder?
* -------------------------------------------
*
* No! You can construct dictionary content however you please, it is just
* bytes. It will always be valid as a raw content dictionary. If you want
* a zstd dictionary, which can improve compression ratio, use
* `ZDICT_finalizeDictionary()`.
*
* What is the attack surface of a zstd dictionary?
* ------------------------------------------------
*
* Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so
* zstd should never crash, or access out-of-bounds memory no matter what
* the dictionary is. However, if an attacker can control the dictionary
* during decompression, they can cause zstd to generate arbitrary bytes,
* just like if they controlled the compressed data.
*
******************************************************************************/
/*! ZDICT_trainFromBuffer():
* Train a dictionary from an array of samples.
* Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
* f=20, and accel=1.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* Note: Dictionary training will fail if there are not enough samples to construct a
* dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
* If dictionary training fails, you should use zstd without a dictionary, as the dictionary
* would've been ineffective anyways. If you believe your samples would benefit from a dictionary
* please open an issue with details, and we can look into it.
* Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
*/
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples);
/*! ZDICT_params_t:
 * Basic dictionary-building parameters. Passed directly to
 * ZDICT_finalizeDictionary() and embedded as `zParams` in the cover,
 * fastCover and legacy parameter structures declared in this header.
 */
typedef struct {
int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */
unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned dictID; /**< force dictID value; 0 means auto mode (32-bits random value)
* NOTE: The zstd format reserves some dictionary IDs for future use.
* You may use them in private settings, but be warned that they
* may be used by zstd in a public dictionary registry in the future.
* These dictionary IDs are:
* - low range : <= 32767
* - high range : >= (2^31)
*/
} ZDICT_params_t;
/*! ZDICT_finalizeDictionary():
* Given a custom content as a basis for dictionary, and a set of samples,
* finalize dictionary by adding headers and statistics according to the zstd
* dictionary format.
*
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each
* sample in order. The samples are used to construct the statistics, so they
* should be representative of what you will compress with this dictionary.
*
* The compression level can be set in `parameters`. You should pass the
* compression level you expect to use in production. The statistics for each
* compression level differ, so tuning the dictionary for the compression level
* can help quite a bit.
*
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
* a random dictionary ID for you, but we can't guarantee no collisions.
*
* The dstDictBuffer and the dictContent may overlap, and the content will be
* appended to the end of the header. If the header + the content doesn't fit in
* maxDictSize the beginning of the content is truncated to make room, since it
* is presumed that the most profitable content is at the end of the dictionary,
* since that is the cheapest to reference.
*
* `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN).
*
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
* or an error code, which can be tested by ZDICT_isError().
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
* instructed to, using notificationLevel>0.
* NOTE: This function currently may fail in several edge cases including:
* * Not enough samples
* * Samples are uncompressible
* * Samples are all exactly the same
*/
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
const void* dictContent, size_t dictContentSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
/*====== Helper functions ======*/
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
#ifdef ZDICT_STATIC_LINKING_ONLY
/* ====================================================================================
* The definitions in this section are considered experimental.
* They should never be used with a dynamic library, as they may change in the future.
* They are provided for advanced usages.
* Use them only in association with static linking.
* ==================================================================================== */
#define ZDICT_DICTSIZE_MIN 256
/* Deprecated: Remove in v1.6.0 */
#define ZDICT_CONTENTSIZE_MIN 128
/*! ZDICT_cover_params_t:
* Parameters for the COVER dictionary builder
* (ZDICT_trainFromBuffer_cover() and ZDICT_optimizeTrainFromBuffer_cover()).
* k and d are the only required parameters.
* For others, value 0 means default.
*/
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and select the smallest dictionary that is at most shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
ZDICT_params_t zParams;
} ZDICT_cover_params_t;
/*! ZDICT_fastCover_params_t:
 * Parameters for the fastCover dictionary builder
 * (ZDICT_trainFromBuffer_fastCover() and ZDICT_optimizeTrainFromBuffer_fastCover()).
 */
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default (20), per the ZDICT_optimizeTrainFromBuffer_fastCover() notes in this header. NOTE(review): this comment previously read "1 means default(20)", which looks like a typo - confirm against the implementation */
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and select the smallest dictionary that is at most shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */
unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worst shrinkDictMaxRegression% worse than the max dict size dictionary. */
ZDICT_params_t zParams;
} ZDICT_fastCover_params_t;
/*! ZDICT_trainFromBuffer_cover():
* Train a dictionary from an array of samples using the COVER algorithm.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
*/
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
void *dictBuffer, size_t dictBufferCapacity,
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
ZDICT_cover_params_t parameters);
/*! ZDICT_optimizeTrainFromBuffer_cover():
* The same requirements as above hold for all the parameters except `parameters`.
* This function tries many parameter combinations and picks the best parameters.
* `*parameters` is filled with the best parameters found,
* dictionary constructed with those parameters is stored in `dictBuffer`.
*
* All of the parameters d, k, steps are optional.
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
* if steps is zero it defaults to its default value.
* If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* On success `*parameters` contains the parameters selected.
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
*/
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_cover_params_t* parameters);
/*! ZDICT_trainFromBuffer_fastCover():
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
*
* @param dictBuffer          output buffer; the resulting dictionary is saved here.
* @param dictBufferCapacity  capacity of `dictBuffer`; the returned dictionary size never exceeds it.
* @param samplesBuffer       all samples, stored concatenated in a single flat buffer.
* @param samplesSizes        array providing the size of each sample, in order.
* @param nbSamples           number of samples described by `samplesSizes`.
* @param parameters          training parameters, passed by value.
*                            d and k are required.
*                            All other parameters are optional and use default values if not provided.
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
*/
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t parameters);
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
* The same requirements as for ZDICT_trainFromBuffer_fastCover() hold for all the
* parameters except `parameters`, which is passed by pointer here and is both read and written.
* This function tries many parameter combinations (specifically, k and d combinations)
* and picks the best parameters. `*parameters` is filled with the best parameters found,
* and the dictionary constructed with those parameters is stored in `dictBuffer`.
*
* All of the parameters d, k, steps, f, and accel are optional:
* - If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
* - If steps is zero it defaults to its default value.
* - If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
* - If f is zero, default value of 20 is used.
* - If accel is zero, default value of 1 is used.
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* On success `*parameters` contains the parameters selected.
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
*/
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
size_t dictBufferCapacity, const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t* parameters);
/* Parameter set accepted (by value) by ZDICT_trainFromBuffer_legacy(). */
typedef struct {
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
ZDICT_params_t zParams; /* embedded ZDICT_params_t settings */
} ZDICT_legacy_params_t;
/*! ZDICT_trainFromBuffer_legacy():
* Train a dictionary from an array of samples.
*
* @param dictBuffer          output buffer; the resulting dictionary is saved here.
* @param dictBufferCapacity  capacity of `dictBuffer`; the returned dictionary size never exceeds it.
* @param samplesBuffer       all samples, stored concatenated in a single flat buffer.
* @param samplesSizes        array providing the size of each sample, in order.
* @param nbSamples           number of samples described by `samplesSizes`.
* @param parameters          optional; can be provided with values set to 0 to mean "default".
*
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
* See ZDICT_trainFromBuffer() for details on failure modes.
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
* It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
* In general, it's recommended to provide a few thousand samples, though this can vary a lot.
* It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
*/
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_legacy_params_t parameters);
/* Deprecation warnings */
/* ZDICT_DEPRECATED(message) is placed in front of a declaration to mark it deprecated.
   Every variant below expands to include ZDICTLIB_API, so the macro also replaces
   the usual ZDICTLIB_API prefix on that declaration.
   It is generally possible to disable deprecation warnings from the compiler,
   for example with -Wno-deprecated-declarations for gcc
   or _CRT_SECURE_NO_WARNINGS in Visual.
   Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
#else
/* Encodes the GCC version as major * 100 + minor, so 405 below means 4.5 and 301 means 3.1. */
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
/* Branches are tried in order; the first matching compiler capability wins. */
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
/* clang, or GCC >= 4.5: attribute form that carries the message */
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
# elif (ZDICT_GCC_VERSION >= 301)
/* GCC >= 3.1 but < 4.5: attribute form without a message; `message` is dropped */
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
# elif defined(_MSC_VER)
# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
# else
/* Unknown compiler: emit a build-time notice and fall back to a plain declaration. */
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
# define ZDICT_DEPRECATED(message) ZDICTLIB_API
# endif
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
/*! ZDICT_addEntropyTablesFromBuffer():
* Deprecated declaration, kept behind ZDICT_DEPRECATED so that callers get a
* compiler warning (where supported). Use ZDICT_finalizeDictionary() instead.
* The return value is a `size_t`; NOTE(review): presumably a dictionary size or an
* error code testable with ZDICT_isError(), like the other ZDICT functions - confirm
* against the implementation.
*/
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
#endif /* ZDICT_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
}
#endif
#endif /* DICTBUILDER_H_001 */

2580
cachelab/zstd/zstd.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,96 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_ERRORS_H_398273423
#define ZSTD_ERRORS_H_398273423
#if defined (__cplusplus)
extern "C" {
#endif
/*===== dependency =====*/
#include <stddef.h> /* size_t */
/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
/* ZSTDERRORLIB_VISIBILITY can be pre-defined by the build to override the default.
 * When it is not, it becomes the "default" visibility attribute on compilers that
 * define __GNUC__ >= 4, and expands to nothing on every other compiler. */
#ifndef ZSTDERRORLIB_VISIBILITY
# if defined(__GNUC__) && (__GNUC__ >= 4)
# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
# else
# define ZSTDERRORLIB_VISIBILITY
# endif
#endif
/* ZSTDERRORLIB_API prefixes the function declarations of this header:
 * - ZSTD_DLL_EXPORT defined to 1 : __declspec(dllexport) plus the visibility attribute
 * - ZSTD_DLL_IMPORT defined to 1 : __declspec(dllimport) plus the visibility attribute
 * - otherwise                    : the visibility attribute alone */
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
#else
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
#endif
/*-*********************************************
* Error codes list
*-*********************************************
* Error code _values_ have been pinned down only since v1.3.1.
* Therefore, don't rely on the numeric values if you may link to any version < v1.3.1.
*
* Only values < 100 are considered stable.
*
* note 1 : this API shall be used with static linking only.
* dynamic linking is not yet officially supported.
* note 2 : Prefer relying on the enum names rather than on their numeric values whenever possible.
* This is the only supported way to use the error list with versions < v1.3.1.
* note 3 : ZSTD_isError() is always correct, whatever the library version.
**********************************************/
typedef enum {
ZSTD_error_no_error = 0,
ZSTD_error_GENERIC = 1,
ZSTD_error_prefix_unknown = 10,
ZSTD_error_version_unsupported = 12,
ZSTD_error_frameParameter_unsupported = 14,
ZSTD_error_frameParameter_windowTooLarge = 16,
ZSTD_error_corruption_detected = 20,
ZSTD_error_checksum_wrong = 22,
ZSTD_error_dictionary_corrupted = 30,
ZSTD_error_dictionary_wrong = 32,
ZSTD_error_dictionaryCreation_failed = 34,
ZSTD_error_parameter_unsupported = 40,
ZSTD_error_parameter_outOfBound = 42,
ZSTD_error_tableLog_tooLarge = 44,
ZSTD_error_maxSymbolValue_tooLarge = 46,
ZSTD_error_maxSymbolValue_tooSmall = 48,
ZSTD_error_stabilityCondition_notRespected = 50,
ZSTD_error_stage_wrong = 60,
ZSTD_error_init_missing = 62,
ZSTD_error_memory_allocation = 64,
ZSTD_error_workSpace_tooSmall= 66,
ZSTD_error_dstSize_tooSmall = 70,
ZSTD_error_srcSize_wrong = 72,
ZSTD_error_dstBuffer_null = 74,
/* the following error codes (values >= 100) are __NOT STABLE__: they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,
ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_srcBuffer_wrong = 105,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
/*! ZSTD_getErrorCode() :
* Convert a `size_t` function result into a `ZSTD_ErrorCode` enum value,
* which can then be compared with the enum list published above.
* @param functionResult  the `size_t` result returned by a library function.
* @return the `ZSTD_ErrorCode` corresponding to `functionResult`.
*/
ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
/*! ZSTD_getErrorString() :
* Same as ZSTD_getErrorName, but takes a `ZSTD_ErrorCode` enum argument.
* @param code  the error code to describe.
* @return a `const char*` for `code`; NOTE(review): presumably a static,
* human-readable string that the caller must not free - confirm against the implementation.
*/
ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
#if defined (__cplusplus)
}
#endif
#endif /* ZSTD_ERRORS_H_398273423 */