diff --git a/benchmarks/opencl/lib/libOpenCL.so b/benchmarks/opencl/lib/libOpenCL.so deleted file mode 100644 index 522ea2bc..00000000 Binary files a/benchmarks/opencl/lib/libOpenCL.so and /dev/null differ diff --git a/benchmarks/opencl/lib/libOpenCL.so.2 b/benchmarks/opencl/lib/libOpenCL.so.2 deleted file mode 100644 index 522ea2bc..00000000 Binary files a/benchmarks/opencl/lib/libOpenCL.so.2 and /dev/null differ diff --git a/benchmarks/opencl/lib/libOpenCL.so.2.5.0 b/benchmarks/opencl/lib/libOpenCL.so.2.5.0 deleted file mode 100644 index 522ea2bc..00000000 Binary files a/benchmarks/opencl/lib/libOpenCL.so.2.5.0 and /dev/null differ diff --git a/benchmarks/opencl/runtime/lib/libOpenCL.so.2.5.0 b/benchmarks/opencl/runtime/lib/libOpenCL.so.2.5.0 index e5f1a692..e693c28f 100644 Binary files a/benchmarks/opencl/runtime/lib/libOpenCL.so.2.5.0 and b/benchmarks/opencl/runtime/lib/libOpenCL.so.2.5.0 differ diff --git a/benchmarks/opencl/sgemm/main.cc b/benchmarks/opencl/sgemm/main.cc index 2b72d1e5..bb37767a 100644 --- a/benchmarks/opencl/sgemm/main.cc +++ b/benchmarks/opencl/sgemm/main.cc @@ -34,25 +34,27 @@ #define NUM_DATA 64 -#define CL_CHECK(_expr) \ - do { \ - cl_int _err = _expr; \ - if (_err == CL_SUCCESS) \ - break; \ - fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ - abort(); \ - } while (0) +#define CL_CHECK(_expr) \ + do { \ + cl_int _err = _expr; \ + if (_err == CL_SUCCESS) \ + break; \ + printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ + cleanup(); \ + exit(-1); \ + } while (0) -#define CL_CHECK_ERR(_expr) \ - ({ \ - cl_int _err = CL_INVALID_VALUE; \ - decltype(_expr) _ret = _expr; \ - if (_err != CL_SUCCESS) { \ - fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ - abort(); \ - } \ - _ret; \ - }) +#define CL_CHECK2(_expr) \ + ({ \ + cl_int _err = CL_INVALID_VALUE; \ + decltype(_expr) _ret = _expr; \ + if (_err != CL_SUCCESS) { \ + printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \ + cleanup(); \ + exit(-1); \ + } \ + _ret; \ + }) void pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data) { @@ -80,37 +82,34 @@ static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) return 0; } +cl_device_id device_id = NULL; uint8_t *kernel_bin = NULL; +cl_context context = 0; +cl_kernel kernel = 0; +cl_command_queue queue = 0; +cl_program program = 0; +cl_mem memObjects[3] = {0, 0, 0}; /// // Cleanup any created OpenCL resources // -void Cleanup(cl_context context, cl_command_queue commandQueue, - cl_program program, cl_kernel kernel, cl_mem memObjects[3]) { +void cleanup() { for (int i = 0; i < 3; i++) { - if (memObjects[i] != 0) - clReleaseMemObject(memObjects[i]); + if (memObjects[i]) clReleaseMemObject(memObjects[i]); } - if (commandQueue != 0) - clReleaseCommandQueue(commandQueue); - - if (kernel != 0) - clReleaseKernel(kernel); - - if (program != 0) - clReleaseProgram(program); - - if (context != 0) - clReleaseContext(context); - + if (queue) clReleaseCommandQueue(queue); + if (kernel) clReleaseKernel(kernel); + if (program) clReleaseProgram(program); + if (context) clReleaseContext(context); + if (device_id) clReleaseDevice(device_id); + if (kernel_bin) free(kernel_bin); } int main(int argc, char **argv) { printf("enter demo main\n"); - cl_platform_id platform_id; - cl_device_id device_id; + cl_platform_id platform_id; size_t kernel_size; cl_int binary_status = 0; int i; @@ -122,17 +121,11 @@ int main(int argc, char **argv) { // Getting platform and device information CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL)); CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL)); - - cl_context context; - context = CL_CHECK_ERR( - clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err)); - - cl_command_queue queue; - queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, - CL_QUEUE_PROFILING_ENABLE, &_err)); - - cl_kernel kernel = 0; - cl_mem memObjects[3] = {0, 0, 0}; + + context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err)); + + queue = CL_CHECK2(clCreateCommandQueue(context, device_id, + CL_QUEUE_PROFILING_ENABLE, &_err)); // Create OpenCL program - first attempt to load cached binary. // If that is not available, then create the program from source @@ -140,15 +133,13 @@ int main(int argc, char **argv) { std::cout << "Attempting to create program from binary..." << std::endl; // cl_program program = CreateProgramFromBinary(context, device_id, // "kernel.cl.bin"); - cl_program program = CL_CHECK_ERR(clCreateProgramWithBinary( + program = CL_CHECK2(clCreateProgramWithBinary( context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err)); if (program == NULL) { - std::cerr << "Failed to write program binary" << std::endl; - Cleanup(context, queue, program, kernel, memObjects); - return 1; - } else { - std::cout << "Read program from binary." << std::endl; - } + printf("clCreateProgramWithBinary() failed\n"); + cleanup(); + return -1; + } // Build program CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL)); @@ -156,19 +147,19 @@ int main(int argc, char **argv) { printf("attempting to create input buffer\n"); fflush(stdout); cl_mem input_bufferA; - input_bufferA = CL_CHECK_ERR( + input_bufferA = CL_CHECK2( clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err)); cl_mem input_bufferB; - input_bufferB = CL_CHECK_ERR( + input_bufferB = CL_CHECK2( clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err)); printf("attempting to create output buffer\n"); fflush(stdout); cl_mem output_buffer; - output_buffer = CL_CHECK_ERR( + output_buffer = CL_CHECK2( clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err)); @@ -180,7 +171,7 @@ int main(int argc, char **argv) { printf("attempting to create kernel\n"); fflush(stdout); - kernel = CL_CHECK_ERR(clCreateKernel(program, "sgemm", &_err)); + kernel = CL_CHECK2(clCreateKernel(program, "sgemm", &_err)); CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_bufferA), &input_bufferA)); CL_CHECK(clSetKernelArg(kernel, 1, sizeof(input_bufferB), &input_bufferB)); CL_CHECK(clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer)); @@ -209,7 +200,7 @@ int main(int argc, char **argv) { CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &kernel_completion)); - printf("Enqueue'd kerenel\n"); + printf("Enqueue'd kernel\n"); fflush(stdout); cl_ulong time_start, time_end; CL_CHECK(clWaitForEvents(1, &kernel_completion)); @@ -231,13 +222,8 @@ int main(int argc, char **argv) { } printf("\n"); - CL_CHECK(clReleaseMemObject(memObjects[0])); - CL_CHECK(clReleaseMemObject(memObjects[1])); - CL_CHECK(clReleaseMemObject(memObjects[2])); - - CL_CHECK(clReleaseKernel(kernel)); - CL_CHECK(clReleaseProgram(program)); - CL_CHECK(clReleaseContext(context)); + // Clean up + cleanup(); return 0; } diff --git a/benchmarks/opencl/sgemm/sgemm b/benchmarks/opencl/sgemm/sgemm index d75ee3bd..955b1c79 100755 Binary files a/benchmarks/opencl/sgemm/sgemm and b/benchmarks/opencl/sgemm/sgemm differ diff --git a/benchmarks/opencl/vecadd/main.cc b/benchmarks/opencl/vecadd/main.cc index 3b6f889b..96567db1 100644 --- a/benchmarks/opencl/vecadd/main.cc +++ b/benchmarks/opencl/vecadd/main.cc @@ -31,6 +31,7 @@ }) int exitcode = 0; +cl_device_id device_id = NULL; cl_context context = NULL; cl_command_queue commandQueue = NULL; cl_program program = NULL; @@ -72,6 +73,8 @@ static void cleanup() { if (b_memobj) clReleaseMemObject(b_memobj); if (c_memobj) clReleaseMemObject(c_memobj); if (context) clReleaseContext(context); + if (device_id) clReleaseDevice(device_id); + if (kernel_bin) free(kernel_bin); if (A) free(A); if (B) free(B); @@ -104,7 +107,6 @@ int main (int argc, char **argv) { printf("enter demo main\n"); cl_platform_id platform_id; - cl_device_id device_id; size_t kernel_size; cl_int binary_status = 0; int i; @@ -139,6 +141,11 @@ int main (int argc, char **argv) { // Create program from kernel source program = CL_CHECK2(clCreateProgramWithBinary( context, 1, &device_id, &kernel_size, &kernel_bin, &binary_status, &_err)); + if (program == NULL) { + printf("clCreateProgramWithBinary() failed\n"); + cleanup(); + return -1; + } // Build program CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL)); diff --git a/driver/common/vx_utils.cpp b/driver/common/vx_utils.cpp index 4a4eb62d..0e590415 100644 --- a/driver/common/vx_utils.cpp +++ b/driver/common/vx_utils.cpp @@ -91,22 +91,22 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) { return err; } -extern int vx_get_perf(vx_device_h device, size_t* cycles, size_t* instrs) { +extern int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs) { int ret = 0; unsigned value; if (cycles) { - ret |= vx_csr_get(device, 0, CSR_CYCLE_H, &value); + ret |= vx_csr_get(device, core_id, CSR_CYCLE_H, &value); *cycles = value; - ret |= vx_csr_get(device, 0, CSR_CYCLE, &value); + ret |= vx_csr_get(device, core_id, CSR_CYCLE, &value); *cycles = (*cycles << 32) | value; } if (instrs) { - ret |= vx_csr_get(device, 0, CSR_INSTRET_H, &value); + ret |= vx_csr_get(device, core_id, CSR_INSTRET_H, &value); *instrs = value; - ret |= vx_csr_get(device, 0, CSR_INSTRET, &value); + ret |= vx_csr_get(device, core_id, CSR_INSTRET, &value); *instrs = (*instrs << 32) | value; } diff --git a/driver/include/vortex.h b/driver/include/vortex.h index ecdd0542..e5aa9eb6 100644 --- a/driver/include/vortex.h +++ b/driver/include/vortex.h @@ -58,10 +58,10 @@ int vx_start(vx_device_h hdevice); int vx_ready_wait(vx_device_h hdevice, long long timeout); // set device constant registers -int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value); +int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value); // get device constant registers -int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* value); +int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value); ////////////////////////////// UTILITY FUNCIONS /////////////////////////////// @@ -72,7 +72,7 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size) int vx_upload_kernel_file(vx_device_h device, const char* filename); // get performance counters -int vx_get_perf(vx_device_h device, size_t* cycles, size_t* instrs); +int vx_get_perf(vx_device_h device, int core_id, size_t* cycles, size_t* instrs); #ifdef __cplusplus } diff --git a/driver/opae/Makefile b/driver/opae/Makefile index b13b897d..9946470f 100644 --- a/driver/opae/Makefile +++ b/driver/opae/Makefile @@ -17,6 +17,9 @@ CXXFLAGS +=-fstack-protector # Position independent code CXXFLAGS += -fPIC +# Dump perf stats +CXXFLAGS += -DDUMP_PERF_STATS + # Enable scope analyzer #CXXFLAGS += -DSCOPE diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index c8bf410b..e66b9afe 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -211,14 +211,29 @@ extern int vx_dev_close(vx_device_h hdevice) { vx_scope_stop(device->fpga, 0); #endif - { - // Dump perf stats +#ifdef DUMP_PERF_STATS + // Dump perf stats + if (device->num_cores > 1) { + uint64_t total_instrs = 0, total_cycles = 0; + for (unsigned core_id = 0; core_id < device->num_cores; ++core_id) { + uint64_t instrs, cycles; + int ret = vx_get_perf(hdevice, core_id, &instrs, &cycles); + assert(ret == 0); + float IPC = (float)(double(instrs) / double(cycles)); + fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC); + total_instrs += instrs; + total_cycles = std::max(total_cycles, cycles); + } + float IPC = (float)(double(total_instrs) / double(total_cycles)); + fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC); + } else { uint64_t instrs, cycles; - int ret = vx_get_perf(hdevice, &instrs, &cycles); + int ret = vx_get_perf(hdevice, 0, &instrs, &cycles); float IPC = (float)(double(instrs) / double(cycles)); - fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC); assert(ret == 0); + fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC); } +#endif fpgaClose(device->fpga); @@ -480,7 +495,7 @@ extern int vx_start(vx_device_h hdevice) { } // set device constant registers -extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value) { +extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) { if (nullptr == hdevice) return -1; @@ -491,8 +506,8 @@ extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value return -1; // write CSR value - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE)); @@ -500,7 +515,7 @@ extern int vx_csr_set(vx_device_h hdevice, int core, int address, unsigned value } // get device constant registers -extern int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* value) { +extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) { if (nullptr == hdevice || nullptr == value) return -1; @@ -512,8 +527,8 @@ extern int vx_csr_get(vx_device_h hdevice, int core, int address, unsigned* valu // write CSR value - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core)); - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ)); // Ensure ready for new command diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index ea4f3c22..d3d0df94 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -28,6 +28,8 @@ CFLAGS += -fPIC CFLAGS += -DUSE_RTLSIM $(CONFIGS) +CFLAGS += -DDUMP_PERF_STATS + LDFLAGS += -shared -pthread # LDFLAGS += -dynamiclib -pthread diff --git a/driver/rtlsim/vortex.cpp b/driver/rtlsim/vortex.cpp index 80167966..788a5a9d 100644 --- a/driver/rtlsim/vortex.cpp +++ b/driver/rtlsim/vortex.cpp @@ -68,8 +68,7 @@ public: simulator_.attach_ram(&ram_); } - ~vx_device() { - simulator_.print_stats(std::cout); + ~vx_device() { if (future_.valid()) { future_.wait(); } @@ -152,6 +151,28 @@ public: return 0; } + int set_csr(int core_id, int addr, unsigned value) { + if (future_.valid()) { + future_.wait(); // ensure prior run completed + } + simulator_.set_csr(core_id, addr, value); + while (simulator_.is_busy()) { + simulator_.step(); + }; + return 0; + } + + int get_csr(int core_id, int addr, unsigned *value) { + if (future_.valid()) { + future_.wait(); // ensure prior run completed + } + simulator_.get_csr(core_id, addr, value); + while (simulator_.is_busy()) { + simulator_.step(); + }; + return 0; + } + private: size_t mem_allocation_; @@ -214,6 +235,29 @@ extern int vx_dev_close(vx_device_h hdevice) { return -1; vx_device *device = ((vx_device*)hdevice); + +#ifdef DUMP_PERF_STATS + unsigned num_cores; + vx_csr_get(hdevice, 0, CSR_NC, &num_cores); + if (num_cores > 1) { + uint64_t total_instrs = 0, total_cycles = 0; + for (unsigned core_id = 0; core_id < num_cores; ++core_id) { + uint64_t instrs, cycles; + vx_get_perf(hdevice, core_id, &instrs, &cycles); + float IPC = (float)(double(instrs) / double(cycles)); + fprintf(stdout, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs, cycles, IPC); + total_instrs += instrs; + total_cycles = std::max(total_cycles, cycles); + } + float IPC = (float)(double(total_instrs) / double(total_cycles)); + fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, total_cycles, IPC); + } else { + uint64_t instrs, cycles; + vx_get_perf(hdevice, 0, &instrs, &cycles); + float IPC = (float)(double(instrs) / double(cycles)); + fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC); + } +#endif delete device; @@ -324,10 +368,20 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { return device->wait(timeout); } -extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) { - return -1; +extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) { + if (nullptr == hdevice) + return -1; + + vx_device *device = ((vx_device*)hdevice); + + return device->set_csr(core_id, addr, value); } -extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) { - return -1; +extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) { + if (nullptr == hdevice) + return -1; + + vx_device *device = ((vx_device*)hdevice); + + return device->get_csr(core_id, addr, value); } \ No newline at end of file diff --git a/driver/simx/vortex.cpp b/driver/simx/vortex.cpp index 292c410f..b499803d 100644 --- a/driver/simx/vortex.cpp +++ b/driver/simx/vortex.cpp @@ -358,10 +358,10 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { return device->wait(timeout); } -extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) { +extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) { return -1; } -extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) { +extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) { return -1; } \ No newline at end of file diff --git a/driver/stub/vortex.cpp b/driver/stub/vortex.cpp index 007bce0e..f4a101f0 100644 --- a/driver/stub/vortex.cpp +++ b/driver/stub/vortex.cpp @@ -48,10 +48,10 @@ extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) { return -1; } -extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned /*value*/) { +extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) { return -1; } -extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core*/, int /*address*/, unsigned* /*value*/) { +extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) { return -1; } \ No newline at end of file diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index b4b1200e..99a5599f 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -22,6 +22,7 @@ Simulator::Simulator() { dram_rsp_active_ = false; snp_req_active_ = false; + csr_req_active_ = false; #ifdef VCD_OUTPUT Verilated::traceEverOn(true); @@ -163,15 +164,6 @@ void Simulator::eval_io_bus() { vortex_->io_rsp_valid = 0; } -void Simulator::eval_csr_bus() { - vortex_->csr_io_req_valid = 0; - vortex_->csr_io_req_coreid = 0; - vortex_->csr_io_req_addr = 0; - vortex_->csr_io_req_rw = 0; - vortex_->csr_io_req_data = 0; - vortex_->csr_io_rsp_ready = 1; -} - void Simulator::eval_snp_bus() { if (snp_req_active_) { if (vortex_->snp_rsp_valid) { @@ -204,6 +196,27 @@ void Simulator::eval_snp_bus() { } } +void Simulator::eval_csr_bus() { + if (csr_req_active_) { + if (vortex_->csr_io_req_rw) { + if (vortex_->csr_io_req_ready) { + vortex_->snp_req_valid = 0; + csr_req_active_ = false; + } + } else { + if (vortex_->csr_io_rsp_valid) { + *csr_rsp_value_ = vortex_->csr_io_rsp_data; + vortex_->snp_req_valid = 0; + vortex_->csr_io_rsp_ready = 0; + csr_req_active_ = false; + } + } + } else { + vortex_->csr_io_req_valid = 0; + vortex_->csr_io_rsp_ready = 0; + } +} + void Simulator::wait(uint32_t cycles) { for (int i = 0; i < cycles; ++i) { this->step(); @@ -211,7 +224,9 @@ void Simulator::wait(uint32_t cycles) { } bool Simulator::is_busy() const { - return vortex_->busy || snp_req_active_; + return vortex_->busy + || snp_req_active_ + || csr_req_active_; } void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { @@ -221,22 +236,52 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) { if (0 == size) return; - snp_req_active_ = true; - snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; - vortex_->snp_req_addr = mem_addr / GLOBAL_BLOCK_SIZE; vortex_->snp_req_tag = 0; vortex_->snp_req_valid = 1; vortex_->snp_rsp_ready = 1; + snp_req_size_ = (size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE; --snp_req_size_; pending_snp_reqs_ = 1; + + snp_req_active_ = true; #ifdef DBG_PRINT_CACHE_SNP std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl; #endif } +void Simulator::set_csr(int core_id, int addr, unsigned value) { +#ifndef NDEBUG + std::cout << timestamp << ": [sim] set_csr()" << std::endl; +#endif + + vortex_->csr_io_req_valid = 1; + vortex_->csr_io_req_coreid = core_id; + vortex_->csr_io_req_addr = addr; + vortex_->csr_io_req_rw = 1; + vortex_->csr_io_req_data = value; + vortex_->csr_io_rsp_ready = 0; + + csr_req_active_ = true; +} + +void Simulator::get_csr(int core_id, int addr, unsigned *value) { +#ifndef NDEBUG + std::cout << timestamp << ": [sim] get_csr()" << std::endl; +#endif + + vortex_->csr_io_req_valid = 1; + vortex_->csr_io_req_coreid = core_id; + vortex_->csr_io_req_addr = addr; + vortex_->csr_io_req_rw = 0; + vortex_->csr_io_rsp_ready = 1; + + csr_rsp_value_ = value; + csr_req_active_ = true; +} + void Simulator::run() { #ifndef NDEBUG std::cout << timestamp << ": [sim] run()" << std::endl; diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index e104b66b..a16218bb 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -31,6 +31,8 @@ public: Simulator(); virtual ~Simulator(); + void attach_ram(RAM* ram); + void load_bin(const char* program_file); void load_ihex(const char* program_file); @@ -39,12 +41,14 @@ public: void reset(); void step(); void wait(uint32_t cycles); + void flush_caches(uint32_t mem_addr, uint32_t size); - - void attach_ram(RAM* ram); + void set_csr(int core_id, int addr, unsigned value); + void get_csr(int core_id, int addr, unsigned *value); void run(); int get_last_wb_value(int reg) const; + void print_stats(std::ostream& out); private: @@ -60,8 +64,11 @@ private: int dram_rsp_active_; bool snp_req_active_; + bool csr_req_active_; + uint32_t snp_req_size_; uint32_t pending_snp_reqs_; + uint32_t* csr_rsp_value_; RAM *ram_; VVortex *vortex_;