More doc comments

This commit is contained in:
Hansung Kim
2023-09-10 14:45:23 -07:00
parent 9efdd2ebb7
commit c90fe56588
5 changed files with 15 additions and 1 deletions

View File

@@ -29,6 +29,10 @@ public:
}
}
// NOTE(hansung): This is code running on the CPU, but CPU is still the one
// that keeps track of allocation of the GPU memory. GPU kernel simply runs
// assuming that CPU has done the right thing and returned a safe and valid
// chunk of memory.
int allocate(uint64_t size, uint64_t* addr) {
if (size == 0 || addr == nullptr)
return -1;
@@ -403,4 +407,4 @@ private:
page_t* pages_;
};
} // namespace vortex
} // namespace vortex

View File

@@ -250,6 +250,8 @@ module VX_cache #(
wire [MEM_TAG_IN_WIDTH-1:0] mem_rsp_tag_c;
wire mem_rsp_ready_c;
// NOTE(hansung): non-cacheable addresses. Although is this applied for
// all address range?
if (NC_ENABLE) begin
VX_nc_bypass #(
.NUM_PORTS (NUM_PORTS),

View File

@@ -55,6 +55,7 @@
///////////////////////////////////////////////////////////////////////////////
// NOTE(hansung): what does CORE_TAG_ID_BITS == 0 mean?
`define CORE_RSP_TAGS ((CORE_TAG_ID_BITS != 0) ? 1 : NUM_REQS)
`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}

View File

@@ -1326,6 +1326,9 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
} else {
tmask_.reset();
for (uint32_t t = 0; t < num_threads; ++t) {
// NOTE(hansung): `ts` is the left-most lane currently enabled.
// Doing this only respects the operand of that lane, even though
// every lane might have different operand for the tmask.
tmask_.set(t, rsdata.at(ts)[0].i & (1 << t));
}
}

View File

@@ -156,6 +156,8 @@ int main (int argc, char **argv) {
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
// Set kernel arguments
// NOTE(hansung): clSetKernelArg doesn't seem to incur any device-specific
// operation
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_memobj));
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_memobj));
@@ -190,6 +192,8 @@ int main (int argc, char **argv) {
size_t local_work_size[1] = {1};
auto time_start = std::chrono::high_resolution_clock::now();
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL));
// NOTE(hansung): clFinish blocks until all kernels in the command queue are
// finished. This seems to be what actually kicks off kernel execution.
CL_CHECK(clFinish(commandQueue));
auto time_end = std::chrono::high_resolution_clock::now();
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();