diff --git a/hw/dpi/float_dpi.cpp b/hw/dpi/float_dpi.cpp
index 7d78dde8..9a6b4a12 100644
--- a/hw/dpi/float_dpi.cpp
+++ b/hw/dpi/float_dpi.cpp
@@ -41,131 +41,131 @@ extern "C" {
 void dpi_fadd(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fadd(a, b, (*frm & 0x7), fflags);
+  *result = rv_fadd_s(a, b, (*frm & 0x7), fflags);
 }
 
 void dpi_fsub(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fsub(a, b, (*frm & 0x7), fflags);
+  *result = rv_fsub_s(a, b, (*frm & 0x7), fflags);
 }
 
 void dpi_fmul(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fmul(a, b, (*frm & 0x7), fflags);
+  *result = rv_fmul_s(a, b, (*frm & 0x7), fflags);
 }
 
 void dpi_fmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fmadd(a, b, c, (*frm & 0x7), fflags);
+  *result = rv_fmadd_s(a, b, c, (*frm & 0x7), fflags);
 }
 
 void dpi_fmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fmsub(a, b, c, (*frm & 0x7), fflags);
+  *result = rv_fmsub_s(a, b, c, (*frm & 0x7), fflags);
 }
 
 void dpi_fnmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fnmadd(a, b, c, (*frm & 0x7), fflags);
+  *result = rv_fnmadd_s(a, b, c, (*frm & 0x7), fflags);
 }
 
 void dpi_fnmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fnmsub(a, b, c, (*frm & 0x7), fflags);
+  *result = rv_fnmsub_s(a, b, c, (*frm & 0x7), fflags);
 }
 
 void dpi_fdiv(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fdiv(a, b, (*frm & 0x7), fflags);
+  *result = rv_fdiv_s(a, b, (*frm & 0x7), fflags);
 }
 
 void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fsqrt(a, (*frm & 0x7), fflags);
+  *result = rv_fsqrt_s(a, (*frm & 0x7), fflags);
 }
 
 void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_ftoi(a, (*frm & 0x7), fflags);
+  *result = rv_ftoi_s(a, (*frm & 0x7), fflags);
 }
 
 void dpi_ftou(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_ftou(a, (*frm & 0x7), fflags);
+  *result = rv_ftou_s(a, (*frm & 0x7), fflags);
 }
 
 void dpi_itof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_itof(a, (*frm & 0x7), fflags);
+  *result = rv_itof_s(a, (*frm & 0x7), fflags);
 }
 
 void dpi_utof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_utof(a, (*frm & 0x7), fflags);
+  *result = rv_utof_s(a, (*frm & 0x7), fflags);
 }
 
 void dpi_flt(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_flt(a, b, fflags);
+  *result = rv_flt_s(a, b, fflags);
 }
 
 void dpi_fle(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fle(a, b, fflags);
+  *result = rv_fle_s(a, b, fflags);
 }
 
 void dpi_feq(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_feq(a, b, fflags);
+  *result = rv_feq_s(a, b, fflags);
 }
 
 void dpi_fmin(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fmin(a, b, fflags);
+  *result = rv_fmin_s(a, b, fflags);
 }
 
 void dpi_fmax(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
   if (!enable) 
     return;
-  *result = rv_fmax(a, b, fflags);
+  *result = rv_fmax_s(a, b, fflags);
 }
 
 void dpi_fclss(bool enable, int a, int* result) {
   if (!enable) 
     return;
-  *result = rv_fclss(a);
+  *result = rv_fclss_s(a);
 }
 
 void dpi_fsgnj(bool enable, int a, int b, int* result) {
   if (!enable) 
     return;
-  *result = rv_fsgnj(a, b);
+  *result = rv_fsgnj_s(a, b);
 }
 
 void dpi_fsgnjn(bool enable, int a, int b, int* result) {
   if (!enable) 
     return;
-  *result = rv_fsgnjn(a, b);
+  *result = rv_fsgnjn_s(a, b);
 }
 
 void dpi_fsgnjx(bool enable, int a, int b, int* result) {
   if (!enable) 
     return;
-  *result = rv_fsgnjx(a, b);
+  *result = rv_fsgnjx_s(a, b);
 }
\ No newline at end of file
diff --git a/miscs/docker/vortex/Dockerfile b/miscs/docker/vortex/Dockerfile
deleted file mode 100644
index ec09dc9c..00000000
--- a/miscs/docker/vortex/Dockerfile
+++ /dev/null
@@ -1,48 +0,0 @@
-# Dockerfile for setting up the vortex development environment
-
-FROM ubuntu:18.04
-
-# Install dependencies
-RUN apt update && apt install -y \
-    git build-essential g++ libfl2 \
-    libfl-dev zlibc zlib1g zlib1g-dev \
-    ccache libgoogle-perftools-dev numactl perl-doc \
-    python3 device-tree-compiler gdb
-
-# Download vortex-toolchain-prebuilt
-RUN git clone https://github.com/SantoshSrivatsan24/vortex-toolchain-prebuilt.git /tmp/vortex-toolchain-prebuilt
-
-# Copy riscv-gnu-toolchain
-RUN cd /tmp/vortex-toolchain-prebuilt/riscv-gnu-toolchain/ubuntu/bionic; \
-    cat riscv-gnu-toolchain.tar.bz2.part* > riscv-gnu-toolchain.tar.bz2; \
-    tar -xf riscv-gnu-toolchain.tar.bz2 -C /opt/; 
-
-# Copy riscv64-gnu-toolchain
-RUN cd /tmp/vortex-toolchain-prebuilt/riscv64-gnu-toolchain/ubuntu/bionic; \
-    cat riscv64-gnu-toolchain.tar.bz2.part* > riscv64-gnu-toolchain.tar.bz2; \
-    tar -xf riscv64-gnu-toolchain.tar.bz2 -C /opt/; 
-
-# Copy llvm-riscv
-RUN cd /tmp/vortex-toolchain-prebuilt/llvm-riscv/ubuntu/bionic; \
-    cat llvm-riscv.tar.bz2.part* > llvm-riscv.tar.bz2; \
-    tar -xf llvm-riscv.tar.bz2 -C /opt/; 
-
-# Copy pocl
-RUN cd /tmp/vortex-toolchain-prebuilt/pocl/ubuntu/bionic; \
-    tar -xf pocl.tar.bz2 -C /opt/; 
-
-# Copy verilator
-RUN cd /tmp/vortex-toolchain-prebuilt/verilator/ubuntu/bionic; \
-    tar -xf verilator.tar.bz2 -C /opt/; 
-
-# Set environment variables
-ENV RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
-ENV RISCV64_TOOLCHAIN_PATH=/opt/riscv64-gnu-toolchain
-ENV VERILATOR_ROOT=/opt/verilator
-ENV PATH=$PATH:/${RISCV_TOOLCHAIN_PATH}/bin:${RISCV64_TOOLCHAIN_PATH}/bin:${RISCV64_TOOLCHAIN_PATH}/riscv64-unknown-elf/bin:${VERILATOR_ROOT}/bin
-
-# Cleanup
-RUN rm -rf /tmp/vortex-toolchain-prebuilt
-
-# Set working directory
-WORKDIR /home/vortex
\ No newline at end of file
diff --git a/sim/common/bitmanip.h b/sim/common/bitmanip.h
index b48df31f..bd199319 100644
--- a/sim/common/bitmanip.h
+++ b/sim/common/bitmanip.h
@@ -82,8 +82,8 @@ inline uint64_t sext64(uint64_t word, uint64_t width) {
   assert(width > 1);
   assert(width <= 64);
   uint64_t unity = 1;
-  uint64_t mask = (unity << width) - 0x1;
-  return ((word >> (width - 0x1)) & 0x1) ? (word | ~mask) : word;
+  uint64_t mask = (unity << width) - 1;
+  return ((word >> (width - 1)) & 0x1) ? (word | ~mask) : word;
 }
 
 inline __uint128_t sext128(__uint128_t word, uint32_t width) {
diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp
index b591a1b2..d8be2dac 100644
--- a/sim/simx/execute.cpp
+++ b/sim/simx/execute.cpp
@@ -742,7 +742,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
       uint32_t fflags = 0;
       switch (func7) {
       case 0x00: // RV32F: FADD.S
-        rddata[t] = rv_fadd(rsdata[t][0], rsdata[t][1], frm, &fflags);
+        rddata[t] = rv_fadd_s(rsdata[t][0], rsdata[t][1], frm, &fflags);
         trace->fpu.type = FpuType::FMA;
         trace->used_fregs.set(rsrc0);
         trace->used_fregs.set(rsrc1);
@@ -754,7 +754,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
         trace->used_fregs.set(rsrc1);
         break;
       case 0x04: // RV32F: FSUB.S
-        rddata[t] = rv_fsub(rsdata[t][0], rsdata[t][1], frm, &fflags);
+        rddata[t] = rv_fsub_s(rsdata[t][0], rsdata[t][1], frm, &fflags);
         trace->fpu.type = FpuType::FMA;
         trace->used_fregs.set(rsrc0);
         trace->used_fregs.set(rsrc1);
@@ -766,7 +766,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
         trace->used_fregs.set(rsrc1);
         break;
       case 0x08: // RV32F: FMUL.S
-        rddata[t] = rv_fmul(rsdata[t][0], rsdata[t][1], frm, &fflags);
+        rddata[t] = rv_fmul_s(rsdata[t][0], rsdata[t][1], frm, &fflags);
         trace->fpu.type = FpuType::FMA;
         trace->used_fregs.set(rsrc0);
         trace->used_fregs.set(rsrc1);
@@ -778,7 +778,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
         trace->used_fregs.set(rsrc1);
         break;
       case 0x0c: // RV32F: FDIV.S
-        rddata[t] = rv_fdiv(rsdata[t][0], rsdata[t][1], frm, &fflags);
+        rddata[t] = rv_fdiv_s(rsdata[t][0], rsdata[t][1], frm, &fflags);
         trace->fpu.type = FpuType::FDIV;
         trace->used_fregs.set(rsrc0);
         trace->used_fregs.set(rsrc1);
@@ -790,7 +790,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
         trace->used_fregs.set(rsrc1);
         break;
       case 0x2c: // RV32F: FSQRT.S
-        rddata[t] = rv_fsqrt(rsdata[t][0], frm, &fflags);
+        rddata[t] = rv_fsqrt_s(rsdata[t][0], frm, &fflags);
         trace->fpu.type = FpuType::FSQRT;
         trace->used_fregs.set(rsrc0);
         break;
@@ -802,13 +802,13 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
       case 0x10:
         switch (func3) {            
         case 0: // RV32F: FSGNJ.S
-          rddata[t] = rv_fsgnj(rsdata[t][0], rsdata[t][1]);
+          rddata[t] = rv_fsgnj_s(rsdata[t][0], rsdata[t][1]);
           break;          
         case 1: // RV32F: FSGNJN.S
-          rddata[t] = rv_fsgnjn(rsdata[t][0], rsdata[t][1]);
+          rddata[t] = rv_fsgnjn_s(rsdata[t][0], rsdata[t][1]);
           break;          
         case 2: // RV32F: FSGNJX.S
-          rddata[t] = rv_fsgnjx(rsdata[t][0], rsdata[t][1]);
+          rddata[t] = rv_fsgnjx_s(rsdata[t][0], rsdata[t][1]);
           break;
         }
       case 0x11:
@@ -830,10 +830,10 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
       case 0x14:              
         if (func3) {
           // RV32F: FMAX.S
-          rddata[t] = rv_fmax(rsdata[t][0], rsdata[t][1], &fflags);
+          rddata[t] = rv_fmax_s(rsdata[t][0], rsdata[t][1], &fflags);
         } else {
           // RV32F: FMIN.S
-          rddata[t] = rv_fmin(rsdata[t][0], rsdata[t][1], &fflags);
+          rddata[t] = rv_fmin_s(rsdata[t][0], rsdata[t][1], &fflags);
         }
         trace->fpu.type = FpuType::FNCP;
         trace->used_fregs.set(rsrc0);
@@ -855,19 +855,19 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
         switch(rsrc1) {
           case 0: 
             // RV32F: FCVT.W.S
-            rddata[t] = sext64(rv_ftoi(rsdata[t][0], frm, &fflags), 32);
+            rddata[t] = sext64(rv_ftoi_s(rsdata[t][0], frm, &fflags), 32);
             break;
           case 1:
             // RV32F: FCVT.WU.S
-            rddata[t] = sext64(rv_ftou(rsdata[t][0], frm, &fflags), 32);
+            rddata[t] = sext64(rv_ftou_s(rsdata[t][0], frm, &fflags), 32);
             break;
           case 2:
             // RV64F: FCVT.L.S
-            rddata[t] = rv_ftol(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_ftol_s(rsdata[t][0], frm, &fflags);
             break;
           case 3:
             // RV64F: FCVT.LU.S
-            rddata[t] = rv_ftolu(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_ftolu_s(rsdata[t][0], frm, &fflags);
             break;
         }
         trace->fpu.type = FpuType::FCVT;
@@ -898,7 +898,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
       case 0x70:      
         if (func3) {
           // RV32F: FCLASS.S
-          rddata[t] = rv_fclss(rsdata[t][0]);
+          rddata[t] = rv_fclss_s(rsdata[t][0]);
         } else {          
           // RV32F: FMV.X.W
           rddata[t] = rsdata[t][0];
@@ -908,7 +908,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
         break;
       case 0x71:      
         if (func3) {
-          // RV32D: FCLASS.S
+          // RV32D: FCLASS.D
           rddata[t] = rv_fclss_d(rsdata[t][0]);
         } else {          
           // RV64D: FMV.X.D
@@ -921,15 +921,15 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
         switch(func3) {              
         case 0:
           // RV32F: FLE.S
-          rddata[t] = rv_fle(rsdata[t][0], rsdata[t][1], &fflags);    
+          rddata[t] = rv_fle_s(rsdata[t][0], rsdata[t][1], &fflags);    
           break;              
         case 1:
           // RV32F: FLT.S
-          rddata[t] = rv_flt(rsdata[t][0], rsdata[t][1], &fflags);
+          rddata[t] = rv_flt_s(rsdata[t][0], rsdata[t][1], &fflags);
           break;              
         case 2:
           // RV32F: FEQ.S
-          rddata[t] = rv_feq(rsdata[t][0], rsdata[t][1], &fflags);
+          rddata[t] = rv_feq_s(rsdata[t][0], rsdata[t][1], &fflags);
           break;
         } 
         trace->fpu.type = FpuType::FNCP;
@@ -959,19 +959,19 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
         switch(rsrc1) {
           case 0: 
             // RV32F: FCVT.S.W
-            rddata[t] = rv_itof(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_itof_s(rsdata[t][0], frm, &fflags);
             break;
           case 1:
             // RV32F: FCVT.S.WU
-            rddata[t] = rv_utof(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_utof_s(rsdata[t][0], frm, &fflags);
             break;
           case 2:
             // RV64F: FCVT.S.L
-            rddata[t] = rv_ltof(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_ltof_s(rsdata[t][0], frm, &fflags);
             break;
           case 3:
             // RV64F: FCVT.S.LU
-            rddata[t] = rv_lutof(rsdata[t][0], frm, &fflags);
+            rddata[t] = rv_lutof_s(rsdata[t][0], frm, &fflags);
             break;
         }
         trace->fpu.type = FpuType::FCVT;
@@ -1030,7 +1030,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
           rddata[t] = rv_fmadd_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
         else
           // RV32F: FMADD.S
-          rddata[t] = rv_fmadd(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
+          rddata[t] = rv_fmadd_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
         break;
       case FMSUB:
         if (func2)
@@ -1038,7 +1038,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
           rddata[t] = rv_fmsub_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
         else 
           // RV32F: FMSUB.S
-          rddata[t] = rv_fmsub(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
+          rddata[t] = rv_fmsub_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
         break;
       case FMNMADD:
         if (func2)
@@ -1046,7 +1046,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
           rddata[t] = rv_fnmadd_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
         else
           // RV32F: FNMADD.S
-          rddata[t] = rv_fnmadd(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
+          rddata[t] = rv_fnmadd_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
         break; 
       case FMNMSUB:
         if (func2)
@@ -1054,7 +1054,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
           rddata[t] = rv_fnmsub_d(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
         else
           // RV32F: FNMSUB.S
-          rddata[t] = rv_fnmsub(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
+          rddata[t] = rv_fnmsub_s(rsdata[t][0], rsdata[t][1], rsdata[t][2], frm, &fflags);
         break;
       default:
         break;
diff --git a/tests/riscv/isa/ramulator.ddr4.log b/tests/riscv/isa/ramulator.ddr4.log
index 7a4efe9c..2889368e 100644
--- a/tests/riscv/isa/ramulator.ddr4.log
+++ b/tests/riscv/isa/ramulator.ddr4.log
@@ -1,19 +1,19 @@
                ramulator.active_cycles_0                  76                                      # Total active cycles for level _0
                  ramulator.busy_cycles_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0
             ramulator.serving_requests_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0
-    ramulator.average_serving_requests_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0
+    ramulator.average_serving_requests_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0
              ramulator.active_cycles_0_0                  76                                      # Total active cycles for level _0_0
                ramulator.busy_cycles_0_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0
           ramulator.serving_requests_0_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0
-  ramulator.average_serving_requests_0_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0
+  ramulator.average_serving_requests_0_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0
            ramulator.active_cycles_0_0_0                  76                                      # Total active cycles for level _0_0_0
              ramulator.busy_cycles_0_0_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0
         ramulator.serving_requests_0_0_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0
-ramulator.average_serving_requests_0_0_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0
+ramulator.average_serving_requests_0_0_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0
          ramulator.active_cycles_0_0_0_0                  76                                      # Total active cycles for level _0_0_0_0
            ramulator.busy_cycles_0_0_0_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_0
       ramulator.serving_requests_0_0_0_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_0
-ramulator.average_serving_requests_0_0_0_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_0
+ramulator.average_serving_requests_0_0_0_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_0
          ramulator.active_cycles_0_0_0_1                   0                                      # Total active cycles for level _0_0_0_1
            ramulator.busy_cycles_0_0_0_1                   0                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _0_0_0_1
       ramulator.serving_requests_0_0_0_1                   0                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _0_0_0_1
@@ -106,9 +106,9 @@ ramulator.write_row_conflicts_channel_0_core                   0
       ramulator.useless_activates_0_core                   0                                      # Number of useless activations. E.g, ACT -> PRE w/o RD or WR
             ramulator.read_latency_avg_0           26.333333                                      # The average memory latency cycles (in memory time domain) per request for all read requests in this channel
             ramulator.read_latency_sum_0                  79                                      # The memory latency cycles (in memory time domain) sum for all read requests in this channel
-        ramulator.req_queue_length_avg_0            0.046529                                      # Average of read and write queue length per memory cycle per channel.
+        ramulator.req_queue_length_avg_0            0.044681                                      # Average of read and write queue length per memory cycle per channel.
         ramulator.req_queue_length_sum_0                  63                                      # Sum of read and write queue length per memory cycle per channel.
-   ramulator.read_req_queue_length_avg_0            0.046529                                      # Read queue length average per memory cycle per channel.
+   ramulator.read_req_queue_length_avg_0            0.044681                                      # Read queue length average per memory cycle per channel.
    ramulator.read_req_queue_length_sum_0                  63                                      # Read queue length sum per memory cycle per channel.
   ramulator.write_req_queue_length_avg_0            0.000000                                      # Write queue length average per memory cycle per channel.
   ramulator.write_req_queue_length_sum_0                   0                                      # Write queue length sum per memory cycle per channel.
@@ -127,19 +127,19 @@ ramulator.write_row_conflicts_channel_0_core                   0
                ramulator.active_cycles_1                  76                                      # Total active cycles for level _1
                  ramulator.busy_cycles_1                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1
             ramulator.serving_requests_1                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1
-    ramulator.average_serving_requests_1            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1
+    ramulator.average_serving_requests_1            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1
              ramulator.active_cycles_1_0                  76                                      # Total active cycles for level _1_0
                ramulator.busy_cycles_1_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0
           ramulator.serving_requests_1_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0
-  ramulator.average_serving_requests_1_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0
+  ramulator.average_serving_requests_1_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0
            ramulator.active_cycles_1_0_0                  76                                      # Total active cycles for level _1_0_0
              ramulator.busy_cycles_1_0_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0
         ramulator.serving_requests_1_0_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0
-ramulator.average_serving_requests_1_0_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0
+ramulator.average_serving_requests_1_0_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0
          ramulator.active_cycles_1_0_0_0                  76                                      # Total active cycles for level _1_0_0_0
            ramulator.busy_cycles_1_0_0_0                  76                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_0
       ramulator.serving_requests_1_0_0_0                  76                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_0
-ramulator.average_serving_requests_1_0_0_0            0.056130                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_0
+ramulator.average_serving_requests_1_0_0_0            0.053901                                      # The average of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_0
          ramulator.active_cycles_1_0_0_1                   0                                      # Total active cycles for level _1_0_0_1
            ramulator.busy_cycles_1_0_0_1                   0                                      # (All-bank refresh only. busy cycles only include refresh time in rank level) The sum of cycles that the DRAM part is active or under refresh for level _1_0_0_1
       ramulator.serving_requests_1_0_0_1                   0                                      # The sum of read and write requests that are served in this DRAM element per memory cycle for level _1_0_0_1
@@ -232,9 +232,9 @@ ramulator.write_row_conflicts_channel_1_core                   0
       ramulator.useless_activates_1_core                   0                                      # Number of useless activations. E.g, ACT -> PRE w/o RD or WR
             ramulator.read_latency_avg_1           26.333333                                      # The average memory latency cycles (in memory time domain) per request for all read requests in this channel
             ramulator.read_latency_sum_1                  79                                      # The memory latency cycles (in memory time domain) sum for all read requests in this channel
-        ramulator.req_queue_length_avg_1            0.046529                                      # Average of read and write queue length per memory cycle per channel.
+        ramulator.req_queue_length_avg_1            0.044681                                      # Average of read and write queue length per memory cycle per channel.
         ramulator.req_queue_length_sum_1                  63                                      # Sum of read and write queue length per memory cycle per channel.
-   ramulator.read_req_queue_length_avg_1            0.046529                                      # Read queue length average per memory cycle per channel.
+   ramulator.read_req_queue_length_avg_1            0.044681                                      # Read queue length average per memory cycle per channel.
    ramulator.read_req_queue_length_sum_1                  63                                      # Read queue length sum per memory cycle per channel.
   ramulator.write_req_queue_length_avg_1            0.000000                                      # Write queue length average per memory cycle per channel.
   ramulator.write_req_queue_length_sum_1                   0                                      # Write queue length sum per memory cycle per channel.
@@ -251,7 +251,7 @@ ramulator.write_row_conflicts_channel_1_core                   0
         ramulator.record_write_conflicts                 0.0                                      # record write conflict for this core when it reaches request limit or to the end
                                      [0]                 0.0                                      # 
                  ramulator.dram_capacity          8589934592                                      # Number of bytes in simulated DRAM
-                   ramulator.dram_cycles                1354                                      # Number of DRAM cycles simulated
+                   ramulator.dram_cycles                1410                                      # Number of DRAM cycles simulated
              ramulator.incoming_requests                   6                                      # Number of incoming requests to DRAM
                  ramulator.read_requests                   6                                      # Number of incoming read requests to DRAM per core
                                      [0]                 6.0                                      # 
@@ -269,8 +269,8 @@ ramulator.incoming_read_reqs_per_channel                 6.0
           ramulator.in_queue_req_num_sum                 126                                      # Sum of read/write queue length
      ramulator.in_queue_read_req_num_sum                 126                                      # Sum of read queue length
     ramulator.in_queue_write_req_num_sum                   0                                      # Sum of write queue length
-          ramulator.in_queue_req_num_avg            0.093058                                      # Average of read/write queue length per memory cycle
-     ramulator.in_queue_read_req_num_avg            0.093058                                      # Average of read queue length per memory cycle
+          ramulator.in_queue_req_num_avg            0.089362                                      # Average of read/write queue length per memory cycle
+     ramulator.in_queue_read_req_num_avg            0.089362                                      # Average of read queue length per memory cycle
     ramulator.in_queue_write_req_num_avg            0.000000                                      # Average of write queue length per memory cycle
           ramulator.record_read_requests                 0.0                                      # record read requests for this core when it reaches request limit or to the end
                                      [0]                 0.0                                      #