// File: vortex/hw/rtl/core/VX_dispatch.sv
//
// 490 lines
// 22 KiB
// SystemVerilog

// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
`include "VX_trace.vh"
// VX_dispatch: routes the operands presented on each issue slot to exactly one
// per-execution-unit dispatch interface, through a small elastic buffer per
// unit and per slot. The target is selected from the instruction's ex_type and
// from whether the issuing warp is classified as a tensor warp (see
// operands_is_tensor in the module body).
module VX_dispatch import VX_gpu_pkg::*; #(
// Core index; in this module it only appears in simulation assertion and trace messages.
parameter CORE_ID = 0,
// Not referenced anywhere else in this module.
parameter DOMAIN = WU_DOMAIN_SCALAR
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
// Per-execution-unit event totals, indexed by the instruction's ex_type:
//   perf_stalls - operands valid but not accepted
//   perf_valids - operands valid
//   perf_fires  - operands valid and accepted
output wire [`PERF_CTR_BITS-1:0] perf_stalls [`NUM_EX_UNITS],
output wire [`PERF_CTR_BITS-1:0] perf_valids [`NUM_EX_UNITS],
output wire [`PERF_CTR_BITS-1:0] perf_fires [`NUM_EX_UNITS],
// Number of cycles in which at least one unit recorded a fire.
output wire [`PERF_CTR_BITS-1:0] perf_any_fire_cycles,
`endif
// inputs: one operands stream per issue slot
VX_operands_if.slave operands_if [`ISSUE_WIDTH],
// outputs: one dispatch stream per execution unit and per issue slot
VX_dispatch_if.master alu_dispatch_if [`ISSUE_WIDTH],
VX_dispatch_if.master lsu_dispatch_if [`ISSUE_WIDTH],
`ifdef EXT_F_ENABLE
VX_dispatch_if.master fpu_dispatch_if [`ISSUE_WIDTH],
`endif
`ifdef EXT_T_ENABLE
// Tensor-warp paths; only instructions from warps flagged as tensor are routed here.
VX_dispatch_if.master tensor_alu_dispatch_if [`ISSUE_WIDTH],
VX_dispatch_if.master tensor_lsu_dispatch_if [`ISSUE_WIDTH],
VX_dispatch_if.master tensor_ctrl_dispatch_if [`ISSUE_WIDTH],
VX_dispatch_if.master tensor_dispatch_if [`ISSUE_WIDTH],
`endif
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
);
// Silence unused-parameter lint for builds where the parameters are not referenced.
`UNUSED_PARAM (CORE_ID)
`UNUSED_PARAM (DOMAIN)
// Bit width of the payload pushed through every dispatch elastic buffer below.
// NOTE(review): the terms presumably mirror the fields packed by
// `TO_DISPATCH_DATA - confirm against that macro before changing them.
localparam DATAW = `UUID_WIDTH
                 + ISSUE_WIS_W
                 + `NUM_THREADS
                 + `INST_OP_BITS
                 + `INST_MOD_BITS
                 + 1 + 1 + 1
                 + `XLEN
                 + `XLEN
                 + `NR_BITS
                 + (3 * `NUM_THREADS * `XLEN)
                 + `NT_WIDTH;

// Identity table of thread indices (tids[t] == t), used as the data input of
// the per-slot selector below.
wire [`NUM_THREADS-1:0][`NT_WIDTH-1:0] tids;
for (genvar t = 0; t < `NUM_THREADS; ++t) begin
    assign tids[t] = `NT_WIDTH'(t);
end

// Thread index chosen from each slot's thread mask.
// NOTE(review): with REVERSE=1 this presumably yields the highest active
// thread - confirm against VX_find_first.
wire [`ISSUE_WIDTH-1:0][`NT_WIDTH-1:0] last_active_tid;
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    VX_find_first #(
        .REVERSE (1),
        .DATAW   (`NT_WIDTH),
        .N       (`NUM_THREADS)
    ) last_tid_select (
        .data_in   (tids),
        .valid_in  (operands_if[isw].data.tmask),
        .data_out  (last_active_tid[isw]),
        `UNUSED_PIN (valid_out)
    );
end
// Per-issue-slot classification of the instruction currently on operands_if:
//   operands_wid        - warp id rebuilt from the slot-local wis and the slot index
//   operands_is_tensor  - warp id is at or above `NUM_SCALAR_WARPS
//   tensor_alu_allowed  - ALU op is neither an M-class nor a RED-class op
//   tensor_ctrl_allowed - SFU op is one of TMC / CSRRS / BAR / BAR_MASK
//   tensor_wctl_allowed - SFU op is BAR or BAR_MASK
//   tensor_sfu_allowed  - union of the two SFU allow-lists above
wire [`ISSUE_WIDTH-1:0][`NW_WIDTH-1:0] operands_wid;
wire [`ISSUE_WIDTH-1:0] operands_is_tensor;
wire [`ISSUE_WIDTH-1:0] tensor_alu_allowed;
wire [`ISSUE_WIDTH-1:0] tensor_ctrl_allowed;
wire [`ISSUE_WIDTH-1:0] tensor_wctl_allowed;
wire [`ISSUE_WIDTH-1:0] tensor_sfu_allowed;
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
    assign operands_wid[i] = wis_to_wid(operands_if[i].data.wis, ISSUE_ISW_W'(i));
    // Compare at 32 bits. Casting `NUM_SCALAR_WARPS down to NW_WIDTH bits would
    // wrap it to 0 when it equals 2**NW_WIDTH, which would flag every warp as
    // a tensor warp. For smaller thresholds the result is unchanged.
    assign operands_is_tensor[i] = 32'(operands_wid[i]) >= 32'(`NUM_SCALAR_WARPS);
    assign tensor_alu_allowed[i] = !`INST_ALU_IS_M(operands_if[i].data.op_mod)
                                && !`INST_ALU_IS_RED(operands_if[i].data.op_mod);
    assign tensor_ctrl_allowed[i] = (operands_if[i].data.op_type == `INST_SFU_TMC)
                                 || (operands_if[i].data.op_type == `INST_SFU_CSRRS)
                                 || (operands_if[i].data.op_type == `INST_SFU_BAR)
                                 || (operands_if[i].data.op_type == `INST_SFU_BAR_MASK);
    assign tensor_wctl_allowed[i] = (operands_if[i].data.op_type == `INST_SFU_BAR)
                                 || (operands_if[i].data.op_type == `INST_SFU_BAR_MASK);
    assign tensor_sfu_allowed[i] = tensor_ctrl_allowed[i] || tensor_wctl_allowed[i];
end
// ALU dispatch: local operand streams feeding the scalar ALU buffers.
VX_operands_if alu_operands_if[`ISSUE_WIDTH]();
// Scalar ALU path: forward a slot's operands when the instruction targets the
// ALU and the issuing warp is not a tensor warp.
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire alu_select = (operands_if[isw].data.ex_type == `EX_ALU) && !operands_is_tensor[isw];

    assign alu_operands_if[isw].data  = operands_if[isw].data;
    assign alu_operands_if[isw].valid = operands_if[isw].valid && alu_select;

    `RESET_RELAY (alu_reset, reset);

    VX_elastic_buffer #(
        .DATAW   (DATAW),
        .SIZE    (2),
        .OUT_REG (2)
    ) alu_buffer (
        .clk       (clk),
        .reset     (alu_reset),
        // enqueue side
        .valid_in  (alu_operands_if[isw].valid),
        .data_in   (`TO_DISPATCH_DATA(alu_operands_if[isw].data, last_active_tid[isw])),
        .ready_in  (alu_operands_if[isw].ready),
        // dequeue side
        .valid_out (alu_dispatch_if[isw].valid),
        .data_out  (alu_dispatch_if[isw].data),
        .ready_out (alu_dispatch_if[isw].ready)
    );
end
`ifdef EXT_T_ENABLE
// Tensor-warp ALU path: ALU instructions from tensor warps, limited to the ops
// admitted by tensor_alu_allowed.
VX_operands_if tensor_alu_operands_if[`ISSUE_WIDTH]();
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire tensor_alu_select = (operands_if[isw].data.ex_type == `EX_ALU)
                          && operands_is_tensor[isw]
                          && tensor_alu_allowed[isw];

    assign tensor_alu_operands_if[isw].data  = operands_if[isw].data;
    assign tensor_alu_operands_if[isw].valid = operands_if[isw].valid && tensor_alu_select;

    `RESET_RELAY (tensor_alu_reset, reset);

    VX_elastic_buffer #(
        .DATAW   (DATAW),
        .SIZE    (2),
        .OUT_REG (2)
    ) tensor_alu_buffer (
        .clk       (clk),
        .reset     (tensor_alu_reset),
        // enqueue side
        .valid_in  (tensor_alu_operands_if[isw].valid),
        .data_in   (`TO_DISPATCH_DATA(tensor_alu_operands_if[isw].data, last_active_tid[isw])),
        .ready_in  (tensor_alu_operands_if[isw].ready),
        // dequeue side
        .valid_out (tensor_alu_dispatch_if[isw].valid),
        .data_out  (tensor_alu_dispatch_if[isw].data),
        .ready_out (tensor_alu_dispatch_if[isw].ready)
    );
end
`endif
// Scalar LSU path: LSU instructions issued by non-tensor warps.
VX_operands_if lsu_operands_if[`ISSUE_WIDTH]();
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire lsu_select = (operands_if[isw].data.ex_type == `EX_LSU) && !operands_is_tensor[isw];

    assign lsu_operands_if[isw].data  = operands_if[isw].data;
    assign lsu_operands_if[isw].valid = operands_if[isw].valid && lsu_select;

    `RESET_RELAY (lsu_reset, reset);

    VX_elastic_buffer #(
        .DATAW   (DATAW),
        .SIZE    (2),
        .OUT_REG (2)
    ) lsu_buffer (
        .clk       (clk),
        .reset     (lsu_reset),
        // enqueue side
        .valid_in  (lsu_operands_if[isw].valid),
        .data_in   (`TO_DISPATCH_DATA(lsu_operands_if[isw].data, last_active_tid[isw])),
        .ready_in  (lsu_operands_if[isw].ready),
        // dequeue side
        .valid_out (lsu_dispatch_if[isw].valid),
        .data_out  (lsu_dispatch_if[isw].data),
        .ready_out (lsu_dispatch_if[isw].ready)
    );
end
`ifdef EXT_T_ENABLE
// Tensor-warp LSU path: LSU instructions issued by tensor warps.
VX_operands_if tensor_lsu_operands_if[`ISSUE_WIDTH]();
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire tensor_lsu_select = (operands_if[isw].data.ex_type == `EX_LSU) && operands_is_tensor[isw];

    assign tensor_lsu_operands_if[isw].data  = operands_if[isw].data;
    assign tensor_lsu_operands_if[isw].valid = operands_if[isw].valid && tensor_lsu_select;

    `RESET_RELAY (tensor_lsu_reset, reset);

    VX_elastic_buffer #(
        .DATAW   (DATAW),
        .SIZE    (2),
        .OUT_REG (2)
    ) tensor_lsu_buffer (
        .clk       (clk),
        .reset     (tensor_lsu_reset),
        // enqueue side
        .valid_in  (tensor_lsu_operands_if[isw].valid),
        .data_in   (`TO_DISPATCH_DATA(tensor_lsu_operands_if[isw].data, last_active_tid[isw])),
        .ready_in  (tensor_lsu_operands_if[isw].ready),
        // dequeue side
        .valid_out (tensor_lsu_dispatch_if[isw].valid),
        .data_out  (tensor_lsu_dispatch_if[isw].data),
        .ready_out (tensor_lsu_dispatch_if[isw].ready)
    );
end
`endif
// Scalar FPU path: FPU instructions issued by non-tensor warps.
`ifdef EXT_F_ENABLE
VX_operands_if fpu_operands_if[`ISSUE_WIDTH]();
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire fpu_select = (operands_if[isw].data.ex_type == `EX_FPU) && !operands_is_tensor[isw];

    assign fpu_operands_if[isw].data  = operands_if[isw].data;
    assign fpu_operands_if[isw].valid = operands_if[isw].valid && fpu_select;

    `RESET_RELAY (fpu_reset, reset);

    VX_elastic_buffer #(
        .DATAW   (DATAW),
        .SIZE    (2),
        .OUT_REG (2)
    ) fpu_buffer (
        .clk       (clk),
        .reset     (fpu_reset),
        // enqueue side
        .valid_in  (fpu_operands_if[isw].valid),
        .data_in   (`TO_DISPATCH_DATA(fpu_operands_if[isw].data, last_active_tid[isw])),
        .ready_in  (fpu_operands_if[isw].ready),
        // dequeue side
        .valid_out (fpu_dispatch_if[isw].valid),
        .data_out  (fpu_dispatch_if[isw].data),
        .ready_out (fpu_dispatch_if[isw].ready)
    );
end
`endif
// Tensor unit path: EX_TENSOR instructions, accepted only from tensor warps.
`ifdef EXT_T_ENABLE
VX_operands_if tensor_operands_if[`ISSUE_WIDTH]();
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire tensor_select = (operands_if[isw].data.ex_type == `EX_TENSOR) && operands_is_tensor[isw];

    assign tensor_operands_if[isw].data  = operands_if[isw].data;
    assign tensor_operands_if[isw].valid = operands_if[isw].valid && tensor_select;

    `RESET_RELAY (tensor_reset, reset);

    VX_elastic_buffer #(
        .DATAW   (DATAW),
        .SIZE    (2),
        .OUT_REG (2)
    ) tensor_buffer (
        .clk       (clk),
        .reset     (tensor_reset),
        // enqueue side
        .valid_in  (tensor_operands_if[isw].valid),
        .data_in   (`TO_DISPATCH_DATA(tensor_operands_if[isw].data, last_active_tid[isw])),
        .ready_in  (tensor_operands_if[isw].ready),
        // dequeue side
        .valid_out (tensor_dispatch_if[isw].valid),
        .data_out  (tensor_dispatch_if[isw].data),
        .ready_out (tensor_dispatch_if[isw].ready)
    );
end
`endif
`ifdef EXT_T_ENABLE
// Tensor-warp control path: SFU instructions from tensor warps, limited to the
// ops admitted by tensor_ctrl_allowed.
VX_operands_if tensor_ctrl_operands_if[`ISSUE_WIDTH]();
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire tensor_ctrl_select = (operands_if[isw].data.ex_type == `EX_SFU)
                           && operands_is_tensor[isw]
                           && tensor_ctrl_allowed[isw];

    assign tensor_ctrl_operands_if[isw].data  = operands_if[isw].data;
    assign tensor_ctrl_operands_if[isw].valid = operands_if[isw].valid && tensor_ctrl_select;

    `RESET_RELAY (tensor_ctrl_reset, reset);

    VX_elastic_buffer #(
        .DATAW   (DATAW),
        .SIZE    (2),
        .OUT_REG (2)
    ) tensor_ctrl_buffer (
        .clk       (clk),
        .reset     (tensor_ctrl_reset),
        // enqueue side
        .valid_in  (tensor_ctrl_operands_if[isw].valid),
        .data_in   (`TO_DISPATCH_DATA(tensor_ctrl_operands_if[isw].data, last_active_tid[isw])),
        .ready_in  (tensor_ctrl_operands_if[isw].ready),
        // dequeue side
        .valid_out (tensor_ctrl_dispatch_if[isw].valid),
        .data_out  (tensor_ctrl_dispatch_if[isw].data),
        .ready_out (tensor_ctrl_dispatch_if[isw].ready)
    );
end
`endif
// Scalar SFU path: SFU instructions issued by non-tensor warps.
VX_operands_if sfu_operands_if[`ISSUE_WIDTH]();
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire sfu_select = (operands_if[isw].data.ex_type == `EX_SFU) && !operands_is_tensor[isw];

    assign sfu_operands_if[isw].data  = operands_if[isw].data;
    assign sfu_operands_if[isw].valid = operands_if[isw].valid && sfu_select;

    `RESET_RELAY (sfu_reset, reset);

    VX_elastic_buffer #(
        .DATAW   (DATAW),
        .SIZE    (2),
        .OUT_REG (2)
    ) sfu_buffer (
        .clk       (clk),
        .reset     (sfu_reset),
        // enqueue side
        .valid_in  (sfu_operands_if[isw].valid),
        .data_in   (`TO_DISPATCH_DATA(sfu_operands_if[isw].data, last_active_tid[isw])),
        .ready_in  (sfu_operands_if[isw].ready),
        // dequeue side
        .valid_out (sfu_dispatch_if[isw].valid),
        .data_out  (sfu_dispatch_if[isw].data),
        .ready_out (sfu_dispatch_if[isw].ready)
    );
end
// Back-pressure to the operands stage: a slot is ready when the buffer selected
// for its current instruction can accept it. Each term repeats the selection
// condition of the matching dispatch path. An instruction that matches none of
// the terms is never accepted.
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire from_scalar_warp = !operands_is_tensor[isw];

    wire alu_accept = alu_operands_if[isw].ready
                   && (operands_if[isw].data.ex_type == `EX_ALU)
                   && from_scalar_warp;
    wire lsu_accept = lsu_operands_if[isw].ready
                   && (operands_if[isw].data.ex_type == `EX_LSU)
                   && from_scalar_warp;
    wire sfu_accept = sfu_operands_if[isw].ready
                   && (operands_if[isw].data.ex_type == `EX_SFU)
                   && from_scalar_warp;
`ifdef EXT_F_ENABLE
    wire fpu_accept = fpu_operands_if[isw].ready
                   && (operands_if[isw].data.ex_type == `EX_FPU)
                   && from_scalar_warp;
`endif
`ifdef EXT_T_ENABLE
    wire from_tensor_warp = operands_is_tensor[isw];

    wire tensor_alu_accept  = tensor_alu_operands_if[isw].ready
                           && (operands_if[isw].data.ex_type == `EX_ALU)
                           && from_tensor_warp
                           && tensor_alu_allowed[isw];
    wire tensor_lsu_accept  = tensor_lsu_operands_if[isw].ready
                           && (operands_if[isw].data.ex_type == `EX_LSU)
                           && from_tensor_warp;
    wire tensor_ctrl_accept = tensor_ctrl_operands_if[isw].ready
                           && (operands_if[isw].data.ex_type == `EX_SFU)
                           && from_tensor_warp
                           && tensor_ctrl_allowed[isw];
    wire tensor_accept      = tensor_operands_if[isw].ready
                           && (operands_if[isw].data.ex_type == `EX_TENSOR)
                           && from_tensor_warp;
`endif

    assign operands_if[isw].ready = alu_accept
                                 || lsu_accept
                                `ifdef EXT_F_ENABLE
                                 || fpu_accept
                                `endif
                                `ifdef EXT_T_ENABLE
                                 || tensor_alu_accept
                                 || tensor_lsu_accept
                                 || tensor_ctrl_accept
                                 || tensor_accept
                                `endif
                                 || sfu_accept;
end
`ifdef SIMULATION
// Simulation-only legality checks on the instruction presented by each slot.
// Every check fires on a warp-class / execution-unit combination that no
// dispatch path accepts.
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire chk_tensor_warp = operands_if[isw].valid && operands_is_tensor[isw];

    // EX_TENSOR instruction issued by a non-tensor warp
    wire bad_scalar_tensor_op = operands_if[isw].valid
                             && (operands_if[isw].data.ex_type == `EX_TENSOR)
                             && !operands_is_tensor[isw];
    // FPU instruction issued by a tensor warp
    wire bad_tensor_fpu_op = chk_tensor_warp
                          && (operands_if[isw].data.ex_type == `EX_FPU);
    // SFU instruction from a tensor warp outside the SFU allow-list
    wire bad_tensor_sfu_op = chk_tensor_warp
                          && (operands_if[isw].data.ex_type == `EX_SFU)
                          && !tensor_sfu_allowed[isw];
    // M-class or RED-class ALU instruction from a tensor warp
    wire bad_tensor_alu_op = chk_tensor_warp
                          && (operands_if[isw].data.ex_type == `EX_ALU)
                          && !tensor_alu_allowed[isw];

    `RUNTIME_ASSERT(
        !bad_scalar_tensor_op,
        ("%t: *** core%0d-dispatch-illegal-scalar-tensor-op: wid=%0d PC=0x%0h op=0x%0h (#%0d)",
            $time, CORE_ID, operands_wid[isw], operands_if[isw].data.PC, operands_if[isw].data.op_type, operands_if[isw].data.uuid)
    )
    `RUNTIME_ASSERT(
        !bad_tensor_fpu_op,
        ("%t: *** core%0d-dispatch-illegal-tensor-fpu-op: wid=%0d PC=0x%0h op=0x%0h (#%0d)",
            $time, CORE_ID, operands_wid[isw], operands_if[isw].data.PC, operands_if[isw].data.op_type, operands_if[isw].data.uuid)
    )
    `RUNTIME_ASSERT(
        !bad_tensor_sfu_op,
        ("%t: *** core%0d-dispatch-illegal-tensor-sfu-op: wid=%0d PC=0x%0h op=0x%0h (#%0d)",
            $time, CORE_ID, operands_wid[isw], operands_if[isw].data.PC, operands_if[isw].data.op_type, operands_if[isw].data.uuid)
    )
    `RUNTIME_ASSERT(
        !bad_tensor_alu_op,
        ("%t: *** core%0d-dispatch-illegal-tensor-complex-alu-op: wid=%0d PC=0x%0h op=0x%0h mod=0x%0h (#%0d)",
            $time, CORE_ID, operands_wid[isw], operands_if[isw].data.PC, operands_if[isw].data.op_type, operands_if[isw].data.op_mod, operands_if[isw].data.uuid)
    )
end
`endif
`ifdef PERF_ENABLE
// One-hot (by ex_type) event flags for each issue slot in the current cycle.
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_stalls_per_cycle;
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_valids_per_cycle;
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_unit_fires_per_cycle;
// Per-unit event counts for the current cycle, summed over all issue slots.
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_stalls_per_cycle;
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_valids_per_cycle;
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_fires_per_cycle;
// Copies of the per-cycle counts taken through `BUFFER.
wire [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_stalls_per_cycle_r;
wire [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_valids_per_cycle_r;
wire [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_unit_fires_per_cycle_r;
// Running totals that back the perf_* output ports.
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_stalls_r;
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_valids_r;
reg [`NUM_EX_UNITS-1:0][`PERF_CTR_BITS-1:0] perf_fires_r;
reg [`PERF_CTR_BITS-1:0] perf_any_fire_cycles_r;

// Flag, per slot, the unit addressed by ex_type for each kind of event.
for (genvar isw = 0; isw < `ISSUE_WIDTH; ++isw) begin
    wire slot_valid = operands_if[isw].valid;
    wire slot_stall = operands_if[isw].valid && ~operands_if[isw].ready;
    wire slot_fire  = operands_if[isw].valid && operands_if[isw].ready;

    always @(*) begin
        perf_issue_unit_valids_per_cycle[isw] = '0;
        perf_issue_unit_stalls_per_cycle[isw] = '0;
        perf_issue_unit_fires_per_cycle[isw]  = '0;
        if (slot_valid) begin
            perf_issue_unit_valids_per_cycle[isw][operands_if[isw].data.ex_type] = 1;
        end
        if (slot_stall) begin
            perf_issue_unit_stalls_per_cycle[isw][operands_if[isw].data.ex_type] = 1;
        end
        if (slot_fire) begin
            perf_issue_unit_fires_per_cycle[isw][operands_if[isw].data.ex_type] = 1;
        end
    end
end

// Sum the slot flags for each unit.
for (genvar u = 0; u < `NUM_EX_UNITS; ++u) begin
    always @(*) begin
        perf_unit_stalls_per_cycle[u] = '0;
        perf_unit_valids_per_cycle[u] = '0;
        perf_unit_fires_per_cycle[u]  = '0;
        for (integer s = 0; s < `ISSUE_WIDTH; ++s) begin
            perf_unit_stalls_per_cycle[u] = perf_unit_stalls_per_cycle[u]
                                          + `PERF_CTR_BITS'(perf_issue_unit_stalls_per_cycle[s][u]);
            perf_unit_valids_per_cycle[u] = perf_unit_valids_per_cycle[u]
                                          + `PERF_CTR_BITS'(perf_issue_unit_valids_per_cycle[s][u]);
            perf_unit_fires_per_cycle[u]  = perf_unit_fires_per_cycle[u]
                                          + `PERF_CTR_BITS'(perf_issue_unit_fires_per_cycle[s][u]);
        end
    end
end

// Disabled alternative kept for reference: OR-reduce the stall flags with VX_reduce.
// VX_reduce #(
// .DATAW_IN (`NUM_EX_UNITS),
// .N (`ISSUE_WIDTH),
// .OP ("|")
// ) reduce (
// .data_in (perf_issue_unit_stalls_per_cycle),
// .data_out (perf_unit_stalls_per_cycle)
// );

`BUFFER(perf_unit_stalls_per_cycle_r, perf_unit_stalls_per_cycle);
`BUFFER(perf_unit_valids_per_cycle_r, perf_unit_valids_per_cycle);
`BUFFER(perf_unit_fires_per_cycle_r, perf_unit_fires_per_cycle);
// High when at least one execution unit has a non-zero buffered fire count.
reg perf_any_fire_per_cycle;
always @(*) begin
    perf_any_fire_per_cycle = 1'b0;
    for (integer i = 0; i < `NUM_EX_UNITS; ++i) begin
        if (perf_unit_fires_per_cycle_r[i] != '0) begin
            perf_any_fire_per_cycle = 1'b1;
        end
    end
end

// Per-unit running totals of stall / valid / fire events.
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
    always @(posedge clk) begin
        if (reset) begin
            perf_stalls_r[i] <= '0;
            perf_valids_r[i] <= '0;
            perf_fires_r[i]  <= '0;
        end else begin
            perf_stalls_r[i] <= perf_stalls_r[i] + `PERF_CTR_BITS'(perf_unit_stalls_per_cycle_r[i]);
            perf_valids_r[i] <= perf_valids_r[i] + `PERF_CTR_BITS'(perf_unit_valids_per_cycle_r[i]);
            perf_fires_r[i]  <= perf_fires_r[i] + `PERF_CTR_BITS'(perf_unit_fires_per_cycle_r[i]);
        end
    end
end

// Count of cycles with at least one fire. This register is not per-unit, so it
// is updated from a single always block; assigning it inside the per-unit
// generate loop would give it one procedural driver per execution unit.
always @(posedge clk) begin
    if (reset) begin
        perf_any_fire_cycles_r <= '0;
    end else begin
        perf_any_fire_cycles_r <= perf_any_fire_cycles_r + `PERF_CTR_BITS'(perf_any_fire_per_cycle);
    end
end

// Drive the unpacked output ports from the packed accumulators.
for (genvar i = 0; i < `NUM_EX_UNITS; ++i) begin
    assign perf_stalls[i] = perf_stalls_r[i];
    assign perf_valids[i] = perf_valids_r[i];
    assign perf_fires[i]  = perf_fires_r[i];
end
assign perf_any_fire_cycles = perf_any_fire_cycles_r;
`endif
`ifdef DBG_TRACE_CORE_PIPELINE_VCS
// Debug tracing, active once out of reset and past `TRACE_STARTTIME.
for (genvar i=0; i < `ISSUE_WIDTH; ++i) begin
    always @(posedge clk) begin
        if (!reset && ($time > `TRACE_STARTTIME)) begin
            // Narrow probe of the ALU buffer output: core 0 only, and only for
            // two hard-coded PCs.
            if ((CORE_ID == 0)
             && alu_dispatch_if[i].valid
             && ((alu_dispatch_if[i].data.PC == 32'h80000010) || (alu_dispatch_if[i].data.PC == 32'h80000014))) begin
                // The slot index is cast to ISSUE_ISW_W bits, as in the
                // operands_wid computation, instead of passing the raw genvar.
                `TRACE(1, ("%d: core%0d-alu-dispatch-buffer: isw=%0d, valid=%b, ready=%b, wid=%0d, PC=0x%0h, op=0x%0h, mod=%0d, wb=%0d, rd=%0d (#%0d)\n",
                    $time, CORE_ID, i, alu_dispatch_if[i].valid, alu_dispatch_if[i].ready,
                    wis_to_wid(alu_dispatch_if[i].data.wis, ISSUE_ISW_W'(i)), alu_dispatch_if[i].data.PC,
                    alu_dispatch_if[i].data.op_type, alu_dispatch_if[i].data.op_mod,
                    alu_dispatch_if[i].data.wb, alu_dispatch_if[i].data.rd,
                    alu_dispatch_if[i].data.uuid));
            end
            // One trace record per instruction accepted from the operands stage.
            if (operands_if[i].valid && operands_if[i].ready) begin
                `TRACE(1, ("%d: core%0d-issue: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, wis_to_wid(operands_if[i].data.wis, ISSUE_ISW_W'(i)), operands_if[i].data.PC));
                trace_ex_type(1, operands_if[i].data.ex_type);
                `TRACE(1, (", op="));
                trace_ex_op(1, operands_if[i].data.ex_type, operands_if[i].data.op_type, operands_if[i].data.op_mod, operands_if[i].data.rd, '0, operands_if[i].data.use_imm, operands_if[i].data.imm);
                `TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if[i].data.op_mod, operands_if[i].data.tmask, operands_if[i].data.wb, operands_if[i].data.rd));
                `TRACE_ARRAY1D(1, operands_if[i].data.rs1_data, `NUM_THREADS);
                `TRACE(1, (", rs2_data="));
                `TRACE_ARRAY1D(1, operands_if[i].data.rs2_data, `NUM_THREADS);
                `TRACE(1, (", rs3_data="));
                `TRACE_ARRAY1D(1, operands_if[i].data.rs3_data, `NUM_THREADS);
                `TRACE(1, (" (#%0d)\n", operands_if[i].data.uuid));
            end
        end
    end
end
`endif
endmodule