tensor: Test multiple commits per execute with an FSM
The trick is to set commit_if.data.eop to 0 on every commit except the last one for a given execute, since the commit module only signals instruction completion to VX_schedule if the eop bit is 1. Signaling completion on every commit would otherwise underflow the pending_instr buffer. The same eop trick works for VX_scoreboard, which works around the invalid rd writeback error.
This commit is contained in:
@@ -11,7 +11,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
|||||||
VX_execute_if.slave execute_if,
|
VX_execute_if.slave execute_if,
|
||||||
VX_commit_if.master commit_if
|
VX_commit_if.master commit_if
|
||||||
);
|
);
|
||||||
localparam METADATA_QUEUE_DEPTH = 2; // FIXME: arbitrary
|
localparam METADATA_QUEUE_DEPTH = 16; // FIXME: arbitrary
|
||||||
|
|
||||||
/* commit_if.data_t parts that we need to keep around:
|
/* commit_if.data_t parts that we need to keep around:
|
||||||
- uuid
|
- uuid
|
||||||
@@ -37,6 +37,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
|||||||
};
|
};
|
||||||
|
|
||||||
wire [`NUM_WARPS-1:0][DATAW-1:0] execute_if_data_deq;
|
wire [`NUM_WARPS-1:0][DATAW-1:0] execute_if_data_deq;
|
||||||
|
logic [DATAW-1:0] execute_if_data_new_rd;
|
||||||
|
|
||||||
wire [`NUM_WARPS-1:0] metadata_queue_fulls;
|
wire [`NUM_WARPS-1:0] metadata_queue_fulls;
|
||||||
wire [`NUM_WARPS-1:0] metadata_queue_emptys;
|
wire [`NUM_WARPS-1:0] metadata_queue_emptys;
|
||||||
@@ -47,6 +48,8 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
|||||||
`RUNTIME_ASSERT((!execute_if.valid || execute_if.data.wid == `NW_WIDTH'(0)),
|
`RUNTIME_ASSERT((!execute_if.valid || execute_if.data.wid == `NW_WIDTH'(0)),
|
||||||
("runtime error: WGMMA execute not supported for warps other than 0!"))
|
("runtime error: WGMMA execute not supported for warps other than 0!"))
|
||||||
|
|
||||||
|
logic metadata_deq;
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||||
// Metadata queue for commit_if. This simply copies execute_if's
|
// Metadata queue for commit_if. This simply copies execute_if's
|
||||||
// metadata and pops them in conjunction with commit fire.
|
// metadata and pops them in conjunction with commit fire.
|
||||||
@@ -58,7 +61,7 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
wire enq = operand_enq_fire && (execute_if.data.wid == `NW_WIDTH'(i));
|
wire enq = operand_enq_fire && (execute_if.data.wid == `NW_WIDTH'(i));
|
||||||
// FIXME: commit only warp 0
|
// FIXME: commit only warp 0
|
||||||
wire deq = commit_if_fire && (`NW_WIDTH'(i) == `NW_WIDTH'(0));
|
wire deq = metadata_deq && commit_if.ready && (`NW_WIDTH'(i) == `NW_WIDTH'(0));
|
||||||
|
|
||||||
VX_fifo_queue #(
|
VX_fifo_queue #(
|
||||||
.DATAW(DATAW),
|
.DATAW(DATAW),
|
||||||
@@ -82,18 +85,58 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
|||||||
// the commit stage
|
// the commit stage
|
||||||
`RUNTIME_ASSERT(!(!reset && metadata_queue_full), ("tensor core uop queue is full!"))
|
`RUNTIME_ASSERT(!(!reset && metadata_queue_full), ("tensor core uop queue is full!"))
|
||||||
|
|
||||||
// FIXME: only checks warp 0 for commit!
|
// dummy FSM that generates commits
|
||||||
assign commit_if.valid = ~metadata_queue_emptys[0/*FIXME*/];
|
logic [1:0] state, state_n;
|
||||||
|
localparam STATE_IDLE = 4'd0;
|
||||||
|
|
||||||
|
always @(*) begin
|
||||||
|
state_n = state;
|
||||||
|
metadata_deq = 1'b0;
|
||||||
|
|
||||||
|
// when incremented to 1, count up until wrap-around to 0
|
||||||
|
if (state != STATE_IDLE) begin
|
||||||
|
state_n = state + 1'd1;
|
||||||
|
end else begin
|
||||||
|
// kick-off from idle when execute valid
|
||||||
|
// FIXME: only checks warp 0 for commit!
|
||||||
|
if (~metadata_queue_emptys[0/*FIXME*/]) begin
|
||||||
|
state_n = 4'd1;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
// dequeue metadata when wrapping around
|
||||||
|
if ((state != STATE_IDLE) && (state_n == STATE_IDLE)) begin
|
||||||
|
metadata_deq = 1'b1;
|
||||||
|
end
|
||||||
|
|
||||||
|
// change rd of the commit data according to state
|
||||||
|
execute_if_data_new_rd =
|
||||||
|
{execute_if_data_deq[0/*FIXME*/][DATAW-1:`NR_BITS],
|
||||||
|
(`NR_BITS'(`NUM_IREGS) + `NR_BITS'(state))};
|
||||||
|
end
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (reset) begin
|
||||||
|
state <= '0;
|
||||||
|
end else begin
|
||||||
|
state <= state_n;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
// assign commit_if.valid = metadata_deq;
|
||||||
|
assign commit_if.valid = (state != STATE_IDLE);
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] wb_data = '0;
|
wire [`NUM_THREADS-1:0][`XLEN-1:0] wb_data = '0;
|
||||||
|
|
||||||
localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1;
|
localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1;
|
||||||
wire [COMMIT_DATAW-1:0] commit_if_data = {
|
wire [COMMIT_DATAW-1:0] commit_if_data = {
|
||||||
execute_if_data_deq[0/*FIXME*/], /* uuid ~ rd */
|
// write-back to the correct rd only when eop
|
||||||
|
((state == 2'b11) ? execute_if_data_deq[0/*FIXME*/] : execute_if_data_new_rd), /* uuid ~ rd */
|
||||||
wb_data, /* data */
|
wb_data, /* data */
|
||||||
1'b0, /* pid */
|
1'b0, /* pid */
|
||||||
1'b1, /* sop */
|
1'b1, /* sop */
|
||||||
1'b1 /* eop */
|
(state == 2'b11) /* eop */
|
||||||
|
// 1'b1 /* eop */
|
||||||
};
|
};
|
||||||
|
|
||||||
assign commit_if.data = commit_if_data;
|
assign commit_if.data = commit_if_data;
|
||||||
|
|||||||
Reference in New Issue
Block a user