tensor: Remove redundant issue queue outside dpu

This commit is contained in:
Hansung Kim
2024-05-30 17:29:59 -07:00
parent 2743d32bd2
commit dfb2276657

View File

@@ -333,46 +333,55 @@ module VX_tensor_octet #(
wire operands_last_in_pair_buf; wire operands_last_in_pair_buf;
wire [1:0] operands_step_buf; wire [1:0] operands_step_buf;
wire inbuf_empty; // wire inbuf_empty;
wire inbuf_full; // wire inbuf_full;
wire inbuf_ready_in; // wire inbuf_ready_in;
assign inbuf_ready_in = !inbuf_full; // assign inbuf_ready_in = !inbuf_full;
assign operands_ready = inbuf_ready_in; // assign operands_ready = inbuf_ready_in;
assign operands_valid_buf = !inbuf_empty; // assign operands_valid_buf = !inbuf_empty;
// wire inbuf_enq = operands_ready && operands_valid && operands_last_in_pair; // // wire inbuf_enq = operands_ready && operands_valid && operands_last_in_pair;
wire inbuf_enq = operands_ready && operands_valid; // wire inbuf_enq = operands_ready && operands_valid;
wire inbuf_deq = operands_valid_buf && operands_ready_buf; // wire inbuf_deq = operands_valid_buf && operands_ready_buf;
// the 'issue queue' for the dpu. // // the 'issue queue' for the dpu.
// This exists to decouple the input of the dot-product unit from // // This exists to decouple the input of the dot-product unit from
// execute_if.ready. execute_if can arrive intermittently according to // // execute_if.ready. execute_if can arrive intermittently according to
// the frontend's behavior, and since the dpu can also stall for a fixed // // the frontend's behavior, and since the dpu can also stall for a fixed
// initiation latency, we need to decouple the two to efficiently feed the // // initiation latency, we need to decouple the two to efficiently feed the
// dpu. // // dpu.
// This only applies to the last instruction in a pair, since the first // // This only applies to the last instruction in a pair, since the first
// instruction only acts to buffer the operands and can execute // // instruction only acts to buffer the operands and can execute
// immediately without backpressure. So we don't enqueue them. // // immediately without backpressure. So we don't enqueue them.
VX_fifo_queue #( // VX_fifo_queue #(
.DATAW ($bits(A_in) + $bits(B_in) + $bits(C_in) + // .DATAW ($bits(A_in) + $bits(B_in) + $bits(C_in) +
$bits(operands_wid) + $bits(operands_step) + $bits(operands_last_in_pair)), // $bits(operands_wid) + $bits(operands_step) + $bits(operands_last_in_pair)),
.DEPTH (ISSUE_QUEUE_DEPTH) // .DEPTH (ISSUE_QUEUE_DEPTH)
) input_buffer ( // ) input_buffer (
.clk (clk), // .clk (clk),
.reset (reset), // .reset (reset),
.push (inbuf_enq), // .push (inbuf_enq),
.pop (inbuf_deq), // .pop (inbuf_deq),
.data_in ({A_in, B_in, C_in, operands_wid, operands_step, operands_last_in_pair}), // .data_in ({A_in, B_in, C_in, operands_wid, operands_step, operands_last_in_pair}),
.data_out ({A_in_buf, B_in_buf, C_in_buf, operands_wid_buf, operands_step_buf, operands_last_in_pair_buf}), // .data_out ({A_in_buf, B_in_buf, C_in_buf, operands_wid_buf, operands_step_buf, operands_last_in_pair_buf}),
.empty (inbuf_empty), // .empty (inbuf_empty),
`UNUSED_PIN(alm_empty), // `UNUSED_PIN(alm_empty),
.full (inbuf_full), // .full (inbuf_full),
`UNUSED_PIN(alm_full), // `UNUSED_PIN(alm_full),
`UNUSED_PIN(size) // `UNUSED_PIN(size)
); // );
// FIXME: this shouldn't be necessary // // FIXME: this shouldn't be necessary
`RUNTIME_ASSERT(reset || !inbuf_full, ("dpu issue queue is full!")) // `RUNTIME_ASSERT(reset || !inbuf_full, ("dpu issue queue is full!"))
// Pass-through wiring: every *_buf signal is driven directly by its
// un-buffered counterpart (operand data, step, warp id, last-in-pair flag
// and valid), and the upstream operands_ready is driven straight from
// operands_ready_buf, so the valid/ready handshake is forwarded
// combinationally with no storage in between.
// NOTE(review): presumably this replaces the input_buffer FIFO handshake;
// confirm no other driver of operands_valid_buf / operands_ready remains.
assign A_in_buf = A_in;
assign B_in_buf = B_in;
assign C_in_buf = C_in;
assign operands_step_buf = operands_step;
assign operands_wid_buf = operands_wid;
assign operands_last_in_pair_buf = operands_last_in_pair;
assign operands_valid_buf = operands_valid;
assign operands_ready = operands_ready_buf;
typedef struct { typedef struct {
logic [3:0][31:0] A_half; logic [3:0][31:0] A_half;