Add 'tensor' bit to commit_if and writeback_if
Used by the asynchronous tensor instruction. When the bit is 1'b1, it sets/clears the inuse_tensor status bit in the scoreboard to signal kickoff/completion of the asynchronous tensor op.
This commit is contained in:
@@ -32,7 +32,7 @@ module VX_alu_unit #(
|
|||||||
localparam NUM_LANES = `NUM_ALU_LANES;
|
localparam NUM_LANES = `NUM_ALU_LANES;
|
||||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||||
localparam PID_WIDTH = `UP(PID_BITS);
|
localparam PID_WIDTH = `UP(PID_BITS);
|
||||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + 1 + PID_WIDTH + 1 + 1;
|
||||||
localparam RSP_ARB_SIZE = 2 + `EXT_M_ENABLED;
|
localparam RSP_ARB_SIZE = 2 + `EXT_M_ENABLED;
|
||||||
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
||||||
|
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value
|
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value
|
||||||
);
|
);
|
||||||
`UNUSED_PARAM (CORE_ID)
|
`UNUSED_PARAM (CORE_ID)
|
||||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
|
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1 + 1;
|
||||||
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
|
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
|
||||||
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
|
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
|
||||||
|
|
||||||
@@ -210,6 +210,7 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||||||
assign writeback_if[i].data.tmask= commit_if[i].data.tmask;
|
assign writeback_if[i].data.tmask= commit_if[i].data.tmask;
|
||||||
assign writeback_if[i].data.rd = commit_if[i].data.rd;
|
assign writeback_if[i].data.rd = commit_if[i].data.rd;
|
||||||
assign writeback_if[i].data.data = commit_if[i].data.data;
|
assign writeback_if[i].data.data = commit_if[i].data.data;
|
||||||
|
assign writeback_if[i].data.tensor = commit_if[i].data.tensor;
|
||||||
assign writeback_if[i].data.sop = commit_if[i].data.sop;
|
assign writeback_if[i].data.sop = commit_if[i].data.sop;
|
||||||
assign writeback_if[i].data.eop = commit_if[i].data.eop;
|
assign writeback_if[i].data.eop = commit_if[i].data.eop;
|
||||||
assign commit_if[i].ready = 1'b1; // writeback has no backpressure
|
assign commit_if[i].ready = 1'b1; // writeback has no backpressure
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||||||
`UNUSED_PARAM (CORE_ID)
|
`UNUSED_PARAM (CORE_ID)
|
||||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||||
localparam PID_WIDTH = `UP(PID_BITS);
|
localparam PID_WIDTH = `UP(PID_BITS);
|
||||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * 32 + PID_WIDTH + 1 + 1;
|
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * 32 + 1 + PID_WIDTH + 1 + 1;
|
||||||
|
|
||||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||||
|
|
||||||
@@ -174,8 +174,8 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in (csr_req_valid),
|
.valid_in (csr_req_valid),
|
||||||
.ready_in (csr_req_ready),
|
.ready_in (csr_req_ready),
|
||||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, csr_read_data, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
|
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, csr_read_data, 1'b0/*tensor*/, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
|
||||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, csr_commit_data, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}),
|
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, csr_commit_data, commit_if.data.tensor, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}),
|
||||||
.valid_out (commit_if.valid),
|
.valid_out (commit_if.valid),
|
||||||
.ready_out (commit_if.ready)
|
.ready_out (commit_if.ready)
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||||||
localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE);
|
localparam BLOCK_SIZE_W = `LOG2UP(BLOCK_SIZE);
|
||||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||||
localparam PID_WIDTH = `UP(PID_BITS);
|
localparam PID_WIDTH = `UP(PID_BITS);
|
||||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + NUM_LANES * `XLEN + 1 + PID_WIDTH + 1 + 1;
|
||||||
localparam DATA_WIS_OFF = DATAW - (`UUID_WIDTH + `NW_WIDTH);
|
localparam DATA_WIS_OFF = DATAW - (`UUID_WIDTH + `NW_WIDTH);
|
||||||
|
|
||||||
wire [BLOCK_SIZE-1:0] commit_in_valid;
|
wire [BLOCK_SIZE-1:0] commit_in_valid;
|
||||||
@@ -119,6 +119,7 @@ module VX_gather_unit import VX_gpu_pkg::*; #(
|
|||||||
commit_tmp_if.data.wb,
|
commit_tmp_if.data.wb,
|
||||||
commit_tmp_if.data.rd,
|
commit_tmp_if.data.rd,
|
||||||
commit_data_r,
|
commit_data_r,
|
||||||
|
commit_tmp_if.data.tensor,
|
||||||
1'b0, // PID
|
1'b0, // PID
|
||||||
commit_tmp_if.data.sop,
|
commit_tmp_if.data.sop,
|
||||||
commit_tmp_if.data.eop
|
commit_tmp_if.data.eop
|
||||||
|
|||||||
@@ -136,14 +136,14 @@ module VX_int_unit #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `NR_BITS + 1 + PID_WIDTH + 1 + 1 + (NUM_LANES * `XLEN) + `XLEN + `XLEN + 1 + `INST_BR_BITS + LANE_WIDTH)
|
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `NR_BITS + 1 + PID_WIDTH + 1 + 1 + 1 + (NUM_LANES * `XLEN) + `XLEN + `XLEN + 1 + `INST_BR_BITS + LANE_WIDTH)
|
||||||
) rsp_buf (
|
) rsp_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in (execute_if.valid),
|
.valid_in (execute_if.valid),
|
||||||
.ready_in (execute_if.ready),
|
.ready_in (execute_if.ready),
|
||||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, execute_if.data.imm, is_br_op, br_op, tid}),
|
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, 1'b0/*tensor*/, alu_result, execute_if.data.PC, execute_if.data.imm, is_br_op, br_op, tid}),
|
||||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, alu_result_r, PC_r, imm_r, is_br_op_r, br_op_r, tid_r}),
|
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, commit_if.data.tensor, alu_result_r, PC_r, imm_r, is_br_op_r, br_op_r, tid_r}),
|
||||||
.valid_out (commit_if.valid),
|
.valid_out (commit_if.valid),
|
||||||
.ready_out (commit_if.ready)
|
.ready_out (commit_if.ready)
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||||||
localparam NUM_LANES = `NUM_LSU_LANES;
|
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||||
localparam PID_WIDTH = `UP(PID_BITS);
|
localparam PID_WIDTH = `UP(PID_BITS);
|
||||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + 1 + PID_WIDTH + 1 + 1;
|
||||||
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_SIZE);
|
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_SIZE);
|
||||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||||
localparam MEM_ADDRW = `XLEN - MEM_ASHIFT;
|
localparam MEM_ADDRW = `XLEN - MEM_ASHIFT;
|
||||||
@@ -527,15 +527,15 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||||||
// load commit
|
// load commit
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + (NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1),
|
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + (NUM_LANES * `XLEN) + 1 + PID_WIDTH + 1 + 1),
|
||||||
.SIZE (2)
|
.SIZE (2)
|
||||||
) ld_rsp_buf (
|
) ld_rsp_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in (mem_rsp_valid),
|
.valid_in (mem_rsp_valid),
|
||||||
.ready_in (mem_rsp_ready),
|
.ready_in (mem_rsp_ready),
|
||||||
.data_in ({rsp_uuid, rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_data, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}),
|
.data_in ({rsp_uuid, rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_data, 1'b0/*tensor*/, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}),
|
||||||
.data_out ({commit_ld_if.data.uuid, commit_ld_if.data.wid, commit_ld_if.data.tmask, commit_ld_if.data.PC, commit_ld_if.data.rd, commit_ld_if.data.data, commit_ld_if.data.pid, commit_ld_if.data.sop, commit_ld_if.data.eop}),
|
.data_out ({commit_ld_if.data.uuid, commit_ld_if.data.wid, commit_ld_if.data.tmask, commit_ld_if.data.PC, commit_ld_if.data.rd, commit_ld_if.data.data, commit_ld_if.data.tensor, commit_ld_if.data.pid, commit_ld_if.data.sop, commit_ld_if.data.eop}),
|
||||||
.valid_out (commit_ld_if.valid),
|
.valid_out (commit_ld_if.valid),
|
||||||
.ready_out (commit_ld_if.ready)
|
.ready_out (commit_ld_if.ready)
|
||||||
);
|
);
|
||||||
@@ -545,15 +545,15 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||||||
// store commit
|
// store commit
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + PID_WIDTH + 1 + 1),
|
.DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + PID_WIDTH + 1 + 1),
|
||||||
.SIZE (2)
|
.SIZE (2)
|
||||||
) st_rsp_buf (
|
) st_rsp_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in (mem_req_fire && mem_req_rw),
|
.valid_in (mem_req_fire && mem_req_rw),
|
||||||
.ready_in (st_rsp_ready),
|
.ready_in (st_rsp_ready),
|
||||||
.data_in ({execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, execute_if[0].data.pid, execute_if[0].data.sop, execute_if[0].data.eop}),
|
.data_in ({execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.tmask, execute_if[0].data.PC, 1'b0/*tensor*/, execute_if[0].data.pid, execute_if[0].data.sop, execute_if[0].data.eop}),
|
||||||
.data_out ({commit_st_if.data.uuid, commit_st_if.data.wid, commit_st_if.data.tmask, commit_st_if.data.PC, commit_st_if.data.pid, commit_st_if.data.sop, commit_st_if.data.eop}),
|
.data_out ({commit_st_if.data.uuid, commit_st_if.data.wid, commit_st_if.data.tmask, commit_st_if.data.PC, commit_st_if.data.tensor, commit_st_if.data.pid, commit_st_if.data.sop, commit_st_if.data.eop}),
|
||||||
.valid_out (commit_st_if.valid),
|
.valid_out (commit_st_if.valid),
|
||||||
.ready_out (commit_st_if.ready)
|
.ready_out (commit_st_if.ready)
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -323,16 +323,16 @@ module VX_muldiv_unit #(
|
|||||||
|
|
||||||
VX_stream_arb #(
|
VX_stream_arb #(
|
||||||
.NUM_INPUTS (2),
|
.NUM_INPUTS (2),
|
||||||
.DATAW (TAGW + (NUM_LANES * `XLEN)),
|
.DATAW (1/*tensor field only in commit*/ + TAGW + (NUM_LANES * `XLEN)),
|
||||||
.OUT_REG (1)
|
.OUT_REG (1)
|
||||||
) rsp_buf (
|
) rsp_buf (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in ({div_valid_out, mul_valid_out}),
|
.valid_in ({div_valid_out, mul_valid_out}),
|
||||||
.ready_in ({div_ready_out, mul_ready_out}),
|
.ready_in ({div_ready_out, mul_ready_out}),
|
||||||
.data_in ({{div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_pid_out, div_sop_out, div_eop_out, div_result_out},
|
.data_in ({{div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, 1'b0/*tensor*/, div_pid_out, div_sop_out, div_eop_out, div_result_out},
|
||||||
{mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_pid_out, mul_sop_out, mul_eop_out, mul_result_out}}),
|
{mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, 1'b0/*tensor*/, mul_pid_out, mul_sop_out, mul_eop_out, mul_result_out}}),
|
||||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, commit_if.data.data}),
|
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.tensor, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, commit_if.data.data}),
|
||||||
.valid_out (commit_if.valid),
|
.valid_out (commit_if.valid),
|
||||||
.ready_out (commit_if.ready),
|
.ready_out (commit_if.ready),
|
||||||
`UNUSED_PIN (sel_out)
|
`UNUSED_PIN (sel_out)
|
||||||
|
|||||||
@@ -269,7 +269,7 @@ module VX_reduce_unit #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
VX_elastic_buffer #(
|
VX_elastic_buffer #(
|
||||||
.DATAW(`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + (`XLEN * NUM_LANES) + PID_WIDTH + 1 + 1)
|
.DATAW(`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + (`XLEN * NUM_LANES) + 1 + PID_WIDTH + 1 + 1)
|
||||||
) output_buffer (
|
) output_buffer (
|
||||||
.clk(clk),
|
.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
@@ -277,7 +277,7 @@ module VX_reduce_unit #(
|
|||||||
.ready_in(commit_if_ready),
|
.ready_in(commit_if_ready),
|
||||||
.data_in({execute_if.data.uuid, execute_if.data.wid, stored_tmask, execute_if.data.PC, execute_if.data.wb, execute_if.data.rd, broadcasted_accumulator, stored_pid, stored_sop, stored_eop}),
|
// Field order and total width must match .data_out and DATAW: the commit
// payload carries a 1-bit 'tensor' flag between data and pid. It is tied to
// 1'b0 here, as in the other non-tensor execution units. Omitting it leaves
// the concatenation one bit short, so commit_if.data.tensor would pick up the
// LSB of broadcasted_accumulator and all upper fields would shift by one bit.
.data_in({execute_if.data.uuid, execute_if.data.wid, stored_tmask, execute_if.data.PC, execute_if.data.wb, execute_if.data.rd, broadcasted_accumulator, 1'b0/*tensor*/, stored_pid, stored_sop, stored_eop}),
|
||||||
|
|
||||||
.data_out({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.wb, commit_if.data.rd, commit_if.data.data, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}),
|
.data_out({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.wb, commit_if.data.rd, commit_if.data.data, commit_if.data.tensor, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}),
|
||||||
.ready_out(commit_if.ready),
|
.ready_out(commit_if.ready),
|
||||||
.valid_out(commit_if.valid)
|
.valid_out(commit_if.valid)
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -142,6 +142,9 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||||
reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0] inuse_regs;
|
reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0] inuse_regs;
|
||||||
|
// busy bit for the asynchronous Tensor unit. Since the ISA does not
|
||||||
|
// have an explicit destination register, use a separate status bit.
|
||||||
|
reg [`UP(ISSUE_RATIO)-1:0] inuse_tensor;
|
||||||
|
|
||||||
wire writeback_fire = writeback_if[i].valid && writeback_if[i].data.eop;
|
wire writeback_fire = writeback_if[i].valid && writeback_if[i].data.eop;
|
||||||
|
|
||||||
@@ -227,6 +230,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
inuse_regs <= '0;
|
inuse_regs <= '0;
|
||||||
|
inuse_tensor <= '0;
|
||||||
end else begin
|
end else begin
|
||||||
if (writeback_fire) begin
|
if (writeback_fire) begin
|
||||||
inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0;
|
inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0;
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||||
localparam PID_WIDTH = `UP(PID_BITS);
|
localparam PID_WIDTH = `UP(PID_BITS);
|
||||||
|
|
||||||
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `XLEN + PID_WIDTH + 1 + 1;
|
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `XLEN + 1 + PID_WIDTH + 1 + 1;
|
||||||
localparam RSP_ARB_SIZE = 1 + 1;
|
localparam RSP_ARB_SIZE = 1 + 1;
|
||||||
localparam RSP_ARB_IDX_WCTL = 0;
|
localparam RSP_ARB_IDX_WCTL = 0;
|
||||||
localparam RSP_ARB_IDX_CSRS = 1;
|
localparam RSP_ARB_IDX_CSRS = 1;
|
||||||
|
|||||||
@@ -283,10 +283,11 @@ module VX_tensor_core_block import VX_gpu_pkg::*; #(
|
|||||||
assign commit_if_ready_override = commit_if.ready && (counter == 2'b0);
|
assign commit_if_ready_override = commit_if.ready && (counter == 2'b0);
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1;
|
localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1 + 1;
|
||||||
wire [COMMIT_DATAW-1:0] commit_if_data = {
|
wire [COMMIT_DATAW-1:0] commit_if_data = {
|
||||||
execute_if_data_deq[wb_wid], /* uuid ~ rd */
|
execute_if_data_deq[wb_wid], /* uuid ~ rd */
|
||||||
subcommit == 1'b0 ? wb_data_0 : wb_data_1, /* data */
|
subcommit == 1'b0 ? wb_data_0 : wb_data_1, /* data */
|
||||||
|
1'b0, /* tensor */
|
||||||
1'b0, /* pid */
|
1'b0, /* pid */
|
||||||
1'b1, /* sop */
|
1'b1, /* sop */
|
||||||
1'b1 /* eop */
|
1'b1 /* eop */
|
||||||
|
|||||||
@@ -128,11 +128,12 @@ module VX_tensor_hopper_core_block import VX_gpu_pkg::*; #(
|
|||||||
|
|
||||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] wb_data = '0;
|
wire [`NUM_THREADS-1:0][`XLEN-1:0] wb_data = '0;
|
||||||
|
|
||||||
localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1;
|
localparam COMMIT_DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `XLEN + 1 + `NR_BITS + (`NUM_THREADS * `XLEN) + 1 + 1 + 1 + 1;
|
||||||
wire [COMMIT_DATAW-1:0] commit_if_data = {
|
wire [COMMIT_DATAW-1:0] commit_if_data = {
|
||||||
// write-back to the correct rd only when eop
|
// write-back to the correct rd only when eop
|
||||||
((state == 2'b11) ? execute_if_data_deq[0/*FIXME*/] : execute_if_data_new_rd), /* uuid ~ rd */
|
((state == 2'b11) ? execute_if_data_deq[0/*FIXME*/] : execute_if_data_new_rd), /* uuid ~ rd */
|
||||||
wb_data, /* data */
|
wb_data, /* data */
|
||||||
|
1'b0, /* tensor */
|
||||||
1'b0, /* pid */
|
1'b0, /* pid */
|
||||||
1'b1, /* sop */
|
1'b1, /* sop */
|
||||||
(state == 2'b11) /* eop */
|
(state == 2'b11) /* eop */
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||||
localparam PID_WIDTH = `UP(PID_BITS);
|
localparam PID_WIDTH = `UP(PID_BITS);
|
||||||
localparam WCTL_WIDTH = $bits(tmc_t) + $bits(wspawn_t) + $bits(split_t) + $bits(join_t) + $bits(barrier_t);
|
localparam WCTL_WIDTH = $bits(tmc_t) + $bits(wspawn_t) + $bits(split_t) + $bits(join_t) + $bits(barrier_t);
|
||||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + WCTL_WIDTH + PID_WIDTH + 1 + 1;
|
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + WCTL_WIDTH + 1 + PID_WIDTH + 1 + 1;
|
||||||
|
|
||||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||||
|
|
||||||
@@ -141,8 +141,8 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in (execute_if.valid),
|
.valid_in (execute_if.valid),
|
||||||
.ready_in (execute_if.ready),
|
.ready_in (execute_if.ready),
|
||||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, {tmc, wspawn, split, sjoin, barrier}}),
|
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, 1'b0/*tensor*/, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, {tmc, wspawn, split, sjoin, barrier}}),
|
||||||
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, {tmc_r, wspawn_r, split_r, sjoin_r, barrier_r}}),
|
.data_out ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.rd, commit_if.data.wb, commit_if.data.tensor, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop, {tmc_r, wspawn_r, split_r, sjoin_r, barrier_r}}),
|
||||||
.valid_out (commit_if.valid),
|
.valid_out (commit_if.valid),
|
||||||
.ready_out (commit_if.ready)
|
.ready_out (commit_if.ready)
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ interface VX_commit_if #(
|
|||||||
logic wb;
|
logic wb;
|
||||||
logic [`NR_BITS-1:0] rd;
|
logic [`NR_BITS-1:0] rd;
|
||||||
logic [NUM_LANES-1:0][`XLEN-1:0] data;
|
logic [NUM_LANES-1:0][`XLEN-1:0] data;
|
||||||
|
logic tensor;
|
||||||
logic [PID_WIDTH-1:0] pid;
|
logic [PID_WIDTH-1:0] pid;
|
||||||
logic sop;
|
logic sop;
|
||||||
logic eop;
|
logic eop;
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ interface VX_writeback_if import VX_gpu_pkg::*; ();
|
|||||||
logic [`XLEN-1:0] PC;
|
logic [`XLEN-1:0] PC;
|
||||||
logic [`NR_BITS-1:0] rd;
|
logic [`NR_BITS-1:0] rd;
|
||||||
logic [`NUM_THREADS-1:0][`XLEN-1:0] data;
|
logic [`NUM_THREADS-1:0][`XLEN-1:0] data;
|
||||||
|
logic tensor;
|
||||||
logic sop;
|
logic sop;
|
||||||
logic eop;
|
logic eop;
|
||||||
} data_t;
|
} data_t;
|
||||||
|
|||||||
Reference in New Issue
Block a user