scoreboard optimization - using writeback's end-of-packet status
This commit is contained in:
@@ -106,6 +106,8 @@ module VX_alu_unit #(
|
||||
.data_in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}),
|
||||
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r})
|
||||
);
|
||||
|
||||
assign alu_commit_if.eop = 1'b1;
|
||||
|
||||
wire is_less = cmp_result_r[32];
|
||||
wire is_equal = ~(| cmp_result_r[31:0]);
|
||||
|
||||
@@ -63,6 +63,7 @@ module VX_csr_io_arb (
|
||||
assign csr_commit_if.PC = csr_pipe_rsp_if.PC;
|
||||
assign csr_commit_if.rd = csr_pipe_rsp_if.rd;
|
||||
assign csr_commit_if.wb = csr_pipe_rsp_if.wb;
|
||||
assign csr_commit_if.eop = csr_pipe_rsp_if.eop;
|
||||
assign csr_commit_if.data = csr_pipe_rsp_if.data;
|
||||
|
||||
assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_ready : csr_commit_if.ready;
|
||||
|
||||
@@ -125,6 +125,8 @@ module VX_csr_unit #(
|
||||
csr_read_data_s1;
|
||||
end
|
||||
|
||||
assign csr_pipe_rsp_if.eop = 1'b1;
|
||||
|
||||
// can accept new request?
|
||||
assign csr_pipe_req_if.ready = ~(stall_out || stall_in);
|
||||
|
||||
|
||||
@@ -161,6 +161,8 @@ module VX_fpu_unit #(
|
||||
.data_out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
|
||||
);
|
||||
|
||||
assign fpu_commit_if.eop = 1'b1;
|
||||
|
||||
assign ready_out = ~stall_out;
|
||||
|
||||
// CSR fflags Update
|
||||
|
||||
@@ -86,6 +86,8 @@ module VX_gpu_unit #(
|
||||
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
);
|
||||
|
||||
assign gpu_commit_if.eop = 1'b1;
|
||||
|
||||
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
assign warp_ctl_if.wid = gpu_commit_if.wid;
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ module VX_ibuffer #(
|
||||
VX_decode_if ibuf_enq_if,
|
||||
|
||||
// outputs
|
||||
output wire [`NW_BITS-1:0] deq_wid_next,
|
||||
VX_decode_if ibuf_deq_if
|
||||
);
|
||||
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
|
||||
@@ -194,6 +195,8 @@ module VX_ibuffer #(
|
||||
end
|
||||
end
|
||||
|
||||
assign deq_wid_next = deq_wid_n;
|
||||
|
||||
assign ibuf_enq_if.ready = ~q_full[ibuf_enq_if.wid];
|
||||
assign q_data_in = {ibuf_enq_if.tmask,
|
||||
ibuf_enq_if.PC,
|
||||
|
||||
@@ -27,6 +27,7 @@ module VX_issue #(
|
||||
VX_gpr_req_if gpr_req_if();
|
||||
VX_gpr_rsp_if gpr_rsp_if();
|
||||
|
||||
wire [`NW_BITS-1:0] deq_wid_next;
|
||||
wire scoreboard_delay;
|
||||
|
||||
VX_ibuffer #(
|
||||
@@ -36,7 +37,8 @@ module VX_issue #(
|
||||
.reset (reset),
|
||||
.freeze (1'b0),
|
||||
.ibuf_enq_if (decode_if),
|
||||
.ibuf_deq_if (ibuf_deq_if)
|
||||
.ibuf_deq_if (ibuf_deq_if),
|
||||
.deq_wid_next (deq_wid_next)
|
||||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
@@ -46,6 +48,7 @@ module VX_issue #(
|
||||
.reset (reset),
|
||||
.ibuf_deq_if (ibuf_deq_if),
|
||||
.writeback_if (writeback_if),
|
||||
.deq_wid_next (deq_wid_next),
|
||||
.delay (scoreboard_delay)
|
||||
);
|
||||
|
||||
@@ -120,6 +123,7 @@ module VX_issue #(
|
||||
`SCOPE_ASSIGN (writeback_pc, writeback_if.PC);
|
||||
`SCOPE_ASSIGN (writeback_rd, writeback_if.rd);
|
||||
`SCOPE_ASSIGN (writeback_data, writeback_if.data);
|
||||
// Writeback end-of-packet flag; VX_writeback_if names this member 'eop' (there is no 'eof').
`SCOPE_ASSIGN (writeback_eop, writeback_if.eop);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [63:0] perf_ibf_stalls;
|
||||
|
||||
@@ -201,6 +201,7 @@ module VX_lsu_unit #(
|
||||
assign st_commit_if.PC = req_pc;
|
||||
assign st_commit_if.rd = 0;
|
||||
assign st_commit_if.wb = 0;
|
||||
assign st_commit_if.eop = 1'b1;
|
||||
assign st_commit_if.data = 0;
|
||||
|
||||
// send load commit
|
||||
@@ -210,14 +211,14 @@ module VX_lsu_unit #(
|
||||
wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
|
||||
.RESETW (1)
|
||||
) rsp_pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!load_rsp_stall),
|
||||
.data_in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}),
|
||||
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data})
|
||||
.data_in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
|
||||
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
|
||||
);
|
||||
|
||||
// Can accept new cache response?
|
||||
|
||||
@@ -138,6 +138,8 @@ module VX_mul_unit #(
|
||||
.data_out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
|
||||
);
|
||||
|
||||
assign mul_commit_if.eop = 1'b1;
|
||||
|
||||
// can accept new request?
|
||||
assign mul_req_if.ready = is_div_op ? div_ready_in : mul_ready_in;
|
||||
|
||||
|
||||
@@ -8,44 +8,41 @@ module VX_scoreboard #(
|
||||
|
||||
VX_decode_if ibuf_deq_if,
|
||||
VX_writeback_if writeback_if,
|
||||
input wire [`NW_BITS-1:0] deq_wid_next,
|
||||
output wire delay
|
||||
);
|
||||
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_reg_mask;
|
||||
wire [`NUM_REGS-1:0] inuse_regs;
|
||||
wire [`NUM_THREADS-1:0] inuse_registers_n;
|
||||
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
|
||||
reg [`NUM_REGS-1:0] deq_inuse_regs;
|
||||
wire [`NUM_REGS-1:0] deq_real_inuse_regs;
|
||||
|
||||
assign inuse_regs = inuse_reg_mask[ibuf_deq_if.wid] & ibuf_deq_if.used_regs;
|
||||
assign deq_real_inuse_regs = deq_inuse_regs & ibuf_deq_if.used_regs;
|
||||
|
||||
assign delay = (| inuse_regs);
|
||||
assign delay = (| deq_real_inuse_regs);
|
||||
|
||||
wire reserve_reg = ibuf_deq_if.valid && ibuf_deq_if.ready && (ibuf_deq_if.wb != 0);
|
||||
|
||||
wire release_reg = writeback_if.valid && writeback_if.ready;
|
||||
wire release_reg = writeback_if.valid && writeback_if.ready && writeback_if.eop;
|
||||
|
||||
assign inuse_registers_n = inuse_registers[{writeback_if.wid, writeback_if.rd}] & ~writeback_if.tmask;
|
||||
always @(*) begin
|
||||
inuse_regs_n = inuse_regs;
|
||||
if (reserve_reg) begin
|
||||
inuse_regs_n[ibuf_deq_if.wid][ibuf_deq_if.rd] = 1;
|
||||
end
|
||||
if (release_reg) begin
|
||||
inuse_regs_n[writeback_if.wid][writeback_if.rd] = 0;
|
||||
assert(inuse_regs[writeback_if.wid][writeback_if.rd] != 0)
|
||||
else $error("*** %t: core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd);
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer w = 0; w < `NUM_WARPS; w++) begin
|
||||
for (integer i = 0; i < `NUM_REGS; i++) begin
|
||||
inuse_registers[w * `NUM_REGS + i] <= 0;
|
||||
end
|
||||
inuse_reg_mask[w] <= `NUM_REGS'(0);
|
||||
end
|
||||
inuse_regs <= (`NUM_WARPS*`NUM_REGS)'(0);
|
||||
end else begin
|
||||
if (reserve_reg) begin
|
||||
inuse_registers[{ibuf_deq_if.wid, ibuf_deq_if.rd}] <= ibuf_deq_if.tmask;
|
||||
inuse_reg_mask[ibuf_deq_if.wid][ibuf_deq_if.rd] <= 1;
|
||||
end
|
||||
if (release_reg) begin
|
||||
assert(inuse_reg_mask[writeback_if.wid][writeback_if.rd] != 0)
|
||||
else $error("*** %t: core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
|
||||
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd);
|
||||
inuse_registers[{writeback_if.wid, writeback_if.rd}] <= inuse_registers_n;
|
||||
inuse_reg_mask[writeback_if.wid][writeback_if.rd] <= (| inuse_registers_n);
|
||||
end
|
||||
end
|
||||
inuse_regs <= inuse_regs_n;
|
||||
end
|
||||
deq_inuse_regs <= inuse_regs_n[deq_wid_next];
|
||||
end
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
@@ -53,7 +50,7 @@ module VX_scoreboard #(
|
||||
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
|
||||
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3]);
|
||||
deq_real_inuse_regs[ibuf_deq_if.rd], deq_real_inuse_regs[ibuf_deq_if.rs1], deq_real_inuse_regs[ibuf_deq_if.rs2], deq_real_inuse_regs[ibuf_deq_if.rs3]);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
@@ -66,7 +63,7 @@ module VX_scoreboard #(
|
||||
stall_ctr <= stall_ctr + 1;
|
||||
assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
|
||||
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3]);
|
||||
deq_real_inuse_regs[ibuf_deq_if.rd], deq_real_inuse_regs[ibuf_deq_if.rs1], deq_real_inuse_regs[ibuf_deq_if.rs2], deq_real_inuse_regs[ibuf_deq_if.rs3]);
|
||||
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
|
||||
stall_ctr <= 0;
|
||||
end
|
||||
|
||||
@@ -20,68 +20,69 @@ module VX_writeback #(
|
||||
wire ld_valid = ld_commit_if.valid && ld_commit_if.wb;
|
||||
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
|
||||
wire mul_valid = mul_commit_if.valid && mul_commit_if.wb;
|
||||
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
|
||||
/*wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;*/
|
||||
|
||||
wire wb_valid;
|
||||
wire [`NW_BITS-1:0] wb_wid;
|
||||
wire [31:0] wb_PC;
|
||||
wire [31:0] wb_PC;
|
||||
wire [`NUM_THREADS-1:0] wb_tmask;
|
||||
wire [`NR_BITS-1:0] wb_rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] wb_data;
|
||||
wire wb_eop;
|
||||
|
||||
assign wb_valid = alu_valid ? alu_commit_if.valid :
|
||||
ld_valid ? ld_commit_if.valid :
|
||||
csr_valid ? csr_commit_if.valid :
|
||||
mul_valid ? mul_commit_if.valid :
|
||||
fpu_valid ? fpu_commit_if.valid :
|
||||
0;
|
||||
/*fpu_valid ?*/ fpu_commit_if.valid;
|
||||
|
||||
assign wb_wid = alu_valid ? alu_commit_if.wid :
|
||||
ld_valid ? ld_commit_if.wid :
|
||||
csr_valid ? csr_commit_if.wid :
|
||||
mul_valid ? mul_commit_if.wid :
|
||||
fpu_valid ? fpu_commit_if.wid :
|
||||
0;
|
||||
/*fpu_valid ?*/ fpu_commit_if.wid;
|
||||
|
||||
assign wb_PC = alu_valid ? alu_commit_if.PC :
|
||||
ld_valid ? ld_commit_if.PC :
|
||||
csr_valid ? csr_commit_if.PC :
|
||||
mul_valid ? mul_commit_if.PC :
|
||||
fpu_valid ? fpu_commit_if.PC :
|
||||
0;
|
||||
/*fpu_valid ?*/ fpu_commit_if.PC;
|
||||
|
||||
assign wb_tmask = alu_valid ? alu_commit_if.tmask :
|
||||
ld_valid ? ld_commit_if.tmask :
|
||||
csr_valid ? csr_commit_if.tmask :
|
||||
mul_valid ? mul_commit_if.tmask :
|
||||
fpu_valid ? fpu_commit_if.tmask :
|
||||
0;
|
||||
/*fpu_valid ?*/ fpu_commit_if.tmask;
|
||||
|
||||
assign wb_rd = alu_valid ? alu_commit_if.rd :
|
||||
ld_valid ? ld_commit_if.rd :
|
||||
csr_valid ? csr_commit_if.rd :
|
||||
mul_valid ? mul_commit_if.rd :
|
||||
fpu_valid ? fpu_commit_if.rd :
|
||||
0;
|
||||
/*fpu_valid ?*/ fpu_commit_if.rd;
|
||||
|
||||
assign wb_data = alu_valid ? alu_commit_if.data :
|
||||
ld_valid ? ld_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
mul_valid ? mul_commit_if.data :
|
||||
fpu_valid ? fpu_commit_if.data :
|
||||
0;
|
||||
/*fpu_valid ?*/ fpu_commit_if.data;
|
||||
|
||||
assign wb_eop = alu_valid ? alu_commit_if.eop :
|
||||
ld_valid ? ld_commit_if.eop :
|
||||
csr_valid ? csr_commit_if.eop :
|
||||
mul_valid ? mul_commit_if.eop :
|
||||
/*fpu_valid ?*/ fpu_commit_if.eop;
|
||||
|
||||
wire stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)),
|
||||
.DATAW (1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall),
|
||||
.data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}),
|
||||
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data})
|
||||
.data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data, wb_eop}),
|
||||
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data, writeback_if.eop})
|
||||
);
|
||||
|
||||
assign alu_commit_if.ready = !stall;
|
||||
|
||||
@@ -44,12 +44,6 @@ module VX_fp_ncomp #(
|
||||
fp_type_t [LANES-1:0] tmp_a_type, tmp_b_type;
|
||||
wire [LANES-1:0] tmp_a_smaller, tmp_ab_equal;
|
||||
|
||||
wire [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
|
||||
wire [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
|
||||
wire [LANES-1:0][31:0] fsgnj_res; // result of sign injection
|
||||
wire [LANES-1:0][31:0] fcmp_res; // result of comparison
|
||||
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
|
||||
|
||||
// Setup
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
assign tmp_a_sign[i] = dataa[i][31];
|
||||
@@ -103,6 +97,7 @@ module VX_fp_ncomp #(
|
||||
);
|
||||
|
||||
// FCLASS
|
||||
reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type_s0[i].is_normal) begin
|
||||
@@ -126,7 +121,8 @@ module VX_fp_ncomp #(
|
||||
end
|
||||
end
|
||||
|
||||
// Min/Max
|
||||
// Min/Max
|
||||
reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type_s0[i].is_nan && b_type_s0[i].is_nan)
|
||||
@@ -145,7 +141,8 @@ module VX_fp_ncomp #(
|
||||
end
|
||||
end
|
||||
|
||||
// Sign injection
|
||||
// Sign injection
|
||||
reg [LANES-1:0][31:0] fsgnj_res; // result of sign injection
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (frm_s0)
|
||||
@@ -158,6 +155,8 @@ module VX_fp_ncomp #(
|
||||
end
|
||||
|
||||
// Comparison
|
||||
reg [LANES-1:0][31:0] fcmp_res; // result of comparison
|
||||
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (frm_s0)
|
||||
|
||||
@@ -12,6 +12,7 @@ interface VX_commit_if ();
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire eop;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -15,6 +15,7 @@ interface VX_writeback_if ();
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
|
||||
wire eop;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -185,6 +185,7 @@
|
||||
"writeback_tmask":"`NUM_THREADS",
|
||||
"writeback_rd":"`NR_BITS",
|
||||
"writeback_data":"`NUM_THREADS * 32",
|
||||
"writeback_eop": 1,
|
||||
"!scoreboard_delay": 1,
|
||||
"!execute_delay": 1
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user