scoreboard optimization - using writeback's end-of-packet status

This commit is contained in:
Blaise Tine
2020-12-30 06:47:56 -08:00
parent e431162347
commit 9f128085d5
15 changed files with 76 additions and 57 deletions

View File

@@ -106,6 +106,8 @@ module VX_alu_unit #(
.data_in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, br_dest, cmp_result}),
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, branch_ctl_if.dest, cmp_result_r})
);
assign alu_commit_if.eop = 1'b1;
wire is_less = cmp_result_r[32];
wire is_equal = ~(| cmp_result_r[31:0]);

View File

@@ -63,6 +63,7 @@ module VX_csr_io_arb (
assign csr_commit_if.PC = csr_pipe_rsp_if.PC;
assign csr_commit_if.rd = csr_pipe_rsp_if.rd;
assign csr_commit_if.wb = csr_pipe_rsp_if.wb;
assign csr_commit_if.eop = csr_pipe_rsp_if.eop;
assign csr_commit_if.data = csr_pipe_rsp_if.data;
assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_ready : csr_commit_if.ready;

View File

@@ -125,6 +125,8 @@ module VX_csr_unit #(
csr_read_data_s1;
end
assign csr_pipe_rsp_if.eop = 1'b1;
// can accept new request?
assign csr_pipe_req_if.ready = ~(stall_out || stall_in);

View File

@@ -161,6 +161,8 @@ module VX_fpu_unit #(
.data_out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
);
assign fpu_commit_if.eop = 1'b1;
assign ready_out = ~stall_out;
// CSR fflags Update

View File

@@ -86,6 +86,8 @@ module VX_gpu_unit #(
.data_out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.tmask, gpu_commit_if.PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
);
assign gpu_commit_if.eop = 1'b1;
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready;
assign warp_ctl_if.wid = gpu_commit_if.wid;

View File

@@ -11,6 +11,7 @@ module VX_ibuffer #(
VX_decode_if ibuf_enq_if,
// outputs
output wire [`NW_BITS-1:0] deq_wid_next,
VX_decode_if ibuf_deq_if
);
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
@@ -194,6 +195,8 @@ module VX_ibuffer #(
end
end
assign deq_wid_next = deq_wid_n;
assign ibuf_enq_if.ready = ~q_full[ibuf_enq_if.wid];
assign q_data_in = {ibuf_enq_if.tmask,
ibuf_enq_if.PC,

View File

@@ -27,6 +27,7 @@ module VX_issue #(
VX_gpr_req_if gpr_req_if();
VX_gpr_rsp_if gpr_rsp_if();
wire [`NW_BITS-1:0] deq_wid_next;
wire scoreboard_delay;
VX_ibuffer #(
@@ -36,7 +37,8 @@ module VX_issue #(
.reset (reset),
.freeze (1'b0),
.ibuf_enq_if (decode_if),
.ibuf_deq_if (ibuf_deq_if)
.ibuf_deq_if (ibuf_deq_if),
.deq_wid_next (deq_wid_next)
);
VX_scoreboard #(
@@ -46,6 +48,7 @@ module VX_issue #(
.reset (reset),
.ibuf_deq_if (ibuf_deq_if),
.writeback_if (writeback_if),
.deq_wid_next (deq_wid_next),
.delay (scoreboard_delay)
);
@@ -120,6 +123,7 @@ module VX_issue #(
`SCOPE_ASSIGN (writeback_pc, writeback_if.PC);
`SCOPE_ASSIGN (writeback_rd, writeback_if.rd);
`SCOPE_ASSIGN (writeback_data, writeback_if.data);
// tap the end-of-packet flag; the VX_writeback_if member is named `eop` (not `eof`)
`SCOPE_ASSIGN (writeback_eop, writeback_if.eop);
`ifdef PERF_ENABLE
reg [63:0] perf_ibf_stalls;

View File

@@ -201,6 +201,7 @@ module VX_lsu_unit #(
assign st_commit_if.PC = req_pc;
assign st_commit_if.rd = 0;
assign st_commit_if.wb = 0;
assign st_commit_if.eop = 1'b1;
assign st_commit_if.data = 0;
// send load commit
@@ -210,14 +211,14 @@ module VX_lsu_unit #(
wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
.RESETW (1)
) rsp_pipe_reg (
.clk (clk),
.reset (reset),
.enable (!load_rsp_stall),
.data_in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data}),
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data})
.data_in ({is_load_rsp, rsp_wid, dcache_rsp_if.valid, rsp_pc, rsp_rd, rsp_wb, rsp_data, mbuf_pop}),
.data_out ({ld_commit_if.valid, ld_commit_if.wid, ld_commit_if.tmask, ld_commit_if.PC, ld_commit_if.rd, ld_commit_if.wb, ld_commit_if.data, ld_commit_if.eop})
);
// Can accept new cache response?

View File

@@ -138,6 +138,8 @@ module VX_mul_unit #(
.data_out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
);
assign mul_commit_if.eop = 1'b1;
// can accept new request?
assign mul_req_if.ready = is_div_op ? div_ready_in : mul_ready_in;

View File

@@ -8,44 +8,41 @@ module VX_scoreboard #(
VX_decode_if ibuf_deq_if,
VX_writeback_if writeback_if,
input wire [`NW_BITS-1:0] deq_wid_next,
output wire delay
);
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_reg_mask;
wire [`NUM_REGS-1:0] inuse_regs;
wire [`NUM_THREADS-1:0] inuse_registers_n;
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
reg [`NUM_REGS-1:0] deq_inuse_regs;
wire [`NUM_REGS-1:0] deq_real_inuse_regs;
assign inuse_regs = inuse_reg_mask[ibuf_deq_if.wid] & ibuf_deq_if.used_regs;
assign deq_real_inuse_regs = deq_inuse_regs & ibuf_deq_if.used_regs;
assign delay = (| inuse_regs);
assign delay = (| deq_real_inuse_regs);
wire reserve_reg = ibuf_deq_if.valid && ibuf_deq_if.ready && (ibuf_deq_if.wb != 0);
wire release_reg = writeback_if.valid && writeback_if.ready;
wire release_reg = writeback_if.valid && writeback_if.ready && writeback_if.eop;
assign inuse_registers_n = inuse_registers[{writeback_if.wid, writeback_if.rd}] & ~writeback_if.tmask;
// Combinational next-state of the per-warp in-use register bitmap:
// start from the current state, then apply this cycle's reserve and release.
always @(*) begin
inuse_regs_n = inuse_regs;
if (reserve_reg) begin
// mark rd of the dequeued instruction as in use
inuse_regs_n[ibuf_deq_if.wid][ibuf_deq_if.rd] = 1;
end
if (release_reg) begin
// applied after the reserve above, so a release wins if both target the same bit
inuse_regs_n[writeback_if.wid][writeback_if.rd] = 0;
// sanity check: the register being written back must currently be marked in use
assert(inuse_regs[writeback_if.wid][writeback_if.rd] != 0)
else $error("*** %t: core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd);
end
end
always @(posedge clk) begin
if (reset) begin
for (integer w = 0; w < `NUM_WARPS; w++) begin
for (integer i = 0; i < `NUM_REGS; i++) begin
inuse_registers[w * `NUM_REGS + i] <= 0;
end
inuse_reg_mask[w] <= `NUM_REGS'(0);
end
inuse_regs <= (`NUM_WARPS*`NUM_REGS)'(0);
end else begin
if (reserve_reg) begin
inuse_registers[{ibuf_deq_if.wid, ibuf_deq_if.rd}] <= ibuf_deq_if.tmask;
inuse_reg_mask[ibuf_deq_if.wid][ibuf_deq_if.rd] <= 1;
end
if (release_reg) begin
assert(inuse_reg_mask[writeback_if.wid][writeback_if.rd] != 0)
else $error("*** %t: core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd);
inuse_registers[{writeback_if.wid, writeback_if.rd}] <= inuse_registers_n;
inuse_reg_mask[writeback_if.wid][writeback_if.rd] <= (| inuse_registers_n);
end
end
inuse_regs <= inuse_regs_n;
end
deq_inuse_regs <= inuse_regs_n[deq_wid_next];
end
`ifdef DBG_PRINT_PIPELINE
@@ -53,7 +50,7 @@ module VX_scoreboard #(
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3]);
deq_real_inuse_regs[ibuf_deq_if.rd], deq_real_inuse_regs[ibuf_deq_if.rs1], deq_real_inuse_regs[ibuf_deq_if.rs2], deq_real_inuse_regs[ibuf_deq_if.rs3]);
end
end
`endif
@@ -66,7 +63,7 @@ module VX_scoreboard #(
stall_ctr <= stall_ctr + 1;
assert(stall_ctr < 100000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3]);
deq_real_inuse_regs[ibuf_deq_if.rd], deq_real_inuse_regs[ibuf_deq_if.rs1], deq_real_inuse_regs[ibuf_deq_if.rs2], deq_real_inuse_regs[ibuf_deq_if.rs3]);
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
stall_ctr <= 0;
end

View File

@@ -20,68 +20,69 @@ module VX_writeback #(
wire ld_valid = ld_commit_if.valid && ld_commit_if.wb;
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
wire mul_valid = mul_commit_if.valid && mul_commit_if.wb;
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
/*wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;*/
wire wb_valid;
wire [`NW_BITS-1:0] wb_wid;
wire [31:0] wb_PC;
wire [31:0] wb_PC;
wire [`NUM_THREADS-1:0] wb_tmask;
wire [`NR_BITS-1:0] wb_rd;
wire [`NUM_THREADS-1:0][31:0] wb_data;
wire wb_eop;
assign wb_valid = alu_valid ? alu_commit_if.valid :
ld_valid ? ld_commit_if.valid :
csr_valid ? csr_commit_if.valid :
mul_valid ? mul_commit_if.valid :
fpu_valid ? fpu_commit_if.valid :
0;
/*fpu_valid ?*/ fpu_commit_if.valid;
assign wb_wid = alu_valid ? alu_commit_if.wid :
ld_valid ? ld_commit_if.wid :
csr_valid ? csr_commit_if.wid :
mul_valid ? mul_commit_if.wid :
fpu_valid ? fpu_commit_if.wid :
0;
/*fpu_valid ?*/ fpu_commit_if.wid;
assign wb_PC = alu_valid ? alu_commit_if.PC :
ld_valid ? ld_commit_if.PC :
csr_valid ? csr_commit_if.PC :
mul_valid ? mul_commit_if.PC :
fpu_valid ? fpu_commit_if.PC :
0;
/*fpu_valid ?*/ fpu_commit_if.PC;
assign wb_tmask = alu_valid ? alu_commit_if.tmask :
ld_valid ? ld_commit_if.tmask :
csr_valid ? csr_commit_if.tmask :
mul_valid ? mul_commit_if.tmask :
fpu_valid ? fpu_commit_if.tmask :
0;
/*fpu_valid ?*/ fpu_commit_if.tmask;
assign wb_rd = alu_valid ? alu_commit_if.rd :
ld_valid ? ld_commit_if.rd :
csr_valid ? csr_commit_if.rd :
mul_valid ? mul_commit_if.rd :
fpu_valid ? fpu_commit_if.rd :
0;
/*fpu_valid ?*/ fpu_commit_if.rd;
assign wb_data = alu_valid ? alu_commit_if.data :
ld_valid ? ld_commit_if.data :
csr_valid ? csr_commit_if.data :
mul_valid ? mul_commit_if.data :
fpu_valid ? fpu_commit_if.data :
0;
/*fpu_valid ?*/ fpu_commit_if.data;
assign wb_eop = alu_valid ? alu_commit_if.eop :
ld_valid ? ld_commit_if.eop :
csr_valid ? csr_commit_if.eop :
mul_valid ? mul_commit_if.eop :
/*fpu_valid ?*/ fpu_commit_if.eop;
wire stall = ~writeback_if.ready && writeback_if.valid;
VX_pipe_register #(
.DATAW (1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32)),
.DATAW (1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1),
.RESETW (1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (!stall),
.data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}),
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data})
.data_in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data, wb_eop}),
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data, writeback_if.eop})
);
assign alu_commit_if.ready = !stall;

View File

@@ -44,12 +44,6 @@ module VX_fp_ncomp #(
fp_type_t [LANES-1:0] tmp_a_type, tmp_b_type;
wire [LANES-1:0] tmp_a_smaller, tmp_ab_equal;
wire [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
wire [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
wire [LANES-1:0][31:0] fsgnj_res; // result of sign injection
wire [LANES-1:0][31:0] fcmp_res; // result of comparison
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
// Setup
for (genvar i = 0; i < LANES; i++) begin
assign tmp_a_sign[i] = dataa[i][31];
@@ -103,6 +97,7 @@ module VX_fp_ncomp #(
);
// FCLASS
reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
if (a_type_s0[i].is_normal) begin
@@ -126,7 +121,8 @@ module VX_fp_ncomp #(
end
end
// Min/Max
// Min/Max
reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
if (a_type_s0[i].is_nan && b_type_s0[i].is_nan)
@@ -145,7 +141,8 @@ module VX_fp_ncomp #(
end
end
// Sign injection
// Sign injection
reg [LANES-1:0][31:0] fsgnj_res; // result of sign injection
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (frm_s0)
@@ -158,6 +155,8 @@ module VX_fp_ncomp #(
end
// Comparison
reg [LANES-1:0][31:0] fcmp_res; // result of comparison
fflags_t [LANES-1:0] fcmp_fflags; // comparison fflags
for (genvar i = 0; i < LANES; i++) begin
always @(*) begin
case (frm_s0)

View File

@@ -12,6 +12,7 @@ interface VX_commit_if ();
wire [`NUM_THREADS-1:0][31:0] data;
wire [`NR_BITS-1:0] rd;
wire wb;
// end-of-packet flag carried with each commit
// NOTE(review): presumably set on the final commit of an instruction that may
// commit in several parts (e.g. partial load responses) - confirm with producers
wire eop;
wire ready;
endinterface

View File

@@ -15,6 +15,7 @@ interface VX_writeback_if ();
wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] data;
// end-of-packet flag: the scoreboard releases rd only when a writeback
// is accepted (valid && ready) with eop set
wire eop;
wire ready;
endinterface

View File

@@ -185,6 +185,7 @@
"writeback_tmask":"`NUM_THREADS",
"writeback_rd":"`NR_BITS",
"writeback_data":"`NUM_THREADS * 32",
"writeback_eop": 1,
"!scoreboard_delay": 1,
"!execute_delay": 1
},