fixed FPU handshake, optimized writeback's critical path
This commit is contained in:
@@ -8,64 +8,52 @@ module VX_scheduler #(
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
input wire ex_busy,
|
||||
input wire gpr_busy,
|
||||
input wire alu_busy,
|
||||
input wire lsu_busy,
|
||||
input wire csr_busy,
|
||||
input wire mul_busy,
|
||||
input wire fpu_busy,
|
||||
input wire gpu_busy,
|
||||
output wire [`ISTAG_BITS-1:0] issue_tag
|
||||
output wire [`ISTAG_BITS-1:0] issue_tag,
|
||||
output wire schedule_delay
|
||||
);
|
||||
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
||||
reg [`NUM_THREADS-1:0] inuse_registers [`NUM_WARPS-1:0][`NUM_REGS-1:0];
|
||||
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
||||
|
||||
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.warp_num] & decode_if.reg_use_mask;
|
||||
wire inuse_hazard = (inuse_mask != 0);
|
||||
|
||||
wire exu_stalled = (alu_busy && (decode_if.ex_type == `EX_ALU))
|
||||
|| (lsu_busy && (decode_if.ex_type == `EX_LSU))
|
||||
|| (csr_busy && (decode_if.ex_type == `EX_CSR))
|
||||
|| (mul_busy && (decode_if.ex_type == `EX_MUL))
|
||||
|| (fpu_busy && (decode_if.ex_type == `EX_FPU))
|
||||
|| (gpu_busy && (decode_if.ex_type == `EX_GPU));
|
||||
|
||||
wire issue_buf_full;
|
||||
|
||||
wire stall = (gpr_busy || exu_stalled || inuse_hazard || issue_buf_full) && decode_if.valid;
|
||||
wire stall = gpr_busy || ex_busy || inuse_hazard || issue_buf_full;
|
||||
|
||||
wire acquire_rd = decode_if.valid && (decode_if.wb != 0) && ~stall;
|
||||
wire issue_fire = decode_if.valid && ~stall;
|
||||
|
||||
wire acquire_rd = issue_fire && (decode_if.wb != 0);
|
||||
|
||||
wire release_rd = writeback_if.valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[writeback_if.warp_num][writeback_if.rd] & ~writeback_if.thread_mask;
|
||||
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.warp_num, writeback_if.rd}] & ~writeback_if.thread_mask;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
integer i, w;
|
||||
for (w = 0; w < `NUM_WARPS; w++) begin
|
||||
for (i = 0; i < `NUM_REGS; i++) begin
|
||||
inuse_registers[w][i] <= 0;
|
||||
for (integer w = 0; w < `NUM_WARPS; w++) begin
|
||||
for (integer i = 0; i < `NUM_REGS; i++) begin
|
||||
inuse_registers[w * `NUM_REGS + i] <= 0;
|
||||
end
|
||||
inuse_reg_mask[w] <= `NUM_REGS'(0);
|
||||
end
|
||||
end else begin
|
||||
if (acquire_rd) begin
|
||||
inuse_registers[decode_if.warp_num][decode_if.rd] <= decode_if.thread_mask;
|
||||
inuse_registers[{decode_if.warp_num, decode_if.rd}] <= decode_if.thread_mask;
|
||||
inuse_reg_mask[decode_if.warp_num][decode_if.rd] <= 1;
|
||||
end
|
||||
if (release_rd) begin
|
||||
assert(inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] != 0);
|
||||
inuse_registers[writeback_if.warp_num][writeback_if.rd] <= inuse_registers_n;
|
||||
inuse_registers[{writeback_if.warp_num, writeback_if.rd}] <= inuse_registers_n;
|
||||
inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] <= (| inuse_registers_n);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire issue_fire = decode_if.valid && ~stall;
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW ($bits(issue_data_t)),
|
||||
.SIZE (`ISSUEQ_SIZE),
|
||||
@@ -82,14 +70,14 @@ module VX_scheduler #(
|
||||
.full (issue_buf_full)
|
||||
);
|
||||
|
||||
assign decode_if.ready = ~stall;
|
||||
assign schedule_delay = stall;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (stall) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, gpr=%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b",
|
||||
$time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full, inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1],
|
||||
inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], gpr_busy, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
|
||||
if (decode_if.valid && stall) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b, gpr_busy=%b",
|
||||
$time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full,
|
||||
inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy, gpr_busy);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
Reference in New Issue
Block a user