warp scheduler optimization

This commit is contained in:
Blaise Tine
2021-08-07 14:44:37 -07:00
parent b5af2065ee
commit 5b8e58e15e
2 changed files with 25 additions and 46 deletions

View File

@@ -114,11 +114,11 @@ module VX_ibuffer #(
) rr_arbiter ( ) rr_arbiter (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.enable (ibuffer_if.ready),
.requests (valid_table_n), .requests (valid_table_n),
.grant_index (deq_wid_rr_n), .grant_index (deq_wid_rr_n),
`UNUSED_PIN (grant_onehot), `UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_valid) `UNUSED_PIN (grant_onehot),
`UNUSED_PIN (enable)
); );
// schedule the next instruction to issue // schedule the next instruction to issue

View File

@@ -28,12 +28,10 @@ module VX_warp_sched #(
wire [`NUM_THREADS-1:0] join_tm; wire [`NUM_THREADS-1:0] join_tm;
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // real active warps (updated when a warp is activated or disabled) reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // real active warps (updated when a warp is activated or disabled)
reg [`NUM_WARPS-1:0] schedule_table, schedule_table_n; // enforces round-robin, barrier, and non-speculating branches
reg [`NUM_WARPS-1:0] stalled_warps; // asserted when a branch/gpgpu instructions are issued reg [`NUM_WARPS-1:0] stalled_warps; // asserted when a branch/gpgpu instructions are issued
// Lock warp until instruction decode to resolve branches // Lock warp until instruction decode to resolve branches
reg [`NUM_WARPS-1:0] fetch_lock; reg [`NUM_WARPS-1:0] fetch_lock;
reg [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks; reg [`NUM_WARPS-1:0][`NUM_THREADS-1:0] thread_masks;
reg [`NUM_WARPS-1:0][31:0] warp_pcs, warp_next_pcs; reg [`NUM_WARPS-1:0][31:0] warp_pcs, warp_next_pcs;
@@ -44,12 +42,13 @@ module VX_warp_sched #(
// wspawn // wspawn
reg [31:0] use_wspawn_pc; reg [31:0] use_wspawn_pc;
reg [`NUM_WARPS-1:0] use_wspawn; reg [`NUM_WARPS-1:0] use_wspawn;
reg [`NW_BITS-1:0] schedule_warp;
wire [`NW_BITS-1:0] schedule_warp;
wire warp_scheduled; wire warp_scheduled;
wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready; wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready;
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready; wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
wire tmc_active = (warp_ctl_if.tmc.tmask != 0); wire tmc_active = (warp_ctl_if.tmc.tmask != 0);
@@ -63,16 +62,6 @@ module VX_warp_sched #(
end end
end end
// Combinational next-state logic for 'schedule_table' (one bit per warp).
// Starts from the current table and applies the two updates below in order;
// because these are blocking assignments, the later one wins when both
// target the same warp bit in the same cycle.
always @(*) begin
// default: hold the current value
schedule_table_n = schedule_table;
// tmc control: the warp's bit follows tmc_active, i.e. it is set when the
// new thread mask is non-zero and cleared when the mask is zero
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
schedule_table_n[warp_ctl_if.wid] = tmc_active;
end
if (warp_scheduled) begin // remove scheduled warp (round-robin)
schedule_table_n[schedule_warp] = 0;
end
end
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
for (integer i = 0; i < `NUM_BARRIERS; i++) begin for (integer i = 0; i < `NUM_BARRIERS; i++) begin
@@ -83,16 +72,14 @@ module VX_warp_sched #(
use_wspawn <= 0; use_wspawn <= 0;
warp_pcs[0] <= `STARTUP_ADDR; warp_pcs[0] <= `STARTUP_ADDR;
active_warps[0] <= 1; // Activating first warp active_warps[0] <= 1; // Activating first warp
schedule_table[0] <= 1; // set first warp as ready
thread_masks[0] <= 1; // Activating first thread in first warp thread_masks[0] <= 1; // Activating first thread in first warp
stalled_warps <= 0; stalled_warps <= 0;
fetch_lock <= 0; fetch_lock <= 0;
for (integer i = 1; i < `NUM_WARPS; i++) begin for (integer i = 1; i < `NUM_WARPS; i++) begin
warp_pcs[i] <= 0; warp_pcs[i] <= 0;
active_warps[i] <= 0; active_warps[i] <= 0;
schedule_table[i] <= 0; thread_masks[i] <= 0;
thread_masks[i] <= 0;
end end
end else begin end else begin
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
@@ -158,9 +145,6 @@ module VX_warp_sched #(
end end
active_warps <= active_warps_n; active_warps <= active_warps_n;
// reset 'schedule_table' when it goes to zero
schedule_table <= (| schedule_table_n) ? schedule_table_n : active_warps_n;
end end
end end
@@ -219,29 +203,24 @@ module VX_warp_sched #(
assign {join_else, join_pc, join_tm} = ipdom [join_if.wid]; assign {join_else, join_pc, join_tm} = ipdom [join_if.wid];
// calculate next warp schedule // round-robin warp scheduling
reg [`NUM_THREADS-1:0] thread_mask; wire schedule_valid;
reg schedule_valid;
reg [31:0] warp_pc;
wire [`NUM_WARPS-1:0] schedule_ready = schedule_table & ~(stalled_warps | total_barrier_stall | fetch_lock);
always @(*) begin VX_rr_arbiter #(
schedule_valid = 0; .NUM_REQS (`NUM_WARPS)
thread_mask = 'x; ) rr_arbiter (
warp_pc = 'x; .clk (clk),
schedule_warp = 'x; .reset (reset),
for (integer i = 0; i < `NUM_WARPS; ++i) begin .requests (active_warps & ~(stalled_warps | total_barrier_stall | fetch_lock)),
if (schedule_ready[i]) begin .grant_index (schedule_warp),
schedule_valid = 1; .grant_valid (schedule_valid),
thread_mask = use_wspawn[i] ? `NUM_THREADS'(1) : thread_masks[i]; `UNUSED_PIN (grant_onehot),
warp_pc = use_wspawn[i] ? use_wspawn_pc : warp_pcs[i]; `UNUSED_PIN (enable)
schedule_warp = `NW_BITS'(i); );
break;
end wire [`NUM_THREADS-1:0] thread_mask = use_wspawn[schedule_warp] ? `NUM_THREADS'(1) : thread_masks[schedule_warp];
end wire [31:0] warp_pc = use_wspawn[schedule_warp] ? use_wspawn_pc : warp_pcs[schedule_warp];
end
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid; wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;