pipeline refactoring
This commit is contained in:
@@ -1,90 +1,38 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_warp_sched (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire stall,
|
||||
module VX_warp_sched #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Wspawn
|
||||
input wire wspawn,
|
||||
input wire[31:0] wsapwn_pc,
|
||||
input wire[`NUM_WARPS-1:0] wspawn_new_active,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if,
|
||||
VX_branch_rsp_if branch_rsp_if,
|
||||
|
||||
// CTM
|
||||
input wire ctm,
|
||||
input wire[`NUM_THREADS-1:0] ctm_mask,
|
||||
input wire[`NW_BITS-1:0] ctm_warp_num,
|
||||
VX_ifetch_rsp_if ifetch_rsp_if,
|
||||
VX_ifetch_req_if ifetch_req_if,
|
||||
|
||||
// WHALT
|
||||
input wire whalt,
|
||||
input wire[`NW_BITS-1:0] whalt_warp_num,
|
||||
|
||||
input wire is_barrier,
|
||||
`DEBUG_BEGIN
|
||||
input wire[31:0] barrier_id,
|
||||
`DEBUG_END
|
||||
input wire[$clog2(`NUM_WARPS):0] num_warps,
|
||||
input wire[`NW_BITS-1:0] barrier_warp_num,
|
||||
|
||||
// WSTALL
|
||||
input wire wstall,
|
||||
input wire [`NW_BITS-1:0] wstall_warp_num,
|
||||
|
||||
// Split
|
||||
input wire is_split,
|
||||
input wire dont_split,
|
||||
input wire [`NUM_THREADS-1:0] split_new_mask,
|
||||
input wire [`NUM_THREADS-1:0] split_later_mask,
|
||||
input wire [31:0] split_save_pc,
|
||||
input wire [`NW_BITS-1:0] split_warp_num,
|
||||
|
||||
// Join
|
||||
input wire is_join,
|
||||
input wire [`NW_BITS-1:0] join_warp_num,
|
||||
|
||||
// JAL
|
||||
input wire jal,
|
||||
input wire [31:0] dest,
|
||||
input wire [`NW_BITS-1:0] jal_warp_num,
|
||||
|
||||
// Branch
|
||||
input wire branch_valid,
|
||||
input wire branch_dir,
|
||||
input wire [31:0] branch_dest,
|
||||
input wire [`NW_BITS-1:0] branch_warp_num,
|
||||
|
||||
output wire [`NUM_THREADS-1:0] thread_mask,
|
||||
output wire [`NW_BITS-1:0] warp_num,
|
||||
output wire [31:0] warp_pc,
|
||||
output wire busy,
|
||||
output wire scheduled_warp,
|
||||
|
||||
input wire [`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire icache_stage_response
|
||||
output wire busy
|
||||
);
|
||||
wire update_use_wspawn;
|
||||
wire update_visible_active;
|
||||
wire scheduled_warp;
|
||||
|
||||
wire[(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
|
||||
wire [(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
|
||||
|
||||
wire join_fall;
|
||||
wire[31:0] join_pc;
|
||||
wire[`NUM_THREADS-1:0] join_tm;
|
||||
wire join_fall;
|
||||
wire [31:0] join_pc;
|
||||
wire [`NUM_THREADS-1:0] join_tm;
|
||||
|
||||
`DEBUG_BEGIN
|
||||
wire in_wspawn = wspawn;
|
||||
wire in_ctm = ctm;
|
||||
wire in_whalt = whalt;
|
||||
wire in_wstall = wstall;
|
||||
`DEBUG_END
|
||||
|
||||
reg[`NUM_WARPS-1:0] warp_active;
|
||||
reg[`NUM_WARPS-1:0] warp_stalled;
|
||||
reg [`NUM_WARPS-1:0] warp_active;
|
||||
reg [`NUM_WARPS-1:0] warp_stalled;
|
||||
|
||||
reg [`NUM_WARPS-1:0] visible_active;
|
||||
wire[`NUM_WARPS-1:0] use_active;
|
||||
wire [`NUM_WARPS-1:0] use_active;
|
||||
|
||||
reg [`NUM_WARPS-1:0] warp_lock;
|
||||
reg [`NUM_WARPS-1:0] warp_lock;
|
||||
|
||||
wire wstall_this_cycle;
|
||||
|
||||
@@ -92,17 +40,23 @@ module VX_warp_sched (
|
||||
reg [31:0] warp_pcs[`NUM_WARPS-1:0];
|
||||
|
||||
// barriers
|
||||
reg [`NUM_WARPS-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
|
||||
wire reached_barrier_limit;
|
||||
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0];
|
||||
wire [`NUM_WARPS-1:0] b_mask;
|
||||
wire [$clog2(`NUM_WARPS):0] b_count;
|
||||
wire [`NW_BITS:0] b_count;
|
||||
|
||||
// wsapwn
|
||||
reg [31:0] use_wsapwn_pc;
|
||||
reg [`NUM_WARPS-1:0] use_wsapwn;
|
||||
wire reached_barrier_limit;
|
||||
|
||||
wire [`NW_BITS-1:0] warp_to_schedule;
|
||||
wire schedule;
|
||||
// wspawn
|
||||
reg [31:0] use_wspawn_pc;
|
||||
reg [`NUM_WARPS-1:0] use_wspawn;
|
||||
|
||||
wire [`NW_BITS-1:0] warp_to_schedule;
|
||||
wire schedule;
|
||||
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] warp_pc;
|
||||
wire scheduled_warp;
|
||||
|
||||
wire hazard;
|
||||
wire global_stall;
|
||||
@@ -115,15 +69,18 @@ module VX_warp_sched (
|
||||
|
||||
reg didnt_split;
|
||||
|
||||
integer w, b;
|
||||
wire stall;
|
||||
|
||||
integer i;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (b = 0; b < `NUM_BARRIERS; b=b+1) begin
|
||||
barrier_stall_mask[b] <= 0;
|
||||
for (i = 0; i < `NUM_BARRIERS; i++) begin
|
||||
barrier_stall_mask[i] <= 0;
|
||||
end
|
||||
use_wsapwn_pc <= 0;
|
||||
use_wsapwn <= 0;
|
||||
|
||||
use_wspawn_pc <= 0;
|
||||
use_wspawn <= 0;
|
||||
warp_pcs[0] <= `STARTUP_ADDR;
|
||||
warp_active[0] <= 1; // Activating first warp
|
||||
visible_active[0] <= 1; // Activating first warp
|
||||
@@ -131,63 +88,62 @@ module VX_warp_sched (
|
||||
warp_stalled <= 0;
|
||||
didnt_split <= 0;
|
||||
warp_lock <= 0;
|
||||
// total_barrier_stall = 0;
|
||||
for (w = 1; w < `NUM_WARPS; w=w+1) begin
|
||||
warp_pcs[w] <= 0;
|
||||
warp_active[w] <= 0; // Activating first warp
|
||||
visible_active[w] <= 0; // Activating first warp
|
||||
thread_masks[w] <= 1; // Activating first thread in first warp
|
||||
|
||||
for (i = 1; i < `NUM_WARPS; i++) begin
|
||||
warp_pcs[i] <= 0;
|
||||
warp_active[i] <= 0; // Activating first warp
|
||||
visible_active[i] <= 0; // Activating first warp
|
||||
thread_masks[i] <= 1; // Activating first thread in first warp
|
||||
end
|
||||
|
||||
end else begin
|
||||
// Wsapwning warps
|
||||
if (wspawn) begin
|
||||
warp_active <= wspawn_new_active;
|
||||
use_wsapwn_pc <= wsapwn_pc;
|
||||
use_wsapwn <= wspawn_new_active & (~`NUM_WARPS'b1);
|
||||
|
||||
if (warp_ctl_if.wspawn) begin
|
||||
warp_active <= warp_ctl_if.wspawn_new_active;
|
||||
use_wspawn_pc <= warp_ctl_if.wspawn_pc;
|
||||
use_wspawn <= warp_ctl_if.wspawn_new_active & (~`NUM_WARPS'b1);
|
||||
end
|
||||
|
||||
if (is_barrier) begin
|
||||
warp_stalled[barrier_warp_num] <= 0;
|
||||
if (warp_ctl_if.is_barrier) begin
|
||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||
if (reached_barrier_limit) begin
|
||||
barrier_stall_mask[barrier_id] <= 0;
|
||||
barrier_stall_mask[warp_ctl_if.barrier_id] <= 0;
|
||||
end else begin
|
||||
barrier_stall_mask[barrier_id][barrier_warp_num] <= 1;
|
||||
barrier_stall_mask[warp_ctl_if.barrier_id][warp_ctl_if.warp_num] <= 1;
|
||||
end
|
||||
end else if (ctm) begin
|
||||
thread_masks[ctm_warp_num] <= ctm_mask;
|
||||
warp_stalled[ctm_warp_num] <= 0;
|
||||
end else if (is_join && !didnt_split) begin
|
||||
end else if (warp_ctl_if.change_mask) begin
|
||||
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.thread_mask;
|
||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||
end else if (join_if.is_join && !didnt_split) begin
|
||||
if (!join_fall) begin
|
||||
warp_pcs[join_warp_num] <= join_pc;
|
||||
warp_pcs[join_if.warp_num] <= join_pc;
|
||||
end
|
||||
thread_masks[join_warp_num] <= join_tm;
|
||||
didnt_split <= 0;
|
||||
end else if (is_split) begin
|
||||
warp_stalled[split_warp_num] <= 0;
|
||||
if (!dont_split) begin
|
||||
thread_masks[split_warp_num] <= split_new_mask;
|
||||
thread_masks[join_if.warp_num] <= join_tm;
|
||||
didnt_split <= 0;
|
||||
end else if (warp_ctl_if.is_split) begin
|
||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
||||
if (warp_ctl_if.do_split) begin
|
||||
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.split_new_mask;
|
||||
didnt_split <= 0;
|
||||
end else begin
|
||||
didnt_split <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
if (whalt) begin
|
||||
warp_active[whalt_warp_num] <= 0;
|
||||
visible_active[whalt_warp_num] <= 0;
|
||||
if (warp_ctl_if.whalt) begin
|
||||
warp_active[warp_ctl_if.warp_num] <= 0;
|
||||
visible_active[warp_ctl_if.warp_num] <= 0;
|
||||
end
|
||||
|
||||
if (update_use_wspawn) begin
|
||||
use_wsapwn[warp_to_schedule] <= 0;
|
||||
use_wspawn[warp_to_schedule] <= 0;
|
||||
thread_masks[warp_to_schedule] <= 1;
|
||||
end
|
||||
|
||||
|
||||
// Stalling the scheduling of warps
|
||||
if (wstall) begin
|
||||
warp_stalled[wstall_warp_num] <= 1;
|
||||
visible_active[wstall_warp_num] <= 0;
|
||||
if (wstall_if.wstall) begin
|
||||
warp_stalled[wstall_if.warp_num] <= 1;
|
||||
visible_active[wstall_if.warp_num] <= 0;
|
||||
end
|
||||
|
||||
// Refilling active warps
|
||||
@@ -201,26 +157,20 @@ module VX_warp_sched (
|
||||
warp_pcs[warp_to_schedule] <= new_pc;
|
||||
end
|
||||
|
||||
// Jal
|
||||
if (jal) begin
|
||||
warp_pcs[jal_warp_num] <= dest;
|
||||
warp_stalled[jal_warp_num] <= 0;
|
||||
end
|
||||
|
||||
// Branch
|
||||
if (branch_valid) begin
|
||||
if (branch_dir) begin
|
||||
warp_pcs[branch_warp_num] <= branch_dest;
|
||||
if (branch_rsp_if.valid) begin
|
||||
if (branch_rsp_if.taken) begin
|
||||
warp_pcs[branch_rsp_if.warp_num] <= branch_rsp_if.dest;
|
||||
end
|
||||
warp_stalled[branch_warp_num] <= 0;
|
||||
warp_stalled[branch_rsp_if.warp_num] <= 0;
|
||||
end
|
||||
|
||||
// Lock/Release
|
||||
if (scheduled_warp && !stall) begin
|
||||
warp_lock[warp_num] <= 1'b1;
|
||||
warp_lock[warp_num] <= 1;
|
||||
end
|
||||
if (icache_stage_response) begin
|
||||
warp_lock[icache_stage_wid] <= 1'b0;
|
||||
if ((| ifetch_rsp_if.valid) && ifetch_rsp_if.ready) begin
|
||||
warp_lock[ifetch_rsp_if.warp_num] <= 0;
|
||||
end
|
||||
|
||||
end
|
||||
@@ -233,7 +183,7 @@ module VX_warp_sched (
|
||||
.count (b_count)
|
||||
);
|
||||
|
||||
wire [$clog2(`NUM_WARPS):0] count_visible_active;
|
||||
wire [`NW_BITS:0] count_visible_active;
|
||||
|
||||
VX_countones #(
|
||||
.N(`NUM_WARPS)
|
||||
@@ -242,30 +192,29 @@ module VX_warp_sched (
|
||||
.count (count_visible_active)
|
||||
);
|
||||
|
||||
// assign b_count = $countones(b_mask);
|
||||
assign b_mask = barrier_stall_mask[warp_ctl_if.barrier_id][`NUM_WARPS-1:0];
|
||||
|
||||
assign reached_barrier_limit = (b_count == warp_ctl_if.num_warps);
|
||||
|
||||
assign b_mask = barrier_stall_mask[barrier_id][`NUM_WARPS-1:0];
|
||||
assign reached_barrier_limit = b_count == (num_warps);
|
||||
|
||||
assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug
|
||||
assign wstall_this_cycle = wstall_if.wstall && (wstall_if.warp_num == warp_to_schedule); // Maybe bug
|
||||
|
||||
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
|
||||
|
||||
assign update_visible_active = (0 == count_visible_active) && !(stall || wstall_this_cycle || hazard || is_join);
|
||||
assign update_visible_active = (0 == count_visible_active) && !(stall || wstall_this_cycle || hazard || join_if.is_join);
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc, split_later_mask};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.warp_num]};
|
||||
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split_save_pc, warp_ctl_if.split_later_mask};
|
||||
|
||||
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
|
||||
assign {join_fall, join_pc, join_tm} = d[join_if.warp_num];
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin : stacks
|
||||
wire correct_warp_s = (i == split_warp_num);
|
||||
wire correct_warp_j = (i == join_warp_num);
|
||||
genvar j;
|
||||
|
||||
wire push = (is_split && !dont_split) && correct_warp_s;
|
||||
wire pop = is_join && correct_warp_j;
|
||||
for (j = 0; j < `NUM_WARPS; j++) begin : stacks
|
||||
wire correct_warp_s = (j == warp_ctl_if.warp_num);
|
||||
wire correct_warp_j = (j == join_if.warp_num);
|
||||
|
||||
wire push = (warp_ctl_if.is_split && warp_ctl_if.do_split) && correct_warp_s;
|
||||
wire pop = join_if.is_join && correct_warp_j;
|
||||
|
||||
VX_generic_stack #(
|
||||
.WIDTH(1+32+`NUM_THREADS),
|
||||
@@ -279,27 +228,27 @@ module VX_warp_sched (
|
||||
.q1 (q1),
|
||||
.q2 (q2)
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
end
|
||||
|
||||
wire should_jal = (jal && (warp_to_schedule == jal_warp_num));
|
||||
wire should_bra = (branch_valid && branch_dir && (warp_to_schedule == branch_warp_num));
|
||||
wire should_bra = (branch_rsp_if.valid && branch_rsp_if.taken && (warp_to_schedule == branch_rsp_if.warp_num));
|
||||
|
||||
assign hazard = (should_jal || should_bra) && schedule;
|
||||
assign hazard = should_bra && schedule;
|
||||
|
||||
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule] && !warp_lock[0];
|
||||
|
||||
assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join);
|
||||
assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || join_if.is_join);
|
||||
|
||||
assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || is_join) && !reset;
|
||||
assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || join_if.is_join) && !reset;
|
||||
|
||||
wire real_use_wspawn = use_wsapwn[warp_to_schedule];
|
||||
wire real_use_wspawn = use_wspawn[warp_to_schedule];
|
||||
|
||||
assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
|
||||
assign warp_pc = real_use_wspawn ? use_wspawn_pc : warp_pcs[warp_to_schedule];
|
||||
|
||||
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]);
|
||||
|
||||
assign warp_num = warp_to_schedule;
|
||||
|
||||
assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;
|
||||
assign update_use_wspawn = use_wspawn[warp_to_schedule] && !global_stall;
|
||||
|
||||
assign new_pc = warp_pc + 4;
|
||||
|
||||
@@ -315,13 +264,21 @@ module VX_warp_sched (
|
||||
.grant_index (warp_to_schedule),
|
||||
.grant_valid (schedule),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
|
||||
assign stall = ~ifetch_req_if.ready && (| ifetch_req_if.valid);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + 32 + `NW_BITS)
|
||||
) fetch_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({thread_mask, warp_pc, warp_num}),
|
||||
.out ({ifetch_req_if.valid, ifetch_req_if.curr_PC, ifetch_req_if.warp_num})
|
||||
);
|
||||
|
||||
// always @(*) begin
|
||||
// $display("WarpPC: %h",warp_pc);
|
||||
// $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]);
|
||||
// end
|
||||
|
||||
assign busy = (warp_active != 0);
|
||||
assign busy = (warp_active != 0);
|
||||
|
||||
endmodule
|
||||
Reference in New Issue
Block a user