software updaet for new thread mask design

This commit is contained in:
Blaise Tine
2021-08-05 22:39:39 -04:00
parent e4d9fd8a00
commit d7948a1ce6
24 changed files with 8470 additions and 8430 deletions

View File

@@ -48,20 +48,20 @@ module VX_gpu_unit #(
// split
wire [`NUM_THREADS-1:0] split_then_mask;
wire [`NUM_THREADS-1:0] split_else_mask;
wire [`NUM_THREADS-1:0] split_then_tmask;
wire [`NUM_THREADS-1:0] split_else_tmask;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire taken = gpu_req_if.rs1_data[i][gpu_req_if.tid];
assign split_then_mask[i] = gpu_req_if.tmask[i] & taken;
assign split_else_mask[i] = gpu_req_if.tmask[i] & ~taken;
wire taken = gpu_req_if.rs1_data[i][0];
assign split_then_tmask[i] = gpu_req_if.tmask[i] & taken;
assign split_else_tmask[i] = gpu_req_if.tmask[i] & ~taken;
end
assign split.valid = is_split;
assign split.diverged = (| split_then_mask) && (| split_else_mask);
assign split.then_mask = split_then_mask;
assign split.else_mask = split_else_mask;
assign split.pc = gpu_req_if.next_PC;
assign split.valid = is_split;
assign split.diverged = (| split_then_tmask) && (| split_else_tmask);
assign split.then_tmask = split_then_tmask;
assign split.else_tmask = split_else_tmask;
assign split.pc = gpu_req_if.next_PC;
// barrier

View File

@@ -41,8 +41,8 @@ typedef struct packed {
typedef struct packed {
logic valid;
logic diverged;
logic [`NUM_THREADS-1:0] then_mask;
logic [`NUM_THREADS-1:0] else_mask;
logic [`NUM_THREADS-1:0] then_tmask;
logic [`NUM_THREADS-1:0] else_tmask;
logic [31:0] pc;
} gpu_split_t;

View File

@@ -47,7 +47,9 @@ module VX_warp_sched #(
reg [`NW_BITS-1:0] scheduled_warp;
wire warp_scheduled;
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
wire tmc_active = (warp_ctl_if.tmc.tmask != 0);
always @(*) begin
active_warps_n = active_warps;
@@ -55,14 +57,14 @@ module VX_warp_sched #(
active_warps_n = warp_ctl_if.wspawn.wmask;
end
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
active_warps_n[warp_ctl_if.wid] = (warp_ctl_if.tmc.tmask != 0);
active_warps_n[warp_ctl_if.wid] = tmc_active;
end
end
always @(*) begin
schedule_table_n = schedule_table;
if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
schedule_table_n[warp_ctl_if.wid] = (warp_ctl_if.tmc.tmask != 0);
schedule_table_n[warp_ctl_if.wid] = tmc_active;
end
if (warp_scheduled) begin // remove scheduled warp (round-robin)
schedule_table_n[scheduled_warp] = 0;
@@ -104,12 +106,12 @@ module VX_warp_sched #(
barrier_stall_mask[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
end
end else if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask;
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.tmask;
stalled_warps[warp_ctl_if.wid] <= 0;
end else if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
stalled_warps[warp_ctl_if.wid] <= 0;
if (warp_ctl_if.split.diverged) begin
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_mask;
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_tmask;
end
end
@@ -178,6 +180,8 @@ module VX_warp_sched #(
// split/join stack management
wire [(1+32+`NUM_THREADS-1):0] ipdom [`NUM_WARPS-1:0];
wire [`NUM_THREADS-1:0] curr_tmask = thread_masks[warp_ctl_if.wid];
for (genvar i = 0; i < `NUM_WARPS; i++) begin
wire push = warp_ctl_if.valid
@@ -186,9 +190,9 @@ module VX_warp_sched #(
wire pop = join_if.valid && (i == join_if.wid);
wire [`NUM_THREADS-1:0] else_mask = warp_ctl_if.split.diverged ? warp_ctl_if.split.else_mask : thread_masks[warp_ctl_if.wid];
wire [(1+32+`NUM_THREADS-1):0] q_end = {1'b0, 32'b0, thread_masks[warp_ctl_if.wid]};
wire [(1+32+`NUM_THREADS-1):0] q_else = {1'b1, warp_ctl_if.split.pc, else_mask};
wire [`NUM_THREADS-1:0] else_tmask = warp_ctl_if.split.diverged ? warp_ctl_if.split.else_tmask : curr_tmask;
wire [(1+32+`NUM_THREADS-1):0] q_end = {1'b0, 32'b0, curr_tmask};
wire [(1+32+`NUM_THREADS-1):0] q_else = {1'b1, warp_ctl_if.split.pc, else_tmask};
VX_ipdom_stack #(
.WIDTH (1+32+`NUM_THREADS),