optimize warp_sched
This commit is contained in:
@@ -371,13 +371,15 @@ module VX_decode #(
|
||||
|
||||
wire decode_fire = decode_if.valid && decode_if.ready;
|
||||
|
||||
assign join_if.is_join = decode_fire && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.valid = decode_fire && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.wid = ifetch_rsp_if.wid;
|
||||
|
||||
assign wstall_if.wstall = decode_fire && (is_btype || is_jal || is_jalr
|
||||
|| (is_gpu && (gpu_op == `GPU_TMC
|
||||
|| gpu_op == `GPU_SPLIT
|
||||
|| gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.valid = decode_fire && (is_btype
|
||||
|| is_jal
|
||||
|| is_jalr
|
||||
|| (is_gpu && (gpu_op == `GPU_TMC
|
||||
|| gpu_op == `GPU_SPLIT
|
||||
|| gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.wid = ifetch_rsp_if.wid;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -391,9 +393,7 @@ module VX_decode #(
|
||||
print_ex_type(decode_if.ex_type);
|
||||
$write(", op=");
|
||||
print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
|
||||
$write(", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, frm=", decode_if.thread_mask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm);
|
||||
print_frm(decode_if.frm);
|
||||
$write("\n");
|
||||
$write("mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b\n", decode_if.op_mod, decode_if.thread_mask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -21,7 +21,7 @@ endtask
|
||||
task print_ex_op;
|
||||
input [`EX_BITS-1:0] ex_type;
|
||||
input [`OP_BITS-1:0] op_type;
|
||||
input [`OP_BITS-1:0] op_mod;
|
||||
input [`MOD_BITS-1:0] op_mod;
|
||||
begin
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
@@ -141,19 +141,4 @@ task print_ex_op;
|
||||
end
|
||||
endtask
|
||||
|
||||
// print_frm: writes a three-letter mnemonic for the value of 'frm' via $write.
//   frm : FRM_BITS-wide input, matched against the FRM_* macros below.
// Prints one of RNE / RTZ / RDN / RUP / RMM / DYN, or "?" for any other value.
// No newline is written here; the visible call site emits the "frm=" label
// before calling this task and a "\n" afterwards.
// NOTE(review): presumably 'frm' is the floating-point rounding mode -- confirm
// against the FRM_* definitions, which are not visible in this view.
task print_frm;
|
||||
input [`FRM_BITS-1:0] frm;
|
||||
begin
|
||||
case (frm)
|
||||
`FRM_RNE: $write("RNE");
|
||||
`FRM_RTZ: $write("RTZ");
|
||||
`FRM_RDN: $write("RDN");
|
||||
`FRM_RUP: $write("RUP");
|
||||
`FRM_RMM: $write("RMM");
|
||||
`FRM_DYN: $write("DYN");
|
||||
// Fallback for any value that matches none of the macros above.
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
endtask
|
||||
|
||||
`endif
|
||||
|
||||
@@ -52,10 +52,7 @@ module VX_warp_sched #(
|
||||
&& warp_ctl_if.tmc.valid
|
||||
&& (0 == warp_ctl_if.tmc.thread_mask)) begin
|
||||
schedule_table_n[warp_ctl_if.wid] = 0;
|
||||
end
|
||||
if (wstall_if.wstall) begin
|
||||
schedule_table_n[wstall_if.wid] = 0;
|
||||
end
|
||||
end
|
||||
if (scheduled_warp) begin // remove scheduled warp (round-robin)
|
||||
schedule_table_n[warp_to_schedule] = 0;
|
||||
end
|
||||
@@ -103,7 +100,7 @@ module VX_warp_sched #(
|
||||
if (0 == warp_ctl_if.tmc.thread_mask) begin
|
||||
active_warps[warp_ctl_if.wid] <= 0;
|
||||
end
|
||||
end else if (join_if.is_join && !didnt_split) begin
|
||||
end else if (join_if.valid && !didnt_split) begin
|
||||
if (!join_fall) begin
|
||||
warp_pcs[join_if.wid] <= join_pc;
|
||||
end
|
||||
@@ -125,15 +122,10 @@ module VX_warp_sched #(
|
||||
end
|
||||
|
||||
// Stalling the scheduling of warps
|
||||
if (wstall_if.wstall) begin
|
||||
if (wstall_if.valid) begin
|
||||
stalled_warps[wstall_if.wid] <= 1;
|
||||
end
|
||||
|
||||
// Advance PC
|
||||
if (scheduled_warp) begin
|
||||
warp_pcs[warp_to_schedule] <= warp_pc + 4;
|
||||
end
|
||||
|
||||
// Branch
|
||||
if (branch_ctl_if.valid) begin
|
||||
if (branch_ctl_if.taken) begin
|
||||
@@ -148,6 +140,7 @@ module VX_warp_sched #(
|
||||
end
|
||||
if (ifetch_rsp_fire) begin
|
||||
fetch_lock[ifetch_rsp_if.wid] <= 0;
|
||||
warp_pcs[ifetch_rsp_if.wid] <= ifetch_rsp_if.curr_PC + 4;
|
||||
end
|
||||
|
||||
// reset 'schedule_table' when it goes to zero
|
||||
@@ -191,8 +184,7 @@ module VX_warp_sched #(
|
||||
&& warp_ctl_if.split.diverged
|
||||
&& (i == warp_ctl_if.wid);
|
||||
|
||||
wire pop = join_if.is_join
|
||||
&& (i == join_if.wid);
|
||||
wire pop = join_if.valid && (i == join_if.wid);
|
||||
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH(1+32+`NUM_THREADS),
|
||||
@@ -232,17 +224,9 @@ module VX_warp_sched #(
|
||||
end
|
||||
end
|
||||
|
||||
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
|
||||
wire branch_hazard = branch_ctl_if.valid
|
||||
&& branch_ctl_if.taken
|
||||
&& (branch_ctl_if.wid == warp_to_schedule);
|
||||
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
|
||||
wire wstall_this_cycle = wstall_if.wstall && (wstall_if.wid == warp_to_schedule);
|
||||
|
||||
wire stall = stall_out || wstall_this_cycle || branch_hazard || join_if.is_join;
|
||||
|
||||
assign scheduled_warp = schedule_valid && ~stall;
|
||||
assign scheduled_warp = schedule_valid && ~stall_out;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
// VX_join_if: signal bundle carrying a one-bit join indication plus the id
// ('wid', NW_BITS wide) of the warp it applies to. In the visible decode
// fragment the one-bit flag is driven when a GPU_JOIN instruction fires, and
// the scheduler fragment uses 'wid' to index per-warp state.
// NOTE(review): both 'is_join' and 'valid' are listed here, and the fragments
// in this view reference join_if.is_join and join_if.valid with identical
// driving expressions. This looks like the before/after of a rename
// (is_join -> valid) captured without diff markers -- confirm which single
// wire the current tree keeps.
interface VX_join_if ();
|
||||
|
||||
wire is_join;
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
// VX_wstall_if: signal bundle carrying a one-bit warp-stall request plus the
// id ('wid', NW_BITS wide) of the warp to stall. In the visible decode
// fragment the flag is raised when a branch, jal, jalr, or a GPU TMC/SPLIT/BAR
// instruction fires; the scheduler fragment sets stalled_warps[wid] on it.
// NOTE(review): both 'wstall' and 'valid' are listed here, and the fragments
// in this view reference wstall_if.wstall and wstall_if.valid with the same
// driving condition. This looks like the before/after of a rename
// (wstall -> valid) captured without diff markers -- confirm which single
// wire the current tree keeps.
interface VX_wstall_if();
|
||||
|
||||
wire wstall;
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
endinterface
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
//#define ALL_TESTS
|
||||
#define ALL_TESTS
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc == 1) {
|
||||
|
||||
Reference in New Issue
Block a user