WSPAWN imp + tested
This commit is contained in:
@@ -8,8 +8,7 @@ module VX_decode(
|
||||
// Outputs
|
||||
VX_frE_to_bckE_req_inter VX_frE_to_bckE_req,
|
||||
VX_wstall_inter VX_wstall,
|
||||
VX_join_inter VX_join,
|
||||
output wire out_ebreak
|
||||
VX_join_inter VX_join
|
||||
|
||||
);
|
||||
|
||||
@@ -218,7 +217,7 @@ module VX_decode(
|
||||
// assign is_ebreak = is_e_inst;
|
||||
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
|
||||
assign VX_frE_to_bckE_req.ebreak = ebreak;
|
||||
assign out_ebreak = ebreak;
|
||||
wire out_ebreak = ebreak;
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -27,51 +27,51 @@ module VX_fetch (
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[31:0] warp_pc;
|
||||
VX_warp_scheduler warp_scheduler(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipe_stall),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (pipe_stall),
|
||||
// Wspawn
|
||||
.wspawn (VX_warp_ctl.wspawn),
|
||||
.wsapwn_pc (VX_warp_ctl.wspawn_pc),
|
||||
// .wspawn_warp_num(VX_warp_ctl.warp_num),
|
||||
.wspawn (VX_warp_ctl.wspawn),
|
||||
.wsapwn_pc (VX_warp_ctl.wspawn_pc),
|
||||
.wspawn_new_active(VX_warp_ctl.wspawn_new_active),
|
||||
// CTM
|
||||
.ctm (VX_warp_ctl.change_mask),
|
||||
.ctm_mask (VX_warp_ctl.thread_mask),
|
||||
.ctm_warp_num (VX_warp_ctl.warp_num),
|
||||
.ctm (VX_warp_ctl.change_mask),
|
||||
.ctm_mask (VX_warp_ctl.thread_mask),
|
||||
.ctm_warp_num (VX_warp_ctl.warp_num),
|
||||
// WHALT
|
||||
.whalt (VX_warp_ctl.ebreak),
|
||||
.whalt_warp_num (VX_warp_ctl.warp_num),
|
||||
.whalt (VX_warp_ctl.ebreak),
|
||||
.whalt_warp_num (VX_warp_ctl.warp_num),
|
||||
// Wstall
|
||||
.wstall (VX_wstall.wstall),
|
||||
.wstall_warp_num(VX_wstall.warp_num),
|
||||
.wstall (VX_wstall.wstall),
|
||||
.wstall_warp_num (VX_wstall.warp_num),
|
||||
|
||||
// Join
|
||||
.is_join (VX_join.is_join),
|
||||
.join_warp_num (VX_join.join_warp_num),
|
||||
.is_join (VX_join.is_join),
|
||||
.join_warp_num (VX_join.join_warp_num),
|
||||
|
||||
// Split
|
||||
.is_split (VX_warp_ctl.is_split),
|
||||
.split_new_mask (VX_warp_ctl.split_new_mask),
|
||||
.split_later_mask(VX_warp_ctl.split_later_mask),
|
||||
.split_save_pc (VX_warp_ctl.split_save_pc),
|
||||
.split_warp_num (VX_warp_ctl.warp_num),
|
||||
.is_split (VX_warp_ctl.is_split),
|
||||
.split_new_mask (VX_warp_ctl.split_new_mask),
|
||||
.split_later_mask (VX_warp_ctl.split_later_mask),
|
||||
.split_save_pc (VX_warp_ctl.split_save_pc),
|
||||
.split_warp_num (VX_warp_ctl.warp_num),
|
||||
|
||||
// JAL
|
||||
.jal (VX_jal_rsp.jal),
|
||||
.jal_dest (VX_jal_rsp.jal_dest),
|
||||
.jal_warp_num (VX_jal_rsp.jal_warp_num),
|
||||
.jal (VX_jal_rsp.jal),
|
||||
.jal_dest (VX_jal_rsp.jal_dest),
|
||||
.jal_warp_num (VX_jal_rsp.jal_warp_num),
|
||||
|
||||
// Branch
|
||||
.branch_valid (VX_branch_rsp.valid_branch),
|
||||
.branch_dir (VX_branch_rsp.branch_dir),
|
||||
.branch_dest (VX_branch_rsp.branch_dest),
|
||||
.branch_warp_num(VX_branch_rsp.branch_warp_num),
|
||||
.branch_valid (VX_branch_rsp.valid_branch),
|
||||
.branch_dir (VX_branch_rsp.branch_dir),
|
||||
.branch_dest (VX_branch_rsp.branch_dest),
|
||||
.branch_warp_num (VX_branch_rsp.branch_warp_num),
|
||||
|
||||
// Outputs
|
||||
.thread_mask (thread_mask),
|
||||
.warp_num (warp_num),
|
||||
.warp_pc (warp_pc),
|
||||
.out_ebreak (out_ebreak)
|
||||
.thread_mask (thread_mask),
|
||||
.warp_num (warp_num),
|
||||
.warp_pc (warp_pc),
|
||||
.out_ebreak (out_ebreak)
|
||||
);
|
||||
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ wire decode_branch_stall;
|
||||
wire total_freeze = schedule_delay;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
wire real_fetch_ebreak;
|
||||
// wire real_fetch_ebreak;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ VX_fetch vx_fetch(
|
||||
|
||||
.icache_request (icache_request_fe),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
.out_ebreak (real_fetch_ebreak), // fetch_ebreak
|
||||
.out_ebreak (fetch_ebreak), // fetch_ebreak
|
||||
.fe_inst_meta_fd (fe_inst_meta_fd)
|
||||
);
|
||||
|
||||
@@ -68,8 +68,7 @@ VX_decode vx_decode(
|
||||
.fd_inst_meta_de (fd_inst_meta_de),
|
||||
.VX_frE_to_bckE_req(VX_frE_to_bckE_req),
|
||||
.VX_wstall (VX_wstall),
|
||||
.VX_join (VX_join),
|
||||
.out_ebreak (fetch_ebreak)
|
||||
.VX_join (VX_join)
|
||||
);
|
||||
|
||||
wire no_br_stall = 0;
|
||||
|
||||
@@ -20,14 +20,27 @@ module VX_gpgpu_inst (
|
||||
wire valid_inst = (|curr_valids);
|
||||
|
||||
assign VX_warp_ctl.warp_num = VX_gpu_inst_req.warp_num;
|
||||
assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc || VX_gpu_inst_req.is_split) && valid_inst;
|
||||
assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc) && valid_inst;
|
||||
assign VX_warp_ctl.thread_mask = VX_gpu_inst_req.is_tmc ? tmc_new_mask : 0;
|
||||
|
||||
assign VX_warp_ctl.ebreak = (VX_gpu_inst_req.a_reg_data[0] == 0) && valid_inst;
|
||||
// assign VX_warp_ctl.ebreak = (VX_gpu_inst_req.a_reg_data[0] == 0) && valid_inst;
|
||||
assign VX_warp_ctl.ebreak = VX_warp_ctl.change_mask && (VX_warp_ctl.thread_mask == 0);
|
||||
|
||||
assign VX_warp_ctl.wspawn = 0;
|
||||
assign VX_warp_ctl.wspawn_pc = 0;
|
||||
|
||||
wire wspawn = VX_gpu_inst_req.is_wspawn;
|
||||
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
|
||||
wire[`NW-1:0] wspawn_new_active;
|
||||
genvar curr_w;
|
||||
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1)
|
||||
begin
|
||||
assign wspawn_new_active[curr_w] = curr_w < VX_gpu_inst_req.a_reg_data[0];
|
||||
end
|
||||
|
||||
|
||||
|
||||
assign VX_warp_ctl.wspawn = wspawn;
|
||||
assign VX_warp_ctl.wspawn_pc = wspawn_pc;
|
||||
assign VX_warp_ctl.wspawn_new_active = wspawn_new_active;
|
||||
|
||||
wire[`NT_M1:0] split_new_use_mask;
|
||||
wire[`NT_M1:0] split_new_later_mask;
|
||||
@@ -49,7 +62,7 @@ module VX_gpgpu_inst (
|
||||
);
|
||||
|
||||
|
||||
assign VX_warp_ctl.is_split = is_split && (num_valids > 1);
|
||||
assign VX_warp_ctl.is_split = is_split && (num_valids > 1) && (split_new_use_mask != 0) && (split_new_use_mask != {`NT{1'b1}});
|
||||
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
|
||||
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
|
||||
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;
|
||||
|
||||
@@ -7,6 +7,7 @@ module VX_warp_scheduler (
|
||||
// Wspawn
|
||||
input wire wspawn,
|
||||
input wire[31:0] wsapwn_pc,
|
||||
input wire[`NW-1:0] wspawn_new_active,
|
||||
|
||||
// CTM
|
||||
input wire ctm,
|
||||
@@ -50,6 +51,10 @@ module VX_warp_scheduler (
|
||||
|
||||
);
|
||||
|
||||
wire update_use_wspawn;
|
||||
|
||||
wire update_visible_active;
|
||||
|
||||
wire[(1+32+`NT_M1):0] d[`NW-1:0];
|
||||
|
||||
wire join_fall;
|
||||
@@ -72,9 +77,9 @@ module VX_warp_scheduler (
|
||||
reg[31:0] warp_pcs[`NW-1:0];
|
||||
|
||||
|
||||
// Choosing a warp to wsapwn
|
||||
wire[`NW_M1:0] warp_to_wsapwn;
|
||||
wire found_wspawn;
|
||||
// wsapwn
|
||||
reg[31:0] use_wsapwn_pc;
|
||||
reg[`NW-1:0] use_wsapwn;
|
||||
|
||||
wire[`NW_M1:0] warp_to_schedule;
|
||||
wire schedule;
|
||||
@@ -111,15 +116,19 @@ module VX_warp_scheduler (
|
||||
warp_pcs[curr_w_help] <= 0;
|
||||
warp_active[curr_w_help] <= 0; // Activating first warp
|
||||
visible_active[curr_w_help] <= 0; // Activating first warp
|
||||
thread_masks[curr_w_help] <= 0; // Activating first thread in first warp
|
||||
thread_masks[curr_w_help] <= 1; // Activating first thread in first warp
|
||||
end
|
||||
|
||||
end else begin
|
||||
// Wsapwning warps
|
||||
if (wspawn && found_wspawn) begin
|
||||
warp_pcs[warp_to_wsapwn] <= wsapwn_pc;
|
||||
warp_active[warp_to_wsapwn] <= 1;
|
||||
visible_active[warp_to_wsapwn] <= 1;
|
||||
if (wspawn) begin
|
||||
warp_active <= wspawn_new_active;
|
||||
use_wsapwn_pc <= wsapwn_pc;
|
||||
use_wsapwn <= wspawn_new_active & (~`NW'b1);
|
||||
end
|
||||
|
||||
if (update_use_wspawn) begin
|
||||
use_wsapwn[warp_to_schedule] <= 0;
|
||||
end
|
||||
// Halting warps
|
||||
if (whalt) begin
|
||||
@@ -152,13 +161,7 @@ module VX_warp_scheduler (
|
||||
end
|
||||
|
||||
// Refilling active warps
|
||||
if ((visible_active == 0) && !(stall || wstall || hazard || is_join)) begin
|
||||
visible_active <= warp_active & (~warp_stalled);
|
||||
end
|
||||
|
||||
// First cycle
|
||||
if (start <= 2) begin
|
||||
start <= 1;
|
||||
if (update_visible_active) begin
|
||||
visible_active <= warp_active & (~warp_stalled);
|
||||
end
|
||||
|
||||
@@ -182,6 +185,9 @@ module VX_warp_scheduler (
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign update_visible_active = ($countones(visible_active) < 1) && !(stall || wstall || hazard || is_join);
|
||||
|
||||
wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
|
||||
wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask};
|
||||
|
||||
@@ -220,15 +226,18 @@ module VX_warp_scheduler (
|
||||
assign global_stall = (stall || wstall || hazard || !real_schedule || is_join);
|
||||
|
||||
|
||||
assign warp_pc = warp_pcs[warp_to_schedule];
|
||||
wire real_use_wspawn = use_wsapwn[warp_to_schedule];
|
||||
|
||||
assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
|
||||
assign thread_mask = (global_stall) ? 0 : thread_masks[warp_to_schedule];
|
||||
assign warp_num = warp_to_schedule;
|
||||
|
||||
assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;
|
||||
|
||||
assign new_pc = warp_pc + 4;
|
||||
|
||||
|
||||
assign use_active = (num_active <= 1) ? (warp_active & (~warp_stalled)) : visible_active;
|
||||
assign use_active = (num_active < 1) ? (warp_active & (~warp_stalled)) : visible_active;
|
||||
|
||||
// Choosing a warp to schedule
|
||||
VX_priority_encoder choose_schedule(
|
||||
@@ -238,13 +247,6 @@ module VX_warp_scheduler (
|
||||
);
|
||||
|
||||
|
||||
VX_priority_encoder choose_wsapwn(
|
||||
.valids(~warp_active),
|
||||
.index (warp_to_wsapwn),
|
||||
.found (found_wspawn)
|
||||
);
|
||||
|
||||
|
||||
// Valid counter
|
||||
VX_one_counter valid_counter(
|
||||
.valids(visible_active),
|
||||
@@ -252,7 +254,8 @@ module VX_warp_scheduler (
|
||||
);
|
||||
|
||||
|
||||
assign out_ebreak = (warp_active == 0);
|
||||
wire ebreak = (warp_active == 0);
|
||||
assign out_ebreak = ebreak;
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ interface VX_warp_ctl_inter ();
|
||||
|
||||
wire wspawn;
|
||||
wire[31:0] wspawn_pc;
|
||||
wire[`NW-1:0] wspawn_new_active;
|
||||
|
||||
wire ebreak;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Dynamic Instructions: 36
|
||||
# of total cycles: 50
|
||||
# Dynamic Instructions: 52686
|
||||
# of total cycles: 52701
|
||||
# of forwarding stalls: 0
|
||||
# of branch stalls: 0
|
||||
# CPI: 1.38889
|
||||
# time to simulate: 6.95313e-310 milliseconds
|
||||
# CPI: 1.00028
|
||||
# time to simulate: 0 milliseconds
|
||||
# GRADE: Failed on test: 4294967295
|
||||
|
||||
@@ -1 +1 @@
|
||||
#define VCD_OUTPUT
|
||||
#define VCD_OFF
|
||||
|
||||
@@ -385,7 +385,7 @@ bool Vortex::simulate(std::string file_to_simulate)
|
||||
// unsigned cycles;
|
||||
counter = 0;
|
||||
this->stats_total_cycles = 10;
|
||||
while (this->stop && ((counter < 6)))
|
||||
while (this->stop && ((counter < 2)))
|
||||
// while (this->stats_total_cycles < 10)
|
||||
{
|
||||
// std::cout << "Counter: " << counter << "\n";
|
||||
|
||||
Reference in New Issue
Block a user