WSPAWN implemented + tested

This commit is contained in:
felsabbagh3
2019-10-21 23:35:53 -04:00
parent c21e400f9f
commit 31d3d51392
13 changed files with 2166 additions and 2138 deletions

View File

@@ -8,8 +8,7 @@ module VX_decode(
// Outputs
VX_frE_to_bckE_req_inter VX_frE_to_bckE_req,
VX_wstall_inter VX_wstall,
VX_join_inter VX_join,
output wire out_ebreak
VX_join_inter VX_join
);
@@ -218,7 +217,7 @@ module VX_decode(
// assign is_ebreak = is_e_inst;
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
assign VX_frE_to_bckE_req.ebreak = ebreak;
assign out_ebreak = ebreak;
wire out_ebreak = ebreak;

View File

@@ -27,51 +27,51 @@ module VX_fetch (
wire[`NW_M1:0] warp_num;
wire[31:0] warp_pc;
VX_warp_scheduler warp_scheduler(
.clk (clk),
.reset (reset),
.stall (pipe_stall),
.clk (clk),
.reset (reset),
.stall (pipe_stall),
// Wspawn
.wspawn (VX_warp_ctl.wspawn),
.wsapwn_pc (VX_warp_ctl.wspawn_pc),
// .wspawn_warp_num(VX_warp_ctl.warp_num),
.wspawn (VX_warp_ctl.wspawn),
.wsapwn_pc (VX_warp_ctl.wspawn_pc),
.wspawn_new_active(VX_warp_ctl.wspawn_new_active),
// CTM
.ctm (VX_warp_ctl.change_mask),
.ctm_mask (VX_warp_ctl.thread_mask),
.ctm_warp_num (VX_warp_ctl.warp_num),
.ctm (VX_warp_ctl.change_mask),
.ctm_mask (VX_warp_ctl.thread_mask),
.ctm_warp_num (VX_warp_ctl.warp_num),
// WHALT
.whalt (VX_warp_ctl.ebreak),
.whalt_warp_num (VX_warp_ctl.warp_num),
.whalt (VX_warp_ctl.ebreak),
.whalt_warp_num (VX_warp_ctl.warp_num),
// Wstall
.wstall (VX_wstall.wstall),
.wstall_warp_num(VX_wstall.warp_num),
.wstall (VX_wstall.wstall),
.wstall_warp_num (VX_wstall.warp_num),
// Join
.is_join (VX_join.is_join),
.join_warp_num (VX_join.join_warp_num),
.is_join (VX_join.is_join),
.join_warp_num (VX_join.join_warp_num),
// Split
.is_split (VX_warp_ctl.is_split),
.split_new_mask (VX_warp_ctl.split_new_mask),
.split_later_mask(VX_warp_ctl.split_later_mask),
.split_save_pc (VX_warp_ctl.split_save_pc),
.split_warp_num (VX_warp_ctl.warp_num),
.is_split (VX_warp_ctl.is_split),
.split_new_mask (VX_warp_ctl.split_new_mask),
.split_later_mask (VX_warp_ctl.split_later_mask),
.split_save_pc (VX_warp_ctl.split_save_pc),
.split_warp_num (VX_warp_ctl.warp_num),
// JAL
.jal (VX_jal_rsp.jal),
.jal_dest (VX_jal_rsp.jal_dest),
.jal_warp_num (VX_jal_rsp.jal_warp_num),
.jal (VX_jal_rsp.jal),
.jal_dest (VX_jal_rsp.jal_dest),
.jal_warp_num (VX_jal_rsp.jal_warp_num),
// Branch
.branch_valid (VX_branch_rsp.valid_branch),
.branch_dir (VX_branch_rsp.branch_dir),
.branch_dest (VX_branch_rsp.branch_dest),
.branch_warp_num(VX_branch_rsp.branch_warp_num),
.branch_valid (VX_branch_rsp.valid_branch),
.branch_dir (VX_branch_rsp.branch_dir),
.branch_dest (VX_branch_rsp.branch_dest),
.branch_warp_num (VX_branch_rsp.branch_warp_num),
// Outputs
.thread_mask (thread_mask),
.warp_num (warp_num),
.warp_pc (warp_pc),
.out_ebreak (out_ebreak)
.thread_mask (thread_mask),
.warp_num (warp_num),
.warp_pc (warp_pc),
.out_ebreak (out_ebreak)
);

View File

@@ -32,7 +32,7 @@ wire decode_branch_stall;
wire total_freeze = schedule_delay;
/* verilator lint_off UNUSED */
wire real_fetch_ebreak;
// wire real_fetch_ebreak;
/* verilator lint_on UNUSED */
@@ -51,7 +51,7 @@ VX_fetch vx_fetch(
.icache_request (icache_request_fe),
.VX_branch_rsp (VX_branch_rsp),
.out_ebreak (real_fetch_ebreak), // fetch_ebreak
.out_ebreak (fetch_ebreak), // fetch_ebreak
.fe_inst_meta_fd (fe_inst_meta_fd)
);
@@ -68,8 +68,7 @@ VX_decode vx_decode(
.fd_inst_meta_de (fd_inst_meta_de),
.VX_frE_to_bckE_req(VX_frE_to_bckE_req),
.VX_wstall (VX_wstall),
.VX_join (VX_join),
.out_ebreak (fetch_ebreak)
.VX_join (VX_join)
);
wire no_br_stall = 0;

View File

@@ -20,14 +20,27 @@ module VX_gpgpu_inst (
wire valid_inst = (|curr_valids);
assign VX_warp_ctl.warp_num = VX_gpu_inst_req.warp_num;
assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc || VX_gpu_inst_req.is_split) && valid_inst;
assign VX_warp_ctl.change_mask = (VX_gpu_inst_req.is_tmc) && valid_inst;
assign VX_warp_ctl.thread_mask = VX_gpu_inst_req.is_tmc ? tmc_new_mask : 0;
assign VX_warp_ctl.ebreak = (VX_gpu_inst_req.a_reg_data[0] == 0) && valid_inst;
// assign VX_warp_ctl.ebreak = (VX_gpu_inst_req.a_reg_data[0] == 0) && valid_inst;
assign VX_warp_ctl.ebreak = VX_warp_ctl.change_mask && (VX_warp_ctl.thread_mask == 0);
assign VX_warp_ctl.wspawn = 0;
assign VX_warp_ctl.wspawn_pc = 0;
// Wspawn decode: derives the wspawn flag, the spawn PC and a per-warp
// activation mask from the GPU instruction request and forwards all three
// on the VX_warp_ctl interface.
wire wspawn = VX_gpu_inst_req.is_wspawn;
// Spawn PC is taken from the request's rd2 field.
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
// One bit per warp (`NW bits wide).
wire[`NW-1:0] wspawn_new_active;
genvar curr_w;
// Bit curr_w is set when curr_w < a_reg_data[0], so the lowest a_reg_data[0]
// bits of the mask are 1 and the remaining bits are 0.
// NOTE(review): presumably a_reg_data[0] carries the requested number of
// warps; confirm against the wspawn instruction definition.
// NOTE(review): unlike change_mask, wspawn is not qualified with valid_inst
// here; confirm that is_wspawn is already gated upstream.
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1)
begin
assign wspawn_new_active[curr_w] = curr_w < VX_gpu_inst_req.a_reg_data[0];
end
// Drive the warp-control interface with the decoded wspawn signals.
assign VX_warp_ctl.wspawn = wspawn;
assign VX_warp_ctl.wspawn_pc = wspawn_pc;
assign VX_warp_ctl.wspawn_new_active = wspawn_new_active;
wire[`NT_M1:0] split_new_use_mask;
wire[`NT_M1:0] split_new_later_mask;
@@ -49,7 +62,7 @@ module VX_gpgpu_inst (
);
assign VX_warp_ctl.is_split = is_split && (num_valids > 1);
assign VX_warp_ctl.is_split = is_split && (num_valids > 1) && (split_new_use_mask != 0) && (split_new_use_mask != {`NT{1'b1}});
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;

View File

@@ -7,6 +7,7 @@ module VX_warp_scheduler (
// Wspawn
input wire wspawn,
input wire[31:0] wsapwn_pc,
input wire[`NW-1:0] wspawn_new_active,
// CTM
input wire ctm,
@@ -50,6 +51,10 @@ module VX_warp_scheduler (
);
wire update_use_wspawn;
wire update_visible_active;
wire[(1+32+`NT_M1):0] d[`NW-1:0];
wire join_fall;
@@ -72,9 +77,9 @@ module VX_warp_scheduler (
reg[31:0] warp_pcs[`NW-1:0];
// Choosing a warp to wsapwn
wire[`NW_M1:0] warp_to_wsapwn;
wire found_wspawn;
// Wspawn state
reg[31:0] use_wsapwn_pc;
reg[`NW-1:0] use_wsapwn;
wire[`NW_M1:0] warp_to_schedule;
wire schedule;
@@ -111,15 +116,19 @@ module VX_warp_scheduler (
warp_pcs[curr_w_help] <= 0;
warp_active[curr_w_help] <= 0; // Activating first warp
visible_active[curr_w_help] <= 0; // Activating first warp
thread_masks[curr_w_help] <= 0; // Activating first thread in first warp
thread_masks[curr_w_help] <= 1; // Activating first thread in first warp
end
end else begin
// Wspawning warps
if (wspawn && found_wspawn) begin
warp_pcs[warp_to_wsapwn] <= wsapwn_pc;
warp_active[warp_to_wsapwn] <= 1;
visible_active[warp_to_wsapwn] <= 1;
if (wspawn) begin
warp_active <= wspawn_new_active;
use_wsapwn_pc <= wsapwn_pc;
use_wsapwn <= wspawn_new_active & (~`NW'b1);
end
if (update_use_wspawn) begin
use_wsapwn[warp_to_schedule] <= 0;
end
// Halting warps
if (whalt) begin
@@ -152,13 +161,7 @@ module VX_warp_scheduler (
end
// Refilling active warps
if ((visible_active == 0) && !(stall || wstall || hazard || is_join)) begin
visible_active <= warp_active & (~warp_stalled);
end
// First cycle
if (start <= 2) begin
start <= 1;
if (update_visible_active) begin
visible_active <= warp_active & (~warp_stalled);
end
@@ -182,6 +185,9 @@ module VX_warp_scheduler (
end
end
assign update_visible_active = ($countones(visible_active) < 1) && !(stall || wstall || hazard || is_join);
wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask};
@@ -220,15 +226,18 @@ module VX_warp_scheduler (
assign global_stall = (stall || wstall || hazard || !real_schedule || is_join);
assign warp_pc = warp_pcs[warp_to_schedule];
wire real_use_wspawn = use_wsapwn[warp_to_schedule];
assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
assign thread_mask = (global_stall) ? 0 : thread_masks[warp_to_schedule];
assign warp_num = warp_to_schedule;
assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;
assign new_pc = warp_pc + 4;
assign use_active = (num_active <= 1) ? (warp_active & (~warp_stalled)) : visible_active;
assign use_active = (num_active < 1) ? (warp_active & (~warp_stalled)) : visible_active;
// Choosing a warp to schedule
VX_priority_encoder choose_schedule(
@@ -238,13 +247,6 @@ module VX_warp_scheduler (
);
VX_priority_encoder choose_wsapwn(
.valids(~warp_active),
.index (warp_to_wsapwn),
.found (found_wspawn)
);
// Valid counter
VX_one_counter valid_counter(
.valids(visible_active),
@@ -252,7 +254,8 @@ module VX_warp_scheduler (
);
assign out_ebreak = (warp_active == 0);
wire ebreak = (warp_active == 0);
assign out_ebreak = ebreak;

View File

@@ -13,6 +13,7 @@ interface VX_warp_ctl_inter ();
wire wspawn;
wire[31:0] wspawn_pc;
wire[`NW-1:0] wspawn_new_active;
wire ebreak;

View File

@@ -1,7 +1,7 @@
# Dynamic Instructions: 36
# of total cycles: 50
# Dynamic Instructions: 52686
# of total cycles: 52701
# of forwarding stalls: 0
# of branch stalls: 0
# CPI: 1.38889
# time to simulate: 6.95313e-310 milliseconds
# CPI: 1.00028
# time to simulate: 0 milliseconds
# GRADE: Failed on test: 4294967295

View File

@@ -1 +1 @@
#define VCD_OUTPUT
#define VCD_OFF

View File

@@ -385,7 +385,7 @@ bool Vortex::simulate(std::string file_to_simulate)
// unsigned cycles;
counter = 0;
this->stats_total_cycles = 10;
while (this->stop && ((counter < 6)))
while (this->stop && ((counter < 2)))
// while (this->stats_total_cycles < 10)
{
// std::cout << "Counter: " << counter << "\n";