Proper SIMT with fine-grain scheduler implemented

This commit is contained in:
felsabbagh3
2019-05-10 00:49:54 -07:00
parent 96dac5e1ce
commit 48468ed26a
27 changed files with 6080 additions and 3375 deletions

View File

@@ -3,55 +3,55 @@
module VX_decode(
// Fetch Inputs
input wire clk,
input wire[31:0] in_instruction,
input wire[31:0] in_curr_PC,
input wire in_valid[`NT_M1:0],
input wire clk,
input wire[31:0] in_instruction,
input wire[31:0] in_curr_PC,
input wire in_valid[`NT_M1:0],
// WriteBack inputs
input wire[31:0] in_write_data[`NT_M1:0],
input wire[4:0] in_rd,
input wire[1:0] in_wb,
input wire in_wb_valid[`NT_M1:0],
input wire[`NW_M1:0] in_wb_warp_num,
input wire[31:0] in_write_data[`NT_M1:0],
input wire[4:0] in_rd,
input wire[1:0] in_wb,
input wire in_wb_valid[`NT_M1:0],
input wire[`NW_M1:0] in_wb_warp_num,
// FORWARDING INPUTS
input wire in_src1_fwd,
input wire[31:0] in_src1_fwd_data[`NT_M1:0],
input wire in_src2_fwd,
input wire[31:0] in_src2_fwd_data[`NT_M1:0],
input wire[`NW_M1:0] in_warp_num,
output wire[11:0] out_csr_address,
output wire out_is_csr,
output wire[31:0] out_csr_mask,
input wire in_src1_fwd,
input wire[31:0] in_src1_fwd_data[`NT_M1:0],
input wire in_src2_fwd,
input wire[31:0] in_src2_fwd_data[`NT_M1:0],
input wire[`NW_M1:0] in_warp_num,
output wire[11:0] out_csr_address,
output wire out_is_csr,
output wire[31:0] out_csr_mask,
// Outputs
output wire[4:0] out_rd,
output wire[4:0] out_rs1,
output wire[4:0] out_rs2,
output wire[31:0] out_a_reg_data[`NT_M1:0],
output wire[31:0] out_b_reg_data[`NT_M1:0],
output wire[1:0] out_wb,
output wire[4:0] out_alu_op,
output wire out_rs2_src,
output reg[31:0] out_itype_immed,
output wire[2:0] out_mem_read,
output wire[2:0] out_mem_write,
output reg[2:0] out_branch_type,
output reg out_branch_stall,
output reg out_jal,
output reg[31:0] out_jal_offset,
output reg[19:0] out_upper_immed,
output wire[31:0] out_PC_next,
output reg out_clone_stall,
output wire out_change_mask,
output wire out_thread_mask[`NT_M1:0],
output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num
output wire[4:0] out_rd,
output wire[4:0] out_rs1,
output wire[4:0] out_rs2,
output wire[31:0] out_a_reg_data[`NT_M1:0],
output wire[31:0] out_b_reg_data[`NT_M1:0],
output wire[1:0] out_wb,
output wire[4:0] out_alu_op,
output wire out_rs2_src,
output reg[31:0] out_itype_immed,
output wire[2:0] out_mem_read,
output wire[2:0] out_mem_write,
output reg[2:0] out_branch_type,
output reg out_branch_stall,
output reg out_jal,
output reg[31:0] out_jal_offset,
output reg[19:0] out_upper_immed,
output wire[31:0] out_PC_next,
output reg out_clone_stall,
output wire out_change_mask,
output wire out_thread_mask[`NT_M1:0],
output wire out_valid[`NT_M1:0],
output wire[`NW_M1:0] out_warp_num,
output wire out_wspawn,
output wire[31:0] out_wspawn_pc,
output wire out_ebreak
);
wire[6:0] curr_opcode;
@@ -73,6 +73,7 @@ module VX_decode(
wire is_clone;
wire is_jalrs;
wire is_jmprt;
wire is_wspawn;
wire write_register;
@@ -110,11 +111,28 @@ module VX_decode(
reg[4:0] alu_op;
reg[4:0] mul_alu;
wire context_zero_valid = (in_wb_warp_num == 0);
wire[31:0] w0_t0_registers[31:0];
VX_context VX_Context(
wire context_zero_valid = (in_wb_warp_num == 0);
wire[31:0] zero_a_reg_data[`NT_M1:0];
wire[31:0] zero_b_reg_data[`NT_M1:0];
reg zero_clone_stall;
// always @(*) begin
// $display("DECODE WARP: %h", in_warp_num);
// end
wire curr_warp_zero = in_warp_num == 0;
wire curr_warp_one = in_warp_num == 1;
// always @(*) begin
// $display("DECODE WARP: %h PC: %h",in_warp_num, in_curr_PC);
// end
VX_context VX_Context_zero(
.clk (clk),
.in_warp (context_zero_valid),
.in_warp (curr_warp_zero),
.in_wb_warp (context_zero_valid),
.in_valid (in_wb_valid),
.in_rd (in_rd),
.in_src1 (out_rs1),
@@ -128,13 +146,52 @@ module VX_decode(
.in_src2_fwd_data (in_src2_fwd_data),
.in_write_register(write_register),
.in_write_data (in_write_data),
.out_a_reg_data (out_a_reg_data),
.out_b_reg_data (out_b_reg_data),
.out_clone_stall (out_clone_stall)
);
.out_a_reg_data (zero_a_reg_data),
.out_b_reg_data (zero_b_reg_data),
.out_clone_stall (zero_clone_stall),
.w0_t0_registers (w0_t0_registers)
);
wire context_one_valid = (in_wb_warp_num == 1);
wire[31:0] one_a_reg_data[`NT_M1:0];
wire[31:0] one_b_reg_data[`NT_M1:0];
reg one_clone_stall;
VX_context_slave VX_Context_one(
.clk (clk),
.in_warp (curr_warp_one),
.in_wb_warp (context_one_valid),
.in_valid (in_wb_valid),
.in_rd (in_rd),
.in_src1 (out_rs1),
.in_src2 (out_rs2),
.in_curr_PC (in_curr_PC),
.in_is_clone (is_clone),
.in_is_jal (is_jal),
.in_src1_fwd (in_src1_fwd),
.in_src1_fwd_data (in_src1_fwd_data),
.in_src2_fwd (in_src2_fwd),
.in_src2_fwd_data (in_src2_fwd_data),
.in_write_register(write_register),
.in_write_data (in_write_data),
.in_wspawn_regs (w0_t0_registers),
.in_wspawn (is_wspawn),
.out_a_reg_data (one_a_reg_data),
.out_b_reg_data (one_b_reg_data),
.out_clone_stall (one_clone_stall)
);
assign out_a_reg_data = curr_warp_zero ? zero_a_reg_data : one_a_reg_data;
assign out_b_reg_data = curr_warp_zero ? zero_b_reg_data : one_b_reg_data;
assign out_clone_stall = zero_clone_stall || one_clone_stall;
// always @(*) begin
// if (context_one_valid) begin
// $display("PC: %h -> src1: %h\tsrc2: %h",in_curr_PC, one_a_reg_data[0], one_b_reg_data[0]);
// end
// end
assign out_warp_num = in_warp_num;
assign out_valid = in_valid;
assign out_valid = in_valid;
assign write_register = (in_wb != 2'h0) ? (1'b1) : (1'b0);
@@ -171,6 +228,10 @@ module VX_decode(
assign is_clone = is_gpgpu && (func3 == 5);
assign is_jalrs = is_gpgpu && (func3 == 6);
assign is_jmprt = is_gpgpu && (func3 == 4);
assign is_wspawn = is_gpgpu && (func3 == 0);
assign out_wspawn = is_wspawn;
assign out_wspawn_pc = out_a_reg_data[0];
// always @(*) begin
// if (is_jalrs) begin
@@ -259,7 +320,7 @@ module VX_decode(
case(curr_opcode)
`LUI_INST: out_upper_immed = {func7, out_rs2, out_rs1, func3};
`AUIPC_INST: out_upper_immed = {func7, out_rs2, out_rs1, func3};
default: out_upper_immed = 20'h0;
default: out_upper_immed = 20'h0;
endcase // curr_opcode
end
@@ -306,6 +367,7 @@ module VX_decode(
end
`SYS_INST:
begin
// $display("SYS EBREAK %h", (jal_sys_jal && in_valid[0]) );
out_jal = jal_sys_jal && in_valid[0];
out_jal_offset = jal_sys_off;
end
@@ -317,6 +379,13 @@ module VX_decode(
endcase
end
wire is_ebreak;
assign is_ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && in_valid[0]);
assign out_ebreak = is_ebreak;
// CSR